Commit 7c5f64f8 authored by Vladimir Davydov's avatar Vladimir Davydov Committed by Linus Torvalds

mm: oom: deduplicate victim selection code for memcg and global oom

When selecting an oom victim, we use the same heuristic for both memory
cgroup and global oom.  The only difference is the scope of tasks to
select the victim from.  So we could just export an iterator over all
memcg tasks and keep all oom related logic in oom_kill.c, but instead we
duplicate pieces of it in memcontrol.c reusing some initially private
functions of oom_kill.c in order to not duplicate all of it.  That looks
ugly and error prone, because any modification of select_bad_process
should also be propagated to mem_cgroup_out_of_memory.

Let's rework this as follows: keep all oom heuristic related code private
to oom_kill.c and make oom_kill.c use exported memcg functions when it's
really necessary (like in case of iterating over memcg tasks).

Link: http://lkml.kernel.org/r/1470056933-7505-1-git-send-email-vdavydov@virtuozzo.comSigned-off-by: default avatarVladimir Davydov <vdavydov@virtuozzo.com>
Acked-by: default avatarJohannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 48e509ec
......@@ -366,6 +366,8 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
struct mem_cgroup *,
struct mem_cgroup_reclaim_cookie *);
void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
int mem_cgroup_scan_tasks(struct mem_cgroup *,
int (*)(struct task_struct *, void *), void *);
static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
{
......@@ -446,6 +448,8 @@ unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
void mem_cgroup_handle_over_high(void);
unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg);
void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
struct task_struct *p);
......@@ -639,6 +643,12 @@ static inline void mem_cgroup_iter_break(struct mem_cgroup *root,
{
}
static inline int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
int (*fn)(struct task_struct *, void *), void *arg)
{
return 0;
}
static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
{
return 0;
......@@ -669,6 +679,11 @@ mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
return 0;
}
static inline unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
{
return 0;
}
static inline void
mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
{
......
......@@ -34,23 +34,11 @@ struct oom_control {
* for display purposes.
*/
const int order;
};
/*
* Types of limitations to the nodes from which allocations may occur
*/
enum oom_constraint {
CONSTRAINT_NONE,
CONSTRAINT_CPUSET,
CONSTRAINT_MEMORY_POLICY,
CONSTRAINT_MEMCG,
};
enum oom_scan_t {
OOM_SCAN_OK, /* scan thread and find its badness */
OOM_SCAN_CONTINUE, /* do not consider thread for oom kill */
OOM_SCAN_ABORT, /* abort the iteration and return */
OOM_SCAN_SELECT, /* always select this thread first */
/* Used by oom implementation, do not set */
unsigned long totalpages;
struct task_struct *chosen;
unsigned long chosen_points;
};
extern struct mutex oom_lock;
......@@ -70,30 +58,10 @@ static inline bool oom_task_origin(const struct task_struct *p)
return p->signal->oom_flag_origin;
}
extern void mark_oom_victim(struct task_struct *tsk);
#ifdef CONFIG_MMU
extern void wake_oom_reaper(struct task_struct *tsk);
#else
static inline void wake_oom_reaper(struct task_struct *tsk)
{
}
#endif
extern unsigned long oom_badness(struct task_struct *p,
struct mem_cgroup *memcg, const nodemask_t *nodemask,
unsigned long totalpages);
extern void oom_kill_process(struct oom_control *oc, struct task_struct *p,
unsigned int points, unsigned long totalpages,
const char *message);
extern void check_panic_on_oom(struct oom_control *oc,
enum oom_constraint constraint);
extern enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
struct task_struct *task);
extern bool out_of_memory(struct oom_control *oc);
extern void exit_oom_victim(struct task_struct *tsk);
......@@ -101,14 +69,11 @@ extern void exit_oom_victim(struct task_struct *tsk);
extern int register_oom_notifier(struct notifier_block *nb);
extern int unregister_oom_notifier(struct notifier_block *nb);
extern bool oom_killer_disabled;
extern bool oom_killer_disable(void);
extern void oom_killer_enable(void);
extern struct task_struct *find_lock_task_mm(struct task_struct *p);
bool task_will_free_mem(struct task_struct *task);
/* sysctls */
extern int sysctl_oom_dump_tasks;
extern int sysctl_oom_kill_allocating_task;
......
......@@ -920,6 +920,43 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
iter != NULL; \
iter = mem_cgroup_iter(NULL, iter, NULL))
/**
* mem_cgroup_scan_tasks - iterate over tasks of a memory cgroup hierarchy
* @memcg: hierarchy root
* @fn: function to call for each task
* @arg: argument passed to @fn
*
* This function iterates over tasks attached to @memcg or to any of its
* descendants and calls @fn for each task. If @fn returns a non-zero
* value, the function breaks the iteration loop and returns the value.
* Otherwise, it will iterate over all tasks and return 0.
*
* This function must not be called for the root memory cgroup.
*/
int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
int (*fn)(struct task_struct *, void *), void *arg)
{
struct mem_cgroup *iter;
int ret = 0;
BUG_ON(memcg == root_mem_cgroup);
for_each_mem_cgroup_tree(iter, memcg) {
struct css_task_iter it;
struct task_struct *task;
css_task_iter_start(&iter->css, &it);
while (!ret && (task = css_task_iter_next(&it)))
ret = fn(task, arg);
css_task_iter_end(&it);
if (ret) {
mem_cgroup_iter_break(memcg, iter);
break;
}
}
return ret;
}
/**
* mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page
* @page: the page
......@@ -1178,7 +1215,7 @@ static int mem_cgroup_count_children(struct mem_cgroup *memcg)
/*
* Return the memory (and swap, if configured) limit for a memcg.
*/
static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
{
unsigned long limit;
......@@ -1205,79 +1242,12 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
.gfp_mask = gfp_mask,
.order = order,
};
struct mem_cgroup *iter;
unsigned long chosen_points = 0;
unsigned long totalpages;
unsigned int points = 0;
struct task_struct *chosen = NULL;
bool ret;
mutex_lock(&oom_lock);
/*
* If current has a pending SIGKILL or is exiting, then automatically
* select it. The goal is to allow it to allocate so that it may
* quickly exit and free its memory.
*/
if (task_will_free_mem(current)) {
mark_oom_victim(current);
wake_oom_reaper(current);
goto unlock;
}
check_panic_on_oom(&oc, CONSTRAINT_MEMCG);
totalpages = mem_cgroup_get_limit(memcg) ? : 1;
for_each_mem_cgroup_tree(iter, memcg) {
struct css_task_iter it;
struct task_struct *task;
css_task_iter_start(&iter->css, &it);
while ((task = css_task_iter_next(&it))) {
switch (oom_scan_process_thread(&oc, task)) {
case OOM_SCAN_SELECT:
if (chosen)
put_task_struct(chosen);
chosen = task;
chosen_points = ULONG_MAX;
get_task_struct(chosen);
/* fall through */
case OOM_SCAN_CONTINUE:
continue;
case OOM_SCAN_ABORT:
css_task_iter_end(&it);
mem_cgroup_iter_break(memcg, iter);
if (chosen)
put_task_struct(chosen);
/* Set a dummy value to return "true". */
chosen = (void *) 1;
goto unlock;
case OOM_SCAN_OK:
break;
};
points = oom_badness(task, memcg, NULL, totalpages);
if (!points || points < chosen_points)
continue;
/* Prefer thread group leaders for display purposes */
if (points == chosen_points &&
thread_group_leader(chosen))
continue;
if (chosen)
put_task_struct(chosen);
chosen = task;
chosen_points = points;
get_task_struct(chosen);
}
css_task_iter_end(&it);
}
if (chosen) {
points = chosen_points * 1000 / totalpages;
oom_kill_process(&oc, chosen, points, totalpages,
"Memory cgroup out of memory");
}
unlock:
ret = out_of_memory(&oc);
mutex_unlock(&oom_lock);
return chosen;
return ret;
}
#if MAX_NUMNODES > 1
......@@ -1600,7 +1570,7 @@ bool mem_cgroup_oom_synchronize(bool handle)
if (!memcg)
return false;
if (!handle || oom_killer_disabled)
if (!handle)
goto cleanup;
owait.memcg = memcg;
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment