Commit 4530eddb authored by Tejun Heo

cgroup, memcg, cpuset: implement cgroup_taskset_for_each_leader()

It wasn't explicitly documented, but when a process is being migrated,
cpuset and memcg depend on cgroup_taskset_first() returning the
threadgroup leader. That implicit dependency is fragile and would no
longer work for the planned multi-process migration.

This patch introduces an explicit cgroup_taskset_for_each_leader(),
which iterates over only the threadgroup leaders, and converts the
cgroup_taskset_first() usages that accessed the leader to use it.

This prepares both memcg and cpuset for multi-process migration.  This
patch also updates the documentation for cgroup_taskset_for_each() to
clarify the iteration rules and removes comments mentioning task
ordering in tasksets.

v2: A previous patch which added a threadgroup leader test was dropped.
    Patch updated accordingly.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Zefan Li <lizefan@huawei.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
parent 3df9ca0a
@@ -232,11 +232,33 @@ void css_task_iter_end(struct css_task_iter *it);
  * cgroup_taskset_for_each - iterate cgroup_taskset
  * @task: the loop cursor
  * @tset: taskset to iterate
+ *
+ * @tset may contain multiple tasks and they may belong to multiple
+ * processes. When there are multiple tasks in @tset, if a task of a
+ * process is in @tset, all tasks of the process are in @tset. Also, all
+ * are guaranteed to share the same source and destination csses.
+ *
+ * Iteration is not in any specific order.
  */
 #define cgroup_taskset_for_each(task, tset)				\
	for ((task) = cgroup_taskset_first((tset)); (task);		\
	     (task) = cgroup_taskset_next((tset)))

+/**
+ * cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset
+ * @leader: the loop cursor
+ * @tset: taskset to iterate
+ *
+ * Iterate threadgroup leaders of @tset. For single-task migrations, @tset
+ * may not contain any.
+ */
+#define cgroup_taskset_for_each_leader(leader, tset)			\
+	for ((leader) = cgroup_taskset_first((tset)); (leader);	\
+	     (leader) = cgroup_taskset_next((tset)))			\
+		if ((leader) != (leader)->group_leader)			\
+			;						\
+		else
+
 /*
  * Inline functions.
  */
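The new iterator works by chaining a trailing "if (...) ; else" onto the full-taskset loop: non-leader iterations hit the empty statement, and only leaders reach the caller-supplied body, which also keeps the macro safe to use inside surrounding if/else chains. Below is a minimal userspace sketch of the same idiom; the struct task, the for_each_leader() macro, and the sample data are illustrative stand-ins, not kernel code.

#include <stdio.h>

struct task { int tid; int tgid; };	/* a leader has tid == tgid */

/*
 * Walk every task but execute the body only for threadgroup leaders.
 * The trailing "if (...) ; else" keeps the caller's statement as the
 * loop body without extra braces, so the macro nests safely inside
 * surrounding if/else chains -- the same trick the kernel macro uses.
 */
#define for_each_leader(t, tasks, n)				\
	for ((t) = (tasks); (t) < (tasks) + (n); (t)++)		\
		if ((t)->tid != (t)->tgid)			\
			;					\
		else

int main(void)
{
	/*
	 * Two whole processes (100 with two threads, 200 with one) plus a
	 * lone non-leader thread 301 whose leader is not in the set.
	 */
	struct task tasks[] = {
		{ 100, 100 }, { 101, 100 }, { 200, 200 }, { 301, 300 },
	};
	size_t n = sizeof(tasks) / sizeof(tasks[0]);
	struct task *t;

	for_each_leader(t, tasks, n)
		printf("leader tid=%d\n", t->tid);	/* prints 100 and 200 */

	return 0;
}

Note that the lone non-leader thread is skipped entirely, mirroring the documented case where a single-task migration may contain no leader at all.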
@@ -2217,13 +2217,6 @@ static void cgroup_task_migrate(struct cgroup *old_cgrp,
 	get_css_set(new_cset);
 	rcu_assign_pointer(tsk->cgroups, new_cset);

-	/*
-	 * Use move_tail so that cgroup_taskset_first() still returns the
-	 * leader after migration. This works because cgroup_migrate()
-	 * ensures that the dst_cset of the leader is the first on the
-	 * tset's dst_csets list.
-	 */
 	list_move_tail(&tsk->cg_list, &new_cset->mg_tasks);

 	/*
@@ -2419,10 +2412,6 @@ static int cgroup_migrate(struct cgroup *cgrp, struct task_struct *leader,
 		if (!cset->mg_src_cgrp)
 			goto next;

-		/*
-		 * cgroup_taskset_first() must always return the leader.
-		 * Take care to avoid disturbing the ordering.
-		 */
 		list_move_tail(&task->cg_list, &cset->mg_tasks);
 		if (list_empty(&cset->mg_node))
 			list_add_tail(&cset->mg_node, &tset.src_csets);
@@ -1488,7 +1488,7 @@ static void cpuset_attach(struct cgroup_subsys_state *css,
 	/* static buf protected by cpuset_mutex */
 	static nodemask_t cpuset_attach_nodemask_to;
 	struct task_struct *task;
-	struct task_struct *leader = cgroup_taskset_first(tset);
+	struct task_struct *leader;
 	struct cpuset *cs = css_cs(css);
 	struct cpuset *oldcs = cpuset_attach_old_cs;
@@ -1514,12 +1514,11 @@ static void cpuset_attach(struct cgroup_subsys_state *css,
 	}

 	/*
-	 * Change mm, possibly for multiple threads in a threadgroup. This
-	 * is expensive and may sleep and should be moved outside migration
-	 * path proper.
+	 * Change mm for all threadgroup leaders. This is expensive and may
+	 * sleep and should be moved outside migration path proper.
 	 */
 	cpuset_attach_nodemask_to = cs->effective_mems;
-	if (thread_group_leader(leader)) {
+	cgroup_taskset_for_each_leader(leader, tset) {
 		struct mm_struct *mm = get_task_mm(leader);

 		if (mm) {
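The cpuset conversion above shows the general shape for controllers that need per-process rather than per-thread work in their attach path: iterate only the leaders and operate on each process's mm. A rough sketch of that pattern follows; example_attach() and update_process_state() are hypothetical stand-ins for controller-specific code, while cgroup_taskset_for_each_leader(), get_task_mm() and mmput() are the real interfaces used in the hunks above.

static void example_attach(struct cgroup_subsys_state *css,
			   struct cgroup_taskset *tset)
{
	struct task_struct *leader;

	/* One iteration per migrating process, not per thread. */
	cgroup_taskset_for_each_leader(leader, tset) {
		/*
		 * get_task_mm() returns NULL for kernel threads and for
		 * tasks whose mm is already gone; otherwise it takes a
		 * reference that must be dropped with mmput().
		 */
		struct mm_struct *mm = get_task_mm(leader);

		if (mm) {
			update_process_state(css, mm);	/* hypothetical helper */
			mmput(mm);
		}
	}
}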
@@ -4828,7 +4828,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	struct mem_cgroup *from;
-	struct task_struct *p;
+	struct task_struct *leader, *p;
 	struct mm_struct *mm;
 	unsigned long move_flags;
 	int ret = 0;
@@ -4842,7 +4842,20 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
 	if (!move_flags)
 		return 0;

-	p = cgroup_taskset_first(tset);
+	/*
+	 * Multi-process migrations only happen on the default hierarchy
+	 * where charge immigration is not used. Perform charge
+	 * immigration if @tset contains a leader and whine if there are
+	 * multiple.
+	 */
+	p = NULL;
+	cgroup_taskset_for_each_leader(leader, tset) {
+		WARN_ON_ONCE(p);
+		p = leader;
+	}
+	if (!p)
+		return 0;
+
 	from = mem_cgroup_from_task(p);
 	VM_BUG_ON(from == memcg);