Commit 2b021cbf authored by Tejun Heo

cgroup: ignore css_sets associated with dead cgroups during migration

Before 2e91fa7f ("cgroup: keep zombies associated with their
original cgroups"), all dead tasks were associated with init_css_set.
If a zombie task is requested for migration, while migration prep
operations would still be performed on init_css_set, the actual
migration would ignore zombie tasks.  As init_css_set is always valid,
this worked fine.

However, after 2e91fa7f, a zombie task stays with the css_set it was
associated with at the time of death.  Let's say a task T is associated
with cgroup A on hierarchy H-1 and cgroup B on hierarchy H-2.  After T
becomes a zombie, it would still remain associated with A and B.  If A
only contains zombie tasks, it can be removed.  On removal, A gets
marked offline but stays pinned until all zombies are drained.  At
this point, if migration is initiated on T to a cgroup C on hierarchy
H-2, migration path would try to prepare T's css_set for migration and
trigger the following.

 WARNING: CPU: 0 PID: 1576 at kernel/cgroup.c:474 cgroup_get+0x121/0x160()
 CPU: 0 PID: 1576 Comm: bash Not tainted 4.4.0-work+ #289
 ...
 Call Trace:
  [<ffffffff8127e63c>] dump_stack+0x4e/0x82
  [<ffffffff810445e8>] warn_slowpath_common+0x78/0xb0
  [<ffffffff810446d5>] warn_slowpath_null+0x15/0x20
  [<ffffffff810c33e1>] cgroup_get+0x121/0x160
  [<ffffffff810c349b>] link_css_set+0x7b/0x90
  [<ffffffff810c4fbc>] find_css_set+0x3bc/0x5e0
  [<ffffffff810c5269>] cgroup_migrate_prepare_dst+0x89/0x1f0
  [<ffffffff810c7547>] cgroup_attach_task+0x157/0x230
  [<ffffffff810c7a17>] __cgroup_procs_write+0x2b7/0x470
  [<ffffffff810c7bdc>] cgroup_tasks_write+0xc/0x10
  [<ffffffff810c4790>] cgroup_file_write+0x30/0x1b0
  [<ffffffff811c68fc>] kernfs_fop_write+0x13c/0x180
  [<ffffffff81151673>] __vfs_write+0x23/0xe0
  [<ffffffff81152494>] vfs_write+0xa4/0x1a0
  [<ffffffff811532d4>] SyS_write+0x44/0xa0
  [<ffffffff814af2d7>] entry_SYSCALL_64_fastpath+0x12/0x6f

It doesn't make sense to prepare migration for css_sets pointing to
dead cgroups as they are guaranteed to contain only zombies which are
ignored later during migration.  This patch makes cgroup destruction
path mark all affected css_sets as dead and updates the migration path
to ignore them during preparation.
Signed-off-by: Tejun Heo <tj@kernel.org>
Fixes: 2e91fa7f ("cgroup: keep zombies associated with their original cgroups")
Cc: stable@vger.kernel.org # v4.4+
parent 6c83e6cb
...@@ -212,6 +212,9 @@ struct css_set { ...@@ -212,6 +212,9 @@ struct css_set {
/* all css_task_iters currently walking this cset */ /* all css_task_iters currently walking this cset */
struct list_head task_iters; struct list_head task_iters;
/* dead and being drained, ignore for migration */
bool dead;
/* For RCU-protected deletion */ /* For RCU-protected deletion */
struct rcu_head rcu_head; struct rcu_head rcu_head;
}; };
......
...@@ -2516,6 +2516,14 @@ static void cgroup_migrate_add_src(struct css_set *src_cset, ...@@ -2516,6 +2516,14 @@ static void cgroup_migrate_add_src(struct css_set *src_cset,
lockdep_assert_held(&cgroup_mutex); lockdep_assert_held(&cgroup_mutex);
lockdep_assert_held(&css_set_lock); lockdep_assert_held(&css_set_lock);
/*
* If ->dead, @src_cset is associated with one or more dead cgroups
* and doesn't contain any migratable tasks. Ignore it early so
* that the rest of migration path doesn't get confused by it.
*/
if (src_cset->dead)
return;
src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root); src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);
if (!list_empty(&src_cset->mg_preload_node)) if (!list_empty(&src_cset->mg_preload_node))
...@@ -5258,6 +5266,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) ...@@ -5258,6 +5266,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
__releases(&cgroup_mutex) __acquires(&cgroup_mutex) __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
{ {
struct cgroup_subsys_state *css; struct cgroup_subsys_state *css;
struct cgrp_cset_link *link;
int ssid; int ssid;
lockdep_assert_held(&cgroup_mutex); lockdep_assert_held(&cgroup_mutex);
...@@ -5278,11 +5287,18 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) ...@@ -5278,11 +5287,18 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
return -EBUSY; return -EBUSY;
/* /*
* Mark @cgrp dead. This prevents further task migration and child * Mark @cgrp and the associated csets dead. The former prevents
* creation by disabling cgroup_lock_live_group(). * further task migration and child creation by disabling
* cgroup_lock_live_group(). The latter makes the csets ignored by
* the migration path.
*/ */
cgrp->self.flags &= ~CSS_ONLINE; cgrp->self.flags &= ~CSS_ONLINE;
spin_lock_bh(&css_set_lock);
list_for_each_entry(link, &cgrp->cset_links, cset_link)
link->cset->dead = true;
spin_unlock_bh(&css_set_lock);
/* initiate massacre of all css's */ /* initiate massacre of all css's */
for_each_css(css, ssid, cgrp) for_each_css(css, ssid, cgrp)
kill_css(css); kill_css(css);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment