Commit 2f5177f0 authored by Peter Zijlstra's avatar Peter Zijlstra Committed by Ingo Molnar

sched/cgroup: Fix/cleanup cgroup teardown/init

The CPU controller hasn't kept up with the various changes in the whole
cgroup initialization / destruction sequence, and commit:

  2e91fa7f ("cgroup: keep zombies associated with their original cgroups")

caused it to explode.

The reason for this is that zombies do not inhibit css_offline() from
being called, but do stall css_released(). Now we tear down the cfs_rq
structures on css_offline() but zombies can run after that, leading to
use-after-free issues.

The solution is to move the tear-down to css_released(), which
guarantees nobody (including no zombies) is still using our cgroup.

Furthermore, a few simple cleanups are possible too. There doesn't
appear to be any point to us using css_online() (anymore?) so fold that
in css_alloc().

And since cgroup code guarantees an RCU grace period between
css_released() and css_free() we can forgo using call_rcu() and free the
stuff immediately.
Suggested-by: default avatarTejun Heo <tj@kernel.org>
Reported-by: default avatarKazuki Yamaguchi <k@rhe.jp>
Reported-by: default avatarNiklas Cassel <niklas.cassel@axis.com>
Tested-by: default avatarNiklas Cassel <niklas.cassel@axis.com>
Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: default avatarTejun Heo <tj@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 2e91fa7f ("cgroup: keep zombies associated with their original cgroups")
Link: http://lkml.kernel.org/r/20160316152245.GY6344@twins.programming.kicks-ass.netSigned-off-by: default avatarIngo Molnar <mingo@kernel.org>
parent 42e405f7
...@@ -7536,7 +7536,7 @@ void set_curr_task(int cpu, struct task_struct *p) ...@@ -7536,7 +7536,7 @@ void set_curr_task(int cpu, struct task_struct *p)
/* task_group_lock serializes the addition/removal of task groups */ /* task_group_lock serializes the addition/removal of task groups */
static DEFINE_SPINLOCK(task_group_lock); static DEFINE_SPINLOCK(task_group_lock);
static void free_sched_group(struct task_group *tg) static void sched_free_group(struct task_group *tg)
{ {
free_fair_sched_group(tg); free_fair_sched_group(tg);
free_rt_sched_group(tg); free_rt_sched_group(tg);
...@@ -7562,7 +7562,7 @@ struct task_group *sched_create_group(struct task_group *parent) ...@@ -7562,7 +7562,7 @@ struct task_group *sched_create_group(struct task_group *parent)
return tg; return tg;
err: err:
free_sched_group(tg); sched_free_group(tg);
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
} }
...@@ -7582,17 +7582,16 @@ void sched_online_group(struct task_group *tg, struct task_group *parent) ...@@ -7582,17 +7582,16 @@ void sched_online_group(struct task_group *tg, struct task_group *parent)
} }
/* rcu callback to free various structures associated with a task group */ /* rcu callback to free various structures associated with a task group */
static void free_sched_group_rcu(struct rcu_head *rhp) static void sched_free_group_rcu(struct rcu_head *rhp)
{ {
/* now it should be safe to free those cfs_rqs */ /* now it should be safe to free those cfs_rqs */
free_sched_group(container_of(rhp, struct task_group, rcu)); sched_free_group(container_of(rhp, struct task_group, rcu));
} }
/* Destroy runqueue etc associated with a task group */
void sched_destroy_group(struct task_group *tg) void sched_destroy_group(struct task_group *tg)
{ {
/* wait for possible concurrent references to cfs_rqs complete */ /* wait for possible concurrent references to cfs_rqs complete */
call_rcu(&tg->rcu, free_sched_group_rcu); call_rcu(&tg->rcu, sched_free_group_rcu);
} }
void sched_offline_group(struct task_group *tg) void sched_offline_group(struct task_group *tg)
...@@ -8051,31 +8050,26 @@ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) ...@@ -8051,31 +8050,26 @@ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
if (IS_ERR(tg)) if (IS_ERR(tg))
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
sched_online_group(tg, parent);
return &tg->css; return &tg->css;
} }
static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) static void cpu_cgroup_css_released(struct cgroup_subsys_state *css)
{ {
struct task_group *tg = css_tg(css); struct task_group *tg = css_tg(css);
struct task_group *parent = css_tg(css->parent);
if (parent) sched_offline_group(tg);
sched_online_group(tg, parent);
return 0;
} }
static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
{ {
struct task_group *tg = css_tg(css); struct task_group *tg = css_tg(css);
sched_destroy_group(tg); /*
} * Relies on the RCU grace period between css_released() and this.
*/
static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css) sched_free_group(tg);
{
struct task_group *tg = css_tg(css);
sched_offline_group(tg);
} }
static void cpu_cgroup_fork(struct task_struct *task) static void cpu_cgroup_fork(struct task_struct *task)
...@@ -8435,9 +8429,8 @@ static struct cftype cpu_files[] = { ...@@ -8435,9 +8429,8 @@ static struct cftype cpu_files[] = {
struct cgroup_subsys cpu_cgrp_subsys = { struct cgroup_subsys cpu_cgrp_subsys = {
.css_alloc = cpu_cgroup_css_alloc, .css_alloc = cpu_cgroup_css_alloc,
.css_released = cpu_cgroup_css_released,
.css_free = cpu_cgroup_css_free, .css_free = cpu_cgroup_css_free,
.css_online = cpu_cgroup_css_online,
.css_offline = cpu_cgroup_css_offline,
.fork = cpu_cgroup_fork, .fork = cpu_cgroup_fork,
.can_attach = cpu_cgroup_can_attach, .can_attach = cpu_cgroup_can_attach,
.attach = cpu_cgroup_attach, .attach = cpu_cgroup_attach,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment