Commit 51bee5ab, authored by Oleg Nesterov, committed by Tejun Heo

cgroup/pids: turn cgroup_subsys->free() into cgroup_subsys->release() to fix the accounting

The only user of cgroup_subsys->free() callback is pids_cgrp_subsys which
needs pids_free() to uncharge the pid.

However, ->free() is called from __put_task_struct()->cgroup_free() and this
is too late. Even the trivial program which does

	for (;;) {
		int pid = fork();
		assert(pid >= 0);
		if (pid)
			wait(NULL);
		else
			exit(0);
	}

can hit the pids limit because release_task()->call_rcu(delayed_put_task_struct)
implies an RCU grace period after the task/pid goes away and before the final put().

Test-case:

	mkdir -p /tmp/CG
	mount -t cgroup2 none /tmp/CG
	echo '+pids' > /tmp/CG/cgroup.subtree_control

	mkdir /tmp/CG/PID
	echo 2 > /tmp/CG/PID/pids.max

	perl -e 'while ($p = fork) { wait; } $p // die "fork failed: $!\n"' &
	echo $! > /tmp/CG/PID/cgroup.procs

Without this patch the forking process fails soon after migration.

Rename cgroup_subsys->free() to cgroup_subsys->release() and move the callsite
into the new helper, cgroup_release(), called by release_task() which actually
frees the pid(s).

Reported-by: Herton R. Krzesinski <hkrzesin@redhat.com>
Reported-by: Jan Stancek <jstancek@redhat.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
parent af0c9af1
...@@ -602,7 +602,7 @@ struct cgroup_subsys { ...@@ -602,7 +602,7 @@ struct cgroup_subsys {
void (*cancel_fork)(struct task_struct *task); void (*cancel_fork)(struct task_struct *task);
void (*fork)(struct task_struct *task); void (*fork)(struct task_struct *task);
void (*exit)(struct task_struct *task); void (*exit)(struct task_struct *task);
void (*free)(struct task_struct *task); void (*release)(struct task_struct *task);
void (*bind)(struct cgroup_subsys_state *root_css); void (*bind)(struct cgroup_subsys_state *root_css);
bool early_init:1; bool early_init:1;
......
...@@ -121,6 +121,7 @@ extern int cgroup_can_fork(struct task_struct *p); ...@@ -121,6 +121,7 @@ extern int cgroup_can_fork(struct task_struct *p);
extern void cgroup_cancel_fork(struct task_struct *p); extern void cgroup_cancel_fork(struct task_struct *p);
extern void cgroup_post_fork(struct task_struct *p); extern void cgroup_post_fork(struct task_struct *p);
void cgroup_exit(struct task_struct *p); void cgroup_exit(struct task_struct *p);
void cgroup_release(struct task_struct *p);
void cgroup_free(struct task_struct *p); void cgroup_free(struct task_struct *p);
int cgroup_init_early(void); int cgroup_init_early(void);
...@@ -697,6 +698,7 @@ static inline int cgroup_can_fork(struct task_struct *p) { return 0; } ...@@ -697,6 +698,7 @@ static inline int cgroup_can_fork(struct task_struct *p) { return 0; }
static inline void cgroup_cancel_fork(struct task_struct *p) {} static inline void cgroup_cancel_fork(struct task_struct *p) {}
static inline void cgroup_post_fork(struct task_struct *p) {} static inline void cgroup_post_fork(struct task_struct *p) {}
static inline void cgroup_exit(struct task_struct *p) {} static inline void cgroup_exit(struct task_struct *p) {}
static inline void cgroup_release(struct task_struct *p) {}
static inline void cgroup_free(struct task_struct *p) {} static inline void cgroup_free(struct task_struct *p) {}
static inline int cgroup_init_early(void) { return 0; } static inline int cgroup_init_early(void) { return 0; }
......
...@@ -197,7 +197,7 @@ static u64 css_serial_nr_next = 1; ...@@ -197,7 +197,7 @@ static u64 css_serial_nr_next = 1;
*/ */
static u16 have_fork_callback __read_mostly; static u16 have_fork_callback __read_mostly;
static u16 have_exit_callback __read_mostly; static u16 have_exit_callback __read_mostly;
static u16 have_free_callback __read_mostly; static u16 have_release_callback __read_mostly;
static u16 have_canfork_callback __read_mostly; static u16 have_canfork_callback __read_mostly;
/* cgroup namespace for init task */ /* cgroup namespace for init task */
...@@ -5313,7 +5313,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) ...@@ -5313,7 +5313,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
have_fork_callback |= (bool)ss->fork << ss->id; have_fork_callback |= (bool)ss->fork << ss->id;
have_exit_callback |= (bool)ss->exit << ss->id; have_exit_callback |= (bool)ss->exit << ss->id;
have_free_callback |= (bool)ss->free << ss->id; have_release_callback |= (bool)ss->release << ss->id;
have_canfork_callback |= (bool)ss->can_fork << ss->id; have_canfork_callback |= (bool)ss->can_fork << ss->id;
/* At system boot, before all subsystems have been /* At system boot, before all subsystems have been
...@@ -5749,16 +5749,19 @@ void cgroup_exit(struct task_struct *tsk) ...@@ -5749,16 +5749,19 @@ void cgroup_exit(struct task_struct *tsk)
} while_each_subsys_mask(); } while_each_subsys_mask();
} }
void cgroup_free(struct task_struct *task) void cgroup_release(struct task_struct *task)
{ {
struct css_set *cset = task_css_set(task);
struct cgroup_subsys *ss; struct cgroup_subsys *ss;
int ssid; int ssid;
do_each_subsys_mask(ss, ssid, have_free_callback) { do_each_subsys_mask(ss, ssid, have_release_callback) {
ss->free(task); ss->release(task);
} while_each_subsys_mask(); } while_each_subsys_mask();
}
void cgroup_free(struct task_struct *task)
{
struct css_set *cset = task_css_set(task);
put_css_set(cset); put_css_set(cset);
} }
......
...@@ -247,7 +247,7 @@ static void pids_cancel_fork(struct task_struct *task) ...@@ -247,7 +247,7 @@ static void pids_cancel_fork(struct task_struct *task)
pids_uncharge(pids, 1); pids_uncharge(pids, 1);
} }
static void pids_free(struct task_struct *task) static void pids_release(struct task_struct *task)
{ {
struct pids_cgroup *pids = css_pids(task_css(task, pids_cgrp_id)); struct pids_cgroup *pids = css_pids(task_css(task, pids_cgrp_id));
...@@ -342,7 +342,7 @@ struct cgroup_subsys pids_cgrp_subsys = { ...@@ -342,7 +342,7 @@ struct cgroup_subsys pids_cgrp_subsys = {
.cancel_attach = pids_cancel_attach, .cancel_attach = pids_cancel_attach,
.can_fork = pids_can_fork, .can_fork = pids_can_fork,
.cancel_fork = pids_cancel_fork, .cancel_fork = pids_cancel_fork,
.free = pids_free, .release = pids_release,
.legacy_cftypes = pids_files, .legacy_cftypes = pids_files,
.dfl_cftypes = pids_files, .dfl_cftypes = pids_files,
.threaded = true, .threaded = true,
......
...@@ -219,6 +219,7 @@ void release_task(struct task_struct *p) ...@@ -219,6 +219,7 @@ void release_task(struct task_struct *p)
} }
write_unlock_irq(&tasklist_lock); write_unlock_irq(&tasklist_lock);
cgroup_release(p);
release_thread(p); release_thread(p);
call_rcu(&p->rcu, delayed_put_task_struct); call_rcu(&p->rcu, delayed_put_task_struct);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment