Commit 5cf1cacb authored by Tejun Heo

cgroup, cpuset: replace cpuset_post_attach_flush() with cgroup_subsys->post_attach callback

Since e93ad19d ("cpuset: make mm migration asynchronous"), cpuset
kicks off asynchronous NUMA node migration if necessary during task
migration and flushes it from cpuset_post_attach_flush() which is
called at the end of __cgroup_procs_write().  This is to avoid
performing migration with cgroup_threadgroup_rwsem write-locked which
can lead to deadlock through dependency on kworker creation.

memcg has a similar issue with charge moving, so let's convert it to
an official callback rather than the current one-off cpuset specific
function.  This patch adds cgroup_subsys->post_attach callback and
makes cpuset register cpuset_post_attach_flush() as its ->post_attach.

The conversion is mostly one-to-one except that the new callback is
called under cgroup_mutex.  This is to guarantee that no other
migration operations are started before ->post_attach callbacks are
finished.  cgroup_mutex is one of the outermost mutexes in the system
and has never been, and shouldn't become, a problem.  We can add specialized
synchronization around __cgroup_procs_write() but I don't think
there's any noticeable benefit.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: <stable@vger.kernel.org> # 4.4+ prerequisite for the next patch
parent bcc981e9
...@@ -444,6 +444,7 @@ struct cgroup_subsys { ...@@ -444,6 +444,7 @@ struct cgroup_subsys {
int (*can_attach)(struct cgroup_taskset *tset); int (*can_attach)(struct cgroup_taskset *tset);
void (*cancel_attach)(struct cgroup_taskset *tset); void (*cancel_attach)(struct cgroup_taskset *tset);
void (*attach)(struct cgroup_taskset *tset); void (*attach)(struct cgroup_taskset *tset);
void (*post_attach)(void);
int (*can_fork)(struct task_struct *task); int (*can_fork)(struct task_struct *task);
void (*cancel_fork)(struct task_struct *task); void (*cancel_fork)(struct task_struct *task);
void (*fork)(struct task_struct *task); void (*fork)(struct task_struct *task);
......
...@@ -137,8 +137,6 @@ static inline void set_mems_allowed(nodemask_t nodemask) ...@@ -137,8 +137,6 @@ static inline void set_mems_allowed(nodemask_t nodemask)
task_unlock(current); task_unlock(current);
} }
extern void cpuset_post_attach_flush(void);
#else /* !CONFIG_CPUSETS */ #else /* !CONFIG_CPUSETS */
static inline bool cpusets_enabled(void) { return false; } static inline bool cpusets_enabled(void) { return false; }
...@@ -245,10 +243,6 @@ static inline bool read_mems_allowed_retry(unsigned int seq) ...@@ -245,10 +243,6 @@ static inline bool read_mems_allowed_retry(unsigned int seq)
return false; return false;
} }
static inline void cpuset_post_attach_flush(void)
{
}
#endif /* !CONFIG_CPUSETS */ #endif /* !CONFIG_CPUSETS */
#endif /* _LINUX_CPUSET_H */ #endif /* _LINUX_CPUSET_H */
...@@ -2825,9 +2825,10 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, ...@@ -2825,9 +2825,10 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off, bool threadgroup) size_t nbytes, loff_t off, bool threadgroup)
{ {
struct task_struct *tsk; struct task_struct *tsk;
struct cgroup_subsys *ss;
struct cgroup *cgrp; struct cgroup *cgrp;
pid_t pid; pid_t pid;
int ret; int ssid, ret;
if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0) if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
return -EINVAL; return -EINVAL;
...@@ -2875,8 +2876,10 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, ...@@ -2875,8 +2876,10 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
rcu_read_unlock(); rcu_read_unlock();
out_unlock_threadgroup: out_unlock_threadgroup:
percpu_up_write(&cgroup_threadgroup_rwsem); percpu_up_write(&cgroup_threadgroup_rwsem);
for_each_subsys(ss, ssid)
if (ss->post_attach)
ss->post_attach();
cgroup_kn_unlock(of->kn); cgroup_kn_unlock(of->kn);
cpuset_post_attach_flush();
return ret ?: nbytes; return ret ?: nbytes;
} }
......
...@@ -58,7 +58,6 @@ ...@@ -58,7 +58,6 @@
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <linux/atomic.h> #include <linux/atomic.h>
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/workqueue.h>
#include <linux/cgroup.h> #include <linux/cgroup.h>
#include <linux/wait.h> #include <linux/wait.h>
...@@ -1016,7 +1015,7 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, ...@@ -1016,7 +1015,7 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
} }
} }
void cpuset_post_attach_flush(void) static void cpuset_post_attach(void)
{ {
flush_workqueue(cpuset_migrate_mm_wq); flush_workqueue(cpuset_migrate_mm_wq);
} }
...@@ -2087,6 +2086,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = { ...@@ -2087,6 +2086,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
.can_attach = cpuset_can_attach, .can_attach = cpuset_can_attach,
.cancel_attach = cpuset_cancel_attach, .cancel_attach = cpuset_cancel_attach,
.attach = cpuset_attach, .attach = cpuset_attach,
.post_attach = cpuset_post_attach,
.bind = cpuset_bind, .bind = cpuset_bind,
.legacy_cftypes = files, .legacy_cftypes = files,
.early_init = true, .early_init = true,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment