Commit e25e2cbb authored by Tejun Heo's avatar Tejun Heo

cgroup: add cgroup_root_mutex

cgroup wants to make threadgroup stable while modifying cgroup
hierarchies which will introduce locking dependency on
cred_guard_mutex from cgroup_mutex.  This unfortunately completes
circular dependency.

 A. cgroup_mutex -> cred_guard_mutex -> s_type->i_mutex_key -> namespace_sem
 B. namespace_sem -> cgroup_mutex

B is from cgroup_show_options() and this patch breaks it by
introducing another mutex cgroup_root_mutex which nests inside
cgroup_mutex and protects cgroupfs_root.
Signed-off-by: default avatarTejun Heo <tj@kernel.org>
Reviewed-by: default avatarKAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: default avatarLi Zefan <lizf@cn.fujitsu.com>
Cc: Oleg Nesterov <oleg@redhat.com>
parent 467de1fc
...@@ -63,7 +63,24 @@ ...@@ -63,7 +63,24 @@
#include <linux/atomic.h> #include <linux/atomic.h>
/*
* cgroup_mutex is the master lock. Any modification to cgroup or its
* hierarchy must be performed while holding it.
*
* cgroup_root_mutex nests inside cgroup_mutex and should be held to modify
* cgroupfs_root of any cgroup hierarchy - subsys list, flags,
* release_agent_path and so on. Modifying requires both cgroup_mutex and
* cgroup_root_mutex. Readers can acquire either of the two. This is to
* break the following locking order cycle.
*
* A. cgroup_mutex -> cred_guard_mutex -> s_type->i_mutex_key -> namespace_sem
* B. namespace_sem -> cgroup_mutex
*
* B happens only through cgroup_show_options() and using cgroup_root_mutex
* breaks it.
*/
static DEFINE_MUTEX(cgroup_mutex); static DEFINE_MUTEX(cgroup_mutex);
static DEFINE_MUTEX(cgroup_root_mutex);
/* /*
* Generate an array of cgroup subsystem pointers. At boot time, this is * Generate an array of cgroup subsystem pointers. At boot time, this is
...@@ -953,6 +970,7 @@ static int rebind_subsystems(struct cgroupfs_root *root, ...@@ -953,6 +970,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
int i; int i;
BUG_ON(!mutex_is_locked(&cgroup_mutex)); BUG_ON(!mutex_is_locked(&cgroup_mutex));
BUG_ON(!mutex_is_locked(&cgroup_root_mutex));
removed_bits = root->actual_subsys_bits & ~final_bits; removed_bits = root->actual_subsys_bits & ~final_bits;
added_bits = final_bits & ~root->actual_subsys_bits; added_bits = final_bits & ~root->actual_subsys_bits;
...@@ -1043,7 +1061,7 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs) ...@@ -1043,7 +1061,7 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info; struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info;
struct cgroup_subsys *ss; struct cgroup_subsys *ss;
mutex_lock(&cgroup_mutex); mutex_lock(&cgroup_root_mutex);
for_each_subsys(root, ss) for_each_subsys(root, ss)
seq_printf(seq, ",%s", ss->name); seq_printf(seq, ",%s", ss->name);
if (test_bit(ROOT_NOPREFIX, &root->flags)) if (test_bit(ROOT_NOPREFIX, &root->flags))
...@@ -1054,7 +1072,7 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs) ...@@ -1054,7 +1072,7 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
seq_puts(seq, ",clone_children"); seq_puts(seq, ",clone_children");
if (strlen(root->name)) if (strlen(root->name))
seq_printf(seq, ",name=%s", root->name); seq_printf(seq, ",name=%s", root->name);
mutex_unlock(&cgroup_mutex); mutex_unlock(&cgroup_root_mutex);
return 0; return 0;
} }
...@@ -1269,6 +1287,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) ...@@ -1269,6 +1287,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
mutex_lock(&cgrp->dentry->d_inode->i_mutex); mutex_lock(&cgrp->dentry->d_inode->i_mutex);
mutex_lock(&cgroup_mutex); mutex_lock(&cgroup_mutex);
mutex_lock(&cgroup_root_mutex);
/* See what subsystems are wanted */ /* See what subsystems are wanted */
ret = parse_cgroupfs_options(data, &opts); ret = parse_cgroupfs_options(data, &opts);
...@@ -1297,6 +1316,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) ...@@ -1297,6 +1316,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
out_unlock: out_unlock:
kfree(opts.release_agent); kfree(opts.release_agent);
kfree(opts.name); kfree(opts.name);
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex); mutex_unlock(&cgroup_mutex);
mutex_unlock(&cgrp->dentry->d_inode->i_mutex); mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
return ret; return ret;
...@@ -1481,6 +1501,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, ...@@ -1481,6 +1501,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
int ret = 0; int ret = 0;
struct super_block *sb; struct super_block *sb;
struct cgroupfs_root *new_root; struct cgroupfs_root *new_root;
struct inode *inode;
/* First find the desired set of subsystems */ /* First find the desired set of subsystems */
mutex_lock(&cgroup_mutex); mutex_lock(&cgroup_mutex);
...@@ -1514,7 +1535,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, ...@@ -1514,7 +1535,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
/* We used the new root structure, so this is a new hierarchy */ /* We used the new root structure, so this is a new hierarchy */
struct list_head tmp_cg_links; struct list_head tmp_cg_links;
struct cgroup *root_cgrp = &root->top_cgroup; struct cgroup *root_cgrp = &root->top_cgroup;
struct inode *inode;
struct cgroupfs_root *existing_root; struct cgroupfs_root *existing_root;
const struct cred *cred; const struct cred *cred;
int i; int i;
...@@ -1528,18 +1548,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, ...@@ -1528,18 +1548,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
mutex_lock(&inode->i_mutex); mutex_lock(&inode->i_mutex);
mutex_lock(&cgroup_mutex); mutex_lock(&cgroup_mutex);
mutex_lock(&cgroup_root_mutex);
if (strlen(root->name)) { /* Check for name clashes with existing mounts */
/* Check for name clashes with existing mounts */ ret = -EBUSY;
for_each_active_root(existing_root) { if (strlen(root->name))
if (!strcmp(existing_root->name, root->name)) { for_each_active_root(existing_root)
ret = -EBUSY; if (!strcmp(existing_root->name, root->name))
mutex_unlock(&cgroup_mutex); goto unlock_drop;
mutex_unlock(&inode->i_mutex);
goto drop_new_super;
}
}
}
/* /*
* We're accessing css_set_count without locking * We're accessing css_set_count without locking
...@@ -1549,18 +1565,13 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, ...@@ -1549,18 +1565,13 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
* have some link structures left over * have some link structures left over
*/ */
ret = allocate_cg_links(css_set_count, &tmp_cg_links); ret = allocate_cg_links(css_set_count, &tmp_cg_links);
if (ret) { if (ret)
mutex_unlock(&cgroup_mutex); goto unlock_drop;
mutex_unlock(&inode->i_mutex);
goto drop_new_super;
}
ret = rebind_subsystems(root, root->subsys_bits); ret = rebind_subsystems(root, root->subsys_bits);
if (ret == -EBUSY) { if (ret == -EBUSY) {
mutex_unlock(&cgroup_mutex);
mutex_unlock(&inode->i_mutex);
free_cg_links(&tmp_cg_links); free_cg_links(&tmp_cg_links);
goto drop_new_super; goto unlock_drop;
} }
/* /*
* There must be no failure case after here, since rebinding * There must be no failure case after here, since rebinding
...@@ -1599,6 +1610,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, ...@@ -1599,6 +1610,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
cred = override_creds(&init_cred); cred = override_creds(&init_cred);
cgroup_populate_dir(root_cgrp); cgroup_populate_dir(root_cgrp);
revert_creds(cred); revert_creds(cred);
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex); mutex_unlock(&cgroup_mutex);
mutex_unlock(&inode->i_mutex); mutex_unlock(&inode->i_mutex);
} else { } else {
...@@ -1615,6 +1627,10 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, ...@@ -1615,6 +1627,10 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
kfree(opts.name); kfree(opts.name);
return dget(sb->s_root); return dget(sb->s_root);
unlock_drop:
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&inode->i_mutex);
drop_new_super: drop_new_super:
deactivate_locked_super(sb); deactivate_locked_super(sb);
drop_modules: drop_modules:
...@@ -1639,6 +1655,7 @@ static void cgroup_kill_sb(struct super_block *sb) { ...@@ -1639,6 +1655,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
BUG_ON(!list_empty(&cgrp->sibling)); BUG_ON(!list_empty(&cgrp->sibling));
mutex_lock(&cgroup_mutex); mutex_lock(&cgroup_mutex);
mutex_lock(&cgroup_root_mutex);
/* Rebind all subsystems back to the default hierarchy */ /* Rebind all subsystems back to the default hierarchy */
ret = rebind_subsystems(root, 0); ret = rebind_subsystems(root, 0);
...@@ -1664,6 +1681,7 @@ static void cgroup_kill_sb(struct super_block *sb) { ...@@ -1664,6 +1681,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
root_count--; root_count--;
} }
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex); mutex_unlock(&cgroup_mutex);
kill_litter_super(sb); kill_litter_super(sb);
...@@ -2311,7 +2329,9 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft, ...@@ -2311,7 +2329,9 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
return -EINVAL; return -EINVAL;
if (!cgroup_lock_live_group(cgrp)) if (!cgroup_lock_live_group(cgrp))
return -ENODEV; return -ENODEV;
mutex_lock(&cgroup_root_mutex);
strcpy(cgrp->root->release_agent_path, buffer); strcpy(cgrp->root->release_agent_path, buffer);
mutex_unlock(&cgroup_root_mutex);
cgroup_unlock(); cgroup_unlock();
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment