Commit b12cea91 authored by Al Viro

change the locking order for namespace_sem

Have it nested inside ->i_mutex.  Instead of using follow_down()
under namespace_sem, followed by grabbing i_mutex and checking that
mountpoint to be is not dead, do the following:
	grab i_mutex
	check that it's not dead
	grab namespace_sem
	see if anything is mounted there
	if not, we've won
	otherwise
		drop locks
		put_path on what we had
		replace with what's mounted
		retry everything with new mountpoint to be

New helper (lock_mount()) does that.  do_add_mount(), do_move_mount(),
do_loopback() and pivot_root() switched to it; in case of the last
two that eliminates a race we used to have - original code didn't
do follow_down().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
parent 27cb1572
...@@ -1663,9 +1663,35 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, ...@@ -1663,9 +1663,35 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
return err; return err;
} }
/*
 * Lock a prospective mountpoint, taking ->i_mutex *before* namespace_sem
 * (the new locking order established by this commit).  On success the
 * caller holds both locks and *path refers to the topmost mount at that
 * location; release with unlock_mount().
 *
 * Algorithm (see commit message): grab i_mutex, check the dentry is not
 * dead, grab namespace_sem, and see if anything is mounted there.  If
 * not, we've won.  Otherwise drop both locks, replace *path with what is
 * mounted on top of it, and retry with the new mountpoint-to-be.
 *
 * Returns 0 with both locks held, or -ENOENT if the mountpoint is dead.
 */
static int lock_mount(struct path *path)
{
	struct vfsmount *mnt;
retry:
	mutex_lock(&path->dentry->d_inode->i_mutex);
	if (unlikely(cant_mount(path->dentry))) {
		/* dentry has been marked un-mountable; give up */
		mutex_unlock(&path->dentry->d_inode->i_mutex);
		return -ENOENT;
	}
	down_write(&namespace_sem);
	mnt = lookup_mnt(path);
	if (likely(!mnt))
		return 0;	/* nothing mounted here - both locks held */
	/*
	 * Something is mounted on this path.  Drop the locks, step onto
	 * the covering mount (lookup_mnt returned it with a reference,
	 * which *path now owns) and retry against its root.
	 */
	up_write(&namespace_sem);
	mutex_unlock(&path->dentry->d_inode->i_mutex);
	path_put(path);
	path->mnt = mnt;
	path->dentry = dget(mnt->mnt_root);
	goto retry;
}
/*
 * Undo a successful lock_mount(): drop namespace_sem and the
 * mountpoint's ->i_mutex, in the reverse of acquisition order.
 */
static void unlock_mount(struct path *path)
{
	up_write(&namespace_sem);
	mutex_unlock(&path->dentry->d_inode->i_mutex);
}
static int graft_tree(struct vfsmount *mnt, struct path *path) static int graft_tree(struct vfsmount *mnt, struct path *path)
{ {
int err;
if (mnt->mnt_sb->s_flags & MS_NOUSER) if (mnt->mnt_sb->s_flags & MS_NOUSER)
return -EINVAL; return -EINVAL;
...@@ -1673,16 +1699,10 @@ static int graft_tree(struct vfsmount *mnt, struct path *path) ...@@ -1673,16 +1699,10 @@ static int graft_tree(struct vfsmount *mnt, struct path *path)
S_ISDIR(mnt->mnt_root->d_inode->i_mode)) S_ISDIR(mnt->mnt_root->d_inode->i_mode))
return -ENOTDIR; return -ENOTDIR;
err = -ENOENT; if (d_unlinked(path->dentry))
mutex_lock(&path->dentry->d_inode->i_mutex); return -ENOENT;
if (cant_mount(path->dentry))
goto out_unlock;
if (!d_unlinked(path->dentry)) return attach_recursive_mnt(mnt, path, NULL);
err = attach_recursive_mnt(mnt, path, NULL);
out_unlock:
mutex_unlock(&path->dentry->d_inode->i_mutex);
return err;
} }
/* /*
...@@ -1745,6 +1765,7 @@ static int do_change_type(struct path *path, int flag) ...@@ -1745,6 +1765,7 @@ static int do_change_type(struct path *path, int flag)
static int do_loopback(struct path *path, char *old_name, static int do_loopback(struct path *path, char *old_name,
int recurse) int recurse)
{ {
LIST_HEAD(umount_list);
struct path old_path; struct path old_path;
struct vfsmount *mnt = NULL; struct vfsmount *mnt = NULL;
int err = mount_is_safe(path); int err = mount_is_safe(path);
...@@ -1756,13 +1777,16 @@ static int do_loopback(struct path *path, char *old_name, ...@@ -1756,13 +1777,16 @@ static int do_loopback(struct path *path, char *old_name,
if (err) if (err)
return err; return err;
down_write(&namespace_sem); err = lock_mount(path);
if (err)
goto out;
err = -EINVAL; err = -EINVAL;
if (IS_MNT_UNBINDABLE(old_path.mnt)) if (IS_MNT_UNBINDABLE(old_path.mnt))
goto out; goto out2;
if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
goto out; goto out2;
err = -ENOMEM; err = -ENOMEM;
if (recurse) if (recurse)
...@@ -1771,20 +1795,18 @@ static int do_loopback(struct path *path, char *old_name, ...@@ -1771,20 +1795,18 @@ static int do_loopback(struct path *path, char *old_name,
mnt = clone_mnt(old_path.mnt, old_path.dentry, 0); mnt = clone_mnt(old_path.mnt, old_path.dentry, 0);
if (!mnt) if (!mnt)
goto out; goto out2;
err = graft_tree(mnt, path); err = graft_tree(mnt, path);
if (err) { if (err) {
LIST_HEAD(umount_list);
br_write_lock(vfsmount_lock); br_write_lock(vfsmount_lock);
umount_tree(mnt, 0, &umount_list); umount_tree(mnt, 0, &umount_list);
br_write_unlock(vfsmount_lock); br_write_unlock(vfsmount_lock);
release_mounts(&umount_list);
} }
out2:
unlock_mount(path);
release_mounts(&umount_list);
out: out:
up_write(&namespace_sem);
path_put(&old_path); path_put(&old_path);
return err; return err;
} }
...@@ -1873,18 +1895,12 @@ static int do_move_mount(struct path *path, char *old_name) ...@@ -1873,18 +1895,12 @@ static int do_move_mount(struct path *path, char *old_name)
if (err) if (err)
return err; return err;
down_write(&namespace_sem); err = lock_mount(path);
err = follow_down(path, true);
if (err < 0) if (err < 0)
goto out; goto out;
err = -EINVAL; err = -EINVAL;
if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
goto out;
err = -ENOENT;
mutex_lock(&path->dentry->d_inode->i_mutex);
if (cant_mount(path->dentry))
goto out1; goto out1;
if (d_unlinked(path->dentry)) if (d_unlinked(path->dentry))
...@@ -1926,9 +1942,8 @@ static int do_move_mount(struct path *path, char *old_name) ...@@ -1926,9 +1942,8 @@ static int do_move_mount(struct path *path, char *old_name)
* automatically */ * automatically */
list_del_init(&old_path.mnt->mnt_expire); list_del_init(&old_path.mnt->mnt_expire);
out1: out1:
mutex_unlock(&path->dentry->d_inode->i_mutex); unlock_mount(path);
out: out:
up_write(&namespace_sem);
if (!err) if (!err)
path_put(&parent_path); path_put(&parent_path);
path_put(&old_path); path_put(&old_path);
...@@ -1983,11 +1998,9 @@ static int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flag ...@@ -1983,11 +1998,9 @@ static int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flag
mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL); mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);
down_write(&namespace_sem); err = lock_mount(path);
/* Something was mounted here while we slept */ if (err)
err = follow_down(path, true); return err;
if (err < 0)
goto unlock;
err = -EINVAL; err = -EINVAL;
if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt)) if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt))
...@@ -2007,7 +2020,7 @@ static int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flag ...@@ -2007,7 +2020,7 @@ static int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flag
err = graft_tree(newmnt, path); err = graft_tree(newmnt, path);
unlock: unlock:
up_write(&namespace_sem); unlock_mount(path);
return err; return err;
} }
...@@ -2575,55 +2588,53 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, ...@@ -2575,55 +2588,53 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
goto out1; goto out1;
error = security_sb_pivotroot(&old, &new); error = security_sb_pivotroot(&old, &new);
if (error) { if (error)
path_put(&old); goto out2;
goto out1;
}
get_fs_root(current->fs, &root); get_fs_root(current->fs, &root);
down_write(&namespace_sem); error = lock_mount(&old);
mutex_lock(&old.dentry->d_inode->i_mutex); if (error)
goto out3;
error = -EINVAL; error = -EINVAL;
if (IS_MNT_SHARED(old.mnt) || if (IS_MNT_SHARED(old.mnt) ||
IS_MNT_SHARED(new.mnt->mnt_parent) || IS_MNT_SHARED(new.mnt->mnt_parent) ||
IS_MNT_SHARED(root.mnt->mnt_parent)) IS_MNT_SHARED(root.mnt->mnt_parent))
goto out2; goto out4;
if (!check_mnt(root.mnt) || !check_mnt(new.mnt)) if (!check_mnt(root.mnt) || !check_mnt(new.mnt))
goto out2; goto out4;
error = -ENOENT; error = -ENOENT;
if (cant_mount(old.dentry))
goto out2;
if (d_unlinked(new.dentry)) if (d_unlinked(new.dentry))
goto out2; goto out4;
if (d_unlinked(old.dentry)) if (d_unlinked(old.dentry))
goto out2; goto out4;
error = -EBUSY; error = -EBUSY;
if (new.mnt == root.mnt || if (new.mnt == root.mnt ||
old.mnt == root.mnt) old.mnt == root.mnt)
goto out2; /* loop, on the same file system */ goto out4; /* loop, on the same file system */
error = -EINVAL; error = -EINVAL;
if (root.mnt->mnt_root != root.dentry) if (root.mnt->mnt_root != root.dentry)
goto out2; /* not a mountpoint */ goto out4; /* not a mountpoint */
if (root.mnt->mnt_parent == root.mnt) if (root.mnt->mnt_parent == root.mnt)
goto out2; /* not attached */ goto out4; /* not attached */
if (new.mnt->mnt_root != new.dentry) if (new.mnt->mnt_root != new.dentry)
goto out2; /* not a mountpoint */ goto out4; /* not a mountpoint */
if (new.mnt->mnt_parent == new.mnt) if (new.mnt->mnt_parent == new.mnt)
goto out2; /* not attached */ goto out4; /* not attached */
/* make sure we can reach put_old from new_root */ /* make sure we can reach put_old from new_root */
tmp = old.mnt; tmp = old.mnt;
if (tmp != new.mnt) { if (tmp != new.mnt) {
for (;;) { for (;;) {
if (tmp->mnt_parent == tmp) if (tmp->mnt_parent == tmp)
goto out2; /* already mounted on put_old */ goto out4; /* already mounted on put_old */
if (tmp->mnt_parent == new.mnt) if (tmp->mnt_parent == new.mnt)
break; break;
tmp = tmp->mnt_parent; tmp = tmp->mnt_parent;
} }
if (!is_subdir(tmp->mnt_mountpoint, new.dentry)) if (!is_subdir(tmp->mnt_mountpoint, new.dentry))
goto out2; goto out4;
} else if (!is_subdir(old.dentry, new.dentry)) } else if (!is_subdir(old.dentry, new.dentry))
goto out2; goto out4;
br_write_lock(vfsmount_lock); br_write_lock(vfsmount_lock);
detach_mnt(new.mnt, &parent_path); detach_mnt(new.mnt, &parent_path);
detach_mnt(root.mnt, &root_parent); detach_mnt(root.mnt, &root_parent);
...@@ -2634,14 +2645,16 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, ...@@ -2634,14 +2645,16 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
touch_mnt_namespace(current->nsproxy->mnt_ns); touch_mnt_namespace(current->nsproxy->mnt_ns);
br_write_unlock(vfsmount_lock); br_write_unlock(vfsmount_lock);
chroot_fs_refs(&root, &new); chroot_fs_refs(&root, &new);
error = 0; error = 0;
path_put(&root_parent); out4:
path_put(&parent_path); unlock_mount(&old);
out2: if (!error) {
mutex_unlock(&old.dentry->d_inode->i_mutex); path_put(&root_parent);
up_write(&namespace_sem); path_put(&parent_path);
}
out3:
path_put(&root); path_put(&root);
out2:
path_put(&old); path_put(&old);
out1: out1:
path_put(&new); path_put(&new);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment