Commit c7c4591d authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace

Pull namespace changes from Eric Biederman:
 "This is an assorted mishmash of small cleanups, enhancements and bug
  fixes.

  The major theme is user namespace mount restrictions.  nsown_capable
  is killed as it encourages not thinking about details that need to be
  considered.  A very hard to hit pid namespace exiting bug was finally
  tracked and fixed.  There are also a couple of cleanups to the basic
  namespace infrastructure.

  Finally there is an enhancement that makes per user namespace
  capabilities usable as capabilities, and an enhancement that allows
  the per userns root to nice other processes in the user namespace."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
  userns:  Kill nsown_capable it makes the wrong thing easy
  capabilities: allow nice if we are privileged
  pidns: Don't have unshare(CLONE_NEWPID) imply CLONE_THREAD
  userns: Allow PR_CAPBSET_DROP in a user namespace.
  namespaces: Simplify copy_namespaces so it is clear what is going on.
  pidns: Fix hang in zap_pid_ns_processes by sending a potentially extra wakeup
  sysfs: Restrict mounting sysfs
  userns: Better restrictions on when proc and sysfs can be mounted
  vfs: Don't copy mount bind mounts of /proc/<pid>/ns/mnt between namespaces
  kernel/nsproxy.c: Improving a snippet of code.
  proc: Restrict mounting the proc filesystem
  vfs: Lock in place mounts from more privileged users
parents 11c7b03d c7b96acf
...@@ -831,6 +831,10 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, ...@@ -831,6 +831,10 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY)) if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY))
mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
/* Don't allow unprivileged users to reveal what is under a mount */
if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire))
mnt->mnt.mnt_flags |= MNT_LOCKED;
atomic_inc(&sb->s_active); atomic_inc(&sb->s_active);
mnt->mnt.mnt_sb = sb; mnt->mnt.mnt_sb = sb;
mnt->mnt.mnt_root = dget(root); mnt->mnt.mnt_root = dget(root);
...@@ -1327,6 +1331,8 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags) ...@@ -1327,6 +1331,8 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
goto dput_and_out; goto dput_and_out;
if (!check_mnt(mnt)) if (!check_mnt(mnt))
goto dput_and_out; goto dput_and_out;
if (mnt->mnt.mnt_flags & MNT_LOCKED)
goto dput_and_out;
retval = do_umount(mnt, flags); retval = do_umount(mnt, flags);
dput_and_out: dput_and_out:
...@@ -1349,14 +1355,11 @@ SYSCALL_DEFINE1(oldumount, char __user *, name) ...@@ -1349,14 +1355,11 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)
#endif #endif
static bool mnt_ns_loop(struct path *path) static bool is_mnt_ns_file(struct dentry *dentry)
{ {
/* Could bind mounting the mount namespace inode cause a /* Is this a proxy for a mount namespace? */
* mount namespace loop? struct inode *inode = dentry->d_inode;
*/
struct inode *inode = path->dentry->d_inode;
struct proc_ns *ei; struct proc_ns *ei;
struct mnt_namespace *mnt_ns;
if (!proc_ns_inode(inode)) if (!proc_ns_inode(inode))
return false; return false;
...@@ -1365,7 +1368,19 @@ static bool mnt_ns_loop(struct path *path) ...@@ -1365,7 +1368,19 @@ static bool mnt_ns_loop(struct path *path)
if (ei->ns_ops != &mntns_operations) if (ei->ns_ops != &mntns_operations)
return false; return false;
mnt_ns = ei->ns; return true;
}
static bool mnt_ns_loop(struct dentry *dentry)
{
/* Could bind mounting the mount namespace inode cause a
* mount namespace loop?
*/
struct mnt_namespace *mnt_ns;
if (!is_mnt_ns_file(dentry))
return false;
mnt_ns = get_proc_ns(dentry->d_inode)->ns;
return current->nsproxy->mnt_ns->seq >= mnt_ns->seq; return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
} }
...@@ -1374,13 +1389,17 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, ...@@ -1374,13 +1389,17 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
{ {
struct mount *res, *p, *q, *r, *parent; struct mount *res, *p, *q, *r, *parent;
if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt)) if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt))
return ERR_PTR(-EINVAL);
if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry))
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
res = q = clone_mnt(mnt, dentry, flag); res = q = clone_mnt(mnt, dentry, flag);
if (IS_ERR(q)) if (IS_ERR(q))
return q; return q;
q->mnt.mnt_flags &= ~MNT_LOCKED;
q->mnt_mountpoint = mnt->mnt_mountpoint; q->mnt_mountpoint = mnt->mnt_mountpoint;
p = mnt; p = mnt;
...@@ -1390,7 +1409,13 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, ...@@ -1390,7 +1409,13 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
continue; continue;
for (s = r; s; s = next_mnt(s, r)) { for (s = r; s; s = next_mnt(s, r)) {
if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(s)) { if (!(flag & CL_COPY_UNBINDABLE) &&
IS_MNT_UNBINDABLE(s)) {
s = skip_mnt_tree(s);
continue;
}
if (!(flag & CL_COPY_MNT_NS_FILE) &&
is_mnt_ns_file(s->mnt.mnt_root)) {
s = skip_mnt_tree(s); s = skip_mnt_tree(s);
continue; continue;
} }
...@@ -1696,6 +1721,19 @@ static int do_change_type(struct path *path, int flag) ...@@ -1696,6 +1721,19 @@ static int do_change_type(struct path *path, int flag)
return err; return err;
} }
/*
 * has_locked_children - look for a locked mount among the children of @mnt
 * @mnt:    mount whose child list (mnt->mnt_mounts) is scanned
 * @dentry: dentry each child's mountpoint is tested against with is_subdir()
 *
 * Returns true as soon as one child satisfies both
 * is_subdir(child mountpoint, @dentry) and has MNT_LOCKED set in its
 * flags; returns false when no child does.
 */
static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
{
	struct mount *kid;

	list_for_each_entry(kid, &mnt->mnt_mounts, mnt_child) {
		/* The flag is only inspected for children that pass is_subdir(). */
		if (is_subdir(kid->mnt_mountpoint, dentry) &&
		    (kid->mnt.mnt_flags & MNT_LOCKED))
			return true;
	}

	return false;
}
/* /*
* do loopback mount. * do loopback mount.
*/ */
...@@ -1713,7 +1751,7 @@ static int do_loopback(struct path *path, const char *old_name, ...@@ -1713,7 +1751,7 @@ static int do_loopback(struct path *path, const char *old_name,
return err; return err;
err = -EINVAL; err = -EINVAL;
if (mnt_ns_loop(&old_path)) if (mnt_ns_loop(old_path.dentry))
goto out; goto out;
mp = lock_mount(path); mp = lock_mount(path);
...@@ -1731,8 +1769,11 @@ static int do_loopback(struct path *path, const char *old_name, ...@@ -1731,8 +1769,11 @@ static int do_loopback(struct path *path, const char *old_name,
if (!check_mnt(parent) || !check_mnt(old)) if (!check_mnt(parent) || !check_mnt(old))
goto out2; goto out2;
if (!recurse && has_locked_children(old, old_path.dentry))
goto out2;
if (recurse) if (recurse)
mnt = copy_tree(old, old_path.dentry, 0); mnt = copy_tree(old, old_path.dentry, CL_COPY_MNT_NS_FILE);
else else
mnt = clone_mnt(old, old_path.dentry, 0); mnt = clone_mnt(old, old_path.dentry, 0);
...@@ -1741,6 +1782,8 @@ static int do_loopback(struct path *path, const char *old_name, ...@@ -1741,6 +1782,8 @@ static int do_loopback(struct path *path, const char *old_name,
goto out2; goto out2;
} }
mnt->mnt.mnt_flags &= ~MNT_LOCKED;
err = graft_tree(mnt, parent, mp); err = graft_tree(mnt, parent, mp);
if (err) { if (err) {
br_write_lock(&vfsmount_lock); br_write_lock(&vfsmount_lock);
...@@ -1853,6 +1896,9 @@ static int do_move_mount(struct path *path, const char *old_name) ...@@ -1853,6 +1896,9 @@ static int do_move_mount(struct path *path, const char *old_name)
if (!check_mnt(p) || !check_mnt(old)) if (!check_mnt(p) || !check_mnt(old))
goto out1; goto out1;
if (old->mnt.mnt_flags & MNT_LOCKED)
goto out1;
err = -EINVAL; err = -EINVAL;
if (old_path.dentry != old_path.mnt->mnt_root) if (old_path.dentry != old_path.mnt->mnt_root)
goto out1; goto out1;
...@@ -2389,7 +2435,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, ...@@ -2389,7 +2435,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
namespace_lock(); namespace_lock();
/* First pass: copy the tree topology */ /* First pass: copy the tree topology */
copy_flags = CL_COPY_ALL | CL_EXPIRE; copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
if (user_ns != mnt_ns->user_ns) if (user_ns != mnt_ns->user_ns)
copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED; copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
new = copy_tree(old, old->mnt.mnt_root, copy_flags); new = copy_tree(old, old->mnt.mnt_root, copy_flags);
...@@ -2424,6 +2470,10 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, ...@@ -2424,6 +2470,10 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
} }
p = next_mnt(p, old); p = next_mnt(p, old);
q = next_mnt(q, new); q = next_mnt(q, new);
if (!q)
break;
while (p->mnt.mnt_root != q->mnt.mnt_root)
p = next_mnt(p, old);
} }
namespace_unlock(); namespace_unlock();
...@@ -2630,6 +2680,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, ...@@ -2630,6 +2680,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
goto out4; goto out4;
if (!check_mnt(root_mnt) || !check_mnt(new_mnt)) if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
goto out4; goto out4;
if (new_mnt->mnt.mnt_flags & MNT_LOCKED)
goto out4;
error = -ENOENT; error = -ENOENT;
if (d_unlinked(new.dentry)) if (d_unlinked(new.dentry))
goto out4; goto out4;
...@@ -2653,6 +2705,10 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, ...@@ -2653,6 +2705,10 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
br_write_lock(&vfsmount_lock); br_write_lock(&vfsmount_lock);
detach_mnt(new_mnt, &parent_path); detach_mnt(new_mnt, &parent_path);
detach_mnt(root_mnt, &root_parent); detach_mnt(root_mnt, &root_parent);
if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
new_mnt->mnt.mnt_flags |= MNT_LOCKED;
root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
}
/* mount old root on put_old */ /* mount old root on put_old */
attach_mnt(root_mnt, old_mnt, old_mp); attach_mnt(root_mnt, old_mnt, old_mp);
/* mount new_root on / */ /* mount new_root on / */
...@@ -2811,25 +2867,38 @@ bool current_chrooted(void) ...@@ -2811,25 +2867,38 @@ bool current_chrooted(void)
return chrooted; return chrooted;
} }
void update_mnt_policy(struct user_namespace *userns) bool fs_fully_visible(struct file_system_type *type)
{ {
struct mnt_namespace *ns = current->nsproxy->mnt_ns; struct mnt_namespace *ns = current->nsproxy->mnt_ns;
struct mount *mnt; struct mount *mnt;
bool visible = false;
down_read(&namespace_sem); if (unlikely(!ns))
return false;
namespace_lock();
list_for_each_entry(mnt, &ns->list, mnt_list) { list_for_each_entry(mnt, &ns->list, mnt_list) {
switch (mnt->mnt.mnt_sb->s_magic) { struct mount *child;
case SYSFS_MAGIC: if (mnt->mnt.mnt_sb->s_type != type)
userns->may_mount_sysfs = true; continue;
break;
case PROC_SUPER_MAGIC: /* This mount is not fully visible if there are any child mounts
userns->may_mount_proc = true; * that cover anything except for empty directories.
break; */
list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
struct inode *inode = child->mnt_mountpoint->d_inode;
if (!S_ISDIR(inode->i_mode))
goto next;
if (inode->i_nlink != 2)
goto next;
} }
if (userns->may_mount_sysfs && userns->may_mount_proc) visible = true;
break; goto found;
next: ;
} }
up_read(&namespace_sem); found:
namespace_unlock();
return visible;
} }
static void *mntns_get(struct task_struct *task) static void *mntns_get(struct task_struct *task)
...@@ -2860,8 +2929,8 @@ static int mntns_install(struct nsproxy *nsproxy, void *ns) ...@@ -2860,8 +2929,8 @@ static int mntns_install(struct nsproxy *nsproxy, void *ns)
struct path root; struct path root;
if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) || if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
!nsown_capable(CAP_SYS_CHROOT) || !ns_capable(current_user_ns(), CAP_SYS_CHROOT) ||
!nsown_capable(CAP_SYS_ADMIN)) !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
return -EPERM; return -EPERM;
if (fs->users != 1) if (fs->users != 1)
......
...@@ -443,7 +443,7 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename) ...@@ -443,7 +443,7 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename)
goto dput_and_out; goto dput_and_out;
error = -EPERM; error = -EPERM;
if (!nsown_capable(CAP_SYS_CHROOT)) if (!ns_capable(current_user_ns(), CAP_SYS_CHROOT))
goto dput_and_out; goto dput_and_out;
error = security_path_chroot(&path); error = security_path_chroot(&path);
if (error) if (error)
......
...@@ -19,11 +19,14 @@ ...@@ -19,11 +19,14 @@
#define CL_EXPIRE 0x01 #define CL_EXPIRE 0x01
#define CL_SLAVE 0x02 #define CL_SLAVE 0x02
#define CL_COPY_ALL 0x04 #define CL_COPY_UNBINDABLE 0x04
#define CL_MAKE_SHARED 0x08 #define CL_MAKE_SHARED 0x08
#define CL_PRIVATE 0x10 #define CL_PRIVATE 0x10
#define CL_SHARED_TO_SLAVE 0x20 #define CL_SHARED_TO_SLAVE 0x20
#define CL_UNPRIVILEGED 0x40 #define CL_UNPRIVILEGED 0x40
#define CL_COPY_MNT_NS_FILE 0x80
#define CL_COPY_ALL (CL_COPY_UNBINDABLE | CL_COPY_MNT_NS_FILE)
static inline void set_mnt_shared(struct mount *mnt) static inline void set_mnt_shared(struct mount *mnt)
{ {
......
...@@ -110,7 +110,11 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, ...@@ -110,7 +110,11 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
ns = task_active_pid_ns(current); ns = task_active_pid_ns(current);
options = data; options = data;
if (!current_user_ns()->may_mount_proc) if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
return ERR_PTR(-EPERM);
/* Does the mounter have privilege over the pid namespace? */
if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
return ERR_PTR(-EPERM); return ERR_PTR(-EPERM);
} }
......
...@@ -112,9 +112,16 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, ...@@ -112,9 +112,16 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
struct super_block *sb; struct super_block *sb;
int error; int error;
if (!(flags & MS_KERNMOUNT) && !current_user_ns()->may_mount_sysfs) if (!(flags & MS_KERNMOUNT)) {
if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
return ERR_PTR(-EPERM); return ERR_PTR(-EPERM);
for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) {
if (!kobj_ns_current_may_mount(type))
return ERR_PTR(-EPERM);
}
}
info = kzalloc(sizeof(*info), GFP_KERNEL); info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info) if (!info)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
......
...@@ -210,7 +210,6 @@ extern bool has_ns_capability_noaudit(struct task_struct *t, ...@@ -210,7 +210,6 @@ extern bool has_ns_capability_noaudit(struct task_struct *t,
struct user_namespace *ns, int cap); struct user_namespace *ns, int cap);
extern bool capable(int cap); extern bool capable(int cap);
extern bool ns_capable(struct user_namespace *ns, int cap); extern bool ns_capable(struct user_namespace *ns, int cap);
extern bool nsown_capable(int cap);
extern bool inode_capable(const struct inode *inode, int cap); extern bool inode_capable(const struct inode *inode, int cap);
extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap); extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap);
......
...@@ -1900,6 +1900,7 @@ extern int vfs_ustat(dev_t, struct kstatfs *); ...@@ -1900,6 +1900,7 @@ extern int vfs_ustat(dev_t, struct kstatfs *);
extern int freeze_super(struct super_block *super); extern int freeze_super(struct super_block *super);
extern int thaw_super(struct super_block *super); extern int thaw_super(struct super_block *super);
extern bool our_mnt(struct vfsmount *mnt); extern bool our_mnt(struct vfsmount *mnt);
extern bool fs_fully_visible(struct file_system_type *);
extern int current_umask(void); extern int current_umask(void);
......
...@@ -39,6 +39,7 @@ enum kobj_ns_type { ...@@ -39,6 +39,7 @@ enum kobj_ns_type {
*/ */
struct kobj_ns_type_operations { struct kobj_ns_type_operations {
enum kobj_ns_type type; enum kobj_ns_type type;
bool (*current_may_mount)(void);
void *(*grab_current_ns)(void); void *(*grab_current_ns)(void);
const void *(*netlink_ns)(struct sock *sk); const void *(*netlink_ns)(struct sock *sk);
const void *(*initial_ns)(void); const void *(*initial_ns)(void);
...@@ -50,6 +51,7 @@ int kobj_ns_type_registered(enum kobj_ns_type type); ...@@ -50,6 +51,7 @@ int kobj_ns_type_registered(enum kobj_ns_type type);
const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent); const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent);
const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj); const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj);
bool kobj_ns_current_may_mount(enum kobj_ns_type type);
void *kobj_ns_grab_current(enum kobj_ns_type type); void *kobj_ns_grab_current(enum kobj_ns_type type);
const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk); const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk);
const void *kobj_ns_initial(enum kobj_ns_type type); const void *kobj_ns_initial(enum kobj_ns_type type);
......
...@@ -48,6 +48,7 @@ struct mnt_namespace; ...@@ -48,6 +48,7 @@ struct mnt_namespace;
#define MNT_INTERNAL 0x4000 #define MNT_INTERNAL 0x4000
#define MNT_LOCK_READONLY 0x400000 #define MNT_LOCK_READONLY 0x400000
#define MNT_LOCKED 0x800000
struct vfsmount { struct vfsmount {
struct dentry *mnt_root; /* root of the mounted tree */ struct dentry *mnt_root; /* root of the mounted tree */
......
...@@ -27,8 +27,6 @@ struct user_namespace { ...@@ -27,8 +27,6 @@ struct user_namespace {
kuid_t owner; kuid_t owner;
kgid_t group; kgid_t group;
unsigned int proc_inum; unsigned int proc_inum;
bool may_mount_sysfs;
bool may_mount_proc;
}; };
extern struct user_namespace init_user_ns; extern struct user_namespace init_user_ns;
...@@ -85,6 +83,4 @@ static inline void put_user_ns(struct user_namespace *ns) ...@@ -85,6 +83,4 @@ static inline void put_user_ns(struct user_namespace *ns)
#endif #endif
void update_mnt_policy(struct user_namespace *userns);
#endif /* _LINUX_USER_H */ #endif /* _LINUX_USER_H */
...@@ -171,7 +171,7 @@ static int ipcns_install(struct nsproxy *nsproxy, void *new) ...@@ -171,7 +171,7 @@ static int ipcns_install(struct nsproxy *nsproxy, void *new)
{ {
struct ipc_namespace *ns = new; struct ipc_namespace *ns = new;
if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) || if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
!nsown_capable(CAP_SYS_ADMIN)) !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
return -EPERM; return -EPERM;
/* Ditch state from the old ipc namespace */ /* Ditch state from the old ipc namespace */
......
...@@ -432,18 +432,6 @@ bool capable(int cap) ...@@ -432,18 +432,6 @@ bool capable(int cap)
} }
EXPORT_SYMBOL(capable); EXPORT_SYMBOL(capable);
/**
* nsown_capable - Check superior capability to one's own user_ns
* @cap: The capability in question
*
* Return true if the current task has the given superior capability
* targeted at its own user namespace.
*/
bool nsown_capable(int cap)
{
return ns_capable(current_user_ns(), cap);
}
/** /**
* inode_capable - Check superior capability over inode * inode_capable - Check superior capability over inode
* @inode: The inode in question * @inode: The inode in question
......
...@@ -1824,11 +1824,6 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) ...@@ -1824,11 +1824,6 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
*/ */
if (unshare_flags & CLONE_NEWUSER) if (unshare_flags & CLONE_NEWUSER)
unshare_flags |= CLONE_THREAD | CLONE_FS; unshare_flags |= CLONE_THREAD | CLONE_FS;
/*
* If unsharing a pid namespace must also unshare the thread.
*/
if (unshare_flags & CLONE_NEWPID)
unshare_flags |= CLONE_THREAD;
/* /*
* If unsharing a thread from a thread group, must also unshare vm. * If unsharing a thread from a thread group, must also unshare vm.
*/ */
......
...@@ -233,7 +233,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist) ...@@ -233,7 +233,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
struct group_info *group_info; struct group_info *group_info;
int retval; int retval;
if (!nsown_capable(CAP_SETGID)) if (!ns_capable(current_user_ns(), CAP_SETGID))
return -EPERM; return -EPERM;
if ((unsigned)gidsetsize > NGROUPS_MAX) if ((unsigned)gidsetsize > NGROUPS_MAX)
return -EINVAL; return -EINVAL;
......
...@@ -126,22 +126,16 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) ...@@ -126,22 +126,16 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
struct nsproxy *old_ns = tsk->nsproxy; struct nsproxy *old_ns = tsk->nsproxy;
struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns); struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
struct nsproxy *new_ns; struct nsproxy *new_ns;
int err = 0;
if (!old_ns)
return 0;
if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
CLONE_NEWPID | CLONE_NEWNET)))) {
get_nsproxy(old_ns); get_nsproxy(old_ns);
if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
CLONE_NEWPID | CLONE_NEWNET)))
return 0; return 0;
if (!ns_capable(user_ns, CAP_SYS_ADMIN)) {
err = -EPERM;
goto out;
} }
if (!ns_capable(user_ns, CAP_SYS_ADMIN))
return -EPERM;
/* /*
* CLONE_NEWIPC must detach from the undolist: after switching * CLONE_NEWIPC must detach from the undolist: after switching
* to a new ipc namespace, the semaphore arrays from the old * to a new ipc namespace, the semaphore arrays from the old
...@@ -149,22 +143,16 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) ...@@ -149,22 +143,16 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
* means share undolist with parent, so we must forbid using * means share undolist with parent, so we must forbid using
* it along with CLONE_NEWIPC. * it along with CLONE_NEWIPC.
*/ */
if ((flags & CLONE_NEWIPC) && (flags & CLONE_SYSVSEM)) { if ((flags & (CLONE_NEWIPC | CLONE_SYSVSEM)) ==
err = -EINVAL; (CLONE_NEWIPC | CLONE_SYSVSEM))
goto out; return -EINVAL;
}
new_ns = create_new_namespaces(flags, tsk, user_ns, tsk->fs); new_ns = create_new_namespaces(flags, tsk, user_ns, tsk->fs);
if (IS_ERR(new_ns)) { if (IS_ERR(new_ns))
err = PTR_ERR(new_ns); return PTR_ERR(new_ns);
goto out;
}
tsk->nsproxy = new_ns; tsk->nsproxy = new_ns;
return 0;
out:
put_nsproxy(old_ns);
return err;
} }
void free_nsproxy(struct nsproxy *ns) void free_nsproxy(struct nsproxy *ns)
......
...@@ -265,6 +265,7 @@ void free_pid(struct pid *pid) ...@@ -265,6 +265,7 @@ void free_pid(struct pid *pid)
struct pid_namespace *ns = upid->ns; struct pid_namespace *ns = upid->ns;
hlist_del_rcu(&upid->pid_chain); hlist_del_rcu(&upid->pid_chain);
switch(--ns->nr_hashed) { switch(--ns->nr_hashed) {
case 2:
case 1: case 1:
/* When all that is left in the pid namespace /* When all that is left in the pid namespace
* is the reaper wake up the reaper. The reaper * is the reaper wake up the reaper. The reaper
......
...@@ -329,7 +329,7 @@ static int pidns_install(struct nsproxy *nsproxy, void *ns) ...@@ -329,7 +329,7 @@ static int pidns_install(struct nsproxy *nsproxy, void *ns)
struct pid_namespace *ancestor, *new = ns; struct pid_namespace *ancestor, *new = ns;
if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) || if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) ||
!nsown_capable(CAP_SYS_ADMIN)) !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
return -EPERM; return -EPERM;
/* /*
......
...@@ -337,7 +337,7 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) ...@@ -337,7 +337,7 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
if (rgid != (gid_t) -1) { if (rgid != (gid_t) -1) {
if (gid_eq(old->gid, krgid) || if (gid_eq(old->gid, krgid) ||
gid_eq(old->egid, krgid) || gid_eq(old->egid, krgid) ||
nsown_capable(CAP_SETGID)) ns_capable(old->user_ns, CAP_SETGID))
new->gid = krgid; new->gid = krgid;
else else
goto error; goto error;
...@@ -346,7 +346,7 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) ...@@ -346,7 +346,7 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
if (gid_eq(old->gid, kegid) || if (gid_eq(old->gid, kegid) ||
gid_eq(old->egid, kegid) || gid_eq(old->egid, kegid) ||
gid_eq(old->sgid, kegid) || gid_eq(old->sgid, kegid) ||
nsown_capable(CAP_SETGID)) ns_capable(old->user_ns, CAP_SETGID))
new->egid = kegid; new->egid = kegid;
else else
goto error; goto error;
...@@ -387,7 +387,7 @@ SYSCALL_DEFINE1(setgid, gid_t, gid) ...@@ -387,7 +387,7 @@ SYSCALL_DEFINE1(setgid, gid_t, gid)
old = current_cred(); old = current_cred();
retval = -EPERM; retval = -EPERM;
if (nsown_capable(CAP_SETGID)) if (ns_capable(old->user_ns, CAP_SETGID))
new->gid = new->egid = new->sgid = new->fsgid = kgid; new->gid = new->egid = new->sgid = new->fsgid = kgid;
else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid)) else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid))
new->egid = new->fsgid = kgid; new->egid = new->fsgid = kgid;
...@@ -471,7 +471,7 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) ...@@ -471,7 +471,7 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
new->uid = kruid; new->uid = kruid;
if (!uid_eq(old->uid, kruid) && if (!uid_eq(old->uid, kruid) &&
!uid_eq(old->euid, kruid) && !uid_eq(old->euid, kruid) &&
!nsown_capable(CAP_SETUID)) !ns_capable(old->user_ns, CAP_SETUID))
goto error; goto error;
} }
...@@ -480,7 +480,7 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) ...@@ -480,7 +480,7 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
if (!uid_eq(old->uid, keuid) && if (!uid_eq(old->uid, keuid) &&
!uid_eq(old->euid, keuid) && !uid_eq(old->euid, keuid) &&
!uid_eq(old->suid, keuid) && !uid_eq(old->suid, keuid) &&
!nsown_capable(CAP_SETUID)) !ns_capable(old->user_ns, CAP_SETUID))
goto error; goto error;
} }
...@@ -534,7 +534,7 @@ SYSCALL_DEFINE1(setuid, uid_t, uid) ...@@ -534,7 +534,7 @@ SYSCALL_DEFINE1(setuid, uid_t, uid)
old = current_cred(); old = current_cred();
retval = -EPERM; retval = -EPERM;
if (nsown_capable(CAP_SETUID)) { if (ns_capable(old->user_ns, CAP_SETUID)) {
new->suid = new->uid = kuid; new->suid = new->uid = kuid;
if (!uid_eq(kuid, old->uid)) { if (!uid_eq(kuid, old->uid)) {
retval = set_user(new); retval = set_user(new);
...@@ -591,7 +591,7 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid) ...@@ -591,7 +591,7 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
old = current_cred(); old = current_cred();
retval = -EPERM; retval = -EPERM;
if (!nsown_capable(CAP_SETUID)) { if (!ns_capable(old->user_ns, CAP_SETUID)) {
if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) && if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) &&
!uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid)) !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid))
goto error; goto error;
...@@ -673,7 +673,7 @@ SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid) ...@@ -673,7 +673,7 @@ SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
old = current_cred(); old = current_cred();
retval = -EPERM; retval = -EPERM;
if (!nsown_capable(CAP_SETGID)) { if (!ns_capable(old->user_ns, CAP_SETGID)) {
if (rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) && if (rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) &&
!gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid)) !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid))
goto error; goto error;
...@@ -744,7 +744,7 @@ SYSCALL_DEFINE1(setfsuid, uid_t, uid) ...@@ -744,7 +744,7 @@ SYSCALL_DEFINE1(setfsuid, uid_t, uid)
if (uid_eq(kuid, old->uid) || uid_eq(kuid, old->euid) || if (uid_eq(kuid, old->uid) || uid_eq(kuid, old->euid) ||
uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) || uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) ||
nsown_capable(CAP_SETUID)) { ns_capable(old->user_ns, CAP_SETUID)) {
if (!uid_eq(kuid, old->fsuid)) { if (!uid_eq(kuid, old->fsuid)) {
new->fsuid = kuid; new->fsuid = kuid;
if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
...@@ -783,7 +783,7 @@ SYSCALL_DEFINE1(setfsgid, gid_t, gid) ...@@ -783,7 +783,7 @@ SYSCALL_DEFINE1(setfsgid, gid_t, gid)
if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->egid) || if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->egid) ||
gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) || gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) ||
nsown_capable(CAP_SETGID)) { ns_capable(old->user_ns, CAP_SETGID)) {
if (!gid_eq(kgid, old->fsgid)) { if (!gid_eq(kgid, old->fsgid)) {
new->fsgid = kgid; new->fsgid = kgid;
goto change_okay; goto change_okay;
......
...@@ -176,7 +176,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist) ...@@ -176,7 +176,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist)
struct group_info *group_info; struct group_info *group_info;
int retval; int retval;
if (!nsown_capable(CAP_SETGID)) if (!ns_capable(current_user_ns(), CAP_SETGID))
return -EPERM; return -EPERM;
if ((unsigned)gidsetsize > NGROUPS_MAX) if ((unsigned)gidsetsize > NGROUPS_MAX)
return -EINVAL; return -EINVAL;
......
...@@ -51,8 +51,6 @@ struct user_namespace init_user_ns = { ...@@ -51,8 +51,6 @@ struct user_namespace init_user_ns = {
.owner = GLOBAL_ROOT_UID, .owner = GLOBAL_ROOT_UID,
.group = GLOBAL_ROOT_GID, .group = GLOBAL_ROOT_GID,
.proc_inum = PROC_USER_INIT_INO, .proc_inum = PROC_USER_INIT_INO,
.may_mount_sysfs = true,
.may_mount_proc = true,
}; };
EXPORT_SYMBOL_GPL(init_user_ns); EXPORT_SYMBOL_GPL(init_user_ns);
......
...@@ -101,8 +101,6 @@ int create_user_ns(struct cred *new) ...@@ -101,8 +101,6 @@ int create_user_ns(struct cred *new)
set_cred_user_ns(new, ns); set_cred_user_ns(new, ns);
update_mnt_policy(ns);
return 0; return 0;
} }
......
...@@ -114,7 +114,7 @@ static int utsns_install(struct nsproxy *nsproxy, void *new) ...@@ -114,7 +114,7 @@ static int utsns_install(struct nsproxy *nsproxy, void *new)
struct uts_namespace *ns = new; struct uts_namespace *ns = new;
if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) || if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
!nsown_capable(CAP_SYS_ADMIN)) !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
return -EPERM; return -EPERM;
get_uts_ns(ns); get_uts_ns(ns);
......
...@@ -931,6 +931,21 @@ const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj) ...@@ -931,6 +931,21 @@ const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj)
return kobj_child_ns_ops(kobj->parent); return kobj_child_ns_ops(kobj->parent);
} }
/*
 * kobj_ns_current_may_mount - ask a kobject namespace type whether the
 * current task may mount
 * @type: namespace type to query
 *
 * KOBJ_NS_TYPE_NONE is always allowed and is answered without taking the
 * lock.  For every other value the result comes from the
 * current_may_mount() callback of the entry in kobj_ns_ops_tbl[@type].
 * A @type outside (KOBJ_NS_TYPE_NONE, KOBJ_NS_TYPES) or an empty table
 * slot yields false.  Both the table lookup and the callback run with
 * kobj_ns_type_lock held.
 */
bool kobj_ns_current_may_mount(enum kobj_ns_type type)
{
	bool allowed;

	if (type == KOBJ_NS_TYPE_NONE)
		return true;

	spin_lock(&kobj_ns_type_lock);
	if ((type <= KOBJ_NS_TYPE_NONE) || (type >= KOBJ_NS_TYPES) ||
	    !kobj_ns_ops_tbl[type])
		allowed = false;
	else
		allowed = kobj_ns_ops_tbl[type]->current_may_mount();
	spin_unlock(&kobj_ns_type_lock);

	return allowed;
}
void *kobj_ns_grab_current(enum kobj_ns_type type) void *kobj_ns_grab_current(enum kobj_ns_type type)
{ {
......
...@@ -1196,6 +1196,13 @@ static void remove_queue_kobjects(struct net_device *net) ...@@ -1196,6 +1196,13 @@ static void remove_queue_kobjects(struct net_device *net)
#endif #endif
} }
/*
 * net_current_may_mount - current_may_mount callback for the network
 * namespace kobject type
 *
 * Returns the result of ns_capable() for CAP_SYS_ADMIN, evaluated against
 * the user namespace recorded in the current task's network namespace.
 */
static bool net_current_may_mount(void)
{
	return ns_capable(current->nsproxy->net_ns->user_ns, CAP_SYS_ADMIN);
}
static void *net_grab_current_ns(void) static void *net_grab_current_ns(void)
{ {
struct net *ns = current->nsproxy->net_ns; struct net *ns = current->nsproxy->net_ns;
...@@ -1218,6 +1225,7 @@ static const void *net_netlink_ns(struct sock *sk) ...@@ -1218,6 +1225,7 @@ static const void *net_netlink_ns(struct sock *sk)
struct kobj_ns_type_operations net_ns_type_operations = { struct kobj_ns_type_operations net_ns_type_operations = {
.type = KOBJ_NS_TYPE_NET, .type = KOBJ_NS_TYPE_NET,
.current_may_mount = net_current_may_mount,
.grab_current_ns = net_grab_current_ns, .grab_current_ns = net_grab_current_ns,
.netlink_ns = net_netlink_ns, .netlink_ns = net_netlink_ns,
.initial_ns = net_initial_ns, .initial_ns = net_initial_ns,
......
...@@ -651,7 +651,7 @@ static int netns_install(struct nsproxy *nsproxy, void *ns) ...@@ -651,7 +651,7 @@ static int netns_install(struct nsproxy *nsproxy, void *ns)
struct net *net = ns; struct net *net = ns;
if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) || if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
!nsown_capable(CAP_SYS_ADMIN)) !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
return -EPERM; return -EPERM;
put_net(nsproxy->net_ns); put_net(nsproxy->net_ns);
......
...@@ -56,9 +56,9 @@ static __inline__ int scm_check_creds(struct ucred *creds) ...@@ -56,9 +56,9 @@ static __inline__ int scm_check_creds(struct ucred *creds)
if ((creds->pid == task_tgid_vnr(current) || if ((creds->pid == task_tgid_vnr(current) ||
ns_capable(task_active_pid_ns(current)->user_ns, CAP_SYS_ADMIN)) && ns_capable(task_active_pid_ns(current)->user_ns, CAP_SYS_ADMIN)) &&
((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) ||
uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) && uid_eq(uid, cred->suid)) || ns_capable(cred->user_ns, CAP_SETUID)) &&
((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) ||
gid_eq(gid, cred->sgid)) || nsown_capable(CAP_SETGID))) { gid_eq(gid, cred->sgid)) || ns_capable(cred->user_ns, CAP_SETGID))) {
return 0; return 0;
} }
return -EPERM; return -EPERM;
......
...@@ -768,16 +768,16 @@ int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags) ...@@ -768,16 +768,16 @@ int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags)
*/ */
static int cap_safe_nice(struct task_struct *p) static int cap_safe_nice(struct task_struct *p)
{ {
int is_subset; int is_subset, ret = 0;
rcu_read_lock(); rcu_read_lock();
is_subset = cap_issubset(__task_cred(p)->cap_permitted, is_subset = cap_issubset(__task_cred(p)->cap_permitted,
current_cred()->cap_permitted); current_cred()->cap_permitted);
if (!is_subset && !ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE))
ret = -EPERM;
rcu_read_unlock(); rcu_read_unlock();
if (!is_subset && !capable(CAP_SYS_NICE)) return ret;
return -EPERM;
return 0;
} }
/** /**
...@@ -824,7 +824,7 @@ int cap_task_setnice(struct task_struct *p, int nice) ...@@ -824,7 +824,7 @@ int cap_task_setnice(struct task_struct *p, int nice)
*/ */
static long cap_prctl_drop(struct cred *new, unsigned long cap) static long cap_prctl_drop(struct cred *new, unsigned long cap)
{ {
if (!capable(CAP_SETPCAP)) if (!ns_capable(current_user_ns(), CAP_SETPCAP))
return -EPERM; return -EPERM;
if (!cap_valid(cap)) if (!cap_valid(cap))
return -EINVAL; return -EINVAL;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment