Commit 2c3de1c2 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace

Pull userns fixes from Eric W Biederman:
 "The bulk of the changes are fixing the worst consequences of the user
  namespace design oversight in not considering what happens when one
  namespace starts off as a clone of another namespace, as happens with
  the mount namespace.

  The rest of the changes are just plain bug fixes.

  Many thanks to Andy Lutomirski for pointing out many of these issues."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
  userns: Restrict when proc and sysfs can be mounted
  ipc: Restrict mounting the mqueue filesystem
  vfs: Carefully propogate mounts across user namespaces
  vfs: Add a mount flag to lock read only bind mounts
  userns:  Don't allow creation if the user is chrooted
  yama:  Better permission check for ptraceme
  pid: Handle the exit of a multi-threaded init.
  scm: Require CAP_SYS_ADMIN over the current pidns to spoof pids.
parents 90641712 87a8ebd6
...@@ -798,6 +798,10 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, ...@@ -798,6 +798,10 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
} }
mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD; mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD;
/* Don't allow unprivileged users to change mount flags */
if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY))
mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
atomic_inc(&sb->s_active); atomic_inc(&sb->s_active);
mnt->mnt.mnt_sb = sb; mnt->mnt.mnt_sb = sb;
mnt->mnt.mnt_root = dget(root); mnt->mnt.mnt_root = dget(root);
...@@ -1713,6 +1717,9 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags) ...@@ -1713,6 +1717,9 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
if (readonly_request == __mnt_is_readonly(mnt)) if (readonly_request == __mnt_is_readonly(mnt))
return 0; return 0;
if (mnt->mnt_flags & MNT_LOCK_READONLY)
return -EPERM;
if (readonly_request) if (readonly_request)
error = mnt_make_readonly(real_mount(mnt)); error = mnt_make_readonly(real_mount(mnt));
else else
...@@ -2339,7 +2346,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, ...@@ -2339,7 +2346,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
/* First pass: copy the tree topology */ /* First pass: copy the tree topology */
copy_flags = CL_COPY_ALL | CL_EXPIRE; copy_flags = CL_COPY_ALL | CL_EXPIRE;
if (user_ns != mnt_ns->user_ns) if (user_ns != mnt_ns->user_ns)
copy_flags |= CL_SHARED_TO_SLAVE; copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
new = copy_tree(old, old->mnt.mnt_root, copy_flags); new = copy_tree(old, old->mnt.mnt_root, copy_flags);
if (IS_ERR(new)) { if (IS_ERR(new)) {
up_write(&namespace_sem); up_write(&namespace_sem);
...@@ -2732,6 +2739,51 @@ bool our_mnt(struct vfsmount *mnt) ...@@ -2732,6 +2739,51 @@ bool our_mnt(struct vfsmount *mnt)
return check_mnt(real_mount(mnt)); return check_mnt(real_mount(mnt));
} }
bool current_chrooted(void)
{
/* Does the current process have a non-standard root */
struct path ns_root;
struct path fs_root;
bool chrooted;
/* Find the namespace root */
ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt;
ns_root.dentry = ns_root.mnt->mnt_root;
path_get(&ns_root);
while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root))
;
get_fs_root(current->fs, &fs_root);
chrooted = !path_equal(&fs_root, &ns_root);
path_put(&fs_root);
path_put(&ns_root);
return chrooted;
}
/*
 * update_mnt_policy - record which of sysfs and proc @userns may mount.
 * @userns: user namespace whose may_mount_sysfs / may_mount_proc flags
 *          are updated.
 *
 * Walks the mount list of the current task's mount namespace while holding
 * namespace_sem for reading.  A mount whose superblock magic is SYSFS_MAGIC
 * sets may_mount_sysfs; one whose magic is PROC_SUPER_MAGIC sets
 * may_mount_proc.  Flags are only ever set here, never cleared, and the walk
 * ends early once both are set.
 */
void update_mnt_policy(struct user_namespace *userns)
{
	struct mnt_namespace *mntns = current->nsproxy->mnt_ns;
	struct mount *m;

	down_read(&namespace_sem);
	list_for_each_entry(m, &mntns->list, mnt_list) {
		if (m->mnt.mnt_sb->s_magic == SYSFS_MAGIC)
			userns->may_mount_sysfs = true;
		else if (m->mnt.mnt_sb->s_magic == PROC_SUPER_MAGIC)
			userns->may_mount_proc = true;

		/* Nothing more to learn once both filesystems were seen. */
		if (userns->may_mount_sysfs && userns->may_mount_proc)
			break;
	}
	up_read(&namespace_sem);
}
static void *mntns_get(struct task_struct *task) static void *mntns_get(struct task_struct *task)
{ {
struct mnt_namespace *ns = NULL; struct mnt_namespace *ns = NULL;
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include <linux/mnt_namespace.h> #include <linux/mnt_namespace.h>
#include <linux/mount.h> #include <linux/mount.h>
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/nsproxy.h>
#include "internal.h" #include "internal.h"
#include "pnode.h" #include "pnode.h"
...@@ -220,6 +221,7 @@ static struct mount *get_source(struct mount *dest, ...@@ -220,6 +221,7 @@ static struct mount *get_source(struct mount *dest,
int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
struct mount *source_mnt, struct list_head *tree_list) struct mount *source_mnt, struct list_head *tree_list)
{ {
struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
struct mount *m, *child; struct mount *m, *child;
int ret = 0; int ret = 0;
struct mount *prev_dest_mnt = dest_mnt; struct mount *prev_dest_mnt = dest_mnt;
...@@ -237,6 +239,10 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, ...@@ -237,6 +239,10 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
source = get_source(m, prev_dest_mnt, prev_src_mnt, &type); source = get_source(m, prev_dest_mnt, prev_src_mnt, &type);
/* Notice when we are propagating across user namespaces */
if (m->mnt_ns->user_ns != user_ns)
type |= CL_UNPRIVILEGED;
child = copy_tree(source, source->mnt.mnt_root, type); child = copy_tree(source, source->mnt.mnt_root, type);
if (IS_ERR(child)) { if (IS_ERR(child)) {
ret = PTR_ERR(child); ret = PTR_ERR(child);
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#define CL_MAKE_SHARED 0x08 #define CL_MAKE_SHARED 0x08
#define CL_PRIVATE 0x10 #define CL_PRIVATE 0x10
#define CL_SHARED_TO_SLAVE 0x20 #define CL_SHARED_TO_SLAVE 0x20
#define CL_UNPRIVILEGED 0x40
static inline void set_mnt_shared(struct mount *mnt) static inline void set_mnt_shared(struct mount *mnt)
{ {
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/bitops.h> #include <linux/bitops.h>
#include <linux/user_namespace.h>
#include <linux/mount.h> #include <linux/mount.h>
#include <linux/pid_namespace.h> #include <linux/pid_namespace.h>
#include <linux/parser.h> #include <linux/parser.h>
...@@ -108,6 +109,9 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, ...@@ -108,6 +109,9 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
} else { } else {
ns = task_active_pid_ns(current); ns = task_active_pid_ns(current);
options = data; options = data;
if (!current_user_ns()->may_mount_proc)
return ERR_PTR(-EPERM);
} }
sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns); sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns);
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/magic.h> #include <linux/magic.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/user_namespace.h>
#include "sysfs.h" #include "sysfs.h"
...@@ -111,6 +112,9 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, ...@@ -111,6 +112,9 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
struct super_block *sb; struct super_block *sb;
int error; int error;
if (!(flags & MS_KERNMOUNT) && !current_user_ns()->may_mount_sysfs)
return ERR_PTR(-EPERM);
info = kzalloc(sizeof(*info), GFP_KERNEL); info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info) if (!info)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
......
...@@ -50,4 +50,6 @@ static inline void get_fs_root_and_pwd(struct fs_struct *fs, struct path *root, ...@@ -50,4 +50,6 @@ static inline void get_fs_root_and_pwd(struct fs_struct *fs, struct path *root,
spin_unlock(&fs->lock); spin_unlock(&fs->lock);
} }
extern bool current_chrooted(void);
#endif /* _LINUX_FS_STRUCT_H */ #endif /* _LINUX_FS_STRUCT_H */
...@@ -47,6 +47,8 @@ struct mnt_namespace; ...@@ -47,6 +47,8 @@ struct mnt_namespace;
#define MNT_INTERNAL 0x4000 #define MNT_INTERNAL 0x4000
#define MNT_LOCK_READONLY 0x400000
struct vfsmount { struct vfsmount {
struct dentry *mnt_root; /* root of the mounted tree */ struct dentry *mnt_root; /* root of the mounted tree */
struct super_block *mnt_sb; /* pointer to superblock */ struct super_block *mnt_sb; /* pointer to superblock */
......
...@@ -26,6 +26,8 @@ struct user_namespace { ...@@ -26,6 +26,8 @@ struct user_namespace {
kuid_t owner; kuid_t owner;
kgid_t group; kgid_t group;
unsigned int proc_inum; unsigned int proc_inum;
bool may_mount_sysfs;
bool may_mount_proc;
}; };
extern struct user_namespace init_user_ns; extern struct user_namespace init_user_ns;
...@@ -82,4 +84,6 @@ static inline void put_user_ns(struct user_namespace *ns) ...@@ -82,4 +84,6 @@ static inline void put_user_ns(struct user_namespace *ns)
#endif #endif
void update_mnt_policy(struct user_namespace *userns);
#endif /* _LINUX_USER_H */ #endif /* _LINUX_USER_H */
...@@ -330,8 +330,16 @@ static struct dentry *mqueue_mount(struct file_system_type *fs_type, ...@@ -330,8 +330,16 @@ static struct dentry *mqueue_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, int flags, const char *dev_name,
void *data) void *data)
{ {
if (!(flags & MS_KERNMOUNT)) if (!(flags & MS_KERNMOUNT)) {
data = current->nsproxy->ipc_ns; struct ipc_namespace *ns = current->nsproxy->ipc_ns;
/* Don't allow mounting unless the caller has CAP_SYS_ADMIN
* over the ipc namespace.
*/
if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
data = ns;
}
return mount_ns(fs_type, flags, data, mqueue_fill_super); return mount_ns(fs_type, flags, data, mqueue_fill_super);
} }
......
...@@ -181,6 +181,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) ...@@ -181,6 +181,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
int nr; int nr;
int rc; int rc;
struct task_struct *task, *me = current; struct task_struct *task, *me = current;
int init_pids = thread_group_leader(me) ? 1 : 2;
/* Don't allow any more processes into the pid namespace */ /* Don't allow any more processes into the pid namespace */
disable_pid_allocation(pid_ns); disable_pid_allocation(pid_ns);
...@@ -230,7 +231,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) ...@@ -230,7 +231,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
*/ */
for (;;) { for (;;) {
set_current_state(TASK_UNINTERRUPTIBLE); set_current_state(TASK_UNINTERRUPTIBLE);
if (pid_ns->nr_hashed == 1) if (pid_ns->nr_hashed == init_pids)
break; break;
schedule(); schedule();
} }
......
...@@ -51,6 +51,8 @@ struct user_namespace init_user_ns = { ...@@ -51,6 +51,8 @@ struct user_namespace init_user_ns = {
.owner = GLOBAL_ROOT_UID, .owner = GLOBAL_ROOT_UID,
.group = GLOBAL_ROOT_GID, .group = GLOBAL_ROOT_GID,
.proc_inum = PROC_USER_INIT_INO, .proc_inum = PROC_USER_INIT_INO,
.may_mount_sysfs = true,
.may_mount_proc = true,
}; };
EXPORT_SYMBOL_GPL(init_user_ns); EXPORT_SYMBOL_GPL(init_user_ns);
......
...@@ -61,6 +61,15 @@ int create_user_ns(struct cred *new) ...@@ -61,6 +61,15 @@ int create_user_ns(struct cred *new)
kgid_t group = new->egid; kgid_t group = new->egid;
int ret; int ret;
/*
* Verify that we can not violate the policy of which files
* may be accessed that is specified by the root directory,
* by verifying that the root directory is at the root of the
* mount namespace which allows all files to be accessed.
*/
if (current_chrooted())
return -EPERM;
/* The creator needs a mapping in the parent user namespace /* The creator needs a mapping in the parent user namespace
* or else we won't be able to reasonably tell userspace who * or else we won't be able to reasonably tell userspace who
* created a user_namespace. * created a user_namespace.
...@@ -87,6 +96,8 @@ int create_user_ns(struct cred *new) ...@@ -87,6 +96,8 @@ int create_user_ns(struct cred *new)
set_cred_user_ns(new, ns); set_cred_user_ns(new, ns);
update_mnt_policy(ns);
return 0; return 0;
} }
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/netdevice.h> #include <linux/netdevice.h>
#include <linux/security.h> #include <linux/security.h>
#include <linux/pid_namespace.h>
#include <linux/pid.h> #include <linux/pid.h>
#include <linux/nsproxy.h> #include <linux/nsproxy.h>
#include <linux/slab.h> #include <linux/slab.h>
...@@ -52,7 +53,8 @@ static __inline__ int scm_check_creds(struct ucred *creds) ...@@ -52,7 +53,8 @@ static __inline__ int scm_check_creds(struct ucred *creds)
if (!uid_valid(uid) || !gid_valid(gid)) if (!uid_valid(uid) || !gid_valid(gid))
return -EINVAL; return -EINVAL;
if ((creds->pid == task_tgid_vnr(current) || nsown_capable(CAP_SYS_ADMIN)) && if ((creds->pid == task_tgid_vnr(current) ||
ns_capable(current->nsproxy->pid_ns->user_ns, CAP_SYS_ADMIN)) &&
((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) ||
uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) && uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) &&
((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) ||
......
...@@ -347,10 +347,8 @@ int yama_ptrace_traceme(struct task_struct *parent) ...@@ -347,10 +347,8 @@ int yama_ptrace_traceme(struct task_struct *parent)
/* Only disallow PTRACE_TRACEME on more aggressive settings. */ /* Only disallow PTRACE_TRACEME on more aggressive settings. */
switch (ptrace_scope) { switch (ptrace_scope) {
case YAMA_SCOPE_CAPABILITY: case YAMA_SCOPE_CAPABILITY:
rcu_read_lock(); if (!has_ns_capability(parent, current_user_ns(), CAP_SYS_PTRACE))
if (!ns_capable(__task_cred(parent)->user_ns, CAP_SYS_PTRACE))
rc = -EPERM; rc = -EPERM;
rcu_read_unlock();
break; break;
case YAMA_SCOPE_NO_ATTACH: case YAMA_SCOPE_NO_ATTACH:
rc = -EPERM; rc = -EPERM;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment