Commit f463fe5b authored by Seth Forshee's avatar Seth Forshee

ext4: Add support for unprivileged mounts from user namespaces

Support unprivileged mounting of ext4 volumes from user
namespaces. This requires the following changes:

 - Perform all uid and gid conversions to/from disk relative to
   s_user_ns. In many cases this will already be handled by the
   vfs helper functions. This also requires updates to handle
   cases where ids may not map into s_user_ns.

 - Update most capability checks to check for capabilities in
   s_user_ns rather than init_user_ns. These mostly reflect
   changes to the filesystem that a user in s_user_ns could
   already make externally by virtue of having write access to
   the backing device.

 - Restrict unsafe options in either the mount options or the
   ext4 superblock. Currently the only concerning option is
   errors=panic, and this is made to require CAP_SYS_ADMIN in
   init_user_ns.

 - Verify that unprivileged users have the required access to the
   journal device at the path passed via the journal_path mount
   option.

   Note that for the journal_path and the journal_dev mount
   options, and for external journal devices specified in the
   ext4 superblock, devcgroup restrictions will be enforced by
   __blkdev_get(), (via blkdev_get_by_dev()), ensuring that the
   user has been granted appropriate access to the block device.

 - Set the FS_USERNS_MOUNT flag on the filesystem types supported
   by ext4.

sysfs attributes for ext4 mounts remain writable only by real
root.
Signed-off-by: default avatarSeth Forshee <seth.forshee@canonical.com>
parent d033b7ff
......@@ -13,7 +13,7 @@
* Convert from filesystem to in-memory representation.
*/
static struct posix_acl *
ext4_acl_from_disk(const void *value, size_t size)
ext4_acl_from_disk(struct super_block *sb, const void *value, size_t size)
{
const char *end = (char *)value + size;
int n, count;
......@@ -57,16 +57,20 @@ ext4_acl_from_disk(const void *value, size_t size)
if ((char *)value > end)
goto fail;
acl->a_entries[n].e_uid =
make_kuid(&init_user_ns,
make_kuid(sb->s_user_ns,
le32_to_cpu(entry->e_id));
if (!uid_valid(acl->a_entries[n].e_uid))
goto fail;
break;
case ACL_GROUP:
value = (char *)value + sizeof(ext4_acl_entry);
if ((char *)value > end)
goto fail;
acl->a_entries[n].e_gid =
make_kgid(&init_user_ns,
make_kgid(sb->s_user_ns,
le32_to_cpu(entry->e_id));
if (!gid_valid(acl->a_entries[n].e_gid))
goto fail;
break;
default:
......@@ -86,11 +90,14 @@ ext4_acl_from_disk(const void *value, size_t size)
* Convert from in-memory to filesystem representation.
*/
static void *
ext4_acl_to_disk(const struct posix_acl *acl, size_t *size)
ext4_acl_to_disk(struct super_block *sb, const struct posix_acl *acl,
size_t *size)
{
ext4_acl_header *ext_acl;
char *e;
size_t n;
uid_t uid;
gid_t gid;
*size = ext4_acl_size(acl->a_count);
ext_acl = kmalloc(sizeof(ext4_acl_header) + acl->a_count *
......@@ -106,13 +113,17 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size)
entry->e_perm = cpu_to_le16(acl_e->e_perm);
switch (acl_e->e_tag) {
case ACL_USER:
entry->e_id = cpu_to_le32(
from_kuid(&init_user_ns, acl_e->e_uid));
uid = from_kuid(sb->s_user_ns, acl_e->e_uid);
if (uid == (uid_t)-1)
goto fail;
entry->e_id = cpu_to_le32(uid);
e += sizeof(ext4_acl_entry);
break;
case ACL_GROUP:
entry->e_id = cpu_to_le32(
from_kgid(&init_user_ns, acl_e->e_gid));
gid = from_kgid(sb->s_user_ns, acl_e->e_gid);
if (gid == (gid_t)-1)
goto fail;
entry->e_id = cpu_to_le32(gid);
e += sizeof(ext4_acl_entry);
break;
......@@ -165,7 +176,7 @@ ext4_get_acl(struct inode *inode, int type)
retval = ext4_xattr_get(inode, name_index, "", value, retval);
}
if (retval > 0)
acl = ext4_acl_from_disk(value, retval);
acl = ext4_acl_from_disk(inode->i_sb, value, retval);
else if (retval == -ENODATA || retval == -ENOSYS)
acl = NULL;
else
......@@ -218,7 +229,7 @@ __ext4_set_acl(handle_t *handle, struct inode *inode, int type,
return -EINVAL;
}
if (acl) {
value = ext4_acl_to_disk(acl, &size);
value = ext4_acl_to_disk(inode->i_sb, acl, &size);
if (IS_ERR(value))
return (int)PTR_ERR(value);
}
......
......@@ -555,8 +555,8 @@ static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
/* Hm, nope. Are (enough) root reserved clusters available? */
if (uid_eq(sbi->s_resuid, current_fsuid()) ||
(!gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) && in_group_p(sbi->s_resgid)) ||
capable(CAP_SYS_RESOURCE) ||
(!gid_eq(sbi->s_resgid, make_kgid(sbi->s_sb->s_user_ns, 0)) && in_group_p(sbi->s_resgid)) ||
ns_capable(sbi->s_sb->s_user_ns, CAP_SYS_RESOURCE) ||
(flags & EXT4_MB_USE_ROOT_BLOCKS)) {
if (free_clusters >= (nclusters + dirty_clusters +
......
......@@ -732,6 +732,10 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
if (!dir || !dir->i_nlink)
return ERR_PTR(-EPERM);
/* Supplied owner must be valid */
if (owner && (owner[0] == (uid_t)-1 || owner[1] == (uid_t)-1))
return ERR_PTR(-EOVERFLOW);
if ((ext4_encrypted_inode(dir) ||
DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))) &&
(S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
......@@ -744,7 +748,6 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
nblocks += EXT4_DATA_TRANS_BLOCKS(dir->i_sb);
encrypt = 1;
}
sb = dir->i_sb;
ngroups = ext4_get_groups_count(sb);
trace_ext4_request_inode(dir, mode);
......
......@@ -254,7 +254,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
* This test looks nicer. Thanks to Pauline Middelink
*/
if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
if (!capable(CAP_LINUX_IMMUTABLE))
if (!ns_capable(sb->s_user_ns, CAP_LINUX_IMMUTABLE))
goto flags_out;
}
......@@ -263,7 +263,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
* the relevant capability.
*/
if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
if (!capable(CAP_SYS_RESOURCE))
if (!ns_capable(sb->s_user_ns, CAP_SYS_RESOURCE))
goto flags_out;
}
if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
......@@ -598,7 +598,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
struct fstrim_range range;
int ret = 0;
if (!capable(CAP_SYS_ADMIN))
if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
return -EPERM;
if (!blk_queue_discard(q))
......
......@@ -20,7 +20,7 @@ int ext4_resize_begin(struct super_block *sb)
{
int ret = 0;
if (!capable(CAP_SYS_RESOURCE))
if (!ns_capable(sb->s_user_ns, CAP_SYS_RESOURCE))
return -EPERM;
/*
......
......@@ -39,6 +39,7 @@
#include <linux/log2.h>
#include <linux/crc16.h>
#include <linux/cleancache.h>
#include <linux/user_namespace.h>
#include <asm/uaccess.h>
#include <linux/kthread.h>
......@@ -91,7 +92,7 @@ static struct file_system_type ext2_fs_type = {
.name = "ext2",
.mount = ext4_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
.fs_flags = FS_REQUIRES_DEV | FS_USERNS_MOUNT,
};
MODULE_ALIAS_FS("ext2");
MODULE_ALIAS("ext2");
......@@ -106,7 +107,7 @@ static struct file_system_type ext3_fs_type = {
.name = "ext3",
.mount = ext4_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
.fs_flags = FS_REQUIRES_DEV | FS_USERNS_MOUNT,
};
MODULE_ALIAS_FS("ext3");
MODULE_ALIAS("ext3");
......@@ -1509,6 +1510,13 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
return -1;
}
if (token == Opt_err_panic && !capable(CAP_SYS_ADMIN)) {
ext4_msg(sb, KERN_ERR,
"Mount option \"%s\" not allowed for unprivileged mounts",
opt);
return -1;
}
if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg))
return -1;
if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
......@@ -1551,14 +1559,14 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
} else if (token == Opt_stripe) {
sbi->s_stripe = arg;
} else if (token == Opt_resuid) {
uid = make_kuid(current_user_ns(), arg);
uid = make_kuid(sb->s_user_ns, arg);
if (!uid_valid(uid)) {
ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg);
return -1;
}
sbi->s_resuid = uid;
} else if (token == Opt_resgid) {
gid = make_kgid(current_user_ns(), arg);
gid = make_kgid(sb->s_user_ns, arg);
if (!gid_valid(gid)) {
ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg);
return -1;
......@@ -1597,6 +1605,19 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
return -1;
}
/*
* Refuse access for unprivileged mounts if the user does
* not have rw access to the journal device via the supplied
* path.
*/
if (!capable(CAP_SYS_ADMIN) &&
inode_permission(d_inode(path.dentry), MAY_READ|MAY_WRITE)) {
ext4_msg(sb, KERN_ERR,
"error: Insufficient access to journal path %s",
journal_path);
return -1;
}
journal_inode = d_inode(path.dentry);
if (!S_ISBLK(journal_inode->i_mode)) {
ext4_msg(sb, KERN_ERR, "error: journal path %s "
......@@ -1827,14 +1848,14 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
SEQ_OPTS_PRINT("%s", token2str(m->token));
}
if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) ||
if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(sb->s_user_ns, EXT4_DEF_RESUID)) ||
le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
SEQ_OPTS_PRINT("resuid=%u",
from_kuid_munged(&init_user_ns, sbi->s_resuid));
if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) ||
from_kuid_munged(sb->s_user_ns, sbi->s_resuid));
if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(sb->s_user_ns, EXT4_DEF_RESGID)) ||
le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
SEQ_OPTS_PRINT("resgid=%u",
from_kgid_munged(&init_user_ns, sbi->s_resgid));
from_kgid_munged(sb->s_user_ns, sbi->s_resgid));
def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
SEQ_OPTS_PUTS("errors=remount-ro");
......@@ -2606,7 +2627,7 @@ static ssize_t trigger_test_error(struct ext4_attr *a,
{
int len = count;
if (!capable(CAP_SYS_ADMIN))
if (!ns_capable(sbi->s_sb->s_user_ns, CAP_SYS_ADMIN))
return -EPERM;
if (len && buf[len-1] == '\n')
......@@ -3590,19 +3611,26 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
set_opt(sb, WRITEBACK_DATA);
if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) {
if (!capable(CAP_SYS_ADMIN))
goto failed_mount;
set_opt(sb, ERRORS_PANIC);
else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
} else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE) {
set_opt(sb, ERRORS_CONT);
else
} else {
set_opt(sb, ERRORS_RO);
}
/* block_validity enabled by default; disable with noblock_validity */
set_opt(sb, BLOCK_VALIDITY);
if (def_mount_opts & EXT4_DEFM_DISCARD)
set_opt(sb, DISCARD);
sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
sbi->s_resuid = make_kuid(sb->s_user_ns, le16_to_cpu(es->s_def_resuid));
if (!uid_valid(sbi->s_resuid))
sbi->s_resuid = make_kuid(sb->s_user_ns, EXT4_DEF_RESUID);
sbi->s_resgid = make_kgid(sb->s_user_ns, le16_to_cpu(es->s_def_resgid));
if (!gid_valid(sbi->s_resgid))
sbi->s_resgid = make_kgid(sb->s_user_ns, EXT4_DEF_RESGID);
sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
......@@ -4364,6 +4392,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
ext4_blkdev_remove(sbi);
brelse(bh);
out_fail:
/* sb->s_user_ns will be put when sb is destroyed */
sb->s_fs_info = NULL;
kfree(sbi->s_blockgroup_lock);
kfree(sbi);
......@@ -5582,7 +5611,7 @@ static struct file_system_type ext4_fs_type = {
.name = "ext4",
.mount = ext4_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
.fs_flags = FS_REQUIRES_DEV | FS_USERNS_MOUNT,
};
MODULE_ALIAS_FS("ext4");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment