Commit c4ed533b authored by Christian Brauner's avatar Christian Brauner Committed by David Sterba

btrfs: allow idmapped SNAP_DESTROY ioctls

Destroying subvolumes and snapshots are important features of btrfs.
Both operations are available to unprivileged users if the filesystem
has been mounted with the "user_subvol_rm_allowed" mount option. Allow
subvolume and snapshot deletion on idmapped mounts. This is a fairly
straightforward operation since all the permission checking helpers are
already capable of handling idmapped mounts. So we just need to pass
down the mount's userns.

Subvolumes and snapshots can either be deleted by specifying their name
or - if BTRFS_IOC_SNAP_DESTROY_V2 is used - by their subvolume or
snapshot id if the BTRFS_SUBVOL_SPEC_BY_ID is set.

This feature is blocked on idmapped mounts as this allows filesystem
wide subvolume deletions and thus can escape the scope of what's exposed
under the mount identified by the fd passed with the ioctl.

This means that even the root or CAP_SYS_ADMIN capable user can't delete
a subvolume via BTRFS_SUBVOL_SPEC_BY_ID. This is intentional.

The root user is currently already subject to permission checks in
btrfs_may_delete() including whether the inode's i_uid/i_gid of the
directory the subvolume is located in have a mapping in the caller's
idmapping. For this to fail isn't currently possible since a btrfs
filesystem can't be mounted with a non-initial idmapping but it shows
that even the root user would fail to delete a subvolume if the relevant
inode isn't mapped in their idmapping. The idmapped mount case is the
same in principle.

This isn't a huge problem a root user wanting to delete arbitrary
subvolumes can just always create another (even detached) mount without
an idmapping attached.

In addition, we will allow BTRFS_SUBVOL_SPEC_BY_ID for cases where the
subvolume to delete is directly located under inode referenced by the fd
passed for the ioctl() in a follow-up commit.

Here is an example where a btrfs subvolume is deleted through a
subvolume mount that does not expose the subvolume to be delete but it
can still be deleted by using the subvolume id:

  /* Compile the following program as "delete_by_spec". */

  #define _GNU_SOURCE
  #include <fcntl.h>
  #include <inttypes.h>
  #include <linux/btrfs.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <sys/ioctl.h>
  #include <sys/stat.h>
  #include <sys/types.h>
  #include <unistd.h>

  static int rm_subvolume_by_id(int fd, uint64_t subvolid)
  {
	 struct btrfs_ioctl_vol_args_v2 args = {};
	 int ret;

	 args.flags = BTRFS_SUBVOL_SPEC_BY_ID;
	 args.subvolid = subvolid;

	 ret = ioctl(fd, BTRFS_IOC_SNAP_DESTROY_V2, &args);
	 if (ret < 0)
		 return -1;

	 return 0;
  }

  int main(int argc, char *argv[])
  {
	 int subvolid = 0;

	 if (argc < 3)
		 exit(1);

	 fprintf(stderr, "Opening %s\n", argv[1]);
	 int fd = open(argv[1], O_CLOEXEC | O_DIRECTORY);
	 if (fd < 0)
		 exit(2);

	 subvolid = atoi(argv[2]);

	 fprintf(stderr, "Deleting subvolume with subvolid %d\n", subvolid);
	 int ret = rm_subvolume_by_id(fd, subvolid);
	 if (ret < 0)
		 exit(3);

	 exit(0);
  }
  #include <stdio.h>"
  #include <stdlib.h>"
  #include <linux/btrfs.h"

  truncate -s 10G btrfs.img
  mkfs.btrfs btrfs.img
  export LOOPDEV=$(sudo losetup -f --show btrfs.img)
  mount ${LOOPDEV} /mnt
  sudo chown $(id -u):$(id -g) /mnt
  btrfs subvolume create /mnt/A
  btrfs subvolume create /mnt/B/C
  # Get subvolume id via:
  sudo btrfs subvolume show /mnt/A
  # Save subvolid
  SUBVOLID=<nr>
  sudo umount /mnt
  sudo mount ${LOOPDEV} -o subvol=B/C,user_subvol_rm_allowed /mnt
  ./delete_by_spec /mnt ${SUBVOLID}

With idmapped mounts this can potentially be used by users to delete
subvolumes/snapshots they would otherwise not have access to as the
idmapping would be applied to an inode that is not exposed in the mount
of the subvolume.

The fact that this is a filesystem wide operation suggests it might be a
good idea to expose this under a separate ioctl that clearly indicates
this. In essence, the file descriptor passed with the ioctl is merely
used to identify the filesystem on which to operate when
BTRFS_SUBVOL_SPEC_BY_ID is used.
Reviewed-by: default avatarJosef Bacik <josef@toxicpanda.com>
Reviewed-by: default avatarQu Wenruo <wqu@suse.com>
Signed-off-by: default avatarChristian Brauner <christian.brauner@ubuntu.com>
Reviewed-by: default avatarDavid Sterba <dsterba@suse.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent 4d4340c9
...@@ -837,7 +837,8 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, ...@@ -837,7 +837,8 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
* nfs_async_unlink(). * nfs_async_unlink().
*/ */
static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir) static int btrfs_may_delete(struct user_namespace *mnt_userns,
struct inode *dir, struct dentry *victim, int isdir)
{ {
int error; int error;
...@@ -847,12 +848,12 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir) ...@@ -847,12 +848,12 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
BUG_ON(d_inode(victim->d_parent) != dir); BUG_ON(d_inode(victim->d_parent) != dir);
audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE); audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
error = inode_permission(&init_user_ns, dir, MAY_WRITE | MAY_EXEC); error = inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC);
if (error) if (error)
return error; return error;
if (IS_APPEND(dir)) if (IS_APPEND(dir))
return -EPERM; return -EPERM;
if (check_sticky(&init_user_ns, dir, d_inode(victim)) || if (check_sticky(mnt_userns, dir, d_inode(victim)) ||
IS_APPEND(d_inode(victim)) || IS_IMMUTABLE(d_inode(victim)) || IS_APPEND(d_inode(victim)) || IS_IMMUTABLE(d_inode(victim)) ||
IS_SWAPFILE(d_inode(victim))) IS_SWAPFILE(d_inode(victim)))
return -EPERM; return -EPERM;
...@@ -2907,6 +2908,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, ...@@ -2907,6 +2908,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
struct btrfs_root *dest = NULL; struct btrfs_root *dest = NULL;
struct btrfs_ioctl_vol_args *vol_args = NULL; struct btrfs_ioctl_vol_args *vol_args = NULL;
struct btrfs_ioctl_vol_args_v2 *vol_args2 = NULL; struct btrfs_ioctl_vol_args_v2 *vol_args2 = NULL;
struct user_namespace *mnt_userns = file_mnt_user_ns(file);
char *subvol_name, *subvol_name_ptr = NULL; char *subvol_name, *subvol_name_ptr = NULL;
int subvol_namelen; int subvol_namelen;
int err = 0; int err = 0;
...@@ -2934,6 +2936,18 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, ...@@ -2934,6 +2936,18 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
if (err) if (err)
goto out; goto out;
} else { } else {
/*
* Deleting by subvolume id can be used to delete
* subvolumes/snapshots anywhere in the filesystem.
* Ensure that users can't abuse idmapped mounts of
* btrfs subvolumes/snapshots to perform operations in
* the whole filesystem.
*/
if (mnt_userns != &init_user_ns) {
err = -EOPNOTSUPP;
goto out;
}
if (vol_args2->subvolid < BTRFS_FIRST_FREE_OBJECTID) { if (vol_args2->subvolid < BTRFS_FIRST_FREE_OBJECTID) {
err = -EINVAL; err = -EINVAL;
goto out; goto out;
...@@ -3018,7 +3032,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, ...@@ -3018,7 +3032,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
err = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT); err = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
if (err == -EINTR) if (err == -EINTR)
goto free_subvol_name; goto free_subvol_name;
dentry = lookup_one_len(subvol_name, parent, subvol_namelen); dentry = lookup_one(mnt_userns, subvol_name, parent, subvol_namelen);
if (IS_ERR(dentry)) { if (IS_ERR(dentry)) {
err = PTR_ERR(dentry); err = PTR_ERR(dentry);
goto out_unlock_dir; goto out_unlock_dir;
...@@ -3060,14 +3074,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, ...@@ -3060,14 +3074,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
if (root == dest) if (root == dest)
goto out_dput; goto out_dput;
err = inode_permission(&init_user_ns, inode, err = inode_permission(mnt_userns, inode, MAY_WRITE | MAY_EXEC);
MAY_WRITE | MAY_EXEC);
if (err) if (err)
goto out_dput; goto out_dput;
} }
/* check if subvolume may be deleted by a user */ /* check if subvolume may be deleted by a user */
err = btrfs_may_delete(dir, dentry, 1); err = btrfs_may_delete(mnt_userns, dir, dentry, 1);
if (err) if (err)
goto out_dput; goto out_dput;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment