Commit 855b7717 authored by Kanchan Joshi's avatar Kanchan Joshi Committed by Christoph Hellwig

nvme: fine-granular CAP_SYS_ADMIN for nvme io commands

Currently both io and admin commands are kept under a
coarse-granular CAP_SYS_ADMIN check, disregarding file mode completely.

$ ls -l /dev/ng*
crw-rw-rw- 1 root root 242, 0 Sep  9 19:20 /dev/ng0n1
crw------- 1 root root 242, 1 Sep  9 19:20 /dev/ng0n2

In the example above, ng0n1 appears as if it may allow unprivileged
read/write operation but it does not and behaves same as ng0n2.

This patch implements a shift from CAP_SYS_ADMIN to more fine-granular
control for io-commands.
If CAP_SYS_ADMIN is present, nothing else is checked as before.
Otherwise, following rules are in place
- any admin-cmd is not allowed
- vendor-specific and fabric commmand are not allowed
- io-commands that can write are allowed if matching FMODE_WRITE
permission is present
- io-commands that read are allowed

Add a helper nvme_cmd_allowed that implements above policy.
Change all the callers of CAP_SYS_ADMIN to go through nvme_cmd_allowed
for any decision making.
Since file open mode is counted for any approval/denial, change at
various places to keep file-mode information handy.
Signed-off-by: default avatarKanchan Joshi <joshi.k@samsung.com>
Reviewed-by: default avatarJens Axboe <axboe@kernel.dk>
Reviewed-by: default avatarKeith Busch <kbusch@kernel.org>
Reviewed-by: default avatarChaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
parent cf3d0084
...@@ -8,6 +8,34 @@ ...@@ -8,6 +8,34 @@
#include <linux/io_uring.h> #include <linux/io_uring.h>
#include "nvme.h" #include "nvme.h"
static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
fmode_t mode)
{
if (capable(CAP_SYS_ADMIN))
return true;
/*
* Do not allow unprivileged processes to send vendor specific or fabrics
* commands as we can't be sure about their effects.
*/
if (c->common.opcode >= nvme_cmd_vendor_start ||
c->common.opcode == nvme_fabrics_command)
return false;
/* do not allow unprivileged admin commands */
if (!ns)
return false;
/*
* Only allow I/O commands that transfer data to the controller if the
* special file is open for writing, but always allow I/O commands that
* transfer data from the controller.
*/
if (nvme_is_write(c))
return mode & FMODE_WRITE;
return true;
}
/* /*
* Convert integer values from ioctl structures to user pointers, silently * Convert integer values from ioctl structures to user pointers, silently
* ignoring the upper bits in the compat case to match behaviour of 32-bit * ignoring the upper bits in the compat case to match behaviour of 32-bit
...@@ -261,7 +289,7 @@ static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl, ...@@ -261,7 +289,7 @@ static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,
} }
static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct nvme_passthru_cmd __user *ucmd) struct nvme_passthru_cmd __user *ucmd, fmode_t mode)
{ {
struct nvme_passthru_cmd cmd; struct nvme_passthru_cmd cmd;
struct nvme_command c; struct nvme_command c;
...@@ -269,8 +297,6 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, ...@@ -269,8 +297,6 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
u64 result; u64 result;
int status; int status;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
if (copy_from_user(&cmd, ucmd, sizeof(cmd))) if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
return -EFAULT; return -EFAULT;
if (cmd.flags) if (cmd.flags)
...@@ -291,6 +317,9 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, ...@@ -291,6 +317,9 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
c.common.cdw14 = cpu_to_le32(cmd.cdw14); c.common.cdw14 = cpu_to_le32(cmd.cdw14);
c.common.cdw15 = cpu_to_le32(cmd.cdw15); c.common.cdw15 = cpu_to_le32(cmd.cdw15);
if (!nvme_cmd_allowed(ns, &c, mode))
return -EACCES;
if (cmd.timeout_ms) if (cmd.timeout_ms)
timeout = msecs_to_jiffies(cmd.timeout_ms); timeout = msecs_to_jiffies(cmd.timeout_ms);
...@@ -308,15 +337,14 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, ...@@ -308,15 +337,14 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
} }
static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct nvme_passthru_cmd64 __user *ucmd, bool vec) struct nvme_passthru_cmd64 __user *ucmd, bool vec,
fmode_t mode)
{ {
struct nvme_passthru_cmd64 cmd; struct nvme_passthru_cmd64 cmd;
struct nvme_command c; struct nvme_command c;
unsigned timeout = 0; unsigned timeout = 0;
int status; int status;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
if (copy_from_user(&cmd, ucmd, sizeof(cmd))) if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
return -EFAULT; return -EFAULT;
if (cmd.flags) if (cmd.flags)
...@@ -337,6 +365,9 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, ...@@ -337,6 +365,9 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
c.common.cdw14 = cpu_to_le32(cmd.cdw14); c.common.cdw14 = cpu_to_le32(cmd.cdw14);
c.common.cdw15 = cpu_to_le32(cmd.cdw15); c.common.cdw15 = cpu_to_le32(cmd.cdw15);
if (!nvme_cmd_allowed(ns, &c, mode))
return -EACCES;
if (cmd.timeout_ms) if (cmd.timeout_ms)
timeout = msecs_to_jiffies(cmd.timeout_ms); timeout = msecs_to_jiffies(cmd.timeout_ms);
...@@ -483,9 +514,6 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, ...@@ -483,9 +514,6 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
void *meta = NULL; void *meta = NULL;
int ret; int ret;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
c.common.opcode = READ_ONCE(cmd->opcode); c.common.opcode = READ_ONCE(cmd->opcode);
c.common.flags = READ_ONCE(cmd->flags); c.common.flags = READ_ONCE(cmd->flags);
if (c.common.flags) if (c.common.flags)
...@@ -507,6 +535,9 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, ...@@ -507,6 +535,9 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14)); c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14));
c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15)); c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15));
if (!nvme_cmd_allowed(ns, &c, ioucmd->file->f_mode))
return -EACCES;
d.metadata = READ_ONCE(cmd->metadata); d.metadata = READ_ONCE(cmd->metadata);
d.addr = READ_ONCE(cmd->addr); d.addr = READ_ONCE(cmd->addr);
d.data_len = READ_ONCE(cmd->data_len); d.data_len = READ_ONCE(cmd->data_len);
...@@ -570,13 +601,13 @@ static bool is_ctrl_ioctl(unsigned int cmd) ...@@ -570,13 +601,13 @@ static bool is_ctrl_ioctl(unsigned int cmd)
} }
static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd, static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd,
void __user *argp) void __user *argp, fmode_t mode)
{ {
switch (cmd) { switch (cmd) {
case NVME_IOCTL_ADMIN_CMD: case NVME_IOCTL_ADMIN_CMD:
return nvme_user_cmd(ctrl, NULL, argp); return nvme_user_cmd(ctrl, NULL, argp, mode);
case NVME_IOCTL_ADMIN64_CMD: case NVME_IOCTL_ADMIN64_CMD:
return nvme_user_cmd64(ctrl, NULL, argp, false); return nvme_user_cmd64(ctrl, NULL, argp, false, mode);
default: default:
return sed_ioctl(ctrl->opal_dev, cmd, argp); return sed_ioctl(ctrl->opal_dev, cmd, argp);
} }
...@@ -601,14 +632,14 @@ struct nvme_user_io32 { ...@@ -601,14 +632,14 @@ struct nvme_user_io32 {
#endif /* COMPAT_FOR_U64_ALIGNMENT */ #endif /* COMPAT_FOR_U64_ALIGNMENT */
static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
void __user *argp) void __user *argp, fmode_t mode)
{ {
switch (cmd) { switch (cmd) {
case NVME_IOCTL_ID: case NVME_IOCTL_ID:
force_successful_syscall_return(); force_successful_syscall_return();
return ns->head->ns_id; return ns->head->ns_id;
case NVME_IOCTL_IO_CMD: case NVME_IOCTL_IO_CMD:
return nvme_user_cmd(ns->ctrl, ns, argp); return nvme_user_cmd(ns->ctrl, ns, argp, mode);
/* /*
* struct nvme_user_io can have different padding on some 32-bit ABIs. * struct nvme_user_io can have different padding on some 32-bit ABIs.
* Just accept the compat version as all fields that are used are the * Just accept the compat version as all fields that are used are the
...@@ -620,19 +651,20 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, ...@@ -620,19 +651,20 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
case NVME_IOCTL_SUBMIT_IO: case NVME_IOCTL_SUBMIT_IO:
return nvme_submit_io(ns, argp); return nvme_submit_io(ns, argp);
case NVME_IOCTL_IO64_CMD: case NVME_IOCTL_IO64_CMD:
return nvme_user_cmd64(ns->ctrl, ns, argp, false); return nvme_user_cmd64(ns->ctrl, ns, argp, false, mode);
case NVME_IOCTL_IO64_CMD_VEC: case NVME_IOCTL_IO64_CMD_VEC:
return nvme_user_cmd64(ns->ctrl, ns, argp, true); return nvme_user_cmd64(ns->ctrl, ns, argp, true, mode);
default: default:
return -ENOTTY; return -ENOTTY;
} }
} }
static int __nvme_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *arg) static int __nvme_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *arg,
fmode_t mode)
{ {
if (is_ctrl_ioctl(cmd)) if (is_ctrl_ioctl(cmd))
return nvme_ctrl_ioctl(ns->ctrl, cmd, arg); return nvme_ctrl_ioctl(ns->ctrl, cmd, arg, mode);
return nvme_ns_ioctl(ns, cmd, arg); return nvme_ns_ioctl(ns, cmd, arg, mode);
} }
int nvme_ioctl(struct block_device *bdev, fmode_t mode, int nvme_ioctl(struct block_device *bdev, fmode_t mode,
...@@ -640,7 +672,7 @@ int nvme_ioctl(struct block_device *bdev, fmode_t mode, ...@@ -640,7 +672,7 @@ int nvme_ioctl(struct block_device *bdev, fmode_t mode,
{ {
struct nvme_ns *ns = bdev->bd_disk->private_data; struct nvme_ns *ns = bdev->bd_disk->private_data;
return __nvme_ioctl(ns, cmd, (void __user *)arg); return __nvme_ioctl(ns, cmd, (void __user *)arg, mode);
} }
long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
...@@ -648,7 +680,7 @@ long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ...@@ -648,7 +680,7 @@ long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
struct nvme_ns *ns = struct nvme_ns *ns =
container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev); container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev);
return __nvme_ioctl(ns, cmd, (void __user *)arg); return __nvme_ioctl(ns, cmd, (void __user *)arg, file->f_mode);
} }
static int nvme_uring_cmd_checks(unsigned int issue_flags) static int nvme_uring_cmd_checks(unsigned int issue_flags)
...@@ -716,7 +748,8 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, ...@@ -716,7 +748,8 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
} }
#ifdef CONFIG_NVME_MULTIPATH #ifdef CONFIG_NVME_MULTIPATH
static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
void __user *argp, struct nvme_ns_head *head, int srcu_idx) void __user *argp, struct nvme_ns_head *head, int srcu_idx,
fmode_t mode)
__releases(&head->srcu) __releases(&head->srcu)
{ {
struct nvme_ctrl *ctrl = ns->ctrl; struct nvme_ctrl *ctrl = ns->ctrl;
...@@ -724,7 +757,7 @@ static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, ...@@ -724,7 +757,7 @@ static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
nvme_get_ctrl(ns->ctrl); nvme_get_ctrl(ns->ctrl);
srcu_read_unlock(&head->srcu, srcu_idx); srcu_read_unlock(&head->srcu, srcu_idx);
ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp); ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, mode);
nvme_put_ctrl(ctrl); nvme_put_ctrl(ctrl);
return ret; return ret;
...@@ -749,9 +782,10 @@ int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode, ...@@ -749,9 +782,10 @@ int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode,
* deadlock when deleting namespaces using the passthrough interface. * deadlock when deleting namespaces using the passthrough interface.
*/ */
if (is_ctrl_ioctl(cmd)) if (is_ctrl_ioctl(cmd))
return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx); return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
mode);
ret = nvme_ns_ioctl(ns, cmd, argp); ret = nvme_ns_ioctl(ns, cmd, argp, mode);
out_unlock: out_unlock:
srcu_read_unlock(&head->srcu, srcu_idx); srcu_read_unlock(&head->srcu, srcu_idx);
return ret; return ret;
...@@ -773,9 +807,10 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, ...@@ -773,9 +807,10 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
goto out_unlock; goto out_unlock;
if (is_ctrl_ioctl(cmd)) if (is_ctrl_ioctl(cmd))
return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx); return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
file->f_mode);
ret = nvme_ns_ioctl(ns, cmd, argp); ret = nvme_ns_ioctl(ns, cmd, argp, file->f_mode);
out_unlock: out_unlock:
srcu_read_unlock(&head->srcu, srcu_idx); srcu_read_unlock(&head->srcu, srcu_idx);
return ret; return ret;
...@@ -849,7 +884,8 @@ int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) ...@@ -849,7 +884,8 @@ int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
return ret; return ret;
} }
static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp) static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp,
fmode_t mode)
{ {
struct nvme_ns *ns; struct nvme_ns *ns;
int ret; int ret;
...@@ -873,7 +909,7 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp) ...@@ -873,7 +909,7 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
kref_get(&ns->kref); kref_get(&ns->kref);
up_read(&ctrl->namespaces_rwsem); up_read(&ctrl->namespaces_rwsem);
ret = nvme_user_cmd(ctrl, ns, argp); ret = nvme_user_cmd(ctrl, ns, argp, mode);
nvme_put_ns(ns); nvme_put_ns(ns);
return ret; return ret;
...@@ -890,11 +926,11 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd, ...@@ -890,11 +926,11 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd,
switch (cmd) { switch (cmd) {
case NVME_IOCTL_ADMIN_CMD: case NVME_IOCTL_ADMIN_CMD:
return nvme_user_cmd(ctrl, NULL, argp); return nvme_user_cmd(ctrl, NULL, argp, file->f_mode);
case NVME_IOCTL_ADMIN64_CMD: case NVME_IOCTL_ADMIN64_CMD:
return nvme_user_cmd64(ctrl, NULL, argp, false); return nvme_user_cmd64(ctrl, NULL, argp, false, file->f_mode);
case NVME_IOCTL_IO_CMD: case NVME_IOCTL_IO_CMD:
return nvme_dev_user_cmd(ctrl, argp); return nvme_dev_user_cmd(ctrl, argp, file->f_mode);
case NVME_IOCTL_RESET: case NVME_IOCTL_RESET:
if (!capable(CAP_SYS_ADMIN)) if (!capable(CAP_SYS_ADMIN))
return -EACCES; return -EACCES;
......
...@@ -797,6 +797,7 @@ enum nvme_opcode { ...@@ -797,6 +797,7 @@ enum nvme_opcode {
nvme_cmd_zone_mgmt_send = 0x79, nvme_cmd_zone_mgmt_send = 0x79,
nvme_cmd_zone_mgmt_recv = 0x7a, nvme_cmd_zone_mgmt_recv = 0x7a,
nvme_cmd_zone_append = 0x7d, nvme_cmd_zone_append = 0x7d,
nvme_cmd_vendor_start = 0x80,
}; };
#define nvme_opcode_name(opcode) { opcode, #opcode } #define nvme_opcode_name(opcode) { opcode, #opcode }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment