Commit 03e5cb7b authored by Linus Torvalds

Merge tag 'for-6.4/io_uring-2023-05-07' of git://git.kernel.dk/linux

Pull more io_uring updates from Jens Axboe:
 "Nothing major in here, just two different parts:

   - A small series from Breno that enables passing the full SQE down
     for ->uring_cmd().

     This is a prerequisite for enabling full network socket operations.
     Queued up a bit late because of some stylistic concerns that got
     resolved, would be nice to have this in 6.4-rc1 so the dependent
     work will be easier to handle for 6.5.

   - Fix for the huge page coalescing, which was a regression introduced
     in the 6.3 kernel release (Tobias)"

* tag 'for-6.4/io_uring-2023-05-07' of git://git.kernel.dk/linux:
  io_uring: Remove unnecessary BUILD_BUG_ON
  io_uring: Pass whole sqe to commands
  io_uring: Create a helper to return the SQE size
  io_uring/rsrc: check for nonconsecutive pages
parents fc4354c6 d2b7fa61
...@@ -1035,7 +1035,7 @@ static int ublk_ch_mmap(struct file *filp, struct vm_area_struct *vma) ...@@ -1035,7 +1035,7 @@ static int ublk_ch_mmap(struct file *filp, struct vm_area_struct *vma)
} }
static void ublk_commit_completion(struct ublk_device *ub, static void ublk_commit_completion(struct ublk_device *ub,
struct ublksrv_io_cmd *ub_cmd) const struct ublksrv_io_cmd *ub_cmd)
{ {
u32 qid = ub_cmd->q_id, tag = ub_cmd->tag; u32 qid = ub_cmd->q_id, tag = ub_cmd->tag;
struct ublk_queue *ubq = ublk_get_queue(ub, qid); struct ublk_queue *ubq = ublk_get_queue(ub, qid);
...@@ -1292,7 +1292,7 @@ static inline int ublk_check_cmd_op(u32 cmd_op) ...@@ -1292,7 +1292,7 @@ static inline int ublk_check_cmd_op(u32 cmd_op)
static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
unsigned int issue_flags, unsigned int issue_flags,
struct ublksrv_io_cmd *ub_cmd) const struct ublksrv_io_cmd *ub_cmd)
{ {
struct ublk_device *ub = cmd->file->private_data; struct ublk_device *ub = cmd->file->private_data;
struct ublk_queue *ubq; struct ublk_queue *ubq;
...@@ -1399,17 +1399,17 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, ...@@ -1399,17 +1399,17 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
{ {
struct ublksrv_io_cmd *ub_src = (struct ublksrv_io_cmd *) cmd->cmd;
struct ublksrv_io_cmd ub_cmd;
/* /*
* Not necessary for async retry, but let's keep it simple and always * Not necessary for async retry, but let's keep it simple and always
* copy the values to avoid any potential reuse. * copy the values to avoid any potential reuse.
*/ */
ub_cmd.q_id = READ_ONCE(ub_src->q_id); const struct ublksrv_io_cmd *ub_src = io_uring_sqe_cmd(cmd->sqe);
ub_cmd.tag = READ_ONCE(ub_src->tag); const struct ublksrv_io_cmd ub_cmd = {
ub_cmd.result = READ_ONCE(ub_src->result); .q_id = READ_ONCE(ub_src->q_id),
ub_cmd.addr = READ_ONCE(ub_src->addr); .tag = READ_ONCE(ub_src->tag),
.result = READ_ONCE(ub_src->result),
.addr = READ_ONCE(ub_src->addr)
};
return __ublk_ch_uring_cmd(cmd, issue_flags, &ub_cmd); return __ublk_ch_uring_cmd(cmd, issue_flags, &ub_cmd);
} }
...@@ -1619,7 +1619,7 @@ static struct ublk_device *ublk_get_device_from_id(int idx) ...@@ -1619,7 +1619,7 @@ static struct ublk_device *ublk_get_device_from_id(int idx)
static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
{ {
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
int ublksrv_pid = (int)header->data[0]; int ublksrv_pid = (int)header->data[0];
struct gendisk *disk; struct gendisk *disk;
int ret = -EINVAL; int ret = -EINVAL;
...@@ -1682,7 +1682,7 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) ...@@ -1682,7 +1682,7 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
static int ublk_ctrl_get_queue_affinity(struct ublk_device *ub, static int ublk_ctrl_get_queue_affinity(struct ublk_device *ub,
struct io_uring_cmd *cmd) struct io_uring_cmd *cmd)
{ {
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
void __user *argp = (void __user *)(unsigned long)header->addr; void __user *argp = (void __user *)(unsigned long)header->addr;
cpumask_var_t cpumask; cpumask_var_t cpumask;
unsigned long queue; unsigned long queue;
...@@ -1733,7 +1733,7 @@ static inline void ublk_dump_dev_info(struct ublksrv_ctrl_dev_info *info) ...@@ -1733,7 +1733,7 @@ static inline void ublk_dump_dev_info(struct ublksrv_ctrl_dev_info *info)
static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
{ {
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
void __user *argp = (void __user *)(unsigned long)header->addr; void __user *argp = (void __user *)(unsigned long)header->addr;
struct ublksrv_ctrl_dev_info info; struct ublksrv_ctrl_dev_info info;
struct ublk_device *ub; struct ublk_device *ub;
...@@ -1910,7 +1910,7 @@ static int ublk_ctrl_del_dev(struct ublk_device **p_ub) ...@@ -1910,7 +1910,7 @@ static int ublk_ctrl_del_dev(struct ublk_device **p_ub)
static inline void ublk_ctrl_cmd_dump(struct io_uring_cmd *cmd) static inline void ublk_ctrl_cmd_dump(struct io_uring_cmd *cmd)
{ {
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
pr_devel("%s: cmd_op %x, dev id %d qid %d data %llx buf %llx len %u\n", pr_devel("%s: cmd_op %x, dev id %d qid %d data %llx buf %llx len %u\n",
__func__, cmd->cmd_op, header->dev_id, header->queue_id, __func__, cmd->cmd_op, header->dev_id, header->queue_id,
...@@ -1929,7 +1929,7 @@ static int ublk_ctrl_stop_dev(struct ublk_device *ub) ...@@ -1929,7 +1929,7 @@ static int ublk_ctrl_stop_dev(struct ublk_device *ub)
static int ublk_ctrl_get_dev_info(struct ublk_device *ub, static int ublk_ctrl_get_dev_info(struct ublk_device *ub,
struct io_uring_cmd *cmd) struct io_uring_cmd *cmd)
{ {
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
void __user *argp = (void __user *)(unsigned long)header->addr; void __user *argp = (void __user *)(unsigned long)header->addr;
if (header->len < sizeof(struct ublksrv_ctrl_dev_info) || !header->addr) if (header->len < sizeof(struct ublksrv_ctrl_dev_info) || !header->addr)
...@@ -1960,7 +1960,7 @@ static void ublk_ctrl_fill_params_devt(struct ublk_device *ub) ...@@ -1960,7 +1960,7 @@ static void ublk_ctrl_fill_params_devt(struct ublk_device *ub)
static int ublk_ctrl_get_params(struct ublk_device *ub, static int ublk_ctrl_get_params(struct ublk_device *ub,
struct io_uring_cmd *cmd) struct io_uring_cmd *cmd)
{ {
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
void __user *argp = (void __user *)(unsigned long)header->addr; void __user *argp = (void __user *)(unsigned long)header->addr;
struct ublk_params_header ph; struct ublk_params_header ph;
int ret; int ret;
...@@ -1991,7 +1991,7 @@ static int ublk_ctrl_get_params(struct ublk_device *ub, ...@@ -1991,7 +1991,7 @@ static int ublk_ctrl_get_params(struct ublk_device *ub,
static int ublk_ctrl_set_params(struct ublk_device *ub, static int ublk_ctrl_set_params(struct ublk_device *ub,
struct io_uring_cmd *cmd) struct io_uring_cmd *cmd)
{ {
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
void __user *argp = (void __user *)(unsigned long)header->addr; void __user *argp = (void __user *)(unsigned long)header->addr;
struct ublk_params_header ph; struct ublk_params_header ph;
int ret = -EFAULT; int ret = -EFAULT;
...@@ -2052,7 +2052,7 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq) ...@@ -2052,7 +2052,7 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
static int ublk_ctrl_start_recovery(struct ublk_device *ub, static int ublk_ctrl_start_recovery(struct ublk_device *ub,
struct io_uring_cmd *cmd) struct io_uring_cmd *cmd)
{ {
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
int ret = -EINVAL; int ret = -EINVAL;
int i; int i;
...@@ -2094,7 +2094,7 @@ static int ublk_ctrl_start_recovery(struct ublk_device *ub, ...@@ -2094,7 +2094,7 @@ static int ublk_ctrl_start_recovery(struct ublk_device *ub,
static int ublk_ctrl_end_recovery(struct ublk_device *ub, static int ublk_ctrl_end_recovery(struct ublk_device *ub,
struct io_uring_cmd *cmd) struct io_uring_cmd *cmd)
{ {
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
int ublksrv_pid = (int)header->data[0]; int ublksrv_pid = (int)header->data[0];
int ret = -EINVAL; int ret = -EINVAL;
...@@ -2161,7 +2161,7 @@ static int ublk_char_dev_permission(struct ublk_device *ub, ...@@ -2161,7 +2161,7 @@ static int ublk_char_dev_permission(struct ublk_device *ub,
static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub, static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub,
struct io_uring_cmd *cmd) struct io_uring_cmd *cmd)
{ {
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)io_uring_sqe_cmd(cmd->sqe);
bool unprivileged = ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV; bool unprivileged = ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV;
void __user *argp = (void __user *)(unsigned long)header->addr; void __user *argp = (void __user *)(unsigned long)header->addr;
char *dev_path = NULL; char *dev_path = NULL;
...@@ -2240,7 +2240,7 @@ static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub, ...@@ -2240,7 +2240,7 @@ static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub,
static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
unsigned int issue_flags) unsigned int issue_flags)
{ {
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
struct ublk_device *ub = NULL; struct ublk_device *ub = NULL;
u32 cmd_op = cmd->cmd_op; u32 cmd_op = cmd->cmd_op;
int ret = -EINVAL; int ret = -EINVAL;
......
...@@ -552,7 +552,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, ...@@ -552,7 +552,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec) struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec)
{ {
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
const struct nvme_uring_cmd *cmd = ioucmd->cmd; const struct nvme_uring_cmd *cmd = io_uring_sqe_cmd(ioucmd->sqe);
struct request_queue *q = ns ? ns->queue : ctrl->admin_q; struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
struct nvme_uring_data d; struct nvme_uring_data d;
struct nvme_command c; struct nvme_command c;
......
...@@ -24,7 +24,7 @@ enum io_uring_cmd_flags { ...@@ -24,7 +24,7 @@ enum io_uring_cmd_flags {
struct io_uring_cmd { struct io_uring_cmd {
struct file *file; struct file *file;
const void *cmd; const struct io_uring_sqe *sqe;
union { union {
/* callback to defer completions to task context */ /* callback to defer completions to task context */
void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned); void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned);
...@@ -66,6 +66,11 @@ static inline void io_uring_free(struct task_struct *tsk) ...@@ -66,6 +66,11 @@ static inline void io_uring_free(struct task_struct *tsk)
if (tsk->io_uring) if (tsk->io_uring)
__io_uring_free(tsk); __io_uring_free(tsk);
} }
/*
 * Hand back the 'cmd' member of @sqe as an untyped, read-only pointer so
 * that callers can assign it to their own command structure type.
 */
static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
{
	const void *payload = sqe->cmd;

	return payload;
}
#else #else
static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
struct iov_iter *iter, void *ioucmd) struct iov_iter *iter, void *ioucmd)
......
...@@ -394,4 +394,14 @@ static inline void io_req_queue_tw_complete(struct io_kiocb *req, s32 res) ...@@ -394,4 +394,14 @@ static inline void io_req_queue_tw_complete(struct io_kiocb *req, s32 res)
io_req_task_work_add(req); io_req_task_work_add(req);
} }
/*
 * Size in bytes of one SQE slot for @ctx. A ring set up with
 * IORING_SETUP_SQE128 gets double-width slots; any other ring uses a
 * single struct io_uring_sqe per slot.
 */
static inline size_t uring_sqe_size(struct io_ring_ctx *ctx)
{
	size_t slot_size = sizeof(struct io_uring_sqe);

	if (ctx->flags & IORING_SETUP_SQE128)
		slot_size *= 2;
	return slot_size;
}
#endif #endif
...@@ -627,7 +627,7 @@ const struct io_cold_def io_cold_defs[] = { ...@@ -627,7 +627,7 @@ const struct io_cold_def io_cold_defs[] = {
}, },
[IORING_OP_URING_CMD] = { [IORING_OP_URING_CMD] = {
.name = "URING_CMD", .name = "URING_CMD",
.async_size = uring_cmd_pdu_size(1), .async_size = 2 * sizeof(struct io_uring_sqe),
.prep_async = io_uring_cmd_prep_async, .prep_async = io_uring_cmd_prep_async,
}, },
[IORING_OP_SEND_ZC] = { [IORING_OP_SEND_ZC] = {
......
...@@ -1116,7 +1116,12 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, ...@@ -1116,7 +1116,12 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
if (nr_pages > 1) { if (nr_pages > 1) {
folio = page_folio(pages[0]); folio = page_folio(pages[0]);
for (i = 1; i < nr_pages; i++) { for (i = 1; i < nr_pages; i++) {
if (page_folio(pages[i]) != folio) { /*
* Pages must be consecutive and on the same folio for
* this to work
*/
if (page_folio(pages[i]) != folio ||
pages[i] != pages[i - 1] + 1) {
folio = NULL; folio = NULL;
break; break;
} }
......
...@@ -69,15 +69,9 @@ EXPORT_SYMBOL_GPL(io_uring_cmd_done); ...@@ -69,15 +69,9 @@ EXPORT_SYMBOL_GPL(io_uring_cmd_done);
int io_uring_cmd_prep_async(struct io_kiocb *req) int io_uring_cmd_prep_async(struct io_kiocb *req)
{ {
struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
size_t cmd_size;
BUILD_BUG_ON(uring_cmd_pdu_size(0) != 16); memcpy(req->async_data, ioucmd->sqe, uring_sqe_size(req->ctx));
BUILD_BUG_ON(uring_cmd_pdu_size(1) != 80); ioucmd->sqe = req->async_data;
cmd_size = uring_cmd_pdu_size(req->ctx->flags & IORING_SETUP_SQE128);
memcpy(req->async_data, ioucmd->cmd, cmd_size);
ioucmd->cmd = req->async_data;
return 0; return 0;
} }
...@@ -103,7 +97,7 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) ...@@ -103,7 +97,7 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
req->imu = ctx->user_bufs[index]; req->imu = ctx->user_bufs[index];
io_req_set_rsrc_node(req, ctx, 0); io_req_set_rsrc_node(req, ctx, 0);
} }
ioucmd->cmd = sqe->cmd; ioucmd->sqe = sqe;
ioucmd->cmd_op = READ_ONCE(sqe->cmd_op); ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
return 0; return 0;
} }
......
...@@ -3,11 +3,3 @@ ...@@ -3,11 +3,3 @@
int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags); int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags);
int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_uring_cmd_prep_async(struct io_kiocb *req); int io_uring_cmd_prep_async(struct io_kiocb *req);
/*
 * Number of bytes available to the URING_CMD payload: it begins at the 'cmd'
 * member of the first sqe and, when SQE128 is in use, runs on through the
 * sqe that follows it.
 */
#define uring_cmd_pdu_size(is_sqe128)					\
	(((is_sqe128) ? 2 : 1) * sizeof(struct io_uring_sqe) -		\
	 offsetof(struct io_uring_sqe, cmd))
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment