Commit 03e5cb7b authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-6.4/io_uring-2023-05-07' of git://git.kernel.dk/linux

Pull more io_uring updates from Jens Axboe:
 "Nothing major in here, just two different parts:

   - A small series from Breno that enables passing the full SQE down
     for ->uring_cmd().

     This is a prerequisite for enabling full network socket operations.
     Queued up a bit late because of some stylistic concerns that got
     resolved, would be nice to have this in 6.4-rc1 so the dependent
     work will be easier to handle for 6.5.

   - Fix for the huge page coalescing, which was a regression introduced
     in the 6.3 kernel release (Tobias)"

* tag 'for-6.4/io_uring-2023-05-07' of git://git.kernel.dk/linux:
  io_uring: Remove unnecessary BUILD_BUG_ON
  io_uring: Pass whole sqe to commands
  io_uring: Create a helper to return the SQE size
  io_uring/rsrc: check for nonconsecutive pages
parents fc4354c6 d2b7fa61
......@@ -1035,7 +1035,7 @@ static int ublk_ch_mmap(struct file *filp, struct vm_area_struct *vma)
}
static void ublk_commit_completion(struct ublk_device *ub,
struct ublksrv_io_cmd *ub_cmd)
const struct ublksrv_io_cmd *ub_cmd)
{
u32 qid = ub_cmd->q_id, tag = ub_cmd->tag;
struct ublk_queue *ubq = ublk_get_queue(ub, qid);
......@@ -1292,7 +1292,7 @@ static inline int ublk_check_cmd_op(u32 cmd_op)
static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
unsigned int issue_flags,
struct ublksrv_io_cmd *ub_cmd)
const struct ublksrv_io_cmd *ub_cmd)
{
struct ublk_device *ub = cmd->file->private_data;
struct ublk_queue *ubq;
......@@ -1399,17 +1399,17 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
struct ublksrv_io_cmd *ub_src = (struct ublksrv_io_cmd *) cmd->cmd;
struct ublksrv_io_cmd ub_cmd;
/*
* Not necessary for async retry, but let's keep it simple and always
* copy the values to avoid any potential reuse.
*/
ub_cmd.q_id = READ_ONCE(ub_src->q_id);
ub_cmd.tag = READ_ONCE(ub_src->tag);
ub_cmd.result = READ_ONCE(ub_src->result);
ub_cmd.addr = READ_ONCE(ub_src->addr);
const struct ublksrv_io_cmd *ub_src = io_uring_sqe_cmd(cmd->sqe);
const struct ublksrv_io_cmd ub_cmd = {
.q_id = READ_ONCE(ub_src->q_id),
.tag = READ_ONCE(ub_src->tag),
.result = READ_ONCE(ub_src->result),
.addr = READ_ONCE(ub_src->addr)
};
return __ublk_ch_uring_cmd(cmd, issue_flags, &ub_cmd);
}
......@@ -1619,7 +1619,7 @@ static struct ublk_device *ublk_get_device_from_id(int idx)
static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
{
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
int ublksrv_pid = (int)header->data[0];
struct gendisk *disk;
int ret = -EINVAL;
......@@ -1682,7 +1682,7 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
static int ublk_ctrl_get_queue_affinity(struct ublk_device *ub,
struct io_uring_cmd *cmd)
{
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
void __user *argp = (void __user *)(unsigned long)header->addr;
cpumask_var_t cpumask;
unsigned long queue;
......@@ -1733,7 +1733,7 @@ static inline void ublk_dump_dev_info(struct ublksrv_ctrl_dev_info *info)
static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
{
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
void __user *argp = (void __user *)(unsigned long)header->addr;
struct ublksrv_ctrl_dev_info info;
struct ublk_device *ub;
......@@ -1910,7 +1910,7 @@ static int ublk_ctrl_del_dev(struct ublk_device **p_ub)
static inline void ublk_ctrl_cmd_dump(struct io_uring_cmd *cmd)
{
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
pr_devel("%s: cmd_op %x, dev id %d qid %d data %llx buf %llx len %u\n",
__func__, cmd->cmd_op, header->dev_id, header->queue_id,
......@@ -1929,7 +1929,7 @@ static int ublk_ctrl_stop_dev(struct ublk_device *ub)
static int ublk_ctrl_get_dev_info(struct ublk_device *ub,
struct io_uring_cmd *cmd)
{
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
void __user *argp = (void __user *)(unsigned long)header->addr;
if (header->len < sizeof(struct ublksrv_ctrl_dev_info) || !header->addr)
......@@ -1960,7 +1960,7 @@ static void ublk_ctrl_fill_params_devt(struct ublk_device *ub)
static int ublk_ctrl_get_params(struct ublk_device *ub,
struct io_uring_cmd *cmd)
{
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
void __user *argp = (void __user *)(unsigned long)header->addr;
struct ublk_params_header ph;
int ret;
......@@ -1991,7 +1991,7 @@ static int ublk_ctrl_get_params(struct ublk_device *ub,
static int ublk_ctrl_set_params(struct ublk_device *ub,
struct io_uring_cmd *cmd)
{
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
void __user *argp = (void __user *)(unsigned long)header->addr;
struct ublk_params_header ph;
int ret = -EFAULT;
......@@ -2052,7 +2052,7 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
static int ublk_ctrl_start_recovery(struct ublk_device *ub,
struct io_uring_cmd *cmd)
{
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
int ret = -EINVAL;
int i;
......@@ -2094,7 +2094,7 @@ static int ublk_ctrl_start_recovery(struct ublk_device *ub,
static int ublk_ctrl_end_recovery(struct ublk_device *ub,
struct io_uring_cmd *cmd)
{
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
int ublksrv_pid = (int)header->data[0];
int ret = -EINVAL;
......@@ -2161,7 +2161,7 @@ static int ublk_char_dev_permission(struct ublk_device *ub,
static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub,
struct io_uring_cmd *cmd)
{
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)io_uring_sqe_cmd(cmd->sqe);
bool unprivileged = ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV;
void __user *argp = (void __user *)(unsigned long)header->addr;
char *dev_path = NULL;
......@@ -2240,7 +2240,7 @@ static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub,
static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
unsigned int issue_flags)
{
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
struct ublk_device *ub = NULL;
u32 cmd_op = cmd->cmd_op;
int ret = -EINVAL;
......
......@@ -552,7 +552,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec)
{
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
const struct nvme_uring_cmd *cmd = ioucmd->cmd;
const struct nvme_uring_cmd *cmd = io_uring_sqe_cmd(ioucmd->sqe);
struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
struct nvme_uring_data d;
struct nvme_command c;
......
......@@ -24,7 +24,7 @@ enum io_uring_cmd_flags {
struct io_uring_cmd {
struct file *file;
const void *cmd;
const struct io_uring_sqe *sqe;
union {
/* callback to defer completions to task context */
void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned);
......@@ -66,6 +66,11 @@ static inline void io_uring_free(struct task_struct *tsk)
if (tsk->io_uring)
__io_uring_free(tsk);
}
static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
{
return sqe->cmd;
}
#else
static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
struct iov_iter *iter, void *ioucmd)
......
......@@ -394,4 +394,14 @@ static inline void io_req_queue_tw_complete(struct io_kiocb *req, s32 res)
io_req_task_work_add(req);
}
/*
* IORING_SETUP_SQE128 contexts allocate twice the normal SQE size for each
* slot.
*/
static inline size_t uring_sqe_size(struct io_ring_ctx *ctx)
{
if (ctx->flags & IORING_SETUP_SQE128)
return 2 * sizeof(struct io_uring_sqe);
return sizeof(struct io_uring_sqe);
}
#endif
......@@ -627,7 +627,7 @@ const struct io_cold_def io_cold_defs[] = {
},
[IORING_OP_URING_CMD] = {
.name = "URING_CMD",
.async_size = uring_cmd_pdu_size(1),
.async_size = 2 * sizeof(struct io_uring_sqe),
.prep_async = io_uring_cmd_prep_async,
},
[IORING_OP_SEND_ZC] = {
......
......@@ -1116,7 +1116,12 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
if (nr_pages > 1) {
folio = page_folio(pages[0]);
for (i = 1; i < nr_pages; i++) {
if (page_folio(pages[i]) != folio) {
/*
* Pages must be consecutive and on the same folio for
* this to work
*/
if (page_folio(pages[i]) != folio ||
pages[i] != pages[i - 1] + 1) {
folio = NULL;
break;
}
......
......@@ -69,15 +69,9 @@ EXPORT_SYMBOL_GPL(io_uring_cmd_done);
int io_uring_cmd_prep_async(struct io_kiocb *req)
{
struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
size_t cmd_size;
BUILD_BUG_ON(uring_cmd_pdu_size(0) != 16);
BUILD_BUG_ON(uring_cmd_pdu_size(1) != 80);
cmd_size = uring_cmd_pdu_size(req->ctx->flags & IORING_SETUP_SQE128);
memcpy(req->async_data, ioucmd->cmd, cmd_size);
ioucmd->cmd = req->async_data;
memcpy(req->async_data, ioucmd->sqe, uring_sqe_size(req->ctx));
ioucmd->sqe = req->async_data;
return 0;
}
......@@ -103,7 +97,7 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
req->imu = ctx->user_bufs[index];
io_req_set_rsrc_node(req, ctx, 0);
}
ioucmd->cmd = sqe->cmd;
ioucmd->sqe = sqe;
ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
return 0;
}
......
......@@ -3,11 +3,3 @@
int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags);
int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_uring_cmd_prep_async(struct io_kiocb *req);
/*
* The URING_CMD payload starts at 'cmd' in the first sqe, and continues into
* the following sqe if SQE128 is used.
*/
#define uring_cmd_pdu_size(is_sqe128) \
((1 + !!(is_sqe128)) * sizeof(struct io_uring_sqe) - \
offsetof(struct io_uring_sqe, cmd))
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment