Commit 4f57f06c authored by Jens Axboe's avatar Jens Axboe

io_uring: add support for IORING_OP_MSG_RING command

This adds support for IORING_OP_MSG_RING, which allows an SQE to signal
another ring. That allows either waking up someone waiting on the ring,
or even passing a 64-bit value via the user_data field in the CQE.

sqe->fd must contain the fd of a ring that should receive the CQE.
sqe->off will be propagated to the cqe->user_data on the target ring,
and sqe->len will be propagated to cqe->res. The resulting CQE will have
IORING_CQE_F_MSG set in its flags, to indicate that this CQE was generated
from a messaging request rather than a SQE issued locally on that ring.
This effectively allows passing a 64-bit and a 32-bit quantity between
the two rings.

This request type has the following request specific error cases:

- -EBADFD. Set if the sqe->fd doesn't point to a file descriptor that is
  of the io_uring type.
- -EOVERFLOW. Set if we were not able to deliver a request to the target
  ring.
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent cc3cec83
...@@ -706,6 +706,12 @@ struct io_hardlink { ...@@ -706,6 +706,12 @@ struct io_hardlink {
int flags; int flags;
}; };
/*
 * Per-request state for IORING_OP_MSG_RING.
 *
 * Carries the two values that get copied into a CQE on the target ring:
 * user_data is taken from sqe->off, len from sqe->len (see
 * io_msg_ring_prep()).
 */
struct io_msg {
	struct file *file;	/* target io_uring file (sqe->fd) */
	u64 user_data;		/* posted as cqe->user_data on the target ring */
	u32 len;		/* posted as cqe->res on the target ring */
};
struct io_async_connect { struct io_async_connect {
struct sockaddr_storage address; struct sockaddr_storage address;
}; };
...@@ -871,6 +877,7 @@ struct io_kiocb { ...@@ -871,6 +877,7 @@ struct io_kiocb {
struct io_mkdir mkdir; struct io_mkdir mkdir;
struct io_symlink symlink; struct io_symlink symlink;
struct io_hardlink hardlink; struct io_hardlink hardlink;
struct io_msg msg;
}; };
u8 opcode; u8 opcode;
...@@ -1121,6 +1128,9 @@ static const struct io_op_def io_op_defs[] = { ...@@ -1121,6 +1128,9 @@ static const struct io_op_def io_op_defs[] = {
[IORING_OP_MKDIRAT] = {}, [IORING_OP_MKDIRAT] = {},
[IORING_OP_SYMLINKAT] = {}, [IORING_OP_SYMLINKAT] = {},
[IORING_OP_LINKAT] = {}, [IORING_OP_LINKAT] = {},
[IORING_OP_MSG_RING] = {
.needs_file = 1,
},
}; };
/* requests with any of those set should undergo io_disarm_next() */ /* requests with any of those set should undergo io_disarm_next() */
...@@ -4322,6 +4332,46 @@ static int io_nop(struct io_kiocb *req, unsigned int issue_flags) ...@@ -4322,6 +4332,46 @@ static int io_nop(struct io_kiocb *req, unsigned int issue_flags)
return 0; return 0;
} }
/*
 * Prepare an IORING_OP_MSG_RING request.
 *
 * Validates that all SQE fields this opcode does not consume are zero,
 * and that the file referenced by sqe->fd is an io_uring instance.
 * sqe->off is stashed as the user_data and sqe->len as the res value
 * that will be posted in a CQE on the target ring.
 *
 * Returns 0 on success, -EINVAL if unused SQE fields are set, or
 * -EBADFD if sqe->fd does not point at an io_uring file.
 */
static int io_msg_ring_prep(struct io_kiocb *req,
			    const struct io_uring_sqe *sqe)
{
	/*
	 * Reject requests with any unused field set, so those fields can
	 * be given meaning later without breaking existing users.
	 * (The original submission tested sqe->buf_index twice; the
	 * duplicate test is dropped here.)
	 */
	if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
		     sqe->rw_flags || sqe->splice_fd_in ||
		     sqe->personality))
		return -EINVAL;

	/* The target fd must refer to another io_uring instance */
	if (req->file->f_op != &io_uring_fops)
		return -EBADFD;

	/* sqe->off -> target cqe->user_data, sqe->len -> target cqe->res */
	req->msg.user_data = READ_ONCE(sqe->off);
	req->msg.len = READ_ONCE(sqe->len);
	return 0;
}
/*
 * Issue an IORING_OP_MSG_RING request: post a CQE on the target ring
 * carrying the user_data/len values captured at prep time, flagged with
 * IORING_CQE_F_MSG so the receiver can tell it apart from CQEs generated
 * by its own SQEs.
 *
 * Always returns 0; the outcome is reported via this request's own CQE
 * (res 0 on success, -EOVERFLOW if the CQE could not be delivered to the
 * target ring).
 */
static int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx;
	struct io_msg *msg = &req->msg;
	int ret = -EOVERFLOW;
	bool filled;

	/* prep verified this file is an io_uring instance */
	target_ctx = req->file->private_data;

	/*
	 * Fill and commit the CQE under the target ring's completion lock;
	 * the wakeup (ev_posted) is deliberately done after dropping it.
	 */
	spin_lock(&target_ctx->completion_lock);
	filled = io_fill_cqe_aux(target_ctx, msg->user_data, msg->len,
				 IORING_CQE_F_MSG);
	io_commit_cqring(target_ctx);
	spin_unlock(&target_ctx->completion_lock);

	if (filled) {
		/* wake anyone waiting on the target ring */
		io_cqring_ev_posted(target_ctx);
		ret = 0;
	}

	/* complete this request locally with the delivery result */
	__io_req_complete(req, issue_flags, ret, 0);
	return 0;
}
static int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) static int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{ {
struct io_ring_ctx *ctx = req->ctx; struct io_ring_ctx *ctx = req->ctx;
...@@ -6700,6 +6750,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) ...@@ -6700,6 +6750,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return io_symlinkat_prep(req, sqe); return io_symlinkat_prep(req, sqe);
case IORING_OP_LINKAT: case IORING_OP_LINKAT:
return io_linkat_prep(req, sqe); return io_linkat_prep(req, sqe);
case IORING_OP_MSG_RING:
return io_msg_ring_prep(req, sqe);
} }
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n", printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
...@@ -6983,6 +7035,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) ...@@ -6983,6 +7035,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
case IORING_OP_LINKAT: case IORING_OP_LINKAT:
ret = io_linkat(req, issue_flags); ret = io_linkat(req, issue_flags);
break; break;
case IORING_OP_MSG_RING:
ret = io_msg_ring(req, issue_flags);
break;
default: default:
ret = -EINVAL; ret = -EINVAL;
break; break;
......
...@@ -143,6 +143,7 @@ enum { ...@@ -143,6 +143,7 @@ enum {
IORING_OP_MKDIRAT, IORING_OP_MKDIRAT,
IORING_OP_SYMLINKAT, IORING_OP_SYMLINKAT,
IORING_OP_LINKAT, IORING_OP_LINKAT,
IORING_OP_MSG_RING,
/* this goes last, obviously */ /* this goes last, obviously */
IORING_OP_LAST, IORING_OP_LAST,
...@@ -199,9 +200,11 @@ struct io_uring_cqe { ...@@ -199,9 +200,11 @@ struct io_uring_cqe {
* *
* IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID * IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID
* IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries * IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries
* IORING_CQE_F_MSG If set, CQE was generated with IORING_OP_MSG_RING
*/ */
#define IORING_CQE_F_BUFFER (1U << 0) #define IORING_CQE_F_BUFFER (1U << 0)
#define IORING_CQE_F_MORE (1U << 1) #define IORING_CQE_F_MORE (1U << 1)
#define IORING_CQE_F_MSG (1U << 2)
enum { enum {
IORING_CQE_BUFFER_SHIFT = 16, IORING_CQE_BUFFER_SHIFT = 16,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment