Commit c73ebb68 authored by Hao Xu's avatar Hao Xu Committed by Jens Axboe

io_uring: add timeout support for io_uring_enter()

Now users who want to get woken when waiting for events should submit a
timeout command first. It is not safe for applications that split SQ and
CQ handling between two threads, such as mysql. Users should synchronize
the two threads explicitly to protect SQ and that will impact the
performance.

This patch adds support for timeout to existing io_uring_enter(). To
avoid overloading arguments, it introduces a new parameter structure
which contains sigmask and timeout.

I have tested the workloads with one thread submiting nop requests
while the other reaping the cqe with timeout. It shows 1.8~2x faster
when the iodepth is 16.
Signed-off-by: default avatarJiufei Xue <jiufei.xue@linux.alibaba.com>
Signed-off-by: default avatarHao Xu <haoxu@linux.alibaba.com>
[axboe: various cleanups/fixes, and name change to SIG_IS_DATA]
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 27926b68
...@@ -7118,7 +7118,8 @@ static int io_run_task_work_sig(void) ...@@ -7118,7 +7118,8 @@ static int io_run_task_work_sig(void)
* application must reap them itself, as they reside on the shared cq ring. * application must reap them itself, as they reside on the shared cq ring.
*/ */
static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
const sigset_t __user *sig, size_t sigsz) const sigset_t __user *sig, size_t sigsz,
struct __kernel_timespec __user *uts)
{ {
struct io_wait_queue iowq = { struct io_wait_queue iowq = {
.wq = { .wq = {
...@@ -7130,6 +7131,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, ...@@ -7130,6 +7131,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
.to_wait = min_events, .to_wait = min_events,
}; };
struct io_rings *rings = ctx->rings; struct io_rings *rings = ctx->rings;
struct timespec64 ts;
signed long timeout = 0;
int ret = 0; int ret = 0;
do { do {
...@@ -7152,6 +7155,12 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, ...@@ -7152,6 +7155,12 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
return ret; return ret;
} }
if (uts) {
if (get_timespec64(&ts, uts))
return -EFAULT;
timeout = timespec64_to_jiffies(&ts);
}
iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts); iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
trace_io_uring_cqring_wait(ctx, min_events); trace_io_uring_cqring_wait(ctx, min_events);
do { do {
...@@ -7165,7 +7174,15 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, ...@@ -7165,7 +7174,15 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
break; break;
if (io_should_wake(&iowq, false)) if (io_should_wake(&iowq, false))
break; break;
schedule(); if (uts) {
timeout = schedule_timeout(timeout);
if (timeout == 0) {
ret = -ETIME;
break;
}
} else {
schedule();
}
} while (1); } while (1);
finish_wait(&ctx->wait, &iowq.wq); finish_wait(&ctx->wait, &iowq.wq);
...@@ -9167,9 +9184,39 @@ static void io_sqpoll_wait_sq(struct io_ring_ctx *ctx) ...@@ -9167,9 +9184,39 @@ static void io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
finish_wait(&ctx->sqo_sq_wait, &wait); finish_wait(&ctx->sqo_sq_wait, &wait);
} }
static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz,
struct __kernel_timespec __user **ts,
const sigset_t __user **sig)
{
struct io_uring_getevents_arg arg;
/*
* If EXT_ARG isn't set, then we have no timespec and the argp pointer
* is just a pointer to the sigset_t.
*/
if (!(flags & IORING_ENTER_EXT_ARG)) {
*sig = (const sigset_t __user *) argp;
*ts = NULL;
return 0;
}
/*
* EXT_ARG is set - ensure we agree on the size of it and copy in our
* timespec and sigset_t pointers if good.
*/
if (*argsz != sizeof(arg))
return -EINVAL;
if (copy_from_user(&arg, argp, sizeof(arg)))
return -EFAULT;
*sig = u64_to_user_ptr(arg.sigmask);
*argsz = arg.sigmask_sz;
*ts = u64_to_user_ptr(arg.ts);
return 0;
}
SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
u32, min_complete, u32, flags, const sigset_t __user *, sig, u32, min_complete, u32, flags, const void __user *, argp,
size_t, sigsz) size_t, argsz)
{ {
struct io_ring_ctx *ctx; struct io_ring_ctx *ctx;
long ret = -EBADF; long ret = -EBADF;
...@@ -9179,7 +9226,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, ...@@ -9179,7 +9226,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
io_run_task_work(); io_run_task_work();
if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP | if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP |
IORING_ENTER_SQ_WAIT)) IORING_ENTER_SQ_WAIT | IORING_ENTER_EXT_ARG))
return -EINVAL; return -EINVAL;
f = fdget(fd); f = fdget(fd);
...@@ -9225,6 +9272,13 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, ...@@ -9225,6 +9272,13 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
goto out; goto out;
} }
if (flags & IORING_ENTER_GETEVENTS) { if (flags & IORING_ENTER_GETEVENTS) {
const sigset_t __user *sig;
struct __kernel_timespec __user *ts;
ret = io_get_ext_arg(flags, argp, &argsz, &ts, &sig);
if (unlikely(ret))
goto out;
min_complete = min(min_complete, ctx->cq_entries); min_complete = min(min_complete, ctx->cq_entries);
/* /*
...@@ -9237,7 +9291,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, ...@@ -9237,7 +9291,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
!(ctx->flags & IORING_SETUP_SQPOLL)) { !(ctx->flags & IORING_SETUP_SQPOLL)) {
ret = io_iopoll_check(ctx, min_complete); ret = io_iopoll_check(ctx, min_complete);
} else { } else {
ret = io_cqring_wait(ctx, min_complete, sig, sigsz); ret = io_cqring_wait(ctx, min_complete, sig, argsz, ts);
} }
} }
...@@ -9600,7 +9654,8 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, ...@@ -9600,7 +9654,8 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP | p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS | IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL | IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED; IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED |
IORING_FEAT_EXT_ARG;
if (copy_to_user(params, p, sizeof(*p))) { if (copy_to_user(params, p, sizeof(*p))) {
ret = -EFAULT; ret = -EFAULT;
......
...@@ -317,7 +317,7 @@ asmlinkage long sys_io_uring_setup(u32 entries, ...@@ -317,7 +317,7 @@ asmlinkage long sys_io_uring_setup(u32 entries,
struct io_uring_params __user *p); struct io_uring_params __user *p);
asmlinkage long sys_io_uring_enter(unsigned int fd, u32 to_submit, asmlinkage long sys_io_uring_enter(unsigned int fd, u32 to_submit,
u32 min_complete, u32 flags, u32 min_complete, u32 flags,
const sigset_t __user *sig, size_t sigsz); const void __user *argp, size_t argsz);
asmlinkage long sys_io_uring_register(unsigned int fd, unsigned int op, asmlinkage long sys_io_uring_register(unsigned int fd, unsigned int op,
void __user *arg, unsigned int nr_args); void __user *arg, unsigned int nr_args);
......
...@@ -231,6 +231,7 @@ struct io_cqring_offsets { ...@@ -231,6 +231,7 @@ struct io_cqring_offsets {
#define IORING_ENTER_GETEVENTS (1U << 0) #define IORING_ENTER_GETEVENTS (1U << 0)
#define IORING_ENTER_SQ_WAKEUP (1U << 1) #define IORING_ENTER_SQ_WAKEUP (1U << 1)
#define IORING_ENTER_SQ_WAIT (1U << 2) #define IORING_ENTER_SQ_WAIT (1U << 2)
#define IORING_ENTER_EXT_ARG (1U << 3)
/* /*
* Passed in for io_uring_setup(2). Copied back with updated info on success * Passed in for io_uring_setup(2). Copied back with updated info on success
...@@ -259,6 +260,7 @@ struct io_uring_params { ...@@ -259,6 +260,7 @@ struct io_uring_params {
#define IORING_FEAT_FAST_POLL (1U << 5) #define IORING_FEAT_FAST_POLL (1U << 5)
#define IORING_FEAT_POLL_32BITS (1U << 6) #define IORING_FEAT_POLL_32BITS (1U << 6)
#define IORING_FEAT_SQPOLL_NONFIXED (1U << 7) #define IORING_FEAT_SQPOLL_NONFIXED (1U << 7)
#define IORING_FEAT_EXT_ARG (1U << 8)
/* /*
* io_uring_register(2) opcodes and arguments * io_uring_register(2) opcodes and arguments
...@@ -335,4 +337,11 @@ enum { ...@@ -335,4 +337,11 @@ enum {
IORING_RESTRICTION_LAST IORING_RESTRICTION_LAST
}; };
struct io_uring_getevents_arg {
__u64 sigmask;
__u32 sigmask_sz;
__u32 pad;
__u64 ts;
};
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment