Commit c8d1ba58 authored by Jens Axboe

io_uring: split work handling part of SQPOLL into helper

This is done in preparation for handling more than one ctx, but it also
cleans up the code a bit, since io_sq_thread() was too unwieldy to get a
good overview of.

__io_sq_thread() is now the main handler, and it returns an enum sq_ret
that tells io_sq_thread() what it ended up doing. The parent then makes
a decision on idle, spinning, or work handling based on that.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 3f0e64d0
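Before the diff, here is a minimal, self-contained sketch (userspace C, not kernel code) of the control-flow split the message describes: a helper does one pass of the work and merely reports an outcome, while the parent loop owns the idle/spin/work policy. Every name in the sketch (pass_ret, do_one_pass, pending, idle_until) is invented for illustration; the real helper is __io_sq_thread() in the patch below.

#include <stdio.h>

/*
 * Illustrative only: the outcome values mirror the SQT_IDLE / SQT_SPIN /
 * SQT_DID_WORK split in the patch below, but all names here are made up.
 */
enum pass_ret {
	PASS_IDLE,	/* nothing to do and idle window expired: caller may sleep */
	PASS_SPIN,	/* nothing to do but still inside the idle window: busy-loop */
	PASS_DID_WORK,	/* handled work: go around again immediately */
};

static unsigned pending;	/* stand-in for io_sqring_entries() */

/* One pass of the worker; reports what it did instead of deciding policy. */
static enum pass_ret do_one_pass(unsigned long now, unsigned long idle_until)
{
	if (!pending)
		return now < idle_until ? PASS_SPIN : PASS_IDLE;

	printf("submitting %u entries\n", pending);	/* stand-in for io_submit_sqes() */
	pending = 0;
	return PASS_DID_WORK;
}

int main(void)
{
	unsigned long now = 0, idle_until = 3;

	pending = 2;
	/* Parent loop: owns the sleep/spin decisions, like the new io_sq_thread(). */
	for (int tick = 0; tick < 6; tick++, now++) {
		switch (do_one_pass(now, idle_until)) {
		case PASS_IDLE:
			printf("tick %lu: idle, would sleep\n", now);
			idle_until = now + 3;	/* restart the idle window after "waking" */
			break;
		case PASS_SPIN:
			printf("tick %lu: spinning\n", now);
			break;
		case PASS_DID_WORK:
			printf("tick %lu: did work\n", now);
			break;
		}
	}
	return 0;
}

Keeping the sleep/park policy in the parent loop while the helper stays a straight-line "do one pass" function is what makes the helper reusable for more than one ctx later, as the commit message notes.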
...
@@ -6642,110 +6642,119 @@ static int io_sq_wake_function(struct wait_queue_entry *wqe, unsigned mode,
 	return ret;
 }
 
-static int io_sq_thread(void *data)
-{
-	struct io_ring_ctx *ctx = data;
-	const struct cred *old_cred;
-	unsigned long timeout;
-	int ret = 0;
-
-	init_wait(&ctx->sqo_wait_entry);
-	ctx->sqo_wait_entry.func = io_sq_wake_function;
-
-	complete(&ctx->sq_thread_comp);
-
-	old_cred = override_creds(ctx->creds);
-
-	timeout = jiffies + ctx->sq_thread_idle;
-	while (!kthread_should_park()) {
-		unsigned int to_submit;
-
-		if (!list_empty(&ctx->iopoll_list)) {
-			unsigned nr_events = 0;
-
-			mutex_lock(&ctx->uring_lock);
-			if (!list_empty(&ctx->iopoll_list) && !need_resched())
-				io_do_iopoll(ctx, &nr_events, 0);
-			else
-				timeout = jiffies + ctx->sq_thread_idle;
-			mutex_unlock(&ctx->uring_lock);
-		}
-
-		to_submit = io_sqring_entries(ctx);
-
-		/*
-		 * If submit got -EBUSY, flag us as needing the application
-		 * to enter the kernel to reap and flush events.
-		 */
-		if (!to_submit || ret == -EBUSY || need_resched()) {
-			/*
-			 * Drop cur_mm before scheduling, we can't hold it for
-			 * long periods (or over schedule()). Do this before
-			 * adding ourselves to the waitqueue, as the unuse/drop
-			 * may sleep.
-			 */
-			io_sq_thread_drop_mm();
-
-			/*
-			 * We're polling. If we're within the defined idle
-			 * period, then let us spin without work before going
-			 * to sleep. The exception is if we got EBUSY doing
-			 * more IO, we should wait for the application to
-			 * reap events and wake us up.
-			 */
-			if (!list_empty(&ctx->iopoll_list) || need_resched() ||
-			    (!time_after(jiffies, timeout) && ret != -EBUSY &&
-			    !percpu_ref_is_dying(&ctx->refs))) {
-				io_run_task_work();
-				cond_resched();
-				continue;
-			}
-
-			prepare_to_wait(ctx->sqo_wait, &ctx->sqo_wait_entry,
-						TASK_INTERRUPTIBLE);
-
-			/*
-			 * While doing polled IO, before going to sleep, we need
-			 * to check if there are new reqs added to iopoll_list,
-			 * it is because reqs may have been punted to io worker
-			 * and will be added to iopoll_list later, hence check
-			 * the iopoll_list again.
-			 */
-			if ((ctx->flags & IORING_SETUP_IOPOLL) &&
-			    !list_empty_careful(&ctx->iopoll_list)) {
-				finish_wait(ctx->sqo_wait, &ctx->sqo_wait_entry);
-				continue;
-			}
-
-			io_ring_set_wakeup_flag(ctx);
-
-			to_submit = io_sqring_entries(ctx);
-			if (!to_submit || ret == -EBUSY) {
-				if (kthread_should_park()) {
-					finish_wait(ctx->sqo_wait, &ctx->sqo_wait_entry);
-					break;
-				}
-				if (io_run_task_work()) {
-					finish_wait(ctx->sqo_wait, &ctx->sqo_wait_entry);
-					io_ring_clear_wakeup_flag(ctx);
-					continue;
-				}
-				schedule();
-				finish_wait(ctx->sqo_wait, &ctx->sqo_wait_entry);
-
-				ret = 0;
-				continue;
-			}
-			finish_wait(ctx->sqo_wait, &ctx->sqo_wait_entry);
-			io_ring_clear_wakeup_flag(ctx);
-		}
-
-		mutex_lock(&ctx->uring_lock);
-		if (likely(!percpu_ref_is_dying(&ctx->refs)))
-			ret = io_submit_sqes(ctx, to_submit);
-		mutex_unlock(&ctx->uring_lock);
-		timeout = jiffies + ctx->sq_thread_idle;
+enum sq_ret {
+	SQT_IDLE	= 1,
+	SQT_SPIN	= 2,
+	SQT_DID_WORK	= 4,
+};
+
+static enum sq_ret __io_sq_thread(struct io_ring_ctx *ctx,
+				  unsigned long start_jiffies)
+{
+	unsigned long timeout = start_jiffies + ctx->sq_thread_idle;
+	unsigned int to_submit;
+	int ret = 0;
+
+again:
+	if (!list_empty(&ctx->iopoll_list)) {
+		unsigned nr_events = 0;
+
+		mutex_lock(&ctx->uring_lock);
+		if (!list_empty(&ctx->iopoll_list) && !need_resched())
+			io_do_iopoll(ctx, &nr_events, 0);
+		mutex_unlock(&ctx->uring_lock);
+	}
+
+	to_submit = io_sqring_entries(ctx);
+
+	/*
+	 * If submit got -EBUSY, flag us as needing the application
+	 * to enter the kernel to reap and flush events.
+	 */
+	if (!to_submit || ret == -EBUSY || need_resched()) {
+		/*
+		 * Drop cur_mm before scheduling, we can't hold it for
+		 * long periods (or over schedule()). Do this before
+		 * adding ourselves to the waitqueue, as the unuse/drop
+		 * may sleep.
+		 */
+		io_sq_thread_drop_mm();
+
+		/*
+		 * We're polling. If we're within the defined idle
+		 * period, then let us spin without work before going
+		 * to sleep. The exception is if we got EBUSY doing
+		 * more IO, we should wait for the application to
+		 * reap events and wake us up.
+		 */
+		if (!list_empty(&ctx->iopoll_list) || need_resched() ||
+		    (!time_after(jiffies, timeout) && ret != -EBUSY &&
+		    !percpu_ref_is_dying(&ctx->refs)))
+			return SQT_SPIN;
+
+		prepare_to_wait(ctx->sqo_wait, &ctx->sqo_wait_entry,
+					TASK_INTERRUPTIBLE);
+
+		/*
+		 * While doing polled IO, before going to sleep, we need
+		 * to check if there are new reqs added to iopoll_list,
+		 * it is because reqs may have been punted to io worker
+		 * and will be added to iopoll_list later, hence check
+		 * the iopoll_list again.
+		 */
+		if ((ctx->flags & IORING_SETUP_IOPOLL) &&
+		    !list_empty_careful(&ctx->iopoll_list)) {
+			finish_wait(ctx->sqo_wait, &ctx->sqo_wait_entry);
+			goto again;
+		}
+
+		io_ring_set_wakeup_flag(ctx);
+
+		to_submit = io_sqring_entries(ctx);
+		if (!to_submit || ret == -EBUSY)
+			return SQT_IDLE;
+	}
+
+	finish_wait(ctx->sqo_wait, &ctx->sqo_wait_entry);
+	io_ring_clear_wakeup_flag(ctx);
+
+	mutex_lock(&ctx->uring_lock);
+	if (likely(!percpu_ref_is_dying(&ctx->refs)))
+		ret = io_submit_sqes(ctx, to_submit);
+	mutex_unlock(&ctx->uring_lock);
+	return SQT_DID_WORK;
+}
+
+static int io_sq_thread(void *data)
+{
+	struct io_ring_ctx *ctx = data;
+	const struct cred *old_cred;
+	unsigned long start_jiffies;
+
+	init_wait(&ctx->sqo_wait_entry);
+	ctx->sqo_wait_entry.func = io_sq_wake_function;
+
+	complete(&ctx->sq_thread_comp);
+
+	old_cred = override_creds(ctx->creds);
+
+	start_jiffies = jiffies;
+	while (!kthread_should_park()) {
+		enum sq_ret ret;
+
+		ret = __io_sq_thread(ctx, start_jiffies);
+		switch (ret) {
+		case SQT_IDLE:
+			schedule();
+			start_jiffies = jiffies;
+			continue;
+		case SQT_SPIN:
+			io_run_task_work();
+			cond_resched();
+			fallthrough;
+		case SQT_DID_WORK:
+			continue;
+		}
+	}
 	}
 
 	io_run_task_work();
...