Commit 71a85387 authored by Jens Axboe

io-wq: check for wq exit after adding new worker task_work

We check IO_WQ_BIT_EXIT before attempting to create a new worker, and
wq exit cancels pending work if we have any. But it's possible to have
a race between the two, where creation checks exit finding it not set,
but we're in the process of exiting. The exit side will cancel pending
creation task_work, but there's a gap where we add task_work after we've
canceled existing creations at exit time.

Fix this by checking the EXIT bit after adding the creation task_work.
If it's set, run the same cancelation that exit does.

Reported-and-tested-by: syzbot+b60c982cb0efc5e05a47@syzkaller.appspotmail.com
Reviewed-by: Hao Xu <haoxu@linux.alibaba.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 78a78060
...@@ -142,6 +142,7 @@ static bool io_acct_cancel_pending_work(struct io_wqe *wqe, ...@@ -142,6 +142,7 @@ static bool io_acct_cancel_pending_work(struct io_wqe *wqe,
struct io_wqe_acct *acct, struct io_wqe_acct *acct,
struct io_cb_cancel_data *match); struct io_cb_cancel_data *match);
static void create_worker_cb(struct callback_head *cb); static void create_worker_cb(struct callback_head *cb);
static void io_wq_cancel_tw_create(struct io_wq *wq);
static bool io_worker_get(struct io_worker *worker) static bool io_worker_get(struct io_worker *worker)
{ {
...@@ -357,10 +358,22 @@ static bool io_queue_worker_create(struct io_worker *worker, ...@@ -357,10 +358,22 @@ static bool io_queue_worker_create(struct io_worker *worker,
test_and_set_bit_lock(0, &worker->create_state)) test_and_set_bit_lock(0, &worker->create_state))
goto fail_release; goto fail_release;
atomic_inc(&wq->worker_refs);
init_task_work(&worker->create_work, func); init_task_work(&worker->create_work, func);
worker->create_index = acct->index; worker->create_index = acct->index;
if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL)) if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL)) {
/*
* EXIT may have been set after checking it above, check after
* adding the task_work and remove any creation item if it is
* now set. wq exit does that too, but we can have added this
* work item after we canceled in io_wq_exit_workers().
*/
if (test_bit(IO_WQ_BIT_EXIT, &wq->state))
io_wq_cancel_tw_create(wq);
io_worker_ref_put(wq);
return true; return true;
}
io_worker_ref_put(wq);
clear_bit_unlock(0, &worker->create_state); clear_bit_unlock(0, &worker->create_state);
fail_release: fail_release:
io_worker_release(worker); io_worker_release(worker);
...@@ -1196,13 +1209,9 @@ void io_wq_exit_start(struct io_wq *wq) ...@@ -1196,13 +1209,9 @@ void io_wq_exit_start(struct io_wq *wq)
set_bit(IO_WQ_BIT_EXIT, &wq->state); set_bit(IO_WQ_BIT_EXIT, &wq->state);
} }
static void io_wq_exit_workers(struct io_wq *wq) static void io_wq_cancel_tw_create(struct io_wq *wq)
{ {
struct callback_head *cb; struct callback_head *cb;
int node;
if (!wq->task)
return;
while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) { while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) {
struct io_worker *worker; struct io_worker *worker;
...@@ -1210,6 +1219,16 @@ static void io_wq_exit_workers(struct io_wq *wq) ...@@ -1210,6 +1219,16 @@ static void io_wq_exit_workers(struct io_wq *wq)
worker = container_of(cb, struct io_worker, create_work); worker = container_of(cb, struct io_worker, create_work);
io_worker_cancel_cb(worker); io_worker_cancel_cb(worker);
} }
}
static void io_wq_exit_workers(struct io_wq *wq)
{
int node;
if (!wq->task)
return;
io_wq_cancel_tw_create(wq);
rcu_read_lock(); rcu_read_lock();
for_each_node(node) { for_each_node(node) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment