Commit 216578e5, authored by Pavel Begunkov, committed by Jens Axboe

io_uring: fix REQ_F_COMP_LOCKED by killing it

REQ_F_COMP_LOCKED is used and implemented in a buggy way. The problem is
that the flag is set before io_put_req() but not cleared after, and if
that wasn't the final reference, the request will be freed with the flag
set from some other context, which may not hold a spinlock. That means
possible races with removing linked timeouts and unsynchronised
completion (e.g. access to CQ).

Instead of fixing REQ_F_COMP_LOCKED, kill the flag and use
task_work_add() to move such requests to a fresh context to free from
it, as was done with __io_free_req_finish().
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 4edf20f9
...@@ -574,7 +574,6 @@ enum { ...@@ -574,7 +574,6 @@ enum {
REQ_F_NOWAIT_BIT, REQ_F_NOWAIT_BIT,
REQ_F_LINK_TIMEOUT_BIT, REQ_F_LINK_TIMEOUT_BIT,
REQ_F_ISREG_BIT, REQ_F_ISREG_BIT,
REQ_F_COMP_LOCKED_BIT,
REQ_F_NEED_CLEANUP_BIT, REQ_F_NEED_CLEANUP_BIT,
REQ_F_POLLED_BIT, REQ_F_POLLED_BIT,
REQ_F_BUFFER_SELECTED_BIT, REQ_F_BUFFER_SELECTED_BIT,
...@@ -613,8 +612,6 @@ enum { ...@@ -613,8 +612,6 @@ enum {
REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT), REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT),
/* regular file */ /* regular file */
REQ_F_ISREG = BIT(REQ_F_ISREG_BIT), REQ_F_ISREG = BIT(REQ_F_ISREG_BIT),
/* completion under lock */
REQ_F_COMP_LOCKED = BIT(REQ_F_COMP_LOCKED_BIT),
/* needs cleanup */ /* needs cleanup */
REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT), REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT),
/* already went through poll handler */ /* already went through poll handler */
...@@ -963,8 +960,8 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2, ...@@ -963,8 +960,8 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
struct io_comp_state *cs); struct io_comp_state *cs);
static void io_cqring_fill_event(struct io_kiocb *req, long res); static void io_cqring_fill_event(struct io_kiocb *req, long res);
static void io_put_req(struct io_kiocb *req); static void io_put_req(struct io_kiocb *req);
static void io_put_req_deferred(struct io_kiocb *req, int nr);
static void io_double_put_req(struct io_kiocb *req); static void io_double_put_req(struct io_kiocb *req);
static void __io_double_put_req(struct io_kiocb *req);
static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req); static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req);
static void __io_queue_linked_timeout(struct io_kiocb *req); static void __io_queue_linked_timeout(struct io_kiocb *req);
static void io_queue_linked_timeout(struct io_kiocb *req); static void io_queue_linked_timeout(struct io_kiocb *req);
...@@ -1316,9 +1313,8 @@ static void io_kill_timeout(struct io_kiocb *req) ...@@ -1316,9 +1313,8 @@ static void io_kill_timeout(struct io_kiocb *req)
atomic_set(&req->ctx->cq_timeouts, atomic_set(&req->ctx->cq_timeouts,
atomic_read(&req->ctx->cq_timeouts) + 1); atomic_read(&req->ctx->cq_timeouts) + 1);
list_del_init(&req->timeout.list); list_del_init(&req->timeout.list);
req->flags |= REQ_F_COMP_LOCKED;
io_cqring_fill_event(req, 0); io_cqring_fill_event(req, 0);
io_put_req(req); io_put_req_deferred(req, 1);
} }
} }
...@@ -1369,8 +1365,7 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx) ...@@ -1369,8 +1365,7 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx)
if (link) { if (link) {
__io_queue_linked_timeout(link); __io_queue_linked_timeout(link);
/* drop submission reference */ /* drop submission reference */
link->flags |= REQ_F_COMP_LOCKED; io_put_req_deferred(link, 1);
io_put_req(link);
} }
kfree(de); kfree(de);
} while (!list_empty(&ctx->defer_list)); } while (!list_empty(&ctx->defer_list));
...@@ -1597,13 +1592,19 @@ static void io_submit_flush_completions(struct io_comp_state *cs) ...@@ -1597,13 +1592,19 @@ static void io_submit_flush_completions(struct io_comp_state *cs)
req = list_first_entry(&cs->list, struct io_kiocb, compl.list); req = list_first_entry(&cs->list, struct io_kiocb, compl.list);
list_del(&req->compl.list); list_del(&req->compl.list);
__io_cqring_fill_event(req, req->result, req->compl.cflags); __io_cqring_fill_event(req, req->result, req->compl.cflags);
if (!(req->flags & REQ_F_LINK_HEAD)) {
req->flags |= REQ_F_COMP_LOCKED; /*
io_put_req(req); * io_free_req() doesn't care about completion_lock unless one
} else { * of these flags is set. REQ_F_WORK_INITIALIZED is in the list
* because of a potential deadlock with req->work.fs->lock
*/
if (req->flags & (REQ_F_FAIL_LINK|REQ_F_LINK_TIMEOUT
|REQ_F_WORK_INITIALIZED)) {
spin_unlock_irq(&ctx->completion_lock); spin_unlock_irq(&ctx->completion_lock);
io_put_req(req); io_put_req(req);
spin_lock_irq(&ctx->completion_lock); spin_lock_irq(&ctx->completion_lock);
} else {
io_put_req(req);
} }
} }
io_commit_cqring(ctx); io_commit_cqring(ctx);
...@@ -1702,10 +1703,14 @@ static void io_dismantle_req(struct io_kiocb *req) ...@@ -1702,10 +1703,14 @@ static void io_dismantle_req(struct io_kiocb *req)
io_req_clean_work(req); io_req_clean_work(req);
} }
static void __io_free_req_finish(struct io_kiocb *req) static void __io_free_req(struct io_kiocb *req)
{ {
struct io_uring_task *tctx = req->task->io_uring; struct io_uring_task *tctx;
struct io_ring_ctx *ctx = req->ctx; struct io_ring_ctx *ctx;
io_dismantle_req(req);
tctx = req->task->io_uring;
ctx = req->ctx;
atomic_long_inc(&tctx->req_complete); atomic_long_inc(&tctx->req_complete);
if (tctx->in_idle) if (tctx->in_idle)
...@@ -1719,33 +1724,6 @@ static void __io_free_req_finish(struct io_kiocb *req) ...@@ -1719,33 +1724,6 @@ static void __io_free_req_finish(struct io_kiocb *req)
percpu_ref_put(&ctx->refs); percpu_ref_put(&ctx->refs);
} }
/*
 * Task-work callback. Recovers the request that embeds @cb as its
 * task_work member, dismantles it and then finishes freeing it.
 */
static void io_req_task_file_table_put(struct callback_head *cb)
{
struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
io_dismantle_req(req);
__io_free_req_finish(req);
}
/*
 * Free @req, choosing the path by REQ_F_COMP_LOCKED.
 *
 * Flag clear: the request is dismantled and freed inline.
 * Flag set:   the teardown is not done here; it is queued as task work
 *             (io_req_task_file_table_put) so it runs later from task
 *             context.
 */
static void __io_free_req(struct io_kiocb *req)
{
if (!(req->flags & REQ_F_COMP_LOCKED)) {
io_dismantle_req(req);
__io_free_req_finish(req);
} else {
int ret;
/* defer dismantle + free to task work on the request's own task */
init_task_work(&req->task_work, io_req_task_file_table_put);
ret = task_work_add(req->task, &req->task_work, TWA_RESUME);
if (unlikely(ret)) {
struct task_struct *tsk;
/*
 * Queuing on req->task failed; fall back to the task returned by
 * io_wq_get_task(). The result of this second task_work_add() is
 * not checked.
 */
tsk = io_wq_get_task(req->ctx->io_wq);
task_work_add(tsk, &req->task_work, 0);
}
}
}
static bool io_link_cancel_timeout(struct io_kiocb *req) static bool io_link_cancel_timeout(struct io_kiocb *req)
{ {
struct io_timeout_data *io = req->async_data; struct io_timeout_data *io = req->async_data;
...@@ -1754,11 +1732,10 @@ static bool io_link_cancel_timeout(struct io_kiocb *req) ...@@ -1754,11 +1732,10 @@ static bool io_link_cancel_timeout(struct io_kiocb *req)
ret = hrtimer_try_to_cancel(&io->timer); ret = hrtimer_try_to_cancel(&io->timer);
if (ret != -1) { if (ret != -1) {
req->flags |= REQ_F_COMP_LOCKED;
io_cqring_fill_event(req, -ECANCELED); io_cqring_fill_event(req, -ECANCELED);
io_commit_cqring(ctx); io_commit_cqring(ctx);
req->flags &= ~REQ_F_LINK_HEAD; req->flags &= ~REQ_F_LINK_HEAD;
io_put_req(req); io_put_req_deferred(req, 1);
return true; return true;
} }
...@@ -1785,17 +1762,12 @@ static bool __io_kill_linked_timeout(struct io_kiocb *req) ...@@ -1785,17 +1762,12 @@ static bool __io_kill_linked_timeout(struct io_kiocb *req)
static void io_kill_linked_timeout(struct io_kiocb *req) static void io_kill_linked_timeout(struct io_kiocb *req)
{ {
struct io_ring_ctx *ctx = req->ctx; struct io_ring_ctx *ctx = req->ctx;
bool wake_ev;
if (!(req->flags & REQ_F_COMP_LOCKED)) {
unsigned long flags; unsigned long flags;
bool wake_ev;
spin_lock_irqsave(&ctx->completion_lock, flags); spin_lock_irqsave(&ctx->completion_lock, flags);
wake_ev = __io_kill_linked_timeout(req); wake_ev = __io_kill_linked_timeout(req);
spin_unlock_irqrestore(&ctx->completion_lock, flags); spin_unlock_irqrestore(&ctx->completion_lock, flags);
} else {
wake_ev = __io_kill_linked_timeout(req);
}
if (wake_ev) if (wake_ev)
io_cqring_ev_posted(ctx); io_cqring_ev_posted(ctx);
...@@ -1835,27 +1807,29 @@ static void __io_fail_links(struct io_kiocb *req) ...@@ -1835,27 +1807,29 @@ static void __io_fail_links(struct io_kiocb *req)
trace_io_uring_fail_link(req, link); trace_io_uring_fail_link(req, link);
io_cqring_fill_event(link, -ECANCELED); io_cqring_fill_event(link, -ECANCELED);
link->flags |= REQ_F_COMP_LOCKED;
__io_double_put_req(link); /*
* It's ok to free under spinlock as they're not linked anymore,
* but avoid REQ_F_WORK_INITIALIZED because it may deadlock on
* work.fs->lock.
*/
if (link->flags & REQ_F_WORK_INITIALIZED)
io_put_req_deferred(link, 2);
else
io_double_put_req(link);
} }
io_commit_cqring(ctx); io_commit_cqring(ctx);
io_cqring_ev_posted(ctx);
} }
static void io_fail_links(struct io_kiocb *req) static void io_fail_links(struct io_kiocb *req)
{ {
struct io_ring_ctx *ctx = req->ctx; struct io_ring_ctx *ctx = req->ctx;
if (!(req->flags & REQ_F_COMP_LOCKED)) {
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&ctx->completion_lock, flags); spin_lock_irqsave(&ctx->completion_lock, flags);
__io_fail_links(req); __io_fail_links(req);
spin_unlock_irqrestore(&ctx->completion_lock, flags); spin_unlock_irqrestore(&ctx->completion_lock, flags);
} else {
__io_fail_links(req);
}
io_cqring_ev_posted(ctx); io_cqring_ev_posted(ctx);
} }
...@@ -2069,6 +2043,34 @@ static void io_put_req(struct io_kiocb *req) ...@@ -2069,6 +2043,34 @@ static void io_put_req(struct io_kiocb *req)
io_free_req(req); io_free_req(req);
} }
/*
 * Task-work callback used by io_free_req_deferred(). Recovers the request
 * that embeds @cb as its task_work member and frees it via io_free_req().
 */
static void io_put_req_deferred_cb(struct callback_head *cb)
{
struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
io_free_req(req);
}
/*
 * Free @req from task work instead of from the caller's context: the
 * request is not freed here, io_put_req_deferred_cb() is queued to do it.
 */
static void io_free_req_deferred(struct io_kiocb *req)
{
int ret;
init_task_work(&req->task_work, io_put_req_deferred_cb);
/* NOTE(review): meaning of the 'true' argument is not visible here - confirm */
ret = io_req_task_work_add(req, true);
if (unlikely(ret)) {
struct task_struct *tsk;
/*
 * Could not queue through io_req_task_work_add(); fall back to the
 * task returned by io_wq_get_task() and wake it explicitly after
 * queuing the work.
 */
tsk = io_wq_get_task(req->ctx->io_wq);
task_work_add(tsk, &req->task_work, 0);
wake_up_process(tsk);
}
}
/*
 * Drop @refs references from @req. If that releases the last reference,
 * the request is handed to io_free_req_deferred() rather than being freed
 * in the caller's context.
 */
static inline void io_put_req_deferred(struct io_kiocb *req, int refs)
{
if (refcount_sub_and_test(refs, &req->refs))
io_free_req_deferred(req);
}
static struct io_wq_work *io_steal_work(struct io_kiocb *req) static struct io_wq_work *io_steal_work(struct io_kiocb *req)
{ {
struct io_kiocb *nxt; struct io_kiocb *nxt;
...@@ -2085,17 +2087,6 @@ static struct io_wq_work *io_steal_work(struct io_kiocb *req) ...@@ -2085,17 +2087,6 @@ static struct io_wq_work *io_steal_work(struct io_kiocb *req)
return nxt ? &nxt->work : NULL; return nxt ? &nxt->work : NULL;
} }
/*
 * Must only be used if we don't need to care about links, usually from
 * within the completion handling itself.
 *
 * Drops two references in a single step; if that releases the last
 * reference, the request is freed directly through __io_free_req().
 */
static void __io_double_put_req(struct io_kiocb *req)
{
/* drop both submit and complete references */
if (refcount_sub_and_test(2, &req->refs))
__io_free_req(req);
}
static void io_double_put_req(struct io_kiocb *req) static void io_double_put_req(struct io_kiocb *req)
{ {
/* drop both submit and complete references */ /* drop both submit and complete references */
...@@ -5127,9 +5118,8 @@ static bool io_poll_remove_one(struct io_kiocb *req) ...@@ -5127,9 +5118,8 @@ static bool io_poll_remove_one(struct io_kiocb *req)
if (do_complete) { if (do_complete) {
io_cqring_fill_event(req, -ECANCELED); io_cqring_fill_event(req, -ECANCELED);
io_commit_cqring(req->ctx); io_commit_cqring(req->ctx);
req->flags |= REQ_F_COMP_LOCKED;
req_set_fail_links(req); req_set_fail_links(req);
io_put_req(req); io_put_req_deferred(req, 1);
} }
return do_complete; return do_complete;
...@@ -5311,9 +5301,8 @@ static int __io_timeout_cancel(struct io_kiocb *req) ...@@ -5311,9 +5301,8 @@ static int __io_timeout_cancel(struct io_kiocb *req)
list_del_init(&req->timeout.list); list_del_init(&req->timeout.list);
req_set_fail_links(req); req_set_fail_links(req);
req->flags |= REQ_F_COMP_LOCKED;
io_cqring_fill_event(req, -ECANCELED); io_cqring_fill_event(req, -ECANCELED);
io_put_req(req); io_put_req_deferred(req, 1);
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment