Commit 2faf852d authored by Jens Axboe's avatar Jens Axboe

io_uring: cleanup fixed file data table references

syzbot reports a use-after-free in io_ring_file_ref_switch() when it
tries to switch back to percpu mode. When we put the final reference to
the table by calling percpu_ref_kill_and_confirm(), we don't want the
zero reference to queue async work for flushing the potentially queued
up items. We currently do a few flush_work(), but they merely paper
around the issue, since the work item may not have been queued yet
depending on the when the percpu-ref callback gets run.

Coming into the file unregister, we know we have the ring quiesced.
io_ring_file_ref_switch() can check for whether or not the ref is dying
or not, and not queue anything async at that point. Once the ref has
been confirmed killed, flush any potential items manually.

Reported-by: syzbot+7caeaea49c2c8a591e3d@syzkaller.appspotmail.com
Fixes: 05f3fb3c ("io_uring: avoid ring quiesce for fixed file set unregister and update")
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent df069d80
...@@ -753,6 +753,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, ...@@ -753,6 +753,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
struct io_uring_files_update *ip, struct io_uring_files_update *ip,
unsigned nr_args); unsigned nr_args);
static int io_grab_files(struct io_kiocb *req); static int io_grab_files(struct io_kiocb *req);
static void io_ring_file_ref_flush(struct fixed_file_data *data);
static struct kmem_cache *req_cachep; static struct kmem_cache *req_cachep;
...@@ -5261,15 +5262,10 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) ...@@ -5261,15 +5262,10 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
if (!data) if (!data)
return -ENXIO; return -ENXIO;
/* protect against inflight atomic switch, which drops the ref */
percpu_ref_get(&data->refs);
/* wait for existing switches */
flush_work(&data->ref_work);
percpu_ref_kill_and_confirm(&data->refs, io_file_ref_kill); percpu_ref_kill_and_confirm(&data->refs, io_file_ref_kill);
wait_for_completion(&data->done);
percpu_ref_put(&data->refs);
/* flush potential new switch */
flush_work(&data->ref_work); flush_work(&data->ref_work);
wait_for_completion(&data->done);
io_ring_file_ref_flush(data);
percpu_ref_exit(&data->refs); percpu_ref_exit(&data->refs);
__io_sqe_files_unregister(ctx); __io_sqe_files_unregister(ctx);
...@@ -5507,14 +5503,11 @@ struct io_file_put { ...@@ -5507,14 +5503,11 @@ struct io_file_put {
struct completion *done; struct completion *done;
}; };
static void io_ring_file_ref_switch(struct work_struct *work) static void io_ring_file_ref_flush(struct fixed_file_data *data)
{ {
struct io_file_put *pfile, *tmp; struct io_file_put *pfile, *tmp;
struct fixed_file_data *data;
struct llist_node *node; struct llist_node *node;
data = container_of(work, struct fixed_file_data, ref_work);
while ((node = llist_del_all(&data->put_llist)) != NULL) { while ((node = llist_del_all(&data->put_llist)) != NULL) {
llist_for_each_entry_safe(pfile, tmp, node, llist) { llist_for_each_entry_safe(pfile, tmp, node, llist) {
io_ring_file_put(data->ctx, pfile->file); io_ring_file_put(data->ctx, pfile->file);
...@@ -5524,7 +5517,14 @@ static void io_ring_file_ref_switch(struct work_struct *work) ...@@ -5524,7 +5517,14 @@ static void io_ring_file_ref_switch(struct work_struct *work)
kfree(pfile); kfree(pfile);
} }
} }
}
static void io_ring_file_ref_switch(struct work_struct *work)
{
struct fixed_file_data *data;
data = container_of(work, struct fixed_file_data, ref_work);
io_ring_file_ref_flush(data);
percpu_ref_get(&data->refs); percpu_ref_get(&data->refs);
percpu_ref_switch_to_percpu(&data->refs); percpu_ref_switch_to_percpu(&data->refs);
} }
...@@ -5535,8 +5535,14 @@ static void io_file_data_ref_zero(struct percpu_ref *ref) ...@@ -5535,8 +5535,14 @@ static void io_file_data_ref_zero(struct percpu_ref *ref)
data = container_of(ref, struct fixed_file_data, refs); data = container_of(ref, struct fixed_file_data, refs);
/* we can't safely switch from inside this context, punt to wq */ /*
queue_work(system_wq, &data->ref_work); * We can't safely switch from inside this context, punt to wq. If
* the table ref is going away, the table is being unregistered.
* Don't queue up the async work for that case, the caller will
* handle it.
*/
if (!percpu_ref_is_dying(&data->refs))
queue_work(system_wq, &data->ref_work);
} }
static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment