Commit f6f360ae authored by Linus Torvalds

Merge tag 'io_uring-5.15-2021-09-25' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:
 "This one looks a bit bigger than it is, but that's mainly because 2/3
  of it is enabling IORING_OP_CLOSE to close direct file descriptors.

  We've had a few folks using them and finding it confusing that the way
  to close them is through using -1 for file update, this just brings
  API symmetry for direct descriptors. Hence I think we should just do
  this now and have a better API for 5.15 release. There's some room for
  de-duplicating the close code, but we're leaving that for the next
  merge window.

  Outside of that, just small fixes:

   - Poll race fixes (Hao)

   - io-wq core dump exit fix (me)

   - Reschedule around potentially intensive tctx and buffer iterators
     on teardown (me)

   - Fix for always ending up punting files update to io-wq (me)

   - Put the provided buffer meta data under memcg accounting (me)

   - Tweak for io_write(), removing dead code that was added with the
     iterator changes in this release (Pavel)"

* tag 'io_uring-5.15-2021-09-25' of git://git.kernel.dk/linux-block:
  io_uring: make OP_CLOSE consistent with direct open
  io_uring: kill extra checks in io_write()
  io_uring: don't punt files update to io-wq unconditionally
  io_uring: put provided buffer meta data under memcg accounting
  io_uring: allow conditional reschedule for intensive iterators
  io_uring: fix potential req refcount underflow
  io_uring: fix missing set of EPOLLONESHOT for CQ ring overflow
  io_uring: fix race between poll completion and cancel_hash insertion
  io-wq: ensure we exit if thread group is exiting
parents 2d70de4e 7df778be
...@@ -584,7 +584,8 @@ static int io_wqe_worker(void *data) ...@@ -584,7 +584,8 @@ static int io_wqe_worker(void *data)
if (!get_signal(&ksig)) if (!get_signal(&ksig))
continue; continue;
if (fatal_signal_pending(current)) if (fatal_signal_pending(current) ||
signal_group_exit(current->signal))
break; break;
continue; continue;
} }
......
...@@ -502,6 +502,7 @@ struct io_poll_update { ...@@ -502,6 +502,7 @@ struct io_poll_update {
struct io_close { struct io_close {
struct file *file; struct file *file;
int fd; int fd;
u32 file_slot;
}; };
struct io_timeout_data { struct io_timeout_data {
...@@ -1098,6 +1099,8 @@ static int io_req_prep_async(struct io_kiocb *req); ...@@ -1098,6 +1099,8 @@ static int io_req_prep_async(struct io_kiocb *req);
static int io_install_fixed_file(struct io_kiocb *req, struct file *file, static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
unsigned int issue_flags, u32 slot_index); unsigned int issue_flags, u32 slot_index);
static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags);
static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer); static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer);
static struct kmem_cache *req_cachep; static struct kmem_cache *req_cachep;
...@@ -3605,7 +3608,6 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags) ...@@ -3605,7 +3608,6 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
iov_iter_save_state(iter, state); iov_iter_save_state(iter, state);
} }
req->result = iov_iter_count(iter); req->result = iov_iter_count(iter);
ret2 = 0;
/* Ensure we clear previously set non-block flag */ /* Ensure we clear previously set non-block flag */
if (!force_nonblock) if (!force_nonblock)
...@@ -3670,8 +3672,6 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags) ...@@ -3670,8 +3672,6 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
} else { } else {
copy_iov: copy_iov:
iov_iter_restore(iter, state); iov_iter_restore(iter, state);
if (ret2 > 0)
iov_iter_advance(iter, ret2);
ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false); ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
return ret ?: -EAGAIN; return ret ?: -EAGAIN;
} }
...@@ -4387,7 +4387,7 @@ static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head) ...@@ -4387,7 +4387,7 @@ static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head)
int i, bid = pbuf->bid; int i, bid = pbuf->bid;
for (i = 0; i < pbuf->nbufs; i++) { for (i = 0; i < pbuf->nbufs; i++) {
buf = kmalloc(sizeof(*buf), GFP_KERNEL); buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
if (!buf) if (!buf)
break; break;
...@@ -4594,12 +4594,16 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) ...@@ -4594,12 +4594,16 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL; return -EINVAL;
if (sqe->ioprio || sqe->off || sqe->addr || sqe->len || if (sqe->ioprio || sqe->off || sqe->addr || sqe->len ||
sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in) sqe->rw_flags || sqe->buf_index)
return -EINVAL; return -EINVAL;
if (req->flags & REQ_F_FIXED_FILE) if (req->flags & REQ_F_FIXED_FILE)
return -EBADF; return -EBADF;
req->close.fd = READ_ONCE(sqe->fd); req->close.fd = READ_ONCE(sqe->fd);
req->close.file_slot = READ_ONCE(sqe->file_index);
if (req->close.file_slot && req->close.fd)
return -EINVAL;
return 0; return 0;
} }
...@@ -4611,6 +4615,11 @@ static int io_close(struct io_kiocb *req, unsigned int issue_flags) ...@@ -4611,6 +4615,11 @@ static int io_close(struct io_kiocb *req, unsigned int issue_flags)
struct file *file = NULL; struct file *file = NULL;
int ret = -EBADF; int ret = -EBADF;
if (req->close.file_slot) {
ret = io_close_fixed(req, issue_flags);
goto err;
}
spin_lock(&files->file_lock); spin_lock(&files->file_lock);
fdt = files_fdtable(files); fdt = files_fdtable(files);
if (close->fd >= fdt->max_fds) { if (close->fd >= fdt->max_fds) {
...@@ -5338,7 +5347,7 @@ static bool __io_poll_complete(struct io_kiocb *req, __poll_t mask) ...@@ -5338,7 +5347,7 @@ static bool __io_poll_complete(struct io_kiocb *req, __poll_t mask)
if (req->poll.events & EPOLLONESHOT) if (req->poll.events & EPOLLONESHOT)
flags = 0; flags = 0;
if (!io_cqring_fill_event(ctx, req->user_data, error, flags)) { if (!io_cqring_fill_event(ctx, req->user_data, error, flags)) {
req->poll.done = true; req->poll.events |= EPOLLONESHOT;
flags = 0; flags = 0;
} }
if (flags & IORING_CQE_F_MORE) if (flags & IORING_CQE_F_MORE)
...@@ -5367,10 +5376,15 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked) ...@@ -5367,10 +5376,15 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
} else { } else {
bool done; bool done;
if (req->poll.done) {
spin_unlock(&ctx->completion_lock);
return;
}
done = __io_poll_complete(req, req->result); done = __io_poll_complete(req, req->result);
if (done) { if (done) {
io_poll_remove_double(req); io_poll_remove_double(req);
hash_del(&req->hash_node); hash_del(&req->hash_node);
req->poll.done = true;
} else { } else {
req->result = 0; req->result = 0;
add_wait_queue(req->poll.head, &req->poll.wait); add_wait_queue(req->poll.head, &req->poll.wait);
...@@ -5508,6 +5522,7 @@ static void io_async_task_func(struct io_kiocb *req, bool *locked) ...@@ -5508,6 +5522,7 @@ static void io_async_task_func(struct io_kiocb *req, bool *locked)
hash_del(&req->hash_node); hash_del(&req->hash_node);
io_poll_remove_double(req); io_poll_remove_double(req);
apoll->poll.done = true;
spin_unlock(&ctx->completion_lock); spin_unlock(&ctx->completion_lock);
if (!READ_ONCE(apoll->poll.canceled)) if (!READ_ONCE(apoll->poll.canceled))
...@@ -5828,6 +5843,7 @@ static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags) ...@@ -5828,6 +5843,7 @@ static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
struct io_ring_ctx *ctx = req->ctx; struct io_ring_ctx *ctx = req->ctx;
struct io_poll_table ipt; struct io_poll_table ipt;
__poll_t mask; __poll_t mask;
bool done;
ipt.pt._qproc = io_poll_queue_proc; ipt.pt._qproc = io_poll_queue_proc;
...@@ -5836,13 +5852,13 @@ static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags) ...@@ -5836,13 +5852,13 @@ static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
if (mask) { /* no async, we'd stolen it */ if (mask) { /* no async, we'd stolen it */
ipt.error = 0; ipt.error = 0;
io_poll_complete(req, mask); done = io_poll_complete(req, mask);
} }
spin_unlock(&ctx->completion_lock); spin_unlock(&ctx->completion_lock);
if (mask) { if (mask) {
io_cqring_ev_posted(ctx); io_cqring_ev_posted(ctx);
if (poll->events & EPOLLONESHOT) if (done)
io_put_req(req); io_put_req(req);
} }
return ipt.error; return ipt.error;
...@@ -6333,19 +6349,16 @@ static int io_files_update(struct io_kiocb *req, unsigned int issue_flags) ...@@ -6333,19 +6349,16 @@ static int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
struct io_uring_rsrc_update2 up; struct io_uring_rsrc_update2 up;
int ret; int ret;
if (issue_flags & IO_URING_F_NONBLOCK)
return -EAGAIN;
up.offset = req->rsrc_update.offset; up.offset = req->rsrc_update.offset;
up.data = req->rsrc_update.arg; up.data = req->rsrc_update.arg;
up.nr = 0; up.nr = 0;
up.tags = 0; up.tags = 0;
up.resv = 0; up.resv = 0;
mutex_lock(&ctx->uring_lock); io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE, ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE,
&up, req->rsrc_update.nr_args); &up, req->rsrc_update.nr_args);
mutex_unlock(&ctx->uring_lock); io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
if (ret < 0) if (ret < 0)
req_set_fail(req); req_set_fail(req);
...@@ -8400,6 +8413,44 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file, ...@@ -8400,6 +8413,44 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
return ret; return ret;
} }
static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags)
{
unsigned int offset = req->close.file_slot - 1;
struct io_ring_ctx *ctx = req->ctx;
struct io_fixed_file *file_slot;
struct file *file;
int ret, i;
io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
ret = -ENXIO;
if (unlikely(!ctx->file_data))
goto out;
ret = -EINVAL;
if (offset >= ctx->nr_user_files)
goto out;
ret = io_rsrc_node_switch_start(ctx);
if (ret)
goto out;
i = array_index_nospec(offset, ctx->nr_user_files);
file_slot = io_fixed_file_slot(&ctx->file_table, i);
ret = -EBADF;
if (!file_slot->file_ptr)
goto out;
file = (struct file *)(file_slot->file_ptr & FFS_MASK);
ret = io_queue_rsrc_removal(ctx->file_data, offset, ctx->rsrc_node, file);
if (ret)
goto out;
file_slot->file_ptr = 0;
io_rsrc_node_switch(ctx, ctx->file_data);
ret = 0;
out:
io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
return ret;
}
static int __io_sqe_files_update(struct io_ring_ctx *ctx, static int __io_sqe_files_update(struct io_ring_ctx *ctx,
struct io_uring_rsrc_update2 *up, struct io_uring_rsrc_update2 *up,
unsigned nr_args) unsigned nr_args)
...@@ -9166,8 +9217,10 @@ static void io_destroy_buffers(struct io_ring_ctx *ctx) ...@@ -9166,8 +9217,10 @@ static void io_destroy_buffers(struct io_ring_ctx *ctx)
struct io_buffer *buf; struct io_buffer *buf;
unsigned long index; unsigned long index;
xa_for_each(&ctx->io_buffers, index, buf) xa_for_each(&ctx->io_buffers, index, buf) {
__io_remove_buffers(ctx, buf, index, -1U); __io_remove_buffers(ctx, buf, index, -1U);
cond_resched();
}
} }
static void io_req_cache_free(struct list_head *list) static void io_req_cache_free(struct list_head *list)
...@@ -9665,8 +9718,10 @@ static void io_uring_clean_tctx(struct io_uring_task *tctx) ...@@ -9665,8 +9718,10 @@ static void io_uring_clean_tctx(struct io_uring_task *tctx)
struct io_tctx_node *node; struct io_tctx_node *node;
unsigned long index; unsigned long index;
xa_for_each(&tctx->xa, index, node) xa_for_each(&tctx->xa, index, node) {
io_uring_del_tctx_node(index); io_uring_del_tctx_node(index);
cond_resched();
}
if (wq) { if (wq) {
/* /*
* Must be after io_uring_del_task_file() (removes nodes under * Must be after io_uring_del_task_file() (removes nodes under
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment