Commit 5bd831a4 authored by Linus Torvalds

Merge tag 'io_uring-5.5-20191212' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:

 - A tweak to IOSQE_IO_LINK (also marked for stable) to allow links that
   don't sever if the result is < 0.

   This is mostly for linked timeouts, where a pure timeout always
   completes with -ETIME. That made regular links useless for that case,
   hence add a link type that survives it (see the sketch after this
   list).

 - Five minor optimizations to fix and improve cases that regressed
   since v5.4.

 - An SQTHREAD locking fix.

 - A sendmsg/recvmsg iov assignment fix.

 - A net fix where the socket read_iter/write_iter handlers didn't honor
   IOCB_NOWAIT, and a follow-up ensuring io_uring can rely on that.

 - Fix a case where an invalid opcode could return -EBADF instead of
   -EINVAL, if the ->fd of that sqe was set to an invalid fd value.
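
   As a quick illustration of the new flag, here is a minimal userspace
   sketch (not part of this pull; it assumes liburing is installed and
   that the 5.5 uapi header providing IOSQE_IO_HARDLINK is in use, and
   the ring size and timeout value are arbitrary). A pure timeout is
   hard-linked to a follow-up request, so the chain survives the
   expected -ETIME completion:

	#include <liburing.h>
	#include <stdio.h>

	int main(void)
	{
		struct io_uring ring;
		struct io_uring_sqe *sqe;
		struct io_uring_cqe *cqe;
		struct __kernel_timespec ts = { .tv_sec = 1 };
		int i;

		if (io_uring_queue_init(4, &ring, 0) < 0)
			return 1;

		/* first sqe: a pure timeout, expected to complete with -ETIME */
		sqe = io_uring_get_sqe(&ring);
		io_uring_prep_timeout(sqe, &ts, 0, 0);
		/* hard link: don't sever the chain on a result < 0 */
		sqe->flags |= IOSQE_IO_HARDLINK;

		/* second sqe: still issued once the timeout has fired */
		sqe = io_uring_get_sqe(&ring);
		io_uring_prep_nop(sqe);

		io_uring_submit(&ring);
		for (i = 0; i < 2; i++) {
			io_uring_wait_cqe(&ring, &cqe);
			printf("cqe res %d\n", cqe->res);
			io_uring_cqe_seen(&ring, cqe);
		}
		io_uring_queue_exit(&ring);
		return 0;
	}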

* tag 'io_uring-5.5-20191212' of git://git.kernel.dk/linux-block:
  io_uring: ensure we return -EINVAL on unknown opcode
  io_uring: add sockets to list of files that support non-blocking issue
  net: make socket read/write_iter() honor IOCB_NOWAIT
  io_uring: only hash regular files for async work execution
  io_uring: run next sqe inline if possible
  io_uring: don't dynamically allocate poll data
  io_uring: deferred send/recvmsg should assign iov
  io_uring: sqthread should grab ctx->uring_lock for submissions
  io-wq: briefly spin for new work after finishing work
  io-wq: remove worker->wait waitqueue
  io_uring: allow unbreakable links
parents 15da849c 9e3aa61a
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -49,7 +49,6 @@ struct io_worker {
 	struct hlist_nulls_node nulls_node;
 	struct list_head all_list;
 	struct task_struct *task;
-	wait_queue_head_t wait;
 	struct io_wqe *wqe;

 	struct io_wq_work *cur_work;
@@ -258,7 +257,7 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
 	worker = hlist_nulls_entry(n, struct io_worker, nulls_node);
 	if (io_worker_get(worker)) {
-		wake_up(&worker->wait);
+		wake_up_process(worker->task);
 		io_worker_release(worker);
 		return true;
 	}
@@ -492,28 +491,46 @@ static void io_worker_handle_work(struct io_worker *worker)
 	} while (1);
 }

+static inline void io_worker_spin_for_work(struct io_wqe *wqe)
+{
+	int i = 0;
+
+	while (++i < 1000) {
+		if (io_wqe_run_queue(wqe))
+			break;
+		if (need_resched())
+			break;
+		cpu_relax();
+	}
+}
+
 static int io_wqe_worker(void *data)
 {
 	struct io_worker *worker = data;
 	struct io_wqe *wqe = worker->wqe;
 	struct io_wq *wq = wqe->wq;
-	DEFINE_WAIT(wait);
+	bool did_work;

 	io_worker_start(wqe, worker);

+	did_work = false;
 	while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
-		prepare_to_wait(&worker->wait, &wait, TASK_INTERRUPTIBLE);
+		set_current_state(TASK_INTERRUPTIBLE);
+loop:
+		if (did_work)
+			io_worker_spin_for_work(wqe);
 		spin_lock_irq(&wqe->lock);
 		if (io_wqe_run_queue(wqe)) {
 			__set_current_state(TASK_RUNNING);
 			io_worker_handle_work(worker);
-			continue;
+			did_work = true;
+			goto loop;
 		}
+		did_work = false;
 		/* drops the lock on success, retry */
 		if (__io_worker_idle(wqe, worker)) {
 			__release(&wqe->lock);
-			continue;
+			goto loop;
 		}
 		spin_unlock_irq(&wqe->lock);
 		if (signal_pending(current))
@@ -526,8 +543,6 @@ static int io_wqe_worker(void *data)
 			break;
 	}

-	finish_wait(&worker->wait, &wait);
-
 	if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
 		spin_lock_irq(&wqe->lock);
 		if (!wq_list_empty(&wqe->work_list))
@@ -589,7 +604,6 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
 	refcount_set(&worker->ref, 1);
 	worker->nulls_node.pprev = NULL;
-	init_waitqueue_head(&worker->wait);
 	worker->wqe = wqe;
 	spin_lock_init(&worker->lock);
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -35,7 +35,8 @@ static inline void wq_list_add_tail(struct io_wq_work_node *node,
 					struct io_wq_work_list *list)
 {
 	if (!list->first) {
-		list->first = list->last = node;
+		list->last = node;
+		WRITE_ONCE(list->first, node);
 	} else {
 		list->last->next = node;
 		list->last = node;
@@ -47,7 +48,7 @@ static inline void wq_node_del(struct io_wq_work_list *list,
 				struct io_wq_work_node *prev)
 {
 	if (node == list->first)
-		list->first = node->next;
+		WRITE_ONCE(list->first, node->next);
 	if (node == list->last)
 		list->last = prev;
 	if (prev)
@@ -58,7 +59,7 @@ static inline void wq_node_del(struct io_wq_work_list *list,
 #define wq_list_for_each(pos, prv, head) \
 	for (pos = (head)->first, prv = NULL; pos; prv = pos, pos = (pos)->next)

-#define wq_list_empty(list) ((list)->first == NULL)
+#define wq_list_empty(list) (READ_ONCE((list)->first) == NULL)
 #define INIT_WQ_LIST(list) do { \
 	(list)->first = NULL; \
 	(list)->last = NULL; \
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -293,7 +293,7 @@ struct io_poll_iocb {
 	__poll_t events;
 	bool done;
 	bool canceled;
-	struct wait_queue_entry *wait;
+	struct wait_queue_entry wait;
 };

 struct io_timeout_data {
@@ -377,6 +377,7 @@ struct io_kiocb {
 #define REQ_F_TIMEOUT_NOSEQ	8192	/* no timeout sequence */
 #define REQ_F_INFLIGHT		16384	/* on inflight list */
 #define REQ_F_COMP_LOCKED	32768	/* completion under lock */
+#define REQ_F_HARDLINK		65536	/* doesn't sever on completion < 0 */
 	u64 user_data;
 	u32 result;
 	u32 sequence;
@@ -580,7 +581,9 @@ static inline bool io_prep_async_work(struct io_kiocb *req,
 	switch (req->sqe->opcode) {
 	case IORING_OP_WRITEV:
 	case IORING_OP_WRITE_FIXED:
-		do_hashed = true;
+		/* only regular files should be hashed for writes */
+		if (req->flags & REQ_F_ISREG)
+			do_hashed = true;
 		/* fall-through */
 	case IORING_OP_READV:
 	case IORING_OP_READ_FIXED:
@@ -1292,6 +1295,12 @@ static void kiocb_end_write(struct io_kiocb *req)
 	file_end_write(req->file);
 }

+static inline void req_set_fail_links(struct io_kiocb *req)
+{
+	if ((req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) == REQ_F_LINK)
+		req->flags |= REQ_F_FAIL_LINK;
+}
+
 static void io_complete_rw_common(struct kiocb *kiocb, long res)
 {
 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
@@ -1299,8 +1308,8 @@ static void io_complete_rw_common(struct kiocb *kiocb, long res)
 	if (kiocb->ki_flags & IOCB_WRITE)
 		kiocb_end_write(req);

-	if ((req->flags & REQ_F_LINK) && res != req->result)
-		req->flags |= REQ_F_FAIL_LINK;
+	if (res != req->result)
+		req_set_fail_links(req);
 	io_cqring_add_event(req, res);
 }
@@ -1330,8 +1339,8 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
 	if (kiocb->ki_flags & IOCB_WRITE)
 		kiocb_end_write(req);

-	if ((req->flags & REQ_F_LINK) && res != req->result)
-		req->flags |= REQ_F_FAIL_LINK;
+	if (res != req->result)
+		req_set_fail_links(req);
 	req->result = res;
 	if (res != -EAGAIN)
 		req->flags |= REQ_F_IOPOLL_COMPLETED;
@@ -1422,7 +1431,7 @@ static bool io_file_supports_async(struct file *file)
 {
 	umode_t mode = file_inode(file)->i_mode;

-	if (S_ISBLK(mode) || S_ISCHR(mode))
+	if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISSOCK(mode))
 		return true;
 	if (S_ISREG(mode) && file->f_op != &io_uring_fops)
 		return true;
@@ -1858,7 +1867,9 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
 			goto copy_iov;
 		}

-		if (force_nonblock && !(kiocb->ki_flags & IOCB_DIRECT))
+		/* file path doesn't support NOWAIT for non-direct_IO */
+		if (force_nonblock && !(kiocb->ki_flags & IOCB_DIRECT) &&
+		    (req->flags & REQ_F_ISREG))
 			goto copy_iov;

 		iov_count = iov_iter_count(&iter);
@@ -1956,8 +1967,8 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 				end > 0 ? end : LLONG_MAX,
 				fsync_flags & IORING_FSYNC_DATASYNC);

-	if (ret < 0 && (req->flags & REQ_F_LINK))
-		req->flags |= REQ_F_FAIL_LINK;
+	if (ret < 0)
+		req_set_fail_links(req);
 	io_cqring_add_event(req, ret);
 	io_put_req_find_next(req, nxt);
 	return 0;
@@ -2003,8 +2014,8 @@ static int io_sync_file_range(struct io_kiocb *req,
 	ret = sync_file_range(req->rw.ki_filp, sqe_off, sqe_len, flags);

-	if (ret < 0 && (req->flags & REQ_F_LINK))
-		req->flags |= REQ_F_FAIL_LINK;
+	if (ret < 0)
+		req_set_fail_links(req);
 	io_cqring_add_event(req, ret);
 	io_put_req_find_next(req, nxt);
 	return 0;
@@ -2019,6 +2030,7 @@ static int io_sendmsg_prep(struct io_kiocb *req, struct io_async_ctx *io)
 	flags = READ_ONCE(sqe->msg_flags);
 	msg = (struct user_msghdr __user *)(unsigned long) READ_ONCE(sqe->addr);
+	io->msg.iov = io->msg.fast_iov;
 	return sendmsg_copy_msghdr(&io->msg.msg, msg, flags, &io->msg.iov);
 #else
 	return 0;
@@ -2054,7 +2066,6 @@ static int io_sendmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 		} else {
 			kmsg = &io.msg.msg;
 			kmsg->msg_name = &addr;
-			io.msg.iov = io.msg.fast_iov;
 			ret = io_sendmsg_prep(req, &io);
 			if (ret)
 				goto out;
@@ -2079,8 +2090,8 @@ static int io_sendmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 out:
 	io_cqring_add_event(req, ret);
-	if (ret < 0 && (req->flags & REQ_F_LINK))
-		req->flags |= REQ_F_FAIL_LINK;
+	if (ret < 0)
+		req_set_fail_links(req);
 	io_put_req_find_next(req, nxt);
 	return 0;
 #else
@@ -2097,6 +2108,7 @@ static int io_recvmsg_prep(struct io_kiocb *req, struct io_async_ctx *io)
 	flags = READ_ONCE(sqe->msg_flags);
 	msg = (struct user_msghdr __user *)(unsigned long) READ_ONCE(sqe->addr);
+	io->msg.iov = io->msg.fast_iov;
 	return recvmsg_copy_msghdr(&io->msg.msg, msg, flags, &io->msg.uaddr,
 					&io->msg.iov);
 #else
@@ -2136,7 +2148,6 @@ static int io_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 		} else {
 			kmsg = &io.msg.msg;
 			kmsg->msg_name = &addr;
-			io.msg.iov = io.msg.fast_iov;
 			ret = io_recvmsg_prep(req, &io);
 			if (ret)
 				goto out;
@@ -2161,8 +2172,8 @@ static int io_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 out:
 	io_cqring_add_event(req, ret);
-	if (ret < 0 && (req->flags & REQ_F_LINK))
-		req->flags |= REQ_F_FAIL_LINK;
+	if (ret < 0)
+		req_set_fail_links(req);
 	io_put_req_find_next(req, nxt);
 	return 0;
 #else
@@ -2196,8 +2207,8 @@ static int io_accept(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	}
 	if (ret == -ERESTARTSYS)
 		ret = -EINTR;
-	if (ret < 0 && (req->flags & REQ_F_LINK))
-		req->flags |= REQ_F_FAIL_LINK;
+	if (ret < 0)
+		req_set_fail_links(req);
 	io_cqring_add_event(req, ret);
 	io_put_req_find_next(req, nxt);
 	return 0;
@@ -2263,8 +2274,8 @@ static int io_connect(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	if (ret == -ERESTARTSYS)
 		ret = -EINTR;
 out:
-	if (ret < 0 && (req->flags & REQ_F_LINK))
-		req->flags |= REQ_F_FAIL_LINK;
+	if (ret < 0)
+		req_set_fail_links(req);
 	io_cqring_add_event(req, ret);
 	io_put_req_find_next(req, nxt);
 	return 0;
@@ -2279,8 +2290,8 @@ static void io_poll_remove_one(struct io_kiocb *req)
 	spin_lock(&poll->head->lock);
 	WRITE_ONCE(poll->canceled, true);
-	if (!list_empty(&poll->wait->entry)) {
-		list_del_init(&poll->wait->entry);
+	if (!list_empty(&poll->wait.entry)) {
+		list_del_init(&poll->wait.entry);
 		io_queue_async_work(req);
 	}
 	spin_unlock(&poll->head->lock);
@@ -2340,8 +2351,8 @@ static int io_poll_remove(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	spin_unlock_irq(&ctx->completion_lock);

 	io_cqring_add_event(req, ret);
-	if (ret < 0 && (req->flags & REQ_F_LINK))
-		req->flags |= REQ_F_FAIL_LINK;
+	if (ret < 0)
+		req_set_fail_links(req);
 	io_put_req(req);
 	return 0;
 }
@@ -2351,7 +2362,6 @@ static void io_poll_complete(struct io_kiocb *req, __poll_t mask, int error)
 	struct io_ring_ctx *ctx = req->ctx;

 	req->poll.done = true;
-	kfree(req->poll.wait);
 	if (error)
 		io_cqring_fill_event(req, error);
 	else
@@ -2389,7 +2399,7 @@ static void io_poll_complete_work(struct io_wq_work **workptr)
 	 */
 	spin_lock_irq(&ctx->completion_lock);
 	if (!mask && ret != -ECANCELED) {
-		add_wait_queue(poll->head, poll->wait);
+		add_wait_queue(poll->head, &poll->wait);
 		spin_unlock_irq(&ctx->completion_lock);
 		return;
 	}
@@ -2399,8 +2409,8 @@ static void io_poll_complete_work(struct io_wq_work **workptr)
 	io_cqring_ev_posted(ctx);

-	if (ret < 0 && req->flags & REQ_F_LINK)
-		req->flags |= REQ_F_FAIL_LINK;
+	if (ret < 0)
+		req_set_fail_links(req);
 	io_put_req_find_next(req, &nxt);
 	if (nxt)
 		*workptr = &nxt->work;
@@ -2419,7 +2429,7 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 	if (mask && !(mask & poll->events))
 		return 0;

-	list_del_init(&poll->wait->entry);
+	list_del_init(&poll->wait.entry);

 	/*
 	 * Run completion inline if we can. We're using trylock here because
@@ -2460,7 +2470,7 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
 	pt->error = 0;
 	pt->req->poll.head = head;
-	add_wait_queue(head, pt->req->poll.wait);
+	add_wait_queue(head, &pt->req->poll.wait);
 }

 static void io_poll_req_insert(struct io_kiocb *req)
@@ -2489,10 +2499,6 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	if (!poll->file)
 		return -EBADF;

-	poll->wait = kmalloc(sizeof(*poll->wait), GFP_KERNEL);
-	if (!poll->wait)
-		return -ENOMEM;
-
 	req->io = NULL;
 	INIT_IO_WORK(&req->work, io_poll_complete_work);
 	events = READ_ONCE(sqe->poll_events);
@@ -2509,9 +2515,9 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	ipt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */

 	/* initialized the list so that we can do list_empty checks */
-	INIT_LIST_HEAD(&poll->wait->entry);
-	init_waitqueue_func_entry(poll->wait, io_poll_wake);
-	poll->wait->private = poll;
+	INIT_LIST_HEAD(&poll->wait.entry);
+	init_waitqueue_func_entry(&poll->wait, io_poll_wake);
+	poll->wait.private = poll;

 	INIT_LIST_HEAD(&req->list);
@@ -2520,14 +2526,14 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	spin_lock_irq(&ctx->completion_lock);
 	if (likely(poll->head)) {
 		spin_lock(&poll->head->lock);
-		if (unlikely(list_empty(&poll->wait->entry))) {
+		if (unlikely(list_empty(&poll->wait.entry))) {
 			if (ipt.error)
 				cancel = true;
 			ipt.error = 0;
 			mask = 0;
 		}
 		if (mask || ipt.error)
-			list_del_init(&poll->wait->entry);
+			list_del_init(&poll->wait.entry);
 		else if (cancel)
 			WRITE_ONCE(poll->canceled, true);
 		else if (!poll->done) /* actually waiting for an event */
@@ -2582,8 +2588,7 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);

 	io_cqring_ev_posted(ctx);
-	if (req->flags & REQ_F_LINK)
-		req->flags |= REQ_F_FAIL_LINK;
+	req_set_fail_links(req);
 	io_put_req(req);
 	return HRTIMER_NORESTART;
 }
@@ -2608,8 +2613,7 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
 	if (ret == -1)
 		return -EALREADY;

-	if (req->flags & REQ_F_LINK)
-		req->flags |= REQ_F_FAIL_LINK;
+	req_set_fail_links(req);
 	io_cqring_fill_event(req, -ECANCELED);
 	io_put_req(req);
 	return 0;
@@ -2640,8 +2644,8 @@ static int io_timeout_remove(struct io_kiocb *req,
 	io_commit_cqring(ctx);
 	spin_unlock_irq(&ctx->completion_lock);
 	io_cqring_ev_posted(ctx);
-	if (ret < 0 && req->flags & REQ_F_LINK)
-		req->flags |= REQ_F_FAIL_LINK;
+	if (ret < 0)
+		req_set_fail_links(req);
 	io_put_req(req);
 	return 0;
 }
@@ -2822,8 +2826,8 @@ static void io_async_find_and_cancel(struct io_ring_ctx *ctx,
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 	io_cqring_ev_posted(ctx);

-	if (ret < 0 && (req->flags & REQ_F_LINK))
-		req->flags |= REQ_F_FAIL_LINK;
+	if (ret < 0)
+		req_set_fail_links(req);
 	io_put_req_find_next(req, nxt);
 }
@@ -2991,12 +2995,7 @@ static int io_issue_sqe(struct io_kiocb *req, struct io_kiocb **nxt,
 		if (req->result == -EAGAIN)
 			return -EAGAIN;

-		/* workqueue context doesn't hold uring_lock, grab it now */
-		if (req->in_async)
-			mutex_lock(&ctx->uring_lock);
 		io_iopoll_req_issued(req);
-		if (req->in_async)
-			mutex_unlock(&ctx->uring_lock);
 	}

 	return 0;
@@ -3044,8 +3043,7 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
 	io_put_req(req);

 	if (ret) {
-		if (req->flags & REQ_F_LINK)
-			req->flags |= REQ_F_FAIL_LINK;
+		req_set_fail_links(req);
 		io_cqring_add_event(req, ret);
 		io_put_req(req);
 	}
@@ -3064,7 +3062,12 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
 	}
 }

-static bool io_op_needs_file(const struct io_uring_sqe *sqe)
+static bool io_req_op_valid(int op)
+{
+	return op >= IORING_OP_NOP && op < IORING_OP_LAST;
+}
+
+static int io_op_needs_file(const struct io_uring_sqe *sqe)
 {
 	int op = READ_ONCE(sqe->opcode);
@@ -3075,9 +3078,11 @@ static bool io_op_needs_file(const struct io_uring_sqe *sqe)
 	case IORING_OP_TIMEOUT_REMOVE:
 	case IORING_OP_ASYNC_CANCEL:
 	case IORING_OP_LINK_TIMEOUT:
-		return false;
+		return 0;
 	default:
-		return true;
+		if (io_req_op_valid(op))
+			return 1;
+		return -EINVAL;
 	}
 }
@@ -3094,7 +3099,7 @@ static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	unsigned flags;
-	int fd;
+	int fd, ret;

 	flags = READ_ONCE(req->sqe->flags);
 	fd = READ_ONCE(req->sqe->fd);
@@ -3102,8 +3107,9 @@ static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req)
 	if (flags & IOSQE_IO_DRAIN)
 		req->flags |= REQ_F_IO_DRAIN;

-	if (!io_op_needs_file(req->sqe))
-		return 0;
+	ret = io_op_needs_file(req->sqe);
+	if (ret <= 0)
+		return ret;

 	if (flags & IOSQE_FIXED_FILE) {
 		if (unlikely(!ctx->file_table ||
@@ -3179,8 +3185,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);

 	if (prev) {
-		if (prev->flags & REQ_F_LINK)
-			prev->flags |= REQ_F_FAIL_LINK;
+		req_set_fail_links(prev);
 		io_async_find_and_cancel(ctx, req, prev->user_data, NULL,
 						-ETIME);
 		io_put_req(prev);
@@ -3231,13 +3236,14 @@ static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
 static void __io_queue_sqe(struct io_kiocb *req)
 {
-	struct io_kiocb *linked_timeout = io_prep_linked_timeout(req);
+	struct io_kiocb *linked_timeout;
 	struct io_kiocb *nxt = NULL;
 	int ret;

+again:
+	linked_timeout = io_prep_linked_timeout(req);
+
 	ret = io_issue_sqe(req, &nxt, true);
-	if (nxt)
-		io_queue_async_work(nxt);

 	/*
 	 * We async punt it if the file wasn't marked NOWAIT, or if the file
@@ -3256,7 +3262,7 @@ static void __io_queue_sqe(struct io_kiocb *req)
 		 * submit reference when the iocb is actually submitted.
 		 */
 		io_queue_async_work(req);
-		return;
+		goto done_req;
 	}

 err:
@@ -3273,10 +3279,15 @@ static void __io_queue_sqe(struct io_kiocb *req)
 	/* and drop final reference, if we failed */
 	if (ret) {
 		io_cqring_add_event(req, ret);
-		if (req->flags & REQ_F_LINK)
-			req->flags |= REQ_F_FAIL_LINK;
+		req_set_fail_links(req);
 		io_put_req(req);
 	}
+done_req:
+	if (nxt) {
+		req = nxt;
+		nxt = NULL;
+		goto again;
+	}
 }

 static void io_queue_sqe(struct io_kiocb *req)
@@ -3293,8 +3304,7 @@ static void io_queue_sqe(struct io_kiocb *req)
 	if (ret) {
 		if (ret != -EIOCBQUEUED) {
 			io_cqring_add_event(req, ret);
-			if (req->flags & REQ_F_LINK)
-				req->flags |= REQ_F_FAIL_LINK;
+			req_set_fail_links(req);
 			io_double_put_req(req);
 		}
 	} else
@@ -3310,8 +3320,8 @@ static inline void io_queue_link_head(struct io_kiocb *req)
 		io_queue_sqe(req);
 }

-#define SQE_VALID_FLAGS	(IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK)
+#define SQE_VALID_FLAGS	(IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \
+				IOSQE_IO_HARDLINK)

 static bool io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
 			  struct io_kiocb **link)
@@ -3349,6 +3359,9 @@ static bool io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
 		if (req->sqe->flags & IOSQE_IO_DRAIN)
 			(*link)->flags |= REQ_F_DRAIN_LINK | REQ_F_IO_DRAIN;

+		if (req->sqe->flags & IOSQE_IO_HARDLINK)
+			req->flags |= REQ_F_HARDLINK;
+
 		io = kmalloc(sizeof(*io), GFP_KERNEL);
 		if (!io) {
 			ret = -EAGAIN;
@@ -3358,13 +3371,16 @@ static bool io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
 		ret = io_req_defer_prep(req, io);
 		if (ret) {
 			kfree(io);
+			/* fail even hard links since we don't submit */
 			prev->flags |= REQ_F_FAIL_LINK;
 			goto err_req;
 		}
 		trace_io_uring_link(ctx, req, prev);
 		list_add_tail(&req->link_list, &prev->link_list);
-	} else if (req->sqe->flags & IOSQE_IO_LINK) {
+	} else if (req->sqe->flags & (IOSQE_IO_LINK|IOSQE_IO_HARDLINK)) {
 		req->flags |= REQ_F_LINK;
+		if (req->sqe->flags & IOSQE_IO_HARDLINK)
+			req->flags |= REQ_F_HARDLINK;

 		INIT_LIST_HEAD(&req->link_list);
 		*link = req;
@@ -3647,7 +3663,9 @@ static int io_sq_thread(void *data)
 		}

 		to_submit = min(to_submit, ctx->sq_entries);
+		mutex_lock(&ctx->uring_lock);
 		ret = io_submit_sqes(ctx, to_submit, NULL, -1, &cur_mm, true);
+		mutex_unlock(&ctx->uring_lock);
 		if (ret > 0)
 			inflight += ret;
 	}
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -48,6 +48,7 @@ struct io_uring_sqe {
 #define IOSQE_FIXED_FILE	(1U << 0)	/* use fixed fileset */
 #define IOSQE_IO_DRAIN		(1U << 1)	/* issue after inflight IO */
 #define IOSQE_IO_LINK		(1U << 2)	/* links next sqe */
+#define IOSQE_IO_HARDLINK	(1U << 3)	/* like LINK, but stronger */

 /*
  * io_uring_setup() flags
@@ -57,23 +58,28 @@ struct io_uring_sqe {
 #define IORING_SETUP_SQ_AFF	(1U << 2)	/* sq_thread_cpu is valid */
 #define IORING_SETUP_CQSIZE	(1U << 3)	/* app defines CQ size */

-#define IORING_OP_NOP		0
-#define IORING_OP_READV		1
-#define IORING_OP_WRITEV	2
-#define IORING_OP_FSYNC		3
-#define IORING_OP_READ_FIXED	4
-#define IORING_OP_WRITE_FIXED	5
-#define IORING_OP_POLL_ADD	6
-#define IORING_OP_POLL_REMOVE	7
-#define IORING_OP_SYNC_FILE_RANGE	8
-#define IORING_OP_SENDMSG	9
-#define IORING_OP_RECVMSG	10
-#define IORING_OP_TIMEOUT	11
-#define IORING_OP_TIMEOUT_REMOVE	12
-#define IORING_OP_ACCEPT	13
-#define IORING_OP_ASYNC_CANCEL	14
-#define IORING_OP_LINK_TIMEOUT	15
-#define IORING_OP_CONNECT	16
+enum {
+	IORING_OP_NOP,
+	IORING_OP_READV,
+	IORING_OP_WRITEV,
+	IORING_OP_FSYNC,
+	IORING_OP_READ_FIXED,
+	IORING_OP_WRITE_FIXED,
+	IORING_OP_POLL_ADD,
+	IORING_OP_POLL_REMOVE,
+	IORING_OP_SYNC_FILE_RANGE,
+	IORING_OP_SENDMSG,
+	IORING_OP_RECVMSG,
+	IORING_OP_TIMEOUT,
+	IORING_OP_TIMEOUT_REMOVE,
+	IORING_OP_ACCEPT,
+	IORING_OP_ASYNC_CANCEL,
+	IORING_OP_LINK_TIMEOUT,
+	IORING_OP_CONNECT,
+
+	/* this goes last, obviously */
+	IORING_OP_LAST,
+};

 /*
  * sqe->fsync_flags
--- a/net/socket.c
+++ b/net/socket.c
@@ -957,7 +957,7 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
 			     .msg_iocb = iocb};
 	ssize_t res;

-	if (file->f_flags & O_NONBLOCK)
+	if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
 		msg.msg_flags = MSG_DONTWAIT;

 	if (iocb->ki_pos != 0)
@@ -982,7 +982,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (iocb->ki_pos != 0)
 		return -ESPIPE;

-	if (file->f_flags & O_NONBLOCK)
+	if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
 		msg.msg_flags = MSG_DONTWAIT;

 	if (sock->type == SOCK_SEQPACKET)