Commit a6a4b66b authored by Linus Torvalds

Merge tag 'for-linus-20190516' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:
 "A small set of fixes for io_uring.

  This contains:

   - smp_rmb() cleanup for io_cqring_events() (Jackie)

   - io_cqring_wait() simplification (Jackie)

   - removal of dead 'ev_flags' passing (me)

   - SQ poll CPU affinity verification fix (me)

   - SQ poll wait fix (Roman)

   - SQE command prep cleanup and fix (Stefan)"

* tag 'for-linus-20190516' of git://git.kernel.dk/linux-block:
  io_uring: use wait_event_interruptible for cq_wait conditional wait
  io_uring: adjust smp_rmb inside io_cqring_events
  io_uring: fix infinite wait in kthread_park() on io_finish_async()
  io_uring: remove 'ev_flags' argument
  io_uring: fix failure to verify SQ_AFF cpu
  io_uring: fix race condition reading SQE data
parents 1718de78 fdb288a6
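
Of the fixes above, the SQE prep change (the REQ_F_PREPPED removal below) is the subtle one: prep handlers could run twice, re-reading the shared SQE on an -EAGAIN retry from async context, by which point userspace may already have recycled that SQE slot. Copying everything the request needs the first and only time prep runs avoids the race. A simplified userspace illustration of the hazard follows; the types and names (struct sqe, prep()) are hypothetical, not the kernel's:

	/* Hypothetical sketch of the SQE-reuse hazard; not the kernel code. */
	#include <stdint.h>
	#include <stdio.h>

	struct sqe { uint64_t user_data; uint32_t len; };	/* shared SQ slot */
	struct req { uint64_t user_data; uint32_t len; };	/* kernel-side copy */

	static struct sqe slot;

	/* Correct: copy every field the request needs when prep first runs. */
	static void prep(struct req *r, const struct sqe *s)
	{
		r->user_data = s->user_data;
		r->len = s->len;
	}

	int main(void)
	{
		struct req r;

		slot.user_data = 1;
		slot.len = 512;
		prep(&r, &slot);		/* prep happens exactly once */

		/* Userspace reuses the slot before a deferred retry runs. */
		slot.user_data = 2;
		slot.len = 4096;

		/* A retry that re-read 'slot' would see len 4096; the copy
		 * made at prep time is stable. */
		printf("retry uses len=%u, slot now holds len=%u\n",
		       (unsigned)r.len, (unsigned)slot.len);
		return 0;
	}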
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -231,7 +231,6 @@ struct io_ring_ctx {
 	struct task_struct	*sqo_thread;	/* if using sq thread polling */
 	struct mm_struct	*sqo_mm;
 	wait_queue_head_t	sqo_wait;
-	unsigned		sqo_stop;
 
 	struct {
 		/* CQ ring */
...@@ -329,9 +328,8 @@ struct io_kiocb { ...@@ -329,9 +328,8 @@ struct io_kiocb {
#define REQ_F_IOPOLL_COMPLETED 2 /* polled IO has completed */ #define REQ_F_IOPOLL_COMPLETED 2 /* polled IO has completed */
#define REQ_F_FIXED_FILE 4 /* ctx owns file */ #define REQ_F_FIXED_FILE 4 /* ctx owns file */
#define REQ_F_SEQ_PREV 8 /* sequential with previous */ #define REQ_F_SEQ_PREV 8 /* sequential with previous */
#define REQ_F_PREPPED 16 /* prep already done */ #define REQ_F_IO_DRAIN 16 /* drain existing IO first */
#define REQ_F_IO_DRAIN 32 /* drain existing IO first */ #define REQ_F_IO_DRAINED 32 /* drain done */
#define REQ_F_IO_DRAINED 64 /* drain done */
u64 user_data; u64 user_data;
u32 error; /* iopoll result from callback */ u32 error; /* iopoll result from callback */
u32 sequence; u32 sequence;
@@ -490,7 +488,7 @@ static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
 }
 
 static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
-				 long res, unsigned ev_flags)
+				 long res)
 {
 	struct io_uring_cqe *cqe;
@@ -503,7 +501,7 @@ static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
 	if (cqe) {
 		WRITE_ONCE(cqe->user_data, ki_user_data);
 		WRITE_ONCE(cqe->res, res);
-		WRITE_ONCE(cqe->flags, ev_flags);
+		WRITE_ONCE(cqe->flags, 0);
 	} else {
 		unsigned overflow = READ_ONCE(ctx->cq_ring->overflow);
@@ -522,12 +520,12 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
 }
 
 static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 user_data,
-				long res, unsigned ev_flags)
+				long res)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(&ctx->completion_lock, flags);
-	io_cqring_fill_event(ctx, user_data, res, ev_flags);
+	io_cqring_fill_event(ctx, user_data, res);
 	io_commit_cqring(ctx);
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
@@ -629,7 +627,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 		req = list_first_entry(done, struct io_kiocb, list);
 		list_del(&req->list);
 
-		io_cqring_fill_event(ctx, req->user_data, req->error, 0);
+		io_cqring_fill_event(ctx, req->user_data, req->error);
 		(*nr_events)++;
 
 		if (refcount_dec_and_test(&req->refs)) {
@@ -777,7 +775,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
 	kiocb_end_write(kiocb);
 
-	io_cqring_add_event(req->ctx, req->user_data, res, 0);
+	io_cqring_add_event(req->ctx, req->user_data, res);
 	io_put_req(req);
 }
@@ -896,9 +894,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
 	if (!req->file)
 		return -EBADF;
-	/* For -EAGAIN retry, everything is already prepped */
-	if (req->flags & REQ_F_PREPPED)
-		return 0;
 
 	if (force_nonblock && !io_file_supports_async(req->file))
 		force_nonblock = false;
@@ -941,7 +936,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
 			return -EINVAL;
 		kiocb->ki_complete = io_complete_rw;
 	}
-	req->flags |= REQ_F_PREPPED;
 	return 0;
 }
@@ -1216,7 +1210,7 @@ static int io_nop(struct io_kiocb *req, u64 user_data)
 	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 
-	io_cqring_add_event(ctx, user_data, err, 0);
+	io_cqring_add_event(ctx, user_data, err);
 	io_put_req(req);
 	return 0;
 }
@@ -1227,16 +1221,12 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	if (!req->file)
 		return -EBADF;
-	/* Prep already done (EAGAIN retry) */
-	if (req->flags & REQ_F_PREPPED)
-		return 0;
 
 	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 	if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
 		return -EINVAL;
 
-	req->flags |= REQ_F_PREPPED;
 	return 0;
 }
@@ -1265,7 +1255,7 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 				end > 0 ? end : LLONG_MAX,
 				fsync_flags & IORING_FSYNC_DATASYNC);
 
-	io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
+	io_cqring_add_event(req->ctx, sqe->user_data, ret);
 	io_put_req(req);
 	return 0;
 }
@@ -1277,16 +1267,12 @@ static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	if (!req->file)
 		return -EBADF;
-	/* Prep already done (EAGAIN retry) */
-	if (req->flags & REQ_F_PREPPED)
-		return 0;
 
 	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 	if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
 		return -EINVAL;
 
-	req->flags |= REQ_F_PREPPED;
 	return ret;
 }
@@ -1313,7 +1299,7 @@ static int io_sync_file_range(struct io_kiocb *req,
 	ret = sync_file_range(req->rw.ki_filp, sqe_off, sqe_len, flags);
 
-	io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
+	io_cqring_add_event(req->ctx, sqe->user_data, ret);
 	io_put_req(req);
 	return 0;
 }
@@ -1371,7 +1357,7 @@ static int io_poll_remove(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	}
 	spin_unlock_irq(&ctx->completion_lock);
 
-	io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
+	io_cqring_add_event(req->ctx, sqe->user_data, ret);
 	io_put_req(req);
 	return 0;
 }
@@ -1380,7 +1366,7 @@ static void io_poll_complete(struct io_ring_ctx *ctx, struct io_kiocb *req,
 			     __poll_t mask)
 {
 	req->poll.done = true;
-	io_cqring_fill_event(ctx, req->user_data, mangle_poll(mask), 0);
+	io_cqring_fill_event(ctx, req->user_data, mangle_poll(mask));
 	io_commit_cqring(ctx);
 }
@@ -1700,7 +1686,7 @@ static void io_sq_wq_submit_work(struct work_struct *work)
 		io_put_req(req);
 
 	if (ret) {
-		io_cqring_add_event(ctx, sqe->user_data, ret, 0);
+		io_cqring_add_event(ctx, sqe->user_data, ret);
 		io_put_req(req);
 	}
@@ -2005,7 +1991,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
 			continue;
 		}
 
-		io_cqring_add_event(ctx, sqes[i].sqe->user_data, ret, 0);
+		io_cqring_add_event(ctx, sqes[i].sqe->user_data, ret);
 	}
 
 	if (statep)
@@ -2028,7 +2014,7 @@ static int io_sq_thread(void *data)
 	set_fs(USER_DS);
 
 	timeout = inflight = 0;
-	while (!kthread_should_stop() && !ctx->sqo_stop) {
+	while (!kthread_should_park()) {
 		bool all_fixed, mm_fault = false;
 		int i;
@@ -2090,7 +2076,7 @@ static int io_sq_thread(void *data)
 			smp_mb();
 
 			if (!io_get_sqring(ctx, &sqes[0])) {
-				if (kthread_should_stop()) {
+				if (kthread_should_park()) {
 					finish_wait(&ctx->sqo_wait, &wait);
 					break;
 				}
@@ -2140,8 +2126,7 @@ static int io_sq_thread(void *data)
 		mmput(cur_mm);
 	}
 
-	if (kthread_should_park())
-		kthread_parkme();
+	kthread_parkme();
 
 	return 0;
 }
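
With sqo_stop gone, io_sq_thread() follows the standard kthread parking protocol: the loop condition is kthread_should_park(), and the thread always reaches kthread_parkme() before returning. That unconditional parkme is what guarantees a parker never waits forever. A minimal thread-side sketch of the same shape (do_work() is a stand-in, not an io_uring function):

	#include <linux/kthread.h>

	static void do_work(void *data)
	{
		/* process pending submissions, sleep when idle, ... */
	}

	static int worker_fn(void *data)
	{
		while (!kthread_should_park())
			do_work(data);

		/* Always park: whoever called kthread_park() is blocked
		 * waiting for this. After kthread_stop() unparks us, we
		 * fall through and exit. */
		kthread_parkme();
		return 0;
	}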
@@ -2170,7 +2155,7 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
 		ret = io_submit_sqe(ctx, &s, statep);
 		if (ret)
-			io_cqring_add_event(ctx, s.sqe->user_data, ret, 0);
+			io_cqring_add_event(ctx, s.sqe->user_data, ret);
 	}
 	io_commit_sqring(ctx);
@@ -2182,6 +2167,8 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
 static unsigned io_cqring_events(struct io_cq_ring *ring)
 {
+	/* See comment at the top of this file */
+	smp_rmb();
 	return READ_ONCE(ring->r.tail) - READ_ONCE(ring->r.head);
 }
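
Moving the smp_rmb() into io_cqring_events() means every reader of the CQ tail gets the barrier, including the new wait_event_interruptible() condition further down, instead of relying on each call site remembering it. The barrier orders the tail read before subsequent reads of CQE contents, pairing with the write barrier the kernel issues before bumping the tail. The same acquire-style pattern in a userspace sketch (illustrative names, not the liburing API):

	#include <stdatomic.h>
	#include <stdint.h>

	struct cq_ring {
		_Atomic uint32_t head;	/* advanced by the consumer */
		_Atomic uint32_t tail;	/* published by the producer */
		uint32_t ring_mask;
		/* array of CQEs follows in the shared mapping */
	};

	/* Completions ready to reap. The acquire load on tail orders this
	 * read before any later reads of the CQE array, like smp_rmb(). */
	static uint32_t cq_ready(struct cq_ring *ring)
	{
		uint32_t tail = atomic_load_explicit(&ring->tail,
						     memory_order_acquire);
		uint32_t head = atomic_load_explicit(&ring->head,
						     memory_order_relaxed);
		return tail - head;
	}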
@@ -2194,11 +2181,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 {
 	struct io_cq_ring *ring = ctx->cq_ring;
 	sigset_t ksigmask, sigsaved;
-	DEFINE_WAIT(wait);
 	int ret;
 
-	/* See comment at the top of this file */
-	smp_rmb();
 	if (io_cqring_events(ring) >= min_events)
 		return 0;
@@ -2216,23 +2200,9 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			return ret;
 	}
 
-	do {
-		prepare_to_wait(&ctx->wait, &wait, TASK_INTERRUPTIBLE);
-
-		ret = 0;
-		/* See comment at the top of this file */
-		smp_rmb();
-		if (io_cqring_events(ring) >= min_events)
-			break;
-
-		schedule();
-
-		ret = -EINTR;
-		if (signal_pending(current))
-			break;
-	} while (1);
-
-	finish_wait(&ctx->wait, &wait);
+	ret = wait_event_interruptible(ctx->wait, io_cqring_events(ring) >= min_events);
+	if (ret == -ERESTARTSYS)
+		ret = -EINTR;
 
 	if (sig)
 		restore_user_sigmask(sig, &sigsaved);
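
wait_event_interruptible() is essentially the loop that was deleted: it re-checks the condition around every sleep and returns -ERESTARTSYS when a signal is pending, which the caller maps back to -EINTR. And since the barrier now lives inside io_cqring_events(), the condition expression carries its own ordering. The simplified shape of the macro (condensed from include/linux/wait.h, details elided):

	ret = 0;
	for (;;) {
		prepare_to_wait(&wq, &wait, TASK_INTERRUPTIBLE);
		if (condition)
			break;
		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}
		schedule();
	}
	finish_wait(&wq, &wait);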
@@ -2273,8 +2243,11 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 static void io_sq_thread_stop(struct io_ring_ctx *ctx)
 {
 	if (ctx->sqo_thread) {
-		ctx->sqo_stop = 1;
-		mb();
+		/*
+		 * The park is a bit of a work-around, without it we get
+		 * warning spews on shutdown with SQPOLL set and affinity
+		 * set to a single CPU.
+		 */
 		kthread_park(ctx->sqo_thread);
 		kthread_stop(ctx->sqo_thread);
 		ctx->sqo_thread = NULL;
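
On the teardown side, kthread_park() blocks until the target thread actually reaches kthread_parkme(). Under the old scheme the thread could observe sqo_stop, leave its loop and return without ever parking, so a racing kthread_park() from io_finish_async() would wait forever; that is the infinite wait named in the shortlog. With parking as the stop signal, teardown reduces to the sequence below (worker_fn as in the earlier sketch):

	/* Stopper-side sketch of the park-then-stop sequence. */
	static void stop_worker(struct task_struct *t)
	{
		kthread_park(t);	/* returns once t is in kthread_parkme() */
		kthread_stop(t);	/* unparks t and waits for it to exit */
	}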
@@ -2467,10 +2440,11 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
 		ctx->sq_thread_idle = HZ;
 
 	if (p->flags & IORING_SETUP_SQ_AFF) {
-		int cpu = array_index_nospec(p->sq_thread_cpu,
-						nr_cpu_ids);
+		int cpu = p->sq_thread_cpu;
 
 		ret = -EINVAL;
+		if (cpu >= nr_cpu_ids)
+			goto err;
 		if (!cpu_online(cpu))
 			goto err;
...
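
The SQ_AFF bug: array_index_nospec() only clamps an index against speculative out-of-bounds access, it does not reject bad input, so an out-of-range sq_thread_cpu was silently masked to 0 instead of failing. The explicit bounds check restores the -EINVAL. Where a user-supplied value really is used later as an array index, the usual shape is validate first, then clamp; a sketch (the patch itself only needs the explicit check):

	if (cpu >= nr_cpu_ids)
		return -EINVAL;
	cpu = array_index_nospec(cpu, nr_cpu_ids);	/* speculation-safe index */
	if (!cpu_online(cpu))
		return -EINVAL;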