Commit 975a0f40 authored by Linus Torvalds

Merge tag 'for-linus-20190428' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "A set of io_uring fixes that should go into this release. In
  particular, this contains:

   - The mutex lock vs ctx ref count fix (me)

   - Removal of a dead variable (me)

   - Two race fixes (Stefan)

   - Ring head/tail condition fix for poll full SQ detection (Stefan)"

* tag 'for-linus-20190428' of git://git.kernel.dk/linux-block:
  io_uring: remove 'state' argument from io_{read,write} path
  io_uring: fix poll full SQ detection
  io_uring: fix race condition when sq threads goes sleeping
  io_uring: fix race condition reading SQ entries
  io_uring: fail io_uring_register(2) on a dying io_uring instance
parents 14f974d7 8358e3a8
fs/io_uring.c
@@ -740,7 +740,7 @@ static bool io_file_supports_async(struct file *file)
 }
 
 static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
-		      bool force_nonblock, struct io_submit_state *state)
+		      bool force_nonblock)
 {
 	const struct io_uring_sqe *sqe = s->sqe;
 	struct io_ring_ctx *ctx = req->ctx;
@@ -938,7 +938,7 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len)
 }
 
 static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
-		   bool force_nonblock, struct io_submit_state *state)
+		   bool force_nonblock)
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct kiocb *kiocb = &req->rw;
@@ -947,7 +947,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
 	size_t iov_count;
 	int ret;
 
-	ret = io_prep_rw(req, s, force_nonblock, state);
+	ret = io_prep_rw(req, s, force_nonblock);
 	if (ret)
 		return ret;
 	file = kiocb->ki_filp;
@@ -985,7 +985,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
 }
 
 static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
-		    bool force_nonblock, struct io_submit_state *state)
+		    bool force_nonblock)
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct kiocb *kiocb = &req->rw;
@@ -994,7 +994,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
 	size_t iov_count;
 	int ret;
 
-	ret = io_prep_rw(req, s, force_nonblock, state);
+	ret = io_prep_rw(req, s, force_nonblock);
 	if (ret)
 		return ret;
 
@@ -1336,8 +1336,7 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 }
 
 static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
-			   const struct sqe_submit *s, bool force_nonblock,
-			   struct io_submit_state *state)
+			   const struct sqe_submit *s, bool force_nonblock)
 {
 	int ret, opcode;
 
@@ -1353,18 +1352,18 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	case IORING_OP_READV:
 		if (unlikely(s->sqe->buf_index))
 			return -EINVAL;
-		ret = io_read(req, s, force_nonblock, state);
+		ret = io_read(req, s, force_nonblock);
 		break;
 	case IORING_OP_WRITEV:
 		if (unlikely(s->sqe->buf_index))
 			return -EINVAL;
-		ret = io_write(req, s, force_nonblock, state);
+		ret = io_write(req, s, force_nonblock);
 		break;
 	case IORING_OP_READ_FIXED:
-		ret = io_read(req, s, force_nonblock, state);
+		ret = io_read(req, s, force_nonblock);
 		break;
 	case IORING_OP_WRITE_FIXED:
-		ret = io_write(req, s, force_nonblock, state);
+		ret = io_write(req, s, force_nonblock);
 		break;
 	case IORING_OP_FSYNC:
 		ret = io_fsync(req, s->sqe, force_nonblock);
@@ -1457,7 +1456,7 @@ static void io_sq_wq_submit_work(struct work_struct *work)
 		s->has_user = cur_mm != NULL;
 		s->needs_lock = true;
 		do {
-			ret = __io_submit_sqe(ctx, req, s, false, NULL);
+			ret = __io_submit_sqe(ctx, req, s, false);
 			/*
 			 * We can get EAGAIN for polled IO even though
 			 * we're forcing a sync submission from here,
@@ -1623,7 +1622,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
 	if (unlikely(ret))
 		goto out;
 
-	ret = __io_submit_sqe(ctx, req, s, true, state);
+	ret = __io_submit_sqe(ctx, req, s, true);
 	if (ret == -EAGAIN) {
 		struct io_uring_sqe *sqe_copy;
 
@@ -1739,7 +1738,8 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
 	head = ctx->cached_sq_head;
 	/* See comment at the top of this file */
 	smp_rmb();
-	if (head == READ_ONCE(ring->r.tail))
+	/* make sure SQ entry isn't read before tail */
+	if (head == smp_load_acquire(&ring->r.tail))
 		return false;
 
 	head = READ_ONCE(ring->array[head & ctx->sq_mask]);
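For context on the race fixed above: userspace fills in the SQ array entry before it advances the tail, so the kernel must not read the entry until it has observed the new tail value. Below is a minimal user-space sketch of that producer/consumer pairing using C11 atomics rather than the kernel's smp_load_acquire()/barrier macros; the ring layout and names are illustrative, not the real io_uring structures.

#include <stdatomic.h>
#include <stdint.h>

#define RING_ENTRIES	8u
#define RING_MASK	(RING_ENTRIES - 1u)

struct sq_ring_sketch {
	_Atomic uint32_t tail;		/* advanced by the producer */
	uint32_t head;			/* owned by the consumer */
	uint32_t array[RING_ENTRIES];	/* entries published by the producer */
};

/* producer: fill in the entry first, then publish it by releasing the tail */
static void sq_produce(struct sq_ring_sketch *r, uint32_t value)
{
	uint32_t tail = atomic_load_explicit(&r->tail, memory_order_relaxed);

	r->array[tail & RING_MASK] = value;
	atomic_store_explicit(&r->tail, tail + 1, memory_order_release);
}

/*
 * consumer: acquire the tail before touching the array, so the entry read
 * below cannot be an older, stale value (fullness checks omitted for brevity)
 */
static int sq_consume(struct sq_ring_sketch *r, uint32_t *value)
{
	uint32_t tail = atomic_load_explicit(&r->tail, memory_order_acquire);

	if (r->head == tail)
		return 0;		/* ring is empty */
	*value = r->array[r->head++ & RING_MASK];
	return 1;
}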
@@ -1864,7 +1864,8 @@ static int io_sq_thread(void *data)
 
 			/* Tell userspace we may need a wakeup call */
 			ctx->sq_ring->flags |= IORING_SQ_NEED_WAKEUP;
-			smp_wmb();
+			/* make sure to read SQ tail after writing flags */
+			smp_mb();
 
 			if (!io_get_sqring(ctx, &sqes[0])) {
 				if (kthread_should_stop()) {
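The smp_wmb() to smp_mb() change above addresses a store-then-load pattern: the sq thread stores IORING_SQ_NEED_WAKEUP and then loads the SQ tail, while the submitter stores the new tail and then loads the flags. A write barrier only orders stores, so each side's load could be reordered before its store and both sides could miss each other's update (the thread sleeps, userspace never issues the wakeup). A rough user-space analogue of the fixed pattern with C11 atomics; the names are illustrative, not the kernel's:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define NEED_WAKEUP	1u

static _Atomic uint32_t ring_flags;	/* stands in for sq_ring->flags */
static _Atomic uint32_t ring_tail;	/* stands in for sq_ring->r.tail */

/*
 * kernel-side sq thread: announce that it is about to sleep, then re-check
 * the tail; the full fence keeps the load ordered after the store
 */
static bool sq_thread_may_sleep(uint32_t cached_head)
{
	atomic_fetch_or_explicit(&ring_flags, NEED_WAKEUP, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* the smp_mb() analogue */
	return atomic_load_explicit(&ring_tail, memory_order_relaxed) == cached_head;
}

/*
 * userspace submitter: publish the new tail, then check whether the sq
 * thread asked for an explicit wakeup via io_uring_enter()
 */
static bool submitter_needs_wakeup(uint32_t new_tail)
{
	atomic_store_explicit(&ring_tail, new_tail, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);
	return atomic_load_explicit(&ring_flags, memory_order_relaxed) & NEED_WAKEUP;
}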
@@ -2574,7 +2575,8 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
 	poll_wait(file, &ctx->cq_wait, wait);
 	/* See comment at the top of this file */
 	smp_rmb();
-	if (READ_ONCE(ctx->sq_ring->r.tail) + 1 != ctx->cached_sq_head)
+	if (READ_ONCE(ctx->sq_ring->r.tail) - ctx->cached_sq_head !=
+	    ctx->sq_ring->ring_entries)
 		mask |= EPOLLOUT | EPOLLWRNORM;
 	if (READ_ONCE(ctx->cq_ring->r.head) != ctx->cached_cq_tail)
 		mask |= EPOLLIN | EPOLLRDNORM;
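The poll fix above replaces the "tail + 1 != head" test with a count-based one. With free-running 32-bit indices that only ever increment, the number of in-flight entries is simply tail - head (unsigned subtraction is well-defined across wraparound), and the SQ is full exactly when that count equals ring_entries; anything less means there is room, hence EPOLLOUT. A small sketch of the arithmetic; the names mirror the fields above but this is not the kernel structure:

#include <stdbool.h>
#include <stdint.h>

struct ring_idx {
	uint32_t head;		/* consumer index, free-running */
	uint32_t tail;		/* producer index, free-running */
	uint32_t ring_entries;	/* capacity, a power of two */
};

/* entries currently in the ring; unsigned math handles index wraparound */
static uint32_t ring_used(const struct ring_idx *r)
{
	return r->tail - r->head;
}

/* full only when every slot is in use; otherwise the submitter can post */
static bool ring_full(const struct ring_idx *r)
{
	return ring_used(r) == r->ring_entries;
}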
@@ -2934,6 +2936,14 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 {
 	int ret;
 
+	/*
+	 * We're inside the ring mutex, if the ref is already dying, then
+	 * someone else killed the ctx or is already going through
+	 * io_uring_register().
+	 */
+	if (percpu_ref_is_dying(&ctx->refs))
+		return -ENXIO;
+
 	percpu_ref_kill(&ctx->refs);
 
 	/*
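The -ENXIO check above relies on the ring mutex serializing io_uring_register(2) against teardown: the first caller that sees a live ref under the lock is the one allowed to kill it, and anyone arriving after the ref is already dying must refuse rather than kill it a second time. A simplified user-space sketch of that "check, then kill, under one lock" pattern, using a pthread mutex and a plain flag instead of percpu_ref; the names and error path here are illustrative only:

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>

struct ring_ctx_sketch {
	pthread_mutex_t uring_lock;	/* stands in for ctx->uring_lock */
	bool refs_dying;		/* stands in for percpu_ref_is_dying(&ctx->refs) */
};

static int ring_register(struct ring_ctx_sketch *ctx)
{
	int ret = 0;

	pthread_mutex_lock(&ctx->uring_lock);
	if (ctx->refs_dying) {
		/* someone else already killed the ctx or is mid-register:
		 * bail out instead of killing the ref again */
		ret = -ENXIO;
		goto out;
	}
	ctx->refs_dying = true;		/* stands in for percpu_ref_kill() */

	/* ... quiesce existing users and do the registration work ... */

	ctx->refs_dying = false;	/* stands in for re-arming the ref */
out:
	pthread_mutex_unlock(&ctx->uring_lock);
	return ret;
}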