Commit 534121d2 authored by Linus Torvalds

Merge tag 'io_uring-5.5-20191226' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:

 - Removal of now unused busy wqe list (Hillf)

 - Add cond_resched() to io-wq work processing (Hillf)

 - And then the series that I hinted at from last week, which removes
   the sqe from the io_kiocb and keeps all sqe handling on the prep
   side. This guarantees that an opcode can't do the wrong thing and
   read the sqe more than once. This is unchanged from last week, no
   issues have been observed with this in testing. Hence I really think
   we should fold this into 5.5.

* tag 'io_uring-5.5-20191226' of git://git.kernel.dk/linux-block:
  io-wq: add cond_resched() to worker thread
  io-wq: remove unused busy list from io_sqe
  io_uring: pass in 'sqe' to the prep handlers
  io_uring: standardize the prep methods
  io_uring: read 'count' for IORING_OP_TIMEOUT in prep handler
  io_uring: move all prep state for IORING_OP_{SEND,RECV}_MGS to prep handler
  io_uring: move all prep state for IORING_OP_CONNECT to prep handler
  io_uring: add and use struct io_rw for read/writes
  io_uring: use u64_to_user_ptr() consistently
parents 0f710a55 fd1c4bc6
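
The design point behind the prep-side series is easy to lose in the diff below: every field of the shared submission queue entry is read exactly once, in the opcode's prep handler, and copied into state the request owns; the issue path only ever consults that private copy, so userspace racing on the SQ ring can no longer make an opcode observe two different values for the same field. The following is a minimal, self-contained sketch of that pattern with invented names (app_sqe, app_req, app_prep, app_issue, READ_FIELD); it illustrates the idea and is not the io_uring code itself.

#include <stdint.h>

/* Hypothetical shared submission entry, mapped and writable by userspace. */
struct app_sqe {
	uint8_t  opcode;
	uint64_t addr;
	uint32_t len;
};

/* Private, kernel-owned request state, filled in exactly once at prep time. */
struct app_req {
	uint8_t  opcode;
	uint64_t addr;
	uint32_t len;
};

/* Mimics the kernel's READ_ONCE(): a single volatile load of the shared field. */
#define READ_FIELD(x) (*(volatile __typeof__(x) *)&(x))

/* Prep: the only code that dereferences the shared sqe. */
static void app_prep(struct app_req *req, const struct app_sqe *sqe)
{
	req->opcode = READ_FIELD(sqe->opcode);
	req->addr   = READ_FIELD(sqe->addr);
	req->len    = READ_FIELD(sqe->len);
}

/* Issue: works purely from the private copy; never touches the sqe again. */
static int app_issue(const struct app_req *req)
{
	/* ... perform the operation using req->opcode/addr/len ... */
	return (int)req->len;
}

The diff below enforces exactly this split: the sqe pointer is dropped from struct io_kiocb, and each prep handler takes the sqe as an argument and is the only place allowed to read it.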
fs/io-wq.c

@@ -92,7 +92,6 @@ struct io_wqe {
 	struct io_wqe_acct acct[2];
 
 	struct hlist_nulls_head free_list;
-	struct hlist_nulls_head busy_list;
 	struct list_head all_list;
 
 	struct io_wq *wq;
@@ -327,7 +326,6 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
 	if (worker->flags & IO_WORKER_F_FREE) {
 		worker->flags &= ~IO_WORKER_F_FREE;
 		hlist_nulls_del_init_rcu(&worker->nulls_node);
-		hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->busy_list);
 	}
 
 	/*
@@ -365,7 +363,6 @@ static bool __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker)
 {
 	if (!(worker->flags & IO_WORKER_F_FREE)) {
 		worker->flags |= IO_WORKER_F_FREE;
-		hlist_nulls_del_init_rcu(&worker->nulls_node);
 		hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
 	}
@@ -432,6 +429,8 @@ static void io_worker_handle_work(struct io_worker *worker)
 		if (signal_pending(current))
 			flush_signals(current);
 
+		cond_resched();
+
 		spin_lock_irq(&worker->lock);
 		worker->cur_work = work;
 		spin_unlock_irq(&worker->lock);
@@ -798,10 +797,6 @@ void io_wq_cancel_all(struct io_wq *wq)
 	set_bit(IO_WQ_BIT_CANCEL, &wq->state);
 
-	/*
-	 * Browse both lists, as there's a gap between handing work off
-	 * to a worker and the worker putting itself on the busy_list
-	 */
 	rcu_read_lock();
 	for_each_node(node) {
 		struct io_wqe *wqe = wq->wqes[node];
@@ -1049,7 +1044,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 		spin_lock_init(&wqe->lock);
 		INIT_WQ_LIST(&wqe->work_list);
 		INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0);
-		INIT_HLIST_NULLS_HEAD(&wqe->busy_list, 1);
 		INIT_LIST_HEAD(&wqe->all_list);
 	}
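
The cond_resched() hunk above is a one-liner; for context, this is the general shape of a worker loop with a voluntary preemption point between items. The work-list type here (struct example_work) is invented for illustration and is not an io-wq structure; only current, signal_pending(), flush_signals() and cond_resched() are real kernel APIs.

#include <linux/sched.h>
#include <linux/sched/signal.h>

/* Illustrative only: a made-up work chain, not the io-wq queue. */
struct example_work {
	struct example_work *next;
	void (*func)(struct example_work *work);
};

/* Process a chain of work items, yielding between items when needed. */
static void example_process_work(struct example_work *work)
{
	while (work) {
		struct example_work *next = work->next;

		if (signal_pending(current))
			flush_signals(current);

		/*
		 * Voluntary preemption point: without it, a long run of
		 * queued work items can hog the CPU on a non-preemptible
		 * kernel.
		 */
		cond_resched();

		work->func(work);
		work = next;
	}
}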
fs/io_uring.c

@@ -330,6 +330,26 @@ struct io_timeout {
 	struct file			*file;
 	u64				addr;
 	int				flags;
+	unsigned			count;
+};
+
+struct io_rw {
+	/* NOTE: kiocb has the file as the first member, so don't do it here */
+	struct kiocb			kiocb;
+	u64				addr;
+	u64				len;
+};
+
+struct io_connect {
+	struct file			*file;
+	struct sockaddr __user		*addr;
+	int				addr_len;
+};
+
+struct io_sr_msg {
+	struct file			*file;
+	struct user_msghdr __user	*msg;
+	int				msg_flags;
 };
 
 struct io_async_connect {
@@ -351,7 +371,6 @@ struct io_async_rw {
 };
 
 struct io_async_ctx {
-	struct io_uring_sqe		sqe;
 	union {
 		struct io_async_rw	rw;
 		struct io_async_msghdr	msg;
@@ -369,15 +388,16 @@ struct io_async_ctx {
 struct io_kiocb {
 	union {
 		struct file		*file;
-		struct kiocb		rw;
+		struct io_rw		rw;
 		struct io_poll_iocb	poll;
 		struct io_accept	accept;
 		struct io_sync		sync;
 		struct io_cancel	cancel;
 		struct io_timeout	timeout;
+		struct io_connect	connect;
+		struct io_sr_msg	sr_msg;
 	};
 
-	const struct io_uring_sqe	*sqe;
 	struct io_async_ctx		*io;
 	struct file			*ring_file;
 	int				ring_fd;
@@ -411,7 +431,6 @@ struct io_kiocb {
 #define REQ_F_INFLIGHT		16384	/* on inflight list */
 #define REQ_F_COMP_LOCKED	32768	/* completion under lock */
 #define REQ_F_HARDLINK		65536	/* doesn't sever on completion < 0 */
-#define REQ_F_PREPPED		131072	/* request already opcode prepared */
 	u64				user_data;
 	u32				result;
 	u32				sequence;
@@ -609,33 +628,31 @@ static inline bool io_prep_async_work(struct io_kiocb *req,
 {
 	bool do_hashed = false;
 
-	if (req->sqe) {
-		switch (req->opcode) {
-		case IORING_OP_WRITEV:
-		case IORING_OP_WRITE_FIXED:
-			/* only regular files should be hashed for writes */
-			if (req->flags & REQ_F_ISREG)
-				do_hashed = true;
-			/* fall-through */
-		case IORING_OP_READV:
-		case IORING_OP_READ_FIXED:
-		case IORING_OP_SENDMSG:
-		case IORING_OP_RECVMSG:
-		case IORING_OP_ACCEPT:
-		case IORING_OP_POLL_ADD:
-		case IORING_OP_CONNECT:
-			/*
-			 * We know REQ_F_ISREG is not set on some of these
-			 * opcodes, but this enables us to keep the check in
-			 * just one place.
-			 */
-			if (!(req->flags & REQ_F_ISREG))
-				req->work.flags |= IO_WQ_WORK_UNBOUND;
-			break;
-		}
-		if (io_req_needs_user(req))
-			req->work.flags |= IO_WQ_WORK_NEEDS_USER;
+	switch (req->opcode) {
+	case IORING_OP_WRITEV:
+	case IORING_OP_WRITE_FIXED:
+		/* only regular files should be hashed for writes */
+		if (req->flags & REQ_F_ISREG)
+			do_hashed = true;
+		/* fall-through */
+	case IORING_OP_READV:
+	case IORING_OP_READ_FIXED:
+	case IORING_OP_SENDMSG:
+	case IORING_OP_RECVMSG:
+	case IORING_OP_ACCEPT:
+	case IORING_OP_POLL_ADD:
+	case IORING_OP_CONNECT:
+		/*
+		 * We know REQ_F_ISREG is not set on some of these
+		 * opcodes, but this enables us to keep the check in
+		 * just one place.
+		 */
+		if (!(req->flags & REQ_F_ISREG))
+			req->work.flags |= IO_WQ_WORK_UNBOUND;
+		break;
 	}
 
+	if (io_req_needs_user(req))
+		req->work.flags |= IO_WQ_WORK_NEEDS_USER;
+
 	*link = io_prep_linked_timeout(req);
 	return do_hashed;
@@ -1180,7 +1197,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
 	ret = 0;
 	list_for_each_entry_safe(req, tmp, &ctx->poll_list, list) {
-		struct kiocb *kiocb = &req->rw;
+		struct kiocb *kiocb = &req->rw.kiocb;
 
 		/*
 		 * Move completed entries to our local list. If we find a
@@ -1335,7 +1352,7 @@ static inline void req_set_fail_links(struct io_kiocb *req)
 
 static void io_complete_rw_common(struct kiocb *kiocb, long res)
 {
-	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
+	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
 
 	if (kiocb->ki_flags & IOCB_WRITE)
 		kiocb_end_write(req);
@@ -1347,7 +1364,7 @@ static void io_complete_rw_common(struct kiocb *kiocb, long res)
 
 static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
 {
-	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
+	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
 
 	io_complete_rw_common(kiocb, res);
 	io_put_req(req);
@@ -1355,7 +1372,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
 
 static struct io_kiocb *__io_complete_rw(struct kiocb *kiocb, long res)
 {
-	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
+	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
 	struct io_kiocb *nxt = NULL;
 
 	io_complete_rw_common(kiocb, res);
@@ -1366,7 +1383,7 @@ static struct io_kiocb *__io_complete_rw(struct kiocb *kiocb, long res)
 
 static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
 {
-	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
+	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
 
 	if (kiocb->ki_flags & IOCB_WRITE)
 		kiocb_end_write(req);
@@ -1400,7 +1417,7 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
 
 		list_req = list_first_entry(&ctx->poll_list, struct io_kiocb,
 						list);
-		if (list_req->rw.ki_filp != req->rw.ki_filp)
+		if (list_req->file != req->file)
 			ctx->poll_multi_file = true;
 	}
...@@ -1471,11 +1488,11 @@ static bool io_file_supports_async(struct file *file) ...@@ -1471,11 +1488,11 @@ static bool io_file_supports_async(struct file *file)
return false; return false;
} }
static int io_prep_rw(struct io_kiocb *req, bool force_nonblock) static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
bool force_nonblock)
{ {
const struct io_uring_sqe *sqe = req->sqe;
struct io_ring_ctx *ctx = req->ctx; struct io_ring_ctx *ctx = req->ctx;
struct kiocb *kiocb = &req->rw; struct kiocb *kiocb = &req->rw.kiocb;
unsigned ioprio; unsigned ioprio;
int ret; int ret;
...@@ -1524,6 +1541,12 @@ static int io_prep_rw(struct io_kiocb *req, bool force_nonblock) ...@@ -1524,6 +1541,12 @@ static int io_prep_rw(struct io_kiocb *req, bool force_nonblock)
return -EINVAL; return -EINVAL;
kiocb->ki_complete = io_complete_rw; kiocb->ki_complete = io_complete_rw;
} }
req->rw.addr = READ_ONCE(sqe->addr);
req->rw.len = READ_ONCE(sqe->len);
/* we own ->private, reuse it for the buffer index */
req->rw.kiocb.private = (void *) (unsigned long)
READ_ONCE(sqe->buf_index);
return 0; return 0;
} }
...@@ -1557,11 +1580,11 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret, struct io_kiocb **nxt, ...@@ -1557,11 +1580,11 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret, struct io_kiocb **nxt,
io_rw_done(kiocb, ret); io_rw_done(kiocb, ret);
} }
static ssize_t io_import_fixed(struct io_ring_ctx *ctx, int rw, static ssize_t io_import_fixed(struct io_kiocb *req, int rw,
const struct io_uring_sqe *sqe,
struct iov_iter *iter) struct iov_iter *iter)
{ {
size_t len = READ_ONCE(sqe->len); struct io_ring_ctx *ctx = req->ctx;
size_t len = req->rw.len;
struct io_mapped_ubuf *imu; struct io_mapped_ubuf *imu;
unsigned index, buf_index; unsigned index, buf_index;
size_t offset; size_t offset;
...@@ -1571,13 +1594,13 @@ static ssize_t io_import_fixed(struct io_ring_ctx *ctx, int rw, ...@@ -1571,13 +1594,13 @@ static ssize_t io_import_fixed(struct io_ring_ctx *ctx, int rw,
if (unlikely(!ctx->user_bufs)) if (unlikely(!ctx->user_bufs))
return -EFAULT; return -EFAULT;
buf_index = READ_ONCE(sqe->buf_index); buf_index = (unsigned long) req->rw.kiocb.private;
if (unlikely(buf_index >= ctx->nr_user_bufs)) if (unlikely(buf_index >= ctx->nr_user_bufs))
return -EFAULT; return -EFAULT;
index = array_index_nospec(buf_index, ctx->nr_user_bufs); index = array_index_nospec(buf_index, ctx->nr_user_bufs);
imu = &ctx->user_bufs[index]; imu = &ctx->user_bufs[index];
buf_addr = READ_ONCE(sqe->addr); buf_addr = req->rw.addr;
/* overflow */ /* overflow */
if (buf_addr + len < buf_addr) if (buf_addr + len < buf_addr)
...@@ -1634,25 +1657,20 @@ static ssize_t io_import_fixed(struct io_ring_ctx *ctx, int rw, ...@@ -1634,25 +1657,20 @@ static ssize_t io_import_fixed(struct io_ring_ctx *ctx, int rw,
static ssize_t io_import_iovec(int rw, struct io_kiocb *req, static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
struct iovec **iovec, struct iov_iter *iter) struct iovec **iovec, struct iov_iter *iter)
{ {
const struct io_uring_sqe *sqe = req->sqe; void __user *buf = u64_to_user_ptr(req->rw.addr);
void __user *buf = u64_to_user_ptr(READ_ONCE(sqe->addr)); size_t sqe_len = req->rw.len;
size_t sqe_len = READ_ONCE(sqe->len);
u8 opcode; u8 opcode;
/*
* We're reading ->opcode for the second time, but the first read
* doesn't care whether it's _FIXED or not, so it doesn't matter
* whether ->opcode changes concurrently. The first read does care
* about whether it is a READ or a WRITE, so we don't trust this read
* for that purpose and instead let the caller pass in the read/write
* flag.
*/
opcode = req->opcode; opcode = req->opcode;
if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) { if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) {
*iovec = NULL; *iovec = NULL;
return io_import_fixed(req->ctx, rw, sqe, iter); return io_import_fixed(req, rw, iter);
} }
/* buffer index only valid with fixed read/write */
if (req->rw.kiocb.private)
return -EINVAL;
if (req->io) { if (req->io) {
struct io_async_rw *iorw = &req->io->rw; struct io_async_rw *iorw = &req->io->rw;
...@@ -1750,13 +1768,7 @@ static void io_req_map_rw(struct io_kiocb *req, ssize_t io_size, ...@@ -1750,13 +1768,7 @@ static void io_req_map_rw(struct io_kiocb *req, ssize_t io_size,
static int io_alloc_async_ctx(struct io_kiocb *req) static int io_alloc_async_ctx(struct io_kiocb *req)
{ {
req->io = kmalloc(sizeof(*req->io), GFP_KERNEL); req->io = kmalloc(sizeof(*req->io), GFP_KERNEL);
if (req->io) { return req->io == NULL;
memcpy(&req->io->sqe, req->sqe, sizeof(req->io->sqe));
req->sqe = &req->io->sqe;
return 0;
}
return 1;
} }
static void io_rw_async(struct io_wq_work **workptr) static void io_rw_async(struct io_wq_work **workptr)
...@@ -1782,46 +1794,52 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size, ...@@ -1782,46 +1794,52 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
return 0; return 0;
} }
static int io_read_prep(struct io_kiocb *req, struct iovec **iovec, static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
struct iov_iter *iter, bool force_nonblock) bool force_nonblock)
{ {
struct io_async_ctx *io;
struct iov_iter iter;
ssize_t ret; ssize_t ret;
ret = io_prep_rw(req, force_nonblock); ret = io_prep_rw(req, sqe, force_nonblock);
if (ret) if (ret)
return ret; return ret;
if (unlikely(!(req->file->f_mode & FMODE_READ))) if (unlikely(!(req->file->f_mode & FMODE_READ)))
return -EBADF; return -EBADF;
return io_import_iovec(READ, req, iovec, iter); if (!req->io)
return 0;
io = req->io;
io->rw.iov = io->rw.fast_iov;
req->io = NULL;
ret = io_import_iovec(READ, req, &io->rw.iov, &iter);
req->io = io;
if (ret < 0)
return ret;
io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter);
return 0;
} }
static int io_read(struct io_kiocb *req, struct io_kiocb **nxt, static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock) bool force_nonblock)
{ {
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct kiocb *kiocb = &req->rw; struct kiocb *kiocb = &req->rw.kiocb;
struct iov_iter iter; struct iov_iter iter;
struct file *file;
size_t iov_count; size_t iov_count;
ssize_t io_size, ret; ssize_t io_size, ret;
if (!req->io) { ret = io_import_iovec(READ, req, &iovec, &iter);
ret = io_read_prep(req, &iovec, &iter, force_nonblock); if (ret < 0)
if (ret < 0) return ret;
return ret;
} else {
ret = io_import_iovec(READ, req, &iovec, &iter);
if (ret < 0)
return ret;
}
/* Ensure we clear previously set non-block flag */ /* Ensure we clear previously set non-block flag */
if (!force_nonblock) if (!force_nonblock)
req->rw.ki_flags &= ~IOCB_NOWAIT; req->rw.kiocb.ki_flags &= ~IOCB_NOWAIT;
file = req->file;
io_size = ret; io_size = ret;
if (req->flags & REQ_F_LINK) if (req->flags & REQ_F_LINK)
req->result = io_size; req->result = io_size;
...@@ -1830,20 +1848,20 @@ static int io_read(struct io_kiocb *req, struct io_kiocb **nxt, ...@@ -1830,20 +1848,20 @@ static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
* If the file doesn't support async, mark it as REQ_F_MUST_PUNT so * If the file doesn't support async, mark it as REQ_F_MUST_PUNT so
* we know to async punt it even if it was opened O_NONBLOCK * we know to async punt it even if it was opened O_NONBLOCK
*/ */
if (force_nonblock && !io_file_supports_async(file)) { if (force_nonblock && !io_file_supports_async(req->file)) {
req->flags |= REQ_F_MUST_PUNT; req->flags |= REQ_F_MUST_PUNT;
goto copy_iov; goto copy_iov;
} }
iov_count = iov_iter_count(&iter); iov_count = iov_iter_count(&iter);
ret = rw_verify_area(READ, file, &kiocb->ki_pos, iov_count); ret = rw_verify_area(READ, req->file, &kiocb->ki_pos, iov_count);
if (!ret) { if (!ret) {
ssize_t ret2; ssize_t ret2;
if (file->f_op->read_iter) if (req->file->f_op->read_iter)
ret2 = call_read_iter(file, kiocb, &iter); ret2 = call_read_iter(req->file, kiocb, &iter);
else else
ret2 = loop_rw_iter(READ, file, kiocb, &iter); ret2 = loop_rw_iter(READ, req->file, kiocb, &iter);
/* /*
* In case of a short read, punt to async. This can happen * In case of a short read, punt to async. This can happen
...@@ -1875,46 +1893,52 @@ static int io_read(struct io_kiocb *req, struct io_kiocb **nxt, ...@@ -1875,46 +1893,52 @@ static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
return ret; return ret;
} }
static int io_write_prep(struct io_kiocb *req, struct iovec **iovec, static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
struct iov_iter *iter, bool force_nonblock) bool force_nonblock)
{ {
struct io_async_ctx *io;
struct iov_iter iter;
ssize_t ret; ssize_t ret;
ret = io_prep_rw(req, force_nonblock); ret = io_prep_rw(req, sqe, force_nonblock);
if (ret) if (ret)
return ret; return ret;
if (unlikely(!(req->file->f_mode & FMODE_WRITE))) if (unlikely(!(req->file->f_mode & FMODE_WRITE)))
return -EBADF; return -EBADF;
return io_import_iovec(WRITE, req, iovec, iter); if (!req->io)
return 0;
io = req->io;
io->rw.iov = io->rw.fast_iov;
req->io = NULL;
ret = io_import_iovec(WRITE, req, &io->rw.iov, &iter);
req->io = io;
if (ret < 0)
return ret;
io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter);
return 0;
} }
static int io_write(struct io_kiocb *req, struct io_kiocb **nxt, static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock) bool force_nonblock)
{ {
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct kiocb *kiocb = &req->rw; struct kiocb *kiocb = &req->rw.kiocb;
struct iov_iter iter; struct iov_iter iter;
struct file *file;
size_t iov_count; size_t iov_count;
ssize_t ret, io_size; ssize_t ret, io_size;
if (!req->io) { ret = io_import_iovec(WRITE, req, &iovec, &iter);
ret = io_write_prep(req, &iovec, &iter, force_nonblock); if (ret < 0)
if (ret < 0) return ret;
return ret;
} else {
ret = io_import_iovec(WRITE, req, &iovec, &iter);
if (ret < 0)
return ret;
}
/* Ensure we clear previously set non-block flag */ /* Ensure we clear previously set non-block flag */
if (!force_nonblock) if (!force_nonblock)
req->rw.ki_flags &= ~IOCB_NOWAIT; req->rw.kiocb.ki_flags &= ~IOCB_NOWAIT;
file = kiocb->ki_filp;
io_size = ret; io_size = ret;
if (req->flags & REQ_F_LINK) if (req->flags & REQ_F_LINK)
req->result = io_size; req->result = io_size;
...@@ -1934,7 +1958,7 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt, ...@@ -1934,7 +1958,7 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
goto copy_iov; goto copy_iov;
iov_count = iov_iter_count(&iter); iov_count = iov_iter_count(&iter);
ret = rw_verify_area(WRITE, file, &kiocb->ki_pos, iov_count); ret = rw_verify_area(WRITE, req->file, &kiocb->ki_pos, iov_count);
if (!ret) { if (!ret) {
ssize_t ret2; ssize_t ret2;
...@@ -1946,17 +1970,17 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt, ...@@ -1946,17 +1970,17 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
* we return to userspace. * we return to userspace.
*/ */
if (req->flags & REQ_F_ISREG) { if (req->flags & REQ_F_ISREG) {
__sb_start_write(file_inode(file)->i_sb, __sb_start_write(file_inode(req->file)->i_sb,
SB_FREEZE_WRITE, true); SB_FREEZE_WRITE, true);
__sb_writers_release(file_inode(file)->i_sb, __sb_writers_release(file_inode(req->file)->i_sb,
SB_FREEZE_WRITE); SB_FREEZE_WRITE);
} }
kiocb->ki_flags |= IOCB_WRITE; kiocb->ki_flags |= IOCB_WRITE;
if (file->f_op->write_iter) if (req->file->f_op->write_iter)
ret2 = call_write_iter(file, kiocb, &iter); ret2 = call_write_iter(req->file, kiocb, &iter);
else else
ret2 = loop_rw_iter(WRITE, file, kiocb, &iter); ret2 = loop_rw_iter(WRITE, req->file, kiocb, &iter);
if (!force_nonblock || ret2 != -EAGAIN) { if (!force_nonblock || ret2 != -EAGAIN) {
kiocb_done(kiocb, ret2, nxt, req->in_async); kiocb_done(kiocb, ret2, nxt, req->in_async);
} else { } else {
...@@ -1989,13 +2013,10 @@ static int io_nop(struct io_kiocb *req) ...@@ -1989,13 +2013,10 @@ static int io_nop(struct io_kiocb *req)
return 0; return 0;
} }
static int io_prep_fsync(struct io_kiocb *req) static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{ {
const struct io_uring_sqe *sqe = req->sqe;
struct io_ring_ctx *ctx = req->ctx; struct io_ring_ctx *ctx = req->ctx;
if (req->flags & REQ_F_PREPPED)
return 0;
if (!req->file) if (!req->file)
return -EBADF; return -EBADF;
...@@ -2010,7 +2031,6 @@ static int io_prep_fsync(struct io_kiocb *req) ...@@ -2010,7 +2031,6 @@ static int io_prep_fsync(struct io_kiocb *req)
req->sync.off = READ_ONCE(sqe->off); req->sync.off = READ_ONCE(sqe->off);
req->sync.len = READ_ONCE(sqe->len); req->sync.len = READ_ONCE(sqe->len);
req->flags |= REQ_F_PREPPED;
return 0; return 0;
} }
...@@ -2036,7 +2056,7 @@ static void io_fsync_finish(struct io_wq_work **workptr) ...@@ -2036,7 +2056,7 @@ static void io_fsync_finish(struct io_wq_work **workptr)
if (io_req_cancelled(req)) if (io_req_cancelled(req))
return; return;
ret = vfs_fsync_range(req->rw.ki_filp, req->sync.off, ret = vfs_fsync_range(req->file, req->sync.off,
end > 0 ? end : LLONG_MAX, end > 0 ? end : LLONG_MAX,
req->sync.flags & IORING_FSYNC_DATASYNC); req->sync.flags & IORING_FSYNC_DATASYNC);
if (ret < 0) if (ret < 0)
...@@ -2051,11 +2071,6 @@ static int io_fsync(struct io_kiocb *req, struct io_kiocb **nxt, ...@@ -2051,11 +2071,6 @@ static int io_fsync(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock) bool force_nonblock)
{ {
struct io_wq_work *work, *old_work; struct io_wq_work *work, *old_work;
int ret;
ret = io_prep_fsync(req);
if (ret)
return ret;
/* fsync always requires a blocking context */ /* fsync always requires a blocking context */
if (force_nonblock) { if (force_nonblock) {
...@@ -2071,13 +2086,10 @@ static int io_fsync(struct io_kiocb *req, struct io_kiocb **nxt, ...@@ -2071,13 +2086,10 @@ static int io_fsync(struct io_kiocb *req, struct io_kiocb **nxt,
return 0; return 0;
} }
static int io_prep_sfr(struct io_kiocb *req) static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{ {
const struct io_uring_sqe *sqe = req->sqe;
struct io_ring_ctx *ctx = req->ctx; struct io_ring_ctx *ctx = req->ctx;
if (req->flags & REQ_F_PREPPED)
return 0;
if (!req->file) if (!req->file)
return -EBADF; return -EBADF;
...@@ -2089,7 +2101,6 @@ static int io_prep_sfr(struct io_kiocb *req) ...@@ -2089,7 +2101,6 @@ static int io_prep_sfr(struct io_kiocb *req)
req->sync.off = READ_ONCE(sqe->off); req->sync.off = READ_ONCE(sqe->off);
req->sync.len = READ_ONCE(sqe->len); req->sync.len = READ_ONCE(sqe->len);
req->sync.flags = READ_ONCE(sqe->sync_range_flags); req->sync.flags = READ_ONCE(sqe->sync_range_flags);
req->flags |= REQ_F_PREPPED;
return 0; return 0;
} }
...@@ -2102,7 +2113,7 @@ static void io_sync_file_range_finish(struct io_wq_work **workptr) ...@@ -2102,7 +2113,7 @@ static void io_sync_file_range_finish(struct io_wq_work **workptr)
if (io_req_cancelled(req)) if (io_req_cancelled(req))
return; return;
ret = sync_file_range(req->rw.ki_filp, req->sync.off, req->sync.len, ret = sync_file_range(req->file, req->sync.off, req->sync.len,
req->sync.flags); req->sync.flags);
if (ret < 0) if (ret < 0)
req_set_fail_links(req); req_set_fail_links(req);
...@@ -2116,11 +2127,6 @@ static int io_sync_file_range(struct io_kiocb *req, struct io_kiocb **nxt, ...@@ -2116,11 +2127,6 @@ static int io_sync_file_range(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock) bool force_nonblock)
{ {
struct io_wq_work *work, *old_work; struct io_wq_work *work, *old_work;
int ret;
ret = io_prep_sfr(req);
if (ret)
return ret;
/* sync_file_range always requires a blocking context */ /* sync_file_range always requires a blocking context */
if (force_nonblock) { if (force_nonblock) {
...@@ -2149,19 +2155,23 @@ static void io_sendrecv_async(struct io_wq_work **workptr) ...@@ -2149,19 +2155,23 @@ static void io_sendrecv_async(struct io_wq_work **workptr)
} }
#endif #endif
static int io_sendmsg_prep(struct io_kiocb *req, struct io_async_ctx *io) static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{ {
#if defined(CONFIG_NET) #if defined(CONFIG_NET)
const struct io_uring_sqe *sqe = req->sqe; struct io_sr_msg *sr = &req->sr_msg;
struct user_msghdr __user *msg; struct io_async_ctx *io = req->io;
unsigned flags;
sr->msg_flags = READ_ONCE(sqe->msg_flags);
sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
if (!io)
return 0;
flags = READ_ONCE(sqe->msg_flags);
msg = (struct user_msghdr __user *)(unsigned long) READ_ONCE(sqe->addr);
io->msg.iov = io->msg.fast_iov; io->msg.iov = io->msg.fast_iov;
return sendmsg_copy_msghdr(&io->msg.msg, msg, flags, &io->msg.iov); return sendmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
&io->msg.iov);
#else #else
return 0; return -EOPNOTSUPP;
#endif #endif
} }
...@@ -2169,7 +2179,6 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt, ...@@ -2169,7 +2179,6 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock) bool force_nonblock)
{ {
#if defined(CONFIG_NET) #if defined(CONFIG_NET)
const struct io_uring_sqe *sqe = req->sqe;
struct io_async_msghdr *kmsg = NULL; struct io_async_msghdr *kmsg = NULL;
struct socket *sock; struct socket *sock;
int ret; int ret;
...@@ -2183,12 +2192,6 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt, ...@@ -2183,12 +2192,6 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
struct sockaddr_storage addr; struct sockaddr_storage addr;
unsigned flags; unsigned flags;
flags = READ_ONCE(sqe->msg_flags);
if (flags & MSG_DONTWAIT)
req->flags |= REQ_F_NOWAIT;
else if (force_nonblock)
flags |= MSG_DONTWAIT;
if (req->io) { if (req->io) {
kmsg = &req->io->msg; kmsg = &req->io->msg;
kmsg->msg.msg_name = &addr; kmsg->msg.msg_name = &addr;
...@@ -2197,13 +2200,24 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt, ...@@ -2197,13 +2200,24 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
kmsg->iov = kmsg->fast_iov; kmsg->iov = kmsg->fast_iov;
kmsg->msg.msg_iter.iov = kmsg->iov; kmsg->msg.msg_iter.iov = kmsg->iov;
} else { } else {
struct io_sr_msg *sr = &req->sr_msg;
kmsg = &io.msg; kmsg = &io.msg;
kmsg->msg.msg_name = &addr; kmsg->msg.msg_name = &addr;
ret = io_sendmsg_prep(req, &io);
io.msg.iov = io.msg.fast_iov;
ret = sendmsg_copy_msghdr(&io.msg.msg, sr->msg,
sr->msg_flags, &io.msg.iov);
if (ret) if (ret)
goto out; return ret;
} }
flags = req->sr_msg.msg_flags;
if (flags & MSG_DONTWAIT)
req->flags |= REQ_F_NOWAIT;
else if (force_nonblock)
flags |= MSG_DONTWAIT;
ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags); ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
if (force_nonblock && ret == -EAGAIN) { if (force_nonblock && ret == -EAGAIN) {
if (req->io) if (req->io)
...@@ -2218,7 +2232,6 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt, ...@@ -2218,7 +2232,6 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
ret = -EINTR; ret = -EINTR;
} }
out:
if (!io_wq_current_is_worker() && kmsg && kmsg->iov != kmsg->fast_iov) if (!io_wq_current_is_worker() && kmsg && kmsg->iov != kmsg->fast_iov)
kfree(kmsg->iov); kfree(kmsg->iov);
io_cqring_add_event(req, ret); io_cqring_add_event(req, ret);
...@@ -2231,20 +2244,24 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt, ...@@ -2231,20 +2244,24 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
#endif #endif
} }
static int io_recvmsg_prep(struct io_kiocb *req, struct io_async_ctx *io) static int io_recvmsg_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{ {
#if defined(CONFIG_NET) #if defined(CONFIG_NET)
const struct io_uring_sqe *sqe = req->sqe; struct io_sr_msg *sr = &req->sr_msg;
struct user_msghdr __user *msg; struct io_async_ctx *io = req->io;
unsigned flags;
sr->msg_flags = READ_ONCE(sqe->msg_flags);
sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
if (!io)
return 0;
flags = READ_ONCE(sqe->msg_flags);
msg = (struct user_msghdr __user *)(unsigned long) READ_ONCE(sqe->addr);
io->msg.iov = io->msg.fast_iov; io->msg.iov = io->msg.fast_iov;
return recvmsg_copy_msghdr(&io->msg.msg, msg, flags, &io->msg.uaddr, return recvmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
&io->msg.iov); &io->msg.uaddr, &io->msg.iov);
#else #else
return 0; return -EOPNOTSUPP;
#endif #endif
} }
...@@ -2252,7 +2269,6 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt, ...@@ -2252,7 +2269,6 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock) bool force_nonblock)
{ {
#if defined(CONFIG_NET) #if defined(CONFIG_NET)
const struct io_uring_sqe *sqe = req->sqe;
struct io_async_msghdr *kmsg = NULL; struct io_async_msghdr *kmsg = NULL;
struct socket *sock; struct socket *sock;
int ret; int ret;
...@@ -2262,19 +2278,10 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt, ...@@ -2262,19 +2278,10 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
sock = sock_from_file(req->file, &ret); sock = sock_from_file(req->file, &ret);
if (sock) { if (sock) {
struct user_msghdr __user *msg;
struct io_async_ctx io; struct io_async_ctx io;
struct sockaddr_storage addr; struct sockaddr_storage addr;
unsigned flags; unsigned flags;
flags = READ_ONCE(sqe->msg_flags);
if (flags & MSG_DONTWAIT)
req->flags |= REQ_F_NOWAIT;
else if (force_nonblock)
flags |= MSG_DONTWAIT;
msg = (struct user_msghdr __user *) (unsigned long)
READ_ONCE(sqe->addr);
if (req->io) { if (req->io) {
kmsg = &req->io->msg; kmsg = &req->io->msg;
kmsg->msg.msg_name = &addr; kmsg->msg.msg_name = &addr;
...@@ -2283,14 +2290,27 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt, ...@@ -2283,14 +2290,27 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
kmsg->iov = kmsg->fast_iov; kmsg->iov = kmsg->fast_iov;
kmsg->msg.msg_iter.iov = kmsg->iov; kmsg->msg.msg_iter.iov = kmsg->iov;
} else { } else {
struct io_sr_msg *sr = &req->sr_msg;
kmsg = &io.msg; kmsg = &io.msg;
kmsg->msg.msg_name = &addr; kmsg->msg.msg_name = &addr;
ret = io_recvmsg_prep(req, &io);
io.msg.iov = io.msg.fast_iov;
ret = recvmsg_copy_msghdr(&io.msg.msg, sr->msg,
sr->msg_flags, &io.msg.uaddr,
&io.msg.iov);
if (ret) if (ret)
goto out; return ret;
} }
ret = __sys_recvmsg_sock(sock, &kmsg->msg, msg, kmsg->uaddr, flags); flags = req->sr_msg.msg_flags;
if (flags & MSG_DONTWAIT)
req->flags |= REQ_F_NOWAIT;
else if (force_nonblock)
flags |= MSG_DONTWAIT;
ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.msg,
kmsg->uaddr, flags);
if (force_nonblock && ret == -EAGAIN) { if (force_nonblock && ret == -EAGAIN) {
if (req->io) if (req->io)
return -EAGAIN; return -EAGAIN;
...@@ -2304,7 +2324,6 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt, ...@@ -2304,7 +2324,6 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
ret = -EINTR; ret = -EINTR;
} }
out:
if (!io_wq_current_is_worker() && kmsg && kmsg->iov != kmsg->fast_iov) if (!io_wq_current_is_worker() && kmsg && kmsg->iov != kmsg->fast_iov)
kfree(kmsg->iov); kfree(kmsg->iov);
io_cqring_add_event(req, ret); io_cqring_add_event(req, ret);
...@@ -2317,25 +2336,19 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt, ...@@ -2317,25 +2336,19 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
#endif #endif
} }
static int io_accept_prep(struct io_kiocb *req) static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{ {
#if defined(CONFIG_NET) #if defined(CONFIG_NET)
const struct io_uring_sqe *sqe = req->sqe;
struct io_accept *accept = &req->accept; struct io_accept *accept = &req->accept;
if (req->flags & REQ_F_PREPPED)
return 0;
if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
return -EINVAL; return -EINVAL;
if (sqe->ioprio || sqe->len || sqe->buf_index) if (sqe->ioprio || sqe->len || sqe->buf_index)
return -EINVAL; return -EINVAL;
accept->addr = (struct sockaddr __user *) accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
(unsigned long) READ_ONCE(sqe->addr); accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
accept->addr_len = (int __user *) (unsigned long) READ_ONCE(sqe->addr2);
accept->flags = READ_ONCE(sqe->accept_flags); accept->flags = READ_ONCE(sqe->accept_flags);
req->flags |= REQ_F_PREPPED;
return 0; return 0;
#else #else
return -EOPNOTSUPP; return -EOPNOTSUPP;
...@@ -2383,10 +2396,6 @@ static int io_accept(struct io_kiocb *req, struct io_kiocb **nxt, ...@@ -2383,10 +2396,6 @@ static int io_accept(struct io_kiocb *req, struct io_kiocb **nxt,
#if defined(CONFIG_NET) #if defined(CONFIG_NET)
int ret; int ret;
ret = io_accept_prep(req);
if (ret)
return ret;
ret = __io_accept(req, nxt, force_nonblock); ret = __io_accept(req, nxt, force_nonblock);
if (ret == -EAGAIN && force_nonblock) { if (ret == -EAGAIN && force_nonblock) {
req->work.func = io_accept_finish; req->work.func = io_accept_finish;
...@@ -2400,18 +2409,27 @@ static int io_accept(struct io_kiocb *req, struct io_kiocb **nxt, ...@@ -2400,18 +2409,27 @@ static int io_accept(struct io_kiocb *req, struct io_kiocb **nxt,
#endif #endif
} }
static int io_connect_prep(struct io_kiocb *req, struct io_async_ctx *io) static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{ {
#if defined(CONFIG_NET) #if defined(CONFIG_NET)
const struct io_uring_sqe *sqe = req->sqe; struct io_connect *conn = &req->connect;
struct sockaddr __user *addr; struct io_async_ctx *io = req->io;
int addr_len;
if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
return -EINVAL;
if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags)
return -EINVAL;
addr = (struct sockaddr __user *) (unsigned long) READ_ONCE(sqe->addr); conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
addr_len = READ_ONCE(sqe->addr2); conn->addr_len = READ_ONCE(sqe->addr2);
return move_addr_to_kernel(addr, addr_len, &io->connect.address);
if (!io)
return 0;
return move_addr_to_kernel(conn->addr, conn->addr_len,
&io->connect.address);
#else #else
return 0; return -EOPNOTSUPP;
#endif #endif
} }
...@@ -2419,30 +2437,25 @@ static int io_connect(struct io_kiocb *req, struct io_kiocb **nxt, ...@@ -2419,30 +2437,25 @@ static int io_connect(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock) bool force_nonblock)
{ {
#if defined(CONFIG_NET) #if defined(CONFIG_NET)
const struct io_uring_sqe *sqe = req->sqe;
struct io_async_ctx __io, *io; struct io_async_ctx __io, *io;
unsigned file_flags; unsigned file_flags;
int addr_len, ret; int ret;
if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
return -EINVAL;
if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags)
return -EINVAL;
addr_len = READ_ONCE(sqe->addr2);
file_flags = force_nonblock ? O_NONBLOCK : 0;
if (req->io) { if (req->io) {
io = req->io; io = req->io;
} else { } else {
ret = io_connect_prep(req, &__io); ret = move_addr_to_kernel(req->connect.addr,
req->connect.addr_len,
&__io.connect.address);
if (ret) if (ret)
goto out; goto out;
io = &__io; io = &__io;
} }
ret = __sys_connect_file(req->file, &io->connect.address, addr_len, file_flags = force_nonblock ? O_NONBLOCK : 0;
file_flags);
ret = __sys_connect_file(req->file, &io->connect.address,
req->connect.addr_len, file_flags);
if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) { if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) {
if (req->io) if (req->io)
return -EAGAIN; return -EAGAIN;
...@@ -2513,12 +2526,9 @@ static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr) ...@@ -2513,12 +2526,9 @@ static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr)
return -ENOENT; return -ENOENT;
} }
static int io_poll_remove_prep(struct io_kiocb *req) static int io_poll_remove_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{ {
const struct io_uring_sqe *sqe = req->sqe;
if (req->flags & REQ_F_PREPPED)
return 0;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL; return -EINVAL;
if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index || if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index ||
...@@ -2526,7 +2536,6 @@ static int io_poll_remove_prep(struct io_kiocb *req) ...@@ -2526,7 +2536,6 @@ static int io_poll_remove_prep(struct io_kiocb *req)
return -EINVAL; return -EINVAL;
req->poll.addr = READ_ONCE(sqe->addr); req->poll.addr = READ_ONCE(sqe->addr);
req->flags |= REQ_F_PREPPED;
return 0; return 0;
} }
...@@ -2540,10 +2549,6 @@ static int io_poll_remove(struct io_kiocb *req) ...@@ -2540,10 +2549,6 @@ static int io_poll_remove(struct io_kiocb *req)
u64 addr; u64 addr;
int ret; int ret;
ret = io_poll_remove_prep(req);
if (ret)
return ret;
addr = req->poll.addr; addr = req->poll.addr;
spin_lock_irq(&ctx->completion_lock); spin_lock_irq(&ctx->completion_lock);
ret = io_poll_cancel(ctx, addr); ret = io_poll_cancel(ctx, addr);
...@@ -2681,14 +2686,11 @@ static void io_poll_req_insert(struct io_kiocb *req) ...@@ -2681,14 +2686,11 @@ static void io_poll_req_insert(struct io_kiocb *req)
hlist_add_head(&req->hash_node, list); hlist_add_head(&req->hash_node, list);
} }
static int io_poll_add_prep(struct io_kiocb *req) static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{ {
const struct io_uring_sqe *sqe = req->sqe;
struct io_poll_iocb *poll = &req->poll; struct io_poll_iocb *poll = &req->poll;
u16 events; u16 events;
if (req->flags & REQ_F_PREPPED)
return 0;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL; return -EINVAL;
if (sqe->addr || sqe->ioprio || sqe->off || sqe->len || sqe->buf_index) if (sqe->addr || sqe->ioprio || sqe->off || sqe->len || sqe->buf_index)
...@@ -2696,7 +2698,6 @@ static int io_poll_add_prep(struct io_kiocb *req) ...@@ -2696,7 +2698,6 @@ static int io_poll_add_prep(struct io_kiocb *req)
if (!poll->file) if (!poll->file)
return -EBADF; return -EBADF;
req->flags |= REQ_F_PREPPED;
events = READ_ONCE(sqe->poll_events); events = READ_ONCE(sqe->poll_events);
poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP; poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
return 0; return 0;
...@@ -2709,11 +2710,6 @@ static int io_poll_add(struct io_kiocb *req, struct io_kiocb **nxt) ...@@ -2709,11 +2710,6 @@ static int io_poll_add(struct io_kiocb *req, struct io_kiocb **nxt)
struct io_poll_table ipt; struct io_poll_table ipt;
bool cancel = false; bool cancel = false;
__poll_t mask; __poll_t mask;
int ret;
ret = io_poll_add_prep(req);
if (ret)
return ret;
INIT_IO_WORK(&req->work, io_poll_complete_work); INIT_IO_WORK(&req->work, io_poll_complete_work);
INIT_HLIST_NODE(&req->hash_node); INIT_HLIST_NODE(&req->hash_node);
...@@ -2832,12 +2828,9 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data) ...@@ -2832,12 +2828,9 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
return 0; return 0;
} }
static int io_timeout_remove_prep(struct io_kiocb *req) static int io_timeout_remove_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{ {
const struct io_uring_sqe *sqe = req->sqe;
if (req->flags & REQ_F_PREPPED)
return 0;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL; return -EINVAL;
if (sqe->flags || sqe->ioprio || sqe->buf_index || sqe->len) if (sqe->flags || sqe->ioprio || sqe->buf_index || sqe->len)
...@@ -2848,7 +2841,6 @@ static int io_timeout_remove_prep(struct io_kiocb *req) ...@@ -2848,7 +2841,6 @@ static int io_timeout_remove_prep(struct io_kiocb *req)
if (req->timeout.flags) if (req->timeout.flags)
return -EINVAL; return -EINVAL;
req->flags |= REQ_F_PREPPED;
return 0; return 0;
} }
...@@ -2860,10 +2852,6 @@ static int io_timeout_remove(struct io_kiocb *req) ...@@ -2860,10 +2852,6 @@ static int io_timeout_remove(struct io_kiocb *req)
struct io_ring_ctx *ctx = req->ctx; struct io_ring_ctx *ctx = req->ctx;
int ret; int ret;
ret = io_timeout_remove_prep(req);
if (ret)
return ret;
spin_lock_irq(&ctx->completion_lock); spin_lock_irq(&ctx->completion_lock);
ret = io_timeout_cancel(ctx, req->timeout.addr); ret = io_timeout_cancel(ctx, req->timeout.addr);
...@@ -2877,10 +2865,9 @@ static int io_timeout_remove(struct io_kiocb *req) ...@@ -2877,10 +2865,9 @@ static int io_timeout_remove(struct io_kiocb *req)
return 0; return 0;
} }
static int io_timeout_prep(struct io_kiocb *req, struct io_async_ctx *io, static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
bool is_timeout_link) bool is_timeout_link)
{ {
const struct io_uring_sqe *sqe = req->sqe;
struct io_timeout_data *data; struct io_timeout_data *data;
unsigned flags; unsigned flags;
...@@ -2894,7 +2881,12 @@ static int io_timeout_prep(struct io_kiocb *req, struct io_async_ctx *io, ...@@ -2894,7 +2881,12 @@ static int io_timeout_prep(struct io_kiocb *req, struct io_async_ctx *io,
if (flags & ~IORING_TIMEOUT_ABS) if (flags & ~IORING_TIMEOUT_ABS)
return -EINVAL; return -EINVAL;
data = &io->timeout; req->timeout.count = READ_ONCE(sqe->off);
if (!req->io && io_alloc_async_ctx(req))
return -ENOMEM;
data = &req->io->timeout;
data->req = req; data->req = req;
req->flags |= REQ_F_TIMEOUT; req->flags |= REQ_F_TIMEOUT;
...@@ -2912,21 +2904,12 @@ static int io_timeout_prep(struct io_kiocb *req, struct io_async_ctx *io, ...@@ -2912,21 +2904,12 @@ static int io_timeout_prep(struct io_kiocb *req, struct io_async_ctx *io,
static int io_timeout(struct io_kiocb *req) static int io_timeout(struct io_kiocb *req)
{ {
const struct io_uring_sqe *sqe = req->sqe;
unsigned count; unsigned count;
struct io_ring_ctx *ctx = req->ctx; struct io_ring_ctx *ctx = req->ctx;
struct io_timeout_data *data; struct io_timeout_data *data;
struct list_head *entry; struct list_head *entry;
unsigned span = 0; unsigned span = 0;
int ret;
if (!req->io) {
if (io_alloc_async_ctx(req))
return -ENOMEM;
ret = io_timeout_prep(req, req->io, false);
if (ret)
return ret;
}
data = &req->io->timeout; data = &req->io->timeout;
/* /*
...@@ -2934,7 +2917,7 @@ static int io_timeout(struct io_kiocb *req) ...@@ -2934,7 +2917,7 @@ static int io_timeout(struct io_kiocb *req)
* timeout event to be satisfied. If it isn't set, then this is * timeout event to be satisfied. If it isn't set, then this is
* a pure timeout request, sequence isn't used. * a pure timeout request, sequence isn't used.
*/ */
count = READ_ONCE(sqe->off); count = req->timeout.count;
if (!count) { if (!count) {
req->flags |= REQ_F_TIMEOUT_NOSEQ; req->flags |= REQ_F_TIMEOUT_NOSEQ;
spin_lock_irq(&ctx->completion_lock); spin_lock_irq(&ctx->completion_lock);
...@@ -3052,19 +3035,15 @@ static void io_async_find_and_cancel(struct io_ring_ctx *ctx, ...@@ -3052,19 +3035,15 @@ static void io_async_find_and_cancel(struct io_ring_ctx *ctx,
io_put_req_find_next(req, nxt); io_put_req_find_next(req, nxt);
} }
static int io_async_cancel_prep(struct io_kiocb *req) static int io_async_cancel_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{ {
const struct io_uring_sqe *sqe = req->sqe;
if (req->flags & REQ_F_PREPPED)
return 0;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL; return -EINVAL;
if (sqe->flags || sqe->ioprio || sqe->off || sqe->len || if (sqe->flags || sqe->ioprio || sqe->off || sqe->len ||
sqe->cancel_flags) sqe->cancel_flags)
return -EINVAL; return -EINVAL;
req->flags |= REQ_F_PREPPED;
req->cancel.addr = READ_ONCE(sqe->addr); req->cancel.addr = READ_ONCE(sqe->addr);
return 0; return 0;
} }
...@@ -3072,21 +3051,14 @@ static int io_async_cancel_prep(struct io_kiocb *req) ...@@ -3072,21 +3051,14 @@ static int io_async_cancel_prep(struct io_kiocb *req)
static int io_async_cancel(struct io_kiocb *req, struct io_kiocb **nxt) static int io_async_cancel(struct io_kiocb *req, struct io_kiocb **nxt)
{ {
struct io_ring_ctx *ctx = req->ctx; struct io_ring_ctx *ctx = req->ctx;
int ret;
ret = io_async_cancel_prep(req);
if (ret)
return ret;
io_async_find_and_cancel(ctx, req, req->cancel.addr, nxt, 0); io_async_find_and_cancel(ctx, req, req->cancel.addr, nxt, 0);
return 0; return 0;
} }
static int io_req_defer_prep(struct io_kiocb *req) static int io_req_defer_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{ {
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct io_async_ctx *io = req->io;
struct iov_iter iter;
ssize_t ret = 0; ssize_t ret = 0;
switch (req->opcode) { switch (req->opcode) {
...@@ -3094,61 +3066,47 @@ static int io_req_defer_prep(struct io_kiocb *req) ...@@ -3094,61 +3066,47 @@ static int io_req_defer_prep(struct io_kiocb *req)
break; break;
case IORING_OP_READV: case IORING_OP_READV:
case IORING_OP_READ_FIXED: case IORING_OP_READ_FIXED:
/* ensure prep does right import */ ret = io_read_prep(req, sqe, true);
req->io = NULL;
ret = io_read_prep(req, &iovec, &iter, true);
req->io = io;
if (ret < 0)
break;
io_req_map_rw(req, ret, iovec, inline_vecs, &iter);
ret = 0;
break; break;
case IORING_OP_WRITEV: case IORING_OP_WRITEV:
case IORING_OP_WRITE_FIXED: case IORING_OP_WRITE_FIXED:
/* ensure prep does right import */ ret = io_write_prep(req, sqe, true);
req->io = NULL;
ret = io_write_prep(req, &iovec, &iter, true);
req->io = io;
if (ret < 0)
break;
io_req_map_rw(req, ret, iovec, inline_vecs, &iter);
ret = 0;
break; break;
case IORING_OP_POLL_ADD: case IORING_OP_POLL_ADD:
ret = io_poll_add_prep(req); ret = io_poll_add_prep(req, sqe);
break; break;
case IORING_OP_POLL_REMOVE: case IORING_OP_POLL_REMOVE:
ret = io_poll_remove_prep(req); ret = io_poll_remove_prep(req, sqe);
break; break;
case IORING_OP_FSYNC: case IORING_OP_FSYNC:
ret = io_prep_fsync(req); ret = io_prep_fsync(req, sqe);
break; break;
case IORING_OP_SYNC_FILE_RANGE: case IORING_OP_SYNC_FILE_RANGE:
ret = io_prep_sfr(req); ret = io_prep_sfr(req, sqe);
break; break;
case IORING_OP_SENDMSG: case IORING_OP_SENDMSG:
ret = io_sendmsg_prep(req, io); ret = io_sendmsg_prep(req, sqe);
break; break;
case IORING_OP_RECVMSG: case IORING_OP_RECVMSG:
ret = io_recvmsg_prep(req, io); ret = io_recvmsg_prep(req, sqe);
break; break;
case IORING_OP_CONNECT: case IORING_OP_CONNECT:
ret = io_connect_prep(req, io); ret = io_connect_prep(req, sqe);
break; break;
case IORING_OP_TIMEOUT: case IORING_OP_TIMEOUT:
ret = io_timeout_prep(req, io, false); ret = io_timeout_prep(req, sqe, false);
break; break;
case IORING_OP_TIMEOUT_REMOVE: case IORING_OP_TIMEOUT_REMOVE:
ret = io_timeout_remove_prep(req); ret = io_timeout_remove_prep(req, sqe);
break; break;
case IORING_OP_ASYNC_CANCEL: case IORING_OP_ASYNC_CANCEL:
ret = io_async_cancel_prep(req); ret = io_async_cancel_prep(req, sqe);
break; break;
case IORING_OP_LINK_TIMEOUT: case IORING_OP_LINK_TIMEOUT:
ret = io_timeout_prep(req, io, true); ret = io_timeout_prep(req, sqe, true);
break; break;
case IORING_OP_ACCEPT: case IORING_OP_ACCEPT:
ret = io_accept_prep(req); ret = io_accept_prep(req, sqe);
break; break;
default: default:
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n", printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
...@@ -3160,7 +3118,7 @@ static int io_req_defer_prep(struct io_kiocb *req) ...@@ -3160,7 +3118,7 @@ static int io_req_defer_prep(struct io_kiocb *req)
return ret; return ret;
} }
static int io_req_defer(struct io_kiocb *req) static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{ {
struct io_ring_ctx *ctx = req->ctx; struct io_ring_ctx *ctx = req->ctx;
int ret; int ret;
...@@ -3169,10 +3127,10 @@ static int io_req_defer(struct io_kiocb *req) ...@@ -3169,10 +3127,10 @@ static int io_req_defer(struct io_kiocb *req)
if (!req_need_defer(req) && list_empty(&ctx->defer_list)) if (!req_need_defer(req) && list_empty(&ctx->defer_list))
return 0; return 0;
if (io_alloc_async_ctx(req)) if (!req->io && io_alloc_async_ctx(req))
return -EAGAIN; return -EAGAIN;
ret = io_req_defer_prep(req); ret = io_req_defer_prep(req, sqe);
if (ret < 0) if (ret < 0)
return ret; return ret;
...@@ -3188,9 +3146,8 @@ static int io_req_defer(struct io_kiocb *req) ...@@ -3188,9 +3146,8 @@ static int io_req_defer(struct io_kiocb *req)
return -EIOCBQUEUED; return -EIOCBQUEUED;
} }
__attribute__((nonnull)) static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
static int io_issue_sqe(struct io_kiocb *req, struct io_kiocb **nxt, struct io_kiocb **nxt, bool force_nonblock)
bool force_nonblock)
{ {
struct io_ring_ctx *ctx = req->ctx; struct io_ring_ctx *ctx = req->ctx;
int ret; int ret;
...@@ -3200,52 +3157,109 @@ static int io_issue_sqe(struct io_kiocb *req, struct io_kiocb **nxt, ...@@ -3200,52 +3157,109 @@ static int io_issue_sqe(struct io_kiocb *req, struct io_kiocb **nxt,
ret = io_nop(req); ret = io_nop(req);
break; break;
case IORING_OP_READV: case IORING_OP_READV:
if (unlikely(req->sqe->buf_index))
return -EINVAL;
ret = io_read(req, nxt, force_nonblock);
break;
case IORING_OP_WRITEV:
if (unlikely(req->sqe->buf_index))
return -EINVAL;
ret = io_write(req, nxt, force_nonblock);
break;
case IORING_OP_READ_FIXED: case IORING_OP_READ_FIXED:
if (sqe) {
ret = io_read_prep(req, sqe, force_nonblock);
if (ret < 0)
break;
}
ret = io_read(req, nxt, force_nonblock); ret = io_read(req, nxt, force_nonblock);
break; break;
case IORING_OP_WRITEV:
case IORING_OP_WRITE_FIXED: case IORING_OP_WRITE_FIXED:
if (sqe) {
ret = io_write_prep(req, sqe, force_nonblock);
if (ret < 0)
break;
}
ret = io_write(req, nxt, force_nonblock); ret = io_write(req, nxt, force_nonblock);
break; break;
case IORING_OP_FSYNC: case IORING_OP_FSYNC:
if (sqe) {
ret = io_prep_fsync(req, sqe);
if (ret < 0)
break;
}
ret = io_fsync(req, nxt, force_nonblock); ret = io_fsync(req, nxt, force_nonblock);
break; break;
case IORING_OP_POLL_ADD: case IORING_OP_POLL_ADD:
if (sqe) {
ret = io_poll_add_prep(req, sqe);
if (ret)
break;
}
ret = io_poll_add(req, nxt); ret = io_poll_add(req, nxt);
break; break;
case IORING_OP_POLL_REMOVE: case IORING_OP_POLL_REMOVE:
if (sqe) {
ret = io_poll_remove_prep(req, sqe);
if (ret < 0)
break;
}
ret = io_poll_remove(req); ret = io_poll_remove(req);
break; break;
case IORING_OP_SYNC_FILE_RANGE: case IORING_OP_SYNC_FILE_RANGE:
if (sqe) {
ret = io_prep_sfr(req, sqe);
if (ret < 0)
break;
}
ret = io_sync_file_range(req, nxt, force_nonblock); ret = io_sync_file_range(req, nxt, force_nonblock);
break; break;
case IORING_OP_SENDMSG: case IORING_OP_SENDMSG:
if (sqe) {
ret = io_sendmsg_prep(req, sqe);
if (ret < 0)
break;
}
ret = io_sendmsg(req, nxt, force_nonblock); ret = io_sendmsg(req, nxt, force_nonblock);
break; break;
case IORING_OP_RECVMSG: case IORING_OP_RECVMSG:
if (sqe) {
ret = io_recvmsg_prep(req, sqe);
if (ret)
break;
}
ret = io_recvmsg(req, nxt, force_nonblock); ret = io_recvmsg(req, nxt, force_nonblock);
break; break;
case IORING_OP_TIMEOUT: case IORING_OP_TIMEOUT:
if (sqe) {
ret = io_timeout_prep(req, sqe, false);
if (ret)
break;
}
ret = io_timeout(req); ret = io_timeout(req);
break; break;
case IORING_OP_TIMEOUT_REMOVE: case IORING_OP_TIMEOUT_REMOVE:
if (sqe) {
ret = io_timeout_remove_prep(req, sqe);
if (ret)
break;
}
ret = io_timeout_remove(req); ret = io_timeout_remove(req);
break; break;
case IORING_OP_ACCEPT: case IORING_OP_ACCEPT:
if (sqe) {
ret = io_accept_prep(req, sqe);
if (ret)
break;
}
ret = io_accept(req, nxt, force_nonblock); ret = io_accept(req, nxt, force_nonblock);
break; break;
case IORING_OP_CONNECT: case IORING_OP_CONNECT:
if (sqe) {
ret = io_connect_prep(req, sqe);
if (ret)
break;
}
ret = io_connect(req, nxt, force_nonblock); ret = io_connect(req, nxt, force_nonblock);
break; break;
case IORING_OP_ASYNC_CANCEL: case IORING_OP_ASYNC_CANCEL:
if (sqe) {
ret = io_async_cancel_prep(req, sqe);
if (ret)
break;
}
ret = io_async_cancel(req, nxt); ret = io_async_cancel(req, nxt);
break; break;
default: default:
...@@ -3289,7 +3303,7 @@ static void io_wq_submit_work(struct io_wq_work **workptr) ...@@ -3289,7 +3303,7 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
req->has_user = (work->flags & IO_WQ_WORK_HAS_MM) != 0; req->has_user = (work->flags & IO_WQ_WORK_HAS_MM) != 0;
req->in_async = true; req->in_async = true;
do { do {
ret = io_issue_sqe(req, &nxt, false); ret = io_issue_sqe(req, NULL, &nxt, false);
/* /*
* We can get EAGAIN for polled IO even though we're * We can get EAGAIN for polled IO even though we're
* forcing a sync submission from here, since we can't * forcing a sync submission from here, since we can't
...@@ -3355,14 +3369,15 @@ static inline struct file *io_file_from_index(struct io_ring_ctx *ctx, ...@@ -3355,14 +3369,15 @@ static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
return table->files[index & IORING_FILE_TABLE_MASK]; return table->files[index & IORING_FILE_TABLE_MASK];
} }
static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req) static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{ {
struct io_ring_ctx *ctx = req->ctx; struct io_ring_ctx *ctx = req->ctx;
unsigned flags; unsigned flags;
int fd, ret; int fd, ret;
flags = READ_ONCE(req->sqe->flags); flags = READ_ONCE(sqe->flags);
fd = READ_ONCE(req->sqe->fd); fd = READ_ONCE(sqe->fd);
if (flags & IOSQE_IO_DRAIN) if (flags & IOSQE_IO_DRAIN)
req->flags |= REQ_F_IO_DRAIN; req->flags |= REQ_F_IO_DRAIN;
@@ -3494,7 +3509,7 @@ static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
 	return nxt;
 }
 
-static void __io_queue_sqe(struct io_kiocb *req)
+static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_kiocb *linked_timeout;
 	struct io_kiocb *nxt = NULL;
@@ -3503,7 +3518,7 @@ static void __io_queue_sqe(struct io_kiocb *req)
 again:
 	linked_timeout = io_prep_linked_timeout(req);
 
-	ret = io_issue_sqe(req, &nxt, true);
+	ret = io_issue_sqe(req, sqe, &nxt, true);
 
 	/*
 	 * We async punt it if the file wasn't marked NOWAIT, or if the file
@@ -3550,7 +3565,7 @@ static void __io_queue_sqe(struct io_kiocb *req)
 	}
 }
 
-static void io_queue_sqe(struct io_kiocb *req)
+static void io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	int ret;
 
@@ -3560,7 +3575,7 @@ static void io_queue_sqe(struct io_kiocb *req)
 	}
 	req->ctx->drain_next = (req->flags & REQ_F_DRAIN_LINK);
 
-	ret = io_req_defer(req);
+	ret = io_req_defer(req, sqe);
 	if (ret) {
 		if (ret != -EIOCBQUEUED) {
 			io_cqring_add_event(req, ret);
@@ -3568,7 +3583,7 @@ static void io_queue_sqe(struct io_kiocb *req)
 			io_double_put_req(req);
 		}
 	} else
-		__io_queue_sqe(req);
+		__io_queue_sqe(req, sqe);
 }
 
 static inline void io_queue_link_head(struct io_kiocb *req)
@@ -3577,25 +3592,25 @@ static inline void io_queue_link_head(struct io_kiocb *req)
 		io_cqring_add_event(req, -ECANCELED);
 		io_double_put_req(req);
 	} else
-		io_queue_sqe(req);
+		io_queue_sqe(req, NULL);
 }
 
 #define SQE_VALID_FLAGS	(IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK|	\
 				IOSQE_IO_HARDLINK)
 
-static bool io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
-			  struct io_kiocb **link)
+static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+			  struct io_submit_state *state, struct io_kiocb **link)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	int ret;
 
 	/* enforce forwards compatibility on users */
-	if (unlikely(req->sqe->flags & ~SQE_VALID_FLAGS)) {
+	if (unlikely(sqe->flags & ~SQE_VALID_FLAGS)) {
 		ret = -EINVAL;
 		goto err_req;
 	}
 
-	ret = io_req_set_file(state, req);
+	ret = io_req_set_file(state, req, sqe);
 	if (unlikely(ret)) {
 err_req:
 		io_cqring_add_event(req, ret);
@@ -3613,10 +3628,10 @@ static bool io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
 	if (*link) {
 		struct io_kiocb *prev = *link;
 
-		if (req->sqe->flags & IOSQE_IO_DRAIN)
+		if (sqe->flags & IOSQE_IO_DRAIN)
 			(*link)->flags |= REQ_F_DRAIN_LINK | REQ_F_IO_DRAIN;
 
-		if (req->sqe->flags & IOSQE_IO_HARDLINK)
+		if (sqe->flags & IOSQE_IO_HARDLINK)
 			req->flags |= REQ_F_HARDLINK;
 
 		if (io_alloc_async_ctx(req)) {
@@ -3624,7 +3639,7 @@ static bool io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
 			goto err_req;
 		}
 
-		ret = io_req_defer_prep(req);
+		ret = io_req_defer_prep(req, sqe);
 		if (ret) {
 			/* fail even hard links since we don't submit */
 			prev->flags |= REQ_F_FAIL_LINK;
@@ -3632,15 +3647,18 @@ static bool io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
 		}
 		trace_io_uring_link(ctx, req, prev);
 		list_add_tail(&req->link_list, &prev->link_list);
-	} else if (req->sqe->flags & (IOSQE_IO_LINK|IOSQE_IO_HARDLINK)) {
+	} else if (sqe->flags & (IOSQE_IO_LINK|IOSQE_IO_HARDLINK)) {
 		req->flags |= REQ_F_LINK;
-		if (req->sqe->flags & IOSQE_IO_HARDLINK)
+		if (sqe->flags & IOSQE_IO_HARDLINK)
 			req->flags |= REQ_F_HARDLINK;
 
 		INIT_LIST_HEAD(&req->link_list);
+		ret = io_req_defer_prep(req, sqe);
+		if (ret)
+			req->flags |= REQ_F_FAIL_LINK;
 		*link = req;
 	} else {
-		io_queue_sqe(req);
+		io_queue_sqe(req, sqe);
 	}
 
 	return true;
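The branch above is the link-building logic the series keeps intact: a request that follows a link joins the chain of the current head, a request carrying IOSQE_IO_LINK or IOSQE_IO_HARDLINK starts a new chain, and everything else is queued directly; in each linked case the prep state is captured right away via io_req_defer_prep(). A hedged user-space sketch of that chain handling, with made-up flag and struct names rather than the kernel's:

#include <stdio.h>

#define F_LINK		(1U << 0)	/* illustrative flags, not IOSQE_* values */
#define F_HARDLINK	(1U << 1)

struct req {
	unsigned	flags;
	int		id;
	struct req	*next;		/* simplified stand-in for link_list */
};

static void queue_head(struct req *head)
{
	for (struct req *r = head; r; r = r->next)
		printf("issue req %d%s\n", r->id,
		       (r->flags & F_HARDLINK) ? " (hard link)" : "");
}

/* Mirrors the shape of io_submit_sqe(): grow the chain, or queue directly. */
static void submit(struct req *req, struct req **link)
{
	if (*link) {
		/* append to the existing chain; only the head gets queued */
		struct req *prev = *link;

		while (prev->next)
			prev = prev->next;
		prev->next = req;
		if (!(req->flags & (F_LINK | F_HARDLINK))) {
			/* chain is complete: queue the head, reset link state */
			queue_head(*link);
			*link = NULL;
		}
	} else if (req->flags & (F_LINK | F_HARDLINK)) {
		*link = req;			/* start a new chain */
	} else {
		queue_head(req);		/* standalone request */
	}
}

int main(void)
{
	struct req a = { .flags = F_LINK, .id = 1 };
	struct req b = { .flags = F_HARDLINK, .id = 2 };
	struct req c = { .flags = 0, .id = 3 };	/* terminates the chain */
	struct req *link = NULL;

	submit(&a, &link);
	submit(&b, &link);
	submit(&c, &link);
	return 0;
}

In the real code the chain head is queued from the submission loop once it sees a request without link flags; the sketch inlines that step for brevity.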
@@ -3685,14 +3703,15 @@ static void io_commit_sqring(struct io_ring_ctx *ctx)
 }
 
 /*
- * Fetch an sqe, if one is available. Note that req->sqe will point to memory
+ * Fetch an sqe, if one is available. Note that sqe_ptr will point to memory
  * that is mapped by userspace. This means that care needs to be taken to
  * ensure that reads are stable, as we cannot rely on userspace always
  * being a good citizen. If members of the sqe are validated and then later
  * used, it's important that those reads are done through READ_ONCE() to
  * prevent a re-load down the line.
  */
-static bool io_get_sqring(struct io_ring_ctx *ctx, struct io_kiocb *req)
+static bool io_get_sqring(struct io_ring_ctx *ctx, struct io_kiocb *req,
+			  const struct io_uring_sqe **sqe_ptr)
 {
 	struct io_rings *rings = ctx->rings;
 	u32 *sq_array = ctx->sq_array;
@@ -3719,9 +3738,9 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct io_kiocb *req)
 	 * link list.
 	 */
 	req->sequence = ctx->cached_sq_head;
-	req->sqe = &ctx->sq_sqes[head];
-	req->opcode = READ_ONCE(req->sqe->opcode);
-	req->user_data = READ_ONCE(req->sqe->user_data);
+	*sqe_ptr = &ctx->sq_sqes[head];
+	req->opcode = READ_ONCE((*sqe_ptr)->opcode);
+	req->user_data = READ_ONCE((*sqe_ptr)->user_data);
 	ctx->cached_sq_head++;
 	return true;
 }
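The comment block above is the rationale for the whole series: the SQ ring is mapped into userspace, so any sqe field that is validated and later used must be loaded exactly once, or a hostile or buggy submitter could change it between the check and the use. A rough user-space illustration of the single-load idea, using a volatile access in place of the kernel's READ_ONCE() (which is built on the same mechanism, with extra checking):

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for a descriptor living in shared, user-writable memory. */
struct shared_sqe {
	uint8_t		opcode;
	uint32_t	len;
};

/* Force a single load of an lvalue, similar in spirit to READ_ONCE(). */
#define read_once(x)	(*(volatile __typeof__(x) *)&(x))

static int handle(struct shared_sqe *sqe)
{
	/*
	 * Copy the field into a local once.  Re-reading sqe->len after the
	 * bounds check would let the other side change it under us.
	 */
	uint32_t len = read_once(sqe->len);

	if (len > 4096)
		return -1;
	printf("using validated len=%u\n", len);	/* never touch sqe->len again */
	return 0;
}

int main(void)
{
	struct shared_sqe sqe = { .opcode = 1, .len = 512 };

	return handle(&sqe) ? 1 : 0;
}

Copying the value into a local and only ever using the local is exactly what the prep handlers now do for every sqe field.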
@@ -3753,6 +3772,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 	}
 
 	for (i = 0; i < nr; i++) {
+		const struct io_uring_sqe *sqe;
 		struct io_kiocb *req;
 		unsigned int sqe_flags;
 
@@ -3762,7 +3782,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 			submitted = -EAGAIN;
 			break;
 		}
-		if (!io_get_sqring(ctx, req)) {
+		if (!io_get_sqring(ctx, req, &sqe)) {
 			__io_free_req(req);
 			break;
 		}
@@ -3776,7 +3796,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 		}
 
 		submitted++;
-		sqe_flags = req->sqe->flags;
+		sqe_flags = sqe->flags;
 
 		req->ring_file = ring_file;
 		req->ring_fd = ring_fd;
@@ -3784,7 +3804,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 		req->in_async = async;
 		req->needs_fixed_file = async;
 		trace_io_uring_submit_sqe(ctx, req->user_data, true, async);
-		if (!io_submit_sqe(req, statep, &link))
+		if (!io_submit_sqe(req, sqe, statep, &link))
 			break;
 		/*
 		 * If previous wasn't linked and we have a linked command,
@@ -4702,7 +4722,7 @@ static int io_copy_iov(struct io_ring_ctx *ctx, struct iovec *dst,
 	if (copy_from_user(&ciov, &ciovs[index], sizeof(ciov)))
 		return -EFAULT;
 
-	dst->iov_base = (void __user *) (unsigned long) ciov.iov_base;
+	dst->iov_base = u64_to_user_ptr((u64)ciov.iov_base);
 	dst->iov_len = ciov.iov_len;
 	return 0;
 }
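The last hunk replaces an open-coded double cast with u64_to_user_ptr(), the kernel helper for turning a u64 carried in a fixed-width ABI field back into a user pointer (it casts through uintptr_t and type-checks its argument). A user-space approximation of the same idea, with an illustrative macro and struct rather than the kernel's:

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative user-space stand-in for u64_to_user_ptr(): go through
 * uintptr_t so the compiler does not warn about casting an integer of a
 * different size to a pointer, and so any narrowing on 32-bit builds is
 * explicit rather than accidental.
 */
#define u64_to_ptr(x)	((void *)(uintptr_t)(x))

struct compat_iovec {		/* a pointer carried in a fixed-width 64-bit ABI field */
	uint64_t iov_base;
	uint64_t iov_len;
};

int main(void)
{
	char buf[16];
	struct compat_iovec ciov = {
		.iov_base = (uintptr_t)buf,
		.iov_len  = sizeof(buf),
	};
	void *p = u64_to_ptr(ciov.iov_base);

	printf("round-tripped pointer matches: %d\n", p == (void *)buf);
	return 0;
}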