Commit 956eb6cb authored by Linus Torvalds

Merge tag 'for-4.21/aio-20181221' of git://git.kernel.dk/linux-block

Pull aio updates from Jens Axboe:
 "Flushing out pre-patches for the buffered/polled aio series. Some
  fixes in here, but also optimizations"

* tag 'for-4.21/aio-20181221' of git://git.kernel.dk/linux-block:
  aio: abstract out io_event filler helper
  aio: split out iocb copy from io_submit_one()
  aio: use iocb_put() instead of open coding it
  aio: only use blk plugs for > 2 depth submissions
  aio: don't zero entire aio_kiocb aio_get_req()
  aio: separate out ring reservation from req allocation
  aio: use assigned completion handler
parents 0e9da3fb 875736bb
...@@ -70,6 +70,12 @@ struct aio_ring { ...@@ -70,6 +70,12 @@ struct aio_ring {
struct io_event io_events[0]; struct io_event io_events[0];
}; /* 128 bytes + ring size */ }; /* 128 bytes + ring size */
/*
 * Plugging is meant to work with larger batches of IOs. A submission
 * only sets up a block plug when the number of requests being submitted
 * is strictly greater than AIO_PLUG_THRESHOLD; smaller batches skip the
 * plug entirely.
 */
#define AIO_PLUG_THRESHOLD 2
#define AIO_RING_PAGES 8 #define AIO_RING_PAGES 8
struct kioctx_table { struct kioctx_table {
...@@ -902,7 +908,7 @@ static void put_reqs_available(struct kioctx *ctx, unsigned nr) ...@@ -902,7 +908,7 @@ static void put_reqs_available(struct kioctx *ctx, unsigned nr)
local_irq_restore(flags); local_irq_restore(flags);
} }
static bool get_reqs_available(struct kioctx *ctx) static bool __get_reqs_available(struct kioctx *ctx)
{ {
struct kioctx_cpu *kcpu; struct kioctx_cpu *kcpu;
bool ret = false; bool ret = false;
...@@ -994,6 +1000,14 @@ static void user_refill_reqs_available(struct kioctx *ctx) ...@@ -994,6 +1000,14 @@ static void user_refill_reqs_available(struct kioctx *ctx)
spin_unlock_irq(&ctx->completion_lock); spin_unlock_irq(&ctx->completion_lock);
} }
/*
 * get_reqs_available
 *	Try to reserve a request slot on @ctx. If the first attempt via
 *	__get_reqs_available() fails, call user_refill_reqs_available()
 *	and retry exactly once. Returns true when either attempt succeeds,
 *	false otherwise.
 */
static bool get_reqs_available(struct kioctx *ctx)
{
	bool reserved = __get_reqs_available(ctx);

	if (!reserved) {
		/* Nothing available on the first try: refill, then retry. */
		user_refill_reqs_available(ctx);
		reserved = __get_reqs_available(ctx);
	}

	return reserved;
}
/* aio_get_req /* aio_get_req
* Allocate a slot for an aio request. * Allocate a slot for an aio request.
* Returns NULL if no requests are free. * Returns NULL if no requests are free.
...@@ -1002,24 +1016,16 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx) ...@@ -1002,24 +1016,16 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
{ {
struct aio_kiocb *req; struct aio_kiocb *req;
if (!get_reqs_available(ctx)) { req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
user_refill_reqs_available(ctx);
if (!get_reqs_available(ctx))
return NULL;
}
req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
if (unlikely(!req)) if (unlikely(!req))
goto out_put; return NULL;
percpu_ref_get(&ctx->reqs); percpu_ref_get(&ctx->reqs);
req->ki_ctx = ctx;
INIT_LIST_HEAD(&req->ki_list); INIT_LIST_HEAD(&req->ki_list);
refcount_set(&req->ki_refcnt, 0); refcount_set(&req->ki_refcnt, 0);
req->ki_ctx = ctx; req->ki_eventfd = NULL;
return req; return req;
out_put:
put_reqs_available(ctx, 1);
return NULL;
} }
static struct kioctx *lookup_ioctx(unsigned long ctx_id) static struct kioctx *lookup_ioctx(unsigned long ctx_id)
...@@ -1059,6 +1065,15 @@ static inline void iocb_put(struct aio_kiocb *iocb) ...@@ -1059,6 +1065,15 @@ static inline void iocb_put(struct aio_kiocb *iocb)
} }
} }
/*
 * aio_fill_event
 *	Populate the completion event @ev for the finished request @iocb.
 *	@res and @res2 are stored verbatim as the event's result fields.
 */
static void aio_fill_event(struct io_event *ev, struct aio_kiocb *iocb,
			   long res, long res2)
{
	/* Identify the request: caller-supplied data and the user iocb pointer. */
	ev->data = iocb->ki_user_data;
	ev->obj = (u64)(unsigned long)iocb->ki_user_iocb;

	/* Completion status reported for this request. */
	ev->res2 = res2;
	ev->res = res;
}
/* aio_complete /* aio_complete
* Called when the io request on the given iocb is complete. * Called when the io request on the given iocb is complete.
*/ */
...@@ -1086,10 +1101,7 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2) ...@@ -1086,10 +1101,7 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
event = ev_page + pos % AIO_EVENTS_PER_PAGE; event = ev_page + pos % AIO_EVENTS_PER_PAGE;
event->obj = (u64)(unsigned long)iocb->ki_user_iocb; aio_fill_event(event, iocb, res, res2);
event->data = iocb->ki_user_data;
event->res = res;
event->res2 = res2;
kunmap_atomic(ev_page); kunmap_atomic(ev_page);
flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
...@@ -1416,7 +1428,7 @@ static void aio_complete_rw(struct kiocb *kiocb, long res, long res2) ...@@ -1416,7 +1428,7 @@ static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
aio_complete(iocb, res, res2); aio_complete(iocb, res, res2);
} }
static int aio_prep_rw(struct kiocb *req, struct iocb *iocb) static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
{ {
int ret; int ret;
...@@ -1457,7 +1469,7 @@ static int aio_prep_rw(struct kiocb *req, struct iocb *iocb) ...@@ -1457,7 +1469,7 @@ static int aio_prep_rw(struct kiocb *req, struct iocb *iocb)
return ret; return ret;
} }
static int aio_setup_rw(int rw, struct iocb *iocb, struct iovec **iovec, static int aio_setup_rw(int rw, const struct iocb *iocb, struct iovec **iovec,
bool vectored, bool compat, struct iov_iter *iter) bool vectored, bool compat, struct iov_iter *iter)
{ {
void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf; void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf;
...@@ -1492,12 +1504,12 @@ static inline void aio_rw_done(struct kiocb *req, ssize_t ret) ...@@ -1492,12 +1504,12 @@ static inline void aio_rw_done(struct kiocb *req, ssize_t ret)
ret = -EINTR; ret = -EINTR;
/*FALLTHRU*/ /*FALLTHRU*/
default: default:
aio_complete_rw(req, ret, 0); req->ki_complete(req, ret, 0);
} }
} }
static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored, static ssize_t aio_read(struct kiocb *req, const struct iocb *iocb,
bool compat) bool vectored, bool compat)
{ {
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct iov_iter iter; struct iov_iter iter;
...@@ -1529,8 +1541,8 @@ static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored, ...@@ -1529,8 +1541,8 @@ static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored,
return ret; return ret;
} }
static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored, static ssize_t aio_write(struct kiocb *req, const struct iocb *iocb,
bool compat) bool vectored, bool compat)
{ {
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct iov_iter iter; struct iov_iter iter;
...@@ -1585,7 +1597,8 @@ static void aio_fsync_work(struct work_struct *work) ...@@ -1585,7 +1597,8 @@ static void aio_fsync_work(struct work_struct *work)
aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0); aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0);
} }
static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync) static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
bool datasync)
{ {
if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes || if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes ||
iocb->aio_rw_flags)) iocb->aio_rw_flags))
...@@ -1713,7 +1726,7 @@ aio_poll_queue_proc(struct file *file, struct wait_queue_head *head, ...@@ -1713,7 +1726,7 @@ aio_poll_queue_proc(struct file *file, struct wait_queue_head *head,
add_wait_queue(head, &pt->iocb->poll.wait); add_wait_queue(head, &pt->iocb->poll.wait);
} }
static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb) static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
{ {
struct kioctx *ctx = aiocb->ki_ctx; struct kioctx *ctx = aiocb->ki_ctx;
struct poll_iocb *req = &aiocb->poll; struct poll_iocb *req = &aiocb->poll;
...@@ -1733,6 +1746,10 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb) ...@@ -1733,6 +1746,10 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
if (unlikely(!req->file)) if (unlikely(!req->file))
return -EBADF; return -EBADF;
req->head = NULL;
req->woken = false;
req->cancelled = false;
apt.pt._qproc = aio_poll_queue_proc; apt.pt._qproc = aio_poll_queue_proc;
apt.pt._key = req->events; apt.pt._key = req->events;
apt.iocb = aiocb; apt.iocb = aiocb;
...@@ -1781,44 +1798,44 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb) ...@@ -1781,44 +1798,44 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
return 0; return 0;
} }
static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
bool compat) struct iocb __user *user_iocb, bool compat)
{ {
struct aio_kiocb *req; struct aio_kiocb *req;
struct iocb iocb;
ssize_t ret; ssize_t ret;
if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb))))
return -EFAULT;
/* enforce forwards compatibility on users */ /* enforce forwards compatibility on users */
if (unlikely(iocb.aio_reserved2)) { if (unlikely(iocb->aio_reserved2)) {
pr_debug("EINVAL: reserve field set\n"); pr_debug("EINVAL: reserve field set\n");
return -EINVAL; return -EINVAL;
} }
/* prevent overflows */ /* prevent overflows */
if (unlikely( if (unlikely(
(iocb.aio_buf != (unsigned long)iocb.aio_buf) || (iocb->aio_buf != (unsigned long)iocb->aio_buf) ||
(iocb.aio_nbytes != (size_t)iocb.aio_nbytes) || (iocb->aio_nbytes != (size_t)iocb->aio_nbytes) ||
((ssize_t)iocb.aio_nbytes < 0) ((ssize_t)iocb->aio_nbytes < 0)
)) { )) {
pr_debug("EINVAL: overflow check\n"); pr_debug("EINVAL: overflow check\n");
return -EINVAL; return -EINVAL;
} }
if (!get_reqs_available(ctx))
return -EAGAIN;
ret = -EAGAIN;
req = aio_get_req(ctx); req = aio_get_req(ctx);
if (unlikely(!req)) if (unlikely(!req))
return -EAGAIN; goto out_put_reqs_available;
if (iocb.aio_flags & IOCB_FLAG_RESFD) { if (iocb->aio_flags & IOCB_FLAG_RESFD) {
/* /*
* If the IOCB_FLAG_RESFD flag of aio_flags is set, get an * If the IOCB_FLAG_RESFD flag of aio_flags is set, get an
* instance of the file* now. The file descriptor must be * instance of the file* now. The file descriptor must be
* an eventfd() fd, and will be signaled for each completed * an eventfd() fd, and will be signaled for each completed
* event using the eventfd_signal() function. * event using the eventfd_signal() function.
*/ */
req->ki_eventfd = eventfd_ctx_fdget((int) iocb.aio_resfd); req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd);
if (IS_ERR(req->ki_eventfd)) { if (IS_ERR(req->ki_eventfd)) {
ret = PTR_ERR(req->ki_eventfd); ret = PTR_ERR(req->ki_eventfd);
req->ki_eventfd = NULL; req->ki_eventfd = NULL;
...@@ -1833,32 +1850,32 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, ...@@ -1833,32 +1850,32 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
} }
req->ki_user_iocb = user_iocb; req->ki_user_iocb = user_iocb;
req->ki_user_data = iocb.aio_data; req->ki_user_data = iocb->aio_data;
switch (iocb.aio_lio_opcode) { switch (iocb->aio_lio_opcode) {
case IOCB_CMD_PREAD: case IOCB_CMD_PREAD:
ret = aio_read(&req->rw, &iocb, false, compat); ret = aio_read(&req->rw, iocb, false, compat);
break; break;
case IOCB_CMD_PWRITE: case IOCB_CMD_PWRITE:
ret = aio_write(&req->rw, &iocb, false, compat); ret = aio_write(&req->rw, iocb, false, compat);
break; break;
case IOCB_CMD_PREADV: case IOCB_CMD_PREADV:
ret = aio_read(&req->rw, &iocb, true, compat); ret = aio_read(&req->rw, iocb, true, compat);
break; break;
case IOCB_CMD_PWRITEV: case IOCB_CMD_PWRITEV:
ret = aio_write(&req->rw, &iocb, true, compat); ret = aio_write(&req->rw, iocb, true, compat);
break; break;
case IOCB_CMD_FSYNC: case IOCB_CMD_FSYNC:
ret = aio_fsync(&req->fsync, &iocb, false); ret = aio_fsync(&req->fsync, iocb, false);
break; break;
case IOCB_CMD_FDSYNC: case IOCB_CMD_FDSYNC:
ret = aio_fsync(&req->fsync, &iocb, true); ret = aio_fsync(&req->fsync, iocb, true);
break; break;
case IOCB_CMD_POLL: case IOCB_CMD_POLL:
ret = aio_poll(req, &iocb); ret = aio_poll(req, iocb);
break; break;
default: default:
pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode); pr_debug("invalid aio operation %d\n", iocb->aio_lio_opcode);
ret = -EINVAL; ret = -EINVAL;
break; break;
} }
...@@ -1872,14 +1889,25 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, ...@@ -1872,14 +1889,25 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
goto out_put_req; goto out_put_req;
return 0; return 0;
out_put_req: out_put_req:
put_reqs_available(ctx, 1);
percpu_ref_put(&ctx->reqs);
if (req->ki_eventfd) if (req->ki_eventfd)
eventfd_ctx_put(req->ki_eventfd); eventfd_ctx_put(req->ki_eventfd);
kmem_cache_free(kiocb_cachep, req); iocb_put(req);
out_put_reqs_available:
put_reqs_available(ctx, 1);
return ret; return ret;
} }
/*
 * io_submit_one
 *	Copy one iocb from the user pointer @user_iocb into a local copy and
 *	hand it to __io_submit_one(), which also receives the original user
 *	pointer. Returns -EFAULT if the copy from userspace fails, otherwise
 *	whatever __io_submit_one() returns.
 */
static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
			 bool compat)
{
	struct iocb kernel_iocb;

	if (unlikely(copy_from_user(&kernel_iocb, user_iocb,
				    sizeof(kernel_iocb)) != 0))
		return -EFAULT;

	return __io_submit_one(ctx, &kernel_iocb, user_iocb, compat);
}
/* sys_io_submit: /* sys_io_submit:
* Queue the nr iocbs pointed to by iocbpp for processing. Returns * Queue the nr iocbs pointed to by iocbpp for processing. Returns
* the number of iocbs queued. May return -EINVAL if the aio_context * the number of iocbs queued. May return -EINVAL if the aio_context
...@@ -1912,7 +1940,8 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, ...@@ -1912,7 +1940,8 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
if (nr > ctx->nr_events) if (nr > ctx->nr_events)
nr = ctx->nr_events; nr = ctx->nr_events;
blk_start_plug(&plug); if (nr > AIO_PLUG_THRESHOLD)
blk_start_plug(&plug);
for (i = 0; i < nr; i++) { for (i = 0; i < nr; i++) {
struct iocb __user *user_iocb; struct iocb __user *user_iocb;
...@@ -1925,7 +1954,8 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, ...@@ -1925,7 +1954,8 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
if (ret) if (ret)
break; break;
} }
blk_finish_plug(&plug); if (nr > AIO_PLUG_THRESHOLD)
blk_finish_plug(&plug);
percpu_ref_put(&ctx->users); percpu_ref_put(&ctx->users);
return i ? i : ret; return i ? i : ret;
...@@ -1952,7 +1982,8 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, ...@@ -1952,7 +1982,8 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
if (nr > ctx->nr_events) if (nr > ctx->nr_events)
nr = ctx->nr_events; nr = ctx->nr_events;
blk_start_plug(&plug); if (nr > AIO_PLUG_THRESHOLD)
blk_start_plug(&plug);
for (i = 0; i < nr; i++) { for (i = 0; i < nr; i++) {
compat_uptr_t user_iocb; compat_uptr_t user_iocb;
...@@ -1965,7 +1996,8 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, ...@@ -1965,7 +1996,8 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
if (ret) if (ret)
break; break;
} }
blk_finish_plug(&plug); if (nr > AIO_PLUG_THRESHOLD)
blk_finish_plug(&plug);
percpu_ref_put(&ctx->users); percpu_ref_put(&ctx->users);
return i ? i : ret; return i ? i : ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment