Commit c610c4b6 authored by Pavel Shilovsky, committed by Steve French

CIFS: Add asynchronous write support through kernel AIO

This patch adds support for processing write calls passed by io_submit()
asynchronously. It is based on the previously introduced async context,
which allows i/o responses to be processed in a separate thread and lets
asynchronous calls return to the caller immediately.

This improves the write performance of single-threaded applications as
the i/o queue depth increases.
Signed-off-by: Pavel Shilovsky <pshilov@microsoft.com>
Signed-off-by: Steve French <smfrench@gmail.com>
parent 6685c5e2
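
The write flow this patch targets is the Linux native AIO interface: the application queues one or more iocbs with io_submit(), the kernel returns as soon as the requests are dispatched, and completions are later signalled through ki_complete() and reaped with io_getevents(). Below is a minimal userspace sketch (not part of this commit, compiled against libaio with -laio); the file path, queue depth and buffer size are illustrative, and the target file is assumed to live on a CIFS mount whose writes go through cifs_user_writev() (e.g. mounted with cache=none).

#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <libaio.h>

int main(void)
{
	io_context_t ioctx = 0;
	struct iocb cb, *cbs[1] = { &cb };
	struct io_event ev;
	void *buf;
	int fd;

	/* hypothetical path; assumes writes on this mount reach cifs_user_writev() */
	fd = open("/mnt/cifs/testfile", O_WRONLY | O_CREAT, 0644);
	if (fd < 0)
		return 1;

	if (io_setup(8, &ioctx))		/* illustrative queue depth of 8 */
		return 1;

	if (posix_memalign(&buf, 4096, 65536))
		return 1;
	memset(buf, 'a', 65536);

	/*
	 * Queue a single 64KB write at offset 0; submitting more iocbs
	 * before reaping raises the effective i/o queue depth.
	 */
	io_prep_pwrite(&cb, fd, buf, 65536, 0);
	if (io_submit(ioctx, 1, cbs) != 1)	/* returns once the request is queued */
		return 1;

	/* reap the completion the kernel signals via iocb->ki_complete() */
	if (io_getevents(ioctx, 1, 1, &ev, NULL) != 1)
		return 1;

	io_destroy(ioctx);
	close(fd);
	free(buf);
	return 0;
}
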
@@ -1124,6 +1124,7 @@ struct cifs_aio_ctx {
 	struct kiocb *iocb;
 	struct cifsFileInfo *cfile;
 	struct bio_vec *bv;
+	loff_t pos;
 	unsigned int npages;
 	ssize_t rc;
 	unsigned int len;
@@ -1171,6 +1172,7 @@ struct cifs_writedata {
 	enum writeback_sync_modes sync_mode;
 	struct work_struct work;
 	struct cifsFileInfo *cfile;
+	struct cifs_aio_ctx *ctx;
 	__u64 offset;
 	pid_t pid;
 	unsigned int bytes;
@@ -2458,11 +2458,14 @@ cifs_uncached_writedata_release(struct kref *refcount)
 	struct cifs_writedata *wdata = container_of(refcount,
 					struct cifs_writedata, refcount);
 
+	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
 	for (i = 0; i < wdata->nr_pages; i++)
 		put_page(wdata->pages[i]);
 	cifs_writedata_release(refcount);
 }
 
+static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
+
 static void
 cifs_uncached_writev_complete(struct work_struct *work)
 {
@@ -2478,7 +2481,8 @@ cifs_uncached_writev_complete(struct work_struct *work)
 	spin_unlock(&inode->i_lock);
 
 	complete(&wdata->done);
-
+	collect_uncached_write_data(wdata->ctx);
+	/* the below call can possibly free the last ref to aio ctx */
 	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
 }
@@ -2527,7 +2531,8 @@ wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
 static int
 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
 		     struct cifsFileInfo *open_file,
-		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
+		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
+		     struct cifs_aio_ctx *ctx)
 {
 	int rc = 0;
 	size_t cur_len;
@@ -2595,6 +2600,8 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
 		wdata->pagesz = PAGE_SIZE;
 		wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
 		wdata->credits = credits;
+		wdata->ctx = ctx;
+		kref_get(&ctx->refcount);
 
 		if (!wdata->cfile->invalidHandle ||
 		    !(rc = cifs_reopen_file(wdata->cfile, false)))
@@ -2620,81 +2627,61 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
 	return rc;
 }
 
-ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
+static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
 {
-	struct file *file = iocb->ki_filp;
-	ssize_t total_written = 0;
-	struct cifsFileInfo *open_file;
+	struct cifs_writedata *wdata, *tmp;
 	struct cifs_tcon *tcon;
 	struct cifs_sb_info *cifs_sb;
-	struct cifs_writedata *wdata, *tmp;
-	struct list_head wdata_list;
-	struct iov_iter saved_from = *from;
+	struct dentry *dentry = ctx->cfile->dentry;
+	unsigned int i;
 	int rc;
 
-	/*
-	 * BB - optimize the way when signing is disabled. We can drop this
-	 * extra memory-to-memory copying and use iovec buffers for constructing
-	 * write request.
-	 */
-
-	rc = generic_write_checks(iocb, from);
-	if (rc <= 0)
-		return rc;
-
-	INIT_LIST_HEAD(&wdata_list);
-	cifs_sb = CIFS_FILE_SB(file);
-	open_file = file->private_data;
-	tcon = tlink_tcon(open_file->tlink);
-
-	if (!tcon->ses->server->ops->async_writev)
-		return -ENOSYS;
+	tcon = tlink_tcon(ctx->cfile->tlink);
+	cifs_sb = CIFS_SB(dentry->d_sb);
 
-	rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from,
-				  open_file, cifs_sb, &wdata_list);
+	mutex_lock(&ctx->aio_mutex);
 
-	/*
-	 * If at least one write was successfully sent, then discard any rc
-	 * value from the later writes. If the other write succeeds, then
-	 * we'll end up returning whatever was written. If it fails, then
-	 * we'll get a new rc value from that.
-	 */
-	if (!list_empty(&wdata_list))
-		rc = 0;
+	if (list_empty(&ctx->list)) {
+		mutex_unlock(&ctx->aio_mutex);
+		return;
+	}
+
+	rc = ctx->rc;
 
 	/*
 	 * Wait for and collect replies for any successful sends in order of
-	 * increasing offset. Once an error is hit or we get a fatal signal
-	 * while waiting, then return without waiting for any more replies.
+	 * increasing offset. Once an error is hit, then return without waiting
+	 * for any more replies.
	 */
 restart_loop:
-	list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
+	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
 		if (!rc) {
-			/* FIXME: freezable too? */
-			rc = wait_for_completion_killable(&wdata->done);
-			if (rc)
-				rc = -EINTR;
-			else if (wdata->result)
+			if (!try_wait_for_completion(&wdata->done)) {
+				mutex_unlock(&ctx->aio_mutex);
+				return;
+			}
+
+			if (wdata->result)
 				rc = wdata->result;
 			else
-				total_written += wdata->bytes;
+				ctx->total_len += wdata->bytes;
 
 			/* resend call if it's a retryable error */
 			if (rc == -EAGAIN) {
 				struct list_head tmp_list;
-				struct iov_iter tmp_from = saved_from;
+				struct iov_iter tmp_from = ctx->iter;
 
 				INIT_LIST_HEAD(&tmp_list);
 				list_del_init(&wdata->list);
 
 				iov_iter_advance(&tmp_from,
-						 wdata->offset - iocb->ki_pos);
+						 wdata->offset - ctx->pos);
 
 				rc = cifs_write_from_iter(wdata->offset,
 					wdata->bytes, &tmp_from,
-					open_file, cifs_sb, &tmp_list);
+					ctx->cfile, cifs_sb, &tmp_list,
+					ctx);
 
-				list_splice(&tmp_list, &wdata_list);
+				list_splice(&tmp_list, &ctx->list);
 
 				kref_put(&wdata->refcount,
 					 cifs_uncached_writedata_release);
@@ -2705,12 +2692,111 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
 			kref_put(&wdata->refcount, cifs_uncached_writedata_release);
 	}
 
+	for (i = 0; i < ctx->npages; i++)
+		put_page(ctx->bv[i].bv_page);
+
+	cifs_stats_bytes_written(tcon, ctx->total_len);
+	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
+
+	ctx->rc = (rc == 0) ? ctx->total_len : rc;
+
+	mutex_unlock(&ctx->aio_mutex);
+
+	if (ctx->iocb && ctx->iocb->ki_complete)
+		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
+	else
+		complete(&ctx->done);
+}
+
+ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct file *file = iocb->ki_filp;
+	ssize_t total_written = 0;
+	struct cifsFileInfo *cfile;
+	struct cifs_tcon *tcon;
+	struct cifs_sb_info *cifs_sb;
+	struct cifs_aio_ctx *ctx;
+	struct iov_iter saved_from = *from;
+	int rc;
+
+	/*
+	 * BB - optimize the way when signing is disabled. We can drop this
+	 * extra memory-to-memory copying and use iovec buffers for constructing
+	 * write request.
+	 */
+
+	rc = generic_write_checks(iocb, from);
+	if (rc <= 0)
+		return rc;
+
+	cifs_sb = CIFS_FILE_SB(file);
+	cfile = file->private_data;
+	tcon = tlink_tcon(cfile->tlink);
+
+	if (!tcon->ses->server->ops->async_writev)
+		return -ENOSYS;
+
+	ctx = cifs_aio_ctx_alloc();
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->cfile = cifsFileInfo_get(cfile);
+
+	if (!is_sync_kiocb(iocb))
+		ctx->iocb = iocb;
+
+	ctx->pos = iocb->ki_pos;
+
+	rc = setup_aio_ctx_iter(ctx, from, WRITE);
+	if (rc) {
+		kref_put(&ctx->refcount, cifs_aio_ctx_release);
+		return rc;
+	}
+
+	/* grab a lock here due to read response handlers can access ctx */
+	mutex_lock(&ctx->aio_mutex);
+
+	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
+				  cfile, cifs_sb, &ctx->list, ctx);
+
+	/*
+	 * If at least one write was successfully sent, then discard any rc
+	 * value from the later writes. If the other write succeeds, then
+	 * we'll end up returning whatever was written. If it fails, then
+	 * we'll get a new rc value from that.
+	 */
+	if (!list_empty(&ctx->list))
+		rc = 0;
+
+	mutex_unlock(&ctx->aio_mutex);
+
+	if (rc) {
+		kref_put(&ctx->refcount, cifs_aio_ctx_release);
+		return rc;
+	}
+
+	if (!is_sync_kiocb(iocb)) {
+		kref_put(&ctx->refcount, cifs_aio_ctx_release);
+		return -EIOCBQUEUED;
+	}
+
+	rc = wait_for_completion_killable(&ctx->done);
+	if (rc) {
+		mutex_lock(&ctx->aio_mutex);
+		ctx->rc = rc = -EINTR;
+		total_written = ctx->total_len;
+		mutex_unlock(&ctx->aio_mutex);
+	} else {
+		rc = ctx->rc;
+		total_written = ctx->total_len;
+	}
+
+	kref_put(&ctx->refcount, cifs_aio_ctx_release);
+
 	if (unlikely(!total_written))
 		return rc;
 
 	iocb->ki_pos += total_written;
-	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(file_inode(file))->flags);
-	cifs_stats_bytes_written(tcon, total_written);
 	return total_written;
 }