Commit ee4cdf7b authored by David Howells, committed by Christian Brauner

netfs: Speed up buffered reading

Improve the efficiency of buffered reads in a number of ways:

 (1) Overhaul the algorithm in general so that it's a lot more compact and
     split the read submission code between buffered and unbuffered
     versions.  The unbuffered version can be vastly simplified.

 (2) Read-result collection is handed off to a work queue rather than being
     done in the I/O thread.  Multiple subrequests can be processed
     simultaneously.

 (3) When a subrequest is collected, any folios it fully spans are
     collected and "spare" data on either side is donated to either the
     previous or the next subrequest in the sequence.

Notes:

 (*) Readahead expansion massively slows down fio, presumably because it
     causes a load of extra allocations, both folio and xarray, up front
     before RPC requests can be transmitted.

 (*) RDMA with cifs does appear to work, both with SIW and RXE.

 (*) PG_private_2-based reading and copy-to-cache is split out into its own
     file and altered to use folio_queue.  Note that the copy to the cache
     now creates a new write transaction against the cache and adds the
     folios to be copied into it.  This allows it to use part of the
     writeback I/O code.
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Link: https://lore.kernel.org/r/20240814203850.2240469-20-dhowells@redhat.com/ # v2
Signed-off-by: Christian Brauner <brauner@kernel.org>
parent 2e45b922
...@@ -68,17 +68,22 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq) ...@@ -68,17 +68,22 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq)
{ {
struct netfs_io_request *rreq = subreq->rreq; struct netfs_io_request *rreq = subreq->rreq;
struct p9_fid *fid = rreq->netfs_priv; struct p9_fid *fid = rreq->netfs_priv;
unsigned long long pos = subreq->start + subreq->transferred;
int total, err; int total, err;
total = p9_client_read(fid, subreq->start + subreq->transferred, total = p9_client_read(fid, pos, &subreq->io_iter, &err);
&subreq->io_iter, &err);
/* if we just extended the file size, any portion not in /* if we just extended the file size, any portion not in
* cache won't be on server and is zeroes */ * cache won't be on server and is zeroes */
if (subreq->rreq->origin != NETFS_DIO_READ) if (subreq->rreq->origin != NETFS_DIO_READ)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
if (pos + total >= i_size_read(rreq->inode))
__set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
netfs_subreq_terminated(subreq, err ?: total, false); if (!err)
subreq->transferred += total;
netfs_read_subreq_terminated(subreq, err, false);
} }
/** /**
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/swap.h> #include <linux/swap.h>
#include <linux/netfs.h> #include <linux/netfs.h>
#include <trace/events/netfs.h>
#include "internal.h" #include "internal.h"
static int afs_file_mmap(struct file *file, struct vm_area_struct *vma); static int afs_file_mmap(struct file *file, struct vm_area_struct *vma);
...@@ -242,9 +243,10 @@ static void afs_fetch_data_notify(struct afs_operation *op) ...@@ -242,9 +243,10 @@ static void afs_fetch_data_notify(struct afs_operation *op)
req->error = error; req->error = error;
if (subreq) { if (subreq) {
if (subreq->rreq->origin != NETFS_DIO_READ) subreq->rreq->i_size = req->file_size;
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); if (req->pos + req->actual_len >= req->file_size)
netfs_subreq_terminated(subreq, error ?: req->actual_len, false); __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
netfs_read_subreq_terminated(subreq, error, false);
req->subreq = NULL; req->subreq = NULL;
} else if (req->done) { } else if (req->done) {
req->done(req); req->done(req);
...@@ -262,6 +264,12 @@ static void afs_fetch_data_success(struct afs_operation *op) ...@@ -262,6 +264,12 @@ static void afs_fetch_data_success(struct afs_operation *op)
afs_fetch_data_notify(op); afs_fetch_data_notify(op);
} }
/*
 * Wrapper used as the .aborted hook of the fetch-data operation: runs
 * afs_check_for_remote_deletion() on the operation and then
 * afs_fetch_data_notify(), so the notify step is also performed when the
 * operation is aborted.
 */
static void afs_fetch_data_aborted(struct afs_operation *op)
{
afs_check_for_remote_deletion(op);
afs_fetch_data_notify(op);
}
static void afs_fetch_data_put(struct afs_operation *op) static void afs_fetch_data_put(struct afs_operation *op)
{ {
op->fetch.req->error = afs_op_error(op); op->fetch.req->error = afs_op_error(op);
...@@ -272,7 +280,7 @@ static const struct afs_operation_ops afs_fetch_data_operation = { ...@@ -272,7 +280,7 @@ static const struct afs_operation_ops afs_fetch_data_operation = {
.issue_afs_rpc = afs_fs_fetch_data, .issue_afs_rpc = afs_fs_fetch_data,
.issue_yfs_rpc = yfs_fs_fetch_data, .issue_yfs_rpc = yfs_fs_fetch_data,
.success = afs_fetch_data_success, .success = afs_fetch_data_success,
.aborted = afs_check_for_remote_deletion, .aborted = afs_fetch_data_aborted,
.failed = afs_fetch_data_notify, .failed = afs_fetch_data_notify,
.put = afs_fetch_data_put, .put = afs_fetch_data_put,
}; };
...@@ -294,7 +302,7 @@ int afs_fetch_data(struct afs_vnode *vnode, struct afs_read *req) ...@@ -294,7 +302,7 @@ int afs_fetch_data(struct afs_vnode *vnode, struct afs_read *req)
op = afs_alloc_operation(req->key, vnode->volume); op = afs_alloc_operation(req->key, vnode->volume);
if (IS_ERR(op)) { if (IS_ERR(op)) {
if (req->subreq) if (req->subreq)
netfs_subreq_terminated(req->subreq, PTR_ERR(op), false); netfs_read_subreq_terminated(req->subreq, PTR_ERR(op), false);
return PTR_ERR(op); return PTR_ERR(op);
} }
...@@ -313,7 +321,7 @@ static void afs_read_worker(struct work_struct *work) ...@@ -313,7 +321,7 @@ static void afs_read_worker(struct work_struct *work)
fsreq = afs_alloc_read(GFP_NOFS); fsreq = afs_alloc_read(GFP_NOFS);
if (!fsreq) if (!fsreq)
return netfs_subreq_terminated(subreq, -ENOMEM, false); return netfs_read_subreq_terminated(subreq, -ENOMEM, false);
fsreq->subreq = subreq; fsreq->subreq = subreq;
fsreq->pos = subreq->start + subreq->transferred; fsreq->pos = subreq->start + subreq->transferred;
...@@ -322,6 +330,7 @@ static void afs_read_worker(struct work_struct *work) ...@@ -322,6 +330,7 @@ static void afs_read_worker(struct work_struct *work)
fsreq->vnode = vnode; fsreq->vnode = vnode;
fsreq->iter = &subreq->io_iter; fsreq->iter = &subreq->io_iter;
trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
afs_fetch_data(fsreq->vnode, fsreq); afs_fetch_data(fsreq->vnode, fsreq);
afs_put_read(fsreq); afs_put_read(fsreq);
} }
......
...@@ -304,6 +304,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) ...@@ -304,6 +304,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
struct afs_vnode_param *vp = &op->file[0]; struct afs_vnode_param *vp = &op->file[0];
struct afs_read *req = op->fetch.req; struct afs_read *req = op->fetch.req;
const __be32 *bp; const __be32 *bp;
size_t count_before;
int ret; int ret;
_enter("{%u,%zu,%zu/%llu}", _enter("{%u,%zu,%zu/%llu}",
...@@ -345,10 +346,14 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) ...@@ -345,10 +346,14 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
/* extract the returned data */ /* extract the returned data */
case 2: case 2:
_debug("extract data %zu/%llu", count_before = call->iov_len;
iov_iter_count(call->iter), req->actual_len); _debug("extract data %zu/%llu", count_before, req->actual_len);
ret = afs_extract_data(call, true); ret = afs_extract_data(call, true);
if (req->subreq) {
req->subreq->transferred += count_before - call->iov_len;
netfs_read_subreq_progress(req->subreq, false);
}
if (ret < 0) if (ret < 0)
return ret; return ret;
......
...@@ -355,6 +355,7 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call) ...@@ -355,6 +355,7 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
struct afs_vnode_param *vp = &op->file[0]; struct afs_vnode_param *vp = &op->file[0];
struct afs_read *req = op->fetch.req; struct afs_read *req = op->fetch.req;
const __be32 *bp; const __be32 *bp;
size_t count_before;
int ret; int ret;
_enter("{%u,%zu, %zu/%llu}", _enter("{%u,%zu, %zu/%llu}",
...@@ -391,10 +392,14 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call) ...@@ -391,10 +392,14 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
/* extract the returned data */ /* extract the returned data */
case 2: case 2:
_debug("extract data %zu/%llu", count_before = call->iov_len;
iov_iter_count(call->iter), req->actual_len); _debug("extract data %zu/%llu", count_before, req->actual_len);
ret = afs_extract_data(call, true); ret = afs_extract_data(call, true);
if (req->subreq) {
req->subreq->transferred += count_before - call->iov_len;
netfs_read_subreq_progress(req->subreq, false);
}
if (ret < 0) if (ret < 0)
return ret; return ret;
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include <linux/iversion.h> #include <linux/iversion.h>
#include <linux/ktime.h> #include <linux/ktime.h>
#include <linux/netfs.h> #include <linux/netfs.h>
#include <trace/events/netfs.h>
#include "super.h" #include "super.h"
#include "mds_client.h" #include "mds_client.h"
...@@ -205,21 +206,6 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq) ...@@ -205,21 +206,6 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq)
} }
} }
/*
 * Clamp the length of a read subrequest.
 *
 * subreq->len is set to the smaller of the extent length reported by
 * ceph_calc_file_object_mapping() for [subreq->start, +subreq->len) and the
 * rsize mount option.  Always returns true.
 */
static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq)
{
struct inode *inode = subreq->rreq->inode;
struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode);
struct ceph_inode_info *ci = ceph_inode(inode);
u64 objno, objoff;
u32 xlen;
/* Truncate the extent at the end of the current block */
ceph_calc_file_object_mapping(&ci->i_layout, subreq->start, subreq->len,
&objno, &objoff, &xlen);
/* objno and objoff are outputs of the mapping call that are not used here. */
subreq->len = min(xlen, fsc->mount_options->rsize);
return true;
}
static void finish_netfs_read(struct ceph_osd_request *req) static void finish_netfs_read(struct ceph_osd_request *req)
{ {
struct inode *inode = req->r_inode; struct inode *inode = req->r_inode;
...@@ -264,7 +250,12 @@ static void finish_netfs_read(struct ceph_osd_request *req) ...@@ -264,7 +250,12 @@ static void finish_netfs_read(struct ceph_osd_request *req)
calc_pages_for(osd_data->alignment, calc_pages_for(osd_data->alignment,
osd_data->length), false); osd_data->length), false);
} }
netfs_subreq_terminated(subreq, err, false); if (err > 0) {
subreq->transferred = err;
err = 0;
}
trace_netfs_sreq(subreq, netfs_sreq_trace_io_progress);
netfs_read_subreq_terminated(subreq, err, false);
iput(req->r_inode); iput(req->r_inode);
ceph_dec_osd_stopping_blocker(fsc->mdsc); ceph_dec_osd_stopping_blocker(fsc->mdsc);
} }
...@@ -278,7 +269,6 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq) ...@@ -278,7 +269,6 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
struct ceph_mds_request *req; struct ceph_mds_request *req;
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
struct iov_iter iter;
ssize_t err = 0; ssize_t err = 0;
size_t len; size_t len;
int mode; int mode;
...@@ -301,6 +291,7 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq) ...@@ -301,6 +291,7 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INLINE_DATA); req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INLINE_DATA);
req->r_num_caps = 2; req->r_num_caps = 2;
trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
err = ceph_mdsc_do_request(mdsc, NULL, req); err = ceph_mdsc_do_request(mdsc, NULL, req);
if (err < 0) if (err < 0)
goto out; goto out;
...@@ -314,17 +305,36 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq) ...@@ -314,17 +305,36 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
} }
len = min_t(size_t, iinfo->inline_len - subreq->start, subreq->len); len = min_t(size_t, iinfo->inline_len - subreq->start, subreq->len);
iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len); err = copy_to_iter(iinfo->inline_data + subreq->start, len, &subreq->io_iter);
err = copy_to_iter(iinfo->inline_data + subreq->start, len, &iter); if (err == 0) {
if (err == 0)
err = -EFAULT; err = -EFAULT;
} else {
subreq->transferred += err;
err = 0;
}
ceph_mdsc_put_request(req); ceph_mdsc_put_request(req);
out: out:
netfs_subreq_terminated(subreq, err, false); netfs_read_subreq_terminated(subreq, err, false);
return true; return true;
} }
/*
 * ->prepare_read() hook: set the maximum subrequest length for stream 0.
 *
 * Rather than altering subreq->len directly, this stores the limit in
 * rreq->io_streams[0].sreq_max_len as the smaller of the extent length
 * reported by ceph_calc_file_object_mapping() for the subrequest's range and
 * the rsize mount option.  Always returns 0.
 */
static int ceph_netfs_prepare_read(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
struct inode *inode = rreq->inode;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode);
u64 objno, objoff;
u32 xlen;
/* Truncate the extent at the end of the current block */
ceph_calc_file_object_mapping(&ci->i_layout, subreq->start, subreq->len,
&objno, &objoff, &xlen);
/* objno and objoff are outputs of the mapping call that are not used here. */
rreq->io_streams[0].sreq_max_len = umin(xlen, fsc->mount_options->rsize);
return 0;
}
static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
{ {
struct netfs_io_request *rreq = subreq->rreq; struct netfs_io_request *rreq = subreq->rreq;
...@@ -334,9 +344,8 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) ...@@ -334,9 +344,8 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
struct ceph_client *cl = fsc->client; struct ceph_client *cl = fsc->client;
struct ceph_osd_request *req = NULL; struct ceph_osd_request *req = NULL;
struct ceph_vino vino = ceph_vino(inode); struct ceph_vino vino = ceph_vino(inode);
struct iov_iter iter; int err;
int err = 0; u64 len;
u64 len = subreq->len;
bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD); bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD);
u64 off = subreq->start; u64 off = subreq->start;
int extent_cnt; int extent_cnt;
...@@ -349,6 +358,12 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) ...@@ -349,6 +358,12 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
if (ceph_has_inline_data(ci) && ceph_netfs_issue_op_inline(subreq)) if (ceph_has_inline_data(ci) && ceph_netfs_issue_op_inline(subreq))
return; return;
// TODO: This rounding here is slightly dodgy. It *should* work, for
// now, as the cache only deals in blocks that are a multiple of
// PAGE_SIZE and fscrypt blocks are at most PAGE_SIZE. What needs to
// happen is for the fscrypt driving to be moved into netfslib and the
// data in the cache also to be stored encrypted.
len = subreq->len;
ceph_fscrypt_adjust_off_and_len(inode, &off, &len); ceph_fscrypt_adjust_off_and_len(inode, &off, &len);
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino,
...@@ -371,8 +386,6 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) ...@@ -371,8 +386,6 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
doutc(cl, "%llx.%llx pos=%llu orig_len=%zu len=%llu\n", doutc(cl, "%llx.%llx pos=%llu orig_len=%zu len=%llu\n",
ceph_vinop(inode), subreq->start, subreq->len, len); ceph_vinop(inode), subreq->start, subreq->len, len);
iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len);
/* /*
* FIXME: For now, use CEPH_OSD_DATA_TYPE_PAGES instead of _ITER for * FIXME: For now, use CEPH_OSD_DATA_TYPE_PAGES instead of _ITER for
* encrypted inodes. We'd need infrastructure that handles an iov_iter * encrypted inodes. We'd need infrastructure that handles an iov_iter
...@@ -384,7 +397,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) ...@@ -384,7 +397,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
struct page **pages; struct page **pages;
size_t page_off; size_t page_off;
err = iov_iter_get_pages_alloc2(&iter, &pages, len, &page_off); err = iov_iter_get_pages_alloc2(&subreq->io_iter, &pages, len, &page_off);
if (err < 0) { if (err < 0) {
doutc(cl, "%llx.%llx failed to allocate pages, %d\n", doutc(cl, "%llx.%llx failed to allocate pages, %d\n",
ceph_vinop(inode), err); ceph_vinop(inode), err);
...@@ -399,7 +412,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) ...@@ -399,7 +412,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false, osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false,
false); false);
} else { } else {
osd_req_op_extent_osd_iter(req, 0, &iter); osd_req_op_extent_osd_iter(req, 0, &subreq->io_iter);
} }
if (!ceph_inc_osd_stopping_blocker(fsc->mdsc)) { if (!ceph_inc_osd_stopping_blocker(fsc->mdsc)) {
err = -EIO; err = -EIO;
...@@ -410,17 +423,19 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) ...@@ -410,17 +423,19 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
req->r_inode = inode; req->r_inode = inode;
ihold(inode); ihold(inode);
trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
ceph_osdc_start_request(req->r_osdc, req); ceph_osdc_start_request(req->r_osdc, req);
out: out:
ceph_osdc_put_request(req); ceph_osdc_put_request(req);
if (err) if (err)
netfs_subreq_terminated(subreq, err, false); netfs_read_subreq_terminated(subreq, err, false);
doutc(cl, "%llx.%llx result %d\n", ceph_vinop(inode), err); doutc(cl, "%llx.%llx result %d\n", ceph_vinop(inode), err);
} }
static int ceph_init_request(struct netfs_io_request *rreq, struct file *file) static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
{ {
struct inode *inode = rreq->inode; struct inode *inode = rreq->inode;
struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode);
struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_client *cl = ceph_inode_to_client(inode);
int got = 0, want = CEPH_CAP_FILE_CACHE; int got = 0, want = CEPH_CAP_FILE_CACHE;
struct ceph_netfs_request_data *priv; struct ceph_netfs_request_data *priv;
...@@ -472,6 +487,7 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file) ...@@ -472,6 +487,7 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
priv->caps = got; priv->caps = got;
rreq->netfs_priv = priv; rreq->netfs_priv = priv;
rreq->io_streams[0].sreq_max_len = fsc->mount_options->rsize;
out: out:
if (ret < 0) if (ret < 0)
...@@ -496,9 +512,9 @@ static void ceph_netfs_free_request(struct netfs_io_request *rreq) ...@@ -496,9 +512,9 @@ static void ceph_netfs_free_request(struct netfs_io_request *rreq)
const struct netfs_request_ops ceph_netfs_ops = { const struct netfs_request_ops ceph_netfs_ops = {
.init_request = ceph_init_request, .init_request = ceph_init_request,
.free_request = ceph_netfs_free_request, .free_request = ceph_netfs_free_request,
.prepare_read = ceph_netfs_prepare_read,
.issue_read = ceph_netfs_issue_read, .issue_read = ceph_netfs_issue_read,
.expand_readahead = ceph_netfs_expand_readahead, .expand_readahead = ceph_netfs_expand_readahead,
.clamp_length = ceph_netfs_clamp_length,
.check_write_begin = ceph_netfs_check_write_begin, .check_write_begin = ceph_netfs_check_write_begin,
}; };
......
...@@ -5,12 +5,14 @@ netfs-y := \ ...@@ -5,12 +5,14 @@ netfs-y := \
buffered_write.o \ buffered_write.o \
direct_read.o \ direct_read.o \
direct_write.o \ direct_write.o \
io.o \
iterator.o \ iterator.o \
locking.o \ locking.o \
main.o \ main.o \
misc.o \ misc.o \
objects.o \ objects.o \
read_collect.o \
read_pgpriv2.o \
read_retry.o \
write_collect.o \ write_collect.o \
write_issue.o write_issue.o
......
This diff is collapsed.
...@@ -16,6 +16,143 @@ ...@@ -16,6 +16,143 @@
#include <linux/netfs.h> #include <linux/netfs.h>
#include "internal.h" #include "internal.h"
/*
 * Size a subrequest and carve its I/O iterator off the front of the request's
 * iterator.
 *
 * subreq->len is first clamped to stream 0's sreq_max_len and then, if a
 * segment limit (sreq_max_segs) is set, further reduced to what
 * netfs_limit_iter() says fits in that many segments.  subreq->io_iter becomes
 * a copy of rreq->iter truncated to the final length, and rreq->iter is
 * advanced by the same amount.
 */
static void netfs_prepare_dio_read_iterator(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
size_t rsize;
rsize = umin(subreq->len, rreq->io_streams[0].sreq_max_len);
subreq->len = rsize;
/* A zero sreq_max_segs means no segment-count limit is applied. */
if (unlikely(rreq->io_streams[0].sreq_max_segs)) {
size_t limit = netfs_limit_iter(&rreq->iter, 0, rsize,
rreq->io_streams[0].sreq_max_segs);
if (limit < rsize) {
subreq->len = limit;
trace_netfs_sreq(subreq, netfs_sreq_trace_limited);
}
}
trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
/* Copy the request iterator, then trim the copy to this slice. */
subreq->io_iter = rreq->iter;
iov_iter_truncate(&subreq->io_iter, subreq->len);
/* Move the request iterator past the slice just claimed. */
iov_iter_advance(&rreq->iter, subreq->len);
}
/*
 * Perform a read to a buffer from the server, slicing up the region to be read
 * according to the network rsize.
 *
 * One subrequest is allocated, prepared and issued per slice until rreq->len
 * has been covered, an allocation or ->prepare_read() failure occurs, or the
 * request has both NETFS_RREQ_BLOCKED and NETFS_RREQ_NONBLOCK set.
 * rreq->submitted accumulates the bytes handed to ->issue_read().
 *
 * Returns 0, -ENOMEM if a subrequest could not be allocated, or the negative
 * value returned by ->prepare_read().  Subrequests already issued are not
 * cancelled here.
 */
static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
{
unsigned long long start = rreq->start;
ssize_t size = rreq->len;
int ret = 0;
/*
 * Start the count at 1; this extra count is dropped after the loop.
 * Presumably this stops the request being terminated while slices are
 * still being issued - confirm against the collection code.
 */
atomic_set(&rreq->nr_outstanding, 1);
do {
struct netfs_io_subrequest *subreq;
ssize_t slice;
subreq = netfs_alloc_subrequest(rreq);
if (!subreq) {
ret = -ENOMEM;
break;
}
/* Initially ask for everything that is left; the length is trimmed below. */
subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
subreq->start = start;
subreq->len = size;
atomic_inc(&rreq->nr_outstanding);
spin_lock_bh(&rreq->lock);
list_add_tail(&subreq->rreq_link, &rreq->subrequests);
/* Move any donation pending on the request onto this subrequest. */
subreq->prev_donated = rreq->prev_donated;
rreq->prev_donated = 0;
trace_netfs_sreq(subreq, netfs_sreq_trace_added);
spin_unlock_bh(&rreq->lock);
netfs_stat(&netfs_n_rh_download);
if (rreq->netfs_ops->prepare_read) {
ret = rreq->netfs_ops->prepare_read(subreq);
if (ret < 0) {
/* Undo the count taken for this subrequest and drop it. */
atomic_dec(&rreq->nr_outstanding);
netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
break;
}
}
netfs_prepare_dio_read_iterator(subreq);
/*
 * Record the length before issuing; subreq is not touched again after
 * ->issue_read() (presumably it may complete asynchronously).
 */
slice = subreq->len;
rreq->netfs_ops->issue_read(subreq);
size -= slice;
start += slice;
rreq->submitted += slice;
/* Stop issuing if the request is both blocked and marked non-blocking. */
if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) &&
test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags))
break;
cond_resched();
} while (size > 0);
/* Drop the initial count; terminate the request if nothing is outstanding. */
if (atomic_dec_and_test(&rreq->nr_outstanding))
netfs_rreq_terminated(rreq, false);
return ret;
}
/*
 * Perform a read to an application buffer, bypassing the pagecache and the
 * local disk cache.
 *
 * @rreq: The read request to dispatch.
 * @sync: If true, wait for NETFS_RREQ_IN_PROGRESS to clear and return the
 *        request's result; otherwise return -EIOCBQUEUED once dispatched.
 *
 * Returns -EIO for a zero-length request, 0 if nothing was submitted,
 * -EIOCBQUEUED for an asynchronous read, or, for a synchronous read,
 * rreq->error (with a short non-DIO read converted to -EIO).
 */
static int netfs_unbuffered_read(struct netfs_io_request *rreq, bool sync)
{
int ret;
_enter("R=%x %llx-%llx",
rreq->debug_id, rreq->start, rreq->start + rreq->len - 1);
if (rreq->len == 0) {
pr_err("Zero-sized read [R=%x]\n", rreq->debug_id);
return -EIO;
}
// TODO: Use bounce buffer if requested
inode_dio_begin(rreq->inode);
ret = netfs_dispatch_unbuffered_reads(rreq);
/*
 * Nothing was handed to the filesystem.
 * NOTE(review): any error returned by the dispatcher is overwritten with
 * 0 here, and rreq->inode is read after netfs_put_request() - confirm
 * that the caller still holds a reference and that dropping the error is
 * intended.
 */
if (!rreq->submitted) {
netfs_put_request(rreq, false, netfs_rreq_trace_put_no_submit);
inode_dio_end(rreq->inode);
ret = 0;
goto out;
}
if (sync) {
trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip);
wait_on_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS,
TASK_UNINTERRUPTIBLE);
ret = rreq->error;
/* A short read is an error unless this is a DIO read. */
if (ret == 0 && rreq->submitted < rreq->len &&
rreq->origin != NETFS_DIO_READ) {
trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
ret = -EIO;
}
} else {
ret = -EIOCBQUEUED;
}
out:
_leave(" = %d", ret);
return ret;
}
/** /**
* netfs_unbuffered_read_iter_locked - Perform an unbuffered or direct I/O read * netfs_unbuffered_read_iter_locked - Perform an unbuffered or direct I/O read
* @iocb: The I/O control descriptor describing the read * @iocb: The I/O control descriptor describing the read
...@@ -31,7 +168,7 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i ...@@ -31,7 +168,7 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
struct netfs_io_request *rreq; struct netfs_io_request *rreq;
ssize_t ret; ssize_t ret;
size_t orig_count = iov_iter_count(iter); size_t orig_count = iov_iter_count(iter);
bool async = !is_sync_kiocb(iocb); bool sync = is_sync_kiocb(iocb);
_enter(""); _enter("");
...@@ -78,13 +215,13 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i ...@@ -78,13 +215,13 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
// TODO: Set up bounce buffer if needed // TODO: Set up bounce buffer if needed
if (async) if (!sync)
rreq->iocb = iocb; rreq->iocb = iocb;
ret = netfs_begin_read(rreq, is_sync_kiocb(iocb)); ret = netfs_unbuffered_read(rreq, sync);
if (ret < 0) if (ret < 0)
goto out; /* May be -EIOCBQUEUED */ goto out; /* May be -EIOCBQUEUED */
if (!async) { if (sync) {
// TODO: Copy from bounce buffer // TODO: Copy from bounce buffer
iocb->ki_pos += rreq->transferred; iocb->ki_pos += rreq->transferred;
ret = rreq->transferred; ret = rreq->transferred;
...@@ -94,8 +231,6 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i ...@@ -94,8 +231,6 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
netfs_put_request(rreq, false, netfs_rreq_trace_put_return); netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
if (ret > 0) if (ret > 0)
orig_count -= ret; orig_count -= ret;
if (ret != -EIOCBQUEUED)
iov_iter_revert(iter, orig_count - iov_iter_count(iter));
return ret; return ret;
} }
EXPORT_SYMBOL(netfs_unbuffered_read_iter_locked); EXPORT_SYMBOL(netfs_unbuffered_read_iter_locked);
......
...@@ -23,16 +23,9 @@ ...@@ -23,16 +23,9 @@
/* /*
* buffered_read.c * buffered_read.c
*/ */
void netfs_rreq_unlock_folios(struct netfs_io_request *rreq);
int netfs_prefetch_for_write(struct file *file, struct folio *folio, int netfs_prefetch_for_write(struct file *file, struct folio *folio,
size_t offset, size_t len); size_t offset, size_t len);
/*
* io.c
*/
void netfs_rreq_work(struct work_struct *work);
int netfs_begin_read(struct netfs_io_request *rreq, bool sync);
/* /*
* main.c * main.c
*/ */
...@@ -90,6 +83,28 @@ static inline void netfs_see_request(struct netfs_io_request *rreq, ...@@ -90,6 +83,28 @@ static inline void netfs_see_request(struct netfs_io_request *rreq,
trace_netfs_rreq_ref(rreq->debug_id, refcount_read(&rreq->ref), what); trace_netfs_rreq_ref(rreq->debug_id, refcount_read(&rreq->ref), what);
} }
/*
* read_collect.c
*/
void netfs_read_termination_worker(struct work_struct *work);
void netfs_rreq_terminated(struct netfs_io_request *rreq, bool was_async);
/*
* read_pgpriv2.c
*/
void netfs_pgpriv2_mark_copy_to_cache(struct netfs_io_subrequest *subreq,
struct netfs_io_request *rreq,
struct folio_queue *folioq,
int slot);
void netfs_pgpriv2_write_to_the_cache(struct netfs_io_request *rreq);
bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *wreq);
/*
* read_retry.c
*/
void netfs_retry_reads(struct netfs_io_request *rreq);
void netfs_unlock_abandoned_read_pages(struct netfs_io_request *rreq);
/* /*
* stats.c * stats.c
*/ */
...@@ -117,6 +132,7 @@ extern atomic_t netfs_n_wh_buffered_write; ...@@ -117,6 +132,7 @@ extern atomic_t netfs_n_wh_buffered_write;
extern atomic_t netfs_n_wh_writethrough; extern atomic_t netfs_n_wh_writethrough;
extern atomic_t netfs_n_wh_dio_write; extern atomic_t netfs_n_wh_dio_write;
extern atomic_t netfs_n_wh_writepages; extern atomic_t netfs_n_wh_writepages;
extern atomic_t netfs_n_wh_copy_to_cache;
extern atomic_t netfs_n_wh_wstream_conflict; extern atomic_t netfs_n_wh_wstream_conflict;
extern atomic_t netfs_n_wh_upload; extern atomic_t netfs_n_wh_upload;
extern atomic_t netfs_n_wh_upload_done; extern atomic_t netfs_n_wh_upload_done;
...@@ -162,6 +178,11 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, ...@@ -162,6 +178,11 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
void netfs_reissue_write(struct netfs_io_stream *stream, void netfs_reissue_write(struct netfs_io_stream *stream,
struct netfs_io_subrequest *subreq, struct netfs_io_subrequest *subreq,
struct iov_iter *source); struct iov_iter *source);
void netfs_issue_write(struct netfs_io_request *wreq,
struct netfs_io_stream *stream);
int netfs_advance_write(struct netfs_io_request *wreq,
struct netfs_io_stream *stream,
loff_t start, size_t len, bool to_eof);
struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len); struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len);
int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
struct folio *folio, size_t copied, bool to_page_end, struct folio *folio, size_t copied, bool to_page_end,
......
...@@ -188,9 +188,59 @@ static size_t netfs_limit_xarray(const struct iov_iter *iter, size_t start_offse ...@@ -188,9 +188,59 @@ static size_t netfs_limit_xarray(const struct iov_iter *iter, size_t start_offse
return min(span, max_size); return min(span, max_size);
} }
/*
 * Select the span of a folio queue iterator we're going to use.  Limit it by
 * both maximum size and maximum number of segments.  Returns the size of the
 * span in bytes.
 *
 * @iter: The iterator to measure; must be a folio-queue iterator.
 * @start_offset: Number of bytes into the iterator at which the span starts.
 * @max_size: Maximum span size in bytes.
 * @max_segs: Maximum number of folios the span may touch.
 *
 * Returns 0 if the iterator is of the wrong type, is empty, or start_offset
 * lies beyond its end (the first and last of these also trigger a warning).
 */
static size_t netfs_limit_folioq(const struct iov_iter *iter, size_t start_offset,
size_t max_size, size_t max_segs)
{
const struct folio_queue *folioq = iter->folioq;
unsigned int nsegs = 0;
unsigned int slot = iter->folioq_slot;
size_t span = 0, n = iter->count;
if (WARN_ON(!iov_iter_is_folioq(iter)) ||
WARN_ON(start_offset > n) ||
n == 0)
return 0;
/* Never report more than remains in the iterator past start_offset. */
max_size = umin(max_size, n - start_offset);
/* The iterator may be parked just past the last slot of a segment. */
if (slot >= folioq_nr_slots(folioq)) {
folioq = folioq->next;
slot = 0;
}
/*
 * Rebase start_offset onto the start of the current slot's folio
 * (presumably iov_offset is the offset into that folio - confirm).
 */
start_offset += iter->iov_offset;
do {
size_t flen = folioq_folio_size(folioq, slot);
/*
 * Folios lying wholly before start_offset are skipped without being
 * counted; the first folio that is reached contributes only its tail.
 */
if (start_offset < flen) {
span += flen - start_offset;
nsegs++;
start_offset = 0;
} else {
start_offset -= flen;
}
if (span >= max_size || nsegs >= max_segs)
break;
slot++;
if (slot >= folioq_nr_slots(folioq)) {
folioq = folioq->next;
slot = 0;
}
} while (folioq);
/* The last folio counted may overshoot max_size, so clamp the result. */
return umin(span, max_size);
}
size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset, size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset,
size_t max_size, size_t max_segs) size_t max_size, size_t max_segs)
{ {
if (iov_iter_is_folioq(iter))
return netfs_limit_folioq(iter, start_offset, max_size, max_segs);
if (iov_iter_is_bvec(iter)) if (iov_iter_is_bvec(iter))
return netfs_limit_bvec(iter, start_offset, max_size, max_segs); return netfs_limit_bvec(iter, start_offset, max_size, max_segs);
if (iov_iter_is_xarray(iter)) if (iov_iter_is_xarray(iter))
......
...@@ -36,12 +36,14 @@ DEFINE_SPINLOCK(netfs_proc_lock); ...@@ -36,12 +36,14 @@ DEFINE_SPINLOCK(netfs_proc_lock);
static const char *netfs_origins[nr__netfs_io_origin] = { static const char *netfs_origins[nr__netfs_io_origin] = {
[NETFS_READAHEAD] = "RA", [NETFS_READAHEAD] = "RA",
[NETFS_READPAGE] = "RP", [NETFS_READPAGE] = "RP",
[NETFS_READ_GAPS] = "RG",
[NETFS_READ_FOR_WRITE] = "RW", [NETFS_READ_FOR_WRITE] = "RW",
[NETFS_DIO_READ] = "DR", [NETFS_DIO_READ] = "DR",
[NETFS_WRITEBACK] = "WB", [NETFS_WRITEBACK] = "WB",
[NETFS_WRITETHROUGH] = "WT", [NETFS_WRITETHROUGH] = "WT",
[NETFS_UNBUFFERED_WRITE] = "UW", [NETFS_UNBUFFERED_WRITE] = "UW",
[NETFS_DIO_WRITE] = "DW", [NETFS_DIO_WRITE] = "DW",
[NETFS_PGPRIV2_COPY_TO_CACHE] = "2C",
}; };
/* /*
...@@ -61,7 +63,7 @@ static int netfs_requests_seq_show(struct seq_file *m, void *v) ...@@ -61,7 +63,7 @@ static int netfs_requests_seq_show(struct seq_file *m, void *v)
rreq = list_entry(v, struct netfs_io_request, proc_link); rreq = list_entry(v, struct netfs_io_request, proc_link);
seq_printf(m, seq_printf(m,
"%08x %s %3d %2lx %4d %3d @%04llx %llx/%llx", "%08x %s %3d %2lx %4ld %3d @%04llx %llx/%llx",
rreq->debug_id, rreq->debug_id,
netfs_origins[rreq->origin], netfs_origins[rreq->origin],
refcount_read(&rreq->ref), refcount_read(&rreq->ref),
......
...@@ -36,7 +36,6 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, ...@@ -36,7 +36,6 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
memset(rreq, 0, kmem_cache_size(cache)); memset(rreq, 0, kmem_cache_size(cache));
rreq->start = start; rreq->start = start;
rreq->len = len; rreq->len = len;
rreq->upper_len = len;
rreq->origin = origin; rreq->origin = origin;
rreq->netfs_ops = ctx->ops; rreq->netfs_ops = ctx->ops;
rreq->mapping = mapping; rreq->mapping = mapping;
...@@ -44,6 +43,8 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, ...@@ -44,6 +43,8 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
rreq->i_size = i_size_read(inode); rreq->i_size = i_size_read(inode);
rreq->debug_id = atomic_inc_return(&debug_ids); rreq->debug_id = atomic_inc_return(&debug_ids);
rreq->wsize = INT_MAX; rreq->wsize = INT_MAX;
rreq->io_streams[0].sreq_max_len = ULONG_MAX;
rreq->io_streams[0].sreq_max_segs = 0;
spin_lock_init(&rreq->lock); spin_lock_init(&rreq->lock);
INIT_LIST_HEAD(&rreq->io_streams[0].subrequests); INIT_LIST_HEAD(&rreq->io_streams[0].subrequests);
INIT_LIST_HEAD(&rreq->io_streams[1].subrequests); INIT_LIST_HEAD(&rreq->io_streams[1].subrequests);
...@@ -52,9 +53,10 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, ...@@ -52,9 +53,10 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
if (origin == NETFS_READAHEAD || if (origin == NETFS_READAHEAD ||
origin == NETFS_READPAGE || origin == NETFS_READPAGE ||
origin == NETFS_READ_GAPS ||
origin == NETFS_READ_FOR_WRITE || origin == NETFS_READ_FOR_WRITE ||
origin == NETFS_DIO_READ) origin == NETFS_DIO_READ)
INIT_WORK(&rreq->work, netfs_rreq_work); INIT_WORK(&rreq->work, netfs_read_termination_worker);
else else
INIT_WORK(&rreq->work, netfs_write_collection_worker); INIT_WORK(&rreq->work, netfs_write_collection_worker);
...@@ -163,7 +165,7 @@ void netfs_put_request(struct netfs_io_request *rreq, bool was_async, ...@@ -163,7 +165,7 @@ void netfs_put_request(struct netfs_io_request *rreq, bool was_async,
if (was_async) { if (was_async) {
rreq->work.func = netfs_free_request; rreq->work.func = netfs_free_request;
if (!queue_work(system_unbound_wq, &rreq->work)) if (!queue_work(system_unbound_wq, &rreq->work))
BUG(); WARN_ON(1);
} else { } else {
netfs_free_request(&rreq->work); netfs_free_request(&rreq->work);
} }
......
This diff is collapsed.
// SPDX-License-Identifier: GPL-2.0-only
/* Read with PG_private_2 [DEPRECATED].
*
* Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*/
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"
/*
 * [DEPRECATED] Flag a folio as needing to be copied to the cache using
 * PG_private_2.  The folio's slot in the folio queue is also given the third
 * mark, which is what netfs_pgpriv2_write_to_the_cache() looks for when
 * deciding which folios to write.
 *
 * @subreq and @rreq are not used by this function.
 */
void netfs_pgpriv2_mark_copy_to_cache(struct netfs_io_subrequest *subreq,
				      struct netfs_io_request *rreq,
				      struct folio_queue *folioq,
				      int slot)
{
	struct folio *target;

	target = folioq_folio(folioq, slot);

	trace_netfs_folio(target, netfs_folio_trace_copy_to_cache);
	folio_start_private_2(target);
	folioq_mark3(folioq, slot);
}
/*
 * [DEPRECATED] Cancel PG_private_2 on all marked folios in the event of an
 * unrecoverable error.
 *
 * Every folio queue segment from @folioq onwards is visited; for each slot
 * still carrying the third mark, PG_private_2 is ended on the folio and the
 * mark is removed.
 */
static void netfs_pgpriv2_cancel(struct folio_queue *folioq)
{
	for (; folioq; folioq = folioq->next) {
		/* Drain the marks in this segment, lowest slot first. */
		while (folioq->marks3) {
			int slot = __ffs(folioq->marks3);
			struct folio *folio = folioq_folio(folioq, slot);

			trace_netfs_folio(folio, netfs_folio_trace_cancel_copy);
			folio_end_private_2(folio);
			folioq_unmark3(folioq, slot);
		}
	}
}
/*
 * [DEPRECATED] Copy a folio to the cache with PG_private_2 set.
 *
 * The folio is appended to @wreq's buffer and then handed, in one or more
 * pieces, to netfs_advance_write() on stream 1 of the request (referred to
 * here as the cache stream).  The amount submitted is trimmed to the inode's
 * current i_size, so a folio that straddles EOF is only submitted up to EOF.
 *
 * Returns 0 on success - including the case where the folio starts at or
 * beyond EOF, in which case PG_private_2 is simply ended and nothing is
 * submitted - or -ENOMEM if the folio could not be appended to the buffer.
 */
static int netfs_pgpriv2_copy_folio(struct netfs_io_request *wreq, struct folio *folio)
{
struct netfs_io_stream *cache = &wreq->io_streams[1];
size_t fsize = folio_size(folio), flen = fsize;
loff_t fpos = folio_pos(folio), i_size;
bool to_eof = false;
_enter("");
/* netfs_perform_write() may shift i_size around the page or from out
 * of the page to beyond it, but cannot move i_size into or through the
 * page since we have it locked.
 */
i_size = i_size_read(wreq->inode);
if (fpos >= i_size) {
/* mmap beyond eof. */
_debug("beyond eof");
folio_end_private_2(folio);
return 0;
}
/* If the folio reaches past the i_size recorded in the request, refresh
 * the request's copy from the value just read from the inode.
 */
if (fpos + fsize > wreq->i_size)
wreq->i_size = i_size;
/* Clamp the length to submit so that it stops at EOF and note whether
 * this folio's data runs right up to EOF.
 */
if (flen > i_size - fpos) {
flen = i_size - fpos;
to_eof = true;
} else if (flen == i_size - fpos) {
to_eof = true;
}
_debug("folio %zx %zx", flen, fsize);
trace_netfs_folio(folio, netfs_folio_trace_store_copy);
/* Attach the folio to the rolling buffer. */
if (netfs_buffer_append_folio(wreq, folio, false) < 0)
return -ENOMEM;
/* Set up the submission window on the cache stream: the maximum covers
 * the whole folio, but only the part inside i_size is to be submitted.
 */
cache->submit_max_len = fsize;
cache->submit_off = 0;
cache->submit_len = flen;
/* Attach the folio to one or more subrequests.  For a big folio, we
 * could end up with thousands of subrequests if the wsize is small -
 * but we might need to wait during the creation of subrequests for
 * network resources (eg. SMB credits).
 */
do {
ssize_t part;
wreq->io_iter.iov_offset = cache->submit_off;
atomic64_set(&wreq->issued_to, fpos + cache->submit_off);
/* NOTE(review): part is presumably the number of bytes taken by
 * this call - confirm against netfs_advance_write().
 */
part = netfs_advance_write(wreq, cache, fpos + cache->submit_off,
cache->submit_len, to_eof);
cache->submit_off += part;
cache->submit_max_len -= part;
/* Don't let submit_len go below zero if more was taken than was
 * left to submit.
 */
if (part > cache->submit_len)
cache->submit_len = 0;
else
cache->submit_len -= part;
} while (cache->submit_len > 0);
/* Step the request's iterator over the whole folio, not just the part
 * inside i_size, and record that issuing has reached the folio's end.
 */
wreq->io_iter.iov_offset = 0;
iov_iter_advance(&wreq->io_iter, fsize);
atomic64_set(&wreq->issued_to, fpos + fsize);
/* The folio was cut short by EOF, so call netfs_issue_write() on the
 * cache stream now rather than leaving it to a later folio.
 */
if (flen < fsize)
netfs_issue_write(wreq, cache);
_leave(" = 0");
return 0;
}
/*
 * [DEPRECATED] Go through the buffer and write any folios that are marked with
 * the third mark to the cache.
 *
 * A new write request of origin NETFS_PGPRIV2_COPY_TO_CACHE is created,
 * starting at the position of the first marked folio in @rreq's buffer.  Each
 * marked folio is then passed to netfs_pgpriv2_copy_folio() and has its mark
 * removed once that succeeds.
 *
 * If there are no marked folios, nothing is done.  On every other path the
 * function ends by running netfs_pgpriv2_cancel() over the buffer, which ends
 * PG_private_2 on whatever folios are still marked: all of them if the cache
 * is unavailable or the write request can't be created, the unprocessed
 * remainder if copying a folio fails, and none after a fully successful run.
 */
void netfs_pgpriv2_write_to_the_cache(struct netfs_io_request *rreq)
{
	struct netfs_io_request *wreq;
	struct folio_queue *folioq;
	struct folio *folio;
	int error = 0;
	int slot = 0;

	_enter("");

	if (!fscache_resources_valid(&rreq->cache_resources))
		goto couldnt_start;

	/* Need the first folio to be able to set up the op. */
	for (folioq = rreq->buffer; folioq; folioq = folioq->next) {
		if (folioq->marks3) {
			slot = __ffs(folioq->marks3);
			break;
		}
	}
	if (!folioq)
		return;
	folio = folioq_folio(folioq, slot);

	wreq = netfs_create_write_req(rreq->mapping, NULL, folio_pos(folio),
				      NETFS_PGPRIV2_COPY_TO_CACHE);
	if (IS_ERR(wreq)) {
		kleave(" [create %ld]", PTR_ERR(wreq));
		goto couldnt_start;
	}

	trace_netfs_write(wreq, netfs_write_trace_copy_to_cache);
	netfs_stat(&netfs_n_wh_copy_to_cache);

	for (;;) {
		error = netfs_pgpriv2_copy_folio(wreq, folio);
		if (error < 0)
			break;

		folioq_unmark3(folioq, slot);

		/* Move on to the next segment that actually has a marked
		 * folio.  Segments without any marks must be skipped rather
		 * than examined, as __ffs() is undefined for a zero argument.
		 */
		while (folioq && !folioq->marks3)
			folioq = folioq->next;
		if (!folioq)
			break;

		slot = __ffs(folioq->marks3);
		folio = folioq_folio(folioq, slot);
	}

	netfs_issue_write(wreq, &wreq->io_streams[1]);
	smp_wmb(); /* Write lists before ALL_QUEUED. */
	set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);

	netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
	_leave(" = %d", error);

	/* Deliberate fall-through: anything still marked was not copied. */
couldnt_start:
	netfs_pgpriv2_cancel(rreq->buffer);
}
/*
 * [DEPRECATED] Remove the PG_private_2 mark from any folios we've finished
 * copying.
 *
 * Starting at the head of @wreq's buffer (wreq->buffer at slot
 * wreq->buffer_head_slot), walk forward over the folios, ending PG_private_2
 * on each one whose end - capped at wreq->i_size - is covered by
 * wreq->collected_to, and clearing its slot in the folio queue.  The walk
 * stops at the first folio that isn't fully covered.  The new head position
 * is stored back in @wreq before returning.
 *
 * Returns true if PG_private_2 was ended on at least one folio.
 */
bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *wreq)
{
struct folio_queue *folioq = wreq->buffer;
unsigned long long collected_to = wreq->collected_to;
unsigned int slot = wreq->buffer_head_slot;
bool made_progress = false;
/* The saved head slot may be just past the end of the head segment, in
 * which case drop that segment and start on the next one.
 */
if (slot >= folioq_nr_slots(folioq)) {
folioq = netfs_delete_buffer_head(wreq);
slot = 0;
}
for (;;) {
struct folio *folio;
unsigned long long fpos, fend;
size_t fsize, flen;
folio = folioq_folio(folioq, slot);
/* Complain (once) if the folio isn't marked PG_private_2, but
 * carry on processing it regardless.
 */
if (WARN_ONCE(!folio_test_private_2(folio),
"R=%08x: folio %lx is not marked private_2\n",
wreq->debug_id, folio->index))
trace_netfs_folio(folio, netfs_folio_trace_not_under_wback);
fpos = folio_pos(folio);
fsize = folio_size(folio);
flen = fsize;
/* The folio only needs collecting up to i_size. */
fend = min_t(unsigned long long, fpos + flen, wreq->i_size);
trace_netfs_collect_folio(wreq, folio, fend, collected_to);
/* Unlock any folio we've transferred all of. */
if (collected_to < fend)
break;
trace_netfs_folio(folio, netfs_folio_trace_end_copy);
folio_end_private_2(folio);
wreq->cleaned_to = fpos + fsize;
made_progress = true;
/* Clean up the head folioq.  If we clear an entire folioq, then
 * we can get rid of it provided it's not also the tail folioq
 * being filled by the issuer.
 */
folioq_clear(folioq, slot);
slot++;
if (slot >= folioq_nr_slots(folioq)) {
if (READ_ONCE(wreq->buffer_tail) == folioq)
break;
folioq = netfs_delete_buffer_head(wreq);
slot = 0;
}
/* Stop once the folio just handled reaches the collection point. */
if (fpos + fsize >= collected_to)
break;
}
/* Remember where to resume from next time. */
wreq->buffer = folioq;
wreq->buffer_head_slot = slot;
return made_progress;
}
// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem read subrequest retrying.
*
* Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*/
#include <linux/fs.h>
#include <linux/slab.h>
#include "internal.h"
static void netfs_reissue_read(struct netfs_io_request *rreq,
struct netfs_io_subrequest *subreq)
{
struct iov_iter *io_iter = &subreq->io_iter;
if (iov_iter_is_folioq(io_iter)) {
subreq->curr_folioq = (struct folio_queue *)io_iter->folioq;
subreq->curr_folioq_slot = io_iter->folioq_slot;
subreq->curr_folio_order = subreq->curr_folioq->orders[subreq->curr_folioq_slot];
}
atomic_inc(&rreq->nr_outstanding);
__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
subreq->rreq->netfs_ops->issue_read(subreq);
}
/*
 * Go through the list of failed/short reads, retrying all retryable ones.  We
 * need to switch failed cache reads to network downloads.
 *
 * There are two modes of operation:
 *
 *  (1) If the filesystem provides no ->prepare_read() and the request is not
 *      flagged NETFS_RREQ_COPY_TO_CACHE, each subrequest flagged NEED_RETRY is
 *      simply reset and reissued as it stands, stopping at the first
 *      subrequest flagged FAILED.
 *
 *  (2) Otherwise, the subrequest list is decanted and rebuilt.  Runs of
 *      contiguous retryable subrequests are merged into a span, which is then
 *      re-split according to the (optionally renegotiated) maximum length and
 *      segment count and reissued as downloads from the server.
 *
 * If a non-retryable subrequest is found in mode (2), or a new subrequest
 * can't be allocated, everything not yet reissued is put back on the request's
 * list with an error set.
 */
static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
{
	struct netfs_io_subrequest *subreq;
	struct netfs_io_stream *stream0 = &rreq->io_streams[0];
	LIST_HEAD(sublist);
	LIST_HEAD(queue);

	_enter("R=%x", rreq->debug_id);

	if (list_empty(&rreq->subrequests))
		return;

	if (rreq->netfs_ops->retry_request)
		rreq->netfs_ops->retry_request(rreq, NULL);

	/* If there's no renegotiation to do, just resend each retryable subreq
	 * up to the first permanently failed one.
	 */
	if (!rreq->netfs_ops->prepare_read &&
	    !test_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags)) {
		list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
			if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
				break;
			if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
				netfs_reset_iter(subreq);
				netfs_reissue_read(rreq, subreq);
			}
		}
		return;
	}

	/* Okay, we need to renegotiate all the download requests and flip any
	 * failed cache reads over to being download requests and negotiate
	 * those also.  All fully successful subreqs have been removed from the
	 * list and any spare data from those has been donated.
	 *
	 * What we do is decant the list and rebuild it one subreq at a time so
	 * that we don't end up with donations jumping over a gap we're busy
	 * populating with smaller subrequests.  In the event that the subreq
	 * we just launched finishes before we insert the next subreq, it'll
	 * fill in rreq->prev_donated instead.
	 *
	 * Note: Alternatively, we could split the tail subrequest right before
	 * we reissue it and fix up the donations under lock.
	 */
	list_splice_init(&rreq->subrequests, &queue);

	do {
		struct netfs_io_subrequest *from;
		struct iov_iter source;
		unsigned long long start, len;
		size_t part, deferred_next_donated = 0;
		/* NOTE(review): nothing in this function ever sets boundary to
		 * true, so the __set_bit(NETFS_SREQ_BOUNDARY) below is
		 * currently unreachable.
		 */
		bool boundary = false;

		/* Go through the subreqs and find the next span of contiguous
		 * buffer that we then rejig (cifs, for example, needs the
		 * rsize renegotiating) and reissue.
		 */
		from = list_first_entry(&queue, struct netfs_io_subrequest, rreq_link);
		list_move_tail(&from->rreq_link, &sublist);
		start = from->start + from->transferred;
		len   = from->len   - from->transferred;

		_debug("from R=%08x[%x] s=%llx ctl=%zx/%zx/%zx",
		       rreq->debug_id, from->debug_index,
		       from->start, from->consumed, from->transferred, from->len);

		if (test_bit(NETFS_SREQ_FAILED, &from->flags) ||
		    !test_bit(NETFS_SREQ_NEED_RETRY, &from->flags))
			goto abandon;

		/* Extend the span over following subreqs that are contiguous,
		 * untouched and retryable.  The donation recorded on the last
		 * one absorbed is carried over to the last one reissued.
		 */
		deferred_next_donated = from->next_donated;
		while ((subreq = list_first_entry_or_null(
				&queue, struct netfs_io_subrequest, rreq_link))) {
			if (subreq->start != start + len ||
			    subreq->transferred > 0 ||
			    !test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags))
				break;
			list_move_tail(&subreq->rreq_link, &sublist);
			len += subreq->len;
			deferred_next_donated = subreq->next_donated;
			if (test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags))
				break;
		}

		_debug(" - range: %llx-%llx %llx", start, start + len - 1, len);

		/* Determine the set of buffers we're going to use.  Each
		 * subreq gets a subset of a single overall contiguous buffer.
		 */
		netfs_reset_iter(from);
		source = from->io_iter;
		source.count = len;

		/* Work through the sublist. */
		while ((subreq = list_first_entry_or_null(
				&sublist, struct netfs_io_subrequest, rreq_link))) {
			list_del(&subreq->rreq_link);

			subreq->source	= NETFS_DOWNLOAD_FROM_SERVER;
			subreq->start	= start - subreq->transferred;
			subreq->len	= len   + subreq->transferred;
			stream0->sreq_max_len = subreq->len;

			__clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
			__set_bit(NETFS_SREQ_RETRYING, &subreq->flags);

			spin_lock_bh(&rreq->lock);
			list_add_tail(&subreq->rreq_link, &rreq->subrequests);
			subreq->prev_donated += rreq->prev_donated;
			rreq->prev_donated = 0;
			trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
			spin_unlock_bh(&rreq->lock);

			BUG_ON(!len);

			/* Renegotiate max_len (rsize).  ->prepare_read() is
			 * optional: we can get here without one if the request
			 * is flagged NETFS_RREQ_COPY_TO_CACHE, in which case
			 * the limit set above is used as is.
			 */
			if (rreq->netfs_ops->prepare_read &&
			    rreq->netfs_ops->prepare_read(subreq) < 0) {
				trace_netfs_sreq(subreq, netfs_sreq_trace_reprep_failed);
				__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
			}

			part = umin(len, stream0->sreq_max_len);
			if (unlikely(rreq->io_streams[0].sreq_max_segs))
				part = netfs_limit_iter(&source, 0, part, stream0->sreq_max_segs);
			subreq->len = subreq->transferred + part;
			subreq->io_iter = source;
			iov_iter_truncate(&subreq->io_iter, part);
			iov_iter_advance(&source, part);
			len -= part;
			start += part;
			if (!len) {
				if (boundary)
					__set_bit(NETFS_SREQ_BOUNDARY, &subreq->flags);
				subreq->next_donated = deferred_next_donated;
			} else {
				__clear_bit(NETFS_SREQ_BOUNDARY, &subreq->flags);
				subreq->next_donated = 0;
			}

			netfs_reissue_read(rreq, subreq);
			if (!len)
				break;

			/* If we ran out of subrequests, allocate another. */
			if (list_empty(&sublist)) {
				subreq = netfs_alloc_subrequest(rreq);
				if (!subreq)
					goto abandon;
				subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
				subreq->start = start;

				/* We get two refs, but need just one. */
				netfs_put_subrequest(subreq, false, netfs_sreq_trace_new);
				trace_netfs_sreq(subreq, netfs_sreq_trace_split);
				list_add_tail(&subreq->rreq_link, &sublist);
			}
		}

		/* If we managed to use fewer subreqs, we can discard the
		 * excess.
		 */
		while ((subreq = list_first_entry_or_null(
				&sublist, struct netfs_io_subrequest, rreq_link))) {
			trace_netfs_sreq(subreq, netfs_sreq_trace_discard);
			list_del(&subreq->rreq_link);
			netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
		}

	} while (!list_empty(&queue));

	return;

	/* If we hit ENOMEM, fail all remaining subrequests */
abandon:
	/* Anything gathered into the current span goes back on the front of
	 * the queue so that it is dealt with along with the rest.
	 */
	list_splice_init(&sublist, &queue);
	list_for_each_entry(subreq, &queue, rreq_link) {
		if (!subreq->error)
			subreq->error = -ENOMEM;
		/* NOTE(review): FAILED is cleared rather than set here even
		 * though the subrequest is being failed - confirm this is what
		 * the collector expects.
		 */
		__clear_bit(NETFS_SREQ_FAILED, &subreq->flags);
		__clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
		__clear_bit(NETFS_SREQ_RETRYING, &subreq->flags);
	}
	spin_lock_bh(&rreq->lock);
	list_splice_tail_init(&queue, &rreq->subrequests);
	spin_unlock_bh(&rreq->lock);
}
/*
 * Retry reads.
 *
 * An extra count is held on rreq->nr_outstanding for the duration of the
 * resubmission.  If dropping that count brings the counter to zero,
 * netfs_rreq_terminated() is called from here.
 */
void netfs_retry_reads(struct netfs_io_request *rreq)
{
	trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit);

	atomic_inc(&rreq->nr_outstanding);
	netfs_retry_read_subrequests(rreq);

	if (!atomic_dec_and_test(&rreq->nr_outstanding))
		return;

	netfs_rreq_terminated(rreq, false);
}
/*
 * Unlock any pages that haven't been unlocked yet due to abandoned
 * subrequests.
 *
 * Every occupied slot in every segment of the request's buffer is examined;
 * a folio is unlocked unless its slot carries the second mark (presumably
 * meaning it has already been dealt with - confirm against where mark 2 is
 * set).
 */
void netfs_unlock_abandoned_read_pages(struct netfs_io_request *rreq)
{
	struct folio_queue *fq = rreq->buffer;

	while (fq) {
		for (int slot = 0; slot < folioq_count(fq); slot++) {
			struct folio *folio = folioq_folio(fq, slot);

			if (!folio || folioq_is_marked2(fq, slot))
				continue;

			trace_netfs_folio(folio, netfs_folio_trace_abandon);
			folio_unlock(folio);
		}
		fq = fq->next;
	}
}
...@@ -32,6 +32,7 @@ atomic_t netfs_n_wh_buffered_write; ...@@ -32,6 +32,7 @@ atomic_t netfs_n_wh_buffered_write;
atomic_t netfs_n_wh_writethrough; atomic_t netfs_n_wh_writethrough;
atomic_t netfs_n_wh_dio_write; atomic_t netfs_n_wh_dio_write;
atomic_t netfs_n_wh_writepages; atomic_t netfs_n_wh_writepages;
atomic_t netfs_n_wh_copy_to_cache;
atomic_t netfs_n_wh_wstream_conflict; atomic_t netfs_n_wh_wstream_conflict;
atomic_t netfs_n_wh_upload; atomic_t netfs_n_wh_upload;
atomic_t netfs_n_wh_upload_done; atomic_t netfs_n_wh_upload_done;
...@@ -51,11 +52,12 @@ int netfs_stats_show(struct seq_file *m, void *v) ...@@ -51,11 +52,12 @@ int netfs_stats_show(struct seq_file *m, void *v)
atomic_read(&netfs_n_rh_read_folio), atomic_read(&netfs_n_rh_read_folio),
atomic_read(&netfs_n_rh_write_begin), atomic_read(&netfs_n_rh_write_begin),
atomic_read(&netfs_n_rh_write_zskip)); atomic_read(&netfs_n_rh_write_zskip));
seq_printf(m, "Writes : BW=%u WT=%u DW=%u WP=%u\n", seq_printf(m, "Writes : BW=%u WT=%u DW=%u WP=%u 2C=%u\n",
atomic_read(&netfs_n_wh_buffered_write), atomic_read(&netfs_n_wh_buffered_write),
atomic_read(&netfs_n_wh_writethrough), atomic_read(&netfs_n_wh_writethrough),
atomic_read(&netfs_n_wh_dio_write), atomic_read(&netfs_n_wh_dio_write),
atomic_read(&netfs_n_wh_writepages)); atomic_read(&netfs_n_wh_writepages),
atomic_read(&netfs_n_wh_copy_to_cache));
seq_printf(m, "ZeroOps: ZR=%u sh=%u sk=%u\n", seq_printf(m, "ZeroOps: ZR=%u sh=%u sk=%u\n",
atomic_read(&netfs_n_rh_zero), atomic_read(&netfs_n_rh_zero),
atomic_read(&netfs_n_rh_short_read), atomic_read(&netfs_n_rh_short_read),
......
...@@ -87,6 +87,12 @@ static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq, ...@@ -87,6 +87,12 @@ static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq,
unsigned long long collected_to = wreq->collected_to; unsigned long long collected_to = wreq->collected_to;
unsigned int slot = wreq->buffer_head_slot; unsigned int slot = wreq->buffer_head_slot;
if (wreq->origin == NETFS_PGPRIV2_COPY_TO_CACHE) {
if (netfs_pgpriv2_unlock_copied_folios(wreq))
*notes |= MADE_PROGRESS;
return;
}
if (slot >= folioq_nr_slots(folioq)) { if (slot >= folioq_nr_slots(folioq)) {
folioq = netfs_delete_buffer_head(wreq); folioq = netfs_delete_buffer_head(wreq);
slot = 0; slot = 0;
...@@ -383,7 +389,8 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq) ...@@ -383,7 +389,8 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq)
smp_rmb(); smp_rmb();
collected_to = ULLONG_MAX; collected_to = ULLONG_MAX;
if (wreq->origin == NETFS_WRITEBACK || if (wreq->origin == NETFS_WRITEBACK ||
wreq->origin == NETFS_WRITETHROUGH) wreq->origin == NETFS_WRITETHROUGH ||
wreq->origin == NETFS_PGPRIV2_COPY_TO_CACHE)
notes = BUFFERED; notes = BUFFERED;
else else
notes = 0; notes = 0;
......
...@@ -95,7 +95,8 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, ...@@ -95,7 +95,8 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
struct netfs_io_request *wreq; struct netfs_io_request *wreq;
struct netfs_inode *ictx; struct netfs_inode *ictx;
bool is_buffered = (origin == NETFS_WRITEBACK || bool is_buffered = (origin == NETFS_WRITEBACK ||
origin == NETFS_WRITETHROUGH); origin == NETFS_WRITETHROUGH ||
origin == NETFS_PGPRIV2_COPY_TO_CACHE);
wreq = netfs_alloc_request(mapping, file, start, 0, origin); wreq = netfs_alloc_request(mapping, file, start, 0, origin);
if (IS_ERR(wreq)) if (IS_ERR(wreq))
...@@ -161,10 +162,6 @@ static void netfs_prepare_write(struct netfs_io_request *wreq, ...@@ -161,10 +162,6 @@ static void netfs_prepare_write(struct netfs_io_request *wreq,
_enter("R=%x[%x]", wreq->debug_id, subreq->debug_index); _enter("R=%x[%x]", wreq->debug_id, subreq->debug_index);
trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
refcount_read(&subreq->ref),
netfs_sreq_trace_new);
trace_netfs_sreq(subreq, netfs_sreq_trace_prepare); trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
stream->sreq_max_len = UINT_MAX; stream->sreq_max_len = UINT_MAX;
...@@ -241,8 +238,8 @@ void netfs_reissue_write(struct netfs_io_stream *stream, ...@@ -241,8 +238,8 @@ void netfs_reissue_write(struct netfs_io_stream *stream,
netfs_do_issue_write(stream, subreq); netfs_do_issue_write(stream, subreq);
} }
static void netfs_issue_write(struct netfs_io_request *wreq, void netfs_issue_write(struct netfs_io_request *wreq,
struct netfs_io_stream *stream) struct netfs_io_stream *stream)
{ {
struct netfs_io_subrequest *subreq = stream->construct; struct netfs_io_subrequest *subreq = stream->construct;
...@@ -259,9 +256,9 @@ static void netfs_issue_write(struct netfs_io_request *wreq, ...@@ -259,9 +256,9 @@ static void netfs_issue_write(struct netfs_io_request *wreq,
* we can avoid overrunning the credits obtained (cifs) and try to parallelise * we can avoid overrunning the credits obtained (cifs) and try to parallelise
* content-crypto preparation with network writes. * content-crypto preparation with network writes.
*/ */
static int netfs_advance_write(struct netfs_io_request *wreq, int netfs_advance_write(struct netfs_io_request *wreq,
struct netfs_io_stream *stream, struct netfs_io_stream *stream,
loff_t start, size_t len, bool to_eof) loff_t start, size_t len, bool to_eof)
{ {
struct netfs_io_subrequest *subreq = stream->construct; struct netfs_io_subrequest *subreq = stream->construct;
size_t part; size_t part;
......
...@@ -267,6 +267,7 @@ static int nfs_netfs_init_request(struct netfs_io_request *rreq, struct file *fi ...@@ -267,6 +267,7 @@ static int nfs_netfs_init_request(struct netfs_io_request *rreq, struct file *fi
rreq->debug_id = atomic_inc_return(&nfs_netfs_debug_id); rreq->debug_id = atomic_inc_return(&nfs_netfs_debug_id);
/* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */ /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
__set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags); __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags);
rreq->io_streams[0].sreq_max_len = NFS_SB(rreq->inode->i_sb)->rsize;
return 0; return 0;
} }
...@@ -288,14 +289,6 @@ static struct nfs_netfs_io_data *nfs_netfs_alloc(struct netfs_io_subrequest *sre ...@@ -288,14 +289,6 @@ static struct nfs_netfs_io_data *nfs_netfs_alloc(struct netfs_io_subrequest *sre
return netfs; return netfs;
} }
static bool nfs_netfs_clamp_length(struct netfs_io_subrequest *sreq)
{
size_t rsize = NFS_SB(sreq->rreq->inode->i_sb)->rsize;
sreq->len = min(sreq->len, rsize);
return true;
}
static void nfs_netfs_issue_read(struct netfs_io_subrequest *sreq) static void nfs_netfs_issue_read(struct netfs_io_subrequest *sreq)
{ {
struct nfs_netfs_io_data *netfs; struct nfs_netfs_io_data *netfs;
...@@ -304,17 +297,18 @@ static void nfs_netfs_issue_read(struct netfs_io_subrequest *sreq) ...@@ -304,17 +297,18 @@ static void nfs_netfs_issue_read(struct netfs_io_subrequest *sreq)
struct nfs_open_context *ctx = sreq->rreq->netfs_priv; struct nfs_open_context *ctx = sreq->rreq->netfs_priv;
struct page *page; struct page *page;
unsigned long idx; unsigned long idx;
pgoff_t start, last;
int err; int err;
pgoff_t start = (sreq->start + sreq->transferred) >> PAGE_SHIFT;
pgoff_t last = ((sreq->start + sreq->len - start = (sreq->start + sreq->transferred) >> PAGE_SHIFT;
sreq->transferred - 1) >> PAGE_SHIFT); last = ((sreq->start + sreq->len - sreq->transferred - 1) >> PAGE_SHIFT);
nfs_pageio_init_read(&pgio, inode, false, nfs_pageio_init_read(&pgio, inode, false,
&nfs_async_read_completion_ops); &nfs_async_read_completion_ops);
netfs = nfs_netfs_alloc(sreq); netfs = nfs_netfs_alloc(sreq);
if (!netfs) if (!netfs)
return netfs_subreq_terminated(sreq, -ENOMEM, false); return netfs_read_subreq_terminated(sreq, -ENOMEM, false);
pgio.pg_netfs = netfs; /* used in completion */ pgio.pg_netfs = netfs; /* used in completion */
...@@ -380,5 +374,4 @@ const struct netfs_request_ops nfs_netfs_ops = { ...@@ -380,5 +374,4 @@ const struct netfs_request_ops nfs_netfs_ops = {
.init_request = nfs_netfs_init_request, .init_request = nfs_netfs_init_request,
.free_request = nfs_netfs_free_request, .free_request = nfs_netfs_free_request,
.issue_read = nfs_netfs_issue_read, .issue_read = nfs_netfs_issue_read,
.clamp_length = nfs_netfs_clamp_length
}; };
...@@ -60,8 +60,6 @@ static inline void nfs_netfs_get(struct nfs_netfs_io_data *netfs) ...@@ -60,8 +60,6 @@ static inline void nfs_netfs_get(struct nfs_netfs_io_data *netfs)
static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs) static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs)
{ {
ssize_t final_len;
/* Only the last RPC completion should call netfs_subreq_terminated() */ /* Only the last RPC completion should call netfs_subreq_terminated() */
if (!refcount_dec_and_test(&netfs->refcount)) if (!refcount_dec_and_test(&netfs->refcount))
return; return;
...@@ -74,8 +72,9 @@ static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs) ...@@ -74,8 +72,9 @@ static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs)
* Correct the final length here to be no larger than the netfs subrequest * Correct the final length here to be no larger than the netfs subrequest
* length, and thus avoid netfs's "Subreq overread" warning message. * length, and thus avoid netfs's "Subreq overread" warning message.
*/ */
final_len = min_t(s64, netfs->sreq->len, atomic64_read(&netfs->transferred)); netfs->sreq->transferred = min_t(s64, netfs->sreq->len,
netfs_subreq_terminated(netfs->sreq, netfs->error ?: final_len, false); atomic64_read(&netfs->transferred));
netfs_read_subreq_terminated(netfs->sreq, netfs->error, false);
kfree(netfs); kfree(netfs);
} }
static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi) static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi)
......
...@@ -1485,7 +1485,6 @@ struct cifs_io_subrequest { ...@@ -1485,7 +1485,6 @@ struct cifs_io_subrequest {
struct cifs_io_request *req; struct cifs_io_request *req;
}; };
ssize_t got_bytes; ssize_t got_bytes;
size_t actual_len;
unsigned int xid; unsigned int xid;
int result; int result;
bool have_xid; bool have_xid;
......
...@@ -1309,10 +1309,8 @@ cifs_readv_callback(struct mid_q_entry *mid) ...@@ -1309,10 +1309,8 @@ cifs_readv_callback(struct mid_q_entry *mid)
if (rdata->result == 0 || rdata->result == -EAGAIN) if (rdata->result == 0 || rdata->result == -EAGAIN)
iov_iter_advance(&rdata->subreq.io_iter, rdata->got_bytes); iov_iter_advance(&rdata->subreq.io_iter, rdata->got_bytes);
rdata->credits.value = 0; rdata->credits.value = 0;
netfs_subreq_terminated(&rdata->subreq, rdata->subreq.transferred += rdata->got_bytes;
(rdata->result == 0 || rdata->result == -EAGAIN) ? netfs_read_subreq_terminated(&rdata->subreq, rdata->result, false);
rdata->got_bytes : rdata->result,
false);
release_mid(mid); release_mid(mid);
add_credits(server, &credits, 0); add_credits(server, &credits, 0);
} }
......
...@@ -112,7 +112,6 @@ static void cifs_issue_write(struct netfs_io_subrequest *subreq) ...@@ -112,7 +112,6 @@ static void cifs_issue_write(struct netfs_io_subrequest *subreq)
goto fail; goto fail;
} }
wdata->actual_len = wdata->subreq.len;
rc = adjust_credits(wdata->server, wdata, cifs_trace_rw_credits_issue_write_adjust); rc = adjust_credits(wdata->server, wdata, cifs_trace_rw_credits_issue_write_adjust);
if (rc) if (rc)
goto fail; goto fail;
...@@ -141,25 +140,22 @@ static void cifs_netfs_invalidate_cache(struct netfs_io_request *wreq) ...@@ -141,25 +140,22 @@ static void cifs_netfs_invalidate_cache(struct netfs_io_request *wreq)
} }
/* /*
* Split the read up according to how many credits we can get for each piece. * Negotiate the size of a read operation on behalf of the netfs library.
* It's okay to sleep here if we need to wait for more credit to become
* available.
*
* We also choose the server and allocate an operation ID to be cleaned up
* later.
*/ */
static bool cifs_clamp_length(struct netfs_io_subrequest *subreq) static int cifs_prepare_read(struct netfs_io_subrequest *subreq)
{ {
struct netfs_io_request *rreq = subreq->rreq; struct netfs_io_request *rreq = subreq->rreq;
struct netfs_io_stream *stream = &rreq->io_streams[subreq->stream_nr];
struct cifs_io_subrequest *rdata = container_of(subreq, struct cifs_io_subrequest, subreq); struct cifs_io_subrequest *rdata = container_of(subreq, struct cifs_io_subrequest, subreq);
struct cifs_io_request *req = container_of(subreq->rreq, struct cifs_io_request, rreq); struct cifs_io_request *req = container_of(subreq->rreq, struct cifs_io_request, rreq);
struct TCP_Server_Info *server = req->server; struct TCP_Server_Info *server = req->server;
struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb); struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb);
int rc; size_t size;
int rc = 0;
rdata->xid = get_xid(); if (!rdata->have_xid) {
rdata->have_xid = true; rdata->xid = get_xid();
rdata->have_xid = true;
}
rdata->server = server; rdata->server = server;
if (cifs_sb->ctx->rsize == 0) if (cifs_sb->ctx->rsize == 0)
...@@ -167,13 +163,12 @@ static bool cifs_clamp_length(struct netfs_io_subrequest *subreq) ...@@ -167,13 +163,12 @@ static bool cifs_clamp_length(struct netfs_io_subrequest *subreq)
server->ops->negotiate_rsize(tlink_tcon(req->cfile->tlink), server->ops->negotiate_rsize(tlink_tcon(req->cfile->tlink),
cifs_sb->ctx); cifs_sb->ctx);
rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
&stream->sreq_max_len, &rdata->credits); &size, &rdata->credits);
if (rc) { if (rc)
subreq->error = rc; return rc;
return false;
} rreq->io_streams[0].sreq_max_len = size;
rdata->credits.in_flight_check = 1; rdata->credits.in_flight_check = 1;
rdata->credits.rreq_debug_id = rreq->debug_id; rdata->credits.rreq_debug_id = rreq->debug_id;
...@@ -185,14 +180,11 @@ static bool cifs_clamp_length(struct netfs_io_subrequest *subreq) ...@@ -185,14 +180,11 @@ static bool cifs_clamp_length(struct netfs_io_subrequest *subreq)
server->credits, server->in_flight, 0, server->credits, server->in_flight, 0,
cifs_trace_rw_credits_read_submit); cifs_trace_rw_credits_read_submit);
subreq->len = umin(subreq->len, stream->sreq_max_len);
rdata->actual_len = subreq->len;
#ifdef CONFIG_CIFS_SMB_DIRECT #ifdef CONFIG_CIFS_SMB_DIRECT
if (server->smbd_conn) if (server->smbd_conn)
stream->sreq_max_segs = server->smbd_conn->max_frmr_depth; rreq->io_streams[0].sreq_max_segs = server->smbd_conn->max_frmr_depth;
#endif #endif
return true; return 0;
} }
/* /*
...@@ -201,59 +193,41 @@ static bool cifs_clamp_length(struct netfs_io_subrequest *subreq) ...@@ -201,59 +193,41 @@ static bool cifs_clamp_length(struct netfs_io_subrequest *subreq)
* to only read a portion of that, but as long as we read something, the netfs * to only read a portion of that, but as long as we read something, the netfs
* helper will call us again so that we can issue another read. * helper will call us again so that we can issue another read.
*/ */
static void cifs_req_issue_read(struct netfs_io_subrequest *subreq) static void cifs_issue_read(struct netfs_io_subrequest *subreq)
{ {
struct netfs_io_request *rreq = subreq->rreq; struct netfs_io_request *rreq = subreq->rreq;
struct cifs_io_subrequest *rdata = container_of(subreq, struct cifs_io_subrequest, subreq); struct cifs_io_subrequest *rdata = container_of(subreq, struct cifs_io_subrequest, subreq);
struct cifs_io_request *req = container_of(subreq->rreq, struct cifs_io_request, rreq); struct cifs_io_request *req = container_of(subreq->rreq, struct cifs_io_request, rreq);
struct TCP_Server_Info *server = req->server; struct TCP_Server_Info *server = req->server;
struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb);
int rc = 0; int rc = 0;
cifs_dbg(FYI, "%s: op=%08x[%x] mapping=%p len=%zu/%zu\n", cifs_dbg(FYI, "%s: op=%08x[%x] mapping=%p len=%zu/%zu\n",
__func__, rreq->debug_id, subreq->debug_index, rreq->mapping, __func__, rreq->debug_id, subreq->debug_index, rreq->mapping,
subreq->transferred, subreq->len); subreq->transferred, subreq->len);
if (test_bit(NETFS_SREQ_RETRYING, &subreq->flags)) { rc = adjust_credits(server, rdata, cifs_trace_rw_credits_issue_read_adjust);
/* if (rc)
* As we're issuing a retry, we need to negotiate some new goto failed;
* credits otherwise the server may reject the op with
* INVALID_PARAMETER. Note, however, we may get back less
* credit than we need to complete the op, in which case, we
* shorten the op and rely on additional rounds of retry.
*/
size_t rsize = umin(subreq->len - subreq->transferred,
cifs_sb->ctx->rsize);
rc = server->ops->wait_mtu_credits(server, rsize, &rdata->actual_len,
&rdata->credits);
if (rc)
goto out;
rdata->credits.in_flight_check = 1;
trace_smb3_rw_credits(rdata->rreq->debug_id,
rdata->subreq.debug_index,
rdata->credits.value,
server->credits, server->in_flight, 0,
cifs_trace_rw_credits_read_resubmit);
}
if (req->cfile->invalidHandle) { if (req->cfile->invalidHandle) {
do { do {
rc = cifs_reopen_file(req->cfile, true); rc = cifs_reopen_file(req->cfile, true);
} while (rc == -EAGAIN); } while (rc == -EAGAIN);
if (rc) if (rc)
goto out; goto failed;
} }
if (subreq->rreq->origin != NETFS_DIO_READ) if (subreq->rreq->origin != NETFS_DIO_READ)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
rc = rdata->server->ops->async_readv(rdata); rc = rdata->server->ops->async_readv(rdata);
out:
if (rc) if (rc)
netfs_subreq_terminated(subreq, rc, false); goto failed;
return;
failed:
netfs_read_subreq_terminated(subreq, rc, false);
} }
/* /*
...@@ -364,8 +338,8 @@ const struct netfs_request_ops cifs_req_ops = { ...@@ -364,8 +338,8 @@ const struct netfs_request_ops cifs_req_ops = {
.init_request = cifs_init_request, .init_request = cifs_init_request,
.free_request = cifs_free_request, .free_request = cifs_free_request,
.free_subrequest = cifs_free_subrequest, .free_subrequest = cifs_free_subrequest,
.clamp_length = cifs_clamp_length, .prepare_read = cifs_prepare_read,
.issue_read = cifs_req_issue_read, .issue_read = cifs_issue_read,
.done = cifs_rreq_done, .done = cifs_rreq_done,
.begin_writeback = cifs_begin_writeback, .begin_writeback = cifs_begin_writeback,
.prepare_write = cifs_prepare_write, .prepare_write = cifs_prepare_write,
......
...@@ -301,7 +301,8 @@ smb2_adjust_credits(struct TCP_Server_Info *server, ...@@ -301,7 +301,8 @@ smb2_adjust_credits(struct TCP_Server_Info *server,
unsigned int /*enum smb3_rw_credits_trace*/ trace) unsigned int /*enum smb3_rw_credits_trace*/ trace)
{ {
struct cifs_credits *credits = &subreq->credits; struct cifs_credits *credits = &subreq->credits;
int new_val = DIV_ROUND_UP(subreq->actual_len, SMB2_MAX_BUFFER_SIZE); int new_val = DIV_ROUND_UP(subreq->subreq.len - subreq->subreq.transferred,
SMB2_MAX_BUFFER_SIZE);
int scredits, in_flight; int scredits, in_flight;
if (!credits->value || credits->value == new_val) if (!credits->value || credits->value == new_val)
......
...@@ -4498,9 +4498,7 @@ static void smb2_readv_worker(struct work_struct *work) ...@@ -4498,9 +4498,7 @@ static void smb2_readv_worker(struct work_struct *work)
struct cifs_io_subrequest *rdata = struct cifs_io_subrequest *rdata =
container_of(work, struct cifs_io_subrequest, subreq.work); container_of(work, struct cifs_io_subrequest, subreq.work);
netfs_subreq_terminated(&rdata->subreq, netfs_read_subreq_terminated(&rdata->subreq, rdata->result, false);
(rdata->result == 0 || rdata->result == -EAGAIN) ?
rdata->got_bytes : rdata->result, true);
} }
static void static void
...@@ -4532,7 +4530,7 @@ smb2_readv_callback(struct mid_q_entry *mid) ...@@ -4532,7 +4530,7 @@ smb2_readv_callback(struct mid_q_entry *mid)
cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%zu/%zu\n", cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%zu/%zu\n",
__func__, mid->mid, mid->mid_state, rdata->result, __func__, mid->mid, mid->mid_state, rdata->result,
rdata->actual_len, rdata->subreq.len - rdata->subreq.transferred); rdata->got_bytes, rdata->subreq.len - rdata->subreq.transferred);
switch (mid->mid_state) { switch (mid->mid_state) {
case MID_RESPONSE_RECEIVED: case MID_RESPONSE_RECEIVED:
...@@ -4554,6 +4552,7 @@ smb2_readv_callback(struct mid_q_entry *mid) ...@@ -4554,6 +4552,7 @@ smb2_readv_callback(struct mid_q_entry *mid)
break; break;
case MID_REQUEST_SUBMITTED: case MID_REQUEST_SUBMITTED:
case MID_RETRY_NEEDED: case MID_RETRY_NEEDED:
__set_bit(NETFS_SREQ_NEED_RETRY, &rdata->subreq.flags);
rdata->result = -EAGAIN; rdata->result = -EAGAIN;
if (server->sign && rdata->got_bytes) if (server->sign && rdata->got_bytes)
/* reset bytes number since we can not check a sign */ /* reset bytes number since we can not check a sign */
...@@ -4588,7 +4587,7 @@ smb2_readv_callback(struct mid_q_entry *mid) ...@@ -4588,7 +4587,7 @@ smb2_readv_callback(struct mid_q_entry *mid)
rdata->req->cfile->fid.persistent_fid, rdata->req->cfile->fid.persistent_fid,
tcon->tid, tcon->ses->Suid, tcon->tid, tcon->ses->Suid,
rdata->subreq.start + rdata->subreq.transferred, rdata->subreq.start + rdata->subreq.transferred,
rdata->actual_len, rdata->subreq.len - rdata->subreq.transferred,
rdata->result); rdata->result);
} else } else
trace_smb3_read_done(rdata->rreq->debug_id, trace_smb3_read_done(rdata->rreq->debug_id,
...@@ -4603,9 +4602,9 @@ smb2_readv_callback(struct mid_q_entry *mid) ...@@ -4603,9 +4602,9 @@ smb2_readv_callback(struct mid_q_entry *mid)
__set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags); __set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags);
rdata->result = 0; rdata->result = 0;
} else { } else {
if (rdata->got_bytes < rdata->actual_len && size_t trans = rdata->subreq.transferred + rdata->got_bytes;
rdata->subreq.start + rdata->subreq.transferred + rdata->got_bytes == if (trans < rdata->subreq.len &&
ictx->remote_i_size) { rdata->subreq.start + trans == ictx->remote_i_size) {
__set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags); __set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags);
rdata->result = 0; rdata->result = 0;
} }
...@@ -4614,6 +4613,10 @@ smb2_readv_callback(struct mid_q_entry *mid) ...@@ -4614,6 +4613,10 @@ smb2_readv_callback(struct mid_q_entry *mid)
server->credits, server->in_flight, server->credits, server->in_flight,
0, cifs_trace_rw_credits_read_response_clear); 0, cifs_trace_rw_credits_read_response_clear);
rdata->credits.value = 0; rdata->credits.value = 0;
rdata->subreq.transferred += rdata->got_bytes;
if (rdata->subreq.start + rdata->subreq.transferred >= rdata->subreq.rreq->i_size)
__set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags);
trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_progress);
INIT_WORK(&rdata->subreq.work, smb2_readv_worker); INIT_WORK(&rdata->subreq.work, smb2_readv_worker);
queue_work(cifsiod_wq, &rdata->subreq.work); queue_work(cifsiod_wq, &rdata->subreq.work);
release_mid(mid); release_mid(mid);
...@@ -4648,7 +4651,7 @@ smb2_async_readv(struct cifs_io_subrequest *rdata) ...@@ -4648,7 +4651,7 @@ smb2_async_readv(struct cifs_io_subrequest *rdata)
io_parms.tcon = tlink_tcon(rdata->req->cfile->tlink); io_parms.tcon = tlink_tcon(rdata->req->cfile->tlink);
io_parms.server = server = rdata->server; io_parms.server = server = rdata->server;
io_parms.offset = subreq->start + subreq->transferred; io_parms.offset = subreq->start + subreq->transferred;
io_parms.length = rdata->actual_len; io_parms.length = subreq->len - subreq->transferred;
io_parms.persistent_fid = rdata->req->cfile->fid.persistent_fid; io_parms.persistent_fid = rdata->req->cfile->fid.persistent_fid;
io_parms.volatile_fid = rdata->req->cfile->fid.volatile_fid; io_parms.volatile_fid = rdata->req->cfile->fid.volatile_fid;
io_parms.pid = rdata->req->pid; io_parms.pid = rdata->req->pid;
...@@ -4669,7 +4672,7 @@ smb2_async_readv(struct cifs_io_subrequest *rdata) ...@@ -4669,7 +4672,7 @@ smb2_async_readv(struct cifs_io_subrequest *rdata)
shdr = (struct smb2_hdr *)buf; shdr = (struct smb2_hdr *)buf;
if (rdata->credits.value > 0) { if (rdata->credits.value > 0) {
shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->actual_len, shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(io_parms.length,
SMB2_MAX_BUFFER_SIZE)); SMB2_MAX_BUFFER_SIZE));
credit_request = le16_to_cpu(shdr->CreditCharge) + 8; credit_request = le16_to_cpu(shdr->CreditCharge) + 8;
if (server->credits >= server->max_credits) if (server->credits >= server->max_credits)
...@@ -4697,7 +4700,8 @@ smb2_async_readv(struct cifs_io_subrequest *rdata) ...@@ -4697,7 +4700,8 @@ smb2_async_readv(struct cifs_io_subrequest *rdata)
rdata->xid, io_parms.persistent_fid, rdata->xid, io_parms.persistent_fid,
io_parms.tcon->tid, io_parms.tcon->tid,
io_parms.tcon->ses->Suid, io_parms.tcon->ses->Suid,
io_parms.offset, rdata->actual_len, rc); io_parms.offset,
subreq->len - subreq->transferred, rc);
} }
async_readv_out: async_readv_out:
...@@ -4880,6 +4884,7 @@ smb2_writev_callback(struct mid_q_entry *mid) ...@@ -4880,6 +4884,7 @@ smb2_writev_callback(struct mid_q_entry *mid)
server->credits, server->in_flight, server->credits, server->in_flight,
0, cifs_trace_rw_credits_write_response_clear); 0, cifs_trace_rw_credits_write_response_clear);
wdata->credits.value = 0; wdata->credits.value = 0;
trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_progress);
cifs_write_subrequest_terminated(wdata, result ?: written, true); cifs_write_subrequest_terminated(wdata, result ?: written, true);
release_mid(mid); release_mid(mid);
trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0, trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0,
......
...@@ -27,6 +27,7 @@ struct folio_queue { ...@@ -27,6 +27,7 @@ struct folio_queue {
struct folio_queue *prev; /* Previous queue segment of NULL */ struct folio_queue *prev; /* Previous queue segment of NULL */
unsigned long marks; /* 1-bit mark per folio */ unsigned long marks; /* 1-bit mark per folio */
unsigned long marks2; /* Second 1-bit mark per folio */ unsigned long marks2; /* Second 1-bit mark per folio */
unsigned long marks3; /* Third 1-bit mark per folio */
#if PAGEVEC_SIZE > BITS_PER_LONG #if PAGEVEC_SIZE > BITS_PER_LONG
#error marks is not big enough #error marks is not big enough
#endif #endif
...@@ -39,6 +40,7 @@ static inline void folioq_init(struct folio_queue *folioq) ...@@ -39,6 +40,7 @@ static inline void folioq_init(struct folio_queue *folioq)
folioq->prev = NULL; folioq->prev = NULL;
folioq->marks = 0; folioq->marks = 0;
folioq->marks2 = 0; folioq->marks2 = 0;
folioq->marks3 = 0;
} }
static inline unsigned int folioq_nr_slots(const struct folio_queue *folioq) static inline unsigned int folioq_nr_slots(const struct folio_queue *folioq)
...@@ -87,6 +89,21 @@ static inline void folioq_unmark2(struct folio_queue *folioq, unsigned int slot) ...@@ -87,6 +89,21 @@ static inline void folioq_unmark2(struct folio_queue *folioq, unsigned int slot)
clear_bit(slot, &folioq->marks2); clear_bit(slot, &folioq->marks2);
} }
/*
 * Test whether the bit for @slot is set in the third per-folio mark word
 * (marks3) of the queue segment @folioq.
 */
static inline bool folioq_is_marked3(const struct folio_queue *folioq, unsigned int slot)
{
	const unsigned long *mark_word = &folioq->marks3;

	return test_bit(slot, mark_word);
}
/*
 * Set the bit for @slot in the third per-folio mark word (marks3) of the
 * queue segment @folioq.
 */
static inline void folioq_mark3(struct folio_queue *folioq, unsigned int slot)
{
	unsigned long *mark_word = &folioq->marks3;

	set_bit(slot, mark_word);
}
/*
 * Clear the bit for @slot in the third per-folio mark word (marks3) of the
 * queue segment @folioq.
 */
static inline void folioq_unmark3(struct folio_queue *folioq, unsigned int slot)
{
	unsigned long *mark_word = &folioq->marks3;

	clear_bit(slot, mark_word);
}
static inline unsigned int __folio_order(struct folio *folio) static inline unsigned int __folio_order(struct folio *folio)
{ {
if (!folio_test_large(folio)) if (!folio_test_large(folio))
...@@ -133,6 +150,7 @@ static inline void folioq_clear(struct folio_queue *folioq, unsigned int slot) ...@@ -133,6 +150,7 @@ static inline void folioq_clear(struct folio_queue *folioq, unsigned int slot)
folioq->vec.folios[slot] = NULL; folioq->vec.folios[slot] = NULL;
folioq_unmark(folioq, slot); folioq_unmark(folioq, slot);
folioq_unmark2(folioq, slot); folioq_unmark2(folioq, slot);
folioq_unmark3(folioq, slot);
} }
#endif /* _LINUX_FOLIO_QUEUE_H */ #endif /* _LINUX_FOLIO_QUEUE_H */
...@@ -178,36 +178,43 @@ struct netfs_io_subrequest { ...@@ -178,36 +178,43 @@ struct netfs_io_subrequest {
unsigned long long start; /* Where to start the I/O */ unsigned long long start; /* Where to start the I/O */
size_t len; /* Size of the I/O */ size_t len; /* Size of the I/O */
size_t transferred; /* Amount of data transferred */ size_t transferred; /* Amount of data transferred */
size_t consumed; /* Amount of read data consumed */
size_t prev_donated; /* Amount of data donated from previous subreq */
size_t next_donated; /* Amount of data donated from next subreq */
refcount_t ref; refcount_t ref;
short error; /* 0 or error that occurred */ short error; /* 0 or error that occurred */
unsigned short debug_index; /* Index in list (for debugging output) */ unsigned short debug_index; /* Index in list (for debugging output) */
unsigned int nr_segs; /* Number of segs in io_iter */ unsigned int nr_segs; /* Number of segs in io_iter */
enum netfs_io_source source; /* Where to read from/write to */ enum netfs_io_source source; /* Where to read from/write to */
unsigned char stream_nr; /* I/O stream this belongs to */ unsigned char stream_nr; /* I/O stream this belongs to */
unsigned char curr_folioq_slot; /* Folio currently being read */
unsigned char curr_folio_order; /* Order of folio */
struct folio_queue *curr_folioq; /* Queue segment in which current folio resides */
unsigned long flags; unsigned long flags;
#define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */ #define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */
#define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */ #define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */
#define NETFS_SREQ_SHORT_IO 2 /* Set if the I/O was short */
#define NETFS_SREQ_SEEK_DATA_READ 3 /* Set if ->read() should SEEK_DATA first */ #define NETFS_SREQ_SEEK_DATA_READ 3 /* Set if ->read() should SEEK_DATA first */
#define NETFS_SREQ_NO_PROGRESS 4 /* Set if we didn't manage to read any data */ #define NETFS_SREQ_NO_PROGRESS 4 /* Set if we didn't manage to read any data */
#define NETFS_SREQ_ONDEMAND 5 /* Set if it's from on-demand read mode */ #define NETFS_SREQ_ONDEMAND 5 /* Set if it's from on-demand read mode */
#define NETFS_SREQ_BOUNDARY 6 /* Set if ends on hard boundary (eg. ceph object) */ #define NETFS_SREQ_BOUNDARY 6 /* Set if ends on hard boundary (eg. ceph object) */
#define NETFS_SREQ_HIT_EOF 7 /* Set if short due to EOF */
#define NETFS_SREQ_IN_PROGRESS 8 /* Unlocked when the subrequest completes */ #define NETFS_SREQ_IN_PROGRESS 8 /* Unlocked when the subrequest completes */
#define NETFS_SREQ_NEED_RETRY 9 /* Set if the filesystem requests a retry */ #define NETFS_SREQ_NEED_RETRY 9 /* Set if the filesystem requests a retry */
#define NETFS_SREQ_RETRYING 10 /* Set if we're retrying */ #define NETFS_SREQ_RETRYING 10 /* Set if we're retrying */
#define NETFS_SREQ_FAILED 11 /* Set if the subreq failed unretryably */ #define NETFS_SREQ_FAILED 11 /* Set if the subreq failed unretryably */
#define NETFS_SREQ_HIT_EOF 12 /* Set if we hit the EOF */
}; };
enum netfs_io_origin { enum netfs_io_origin {
NETFS_READAHEAD, /* This read was triggered by readahead */ NETFS_READAHEAD, /* This read was triggered by readahead */
NETFS_READPAGE, /* This read is a synchronous read */ NETFS_READPAGE, /* This read is a synchronous read */
NETFS_READ_GAPS, /* This read is a synchronous read to fill gaps */
NETFS_READ_FOR_WRITE, /* This read is to prepare a write */ NETFS_READ_FOR_WRITE, /* This read is to prepare a write */
NETFS_DIO_READ, /* This is a direct I/O read */ NETFS_DIO_READ, /* This is a direct I/O read */
NETFS_WRITEBACK, /* This write was triggered by writepages */ NETFS_WRITEBACK, /* This write was triggered by writepages */
NETFS_WRITETHROUGH, /* This write was made by netfs_perform_write() */ NETFS_WRITETHROUGH, /* This write was made by netfs_perform_write() */
NETFS_UNBUFFERED_WRITE, /* This is an unbuffered write */ NETFS_UNBUFFERED_WRITE, /* This is an unbuffered write */
NETFS_DIO_WRITE, /* This is a direct I/O write */ NETFS_DIO_WRITE, /* This is a direct I/O write */
NETFS_PGPRIV2_COPY_TO_CACHE, /* [DEPRECATED] This is writing read data to the cache */
nr__netfs_io_origin nr__netfs_io_origin
} __mode(byte); } __mode(byte);
...@@ -224,6 +231,7 @@ struct netfs_io_request { ...@@ -224,6 +231,7 @@ struct netfs_io_request {
struct address_space *mapping; /* The mapping being accessed */ struct address_space *mapping; /* The mapping being accessed */
struct kiocb *iocb; /* AIO completion vector */ struct kiocb *iocb; /* AIO completion vector */
struct netfs_cache_resources cache_resources; struct netfs_cache_resources cache_resources;
struct readahead_control *ractl; /* Readahead descriptor */
struct list_head proc_link; /* Link in netfs_iorequests */ struct list_head proc_link; /* Link in netfs_iorequests */
struct list_head subrequests; /* Contributory I/O operations */ struct list_head subrequests; /* Contributory I/O operations */
struct netfs_io_stream io_streams[2]; /* Streams of parallel I/O operations */ struct netfs_io_stream io_streams[2]; /* Streams of parallel I/O operations */
...@@ -244,12 +252,10 @@ struct netfs_io_request { ...@@ -244,12 +252,10 @@ struct netfs_io_request {
unsigned int nr_group_rel; /* Number of refs to release on ->group */ unsigned int nr_group_rel; /* Number of refs to release on ->group */
spinlock_t lock; /* Lock for queuing subreqs */ spinlock_t lock; /* Lock for queuing subreqs */
atomic_t nr_outstanding; /* Number of ops in progress */ atomic_t nr_outstanding; /* Number of ops in progress */
atomic_t nr_copy_ops; /* Number of copy-to-cache ops in progress */
size_t upper_len; /* Length can be extended to here */
unsigned long long submitted; /* Amount submitted for I/O so far */ unsigned long long submitted; /* Amount submitted for I/O so far */
unsigned long long len; /* Length of the request */ unsigned long long len; /* Length of the request */
size_t transferred; /* Amount to be indicated as transferred */ size_t transferred; /* Amount to be indicated as transferred */
short error; /* 0 or error that occurred */ long error; /* 0 or error that occurred */
enum netfs_io_origin origin; /* Origin of the request */ enum netfs_io_origin origin; /* Origin of the request */
bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */ bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */
u8 buffer_head_slot; /* First slot in ->buffer */ u8 buffer_head_slot; /* First slot in ->buffer */
...@@ -260,9 +266,9 @@ struct netfs_io_request { ...@@ -260,9 +266,9 @@ struct netfs_io_request {
unsigned long long collected_to; /* Point we've collected to */ unsigned long long collected_to; /* Point we've collected to */
unsigned long long cleaned_to; /* Position we've cleaned folios to */ unsigned long long cleaned_to; /* Position we've cleaned folios to */
pgoff_t no_unlock_folio; /* Don't unlock this folio after read */ pgoff_t no_unlock_folio; /* Don't unlock this folio after read */
size_t prev_donated; /* Fallback for subreq->prev_donated */
refcount_t ref; refcount_t ref;
unsigned long flags; unsigned long flags;
#define NETFS_RREQ_INCOMPLETE_IO 0 /* Some ioreqs terminated short or with error */
#define NETFS_RREQ_COPY_TO_CACHE 1 /* Need to write to the cache */ #define NETFS_RREQ_COPY_TO_CACHE 1 /* Need to write to the cache */
#define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */ #define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */
#define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */ #define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */
...@@ -274,6 +280,7 @@ struct netfs_io_request { ...@@ -274,6 +280,7 @@ struct netfs_io_request {
#define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */ #define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */
#define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */ #define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */
#define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */ #define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */
#define NETFS_RREQ_NEED_RETRY 14 /* Need to try retrying */
#define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark #define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark
* write to cache on read */ * write to cache on read */
const struct netfs_request_ops *netfs_ops; const struct netfs_request_ops *netfs_ops;
...@@ -292,7 +299,7 @@ struct netfs_request_ops { ...@@ -292,7 +299,7 @@ struct netfs_request_ops {
/* Read request handling */ /* Read request handling */
void (*expand_readahead)(struct netfs_io_request *rreq); void (*expand_readahead)(struct netfs_io_request *rreq);
bool (*clamp_length)(struct netfs_io_subrequest *subreq); int (*prepare_read)(struct netfs_io_subrequest *subreq);
void (*issue_read)(struct netfs_io_subrequest *subreq); void (*issue_read)(struct netfs_io_subrequest *subreq);
bool (*is_still_valid)(struct netfs_io_request *rreq); bool (*is_still_valid)(struct netfs_io_request *rreq);
int (*check_write_begin)(struct file *file, loff_t pos, unsigned len, int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
...@@ -422,7 +429,10 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp); ...@@ -422,7 +429,10 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp);
vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group); vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group);
/* (Sub)request management API. */ /* (Sub)request management API. */
void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool); void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq,
bool was_async);
void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq,
int error, bool was_async);
void netfs_get_subrequest(struct netfs_io_subrequest *subreq, void netfs_get_subrequest(struct netfs_io_subrequest *subreq,
enum netfs_sreq_ref_trace what); enum netfs_sreq_ref_trace what);
void netfs_put_subrequest(struct netfs_io_subrequest *subreq, void netfs_put_subrequest(struct netfs_io_subrequest *subreq,
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
EM(netfs_read_trace_expanded, "EXPANDED ") \ EM(netfs_read_trace_expanded, "EXPANDED ") \
EM(netfs_read_trace_readahead, "READAHEAD") \ EM(netfs_read_trace_readahead, "READAHEAD") \
EM(netfs_read_trace_readpage, "READPAGE ") \ EM(netfs_read_trace_readpage, "READPAGE ") \
EM(netfs_read_trace_read_gaps, "READ-GAPS") \
EM(netfs_read_trace_prefetch_for_write, "PREFETCHW") \ EM(netfs_read_trace_prefetch_for_write, "PREFETCHW") \
E_(netfs_read_trace_write_begin, "WRITEBEGN") E_(netfs_read_trace_write_begin, "WRITEBEGN")
...@@ -33,12 +34,14 @@ ...@@ -33,12 +34,14 @@
#define netfs_rreq_origins \ #define netfs_rreq_origins \
EM(NETFS_READAHEAD, "RA") \ EM(NETFS_READAHEAD, "RA") \
EM(NETFS_READPAGE, "RP") \ EM(NETFS_READPAGE, "RP") \
EM(NETFS_READ_GAPS, "RG") \
EM(NETFS_READ_FOR_WRITE, "RW") \ EM(NETFS_READ_FOR_WRITE, "RW") \
EM(NETFS_DIO_READ, "DR") \ EM(NETFS_DIO_READ, "DR") \
EM(NETFS_WRITEBACK, "WB") \ EM(NETFS_WRITEBACK, "WB") \
EM(NETFS_WRITETHROUGH, "WT") \ EM(NETFS_WRITETHROUGH, "WT") \
EM(NETFS_UNBUFFERED_WRITE, "UW") \ EM(NETFS_UNBUFFERED_WRITE, "UW") \
E_(NETFS_DIO_WRITE, "DW") EM(NETFS_DIO_WRITE, "DW") \
E_(NETFS_PGPRIV2_COPY_TO_CACHE, "2C")
#define netfs_rreq_traces \ #define netfs_rreq_traces \
EM(netfs_rreq_trace_assess, "ASSESS ") \ EM(netfs_rreq_trace_assess, "ASSESS ") \
...@@ -69,15 +72,25 @@ ...@@ -69,15 +72,25 @@
E_(NETFS_INVALID_WRITE, "INVL") E_(NETFS_INVALID_WRITE, "INVL")
#define netfs_sreq_traces \ #define netfs_sreq_traces \
EM(netfs_sreq_trace_add_donations, "+DON ") \
EM(netfs_sreq_trace_added, "ADD ") \
EM(netfs_sreq_trace_clear, "CLEAR") \
EM(netfs_sreq_trace_discard, "DSCRD") \ EM(netfs_sreq_trace_discard, "DSCRD") \
EM(netfs_sreq_trace_donate_to_prev, "DON-P") \
EM(netfs_sreq_trace_donate_to_next, "DON-N") \
EM(netfs_sreq_trace_download_instead, "RDOWN") \ EM(netfs_sreq_trace_download_instead, "RDOWN") \
EM(netfs_sreq_trace_fail, "FAIL ") \ EM(netfs_sreq_trace_fail, "FAIL ") \
EM(netfs_sreq_trace_free, "FREE ") \ EM(netfs_sreq_trace_free, "FREE ") \
EM(netfs_sreq_trace_hit_eof, "EOF ") \
EM(netfs_sreq_trace_io_progress, "IO ") \
EM(netfs_sreq_trace_limited, "LIMIT") \ EM(netfs_sreq_trace_limited, "LIMIT") \
EM(netfs_sreq_trace_prepare, "PREP ") \ EM(netfs_sreq_trace_prepare, "PREP ") \
EM(netfs_sreq_trace_prep_failed, "PRPFL") \ EM(netfs_sreq_trace_prep_failed, "PRPFL") \
EM(netfs_sreq_trace_resubmit_short, "SHORT") \ EM(netfs_sreq_trace_progress, "PRGRS") \
EM(netfs_sreq_trace_reprep_failed, "REPFL") \
EM(netfs_sreq_trace_retry, "RETRY") \ EM(netfs_sreq_trace_retry, "RETRY") \
EM(netfs_sreq_trace_short, "SHORT") \
EM(netfs_sreq_trace_split, "SPLIT") \
EM(netfs_sreq_trace_submit, "SUBMT") \ EM(netfs_sreq_trace_submit, "SUBMT") \
EM(netfs_sreq_trace_terminated, "TERM ") \ EM(netfs_sreq_trace_terminated, "TERM ") \
EM(netfs_sreq_trace_write, "WRITE") \ EM(netfs_sreq_trace_write, "WRITE") \
...@@ -118,7 +131,7 @@ ...@@ -118,7 +131,7 @@
EM(netfs_sreq_trace_new, "NEW ") \ EM(netfs_sreq_trace_new, "NEW ") \
EM(netfs_sreq_trace_put_cancel, "PUT CANCEL ") \ EM(netfs_sreq_trace_put_cancel, "PUT CANCEL ") \
EM(netfs_sreq_trace_put_clear, "PUT CLEAR ") \ EM(netfs_sreq_trace_put_clear, "PUT CLEAR ") \
EM(netfs_sreq_trace_put_discard, "PUT DISCARD") \ EM(netfs_sreq_trace_put_consumed, "PUT CONSUME") \
EM(netfs_sreq_trace_put_done, "PUT DONE ") \ EM(netfs_sreq_trace_put_done, "PUT DONE ") \
EM(netfs_sreq_trace_put_failed, "PUT FAILED ") \ EM(netfs_sreq_trace_put_failed, "PUT FAILED ") \
EM(netfs_sreq_trace_put_merged, "PUT MERGED ") \ EM(netfs_sreq_trace_put_merged, "PUT MERGED ") \
...@@ -138,6 +151,7 @@ ...@@ -138,6 +151,7 @@
EM(netfs_flush_content, "flush") \ EM(netfs_flush_content, "flush") \
EM(netfs_streaming_filled_page, "mod-streamw-f") \ EM(netfs_streaming_filled_page, "mod-streamw-f") \
EM(netfs_streaming_cont_filled_page, "mod-streamw-f+") \ EM(netfs_streaming_cont_filled_page, "mod-streamw-f+") \
EM(netfs_folio_trace_abandon, "abandon") \
EM(netfs_folio_trace_cancel_copy, "cancel-copy") \ EM(netfs_folio_trace_cancel_copy, "cancel-copy") \
EM(netfs_folio_trace_clear, "clear") \ EM(netfs_folio_trace_clear, "clear") \
EM(netfs_folio_trace_clear_cc, "clear-cc") \ EM(netfs_folio_trace_clear_cc, "clear-cc") \
...@@ -154,7 +168,11 @@ ...@@ -154,7 +168,11 @@
EM(netfs_folio_trace_mkwrite_plus, "mkwrite+") \ EM(netfs_folio_trace_mkwrite_plus, "mkwrite+") \
EM(netfs_folio_trace_not_under_wback, "!wback") \ EM(netfs_folio_trace_not_under_wback, "!wback") \
EM(netfs_folio_trace_put, "put") \ EM(netfs_folio_trace_put, "put") \
EM(netfs_folio_trace_read, "read") \
EM(netfs_folio_trace_read_done, "read-done") \
EM(netfs_folio_trace_read_gaps, "read-gaps") \ EM(netfs_folio_trace_read_gaps, "read-gaps") \
EM(netfs_folio_trace_read_put, "read-put") \
EM(netfs_folio_trace_read_unlock, "read-unlock") \
EM(netfs_folio_trace_redirtied, "redirtied") \ EM(netfs_folio_trace_redirtied, "redirtied") \
EM(netfs_folio_trace_store, "store") \ EM(netfs_folio_trace_store, "store") \
EM(netfs_folio_trace_store_copy, "store-copy") \ EM(netfs_folio_trace_store_copy, "store-copy") \
...@@ -167,6 +185,12 @@ ...@@ -167,6 +185,12 @@
EM(netfs_contig_trace_jump, "-->JUMP-->") \ EM(netfs_contig_trace_jump, "-->JUMP-->") \
E_(netfs_contig_trace_unlock, "Unlock") E_(netfs_contig_trace_unlock, "Unlock")
#define netfs_donate_traces \
EM(netfs_trace_donate_tail_to_prev, "tail-to-prev") \
EM(netfs_trace_donate_to_prev, "to-prev") \
EM(netfs_trace_donate_to_next, "to-next") \
E_(netfs_trace_donate_to_deferred_next, "defer-next")
#ifndef __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY #ifndef __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY
#define __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY #define __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY
...@@ -184,6 +208,7 @@ enum netfs_rreq_ref_trace { netfs_rreq_ref_traces } __mode(byte); ...@@ -184,6 +208,7 @@ enum netfs_rreq_ref_trace { netfs_rreq_ref_traces } __mode(byte);
enum netfs_sreq_ref_trace { netfs_sreq_ref_traces } __mode(byte); enum netfs_sreq_ref_trace { netfs_sreq_ref_traces } __mode(byte);
enum netfs_folio_trace { netfs_folio_traces } __mode(byte); enum netfs_folio_trace { netfs_folio_traces } __mode(byte);
enum netfs_collect_contig_trace { netfs_collect_contig_traces } __mode(byte); enum netfs_collect_contig_trace { netfs_collect_contig_traces } __mode(byte);
enum netfs_donate_trace { netfs_donate_traces } __mode(byte);
#endif #endif
...@@ -206,6 +231,7 @@ netfs_rreq_ref_traces; ...@@ -206,6 +231,7 @@ netfs_rreq_ref_traces;
netfs_sreq_ref_traces; netfs_sreq_ref_traces;
netfs_folio_traces; netfs_folio_traces;
netfs_collect_contig_traces; netfs_collect_contig_traces;
netfs_donate_traces;
/* /*
* Now redefine the EM() and E_() macros to map the enums to the strings that * Now redefine the EM() and E_() macros to map the enums to the strings that
...@@ -226,6 +252,7 @@ TRACE_EVENT(netfs_read, ...@@ -226,6 +252,7 @@ TRACE_EVENT(netfs_read,
TP_STRUCT__entry( TP_STRUCT__entry(
__field(unsigned int, rreq ) __field(unsigned int, rreq )
__field(unsigned int, cookie ) __field(unsigned int, cookie )
__field(loff_t, i_size )
__field(loff_t, start ) __field(loff_t, start )
__field(size_t, len ) __field(size_t, len )
__field(enum netfs_read_trace, what ) __field(enum netfs_read_trace, what )
...@@ -235,18 +262,19 @@ TRACE_EVENT(netfs_read, ...@@ -235,18 +262,19 @@ TRACE_EVENT(netfs_read,
TP_fast_assign( TP_fast_assign(
__entry->rreq = rreq->debug_id; __entry->rreq = rreq->debug_id;
__entry->cookie = rreq->cache_resources.debug_id; __entry->cookie = rreq->cache_resources.debug_id;
__entry->i_size = rreq->i_size;
__entry->start = start; __entry->start = start;
__entry->len = len; __entry->len = len;
__entry->what = what; __entry->what = what;
__entry->netfs_inode = rreq->inode->i_ino; __entry->netfs_inode = rreq->inode->i_ino;
), ),
TP_printk("R=%08x %s c=%08x ni=%x s=%llx %zx", TP_printk("R=%08x %s c=%08x ni=%x s=%llx l=%zx sz=%llx",
__entry->rreq, __entry->rreq,
__print_symbolic(__entry->what, netfs_read_traces), __print_symbolic(__entry->what, netfs_read_traces),
__entry->cookie, __entry->cookie,
__entry->netfs_inode, __entry->netfs_inode,
__entry->start, __entry->len) __entry->start, __entry->len, __entry->i_size)
); );
TRACE_EVENT(netfs_rreq, TRACE_EVENT(netfs_rreq,
...@@ -651,6 +679,71 @@ TRACE_EVENT(netfs_collect_stream, ...@@ -651,6 +679,71 @@ TRACE_EVENT(netfs_collect_stream,
__entry->collected_to, __entry->front) __entry->collected_to, __entry->front)
); );
/*
 * Tracepoint: netfs_progress
 *
 * Records a snapshot of a read subrequest: the owning request's debug ID,
 * the subrequest's debug index, its consumed and transferred counters and
 * its current folio-queue slot, together with the caller-supplied @start,
 * @avail and @part values.
 *
 * Output format: "R=<rreq>[<subreq>] s=<start> ct=<consumed>/<transferred>
 * pa=<part>/<avail> sl=<slot>", all in hex.
 *
 * NOTE(review): @avail and @part are size_t but are stored in unsigned int
 * fields, so values wider than unsigned int would be truncated in the trace.
 * NOTE(review): @start/@avail/@part presumably describe the folio being
 * processed (the fields are named f_*) - confirm against the caller.
 */
TRACE_EVENT(netfs_progress,
TP_PROTO(const struct netfs_io_subrequest *subreq,
unsigned long long start, size_t avail, size_t part),
TP_ARGS(subreq, start, avail, part),
TP_STRUCT__entry(
__field(unsigned int, rreq)
__field(unsigned int, subreq)
__field(unsigned int, consumed)
__field(unsigned int, transferred)
__field(unsigned long long, f_start)
__field(unsigned int, f_avail)
__field(unsigned int, f_part)
__field(unsigned char, slot)
),
TP_fast_assign(
__entry->rreq = subreq->rreq->debug_id;
__entry->subreq = subreq->debug_index;
__entry->consumed = subreq->consumed;
__entry->transferred = subreq->transferred;
__entry->f_start = start;
__entry->f_avail = avail;
__entry->f_part = part;
__entry->slot = subreq->curr_folioq_slot;
),
TP_printk("R=%08x[%02x] s=%llx ct=%x/%x pa=%x/%x sl=%x",
__entry->rreq, __entry->subreq, __entry->f_start,
__entry->consumed, __entry->transferred,
__entry->f_part, __entry->f_avail, __entry->slot)
);
/*
 * Tracepoint: netfs_donate
 *
 * Records a donation of @amount between two subrequests of @rreq: the debug
 * index of the donor (@from), the debug index of the recipient (@to), the
 * amount, and a netfs_donate_trace value saying which kind of donation it
 * was (printed symbolically via netfs_donate_traces).
 *
 * @to may be NULL; in that case -1 is stored in the unsigned int 'to' field,
 * so it prints as ffffffff rather than as a two-digit index.
 *
 * NOTE(review): @amount is size_t but is stored in an unsigned int field, so
 * values wider than unsigned int would be truncated in the trace.
 */
TRACE_EVENT(netfs_donate,
TP_PROTO(const struct netfs_io_request *rreq,
const struct netfs_io_subrequest *from,
const struct netfs_io_subrequest *to,
size_t amount,
enum netfs_donate_trace trace),
TP_ARGS(rreq, from, to, amount, trace),
TP_STRUCT__entry(
__field(unsigned int, rreq)
__field(unsigned int, from)
__field(unsigned int, to)
__field(unsigned int, amount)
__field(enum netfs_donate_trace, trace)
),
TP_fast_assign(
__entry->rreq = rreq->debug_id;
__entry->from = from->debug_index;
/* No recipient: record -1 as a sentinel. */
__entry->to = to ? to->debug_index : -1;
__entry->amount = amount;
__entry->trace = trace;
),
TP_printk("R=%08x[%02x] -> [%02x] %s am=%x",
__entry->rreq, __entry->from, __entry->to,
__print_symbolic(__entry->trace, netfs_donate_traces),
__entry->amount)
);
#undef EM #undef EM
#undef E_ #undef E_
#endif /* _TRACE_NETFS_H */ #endif /* _TRACE_NETFS_H */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment