Commit c065c429 authored by Linus Torvalds

Merge tag 'nfsd-5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux

Pull nfsd updates from Chuck Lever:
 "Highlights:

   - Update NFSv2 and NFSv3 XDR encoding functions

   - Add batch Receive posting to the server's RPC/RDMA transport (take 2)

   - Reduce page allocator traffic in svcrdma"

* tag 'nfsd-5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux: (70 commits)
  NFSD: Use DEFINE_SPINLOCK() for spinlock
  sunrpc: Remove unused function ip_map_lookup
  NFSv4.2: fix copy stateid copying for the async copy
  UAPI: nfsfh.h: Replace one-element array with flexible-array member
  svcrdma: Clean up dto_q critical section in svc_rdma_recvfrom()
  svcrdma: Remove svc_rdma_recv_ctxt::rc_pages and ::rc_arg
  svcrdma: Remove sc_read_complete_q
  svcrdma: Single-stage RDMA Read
  SUNRPC: Move svc_xprt_received() call sites
  SUNRPC: Export svc_xprt_received()
  svcrdma: Retain the page backing rq_res.head[0].iov_base
  svcrdma: Remove unused sc_pages field
  svcrdma: Normalize Send page handling
  svcrdma: Add a "deferred close" helper
  svcrdma: Maintain a Receive water mark
  svcrdma: Use svc_rdma_refresh_recvs() in wc_receive
  svcrdma: Add a batch Receive posting mechanism
  svcrdma: Remove stale comment for svc_rdma_wc_receive()
  svcrdma: Provide an explanatory comment in CMA event handler
  svcrdma: RPCDBG_FACILITY is no longer used
  ...
parents b5b3097d b73ac680
......@@ -136,6 +136,77 @@ int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
}
EXPORT_SYMBOL_GPL(nfsacl_encode);
/**
* nfs_stream_encode_acl - Encode an NFSv3 ACL
*
* @xdr: an xdr_stream positioned to receive an encoded ACL
* @inode: inode of file whose ACL this is
* @acl: posix_acl to encode
* @encode_entries: whether to encode ACEs as well
* @typeflag: ACL type: NFS_ACL_DEFAULT or zero
*
* Return values:
* %false: The ACL could not be encoded
* %true: @xdr is advanced to the next available position
*/
bool nfs_stream_encode_acl(struct xdr_stream *xdr, struct inode *inode,
struct posix_acl *acl, int encode_entries,
int typeflag)
{
const size_t elem_size = XDR_UNIT * 3;
u32 entries = (acl && acl->a_count) ? max_t(int, acl->a_count, 4) : 0;
struct nfsacl_encode_desc nfsacl_desc = {
.desc = {
.elem_size = elem_size,
.array_len = encode_entries ? entries : 0,
.xcode = xdr_nfsace_encode,
},
.acl = acl,
.typeflag = typeflag,
.uid = inode->i_uid,
.gid = inode->i_gid,
};
struct nfsacl_simple_acl aclbuf;
unsigned int base;
int err;
if (entries > NFS_ACL_MAX_ENTRIES)
return false;
if (xdr_stream_encode_u32(xdr, entries) < 0)
return false;
if (encode_entries && acl && acl->a_count == 3) {
struct posix_acl *acl2 = &aclbuf.acl;
/* Avoid the use of posix_acl_alloc(). nfsacl_encode() is
* invoked in contexts where a memory allocation failure is
* fatal. Fortunately this fake ACL is small enough to
* construct on the stack. */
posix_acl_init(acl2, 4);
/* Insert entries in canonical order: other orders seem
to confuse Solaris VxFS. */
acl2->a_entries[0] = acl->a_entries[0]; /* ACL_USER_OBJ */
acl2->a_entries[1] = acl->a_entries[1]; /* ACL_GROUP_OBJ */
acl2->a_entries[2] = acl->a_entries[1]; /* ACL_MASK */
acl2->a_entries[2].e_tag = ACL_MASK;
acl2->a_entries[3] = acl->a_entries[2]; /* ACL_OTHER */
nfsacl_desc.acl = acl2;
}
base = xdr_stream_pos(xdr);
if (!xdr_reserve_space(xdr, XDR_UNIT +
elem_size * nfsacl_desc.desc.array_len))
return false;
err = xdr_encode_array2(xdr->buf, base, &nfsacl_desc.desc);
if (err)
return false;
return true;
}
EXPORT_SYMBOL_GPL(nfs_stream_encode_acl);
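For illustration, a GETACL result encoder built on this helper takes the shape below (an editorial sketch, not part of the patch; it mirrors the fs/nfsd/nfs2acl.c conversion later in this series): encode the access ACL, then the default ACL, and fail the reply if either does not fit.

	if (!nfs_stream_encode_acl(xdr, inode, resp->acl_access,
				   resp->mask & NFS_ACL, 0))
		return 0;	/* no room left in the reply's XDR buffer */
	if (!nfs_stream_encode_acl(xdr, inode, resp->acl_default,
				   resp->mask & NFS_DFACL, NFS_ACL_DEFAULT))
		return 0;
	return 1;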
struct nfsacl_decode_desc {
struct xdr_array2_desc desc;
unsigned int count;
......
......@@ -99,7 +99,7 @@ config NFSD_BLOCKLAYOUT
help
This option enables support for the exporting pNFS block layouts
in the kernel's NFS server. The pNFS block layout enables NFS
clients to directly perform I/O to block devices accesible to both
clients to directly perform I/O to block devices accessible to both
the server and the clients. See RFC 5663 for more details.
If unsure, say N.
......@@ -113,7 +113,7 @@ config NFSD_SCSILAYOUT
help
This option enables support for the exporting pNFS SCSI layouts
in the kernel's NFS server. The pNFS SCSI layout enables NFS
clients to directly perform I/O to SCSI devices accesible to both
clients to directly perform I/O to SCSI devices accessible to both
the server and the clients. See draft-ietf-nfsv4-scsi-layout for
more details.
......@@ -127,7 +127,7 @@ config NFSD_FLEXFILELAYOUT
This option enables support for the exporting pNFS Flex File
layouts in the kernel's NFS server. The pNFS Flex File layout
enables NFS clients to directly perform I/O to NFSv3 devices
accesible to both the server and the clients. See
accessible to both the server and the clients. See
draft-ietf-nfsv4-flex-files for more details.
Warning, this server implements the bare minimum functionality
......
......@@ -51,9 +51,6 @@ struct nfsd_net {
bool grace_ended;
time64_t boot_time;
/* internal mount of the "nfsd" pseudofilesystem: */
struct vfsmount *nfsd_mnt;
struct dentry *nfsd_client_dir;
/*
......@@ -130,6 +127,9 @@ struct nfsd_net {
wait_queue_head_t ntf_wq;
atomic_t ntf_refcnt;
/* Allow umount to wait for nfsd state cleanup */
struct completion nfsd_shutdown_complete;
/*
* clientid and stateid data for construction of net unique COPY
* stateids.
......
......@@ -242,79 +242,61 @@ static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p)
/* GETACL */
static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
{
struct xdr_stream *xdr = &rqstp->rq_res_stream;
struct nfsd3_getaclres *resp = rqstp->rq_resp;
struct dentry *dentry = resp->fh.fh_dentry;
struct inode *inode;
struct kvec *head = rqstp->rq_res.head;
unsigned int base;
int n;
int w;
*p++ = resp->status;
if (resp->status != nfs_ok)
return xdr_ressize_check(rqstp, p);
if (!svcxdr_encode_stat(xdr, resp->status))
return 0;
/*
* Since this is version 2, the check for nfserr in
* nfsd_dispatch actually ensures the following cannot happen.
* However, it seems fragile to depend on that.
*/
if (dentry == NULL || d_really_is_negative(dentry))
return 0;
return 1;
inode = d_inode(dentry);
p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat);
*p++ = htonl(resp->mask);
if (!xdr_ressize_check(rqstp, p))
if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat))
return 0;
if (xdr_stream_encode_u32(xdr, resp->mask) < 0)
return 0;
base = (char *)p - (char *)head->iov_base;
rqstp->rq_res.page_len = w = nfsacl_size(
(resp->mask & NFS_ACL) ? resp->acl_access : NULL,
(resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
while (w > 0) {
if (!*(rqstp->rq_next_page++))
return 0;
return 1;
w -= PAGE_SIZE;
}
n = nfsacl_encode(&rqstp->rq_res, base, inode,
resp->acl_access,
resp->mask & NFS_ACL, 0);
if (n > 0)
n = nfsacl_encode(&rqstp->rq_res, base + n, inode,
resp->acl_default,
resp->mask & NFS_DFACL,
NFS_ACL_DEFAULT);
return (n > 0);
}
static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p)
{
struct nfsd_attrstat *resp = rqstp->rq_resp;
*p++ = resp->status;
if (resp->status != nfs_ok)
goto out;
if (!nfs_stream_encode_acl(xdr, inode, resp->acl_access,
resp->mask & NFS_ACL, 0))
return 0;
if (!nfs_stream_encode_acl(xdr, inode, resp->acl_default,
resp->mask & NFS_DFACL, NFS_ACL_DEFAULT))
return 0;
p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat);
out:
return xdr_ressize_check(rqstp, p);
return 1;
}
/* ACCESS */
static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p)
{
struct xdr_stream *xdr = &rqstp->rq_res_stream;
struct nfsd3_accessres *resp = rqstp->rq_resp;
*p++ = resp->status;
if (resp->status != nfs_ok)
goto out;
if (!svcxdr_encode_stat(xdr, resp->status))
return 0;
switch (resp->status) {
case nfs_ok:
if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat))
return 0;
if (xdr_stream_encode_u32(xdr, resp->access) < 0)
return 0;
break;
}
p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat);
*p++ = htonl(resp->access);
out:
return xdr_ressize_check(rqstp, p);
return 1;
}
/*
......@@ -329,13 +311,6 @@ static void nfsaclsvc_release_getacl(struct svc_rqst *rqstp)
posix_acl_release(resp->acl_default);
}
static void nfsaclsvc_release_attrstat(struct svc_rqst *rqstp)
{
struct nfsd_attrstat *resp = rqstp->rq_resp;
fh_put(&resp->fh);
}
static void nfsaclsvc_release_access(struct svc_rqst *rqstp)
{
struct nfsd3_accessres *resp = rqstp->rq_resp;
......@@ -375,8 +350,8 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = {
[ACLPROC2_SETACL] = {
.pc_func = nfsacld_proc_setacl,
.pc_decode = nfsaclsvc_decode_setaclargs,
.pc_encode = nfsaclsvc_encode_attrstatres,
.pc_release = nfsaclsvc_release_attrstat,
.pc_encode = nfssvc_encode_attrstatres,
.pc_release = nfssvc_release_attrstat,
.pc_argsize = sizeof(struct nfsd3_setaclargs),
.pc_ressize = sizeof(struct nfsd_attrstat),
.pc_cachetype = RC_NOCACHE,
......@@ -386,8 +361,8 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = {
[ACLPROC2_GETATTR] = {
.pc_func = nfsacld_proc_getattr,
.pc_decode = nfssvc_decode_fhandleargs,
.pc_encode = nfsaclsvc_encode_attrstatres,
.pc_release = nfsaclsvc_release_attrstat,
.pc_encode = nfssvc_encode_attrstatres,
.pc_release = nfssvc_release_attrstat,
.pc_argsize = sizeof(struct nfsd_fhandle),
.pc_ressize = sizeof(struct nfsd_attrstat),
.pc_cachetype = RC_NOCACHE,
......
......@@ -168,22 +168,25 @@ static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p)
/* GETACL */
static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
{
struct xdr_stream *xdr = &rqstp->rq_res_stream;
struct nfsd3_getaclres *resp = rqstp->rq_resp;
struct dentry *dentry = resp->fh.fh_dentry;
struct kvec *head = rqstp->rq_res.head;
struct inode *inode = d_inode(dentry);
unsigned int base;
int n;
int w;
*p++ = resp->status;
p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh);
if (resp->status == 0 && dentry && d_really_is_positive(dentry)) {
struct inode *inode = d_inode(dentry);
struct kvec *head = rqstp->rq_res.head;
unsigned int base;
int n;
int w;
*p++ = htonl(resp->mask);
if (!xdr_ressize_check(rqstp, p))
if (!svcxdr_encode_nfsstat3(xdr, resp->status))
return 0;
switch (resp->status) {
case nfs_ok:
if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
return 0;
if (xdr_stream_encode_u32(xdr, resp->mask) < 0)
return 0;
base = (char *)p - (char *)head->iov_base;
base = (char *)xdr->p - (char *)head->iov_base;
rqstp->rq_res.page_len = w = nfsacl_size(
(resp->mask & NFS_ACL) ? resp->acl_access : NULL,
......@@ -204,9 +207,11 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
NFS_ACL_DEFAULT);
if (n <= 0)
return 0;
} else
if (!xdr_ressize_check(rqstp, p))
break;
default:
if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
return 0;
}
return 1;
}
......@@ -214,11 +219,11 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
/* SETACL */
static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p)
{
struct xdr_stream *xdr = &rqstp->rq_res_stream;
struct nfsd3_attrstat *resp = rqstp->rq_resp;
*p++ = resp->status;
p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh);
return xdr_ressize_check(rqstp, p);
return svcxdr_encode_nfsstat3(xdr, resp->status) &&
svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh);
}
/*
......
......@@ -126,14 +126,15 @@ nfsd3_proc_readlink(struct svc_rqst *rqstp)
{
struct nfsd_fhandle *argp = rqstp->rq_argp;
struct nfsd3_readlinkres *resp = rqstp->rq_resp;
char *buffer = page_address(*(rqstp->rq_next_page++));
dprintk("nfsd: READLINK(3) %s\n", SVCFH_fmt(&argp->fh));
/* Read the symlink. */
fh_copy(&resp->fh, &argp->fh);
resp->len = NFS3_MAXPATHLEN;
resp->status = nfsd_readlink(rqstp, &resp->fh, buffer, &resp->len);
resp->pages = rqstp->rq_next_page++;
resp->status = nfsd_readlink(rqstp, &resp->fh,
page_address(*resp->pages), &resp->len);
return rpc_success;
}
......@@ -158,6 +159,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
v = 0;
len = argp->count;
resp->pages = rqstp->rq_next_page;
while (len > 0) {
struct page *page = *(rqstp->rq_next_page++);
......@@ -439,17 +441,30 @@ static void nfsd3_init_dirlist_pages(struct svc_rqst *rqstp,
struct nfsd3_readdirres *resp,
int count)
{
struct xdr_buf *buf = &resp->dirlist;
struct xdr_stream *xdr = &resp->xdr;
count = min_t(u32, count, svc_max_payload(rqstp));
/* Convert byte count to number of words (i.e. >> 2),
* and reserve room for the NULL ptr & eof flag (-2 words) */
resp->buflen = (count >> 2) - 2;
memset(buf, 0, sizeof(*buf));
resp->buffer = page_address(*rqstp->rq_next_page);
/* Reserve room for the NULL ptr & eof flag (-2 words) */
buf->buflen = count - XDR_UNIT * 2;
buf->pages = rqstp->rq_next_page;
while (count > 0) {
rqstp->rq_next_page++;
count -= PAGE_SIZE;
}
/* This is xdr_init_encode(), but it assumes that
* the head kvec has already been consumed. */
xdr_set_scratch_buffer(xdr, NULL, 0);
xdr->buf = buf;
xdr->page_ptr = buf->pages;
xdr->iov = NULL;
xdr->p = page_address(*buf->pages);
xdr->end = xdr->p + (PAGE_SIZE >> 2);
xdr->rqst = NULL;
}
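/* Editorial aside (not in the patch): the stream setup above works in
 * 4-octet XDR words.  xdr->p is a __be32 pointer, so the end of the
 * first page is p + (PAGE_SIZE >> 2) words, and the XDR_UNIT * 2 held
 * back from buflen leaves room for the "no more entries" boolean and
 * the EOF flag that terminate a READDIR reply.
 */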
/*
......@@ -460,10 +475,7 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp)
{
struct nfsd3_readdirargs *argp = rqstp->rq_argp;
struct nfsd3_readdirres *resp = rqstp->rq_resp;
int count = 0;
loff_t offset;
struct page **p;
caddr_t page_addr = NULL;
dprintk("nfsd: READDIR(3) %s %d bytes at %d\n",
SVCFH_fmt(&argp->fh),
......@@ -471,39 +483,18 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp)
nfsd3_init_dirlist_pages(rqstp, resp, argp->count);
/* Read directory and encode entries on the fly */
fh_copy(&resp->fh, &argp->fh);
resp->common.err = nfs_ok;
resp->cookie_offset = 0;
resp->rqstp = rqstp;
offset = argp->cookie;
resp->status = nfsd_readdir(rqstp, &resp->fh, &offset,
&resp->common, nfs3svc_encode_entry);
&resp->common, nfs3svc_encode_entry3);
memcpy(resp->verf, argp->verf, 8);
count = 0;
for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) {
page_addr = page_address(*p);
nfs3svc_encode_cookie3(resp, offset);
if (((caddr_t)resp->buffer >= page_addr) &&
((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) {
count += (caddr_t)resp->buffer - page_addr;
break;
}
count += PAGE_SIZE;
}
resp->count = count >> 2;
if (resp->offset) {
if (unlikely(resp->offset1)) {
/* we ended up with offset on a page boundary */
*resp->offset = htonl(offset >> 32);
*resp->offset1 = htonl(offset & 0xffffffff);
resp->offset1 = NULL;
} else {
xdr_encode_hyper(resp->offset, offset);
}
resp->offset = NULL;
}
/* Recycle only pages that were part of the reply */
rqstp->rq_next_page = resp->xdr.page_ptr + 1;
return rpc_success;
}
......@@ -517,10 +508,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp)
{
struct nfsd3_readdirargs *argp = rqstp->rq_argp;
struct nfsd3_readdirres *resp = rqstp->rq_resp;
int count = 0;
loff_t offset;
struct page **p;
caddr_t page_addr = NULL;
dprintk("nfsd: READDIR+(3) %s %d bytes at %d\n",
SVCFH_fmt(&argp->fh),
......@@ -528,10 +516,9 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp)
nfsd3_init_dirlist_pages(rqstp, resp, argp->count);
/* Read directory and encode entries on the fly */
fh_copy(&resp->fh, &argp->fh);
resp->common.err = nfs_ok;
resp->cookie_offset = 0;
resp->rqstp = rqstp;
offset = argp->cookie;
......@@ -545,30 +532,12 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp)
}
resp->status = nfsd_readdir(rqstp, &resp->fh, &offset,
&resp->common, nfs3svc_encode_entry_plus);
&resp->common, nfs3svc_encode_entryplus3);
memcpy(resp->verf, argp->verf, 8);
for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) {
page_addr = page_address(*p);
nfs3svc_encode_cookie3(resp, offset);
if (((caddr_t)resp->buffer >= page_addr) &&
((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) {
count += (caddr_t)resp->buffer - page_addr;
break;
}
count += PAGE_SIZE;
}
resp->count = count >> 2;
if (resp->offset) {
if (unlikely(resp->offset1)) {
/* we ended up with offset on a page boundary */
*resp->offset = htonl(offset >> 32);
*resp->offset1 = htonl(offset & 0xffffffff);
resp->offset1 = NULL;
} else {
xdr_encode_hyper(resp->offset, offset);
}
resp->offset = NULL;
}
/* Recycle only pages that were part of the reply */
rqstp->rq_next_page = resp->xdr.page_ptr + 1;
out:
return rpc_success;
......@@ -736,7 +705,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
[NFS3PROC_GETATTR] = {
.pc_func = nfsd3_proc_getattr,
.pc_decode = nfs3svc_decode_fhandleargs,
.pc_encode = nfs3svc_encode_attrstatres,
.pc_encode = nfs3svc_encode_getattrres,
.pc_release = nfs3svc_release_fhandle,
.pc_argsize = sizeof(struct nfsd_fhandle),
.pc_ressize = sizeof(struct nfsd3_attrstatres),
......@@ -758,7 +727,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
[NFS3PROC_LOOKUP] = {
.pc_func = nfsd3_proc_lookup,
.pc_decode = nfs3svc_decode_diropargs,
.pc_encode = nfs3svc_encode_diropres,
.pc_encode = nfs3svc_encode_lookupres,
.pc_release = nfs3svc_release_fhandle2,
.pc_argsize = sizeof(struct nfsd3_diropargs),
.pc_ressize = sizeof(struct nfsd3_diropres),
......
......@@ -1383,10 +1383,13 @@ static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync)
static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
{
ssize_t bytes_copied = 0;
size_t bytes_total = copy->cp_count;
u64 bytes_total = copy->cp_count;
u64 src_pos = copy->cp_src_pos;
u64 dst_pos = copy->cp_dst_pos;
/* See RFC 7862 p.67: */
if (bytes_total == 0)
bytes_total = ULLONG_MAX;
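	/* Editorial aside: RFC 7862 defines a COPY count of zero as
	 * "copy from src_pos through end-of-file", so the largest
	 * possible length is substituted here and the loop below stops
	 * once the source file is exhausted.
	 */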
do {
if (kthread_should_stop())
break;
......@@ -1538,8 +1541,8 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (!nfs4_init_copy_state(nn, copy))
goto out_err;
refcount_set(&async_copy->refcount, 1);
memcpy(&copy->cp_res.cb_stateid, &copy->cp_stateid,
sizeof(copy->cp_stateid));
memcpy(&copy->cp_res.cb_stateid, &copy->cp_stateid.stid,
sizeof(copy->cp_res.cb_stateid));
dup_copy_fields(copy, async_copy);
async_copy->copy_task = kthread_create(nfsd4_do_async_copy,
async_copy, "%s", "copy thread");
......@@ -2262,25 +2265,6 @@ static bool need_wrongsec_check(struct svc_rqst *rqstp)
return !(nextd->op_flags & OP_HANDLES_WRONGSEC);
}
static void svcxdr_init_encode(struct svc_rqst *rqstp,
struct nfsd4_compoundres *resp)
{
struct xdr_stream *xdr = &resp->xdr;
struct xdr_buf *buf = &rqstp->rq_res;
struct kvec *head = buf->head;
xdr->buf = buf;
xdr->iov = head;
xdr->p = head->iov_base + head->iov_len;
xdr->end = head->iov_base + PAGE_SIZE - rqstp->rq_auth_slack;
/* Tail and page_len should be zero at this point: */
buf->len = buf->head[0].iov_len;
xdr_reset_scratch_buffer(xdr);
xdr->page_ptr = buf->pages - 1;
buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages)
- rqstp->rq_auth_slack;
}
#ifdef CONFIG_NFSD_V4_2_INTER_SSC
static void
check_if_stalefh_allowed(struct nfsd4_compoundargs *args)
......@@ -2335,10 +2319,14 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
__be32 status;
svcxdr_init_encode(rqstp, resp);
resp->tagp = resp->xdr.p;
resp->xdr = &rqstp->rq_res_stream;
/* reserve space for: NFS status code */
xdr_reserve_space(resp->xdr, XDR_UNIT);
resp->tagp = resp->xdr->p;
/* reserve space for: taglen, tag, and opcnt */
xdr_reserve_space(&resp->xdr, 8 + args->taglen);
xdr_reserve_space(resp->xdr, XDR_UNIT * 2 + args->taglen);
resp->taglen = args->taglen;
resp->tag = args->tag;
resp->rqstp = rqstp;
......@@ -2444,7 +2432,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
encode_op:
if (op->status == nfserr_replay_me) {
op->replay = &cstate->replay_owner->so_replay;
nfsd4_encode_replay(&resp->xdr, op);
nfsd4_encode_replay(resp->xdr, op);
status = op->status = op->replay->rp_status;
} else {
nfsd4_encode_operation(resp, op);
......
......@@ -626,7 +626,7 @@ nfsd4_legacy_tracking_init(struct net *net)
status = nfsd4_load_reboot_recovery_data(net);
if (status)
goto err;
printk("NFSD: Using legacy client tracking operations.\n");
pr_info("NFSD: Using legacy client tracking operations.\n");
return 0;
err:
......@@ -1028,7 +1028,7 @@ nfsd4_init_cld_pipe(struct net *net)
status = __nfsd4_init_cld_pipe(net);
if (!status)
printk("NFSD: Using old nfsdcld client tracking operations.\n");
pr_info("NFSD: Using old nfsdcld client tracking operations.\n");
return status;
}
......@@ -1605,7 +1605,7 @@ nfsd4_cld_tracking_init(struct net *net)
nfs4_release_reclaim(nn);
goto err_remove;
} else
printk("NFSD: Using nfsdcld client tracking operations.\n");
pr_info("NFSD: Using nfsdcld client tracking operations.\n");
return 0;
err_remove:
......@@ -1864,7 +1864,7 @@ nfsd4_umh_cltrack_init(struct net *net)
ret = nfsd4_umh_cltrack_upcall("init", NULL, grace_start, NULL);
kfree(grace_start);
if (!ret)
printk("NFSD: Using UMH upcall client tracking operations.\n");
pr_info("NFSD: Using UMH upcall client tracking operations.\n");
return ret;
}
......
......@@ -43,6 +43,7 @@
#include <linux/sunrpc/addr.h>
#include <linux/jhash.h>
#include <linux/string_helpers.h>
#include <linux/fsnotify.h>
#include "xdr4.h"
#include "xdr4cb.h"
#include "vfs.h"
......@@ -2352,6 +2353,10 @@ static int client_info_show(struct seq_file *m, void *v)
memcpy(&clid, &clp->cl_clientid, sizeof(clid));
seq_printf(m, "clientid: 0x%llx\n", clid);
seq_printf(m, "address: \"%pISpc\"\n", (struct sockaddr *)&clp->cl_addr);
if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags))
seq_puts(m, "status: confirmed\n");
else
seq_puts(m, "status: unconfirmed\n");
seq_printf(m, "name: ");
seq_quote_mem(m, clp->cl_name.data, clp->cl_name.len);
seq_printf(m, "\nminor version: %d\n", clp->cl_minorversion);
......@@ -2702,6 +2707,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
int ret;
struct net *net = SVC_NET(rqstp);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct dentry *dentries[ARRAY_SIZE(client_files)];
clp = alloc_client(name);
if (clp == NULL)
......@@ -2721,9 +2727,11 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage));
clp->cl_cb_session = NULL;
clp->net = net;
clp->cl_nfsd_dentry = nfsd_client_mkdir(nn, &clp->cl_nfsdfs,
clp->cl_clientid.cl_id - nn->clientid_base,
client_files);
clp->cl_nfsd_dentry = nfsd_client_mkdir(
nn, &clp->cl_nfsdfs,
clp->cl_clientid.cl_id - nn->clientid_base,
client_files, dentries);
clp->cl_nfsd_info_dentry = dentries[0];
if (!clp->cl_nfsd_dentry) {
free_client(clp);
return NULL;
......@@ -2798,7 +2806,10 @@ move_to_confirmed(struct nfs4_client *clp)
list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]);
rb_erase(&clp->cl_namenode, &nn->unconf_name_tree);
add_clp_to_name_tree(clp, &nn->conf_name_tree);
set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags);
if (!test_and_set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags) &&
clp->cl_nfsd_dentry &&
clp->cl_nfsd_info_dentry)
fsnotify_dentry(clp->cl_nfsd_info_dentry, FS_MODIFY);
renew_client_locked(clp);
}
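The fsnotify_dentry() call above makes confirmation observable from userspace: a monitor watching a client's "info" file under nfsd/clients/ receives IN_MODIFY once the client moves to the confirmed table. A minimal sketch (editorial, with a hypothetical client id; assumes the nfsd filesystem is mounted at /proc/fs/nfsd):

	#include <stdio.h>
	#include <sys/inotify.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[4096];
		int fd = inotify_init1(0);

		if (fd < 0)
			return 1;
		/* hypothetical path: client id 1 under nfsd/clients/ */
		inotify_add_watch(fd, "/proc/fs/nfsd/clients/1/info", IN_MODIFY);
		if (read(fd, buf, sizeof(buf)) > 0)
			puts("client state changed; re-read the info file");
		close(fd);
		return 0;
	}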
......@@ -2903,7 +2914,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r
static void
nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
{
struct xdr_buf *buf = resp->xdr.buf;
struct xdr_buf *buf = resp->xdr->buf;
struct nfsd4_slot *slot = resp->cstate.slot;
unsigned int base;
......@@ -2973,7 +2984,7 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
struct nfsd4_sequence *seq)
{
struct nfsd4_slot *slot = resp->cstate.slot;
struct xdr_stream *xdr = &resp->xdr;
struct xdr_stream *xdr = resp->xdr;
__be32 *p;
__be32 status;
......@@ -3708,7 +3719,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
{
struct nfsd4_sequence *seq = &u->sequence;
struct nfsd4_compoundres *resp = rqstp->rq_resp;
struct xdr_stream *xdr = &resp->xdr;
struct xdr_stream *xdr = resp->xdr;
struct nfsd4_session *session;
struct nfs4_client *clp;
struct nfsd4_slot *slot;
......@@ -5338,6 +5349,22 @@ static bool clients_still_reclaiming(struct nfsd_net *nn)
return true;
}
struct laundry_time {
time64_t cutoff;
time64_t new_timeo;
};
static bool state_expired(struct laundry_time *lt, time64_t last_refresh)
{
time64_t time_remaining;
if (last_refresh < lt->cutoff)
return true;
time_remaining = last_refresh - lt->cutoff;
lt->new_timeo = min(lt->new_timeo, time_remaining);
return false;
}
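/* Worked example (editorial): with a 90-second lease,
 *
 *	lt.cutoff    = ktime_get_boottime_seconds() - 90;
 *	lt.new_timeo = 90;
 *
 * An item last refreshed 120s ago satisfies last_refresh < cutoff and
 * is reaped.  An item refreshed 30s ago is kept, and its remaining
 * lifetime (now - 30) - (now - 90) = 60s lowers lt.new_timeo, so the
 * laundromat reschedules itself to run again within 60 seconds.
 */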
static time64_t
nfs4_laundromat(struct nfsd_net *nn)
{
......@@ -5347,14 +5374,16 @@ nfs4_laundromat(struct nfsd_net *nn)
struct nfs4_ol_stateid *stp;
struct nfsd4_blocked_lock *nbl;
struct list_head *pos, *next, reaplist;
time64_t cutoff = ktime_get_boottime_seconds() - nn->nfsd4_lease;
time64_t t, new_timeo = nn->nfsd4_lease;
struct laundry_time lt = {
.cutoff = ktime_get_boottime_seconds() - nn->nfsd4_lease,
.new_timeo = nn->nfsd4_lease
};
struct nfs4_cpntf_state *cps;
copy_stateid_t *cps_t;
int i;
if (clients_still_reclaiming(nn)) {
new_timeo = 0;
lt.new_timeo = 0;
goto out;
}
nfsd4_end_grace(nn);
......@@ -5364,7 +5393,7 @@ nfs4_laundromat(struct nfsd_net *nn)
idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) {
cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid);
if (cps->cp_stateid.sc_type == NFS4_COPYNOTIFY_STID &&
cps->cpntf_time < cutoff)
state_expired(&lt, cps->cpntf_time))
_free_cpntf_state_locked(nn, cps);
}
spin_unlock(&nn->s2s_cp_lock);
......@@ -5372,11 +5401,8 @@ nfs4_laundromat(struct nfsd_net *nn)
spin_lock(&nn->client_lock);
list_for_each_safe(pos, next, &nn->client_lru) {
clp = list_entry(pos, struct nfs4_client, cl_lru);
if (clp->cl_time > cutoff) {
t = clp->cl_time - cutoff;
new_timeo = min(new_timeo, t);
if (!state_expired(&lt, clp->cl_time))
break;
}
if (mark_client_expired_locked(clp)) {
trace_nfsd_clid_expired(&clp->cl_clientid);
continue;
......@@ -5393,11 +5419,8 @@ nfs4_laundromat(struct nfsd_net *nn)
spin_lock(&state_lock);
list_for_each_safe(pos, next, &nn->del_recall_lru) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
if (dp->dl_time > cutoff) {
t = dp->dl_time - cutoff;
new_timeo = min(new_timeo, t);
if (!state_expired(&lt, dp->dl_time))
break;
}
WARN_ON(!unhash_delegation_locked(dp));
list_add(&dp->dl_recall_lru, &reaplist);
}
......@@ -5413,11 +5436,8 @@ nfs4_laundromat(struct nfsd_net *nn)
while (!list_empty(&nn->close_lru)) {
oo = list_first_entry(&nn->close_lru, struct nfs4_openowner,
oo_close_lru);
if (oo->oo_time > cutoff) {
t = oo->oo_time - cutoff;
new_timeo = min(new_timeo, t);
if (!state_expired(&lt, oo->oo_time))
break;
}
list_del_init(&oo->oo_close_lru);
stp = oo->oo_last_closed_stid;
oo->oo_last_closed_stid = NULL;
......@@ -5443,11 +5463,8 @@ nfs4_laundromat(struct nfsd_net *nn)
while (!list_empty(&nn->blocked_locks_lru)) {
nbl = list_first_entry(&nn->blocked_locks_lru,
struct nfsd4_blocked_lock, nbl_lru);
if (nbl->nbl_time > cutoff) {
t = nbl->nbl_time - cutoff;
new_timeo = min(new_timeo, t);
if (!state_expired(&lt, nbl->nbl_time))
break;
}
list_move(&nbl->nbl_lru, &reaplist);
list_del_init(&nbl->nbl_list);
}
......@@ -5460,8 +5477,7 @@ nfs4_laundromat(struct nfsd_net *nn)
free_blocked_lock(nbl);
}
out:
new_timeo = max_t(time64_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
return new_timeo;
return max_t(time64_t, lt.new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
}
static struct workqueue_struct *laundry_wq;
......@@ -7321,14 +7337,9 @@ nfs4_state_start_net(struct net *net)
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
int ret;
ret = get_nfsdfs(net);
if (ret)
return ret;
ret = nfs4_state_create_net(net);
if (ret) {
mntput(nn->nfsd_mnt);
if (ret)
return ret;
}
locks_start_grace(net, &nn->nfsd4_manager);
nfsd4_client_tracking_init(net);
if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0)
......@@ -7398,7 +7409,6 @@ nfs4_state_shutdown_net(struct net *net)
nfsd4_client_tracking_exit(net);
nfs4_state_destroy_net(net);
mntput(nn->nfsd_mnt);
}
void
......
......@@ -1266,7 +1266,8 @@ static void nfsdfs_remove_files(struct dentry *root)
/* XXX: cut'n'paste from simple_fill_super; figure out if we could share
* code instead. */
static int nfsdfs_create_files(struct dentry *root,
const struct tree_descr *files)
const struct tree_descr *files,
struct dentry **fdentries)
{
struct inode *dir = d_inode(root);
struct inode *inode;
......@@ -1275,8 +1276,6 @@ static int nfsdfs_create_files(struct dentry *root,
inode_lock(dir);
for (i = 0; files->name && files->name[0]; i++, files++) {
if (!files->name)
continue;
dentry = d_alloc_name(root, files->name);
if (!dentry)
goto out;
......@@ -1290,6 +1289,8 @@ static int nfsdfs_create_files(struct dentry *root,
inode->i_private = __get_nfsdfs_client(dir);
d_add(dentry, inode);
fsnotify_create(dir, dentry);
if (fdentries)
fdentries[i] = dentry;
}
inode_unlock(dir);
return 0;
......@@ -1301,8 +1302,9 @@ static int nfsdfs_create_files(struct dentry *root,
/* on success, returns positive number unique to that client. */
struct dentry *nfsd_client_mkdir(struct nfsd_net *nn,
struct nfsdfs_client *ncl, u32 id,
const struct tree_descr *files)
struct nfsdfs_client *ncl, u32 id,
const struct tree_descr *files,
struct dentry **fdentries)
{
struct dentry *dentry;
char name[11];
......@@ -1313,7 +1315,7 @@ struct dentry *nfsd_client_mkdir(struct nfsd_net *nn,
dentry = nfsd_mkdir(nn->nfsd_client_dir, ncl, name);
if (IS_ERR(dentry)) /* XXX: tossing errors? */
return NULL;
ret = nfsdfs_create_files(dentry, files);
ret = nfsdfs_create_files(dentry, files, fdentries);
if (ret) {
nfsd_client_rmdir(dentry);
return NULL;
......@@ -1416,6 +1418,8 @@ static void nfsd_umount(struct super_block *sb)
{
struct net *net = sb->s_fs_info;
nfsd_shutdown_threads(net);
kill_litter_super(sb);
put_net(net);
}
......@@ -1428,18 +1432,6 @@ static struct file_system_type nfsd_fs_type = {
};
MODULE_ALIAS_FS("nfsd");
int get_nfsdfs(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct vfsmount *mnt;
mnt = vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
if (IS_ERR(mnt))
return PTR_ERR(mnt);
nn->nfsd_mnt = mnt;
return 0;
}
#ifdef CONFIG_PROC_FS
static int create_proc_exports_entry(void)
{
......
......@@ -93,13 +93,12 @@ int nfsd_get_nrthreads(int n, int *, struct net *);
int nfsd_set_nrthreads(int n, int *, struct net *);
int nfsd_pool_stats_open(struct inode *, struct file *);
int nfsd_pool_stats_release(struct inode *, struct file *);
void nfsd_shutdown_threads(struct net *net);
void nfsd_destroy(struct net *net);
bool i_am_nfsd(void);
int get_nfsdfs(struct net *);
struct nfsdfs_client {
struct kref cl_ref;
void (*cl_release)(struct kref *kref);
......@@ -107,7 +106,9 @@ struct nfsdfs_client {
struct nfsdfs_client *get_nfsdfs_client(struct inode *);
struct dentry *nfsd_client_mkdir(struct nfsd_net *nn,
struct nfsdfs_client *ncl, u32 id, const struct tree_descr *);
struct nfsdfs_client *ncl, u32 id,
const struct tree_descr *,
struct dentry **fdentries);
void nfsd_client_rmdir(struct dentry *dentry);
......
......@@ -711,7 +711,7 @@ char * SVCFH_fmt(struct svc_fh *fhp)
return buf;
}
enum fsid_source fsid_source(struct svc_fh *fhp)
enum fsid_source fsid_source(const struct svc_fh *fhp)
{
if (fhp->fh_handle.fh_version != 1)
return FSIDSOURCE_DEV;
......
......@@ -82,7 +82,7 @@ enum fsid_source {
FSIDSOURCE_FSID,
FSIDSOURCE_UUID,
};
extern enum fsid_source fsid_source(struct svc_fh *fhp);
extern enum fsid_source fsid_source(const struct svc_fh *fhp);
/*
......
......@@ -151,13 +151,14 @@ nfsd_proc_readlink(struct svc_rqst *rqstp)
{
struct nfsd_fhandle *argp = rqstp->rq_argp;
struct nfsd_readlinkres *resp = rqstp->rq_resp;
char *buffer = page_address(*(rqstp->rq_next_page++));
dprintk("nfsd: READLINK %s\n", SVCFH_fmt(&argp->fh));
/* Read the symlink. */
resp->len = NFS_MAXPATHLEN;
resp->status = nfsd_readlink(rqstp, &argp->fh, buffer, &resp->len);
resp->page = *(rqstp->rq_next_page++);
resp->status = nfsd_readlink(rqstp, &argp->fh,
page_address(resp->page), &resp->len);
fh_put(&argp->fh);
return rpc_success;
......@@ -184,6 +185,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
v = 0;
len = argp->count;
resp->pages = rqstp->rq_next_page;
while (len > 0) {
struct page *page = *(rqstp->rq_next_page++);
......@@ -557,14 +559,27 @@ static void nfsd_init_dirlist_pages(struct svc_rqst *rqstp,
struct nfsd_readdirres *resp,
int count)
{
struct xdr_buf *buf = &resp->dirlist;
struct xdr_stream *xdr = &resp->xdr;
count = min_t(u32, count, PAGE_SIZE);
/* Convert byte count to number of words (i.e. >> 2),
* and reserve room for the NULL ptr & eof flag (-2 words) */
resp->buflen = (count >> 2) - 2;
memset(buf, 0, sizeof(*buf));
resp->buffer = page_address(*rqstp->rq_next_page);
/* Reserve room for the NULL ptr & eof flag (-2 words) */
buf->buflen = count - sizeof(__be32) * 2;
buf->pages = rqstp->rq_next_page;
rqstp->rq_next_page++;
/* This is xdr_init_encode(), but it assumes that
* the head kvec has already been consumed. */
xdr_set_scratch_buffer(xdr, NULL, 0);
xdr->buf = buf;
xdr->page_ptr = buf->pages;
xdr->iov = NULL;
xdr->p = page_address(*buf->pages);
xdr->end = xdr->p + (PAGE_SIZE >> 2);
xdr->rqst = NULL;
}
/*
......@@ -576,25 +591,19 @@ nfsd_proc_readdir(struct svc_rqst *rqstp)
struct nfsd_readdirargs *argp = rqstp->rq_argp;
struct nfsd_readdirres *resp = rqstp->rq_resp;
loff_t offset;
__be32 *buffer;
dprintk("nfsd: READDIR %s %d bytes at %d\n",
SVCFH_fmt(&argp->fh),
argp->count, argp->cookie);
nfsd_init_dirlist_pages(rqstp, resp, argp->count);
buffer = resp->buffer;
resp->offset = NULL;
resp->common.err = nfs_ok;
/* Read directory and encode entries on the fly */
resp->cookie_offset = 0;
offset = argp->cookie;
resp->status = nfsd_readdir(rqstp, &argp->fh, &offset,
&resp->common, nfssvc_encode_entry);
resp->count = resp->buffer - buffer;
if (resp->offset)
*resp->offset = htonl(offset);
nfssvc_encode_nfscookie(resp, offset);
fh_put(&argp->fh);
return rpc_success;
......@@ -640,7 +649,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
[NFSPROC_GETATTR] = {
.pc_func = nfsd_proc_getattr,
.pc_decode = nfssvc_decode_fhandleargs,
.pc_encode = nfssvc_encode_attrstat,
.pc_encode = nfssvc_encode_attrstatres,
.pc_release = nfssvc_release_attrstat,
.pc_argsize = sizeof(struct nfsd_fhandle),
.pc_ressize = sizeof(struct nfsd_attrstat),
......@@ -651,7 +660,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
[NFSPROC_SETATTR] = {
.pc_func = nfsd_proc_setattr,
.pc_decode = nfssvc_decode_sattrargs,
.pc_encode = nfssvc_encode_attrstat,
.pc_encode = nfssvc_encode_attrstatres,
.pc_release = nfssvc_release_attrstat,
.pc_argsize = sizeof(struct nfsd_sattrargs),
.pc_ressize = sizeof(struct nfsd_attrstat),
......@@ -714,7 +723,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
[NFSPROC_WRITE] = {
.pc_func = nfsd_proc_write,
.pc_decode = nfssvc_decode_writeargs,
.pc_encode = nfssvc_encode_attrstat,
.pc_encode = nfssvc_encode_attrstatres,
.pc_release = nfssvc_release_attrstat,
.pc_argsize = sizeof(struct nfsd_writeargs),
.pc_ressize = sizeof(struct nfsd_attrstat),
......@@ -736,7 +745,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
[NFSPROC_REMOVE] = {
.pc_func = nfsd_proc_remove,
.pc_decode = nfssvc_decode_diropargs,
.pc_encode = nfssvc_encode_stat,
.pc_encode = nfssvc_encode_statres,
.pc_argsize = sizeof(struct nfsd_diropargs),
.pc_ressize = sizeof(struct nfsd_stat),
.pc_cachetype = RC_REPLSTAT,
......@@ -746,7 +755,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
[NFSPROC_RENAME] = {
.pc_func = nfsd_proc_rename,
.pc_decode = nfssvc_decode_renameargs,
.pc_encode = nfssvc_encode_stat,
.pc_encode = nfssvc_encode_statres,
.pc_argsize = sizeof(struct nfsd_renameargs),
.pc_ressize = sizeof(struct nfsd_stat),
.pc_cachetype = RC_REPLSTAT,
......@@ -756,7 +765,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
[NFSPROC_LINK] = {
.pc_func = nfsd_proc_link,
.pc_decode = nfssvc_decode_linkargs,
.pc_encode = nfssvc_encode_stat,
.pc_encode = nfssvc_encode_statres,
.pc_argsize = sizeof(struct nfsd_linkargs),
.pc_ressize = sizeof(struct nfsd_stat),
.pc_cachetype = RC_REPLSTAT,
......@@ -766,7 +775,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
[NFSPROC_SYMLINK] = {
.pc_func = nfsd_proc_symlink,
.pc_decode = nfssvc_decode_symlinkargs,
.pc_encode = nfssvc_encode_stat,
.pc_encode = nfssvc_encode_statres,
.pc_argsize = sizeof(struct nfsd_symlinkargs),
.pc_ressize = sizeof(struct nfsd_stat),
.pc_cachetype = RC_REPLSTAT,
......@@ -787,7 +796,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
[NFSPROC_RMDIR] = {
.pc_func = nfsd_proc_rmdir,
.pc_decode = nfssvc_decode_diropargs,
.pc_encode = nfssvc_encode_stat,
.pc_encode = nfssvc_encode_statres,
.pc_argsize = sizeof(struct nfsd_diropargs),
.pc_ressize = sizeof(struct nfsd_stat),
.pc_cachetype = RC_REPLSTAT,
......
......@@ -84,7 +84,7 @@ DEFINE_MUTEX(nfsd_mutex);
* version 4.1 DRC caches.
 * nfsd_drc_mem_used tracks the current version 4.1 DRC memory usage.
*/
spinlock_t nfsd_drc_lock;
DEFINE_SPINLOCK(nfsd_drc_lock);
unsigned long nfsd_drc_max_mem;
unsigned long nfsd_drc_mem_used;
......@@ -563,7 +563,6 @@ static void set_max_drc(void)
nfsd_drc_max_mem = (nr_free_buffer_pages()
>> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
nfsd_drc_mem_used = 0;
spin_lock_init(&nfsd_drc_lock);
dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem);
}
......@@ -596,6 +595,37 @@ static const struct svc_serv_ops nfsd_thread_sv_ops = {
.svo_module = THIS_MODULE,
};
static void nfsd_complete_shutdown(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
WARN_ON(!mutex_is_locked(&nfsd_mutex));
nn->nfsd_serv = NULL;
complete(&nn->nfsd_shutdown_complete);
}
void nfsd_shutdown_threads(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct svc_serv *serv;
mutex_lock(&nfsd_mutex);
serv = nn->nfsd_serv;
if (serv == NULL) {
mutex_unlock(&nfsd_mutex);
return;
}
svc_get(serv);
/* Kill outstanding nfsd threads */
serv->sv_ops->svo_setup(serv, NULL, 0);
nfsd_destroy(net);
mutex_unlock(&nfsd_mutex);
/* Wait for shutdown of nfsd_serv to complete */
wait_for_completion(&nn->nfsd_shutdown_complete);
}
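The completion closes a race on umount: the final nfsd_serv teardown may happen on an nfsd thread that is still exiting, so the umount path must not proceed until nfsd_complete_shutdown() has run. The generic shape of the handshake (an editorial sketch using names from this patch):

	/* creator:     */ init_completion(&nn->nfsd_shutdown_complete);
	/* last owner:  */ nn->nfsd_serv = NULL;
	                   complete(&nn->nfsd_shutdown_complete);
	/* umount path: */ wait_for_completion(&nn->nfsd_shutdown_complete);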
bool i_am_nfsd(void)
{
return kthread_func(current) == nfsd;
......@@ -618,11 +648,13 @@ int nfsd_create_serv(struct net *net)
&nfsd_thread_sv_ops);
if (nn->nfsd_serv == NULL)
return -ENOMEM;
init_completion(&nn->nfsd_shutdown_complete);
nn->nfsd_serv->sv_maxconn = nn->max_connections;
error = svc_bind(nn->nfsd_serv, net);
if (error < 0) {
svc_destroy(nn->nfsd_serv);
nfsd_complete_shutdown(net);
return error;
}
......@@ -671,7 +703,7 @@ void nfsd_destroy(struct net *net)
svc_shutdown_net(nn->nfsd_serv, net);
svc_destroy(nn->nfsd_serv);
if (destroy)
nn->nfsd_serv = NULL;
nfsd_complete_shutdown(net);
}
int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
......@@ -997,7 +1029,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
* NFSv4 does some encoding while processing
*/
p = resv->iov_base + resv->iov_len;
resv->iov_len += sizeof(__be32);
svcxdr_init_encode(rqstp);
*statp = proc->pc_func(rqstp);
if (*statp == rpc_drop_reply || test_bit(RQ_DROPME, &rqstp->rq_flags))
......@@ -1052,7 +1084,7 @@ int nfssvc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p)
*/
int nfssvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
{
return xdr_ressize_check(rqstp, p);
return 1;
}
int nfsd_pool_stats_open(struct inode *inode, struct file *file)
......
......@@ -371,6 +371,10 @@ struct nfs4_client {
/* debugging info directory under nfsd/clients/ : */
struct dentry *cl_nfsd_dentry;
/* 'info' file within that directory. Ref is not counted,
* but will remain valid iff cl_nfsd_dentry != NULL
*/
struct dentry *cl_nfsd_info_dentry;
/* for nfs41 callbacks */
/* We currently support a single back channel with a single slot */
......
......@@ -391,6 +391,30 @@ DEFINE_EVENT(nfsd_err_class, nfsd_##name, \
DEFINE_NFSD_ERR_EVENT(read_err);
DEFINE_NFSD_ERR_EVENT(write_err);
TRACE_EVENT(nfsd_dirent,
TP_PROTO(struct svc_fh *fhp,
u64 ino,
const char *name,
int namlen),
TP_ARGS(fhp, ino, name, namlen),
TP_STRUCT__entry(
__field(u32, fh_hash)
__field(u64, ino)
__field(int, len)
__dynamic_array(unsigned char, name, namlen)
),
TP_fast_assign(
__entry->fh_hash = fhp ? knfsd_fh_hash(&fhp->fh_handle) : 0;
__entry->ino = ino;
__entry->len = namlen;
memcpy(__get_str(name), name, namlen);
),
TP_printk("fh_hash=0x%08x ino=%llu name=%.*s",
__entry->fh_hash, __entry->ino,
__entry->len, __get_str(name))
)
#include "state.h"
#include "filecache.h"
#include "vfs.h"
......
......@@ -1968,8 +1968,9 @@ static int nfsd_buffered_filldir(struct dir_context *ctx, const char *name,
return 0;
}
static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
struct readdir_cd *cdp, loff_t *offsetp)
static __be32 nfsd_buffered_readdir(struct file *file, struct svc_fh *fhp,
nfsd_filldir_t func, struct readdir_cd *cdp,
loff_t *offsetp)
{
struct buffered_dirent *de;
int host_err;
......@@ -2015,6 +2016,8 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
if (cdp->err != nfs_ok)
break;
trace_nfsd_dirent(fhp, de->ino, de->name, de->namlen);
reclen = ALIGN(sizeof(*de) + de->namlen,
sizeof(u64));
size -= reclen;
......@@ -2062,7 +2065,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
goto out_close;
}
err = nfsd_buffered_readdir(file, func, cdp, offsetp);
err = nfsd_buffered_readdir(file, fhp, func, cdp, offsetp);
if (err == nfserr_eof || err == nfserr_toosmall)
err = nfs_ok; /* can still be found in ->err */
......
......@@ -152,7 +152,7 @@ static inline void fh_drop_write(struct svc_fh *fh)
}
}
static inline __be32 fh_getattr(struct svc_fh *fh, struct kstat *stat)
static inline __be32 fh_getattr(const struct svc_fh *fh, struct kstat *stat)
{
struct path p = {.mnt = fh->fh_export->ex_path.mnt,
.dentry = fh->fh_dentry};
......
......@@ -94,6 +94,7 @@ struct nfsd_diropres {
struct nfsd_readlinkres {
__be32 status;
int len;
struct page *page;
};
struct nfsd_readres {
......@@ -101,17 +102,20 @@ struct nfsd_readres {
struct svc_fh fh;
unsigned long count;
struct kstat stat;
struct page **pages;
};
struct nfsd_readdirres {
/* Components of the reply */
__be32 status;
int count;
/* Used to encode the reply's entry list */
struct xdr_stream xdr;
struct xdr_buf dirlist;
struct readdir_cd common;
__be32 * buffer;
int buflen;
__be32 * offset;
unsigned int cookie_offset;
};
struct nfsd_statfsres {
......@@ -147,23 +151,26 @@ int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *);
int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *);
int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *);
int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *);
int nfssvc_encode_stat(struct svc_rqst *, __be32 *);
int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *);
int nfssvc_encode_statres(struct svc_rqst *, __be32 *);
int nfssvc_encode_attrstatres(struct svc_rqst *, __be32 *);
int nfssvc_encode_diropres(struct svc_rqst *, __be32 *);
int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *);
int nfssvc_encode_readres(struct svc_rqst *, __be32 *);
int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *);
int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *);
int nfssvc_encode_entry(void *, const char *name,
int namlen, loff_t offset, u64 ino, unsigned int);
void nfssvc_encode_nfscookie(struct nfsd_readdirres *resp, u32 offset);
int nfssvc_encode_entry(void *data, const char *name, int namlen,
loff_t offset, u64 ino, unsigned int d_type);
void nfssvc_release_attrstat(struct svc_rqst *rqstp);
void nfssvc_release_diropres(struct svc_rqst *rqstp);
void nfssvc_release_readres(struct svc_rqst *rqstp);
/* Helper functions for NFSv2 ACL code */
__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat);
bool svcxdr_decode_fhandle(struct xdr_stream *xdr, struct svc_fh *fhp);
bool svcxdr_encode_stat(struct xdr_stream *xdr, __be32 status);
bool svcxdr_encode_fattr(struct svc_rqst *rqstp, struct xdr_stream *xdr,
const struct svc_fh *fhp, const struct kstat *stat);
#endif /* LINUX_NFSD_H */
......@@ -137,6 +137,7 @@ struct nfsd3_readlinkres {
__be32 status;
struct svc_fh fh;
__u32 len;
struct page **pages;
};
struct nfsd3_readres {
......@@ -144,6 +145,7 @@ struct nfsd3_readres {
struct svc_fh fh;
unsigned long count;
__u32 eof;
struct page **pages;
};
struct nfsd3_writeres {
......@@ -167,19 +169,17 @@ struct nfsd3_linkres {
};
struct nfsd3_readdirres {
/* Components of the reply */
__be32 status;
struct svc_fh fh;
/* Just to save kmalloc on every readdirplus entry (svc_fh is a
* little large for the stack): */
struct svc_fh scratch;
int count;
__be32 verf[2];
/* Used to encode the reply's entry list */
struct xdr_stream xdr;
struct xdr_buf dirlist;
struct svc_fh scratch;
struct readdir_cd common;
__be32 * buffer;
int buflen;
__be32 * offset;
__be32 * offset1;
unsigned int cookie_offset;
struct svc_rqst * rqstp;
};
......@@ -280,9 +280,9 @@ int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *);
int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *);
int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *);
int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *);
int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *);
int nfs3svc_encode_getattrres(struct svc_rqst *, __be32 *);
int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *);
int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *);
int nfs3svc_encode_lookupres(struct svc_rqst *, __be32 *);
int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *);
int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *);
int nfs3svc_encode_readres(struct svc_rqst *, __be32 *);
......@@ -298,15 +298,16 @@ int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *);
void nfs3svc_release_fhandle(struct svc_rqst *);
void nfs3svc_release_fhandle2(struct svc_rqst *);
int nfs3svc_encode_entry(void *, const char *name,
int namlen, loff_t offset, u64 ino,
unsigned int);
int nfs3svc_encode_entry_plus(void *, const char *name,
int namlen, loff_t offset, u64 ino,
unsigned int);
void nfs3svc_encode_cookie3(struct nfsd3_readdirres *resp, u64 offset);
int nfs3svc_encode_entry3(void *data, const char *name, int namlen,
loff_t offset, u64 ino, unsigned int d_type);
int nfs3svc_encode_entryplus3(void *data, const char *name, int namlen,
loff_t offset, u64 ino, unsigned int d_type);
/* Helper functions for NFSv3 ACL code */
__be32 *nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p,
struct svc_fh *fhp);
bool svcxdr_decode_nfs_fh3(struct xdr_stream *xdr, struct svc_fh *fhp);
bool svcxdr_encode_nfsstat3(struct xdr_stream *xdr, __be32 status);
bool svcxdr_encode_post_op_attr(struct svc_rqst *rqstp, struct xdr_stream *xdr,
const struct svc_fh *fhp);
#endif /* _LINUX_NFSD_XDR3_H */
......@@ -698,7 +698,7 @@ struct nfsd4_compoundargs {
struct nfsd4_compoundres {
/* scratch variables for XDR encode */
struct xdr_stream xdr;
struct xdr_stream *xdr;
struct svc_rqst * rqstp;
u32 taglen;
......
......@@ -41,5 +41,8 @@ nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt,
extern bool
nfs_stream_decode_acl(struct xdr_stream *xdr, unsigned int *aclcnt,
struct posix_acl **pacl);
extern bool
nfs_stream_encode_acl(struct xdr_stream *xdr, struct inode *inode,
struct posix_acl *acl, int encode_entries, int typeflag);
#endif /* __LINUX_NFSACL_H */
......@@ -248,6 +248,7 @@ struct svc_rqst {
size_t rq_xprt_hlen; /* xprt header len */
struct xdr_buf rq_arg;
struct xdr_stream rq_arg_stream;
struct xdr_stream rq_res_stream;
struct page *rq_scratch_page;
struct xdr_buf rq_res;
struct page *rq_pages[RPCSVC_MAXPAGES + 1];
......@@ -574,4 +575,28 @@ static inline void svcxdr_init_decode(struct svc_rqst *rqstp)
xdr_set_scratch_page(xdr, rqstp->rq_scratch_page);
}
/**
* svcxdr_init_encode - Prepare an xdr_stream for svc Reply encoding
* @rqstp: controlling server RPC transaction context
*
*/
static inline void svcxdr_init_encode(struct svc_rqst *rqstp)
{
struct xdr_stream *xdr = &rqstp->rq_res_stream;
struct xdr_buf *buf = &rqstp->rq_res;
struct kvec *resv = buf->head;
xdr_reset_scratch_buffer(xdr);
xdr->buf = buf;
xdr->iov = resv;
xdr->p = resv->iov_base + resv->iov_len;
xdr->end = resv->iov_base + PAGE_SIZE - rqstp->rq_auth_slack;
buf->len = resv->iov_len;
xdr->page_ptr = buf->pages - 1;
buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages);
buf->buflen -= rqstp->rq_auth_slack;
xdr->rqst = NULL;
}
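With svcxdr_init_encode() run once from nfsd_dispatch(), a .pc_encode method appends to rq_res_stream instead of computing buffer positions itself. A hedged sketch of a minimal encoder in the new style (the result structure is hypothetical; the return convention matches the converted NFSD encoders above):

	static int example_encode_res(struct svc_rqst *rqstp, __be32 *p)
	{
		struct xdr_stream *xdr = &rqstp->rq_res_stream;
		struct example_res *resp = rqstp->rq_resp;	/* hypothetical */

		if (xdr_stream_encode_u32(xdr, resp->status) < 0)
			return 0;	/* encoding failure drops the reply */
		if (xdr_stream_encode_u32(xdr, resp->count) < 0)
			return 0;
		return 1;
	}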
#endif /* SUNRPC_SVC_H */
......@@ -94,6 +94,8 @@ struct svcxprt_rdma {
spinlock_t sc_rw_ctxt_lock;
struct list_head sc_rw_ctxts;
u32 sc_pending_recvs;
u32 sc_recv_batch;
struct list_head sc_rq_dto_q;
spinlock_t sc_rq_dto_lock;
struct ib_qp *sc_qp;
......@@ -104,7 +106,6 @@ struct svcxprt_rdma {
wait_queue_head_t sc_send_wait; /* SQ exhaustion waitlist */
unsigned long sc_flags;
struct list_head sc_read_complete_q;
struct work_struct sc_work;
struct llist_head sc_recv_ctxts;
......@@ -133,12 +134,10 @@ struct svc_rdma_recv_ctxt {
struct rpc_rdma_cid rc_cid;
struct ib_sge rc_recv_sge;
void *rc_recv_buf;
struct xdr_buf rc_arg;
struct xdr_stream rc_stream;
bool rc_temp;
u32 rc_byte_len;
unsigned int rc_page_count;
unsigned int rc_hdr_count;
u32 rc_inv_rkey;
__be32 rc_msgtype;
......@@ -148,8 +147,6 @@ struct svc_rdma_recv_ctxt {
struct svc_rdma_chunk *rc_cur_result_payload;
struct svc_rdma_pcl rc_write_pcl;
struct svc_rdma_pcl rc_reply_pcl;
struct page *rc_pages[RPCSVC_MAXPAGES];
};
struct svc_rdma_send_ctxt {
......@@ -158,12 +155,12 @@ struct svc_rdma_send_ctxt {
struct ib_send_wr sc_send_wr;
struct ib_cqe sc_cqe;
struct completion sc_done;
struct xdr_buf sc_hdrbuf;
struct xdr_stream sc_stream;
void *sc_xprt_buf;
int sc_page_count;
int sc_cur_sge_no;
struct page *sc_pages[RPCSVC_MAXPAGES];
struct ib_sge sc_sges[];
};
......
......@@ -130,6 +130,7 @@ void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *,
int svc_create_xprt(struct svc_serv *, const char *, struct net *,
const int, const unsigned short, int,
const struct cred *);
void svc_xprt_received(struct svc_xprt *xprt);
void svc_xprt_do_enqueue(struct svc_xprt *xprt);
void svc_xprt_enqueue(struct svc_xprt *xprt);
void svc_xprt_put(struct svc_xprt *xprt);
......@@ -143,6 +144,7 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
int svc_xprt_names(struct svc_serv *serv, char *buf, const int buflen);
void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *xprt);
void svc_age_temp_xprts_now(struct svc_serv *, struct sockaddr *);
void svc_xprt_deferred_close(struct svc_xprt *xprt);
static inline void svc_xprt_get(struct svc_xprt *xprt)
{
......
......@@ -394,6 +394,40 @@ static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr)
return len;
}
/**
* xdr_encode_bool - Encode a boolean item
* @p: address in a buffer into which to encode
* @n: boolean value to encode
*
* Return value:
* Address of item following the encoded boolean
*/
static inline __be32 *xdr_encode_bool(__be32 *p, u32 n)
{
*p = n ? xdr_one : xdr_zero;
return p + 1;	/* post-increment would return the boolean itself */
}
/**
* xdr_stream_encode_bool - Encode a boolean item
* @xdr: pointer to xdr_stream
* @n: boolean value to encode
*
* Return values:
* On success, returns length in bytes of XDR buffer consumed
* %-EMSGSIZE on XDR buffer overflow
*/
static inline int xdr_stream_encode_bool(struct xdr_stream *xdr, __u32 n)
{
const size_t len = XDR_UNIT;
__be32 *p = xdr_reserve_space(xdr, len);
if (unlikely(!p))
return -EMSGSIZE;
xdr_encode_bool(p, n);
return len;
}
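A usage sketch for the new helper, in the style of the converted NFSD encoders (the eof result field is hypothetical):

	if (xdr_stream_encode_bool(xdr, resp->eof) < 0)
		return 0;	/* reply buffer overflow */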
/**
* xdr_stream_encode_u32 - Encode a 32-bit integer
* @xdr: pointer to xdr_stream
......
......@@ -1781,6 +1781,7 @@ DECLARE_EVENT_CLASS(svc_xprt_event,
), \
TP_ARGS(xprt))
DEFINE_SVC_XPRT_EVENT(received);
DEFINE_SVC_XPRT_EVENT(no_write_space);
DEFINE_SVC_XPRT_EVENT(close);
DEFINE_SVC_XPRT_EVENT(detach);
......
......@@ -64,13 +64,24 @@ struct nfs_fhbase_old {
* in include/linux/exportfs.h for currently registered values.
*/
struct nfs_fhbase_new {
__u8 fb_version; /* == 1, even => nfs_fhbase_old */
__u8 fb_auth_type;
__u8 fb_fsid_type;
__u8 fb_fileid_type;
__u32 fb_auth[1];
/* __u32 fb_fsid[0]; floating */
/* __u32 fb_fileid[0]; floating */
union {
struct {
__u8 fb_version_aux; /* == 1, even => nfs_fhbase_old */
__u8 fb_auth_type_aux;
__u8 fb_fsid_type_aux;
__u8 fb_fileid_type_aux;
__u32 fb_auth[1];
/* __u32 fb_fsid[0]; floating */
/* __u32 fb_fileid[0]; floating */
};
struct {
__u8 fb_version; /* == 1, even => nfs_fhbase_old */
__u8 fb_auth_type;
__u8 fb_fsid_type;
__u8 fb_fileid_type;
__u32 fb_auth_flex[]; /* flexible-array member */
};
};
};
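/* Editorial aside: the anonymous union preserves the old layout, in
 * which fb_auth[1] is a one-element array, so existing userspace still
 * sees the same sizeof and field offsets, while fb_auth_flex[] gives
 * kernel code a proper flexible-array member for the variable-length
 * tail (see the updated fh_fsid macro below).
 */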
struct knfsd_fh {
......@@ -97,7 +108,7 @@ struct knfsd_fh {
#define fh_fsid_type fh_base.fh_new.fb_fsid_type
#define fh_auth_type fh_base.fh_new.fb_auth_type
#define fh_fileid_type fh_base.fh_new.fb_fileid_type
#define fh_fsid fh_base.fh_new.fb_auth
#define fh_fsid fh_base.fh_new.fb_auth_flex
/* Do not use, provided for userspace compatibility. */
#define fh_auth fh_base.fh_new.fb_auth
......
......@@ -139,6 +139,20 @@ int svc_print_xprts(char *buf, int maxlen)
return len;
}
/**
* svc_xprt_deferred_close - Close a transport
* @xprt: transport instance
*
* Used in contexts that need to defer the work of shutting down
* the transport to an nfsd thread.
*/
void svc_xprt_deferred_close(struct svc_xprt *xprt)
{
if (!test_and_set_bit(XPT_CLOSE, &xprt->xpt_flags))
svc_xprt_enqueue(xprt);
}
EXPORT_SYMBOL_GPL(svc_xprt_deferred_close);
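Because test_and_set_bit() is atomic, the helper is idempotent: racing callers mark XPT_CLOSE and enqueue the transport at most once. Call sites shrink from the open-coded pair to a single call, as the svcsock conversions later in this patch show:

	/* before */
	set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
	svc_xprt_enqueue(&svsk->sk_xprt);

	/* after */
	svc_xprt_deferred_close(&svsk->sk_xprt);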
static void svc_xprt_free(struct kref *kref)
{
struct svc_xprt *xprt =
......@@ -233,21 +247,25 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
return xprt;
}
/*
* svc_xprt_received conditionally queues the transport for processing
* by another thread. The caller must hold the XPT_BUSY bit and must
/**
* svc_xprt_received - start next receiver thread
* @xprt: controlling transport
*
* The caller must hold the XPT_BUSY bit and must
* not thereafter touch transport data.
*
* Note: XPT_DATA only gets cleared when a read-attempt finds no (or
* insufficient) data.
*/
static void svc_xprt_received(struct svc_xprt *xprt)
void svc_xprt_received(struct svc_xprt *xprt)
{
if (!test_bit(XPT_BUSY, &xprt->xpt_flags)) {
WARN_ONCE(1, "xprt=0x%p already busy!", xprt);
return;
}
trace_svc_xprt_received(xprt);
/* As soon as we clear busy, the xprt could be closed and
* 'put', so we need a reference to call svc_enqueue_xprt with:
*/
......@@ -257,6 +275,7 @@ static void svc_xprt_received(struct svc_xprt *xprt)
xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt);
svc_xprt_put(xprt);
}
EXPORT_SYMBOL_GPL(svc_xprt_received);
void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new)
{
......@@ -801,8 +820,10 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
newxpt->xpt_cred = get_cred(xprt->xpt_cred);
svc_add_new_temp_xprt(serv, newxpt);
trace_svc_xprt_accept(newxpt, serv->sv_name);
} else
} else {
module_put(xprt->xpt_class->xcl_owner);
}
svc_xprt_received(xprt);
} else if (svc_xprt_reserve_slot(rqstp, xprt)) {
/* XPT_DATA|XPT_DEFERRED case: */
dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
......@@ -817,8 +838,6 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
rqstp->rq_reserved = serv->sv_max_mesg;
atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
}
/* clear XPT_BUSY: */
svc_xprt_received(xprt);
out:
trace_svc_handle_xprt(xprt, len);
return len;
......@@ -1229,6 +1248,7 @@ static noinline int svc_deferred_recv(struct svc_rqst *rqstp)
rqstp->rq_xprt_hlen = dr->xprt_hlen;
rqstp->rq_daddr = dr->daddr;
rqstp->rq_respages = rqstp->rq_pages;
svc_xprt_received(rqstp->rq_xprt);
return (dr->argslen<<2) - dr->xprt_hlen;
}
......
......@@ -303,15 +303,6 @@ static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class,
return NULL;
}
static inline struct ip_map *ip_map_lookup(struct net *net, char *class,
struct in6_addr *addr)
{
struct sunrpc_net *sn;
sn = net_generic(net, sunrpc_net_id);
return __ip_map_lookup(sn->ip_map_cache, class, addr);
}
static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm,
struct unix_domain *udom, time64_t expiry)
{
......
......@@ -519,6 +519,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
if (serv->sv_stats)
serv->sv_stats->netudpcnt++;
svc_xprt_received(rqstp->rq_xprt);
return len;
out_recv_err:
......@@ -527,7 +528,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
}
trace_svcsock_udp_recv_err(&svsk->sk_xprt, err);
return 0;
goto out_clear_busy;
out_cmsg_err:
net_warn_ratelimited("svc: received unknown control message %d/%d; dropping RPC reply datagram\n",
cmh->cmsg_level, cmh->cmsg_type);
......@@ -536,6 +537,8 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
local_bh_enable();
out_free:
kfree_skb(skb);
out_clear_busy:
svc_xprt_received(rqstp->rq_xprt);
return 0;
}
......@@ -728,10 +731,8 @@ static void svc_tcp_state_change(struct sock *sk)
rmb();
svsk->sk_ostate(sk);
trace_svcsock_tcp_state(&svsk->sk_xprt, svsk->sk_sock);
if (sk->sk_state != TCP_ESTABLISHED) {
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
svc_xprt_enqueue(&svsk->sk_xprt);
}
if (sk->sk_state != TCP_ESTABLISHED)
svc_xprt_deferred_close(&svsk->sk_xprt);
}
}
......@@ -901,7 +902,7 @@ static ssize_t svc_tcp_read_marker(struct svc_sock *svsk,
net_notice_ratelimited("svc: %s %s RPC fragment too large: %d\n",
__func__, svsk->sk_xprt.xpt_server->sv_name,
svc_sock_reclen(svsk));
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
svc_xprt_deferred_close(&svsk->sk_xprt);
err_short:
return -EAGAIN;
}
......@@ -1035,6 +1036,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
if (serv->sv_stats)
serv->sv_stats->nettcpcnt++;
svc_xprt_received(rqstp->rq_xprt);
return rqstp->rq_arg.len;
err_incomplete:
......@@ -1052,13 +1054,14 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
if (len != -EAGAIN)
goto err_delete;
trace_svcsock_tcp_recv_eagain(&svsk->sk_xprt, 0);
return 0;
goto err_noclose;
err_nuts:
svsk->sk_datalen = 0;
err_delete:
trace_svcsock_tcp_recv_err(&svsk->sk_xprt, len);
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
svc_xprt_deferred_close(&svsk->sk_xprt);
err_noclose:
svc_xprt_received(rqstp->rq_xprt);
return 0; /* record not complete */
}
......@@ -1188,8 +1191,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
xprt->xpt_server->sv_name,
(err < 0) ? "got error" : "sent",
(err < 0) ? err : sent, xdr->len);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
svc_xprt_enqueue(xprt);
svc_xprt_deferred_close(xprt);
atomic_dec(&svsk->sk_sendqlen);
mutex_unlock(&xprt->xpt_mutex);
return -EAGAIN;
......@@ -1268,7 +1270,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
case TCP_ESTABLISHED:
break;
default:
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
svc_xprt_deferred_close(&svsk->sk_xprt);
}
}
}
......
......@@ -93,7 +93,13 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
*/
get_page(virt_to_page(rqst->rq_buffer));
sctxt->sc_send_wr.opcode = IB_WR_SEND;
return svc_rdma_send(rdma, sctxt);
ret = svc_rdma_send(rdma, sctxt);
if (ret < 0)
return ret;
ret = wait_for_completion_killable(&sctxt->sc_done);
svc_rdma_send_ctxt_put(rdma, sctxt);
return ret;
}
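The backchannel Send path is now synchronous: svc_rdma_send() posts the WR, the caller sleeps until svc_rdma_wc_send() completes sc_done, and the caller (not the completion handler) returns the ctxt. A condensed sketch of this post-and-wait idiom, which the forward path below follows as well; the wrapper name is illustrative only:

static int svc_rdma_post_send_and_wait(struct svcxprt_rdma *rdma,
				       struct svc_rdma_send_ctxt *sctxt)
{
	int ret;

	ret = svc_rdma_send(rdma, sctxt);
	if (ret < 0)
		return ret;

	/* svc_rdma_wc_send() calls complete(&sctxt->sc_done) */
	ret = wait_for_completion_killable(&sctxt->sc_done);
	svc_rdma_send_ctxt_put(rdma, sctxt);
	return ret;
}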
/* Server-side transport endpoint wants a whole page for its send
......
......@@ -89,8 +89,7 @@
* svc_rdma_recvfrom call returns.
*
* During the second svc_rdma_recvfrom call, RDMA Read sink pages
* are transferred from the svc_rdma_recv_ctxt to the second svc_rqst
* (see rdma_read_complete() below).
* are transferred from the svc_rdma_recv_ctxt to the second svc_rqst.
*/
#include <linux/slab.h>
......@@ -107,8 +106,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc);
static inline struct svc_rdma_recv_ctxt *
......@@ -230,11 +227,6 @@ struct svc_rdma_recv_ctxt *svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma)
void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
struct svc_rdma_recv_ctxt *ctxt)
{
unsigned int i;
for (i = 0; i < ctxt->rc_page_count; i++)
put_page(ctxt->rc_pages[i]);
pcl_free(&ctxt->rc_call_pcl);
pcl_free(&ctxt->rc_read_pcl);
pcl_free(&ctxt->rc_write_pcl);
......@@ -266,33 +258,48 @@ void svc_rdma_release_rqst(struct svc_rqst *rqstp)
svc_rdma_recv_ctxt_put(rdma, ctxt);
}
static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma,
struct svc_rdma_recv_ctxt *ctxt)
static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma,
unsigned int wanted, bool temp)
{
const struct ib_recv_wr *bad_wr = NULL;
struct svc_rdma_recv_ctxt *ctxt;
struct ib_recv_wr *recv_chain;
int ret;
trace_svcrdma_post_recv(ctxt);
ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, NULL);
if (ret)
goto err_post;
return 0;
if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags))
return false;
err_post:
trace_svcrdma_rq_post_err(rdma, ret);
svc_rdma_recv_ctxt_put(rdma, ctxt);
return ret;
}
recv_chain = NULL;
while (wanted--) {
ctxt = svc_rdma_recv_ctxt_get(rdma);
if (!ctxt)
break;
static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
{
struct svc_rdma_recv_ctxt *ctxt;
trace_svcrdma_post_recv(ctxt);
ctxt->rc_temp = temp;
ctxt->rc_recv_wr.next = recv_chain;
recv_chain = &ctxt->rc_recv_wr;
rdma->sc_pending_recvs++;
}
if (!recv_chain)
return false;
if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags))
return 0;
ctxt = svc_rdma_recv_ctxt_get(rdma);
if (!ctxt)
return -ENOMEM;
return __svc_rdma_post_recv(rdma, ctxt);
ret = ib_post_recv(rdma->sc_qp, recv_chain, &bad_wr);
if (ret)
goto err_free;
return true;
err_free:
trace_svcrdma_rq_post_err(rdma, ret);
while (bad_wr) {
ctxt = container_of(bad_wr, struct svc_rdma_recv_ctxt,
rc_recv_wr);
bad_wr = bad_wr->next;
svc_rdma_recv_ctxt_put(rdma, ctxt);
}
/* Since we're destroying the xprt, no need to reset
* sc_pending_recvs. */
return false;
}
/**
......@@ -303,20 +310,7 @@ static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
*/
bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
{
struct svc_rdma_recv_ctxt *ctxt;
unsigned int i;
int ret;
for (i = 0; i < rdma->sc_max_requests; i++) {
ctxt = svc_rdma_recv_ctxt_get(rdma);
if (!ctxt)
return false;
ctxt->rc_temp = true;
ret = __svc_rdma_post_recv(rdma, ctxt);
if (ret)
return false;
}
return true;
return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests, true);
}
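svc_rdma_refresh_recvs() now backs both call sites: the initial fill when the transport is accepted (temp == true, so the ctxts persist for the connection's lifetime) and the batched refill from the Receive completion handler (temp == false). As this patch wires them up:

	/* At accept time: fill the whole Receive Queue */
	svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests, true);

	/* In svc_rdma_wc_receive(): top up in batches while below
	 * the water mark
	 */
	if (rdma->sc_pending_recvs < rdma->sc_max_requests)
		if (!svc_rdma_refresh_recvs(rdma, rdma->sc_recv_batch, false))
			goto flushed;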
/**
......@@ -324,8 +318,6 @@ bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
* @cq: Completion Queue context
* @wc: Work Completion object
*
* NB: The svc_xprt/svcxprt_rdma is pinned whenever it's possible that
* the Receive completion handler could be running.
*/
static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
{
......@@ -333,6 +325,8 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
struct ib_cqe *cqe = wc->wr_cqe;
struct svc_rdma_recv_ctxt *ctxt;
rdma->sc_pending_recvs--;
/* WARNING: Only wc->wr_cqe and wc->status are reliable */
ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe);
......@@ -340,8 +334,18 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
if (wc->status != IB_WC_SUCCESS)
goto flushed;
if (svc_rdma_post_recv(rdma))
goto post_err;
/* If receive posting fails, the connection is about to be
* lost anyway. The server will not be able to send a reply
* for this RPC, and the client will retransmit this RPC
* anyway when it reconnects.
*
* Therefore we drop the Receive, even if status was SUCCESS,
* to reduce the likelihood of replayed requests once the
* client reconnects.
*/
if (rdma->sc_pending_recvs < rdma->sc_max_requests)
if (!svc_rdma_refresh_recvs(rdma, rdma->sc_recv_batch, false))
goto flushed;
/* All wc fields are now known to be valid */
ctxt->rc_byte_len = wc->byte_len;
......@@ -356,10 +360,8 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
return;
flushed:
post_err:
svc_rdma_recv_ctxt_put(rdma, ctxt);
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
svc_xprt_enqueue(&rdma->sc_xprt);
svc_xprt_deferred_close(&rdma->sc_xprt);
}
/**
......@@ -371,10 +373,6 @@ void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma)
{
struct svc_rdma_recv_ctxt *ctxt;
while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_read_complete_q))) {
list_del(&ctxt->rc_list);
svc_rdma_recv_ctxt_put(rdma, ctxt);
}
while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_rq_dto_q))) {
list_del(&ctxt->rc_list);
svc_rdma_recv_ctxt_put(rdma, ctxt);
......@@ -712,35 +710,6 @@ static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg,
return -EINVAL;
}
static void rdma_read_complete(struct svc_rqst *rqstp,
struct svc_rdma_recv_ctxt *head)
{
int page_no;
/* Move Read chunk pages to rqstp so that they will be released
* when svc_process is done with them.
*/
for (page_no = 0; page_no < head->rc_page_count; page_no++) {
put_page(rqstp->rq_pages[page_no]);
rqstp->rq_pages[page_no] = head->rc_pages[page_no];
}
head->rc_page_count = 0;
/* Point rq_arg.pages past header */
rqstp->rq_arg.pages = &rqstp->rq_pages[head->rc_hdr_count];
rqstp->rq_arg.page_len = head->rc_arg.page_len;
/* rq_respages starts after the last arg page */
rqstp->rq_respages = &rqstp->rq_pages[page_no];
rqstp->rq_next_page = rqstp->rq_respages + 1;
/* Rebuild rq_arg head and tail. */
rqstp->rq_arg.head[0] = head->rc_arg.head[0];
rqstp->rq_arg.tail[0] = head->rc_arg.tail[0];
rqstp->rq_arg.len = head->rc_arg.len;
rqstp->rq_arg.buflen = head->rc_arg.buflen;
}
static void svc_rdma_send_error(struct svcxprt_rdma *rdma,
struct svc_rdma_recv_ctxt *rctxt,
int status)
......@@ -825,25 +794,22 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
rqstp->rq_xprt_ctxt = NULL;
ctxt = NULL;
spin_lock(&rdma_xprt->sc_rq_dto_lock);
ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_read_complete_q);
if (ctxt) {
list_del(&ctxt->rc_list);
spin_unlock(&rdma_xprt->sc_rq_dto_lock);
rdma_read_complete(rqstp, ctxt);
goto complete;
}
ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_rq_dto_q);
if (!ctxt) {
if (ctxt)
list_del(&ctxt->rc_list);
else
/* No new incoming requests, terminate the loop */
clear_bit(XPT_DATA, &xprt->xpt_flags);
spin_unlock(&rdma_xprt->sc_rq_dto_lock);
return 0;
}
list_del(&ctxt->rc_list);
spin_unlock(&rdma_xprt->sc_rq_dto_lock);
percpu_counter_inc(&svcrdma_stat_recv);
/* Unblock the transport for the next receive */
svc_xprt_received(xprt);
if (!ctxt)
return 0;
percpu_counter_inc(&svcrdma_stat_recv);
ib_dma_sync_single_for_cpu(rdma_xprt->sc_pd->device,
ctxt->rc_recv_sge.addr, ctxt->rc_byte_len,
DMA_FROM_DEVICE);
......@@ -868,21 +834,17 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
svc_rdma_get_inv_rkey(rdma_xprt, ctxt);
if (!pcl_is_empty(&ctxt->rc_read_pcl) ||
!pcl_is_empty(&ctxt->rc_call_pcl))
goto out_readlist;
!pcl_is_empty(&ctxt->rc_call_pcl)) {
ret = svc_rdma_process_read_list(rdma_xprt, rqstp, ctxt);
if (ret < 0)
goto out_readfail;
}
complete:
rqstp->rq_xprt_ctxt = ctxt;
rqstp->rq_prot = IPPROTO_MAX;
svc_xprt_copy_addrs(rqstp, xprt);
return rqstp->rq_arg.len;
out_readlist:
ret = svc_rdma_process_read_list(rdma_xprt, rqstp, ctxt);
if (ret < 0)
goto out_readfail;
return 0;
out_err:
svc_rdma_send_error(rdma_xprt, ctxt, ret);
svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
......
......@@ -150,6 +150,8 @@ struct svc_rdma_chunk_ctxt {
struct svcxprt_rdma *cc_rdma;
struct list_head cc_rwctxts;
int cc_sqecount;
enum ib_wc_status cc_status;
struct completion cc_done;
};
static void svc_rdma_cc_cid_init(struct svcxprt_rdma *rdma,
......@@ -250,7 +252,7 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
wake_up(&rdma->sc_send_wait);
if (unlikely(wc->status != IB_WC_SUCCESS))
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
svc_xprt_deferred_close(&rdma->sc_xprt);
svc_rdma_write_info_free(info);
}
......@@ -299,29 +301,15 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
struct svc_rdma_chunk_ctxt *cc =
container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
struct svcxprt_rdma *rdma = cc->cc_rdma;
struct svc_rdma_read_info *info =
container_of(cc, struct svc_rdma_read_info, ri_cc);
trace_svcrdma_wc_read(wc, &cc->cc_cid);
atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
wake_up(&rdma->sc_send_wait);
if (unlikely(wc->status != IB_WC_SUCCESS)) {
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
svc_rdma_recv_ctxt_put(rdma, info->ri_readctxt);
} else {
spin_lock(&rdma->sc_rq_dto_lock);
list_add_tail(&info->ri_readctxt->rc_list,
&rdma->sc_read_complete_q);
/* Note the unlock pairs with the smp_rmb in svc_xprt_ready: */
set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags);
spin_unlock(&rdma->sc_rq_dto_lock);
svc_xprt_enqueue(&rdma->sc_xprt);
}
svc_rdma_read_info_free(info);
cc->cc_status = wc->status;
complete(&cc->cc_done);
return;
}
/* This function sleeps when the transport's Send Queue is congested.
......@@ -334,7 +322,6 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
{
struct svcxprt_rdma *rdma = cc->cc_rdma;
struct svc_xprt *xprt = &rdma->sc_xprt;
struct ib_send_wr *first_wr;
const struct ib_send_wr *bad_wr;
struct list_head *tmp;
......@@ -373,7 +360,7 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
} while (1);
trace_svcrdma_sq_post_err(rdma, ret);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
svc_xprt_deferred_close(&rdma->sc_xprt);
/* If even one was posted, there will be a completion. */
if (bad_wr != first_wr)
......@@ -677,8 +664,8 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
struct svc_rdma_chunk_ctxt *cc = &info->ri_cc;
struct svc_rqst *rqstp = info->ri_rqst;
struct svc_rdma_rw_ctxt *ctxt;
unsigned int sge_no, seg_len, len;
struct svc_rdma_rw_ctxt *ctxt;
struct scatterlist *sg;
int ret;
......@@ -694,8 +681,6 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
seg_len = min_t(unsigned int, len,
PAGE_SIZE - info->ri_pageoff);
head->rc_arg.pages[info->ri_pageno] =
rqstp->rq_pages[info->ri_pageno];
if (!info->ri_pageoff)
head->rc_page_count++;
......@@ -789,12 +774,10 @@ static int svc_rdma_copy_inline_range(struct svc_rdma_read_info *info,
page_len = min_t(unsigned int, remaining,
PAGE_SIZE - info->ri_pageoff);
head->rc_arg.pages[info->ri_pageno] =
rqstp->rq_pages[info->ri_pageno];
if (!info->ri_pageoff)
head->rc_page_count++;
dst = page_address(head->rc_arg.pages[info->ri_pageno]);
dst = page_address(rqstp->rq_pages[info->ri_pageno]);
memcpy(dst + info->ri_pageoff, src + offset, page_len);
info->ri_totalbytes += page_len;
......@@ -814,7 +797,7 @@ static int svc_rdma_copy_inline_range(struct svc_rdma_read_info *info,
* svc_rdma_read_multiple_chunks - Construct RDMA Reads to pull data item Read chunks
* @info: context for RDMA Reads
*
* The chunk data lands in head->rc_arg as a series of contiguous pages,
* The chunk data lands in rqstp->rq_arg as a series of contiguous pages,
* like an incoming TCP call.
*
* Return values:
......@@ -828,8 +811,8 @@ static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *inf
{
struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
const struct svc_rdma_pcl *pcl = &head->rc_read_pcl;
struct xdr_buf *buf = &info->ri_rqst->rq_arg;
struct svc_rdma_chunk *chunk, *next;
struct xdr_buf *buf = &head->rc_arg;
unsigned int start, length;
int ret;
......@@ -865,9 +848,9 @@ static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *inf
buf->len += info->ri_totalbytes;
buf->buflen += info->ri_totalbytes;
head->rc_hdr_count = 1;
buf->head[0].iov_base = page_address(head->rc_pages[0]);
buf->head[0].iov_base = page_address(info->ri_rqst->rq_pages[0]);
buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, info->ri_totalbytes);
buf->pages = &info->ri_rqst->rq_pages[1];
buf->page_len = info->ri_totalbytes - buf->head[0].iov_len;
return 0;
}
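The rebuilt rq_arg mimics an inline TCP record: the first page of pulled chunk data becomes head[0], and the remainder sits in the page list starting at rq_pages[1]. A standalone worked example of the arithmetic, using illustrative numbers not taken from the patch:

#include <stdio.h>

#define PAGE_SIZE 4096u

int main(void)
{
	/* Illustrative: a Read list that pulled 6000 bytes */
	unsigned int ri_totalbytes = 6000;
	unsigned int head_len = ri_totalbytes < PAGE_SIZE ?
					ri_totalbytes : PAGE_SIZE;
	unsigned int page_len = ri_totalbytes - head_len;

	/* Mirrors: head[0].iov_len = min(PAGE_SIZE, ri_totalbytes);
	 *          page_len        = ri_totalbytes - head[0].iov_len;
	 */
	printf("head[0].iov_len=%u, page_len=%u\n", head_len, page_len);
	return 0;
}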
......@@ -876,9 +859,9 @@ static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *inf
* svc_rdma_read_data_item - Construct RDMA Reads to pull data item Read chunks
* @info: context for RDMA Reads
*
* The chunk data lands in the page list of head->rc_arg.pages.
* The chunk data lands in the page list of rqstp->rq_arg.pages.
*
* Currently NFSD does not look at the head->rc_arg.tail[0] iovec.
* Currently NFSD does not look at the rqstp->rq_arg.tail[0] kvec.
* Therefore, XDR round-up of the Read chunk and trailing
* inline content must both be added at the end of the pagelist.
*
......@@ -892,7 +875,7 @@ static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *inf
static int svc_rdma_read_data_item(struct svc_rdma_read_info *info)
{
struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
struct xdr_buf *buf = &head->rc_arg;
struct xdr_buf *buf = &info->ri_rqst->rq_arg;
struct svc_rdma_chunk *chunk;
unsigned int length;
int ret;
......@@ -902,8 +885,6 @@ static int svc_rdma_read_data_item(struct svc_rdma_read_info *info)
if (ret < 0)
goto out;
head->rc_hdr_count = 0;
/* Split the Receive buffer between the head and tail
* buffers at Read chunk's position. XDR roundup of the
* chunk is not included in either the pagelist or in
......@@ -922,7 +903,8 @@ static int svc_rdma_read_data_item(struct svc_rdma_read_info *info)
* Currently these chunks always start at page offset 0,
* thus the rounded-up length never crosses a page boundary.
*/
length = XDR_QUADLEN(info->ri_totalbytes) << 2;
buf->pages = &info->ri_rqst->rq_pages[0];
length = xdr_align_size(chunk->ch_length);
buf->page_len = length;
buf->len += length;
buf->buflen += length;
......@@ -1034,8 +1016,7 @@ static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info)
* @info: context for RDMA Reads
*
* The start of the data lands in the first page just after the
* Transport header, and the rest lands in the page list of
* head->rc_arg.pages.
* Transport header, and the rest lands in rqstp->rq_arg.pages.
*
* Assumptions:
* - A PZRC is never sent in an RDMA_MSG message, though it's
......@@ -1050,8 +1031,7 @@ static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info)
*/
static noinline int svc_rdma_read_special(struct svc_rdma_read_info *info)
{
struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
struct xdr_buf *buf = &head->rc_arg;
struct xdr_buf *buf = &info->ri_rqst->rq_arg;
int ret;
ret = svc_rdma_read_call_chunk(info);
......@@ -1061,35 +1041,15 @@ static noinline int svc_rdma_read_special(struct svc_rdma_read_info *info)
buf->len += info->ri_totalbytes;
buf->buflen += info->ri_totalbytes;
head->rc_hdr_count = 1;
buf->head[0].iov_base = page_address(head->rc_pages[0]);
buf->head[0].iov_base = page_address(info->ri_rqst->rq_pages[0]);
buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, info->ri_totalbytes);
buf->pages = &info->ri_rqst->rq_pages[1];
buf->page_len = info->ri_totalbytes - buf->head[0].iov_len;
out:
return ret;
}
/* Pages under I/O have been copied to head->rc_pages. Ensure they
* are not released by svc_xprt_release() until the I/O is complete.
*
* This has to be done after all Read WRs are constructed to properly
* handle a page that is part of I/O on behalf of two different RDMA
* segments.
*
* Do this only if I/O has been posted. Otherwise, we do indeed want
* svc_xprt_release() to clean things up properly.
*/
static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
const unsigned int start,
const unsigned int num_pages)
{
unsigned int i;
for (i = start; i < num_pages + start; i++)
rqstp->rq_pages[i] = NULL;
}
/**
* svc_rdma_process_read_list - Pull list of Read chunks from the client
* @rdma: controlling RDMA transport
......@@ -1121,18 +1081,6 @@ int svc_rdma_process_read_list(struct svcxprt_rdma *rdma,
struct svc_rdma_chunk_ctxt *cc;
int ret;
/* The request (with page list) is constructed in
* head->rc_arg. Pages involved with RDMA Read I/O are
* transferred there.
*/
head->rc_arg.head[0] = rqstp->rq_arg.head[0];
head->rc_arg.tail[0] = rqstp->rq_arg.tail[0];
head->rc_arg.pages = head->rc_pages;
head->rc_arg.page_base = 0;
head->rc_arg.page_len = 0;
head->rc_arg.len = rqstp->rq_arg.len;
head->rc_arg.buflen = rqstp->rq_arg.buflen;
info = svc_rdma_read_info_alloc(rdma);
if (!info)
return -ENOMEM;
......@@ -1154,11 +1102,22 @@ int svc_rdma_process_read_list(struct svcxprt_rdma *rdma,
goto out_err;
trace_svcrdma_post_read_chunk(&cc->cc_cid, cc->cc_sqecount);
init_completion(&cc->cc_done);
ret = svc_rdma_post_chunk_ctxt(cc);
if (ret < 0)
goto out_err;
svc_rdma_save_io_pages(rqstp, 0, head->rc_page_count);
return 1;
ret = 1;
wait_for_completion(&cc->cc_done);
if (cc->cc_status != IB_WC_SUCCESS)
ret = -EIO;
/* rq_respages starts after the last arg page */
rqstp->rq_respages = &rqstp->rq_pages[head->rc_page_count];
rqstp->rq_next_page = rqstp->rq_respages + 1;
/* Ensure svc_rdma_recv_ctxt_put() does not try to release pages */
head->rc_page_count = 0;
out_err:
svc_rdma_read_info_free(info);
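With the read-complete queue gone, the handshake between the Read completion handler and the waiting process context reduces to a completion plus a status field. In outline, consolidating the two hunks above (field and function names are from this patch):

	/* Process context, svc_rdma_process_read_list() */
	init_completion(&cc->cc_done);
	ret = svc_rdma_post_chunk_ctxt(cc);
	if (ret < 0)
		goto out_err;
	wait_for_completion(&cc->cc_done);
	if (cc->cc_status != IB_WC_SUCCESS)
		ret = -EIO;

	/* Completion context, svc_rdma_wc_read_done() */
	cc->cc_status = wc->status;
	complete(&cc->cc_done);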
......
......@@ -111,8 +111,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc);
static inline struct svc_rdma_send_ctxt *
......@@ -157,6 +155,7 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
ctxt->sc_send_wr.wr_cqe = &ctxt->sc_cqe;
ctxt->sc_send_wr.sg_list = ctxt->sc_sges;
ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED;
init_completion(&ctxt->sc_done);
ctxt->sc_cqe.done = svc_rdma_wc_send;
ctxt->sc_xprt_buf = buffer;
xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf,
......@@ -220,7 +219,6 @@ struct svc_rdma_send_ctxt *svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma)
ctxt->sc_send_wr.num_sge = 0;
ctxt->sc_cur_sge_no = 0;
ctxt->sc_page_count = 0;
return ctxt;
out_empty:
......@@ -235,8 +233,6 @@ struct svc_rdma_send_ctxt *svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma)
* svc_rdma_send_ctxt_put - Return send_ctxt to free list
* @rdma: controlling svcxprt_rdma
* @ctxt: object to return to the free list
*
* Pages left in sc_pages are DMA unmapped and released.
*/
void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt)
......@@ -257,9 +253,6 @@ void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
ctxt->sc_sges[i].length);
}
for (i = 0; i < ctxt->sc_page_count; ++i)
put_page(ctxt->sc_pages[i]);
spin_lock(&rdma->sc_send_lock);
list_add(&ctxt->sc_list, &rdma->sc_send_ctxts);
spin_unlock(&rdma->sc_send_lock);
......@@ -282,15 +275,13 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
trace_svcrdma_wc_send(wc, &ctxt->sc_cid);
complete(&ctxt->sc_done);
atomic_inc(&rdma->sc_sq_avail);
wake_up(&rdma->sc_send_wait);
svc_rdma_send_ctxt_put(rdma, ctxt);
if (unlikely(wc->status != IB_WC_SUCCESS)) {
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
svc_xprt_enqueue(&rdma->sc_xprt);
}
if (unlikely(wc->status != IB_WC_SUCCESS))
svc_xprt_deferred_close(&rdma->sc_xprt);
}
/**
......@@ -298,7 +289,7 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
* @rdma: transport on which to post the WR
* @ctxt: send ctxt with a Send WR ready to post
*
* Returns zero the Send WR was posted successfully. Otherwise, a
* Returns zero if the Send WR was posted successfully. Otherwise, a
* negative errno is returned.
*/
int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
......@@ -306,7 +297,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
struct ib_send_wr *wr = &ctxt->sc_send_wr;
int ret;
might_sleep();
reinit_completion(&ctxt->sc_done);
/* Sync the transport header buffer */
ib_dma_sync_single_for_device(rdma->sc_pd->device,
......@@ -336,7 +327,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
}
trace_svcrdma_sq_post_err(rdma, ret);
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
svc_xprt_deferred_close(&rdma->sc_xprt);
wake_up(&rdma->sc_send_wait);
return ret;
}
......@@ -795,25 +786,6 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
svc_rdma_xb_dma_map, &args);
}
/* The svc_rqst and all resources it owns are released as soon as
* svc_rdma_sendto returns. Transfer pages under I/O to the ctxt
* so they are released by the Send completion handler.
*/
static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
struct svc_rdma_send_ctxt *ctxt)
{
int i, pages = rqstp->rq_next_page - rqstp->rq_respages;
ctxt->sc_page_count += pages;
for (i = 0; i < pages; i++) {
ctxt->sc_pages[i] = rqstp->rq_respages[i];
rqstp->rq_respages[i] = NULL;
}
/* Prevent svc_xprt_release from releasing pages in rq_pages */
rqstp->rq_next_page = rqstp->rq_respages;
}
/* Prepare the portion of the RPC Reply that will be transmitted
* via RDMA Send. The RPC-over-RDMA transport header is prepared
* in sc_sges[0], and the RPC xdr_buf is prepared in following sges.
......@@ -843,15 +815,20 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
if (ret < 0)
return ret;
svc_rdma_save_io_pages(rqstp, sctxt);
if (rctxt->rc_inv_rkey) {
sctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
sctxt->sc_send_wr.ex.invalidate_rkey = rctxt->rc_inv_rkey;
} else {
sctxt->sc_send_wr.opcode = IB_WR_SEND;
}
return svc_rdma_send(rdma, sctxt);
ret = svc_rdma_send(rdma, sctxt);
if (ret < 0)
return ret;
ret = wait_for_completion_killable(&sctxt->sc_done);
svc_rdma_send_ctxt_put(rdma, sctxt);
return ret;
}
/**
......@@ -917,7 +894,8 @@ void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len;
if (svc_rdma_send(rdma, sctxt))
goto put_ctxt;
return;
wait_for_completion_killable(&sctxt->sc_done);
put_ctxt:
svc_rdma_send_ctxt_put(rdma, sctxt);
......@@ -979,16 +957,17 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp);
if (ret < 0)
goto err1;
/* Prevent svc_xprt_release() from releasing the page backing
* rq_res.head[0].iov_base. It's no longer being accessed by
* the I/O device. */
rqstp->rq_respages++;
return 0;
err2:
if (ret != -E2BIG && ret != -EINVAL)
goto err1;
/* Send completion releases payload pages that were part
* of previously posted RDMA Writes.
*/
svc_rdma_save_io_pages(rqstp, sctxt);
svc_rdma_send_error_msg(rdma, sctxt, rctxt, ret);
return 0;
......@@ -996,7 +975,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
svc_rdma_send_ctxt_put(rdma, sctxt);
err0:
trace_svcrdma_send_err(rqstp, ret);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
svc_xprt_deferred_close(&rdma->sc_xprt);
return -ENOTCONN;
}
......
......@@ -119,8 +119,7 @@ static void qp_event_handler(struct ib_event *event, void *context)
case IB_EVENT_QP_ACCESS_ERR:
case IB_EVENT_DEVICE_FATAL:
default:
set_bit(XPT_CLOSE, &xprt->xpt_flags);
svc_xprt_enqueue(xprt);
svc_xprt_deferred_close(xprt);
break;
}
}
......@@ -137,7 +136,6 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
svc_xprt_init(net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
INIT_LIST_HEAD(&cma_xprt->sc_send_ctxts);
init_llist_head(&cma_xprt->sc_recv_ctxts);
INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
......@@ -279,12 +277,14 @@ static int svc_rdma_cma_handler(struct rdma_cm_id *cma_id,
switch (event->event) {
case RDMA_CM_EVENT_ESTABLISHED:
clear_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags);
/* Handle any requests that were received while
* CONN_PENDING was set. */
svc_xprt_enqueue(xprt);
break;
case RDMA_CM_EVENT_DISCONNECTED:
case RDMA_CM_EVENT_DEVICE_REMOVAL:
set_bit(XPT_CLOSE, &xprt->xpt_flags);
svc_xprt_enqueue(xprt);
svc_xprt_deferred_close(xprt);
break;
default:
break;
......@@ -404,11 +404,14 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
newxprt->sc_max_req_size = svcrdma_max_req_size;
newxprt->sc_max_requests = svcrdma_max_requests;
newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;
rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests;
newxprt->sc_recv_batch = RPCRDMA_MAX_RECV_BATCH;
rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests +
newxprt->sc_recv_batch;
if (rq_depth > dev->attrs.max_qp_wr) {
pr_warn("svcrdma: reducing receive depth to %d\n",
dev->attrs.max_qp_wr);
rq_depth = dev->attrs.max_qp_wr;
newxprt->sc_recv_batch = 1;
newxprt->sc_max_requests = rq_depth - 2;
newxprt->sc_max_bc_requests = 2;
}
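The Receive Queue must now leave room for a full refresh batch on top of the request and backchannel credits. A quick standalone check of the accounting, using illustrative values (RPCRDMA_MAX_RECV_BATCH is assumed to be 7 here; the actual constant is defined in the svc_rdma header):

#include <stdio.h>

int main(void)
{
	/* Illustrative values, not necessarily the module defaults */
	unsigned int sc_max_requests = 64;
	unsigned int sc_max_bc_requests = 2;
	unsigned int sc_recv_batch = 7;	/* assumed RPCRDMA_MAX_RECV_BATCH */
	unsigned int max_qp_wr = 64;	/* illustrative device limit */
	unsigned int rq_depth;

	rq_depth = sc_max_requests + sc_max_bc_requests + sc_recv_batch;
	if (rq_depth > max_qp_wr) {
		/* Fall back: shrink credits and post Receives singly */
		rq_depth = max_qp_wr;
		sc_recv_batch = 1;
		sc_max_requests = rq_depth - 2;
		sc_max_bc_requests = 2;
	}
	/* Prints: rq_depth=64, batch=1 for these values */
	printf("rq_depth=%u, batch=%u\n", rq_depth, sc_recv_batch);
	return 0;
}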
......