Commit f7976a64 authored by Linus Torvalds

Merge tag 'nfsd-6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux

Pull nfsd updates from Chuck Lever:

 - Clean-ups in the READ path in anticipation of MSG_SPLICE_PAGES

 - Better NUMA awareness when allocating pages and other objects

 - A number of minor clean-ups to XDR encoding

 - Elimination of a race when accepting a TCP socket

 - Numerous observability enhancements

* tag 'nfsd-6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux: (46 commits)
  nfsd: remove redundant assignments to variable len
  svcrdma: Fix stale comment
  NFSD: Distinguish per-net namespace initialization
  nfsd: move init of percpu reply_cache_stats counters back to nfsd_init_net
  SUNRPC: Address RCU warning in net/sunrpc/svc.c
  SUNRPC: Use sysfs_emit in place of strlcpy/sprintf
  SUNRPC: Remove transport class dprintk call sites
  SUNRPC: Fix comments for transport class registration
  svcrdma: Remove an unused argument from __svc_rdma_put_rw_ctxt()
  svcrdma: trace cc_release calls
  svcrdma: Convert "might sleep" comment into a code annotation
  NFSD: Add an nfsd4_encode_nfstime4() helper
  SUNRPC: Move initialization of rq_stime
  SUNRPC: Optimize page release in svc_rdma_sendto()
  svcrdma: Prevent page release when nothing was received
  svcrdma: Revert 2a1e4f21 ("svcrdma: Normalize Send page handling")
  SUNRPC: Revert 57990067 ("svcrdma: Remove unused sc_pages field")
  SUNRPC: Revert cc93ce95 ("svcrdma: Retain the page backing rq_res.head[0].iov_base")
  NFSD: add encoding of op_recall flag for write delegation
  NFSD: Add "official" reviewers for this subsystem
  ...
parents c0a572d9 75bfb704
@@ -183,6 +183,8 @@ Henrik Rydberg <rydberg@bitmath.org>
 Herbert Xu <herbert@gondor.apana.org.au>
 Huacai Chen <chenhuacai@kernel.org> <chenhc@lemote.com>
 Huacai Chen <chenhuacai@kernel.org> <chenhuacai@loongson.cn>
+J. Bruce Fields <bfields@fieldses.org> <bfields@redhat.com>
+J. Bruce Fields <bfields@fieldses.org> <bfields@citi.umich.edu>
 Jacob Shin <Jacob.Shin@amd.com>
 Jaegeuk Kim <jaegeuk@kernel.org> <jaegeuk@google.com>
 Jaegeuk Kim <jaegeuk@kernel.org> <jaegeuk.kim@samsung.com>
......
@@ -11275,6 +11275,10 @@ W: http://kernelnewbies.org/KernelJanitors
 KERNEL NFSD, SUNRPC, AND LOCKD SERVERS
 M:	Chuck Lever <chuck.lever@oracle.com>
 M:	Jeff Layton <jlayton@kernel.org>
+R:	Neil Brown <neilb@suse.de>
+R:	Olga Kornievskaia <kolga@netapp.com>
+R:	Dai Ngo <Dai.Ngo@oracle.com>
+R:	Tom Talpey <tom@talpey.com>
 L:	linux-nfs@vger.kernel.org
 S:	Supported
 W:	http://nfs.sourceforge.net/
......
@@ -355,7 +355,6 @@ static int lockd_get(void)
 	int error;

 	if (nlmsvc_serv) {
-		svc_get(nlmsvc_serv);
 		nlmsvc_users++;
 		return 0;
 	}
......
@@ -80,6 +80,8 @@ enum {
 int	nfsd_drc_slab_create(void);
 void	nfsd_drc_slab_free(void);
+int	nfsd_net_reply_cache_init(struct nfsd_net *nn);
+void	nfsd_net_reply_cache_destroy(struct nfsd_net *nn);
 int	nfsd_reply_cache_init(struct nfsd_net *);
 void	nfsd_reply_cache_shutdown(struct nfsd_net *);
 int	nfsd_cache_lookup(struct svc_rqst *);
......
@@ -97,7 +97,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
 		goto out;

 	err = -EINVAL;
-	if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
+	if (qword_get(&mesg, buf, PAGE_SIZE) <= 0)
 		goto out;

 	err = -ENOENT;
@@ -107,7 +107,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
 	dprintk("found domain %s\n", buf);

 	err = -EINVAL;
-	if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
+	if (qword_get(&mesg, buf, PAGE_SIZE) <= 0)
 		goto out;
 	fsidtype = simple_strtoul(buf, &ep, 10);
 	if (*ep)
@@ -593,7 +593,6 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
 {
 	/* client path expiry [flags anonuid anongid fsid] */
 	char *buf;
-	int len;
 	int err;
 	struct auth_domain *dom = NULL;
 	struct svc_export exp = {}, *expp;
@@ -609,8 +608,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
 	/* client */
 	err = -EINVAL;
-	len = qword_get(&mesg, buf, PAGE_SIZE);
-	if (len <= 0)
+	if (qword_get(&mesg, buf, PAGE_SIZE) <= 0)
 		goto out;

 	err = -ENOENT;
@@ -620,7 +618,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
 	/* path */
 	err = -EINVAL;
-	if ((len = qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
+	if (qword_get(&mesg, buf, PAGE_SIZE) <= 0)
 		goto out1;

 	err = kern_path(buf, 0, &exp.ex_path);
@@ -665,7 +663,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
 		goto out3;
 	exp.ex_fsid = an_int;

-	while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) {
+	while (qword_get(&mesg, buf, PAGE_SIZE) > 0) {
 		if (strcmp(buf, "fsloc") == 0)
 			err = fsloc_parse(&mesg, buf, &exp.ex_fslocs);
 		else if (strcmp(buf, "uuid") == 0)
......
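For context: qword_get() copies the next whitespace-delimited word into @buf as a NUL-terminated string and returns its length, or a value <= 0 on parse failure. Every caller above consumes the string rather than the length, so the `len` locals were dead; the pattern that remains is roughly this sketch (grounded in the expkey_parse() hunk above):

	err = -EINVAL;
	if (qword_get(&mesg, buf, PAGE_SIZE) <= 0)
		goto out;	/* missing or malformed word */
	fsidtype = simple_strtoul(buf, &ep, 10);	/* word is NUL-terminated in buf */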
@@ -151,8 +151,6 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
 {
 	struct nfsd3_readargs *argp = rqstp->rq_argp;
 	struct nfsd3_readres *resp = rqstp->rq_resp;
-	unsigned int len;
-	int v;

 	dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
 		SVCFH_fmt(&argp->fh),
@@ -166,17 +164,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
 	if (argp->offset + argp->count > (u64)OFFSET_MAX)
 		argp->count = (u64)OFFSET_MAX - argp->offset;

-	v = 0;
-	len = argp->count;
 	resp->pages = rqstp->rq_next_page;
-	while (len > 0) {
-		struct page *page = *(rqstp->rq_next_page++);
-
-		rqstp->rq_vec[v].iov_base = page_address(page);
-		rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
-		len -= rqstp->rq_vec[v].iov_len;
-		v++;
-	}

 	/* Obtain buffer pointer for payload.
 	 * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
@@ -187,7 +175,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
 	fh_copy(&resp->fh, &argp->fh);
 	resp->status = nfsd_read(rqstp, &resp->fh, argp->offset,
-				 rqstp->rq_vec, v, &resp->count, &resp->eof);
+				 &resp->count, &resp->eof);
 	return rpc_success;
 }
......
@@ -828,7 +828,8 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 			return false;
 		if (xdr_stream_encode_u32(xdr, resp->len) < 0)
 			return false;
-		xdr_write_pages(xdr, resp->pages, 0, resp->len);
+		svcxdr_encode_opaque_pages(rqstp, xdr, resp->pages, 0,
+					   resp->len);
 		if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0)
 			return false;
 		break;
@@ -859,8 +860,9 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 			return false;
 		if (xdr_stream_encode_u32(xdr, resp->count) < 0)
 			return false;
-		xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base,
-				resp->count);
+		svcxdr_encode_opaque_pages(rqstp, xdr, resp->pages,
+					   rqstp->rq_res.page_base,
+					   resp->count);
 		if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0)
 			return false;
 		break;
@@ -961,7 +963,8 @@ nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 			return false;
 		if (!svcxdr_encode_cookieverf3(xdr, resp->verf))
 			return false;
-		xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len);
+		svcxdr_encode_opaque_pages(rqstp, xdr, dirlist->pages, 0,
+					   dirlist->len);
 		/* no more entries */
 		if (xdr_stream_encode_item_absent(xdr) < 0)
 			return false;
......
@@ -148,12 +148,23 @@ void nfsd_drc_slab_free(void)
 	kmem_cache_destroy(drc_slab);
 }

-static int nfsd_reply_cache_stats_init(struct nfsd_net *nn)
+/**
+ * nfsd_net_reply_cache_init - per net namespace reply cache set-up
+ * @nn: nfsd_net being initialized
+ *
+ * Returns zero on success; otherwise a negative errno is returned.
+ */
+int nfsd_net_reply_cache_init(struct nfsd_net *nn)
 {
 	return nfsd_percpu_counters_init(nn->counter, NFSD_NET_COUNTERS_NUM);
 }

-static void nfsd_reply_cache_stats_destroy(struct nfsd_net *nn)
+/**
+ * nfsd_net_reply_cache_destroy - per net namespace reply cache tear-down
+ * @nn: nfsd_net being freed
+ *
+ */
+void nfsd_net_reply_cache_destroy(struct nfsd_net *nn)
 {
 	nfsd_percpu_counters_destroy(nn->counter, NFSD_NET_COUNTERS_NUM);
 }
@@ -169,17 +180,13 @@ int nfsd_reply_cache_init(struct nfsd_net *nn)
 	hashsize = nfsd_hashsize(nn->max_drc_entries);
 	nn->maskbits = ilog2(hashsize);

-	status = nfsd_reply_cache_stats_init(nn);
-	if (status)
-		goto out_nomem;
-
 	nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan;
 	nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count;
 	nn->nfsd_reply_cache_shrinker.seeks = 1;
 	status = register_shrinker(&nn->nfsd_reply_cache_shrinker,
 				   "nfsd-reply:%s", nn->nfsd_name);
 	if (status)
-		goto out_stats_destroy;
+		return status;

 	nn->drc_hashtbl = kvzalloc(array_size(hashsize,
 				sizeof(*nn->drc_hashtbl)), GFP_KERNEL);
@@ -195,9 +202,6 @@ int nfsd_reply_cache_init(struct nfsd_net *nn)
 	return 0;
 out_shrinker:
 	unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
-out_stats_destroy:
-	nfsd_reply_cache_stats_destroy(nn);
-out_nomem:
 	printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
 	return -ENOMEM;
 }
@@ -217,7 +221,6 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
 				rp, nn);
 		}
 	}
-	nfsd_reply_cache_stats_destroy(nn);
 	kvfree(nn->drc_hashtbl);
 	nn->drc_hashtbl = NULL;
......
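A hedged sketch of why the counters moved: nfsd_reply_cache_init() runs each time an nfsd service starts in a namespace, while the percpu reply_cache_stats counters must live for the whole lifetime of the nfsd_net. The per-netns init path can therefore call the new helpers once, roughly like this (the function shape is assumed; the real call sites live in fs/nfsd/nfsctl.c):

	static __net_init int nfsd_init_net(struct net *net)
	{
		struct nfsd_net *nn = net_generic(net, nfsd_net_id);
		int retval;

		/* set up stats counters that outlive any one nfsd instance */
		retval = nfsd_net_reply_cache_init(nn);
		if (retval)
			return retval;
		/* ... remaining per-namespace setup ... */
		return 0;
	}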
@@ -623,16 +623,9 @@ void fh_fill_pre_attrs(struct svc_fh *fhp)

 	inode = d_inode(fhp->fh_dentry);
 	err = fh_getattr(fhp, &stat);
-	if (err) {
-		/* Grab the times from inode anyway */
-		stat.mtime = inode->i_mtime;
-		stat.ctime = inode->i_ctime;
-		stat.size = inode->i_size;
-		if (v4 && IS_I_VERSION(inode)) {
-			stat.change_cookie = inode_query_iversion(inode);
-			stat.result_mask |= STATX_CHANGE_COOKIE;
-		}
-	}
+	if (err)
+		return;

 	if (v4)
 		fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
@@ -660,15 +653,10 @@ void fh_fill_post_attrs(struct svc_fh *fhp)
 		printk("nfsd: inode locked twice during operation.\n");

 	err = fh_getattr(fhp, &fhp->fh_post_attr);
-	if (err) {
-		fhp->fh_post_saved = false;
-		fhp->fh_post_attr.ctime = inode->i_ctime;
-		if (v4 && IS_I_VERSION(inode)) {
-			fhp->fh_post_attr.change_cookie = inode_query_iversion(inode);
-			fhp->fh_post_attr.result_mask |= STATX_CHANGE_COOKIE;
-		}
-	} else
-		fhp->fh_post_saved = true;
+	if (err)
+		return;
+
+	fhp->fh_post_saved = true;
 	if (v4)
 		fhp->fh_post_change =
 			nfsd4_change_attribute(&fhp->fh_post_attr, inode);
......
@@ -176,9 +176,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
 {
 	struct nfsd_readargs *argp = rqstp->rq_argp;
 	struct nfsd_readres *resp = rqstp->rq_resp;
-	unsigned int len;
 	u32 eof;
-	int v;

 	dprintk("nfsd: READ %s %d bytes at %d\n",
 		SVCFH_fmt(&argp->fh),
@@ -187,17 +185,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
 	argp->count = min_t(u32, argp->count, NFSSVC_MAXBLKSIZE_V2);
 	argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen);

-	v = 0;
-	len = argp->count;
 	resp->pages = rqstp->rq_next_page;
-	while (len > 0) {
-		struct page *page = *(rqstp->rq_next_page++);
-
-		rqstp->rq_vec[v].iov_base = page_address(page);
-		rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
-		len -= rqstp->rq_vec[v].iov_len;
-		v++;
-	}

 	/* Obtain buffer pointer for payload. 19 is 1 word for
 	 * status, 17 words for fattr, and 1 word for the byte count.
@@ -207,7 +195,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
 	resp->count = argp->count;
 	fh_copy(&resp->fh, &argp->fh);
 	resp->status = nfsd_read(rqstp, &resp->fh, argp->offset,
-				 rqstp->rq_vec, v, &resp->count, &eof);
+				 &resp->count, &eof);
 	if (resp->status == nfs_ok)
 		resp->status = fh_getattr(&resp->fh, &resp->stat);
 	else if (resp->status == nfserr_jukebox)
......
@@ -402,6 +402,11 @@ void nfsd_reset_write_verifier(struct nfsd_net *nn)
 	write_sequnlock(&nn->writeverf_lock);
 }

+/*
+ * Crank up a set of per-namespace resources for a new NFSD instance,
+ * including lockd, a duplicate reply cache, an open file cache
+ * instance, and a cache of NFSv4 state objects.
+ */
 static int nfsd_startup_net(struct net *net, const struct cred *cred)
 {
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
......
@@ -468,7 +468,8 @@ nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 	case nfs_ok:
 		if (xdr_stream_encode_u32(xdr, resp->len) < 0)
 			return false;
-		xdr_write_pages(xdr, &resp->page, 0, resp->len);
+		svcxdr_encode_opaque_pages(rqstp, xdr, &resp->page, 0,
+					   resp->len);
 		if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0)
 			return false;
 		break;
@@ -491,8 +492,9 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 			return false;
 		if (xdr_stream_encode_u32(xdr, resp->count) < 0)
 			return false;
-		xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base,
-				resp->count);
+		svcxdr_encode_opaque_pages(rqstp, xdr, resp->pages,
+					   rqstp->rq_res.page_base,
+					   resp->count);
 		if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0)
 			return false;
 		break;
@@ -511,7 +513,8 @@ nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 		return false;
 	switch (resp->status) {
 	case nfs_ok:
-		xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len);
+		svcxdr_encode_opaque_pages(rqstp, xdr, dirlist->pages, 0,
+					   dirlist->len);
 		/* no more entries */
 		if (xdr_stream_encode_item_absent(xdr) < 0)
 			return false;
......
@@ -1581,6 +1581,265 @@ TRACE_EVENT(nfsd_cb_recall_any_done,
 	)
 );
TRACE_EVENT(nfsd_ctl_unlock_ip,
TP_PROTO(
const struct net *net,
const char *address
),
TP_ARGS(net, address),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__string(address, address)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__assign_str(address, address);
),
TP_printk("address=%s",
__get_str(address)
)
);
TRACE_EVENT(nfsd_ctl_unlock_fs,
TP_PROTO(
const struct net *net,
const char *path
),
TP_ARGS(net, path),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__string(path, path)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__assign_str(path, path);
),
TP_printk("path=%s",
__get_str(path)
)
);
TRACE_EVENT(nfsd_ctl_filehandle,
TP_PROTO(
const struct net *net,
const char *domain,
const char *path,
int maxsize
),
TP_ARGS(net, domain, path, maxsize),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__field(int, maxsize)
__string(domain, domain)
__string(path, path)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__entry->maxsize = maxsize;
__assign_str(domain, domain);
__assign_str(path, path);
),
TP_printk("domain=%s path=%s maxsize=%d",
__get_str(domain), __get_str(path), __entry->maxsize
)
);
TRACE_EVENT(nfsd_ctl_threads,
TP_PROTO(
const struct net *net,
int newthreads
),
TP_ARGS(net, newthreads),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__field(int, newthreads)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__entry->newthreads = newthreads;
),
TP_printk("newthreads=%d",
__entry->newthreads
)
);
TRACE_EVENT(nfsd_ctl_pool_threads,
TP_PROTO(
const struct net *net,
int pool,
int nrthreads
),
TP_ARGS(net, pool, nrthreads),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__field(int, pool)
__field(int, nrthreads)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__entry->pool = pool;
__entry->nrthreads = nrthreads;
),
TP_printk("pool=%d nrthreads=%d",
__entry->pool, __entry->nrthreads
)
);
TRACE_EVENT(nfsd_ctl_version,
TP_PROTO(
const struct net *net,
const char *mesg
),
TP_ARGS(net, mesg),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__string(mesg, mesg)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__assign_str(mesg, mesg);
),
TP_printk("%s",
__get_str(mesg)
)
);
TRACE_EVENT(nfsd_ctl_ports_addfd,
TP_PROTO(
const struct net *net,
int fd
),
TP_ARGS(net, fd),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__field(int, fd)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__entry->fd = fd;
),
TP_printk("fd=%d",
__entry->fd
)
);
TRACE_EVENT(nfsd_ctl_ports_addxprt,
TP_PROTO(
const struct net *net,
const char *transport,
int port
),
TP_ARGS(net, transport, port),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__field(int, port)
__string(transport, transport)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__entry->port = port;
__assign_str(transport, transport);
),
TP_printk("transport=%s port=%d",
__get_str(transport), __entry->port
)
);
TRACE_EVENT(nfsd_ctl_maxblksize,
TP_PROTO(
const struct net *net,
int bsize
),
TP_ARGS(net, bsize),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__field(int, bsize)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__entry->bsize = bsize;
),
TP_printk("bsize=%d",
__entry->bsize
)
);
TRACE_EVENT(nfsd_ctl_maxconn,
TP_PROTO(
const struct net *net,
int maxconn
),
TP_ARGS(net, maxconn),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__field(int, maxconn)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__entry->maxconn = maxconn;
),
TP_printk("maxconn=%d",
__entry->maxconn
)
);
TRACE_EVENT(nfsd_ctl_time,
TP_PROTO(
const struct net *net,
const char *name,
size_t namelen,
int time
),
TP_ARGS(net, name, namelen, time),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__field(int, time)
__string_len(name, name, namelen)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__entry->time = time;
__assign_str_len(name, name, namelen);
),
TP_printk("file=%s time=%d\n",
__get_str(name), __entry->time
)
);
TRACE_EVENT(nfsd_ctl_recoverydir,
TP_PROTO(
const struct net *net,
const char *recdir
),
TP_ARGS(net, recdir),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__string(recdir, recdir)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__assign_str(recdir, recdir);
),
TP_printk("recdir=%s",
__get_str(recdir)
)
);
TRACE_EVENT(nfsd_end_grace,
TP_PROTO(
const struct net *net
),
TP_ARGS(net),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
),
TP_printk("nn=%d", __entry->netns_ino
)
);
 #endif /* _NFSD_TRACE_H */

 #undef TRACE_INCLUDE_PATH
......
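Each TRACE_EVENT(nfsd_ctl_*) above expands into a trace_nfsd_ctl_*() static inline that the /proc/fs/nfsd write handlers can call once their input is parsed. A minimal illustration of the generated call-site shape (the wrapper function here is hypothetical; the real call sites are the write handlers in fs/nfsd/nfsctl.c):

	/* Hypothetical helper showing how a generated tracepoint is fired */
	static void nfsd_report_threads(struct net *net, int newthreads)
	{
		trace_nfsd_ctl_threads(net, newthreads);
	}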
@@ -388,7 +388,9 @@ nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
 			iap->ia_mode &= ~S_ISGID;
 		} else {
 			/* set ATTR_KILL_* bits and let VFS handle it */
-			iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID);
+			iap->ia_valid |= ATTR_KILL_SUID;
+			iap->ia_valid |=
+				setattr_should_drop_sgid(&nop_mnt_idmap, inode);
 		}
 	}
 }
@@ -1001,6 +1003,18 @@ static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	}
 }

+/**
+ * nfsd_splice_read - Perform a VFS read using a splice pipe
+ * @rqstp: RPC transaction context
+ * @fhp: file handle of file to be read
+ * @file: opened struct file of file to be read
+ * @offset: starting byte offset
+ * @count: IN: requested number of bytes; OUT: number of bytes read
+ * @eof: OUT: set non-zero if operation reached the end of the file
+ *
+ * Returns nfs_ok on success, otherwise an nfserr stat value is
+ * returned.
+ */
 __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
 			struct file *file, loff_t offset, unsigned long *count,
 			u32 *eof)
@@ -1014,22 +1028,50 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	ssize_t host_err;

 	trace_nfsd_read_splice(rqstp, fhp, offset, *count);
+	rqstp->rq_next_page = rqstp->rq_respages + 1;
 	host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
 	return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
 }
-__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
-		  struct file *file, loff_t offset,
-		  struct kvec *vec, int vlen, unsigned long *count,
-		  u32 *eof)
+/**
+ * nfsd_iter_read - Perform a VFS read using an iterator
+ * @rqstp: RPC transaction context
+ * @fhp: file handle of file to be read
+ * @file: opened struct file of file to be read
+ * @offset: starting byte offset
+ * @count: IN: requested number of bytes; OUT: number of bytes read
+ * @base: offset in first page of read buffer
+ * @eof: OUT: set non-zero if operation reached the end of the file
+ *
+ * Some filesystems or situations cannot use nfsd_splice_read. This
+ * function is the slightly less-performant fallback for those cases.
+ *
+ * Returns nfs_ok on success, otherwise an nfserr stat value is
+ * returned.
+ */
+__be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
+		      struct file *file, loff_t offset, unsigned long *count,
+		      unsigned int base, u32 *eof)
 {
+	unsigned long v, total;
 	struct iov_iter iter;
 	loff_t ppos = offset;
+	struct page *page;
 	ssize_t host_err;

+	v = 0;
+	total = *count;
+	while (total) {
+		page = *(rqstp->rq_next_page++);
+		rqstp->rq_vec[v].iov_base = page_address(page) + base;
+		rqstp->rq_vec[v].iov_len = min_t(size_t, total, PAGE_SIZE - base);
+		total -= rqstp->rq_vec[v].iov_len;
+		++v;
+		base = 0;
+	}
+	WARN_ON_ONCE(v > ARRAY_SIZE(rqstp->rq_vec));
+
 	trace_nfsd_read_vector(rqstp, fhp, offset, *count);
-	iov_iter_kvec(&iter, ITER_DEST, vec, vlen, *count);
+	iov_iter_kvec(&iter, ITER_DEST, rqstp->rq_vec, v, *count);
 	host_err = vfs_iter_read(file, &iter, &ppos, 0);
 	return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
 }
@@ -1159,14 +1201,24 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
 	return nfserr;
 }

-/*
- * Read data from a file. count must contain the requested read count
- * on entry. On return, *count contains the number of bytes actually read.
+/**
+ * nfsd_read - Read data from a file
+ * @rqstp: RPC transaction context
+ * @fhp: file handle of file to be read
+ * @offset: starting byte offset
+ * @count: IN: requested number of bytes; OUT: number of bytes read
+ * @eof: OUT: set non-zero if operation reached the end of the file
+ *
+ * The caller must verify that there is enough space in @rqstp.rq_res
+ * to perform this operation.
+ *
  * N.B. After this call fhp needs an fh_put
+ *
+ * Returns nfs_ok on success, otherwise an nfserr stat value is
+ * returned.
  */
 __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
-		 loff_t offset, struct kvec *vec, int vlen, unsigned long *count,
-		 u32 *eof)
+		 loff_t offset, unsigned long *count, u32 *eof)
 {
 	struct nfsd_file *nf;
 	struct file *file;
@@ -1181,12 +1233,10 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
 		err = nfsd_splice_read(rqstp, fhp, file, offset, count, eof);
 	else
-		err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count, eof);
+		err = nfsd_iter_read(rqstp, fhp, file, offset, count, 0, eof);

 	nfsd_file_put(nf);

 	trace_nfsd_read_done(rqstp, fhp, offset, *count);
 	return err;
 }
......
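The @base argument is the genuinely new piece of the nfsd_iter_read() API: it lets the payload start part-way into the first page, which matters for callers that have already encoded XDR results into that page. A hedged fragment (variable names here are illustrative, not taken from this diff):

	/* Sketch: compute the in-page offset of the payload from the
	 * XDR buffer's current page_len, then read into rq_pages.
	 */
	unsigned int base = xdr->buf->page_len & ~PAGE_MASK;
	unsigned long maxcount = requested_count;	/* illustrative */
	u32 eof;
	__be32 status;

	status = nfsd_iter_read(rqstp, fhp, file, offset,
				&maxcount, base, &eof);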
@@ -110,13 +110,12 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
 				struct file *file, loff_t offset,
 				unsigned long *count,
 				u32 *eof);
-__be32		nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
-				struct file *file, loff_t offset,
-				struct kvec *vec, int vlen,
-				unsigned long *count,
-				u32 *eof);
-__be32		nfsd_read(struct svc_rqst *, struct svc_fh *,
-				loff_t, struct kvec *, int, unsigned long *,
-				u32 *eof);
+__be32		nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
+				struct file *file, loff_t offset,
+				unsigned long *count, unsigned int base,
+				u32 *eof);
+__be32		nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
+				loff_t offset, unsigned long *count,
+				u32 *eof);
 __be32		nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
 				struct kvec *, int, unsigned long *,
......
@@ -508,6 +508,27 @@ static inline void svcxdr_init_encode(struct svc_rqst *rqstp)
 	xdr->rqst = NULL;
 }

+/**
+ * svcxdr_encode_opaque_pages - Insert pages into an xdr_stream
+ * @xdr: xdr_stream to be updated
+ * @pages: array of pages to insert
+ * @base: starting offset of first data byte in @pages
+ * @len: number of data bytes in @pages to insert
+ *
+ * After the @pages are added, the tail iovec is instantiated pointing
+ * to end of the head buffer, and the stream is set up to encode
+ * subsequent items into the tail.
+ */
+static inline void svcxdr_encode_opaque_pages(struct svc_rqst *rqstp,
+					      struct xdr_stream *xdr,
+					      struct page **pages,
+					      unsigned int base,
+					      unsigned int len)
+{
+	xdr_write_pages(xdr, pages, base, len);
+	xdr->page_ptr = rqstp->rq_next_page - 1;
+}
+
 /**
  * svcxdr_set_auth_slack -
  * @rqstp: RPC transaction
......
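Usage, as seen in the nfs3xdr.c and nfsxdr.c hunks above: after encoding the opaque's length, the encoder attaches the payload pages and the stream continues in the tail. Keeping xdr->page_ptr in step with rq_next_page is presumably what lets a future MSG_SPLICE_PAGES-based send path hand those pages to the transport directly, per the pull summary:

	if (xdr_stream_encode_u32(xdr, resp->count) < 0)
		return false;
	svcxdr_encode_opaque_pages(rqstp, xdr, resp->pages,
				   rqstp->rq_res.page_base, resp->count);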
@@ -135,7 +135,6 @@ struct svc_rdma_recv_ctxt {
 	struct ib_sge		rc_recv_sge;
 	void			*rc_recv_buf;
 	struct xdr_stream	rc_stream;
-	bool			rc_temp;
 	u32			rc_byte_len;
 	unsigned int		rc_page_count;
 	u32			rc_inv_rkey;
@@ -155,12 +154,12 @@ struct svc_rdma_send_ctxt {
 	struct ib_send_wr	sc_send_wr;
 	struct ib_cqe		sc_cqe;
-	struct completion	sc_done;
 	struct xdr_buf		sc_hdrbuf;
 	struct xdr_stream	sc_stream;
 	void			*sc_xprt_buf;
+	int			sc_page_count;
 	int			sc_cur_sge_no;
+	struct page		*sc_pages[RPCSVC_MAXPAGES];
 	struct ib_sge		sc_sges[];
 };
......
@@ -242,8 +242,7 @@ extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf,
 extern void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
 		struct page **pages, struct rpc_rqst *rqst);
 extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
-extern int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec,
-		size_t nbytes);
+extern int xdr_reserve_space_vec(struct xdr_stream *xdr, size_t nbytes);
 extern void __xdr_commit_encode(struct xdr_stream *xdr);
 extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len);
 extern void xdr_truncate_decode(struct xdr_stream *xdr, size_t len);
......
@@ -2112,6 +2112,14 @@ DEFINE_POST_CHUNK_EVENT(read);
 DEFINE_POST_CHUNK_EVENT(write);
 DEFINE_POST_CHUNK_EVENT(reply);

+DEFINE_EVENT(svcrdma_post_chunk_class, svcrdma_cc_release,
+	TP_PROTO(
+		const struct rpc_rdma_cid *cid,
+		int sqecount
+	),
+	TP_ARGS(cid, sqecount)
+);
+
 TRACE_EVENT(svcrdma_wc_read,
 	TP_PROTO(
 		const struct ib_wc *wc,
......
@@ -2104,31 +2104,46 @@ DEFINE_SVC_DEFERRED_EVENT(drop);
 DEFINE_SVC_DEFERRED_EVENT(queue);
 DEFINE_SVC_DEFERRED_EVENT(recv);

-TRACE_EVENT(svcsock_new_socket,
+DECLARE_EVENT_CLASS(svcsock_lifetime_class,
 	TP_PROTO(
+		const void *svsk,
 		const struct socket *socket
 	),
-	TP_ARGS(socket),
+	TP_ARGS(svsk, socket),
 	TP_STRUCT__entry(
+		__field(unsigned int, netns_ino)
+		__field(const void *, svsk)
+		__field(const void *, sk)
 		__field(unsigned long, type)
 		__field(unsigned long, family)
-		__field(bool, listener)
+		__field(unsigned long, state)
 	),
 	TP_fast_assign(
+		struct sock *sk = socket->sk;
+
+		__entry->netns_ino = sock_net(sk)->ns.inum;
+		__entry->svsk = svsk;
+		__entry->sk = sk;
 		__entry->type = socket->type;
-		__entry->family = socket->sk->sk_family;
-		__entry->listener = (socket->sk->sk_state == TCP_LISTEN);
+		__entry->family = sk->sk_family;
+		__entry->state = sk->sk_state;
 	),
-	TP_printk("type=%s family=%s%s",
-		show_socket_type(__entry->type),
+	TP_printk("svsk=%p type=%s family=%s%s",
+		__entry->svsk, show_socket_type(__entry->type),
 		rpc_show_address_family(__entry->family),
-		__entry->listener ? " (listener)" : ""
+		__entry->state == TCP_LISTEN ? " (listener)" : ""
 	)
 );
+
+#define DEFINE_SVCSOCK_LIFETIME_EVENT(name) \
+	DEFINE_EVENT(svcsock_lifetime_class, name, \
+		TP_PROTO( \
+			const void *svsk, \
+			const struct socket *socket \
+		), \
+		TP_ARGS(svsk, socket))
+
+DEFINE_SVCSOCK_LIFETIME_EVENT(svcsock_new);
+DEFINE_SVCSOCK_LIFETIME_EVENT(svcsock_free);

 TRACE_EVENT(svcsock_marker,
 	TP_PROTO(
......
@@ -109,15 +109,15 @@ param_get_pool_mode(char *buf, const struct kernel_param *kp)
 	switch (*ip)
 	{
 	case SVC_POOL_AUTO:
-		return strlcpy(buf, "auto\n", 20);
+		return sysfs_emit(buf, "auto\n");
 	case SVC_POOL_GLOBAL:
-		return strlcpy(buf, "global\n", 20);
+		return sysfs_emit(buf, "global\n");
 	case SVC_POOL_PERCPU:
-		return strlcpy(buf, "percpu\n", 20);
+		return sysfs_emit(buf, "percpu\n");
 	case SVC_POOL_PERNODE:
-		return strlcpy(buf, "pernode\n", 20);
+		return sysfs_emit(buf, "pernode\n");
 	default:
-		return sprintf(buf, "%d\n", *ip);
+		return sysfs_emit(buf, "%d\n", *ip);
 	}
 }
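sysfs_emit() is the safer replacement here: it behaves like scnprintf(buf, PAGE_SIZE, fmt, ...) with an added sanity check that @buf is page-aligned, which holds for sysfs attribute buffers and module-parameter get buffers alike, so the hard-coded "20" length caps become unnecessary. A minimal sketch of the same pattern in another param getter (example_get is hypothetical):

	static int example_get(char *buffer, const struct kernel_param *kp)
	{
		/* bounded by PAGE_SIZE; returns the number of bytes written */
		return sysfs_emit(buffer, "%d\n", *(int *)kp->arg);
	}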
@@ -597,34 +597,25 @@ svc_destroy(struct kref *ref)
 }
 EXPORT_SYMBOL_GPL(svc_destroy);

-/*
- * Allocate an RPC server's buffer space.
- * We allocate pages and place them in rq_pages.
- */
-static int
+static bool
 svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node)
 {
-	unsigned int pages, arghi;
+	unsigned long pages, ret;

 	/* bc_xprt uses fore channel allocated buffers */
 	if (svc_is_backchannel(rqstp))
-		return 1;
+		return true;

 	pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply.
				       * We assume one is at most one page
				       */
-	arghi = 0;
 	WARN_ON_ONCE(pages > RPCSVC_MAXPAGES);
 	if (pages > RPCSVC_MAXPAGES)
 		pages = RPCSVC_MAXPAGES;
-	while (pages) {
-		struct page *p = alloc_pages_node(node, GFP_KERNEL, 0);
-		if (!p)
-			break;
-		rqstp->rq_pages[arghi++] = p;
-		pages--;
-	}
-	return pages == 0;
+
+	ret = alloc_pages_bulk_array_node(GFP_KERNEL, node, pages,
+					  rqstp->rq_pages);
+	return ret == pages;
 }
 /*
@@ -1173,6 +1164,7 @@ static void __svc_unregister(struct net *net, const u32 program, const u32 versi
  */
 static void svc_unregister(const struct svc_serv *serv, struct net *net)
 {
+	struct sighand_struct *sighand;
 	struct svc_program *progp;
 	unsigned long flags;
 	unsigned int i;
@@ -1189,9 +1181,12 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net)
 		}
 	}

-	spin_lock_irqsave(&current->sighand->siglock, flags);
+	rcu_read_lock();
+	sighand = rcu_dereference(current->sighand);
+	spin_lock_irqsave(&sighand->siglock, flags);
 	recalc_sigpending();
-	spin_unlock_irqrestore(&current->sighand->siglock, flags);
+	spin_unlock_irqrestore(&sighand->siglock, flags);
+	rcu_read_unlock();
 }

 /*
......
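A hedged note on the bulk-allocation contract svc_init_buffer() now relies on: alloc_pages_bulk_array_node() fills the NULL slots of the page array with pages allocated on the given NUMA node and returns the number of slots now populated, so a full allocation is signalled by the return value equalling the requested count. Annotated, in the context of the function above:

	unsigned long filled;

	/* one call replaces the old alloc_pages_node() loop */
	filled = alloc_pages_bulk_array_node(GFP_KERNEL, node, pages,
					     rqstp->rq_pages);
	if (filled != pages)
		return false;	/* partial fill; caller treats this as failure */
	return true;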
@@ -74,13 +74,18 @@ static LIST_HEAD(svc_xprt_class_list);
  * that no other thread will be using the transport or will
  * try to set XPT_DEAD.
  */
+
+/**
+ * svc_reg_xprt_class - Register a server-side RPC transport class
+ * @xcl: New transport class to be registered
+ *
+ * Returns zero on success; otherwise a negative errno is returned.
+ */
 int svc_reg_xprt_class(struct svc_xprt_class *xcl)
 {
 	struct svc_xprt_class *cl;
 	int res = -EEXIST;

-	dprintk("svc: Adding svc transport class '%s'\n", xcl->xcl_name);
-
 	INIT_LIST_HEAD(&xcl->xcl_list);
 	spin_lock(&svc_xprt_class_lock);
 	/* Make sure there isn't already a class with the same name */
@@ -96,9 +101,13 @@ int svc_reg_xprt_class(struct svc_xprt_class *xcl)
 }
 EXPORT_SYMBOL_GPL(svc_reg_xprt_class);

+/**
+ * svc_unreg_xprt_class - Unregister a server-side RPC transport class
+ * @xcl: Transport class to be unregistered
+ *
+ */
 void svc_unreg_xprt_class(struct svc_xprt_class *xcl)
 {
-	dprintk("svc: Removing svc transport class '%s'\n", xcl->xcl_name);
-
 	spin_lock(&svc_xprt_class_lock);
 	list_del_init(&xcl->xcl_list);
 	spin_unlock(&svc_xprt_class_lock);
@@ -685,8 +694,9 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
 	}

 	for (filled = 0; filled < pages; filled = ret) {
-		ret = alloc_pages_bulk_array(GFP_KERNEL, pages,
-					     rqstp->rq_pages);
+		ret = alloc_pages_bulk_array_node(GFP_KERNEL,
+						  rqstp->rq_pool->sp_id,
+						  pages, rqstp->rq_pages);
 		if (ret > filled)
 			/* Made progress, don't sleep yet */
 			continue;
@@ -843,15 +853,11 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
 		svc_xprt_received(xprt);
 	} else if (svc_xprt_reserve_slot(rqstp, xprt)) {
 		/* XPT_DATA|XPT_DEFERRED case: */
-		dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
-			rqstp, rqstp->rq_pool->sp_id, xprt,
-			kref_read(&xprt->xpt_ref));
 		rqstp->rq_deferred = svc_deferred_dequeue(xprt);
 		if (rqstp->rq_deferred)
 			len = svc_deferred_recv(rqstp);
 		else
 			len = xprt->xpt_ops->xpo_recvfrom(rqstp);
-		rqstp->rq_stime = ktime_get();
 		rqstp->rq_reserved = serv->sv_max_mesg;
 		atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
 	} else
@@ -894,6 +900,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 	err = -EAGAIN;
 	if (len <= 0)
 		goto out_release;
+
 	trace_svc_xdr_recvfrom(&rqstp->rq_arg);

 	clear_bit(XPT_OLD, &xprt->xpt_flags);
@@ -902,6 +909,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 	if (serv->sv_stats)
 		serv->sv_stats->netcnt++;
+	rqstp->rq_stime = ktime_get();
 	return len;
 out_release:
 	rqstp->rq_res.len = 0;
......
@@ -826,12 +826,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk)

 	trace_sk_data_ready(sk);

-	if (svsk) {
-		/* Refer to svc_setup_socket() for details. */
-		rmb();
-		svsk->sk_odata(sk);
-	}
-
 	/*
 	 * This callback may be called twice when a new connection
 	 * is established as a child socket inherits everything
@@ -840,13 +834,18 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
 	 * when one of the child sockets becomes ESTABLISHED.
 	 * 2) data_ready method of the child socket may be called
 	 * when it receives data before the socket is accepted.
-	 * In case of 2, we should ignore it silently.
+	 * In case of 2, we should ignore it silently and DO NOT
+	 * dereference svsk.
 	 */
-	if (sk->sk_state == TCP_LISTEN) {
-		if (svsk) {
-			set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
-			svc_xprt_enqueue(&svsk->sk_xprt);
-		}
+	if (sk->sk_state != TCP_LISTEN)
+		return;
+
+	if (svsk) {
+		/* Refer to svc_setup_socket() for details. */
+		rmb();
+		svsk->sk_odata(sk);
+		set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
+		svc_xprt_enqueue(&svsk->sk_xprt);
 	}
 }
@@ -887,13 +886,8 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
 	clear_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
 	err = kernel_accept(sock, &newsock, O_NONBLOCK);
 	if (err < 0) {
-		if (err == -ENOMEM)
-			printk(KERN_WARNING "%s: no more sockets!\n",
-			       serv->sv_name);
-		else if (err != -EAGAIN)
-			net_warn_ratelimited("%s: accept failed (err %d)!\n",
-					     serv->sv_name, -err);
-		trace_svcsock_accept_err(xprt, serv->sv_name, err);
+		if (err != -EAGAIN)
+			trace_svcsock_accept_err(xprt, serv->sv_name, err);
 		return NULL;
 	}
 	if (IS_ERR(sock_alloc_file(newsock, O_NONBLOCK, NULL)))
@@ -1464,7 +1458,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 	svsk->sk_owspace = inet->sk_write_space;
 	/*
 	 * This barrier is necessary in order to prevent race condition
-	 * with svc_data_ready(), svc_listen_data_ready() and others
+	 * with svc_data_ready(), svc_tcp_listen_data_ready(), and others
 	 * when calling callbacks above.
 	 */
 	wmb();
@@ -1476,7 +1470,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 	else
 		svc_tcp_init(svsk, serv);

-	trace_svcsock_new_socket(sock);
+	trace_svcsock_new(svsk, sock);
 	return svsk;
 }
@@ -1657,6 +1651,8 @@ static void svc_sock_free(struct svc_xprt *xprt)
 	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
 	struct socket *sock = svsk->sk_sock;

+	trace_svcsock_free(svsk, sock);
+
 	tls_handshake_cancel(sock->sk);
 	if (sock->file)
 		sockfd_put(sock);
......
@@ -1070,22 +1070,22 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes)
 }
 EXPORT_SYMBOL_GPL(xdr_reserve_space);

 /**
  * xdr_reserve_space_vec - Reserves a large amount of buffer space for sending
  * @xdr: pointer to xdr_stream
- * @vec: pointer to a kvec array
  * @nbytes: number of bytes to reserve
  *
- * Reserves enough buffer space to encode 'nbytes' of data and stores the
- * pointers in 'vec'. The size argument passed to xdr_reserve_space() is
- * determined based on the number of bytes remaining in the current page to
- * avoid invalidating iov_base pointers when xdr_commit_encode() is called.
+ * The size argument passed to xdr_reserve_space() is determined based
+ * on the number of bytes remaining in the current page to avoid
+ * invalidating iov_base pointers when xdr_commit_encode() is called.
+ *
+ * Return values:
+ *   %0: success
+ *   %-EMSGSIZE: not enough space is available in @xdr
  */
-int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, size_t nbytes)
+int xdr_reserve_space_vec(struct xdr_stream *xdr, size_t nbytes)
 {
-	int thislen;
-	int v = 0;
+	size_t thislen;
 	__be32 *p;

 	/*
@@ -1097,21 +1097,19 @@ int xdr_reserve_space_vec(struct xdr_stream *xdr, size_t nbytes)
 		xdr->end = xdr->p;
 	}

-	/* XXX: Let's find a way to make this more efficient */
 	while (nbytes) {
 		thislen = xdr->buf->page_len % PAGE_SIZE;
 		thislen = min_t(size_t, nbytes, PAGE_SIZE - thislen);

 		p = xdr_reserve_space(xdr, thislen);
 		if (!p)
-			return -EIO;
+			return -EMSGSIZE;

-		vec[v].iov_base = p;
-		vec[v].iov_len = thislen;
-		v++;
 		nbytes -= thislen;
 	}

-	return v;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(xdr_reserve_space_vec);
......
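With the kvec out-parameter gone, callers of xdr_reserve_space_vec() only learn whether the reservation fit; the reserved region is tracked by the xdr_stream itself. A sketch of the updated calling convention (the surrounding svcrdma caller shape is assumed, not taken from this diff):

	ret = xdr_reserve_space_vec(&sctxt->sc_stream, length);
	if (ret < 0)
		return ret;	/* -EMSGSIZE: no room left in the reply buffer */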
@@ -93,13 +93,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
 	 */
 	get_page(virt_to_page(rqst->rq_buffer));
 	sctxt->sc_send_wr.opcode = IB_WR_SEND;
-	ret = svc_rdma_send(rdma, sctxt);
-	if (ret < 0)
-		return ret;
-
-	ret = wait_for_completion_killable(&sctxt->sc_done);
-	svc_rdma_send_ctxt_put(rdma, sctxt);
-	return ret;
+	return svc_rdma_send(rdma, sctxt);
 }

 /* Server-side transport endpoint wants a whole page for its send
......
@@ -125,14 +125,15 @@ static void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma,
 static struct svc_rdma_recv_ctxt *
 svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
 {
+	int node = ibdev_to_node(rdma->sc_cm_id->device);
 	struct svc_rdma_recv_ctxt *ctxt;
 	dma_addr_t addr;
 	void *buffer;

-	ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
+	ctxt = kmalloc_node(sizeof(*ctxt), GFP_KERNEL, node);
 	if (!ctxt)
 		goto fail0;
-	buffer = kmalloc(rdma->sc_max_req_size, GFP_KERNEL);
+	buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node);
 	if (!buffer)
 		goto fail1;
 	addr = ib_dma_map_single(rdma->sc_pd->device, buffer,
@@ -155,7 +156,6 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
 	ctxt->rc_recv_sge.length = rdma->sc_max_req_size;
 	ctxt->rc_recv_sge.lkey = rdma->sc_pd->local_dma_lkey;
 	ctxt->rc_recv_buf = buffer;
-	ctxt->rc_temp = false;
 	return ctxt;

 fail2:
@@ -232,10 +232,7 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
 	pcl_free(&ctxt->rc_write_pcl);
 	pcl_free(&ctxt->rc_reply_pcl);

-	if (!ctxt->rc_temp)
-		llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts);
-	else
-		svc_rdma_recv_ctxt_destroy(rdma, ctxt);
+	llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts);
 }
/** /**
...@@ -258,7 +255,7 @@ void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *vctxt) ...@@ -258,7 +255,7 @@ void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *vctxt)
} }
static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma, static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma,
unsigned int wanted, bool temp) unsigned int wanted)
{ {
const struct ib_recv_wr *bad_wr = NULL; const struct ib_recv_wr *bad_wr = NULL;
struct svc_rdma_recv_ctxt *ctxt; struct svc_rdma_recv_ctxt *ctxt;
...@@ -275,7 +272,6 @@ static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma, ...@@ -275,7 +272,6 @@ static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma,
break; break;
trace_svcrdma_post_recv(ctxt); trace_svcrdma_post_recv(ctxt);
ctxt->rc_temp = temp;
ctxt->rc_recv_wr.next = recv_chain; ctxt->rc_recv_wr.next = recv_chain;
recv_chain = &ctxt->rc_recv_wr; recv_chain = &ctxt->rc_recv_wr;
rdma->sc_pending_recvs++; rdma->sc_pending_recvs++;
...@@ -309,7 +305,7 @@ static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma, ...@@ -309,7 +305,7 @@ static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma,
*/ */
bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma) bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
{ {
return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests, true); return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests);
} }
/** /**
...@@ -343,7 +339,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) ...@@ -343,7 +339,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
* client reconnects. * client reconnects.
*/ */
if (rdma->sc_pending_recvs < rdma->sc_max_requests) if (rdma->sc_pending_recvs < rdma->sc_max_requests)
if (!svc_rdma_refresh_recvs(rdma, rdma->sc_recv_batch, false)) if (!svc_rdma_refresh_recvs(rdma, rdma->sc_recv_batch))
goto dropped; goto dropped;
/* All wc fields are now known to be valid */ /* All wc fields are now known to be valid */
@@ -775,9 +771,6 @@ static bool svc_rdma_is_reverse_direction_reply(struct svc_xprt *xprt,
 *
 * The next ctxt is removed from the "receive" lists.
 *
- * - If the ctxt completes a Read, then finish assembling the Call
- *   message and return the number of bytes in the message.
- *
 * - If the ctxt completes a Receive, then construct the Call
 *   message from the contents of the Receive buffer.
 *
@@ -786,7 +779,8 @@ static bool svc_rdma_is_reverse_direction_reply(struct svc_xprt *xprt,
 *   in the message.
 *
 * - If there are Read chunks in this message, post Read WRs to
- *   pull that payload and return 0.
+ *   pull that payload. When the Read WRs complete, build the
+ *   full message and return the number of bytes in it.
 */
 int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 {
@@ -796,6 +790,12 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 	struct svc_rdma_recv_ctxt *ctxt;
 	int ret;
 
+	/* Prevent svc_xprt_release() from releasing pages in rq_pages
+	 * when returning 0 or an error.
+	 */
+	rqstp->rq_respages = rqstp->rq_pages;
+	rqstp->rq_next_page = rqstp->rq_respages;
+
 	rqstp->rq_xprt_ctxt = NULL;
 
 	ctxt = NULL;
@@ -819,12 +819,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 			DMA_FROM_DEVICE);
 	svc_rdma_build_arg_xdr(rqstp, ctxt);
 
-	/* Prevent svc_xprt_release from releasing pages in rq_pages
-	 * if we return 0 or an error.
-	 */
-	rqstp->rq_respages = rqstp->rq_pages;
-	rqstp->rq_next_page = rqstp->rq_respages;
-
 	ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg, ctxt);
 	if (ret < 0)
 		goto out_err;
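
Moving the rq_respages reset to the top of svc_rdma_recvfrom() establishes the "caller releases nothing" invariant before any early return can be taken, instead of only after a receive was dequeued. A standalone model of the same move, using invented names:

#include <assert.h>

struct rqst {
	char *pages[8];
	char **respages;
	char **next_page;
};

static int recvfrom(struct rqst *rq, int fail_early)
{
	/* previously done only later, after a ctxt was dequeued */
	rq->respages = rq->pages;
	rq->next_page = rq->respages;

	if (fail_early)
		return -1;	/* caller now releases zero pages */
	/* ... normal processing would advance rq->next_page ... */
	return 0;
}

int main(void)
{
	struct rqst rq;

	assert(recvfrom(&rq, 1) == -1);
	assert(rq.next_page - rq.respages == 0);	/* nothing to free */
	return 0;
}
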
......
@@ -62,8 +62,8 @@ svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
 	if (node) {
 		ctxt = llist_entry(node, struct svc_rdma_rw_ctxt, rw_node);
 	} else {
-		ctxt = kmalloc(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE),
-			       GFP_KERNEL);
+		ctxt = kmalloc_node(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE),
+				    GFP_KERNEL, ibdev_to_node(rdma->sc_cm_id->device));
 		if (!ctxt)
 			goto out_noctx;
@@ -84,8 +84,7 @@ svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
 	return NULL;
 }
 
-static void __svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
-				   struct svc_rdma_rw_ctxt *ctxt,
+static void __svc_rdma_put_rw_ctxt(struct svc_rdma_rw_ctxt *ctxt,
 				   struct llist_head *list)
 {
 	sg_free_table_chained(&ctxt->rw_sg_table, SG_CHUNK_SIZE);
@@ -95,7 +94,7 @@ static void __svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
 static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
 				 struct svc_rdma_rw_ctxt *ctxt)
 {
-	__svc_rdma_put_rw_ctxt(rdma, ctxt, &rdma->sc_rw_ctxts);
+	__svc_rdma_put_rw_ctxt(ctxt, &rdma->sc_rw_ctxts);
 }
 
 /**
@@ -191,6 +190,8 @@ static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc,
 	struct svc_rdma_rw_ctxt *ctxt;
 	LLIST_HEAD(free);
 
+	trace_svcrdma_cc_release(&cc->cc_cid, cc->cc_sqecount);
+
 	first = last = NULL;
 	while ((ctxt = svc_rdma_next_ctxt(&cc->cc_rwctxts)) != NULL) {
 		list_del(&ctxt->rw_list);
@@ -198,7 +199,7 @@ static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc,
 		rdma_rw_ctx_destroy(&ctxt->rw_ctx, rdma->sc_qp,
 				    rdma->sc_port_num, ctxt->rw_sg_table.sgl,
 				    ctxt->rw_nents, dir);
-		__svc_rdma_put_rw_ctxt(rdma, ctxt, &free);
+		__svc_rdma_put_rw_ctxt(ctxt, &free);
 
 		ctxt->rw_node.next = first;
 		first = &ctxt->rw_node;
@@ -234,7 +235,8 @@ svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma,
 {
 	struct svc_rdma_write_info *info;
 
-	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	info = kmalloc_node(sizeof(*info), GFP_KERNEL,
+			    ibdev_to_node(rdma->sc_cm_id->device));
 	if (!info)
 		return info;
@@ -304,7 +306,8 @@ svc_rdma_read_info_alloc(struct svcxprt_rdma *rdma)
 {
 	struct svc_rdma_read_info *info;
 
-	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	info = kmalloc_node(sizeof(*info), GFP_KERNEL,
+			    ibdev_to_node(rdma->sc_cm_id->device));
 	if (!info)
 		return info;
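
The allocation hunks in this file all follow one pattern: resolve the RDMA device's NUMA node once with ibdev_to_node() and pass it to a _node allocator, so control structures land on the memory controller closest to the HCA. A minimal module sketch of that placement pattern; the module scaffolding is illustrative, while kmalloc_node(), GFP_KERNEL, and NUMA_NO_NODE are the real kernel APIs:

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/numa.h>

static void *ctrl_buf;

static int __init numa_alloc_demo_init(void)
{
	/* A driver would pass ibdev_to_node(device) here; NUMA_NO_NODE
	 * requests no particular placement and is the safe fallback.
	 */
	ctrl_buf = kmalloc_node(4096, GFP_KERNEL, NUMA_NO_NODE);
	return ctrl_buf ? 0 : -ENOMEM;
}

static void __exit numa_alloc_demo_exit(void)
{
	kfree(ctrl_buf);
}

module_init(numa_alloc_demo_init);
module_exit(numa_alloc_demo_exit);
MODULE_DESCRIPTION("kmalloc_node() placement sketch");
MODULE_LICENSE("GPL");
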
@@ -351,8 +354,7 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
 		return;
 	}
 
-/* This function sleeps when the transport's Send Queue is congested.
- *
+/*
  * Assumptions:
  * - If ib_post_send() succeeds, only one completion is expected,
  *   even if one or more WRs are flushed. This is true when posting
@@ -367,6 +369,8 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
 	struct ib_cqe *cqe;
 	int ret;
 
+	might_sleep();
+
 	if (cc->cc_sqecount > rdma->sc_sq_depth)
 		return -EINVAL;
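
Replacing the "this function sleeps" comment with might_sleep() turns documentation into a runtime check: with CONFIG_DEBUG_ATOMIC_SLEEP enabled, a call from atomic context now produces a diagnostic instead of a silent bug. A fragment showing the shape of the annotation (might_sleep() is the real kernel API; the function around it is invented):

#include <linux/kernel.h>

/* Illustrative: posting may wait for send-queue credits, so assert
 * up front that this context is allowed to sleep.
 */
static int example_post_chunk(void)
{
	might_sleep();

	/* ... wait for SQ space, then ib_post_send() ... */
	return 0;
}
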
......
@@ -123,18 +123,17 @@ static void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma,
 static struct svc_rdma_send_ctxt *
 svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
 {
+	int node = ibdev_to_node(rdma->sc_cm_id->device);
 	struct svc_rdma_send_ctxt *ctxt;
 	dma_addr_t addr;
 	void *buffer;
-	size_t size;
 	int i;
 
-	size = sizeof(*ctxt);
-	size += rdma->sc_max_send_sges * sizeof(struct ib_sge);
-	ctxt = kmalloc(size, GFP_KERNEL);
+	ctxt = kmalloc_node(struct_size(ctxt, sc_sges, rdma->sc_max_send_sges),
+			    GFP_KERNEL, node);
 	if (!ctxt)
 		goto fail0;
-	buffer = kmalloc(rdma->sc_max_req_size, GFP_KERNEL);
+	buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node);
 	if (!buffer)
 		goto fail1;
 	addr = ib_dma_map_single(rdma->sc_pd->device, buffer,
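
struct_size() replaces the open-coded size arithmetic for the trailing sc_sges[] flexible array and adds overflow checking. A plain-C model of the single-allocation layout it computes (the kernel macro additionally saturates on overflow; names here are illustrative):

#include <stdio.h>
#include <stdlib.h>

struct send_ctxt {
	unsigned int num_sges;
	struct { void *addr; size_t length; } sges[];	/* flexible array */
};

int main(void)
{
	unsigned int max_sges = 16;
	struct send_ctxt *ctxt;

	/* one block for header plus array, as struct_size() sizes it */
	ctxt = malloc(sizeof(*ctxt) + max_sges * sizeof(ctxt->sges[0]));
	if (!ctxt)
		return 1;
	ctxt->num_sges = max_sges;
	printf("one allocation, %u sges\n", ctxt->num_sges);
	free(ctxt);
	return 0;
}
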
@@ -148,7 +147,6 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
 	ctxt->sc_send_wr.wr_cqe = &ctxt->sc_cqe;
 	ctxt->sc_send_wr.sg_list = ctxt->sc_sges;
 	ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED;
-	init_completion(&ctxt->sc_done);
 	ctxt->sc_cqe.done = svc_rdma_wc_send;
 	ctxt->sc_xprt_buf = buffer;
 	xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf,
@@ -214,6 +212,7 @@ struct svc_rdma_send_ctxt *svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma)
 	ctxt->sc_send_wr.num_sge = 0;
 	ctxt->sc_cur_sge_no = 0;
+	ctxt->sc_page_count = 0;
 	return ctxt;
 
 out_empty:
@@ -228,6 +227,8 @@ struct svc_rdma_send_ctxt *svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma)
  * svc_rdma_send_ctxt_put - Return send_ctxt to free list
  * @rdma: controlling svcxprt_rdma
  * @ctxt: object to return to the free list
+ *
+ * Pages left in sc_pages are DMA unmapped and released.
  */
 void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
 			    struct svc_rdma_send_ctxt *ctxt)
@@ -235,6 +236,9 @@ void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
 	struct ib_device *device = rdma->sc_cm_id->device;
 	unsigned int i;
 
+	if (ctxt->sc_page_count)
+		release_pages(ctxt->sc_pages, ctxt->sc_page_count);
+
 	/* The first SGE contains the transport header, which
 	 * remains mapped until @ctxt is destroyed.
 	 */
@@ -281,12 +285,12 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
 		container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe);
 
 	svc_rdma_wake_send_waiters(rdma, 1);
-	complete(&ctxt->sc_done);
 
 	if (unlikely(wc->status != IB_WC_SUCCESS))
 		goto flushed;
 
 	trace_svcrdma_wc_send(wc, &ctxt->sc_cid);
+	svc_rdma_send_ctxt_put(rdma, ctxt);
 	return;
 
 flushed:
@@ -294,6 +298,7 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
 		trace_svcrdma_wc_send_err(wc, &ctxt->sc_cid);
 	else
 		trace_svcrdma_wc_send_flush(wc, &ctxt->sc_cid);
+	svc_rdma_send_ctxt_put(rdma, ctxt);
 	svc_xprt_deferred_close(&rdma->sc_xprt);
 }
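
With the completion no longer signaled to a blocked sender, svc_rdma_wc_send() becomes the single release point for the send context on both the success and the flushed paths. A standalone model of moving cleanup from a blocking waiter into the completion callback, invented names throughout:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct send_ctxt { int id; };

static void ctxt_put(struct send_ctxt *c)
{
	printf("released ctxt %d\n", c->id);
	free(c);
}

static void wc_send(struct send_ctxt *c, bool success)
{
	if (!success)
		fprintf(stderr, "send flushed\n");
	ctxt_put(c);	/* single release point, no waiter involved */
}

int main(void)
{
	struct send_ctxt *c = malloc(sizeof(*c));

	if (!c)
		return 1;
	c->id = 1;
	wc_send(c, true);	/* completion handler owns the release */
	return 0;
}
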
@@ -310,7 +315,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
 	struct ib_send_wr *wr = &ctxt->sc_send_wr;
 	int ret;
 
-	reinit_completion(&ctxt->sc_done);
+	might_sleep();
 
 	/* Sync the transport header buffer */
 	ib_dma_sync_single_for_device(rdma->sc_pd->device,
@@ -799,6 +804,25 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
 				      svc_rdma_xb_dma_map, &args);
 }
 
+/* The svc_rqst and all resources it owns are released as soon as
+ * svc_rdma_sendto returns. Transfer pages under I/O to the ctxt
+ * so they are released by the Send completion handler.
+ */
+static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
+				   struct svc_rdma_send_ctxt *ctxt)
+{
+	int i, pages = rqstp->rq_next_page - rqstp->rq_respages;
+
+	ctxt->sc_page_count += pages;
+	for (i = 0; i < pages; i++) {
+		ctxt->sc_pages[i] = rqstp->rq_respages[i];
+		rqstp->rq_respages[i] = NULL;
+	}
+
+	/* Prevent svc_xprt_release from releasing pages in rq_pages */
+	rqstp->rq_next_page = rqstp->rq_respages;
+}
+
 /* Prepare the portion of the RPC Reply that will be transmitted
  * via RDMA Send. The RPC-over-RDMA transport header is prepared
  * in sc_sges[0], and the RPC xdr_buf is prepared in following sges.
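
svc_rdma_save_io_pages() transfers ownership of pages still under I/O from the svc_rqst, which is torn down as soon as sendto returns, to the send context, which lives until the Send completes. A standalone model of that handoff, illustrative names only:

#include <stdio.h>

#define MAXP 4

struct rqst { void *pages[MAXP]; int next; };
struct ctxt { void *pages[MAXP]; int count; };

static void save_io_pages(struct rqst *rq, struct ctxt *c)
{
	int i;

	for (i = 0; i < rq->next; i++) {
		c->pages[c->count++] = rq->pages[i];
		rq->pages[i] = NULL;	/* request no longer owns it */
	}
	rq->next = 0;			/* request teardown frees nothing */
}

int main(void)
{
	char a, b;
	struct rqst rq = { .pages = { &a, &b }, .next = 2 };
	struct ctxt c = { .count = 0 };

	save_io_pages(&rq, &c);
	printf("ctxt now owns %d pages\n", c.count);
	return 0;
}
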
@@ -828,6 +852,8 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
 	if (ret < 0)
 		return ret;
 
+	svc_rdma_save_io_pages(rqstp, sctxt);
+
 	if (rctxt->rc_inv_rkey) {
 		sctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
 		sctxt->sc_send_wr.ex.invalidate_rkey = rctxt->rc_inv_rkey;
@@ -835,13 +861,7 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
 		sctxt->sc_send_wr.opcode = IB_WR_SEND;
 	}
 
-	ret = svc_rdma_send(rdma, sctxt);
-	if (ret < 0)
-		return ret;
-
-	ret = wait_for_completion_killable(&sctxt->sc_done);
-	svc_rdma_send_ctxt_put(rdma, sctxt);
-	return ret;
+	return svc_rdma_send(rdma, sctxt);
 }
 
 /**
@@ -907,8 +927,7 @@ void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
 	sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len;
 	if (svc_rdma_send(rdma, sctxt))
 		goto put_ctxt;
-
-	wait_for_completion_killable(&sctxt->sc_done);
+	return;
 
 put_ctxt:
 	svc_rdma_send_ctxt_put(rdma, sctxt);
@@ -976,17 +995,16 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp);
 	if (ret < 0)
 		goto put_ctxt;
 
-	/* Prevent svc_xprt_release() from releasing the page backing
-	 * rq_res.head[0].iov_base. It's no longer being accessed by
-	 * the I/O device. */
-	rqstp->rq_respages++;
 	return 0;
 
 reply_chunk:
 	if (ret != -E2BIG && ret != -EINVAL)
 		goto put_ctxt;
 
+	/* Send completion releases payload pages that were part
+	 * of previously posted RDMA Writes.
+	 */
+	svc_rdma_save_io_pages(rqstp, sctxt);
 	svc_rdma_send_error_msg(rdma, sctxt, rctxt, ret);
 	return 0;
......
@@ -64,7 +64,7 @@
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
 static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
-						 struct net *net);
+						 struct net *net, int node);
 static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
 					struct net *net,
 					struct sockaddr *sa, int salen,
@@ -123,14 +123,14 @@ static void qp_event_handler(struct ib_event *event, void *context)
 }
 
 static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
-						 struct net *net)
+						 struct net *net, int node)
 {
-	struct svcxprt_rdma *cma_xprt = kzalloc(sizeof *cma_xprt, GFP_KERNEL);
+	struct svcxprt_rdma *cma_xprt;
 
-	if (!cma_xprt) {
-		dprintk("svcrdma: failed to create new transport\n");
+	cma_xprt = kzalloc_node(sizeof(*cma_xprt), GFP_KERNEL, node);
+	if (!cma_xprt)
 		return NULL;
-	}
+
 	svc_xprt_init(net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
 	INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
@@ -193,9 +193,9 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id,
 	struct svcxprt_rdma *newxprt;
 	struct sockaddr *sa;
 
-	/* Create a new transport */
 	newxprt = svc_rdma_create_xprt(listen_xprt->sc_xprt.xpt_server,
-				       listen_xprt->sc_xprt.xpt_net);
+				       listen_xprt->sc_xprt.xpt_net,
+				       ibdev_to_node(new_cma_id->device));
 	if (!newxprt)
 		return;
 	newxprt->sc_cm_id = new_cma_id;
@@ -304,7 +304,7 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
 	if (sa->sa_family != AF_INET && sa->sa_family != AF_INET6)
 		return ERR_PTR(-EAFNOSUPPORT);
 
-	cma_xprt = svc_rdma_create_xprt(serv, net);
+	cma_xprt = svc_rdma_create_xprt(serv, net, NUMA_NO_NODE);
 	if (!cma_xprt)
 		return ERR_PTR(-ENOMEM);
 	set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
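
The split at the two call sites is deliberate: a listener has no RDMA device yet, so it allocates with NUMA_NO_NODE, while each accepted connection allocates on the node of the device it arrived on. A fragment capturing that choice; ibdev_to_node() and NUMA_NO_NODE are the kernel helpers used above, the wrapper function is invented:

#include <rdma/ib_verbs.h>
#include <linux/numa.h>

/* Illustrative: pick the allocation node for a transport. Listeners
 * pass a NULL device and get no placement constraint; accepted
 * connections get NUMA-local allocations.
 */
static int xprt_node(struct ib_device *device)
{
	return device ? ibdev_to_node(device) : NUMA_NO_NODE;
}
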
......