Commit 8313064c authored by Linus Torvalds

Merge tag 'nfsd-4.11' of git://linux-nfs.org/~bfields/linux

Pull nfsd updates from Bruce Fields:
 "The nfsd update this round is mainly a lot of miscellaneous cleanups
  and bugfixes.

  A couple changes could theoretically break working setups on upgrade.
  I don't expect complaints in practice, but they seem worth calling out
  just in case:

   - NFS security labels are now off by default; a new security_label
     export flag reenables it per export. But, having them on by default
     is a disaster, as it generally only makes sense if all your clients
     and servers have similar enough selinux policies. Thanks to Jason
     Tibbitts for pointing this out.

   - NFSv4/UDP support is off. It was never really supported, and the
     spec explicitly forbids it. We only ever left it on out of
     laziness; thanks to Jeff Layton for finally fixing that"
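
For admins who do want labeled NFS after this change, the new security_label
export option has to be set per export. A minimal illustrative /etc/exports
entry is sketched below; the path and client name are placeholders, and the
option name assumes an nfs-utils/exportfs new enough to recognize the flag:

    /export    client.example.com(rw,sync,security_label)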

* tag 'nfsd-4.11' of git://linux-nfs.org/~bfields/linux: (34 commits)
  nfsd: Fix display of the version string
  nfsd: fix configuration of supported minor versions
  sunrpc: don't register UDP port with rpcbind when version needs congestion control
  nfs/nfsd/sunrpc: enforce transport requirements for NFSv4
  sunrpc: flag transports as having congestion control
  sunrpc: turn bitfield flags in svc_version into bools
  nfsd: remove superfluous KERN_INFO
  nfsd: special case truncates some more
  nfsd: minor nfsd_setattr cleanup
  NFSD: Reserve adequate space for LOCKT operation
  NFSD: Get response size before operation for all RPCs
  nfsd/callback: Drop a useless data copy when comparing sessionid
  nfsd/callback: skip the callback tag
  nfsd/callback: Cleanup callback cred on shutdown
  nfsd/idmap: return nfserr_inval for 0-length names
  SUNRPC/Cache: Always treat the invalid cache as unexpired
  SUNRPC: Drop all entries from cache_detail when cache_purge()
  svcrdma: Poll CQs in "workqueue" mode
  svcrdma: Combine list fields in struct svc_rdma_op_ctxt
  svcrdma: Remove unused sc_dto_q field
  ...
parents b2deee2d ff7d1179
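
Because NFSv4 service is no longer registered or answered over UDP (the new
vs_need_cong_ctrl flag and the XPT_CONG_CTRL transport bit in the diff below
enforce a congestion-controlled transport), NFSv4 clients should mount over
TCP. A hypothetical example, with the server name and paths as placeholders:

    mount -t nfs -o vers=4.1,proto=tcp server.example.com:/export /mnt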
@@ -322,6 +322,8 @@ static int lockd_inet6addr_event(struct notifier_block *this,
 dprintk("lockd_inet6addr_event: removed %pI6\n", &ifa->addr);
 sin6.sin6_family = AF_INET6;
 sin6.sin6_addr = ifa->addr;
+if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
+sin6.sin6_scope_id = ifa->idev->dev->ifindex;
 svc_age_temp_xprts_now(nlmsvc_rqst->rq_server,
 (struct sockaddr *)&sin6);
 }
...
@@ -1083,7 +1083,8 @@ struct svc_version nfs4_callback_version1 = {
 .vs_proc = nfs4_callback_procedures1,
 .vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
 .vs_dispatch = NULL,
-.vs_hidden = 1,
+.vs_hidden = true,
+.vs_need_cong_ctrl = true,
 };
 struct svc_version nfs4_callback_version4 = {
@@ -1092,5 +1093,6 @@ struct svc_version nfs4_callback_version4 = {
 .vs_proc = nfs4_callback_procedures1,
 .vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
 .vs_dispatch = NULL,
-.vs_hidden = 1,
+.vs_hidden = true,
+.vs_need_cong_ctrl = true,
 };
@@ -1102,6 +1102,7 @@ static struct flags {
 { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}},
 { NFSEXP_V4ROOT, {"v4root", ""}},
 { NFSEXP_PNFS, {"pnfs", ""}},
+{ NFSEXP_SECURITY_LABEL, {"security_label", ""}},
 { 0, {"", ""}}
 };
...
@@ -376,5 +376,4 @@ struct svc_version nfsd_acl_version2 = {
 .vs_proc = nfsd_acl_procedures2,
 .vs_dispatch = nfsd_dispatch,
 .vs_xdrsize = NFS3_SVC_XDRSIZE,
-.vs_hidden = 0,
 };
@@ -266,6 +266,5 @@ struct svc_version nfsd_acl_version3 = {
 .vs_proc = nfsd_acl_procedures3,
 .vs_dispatch = nfsd_dispatch,
 .vs_xdrsize = NFS3_SVC_XDRSIZE,
-.vs_hidden = 0,
 };
@@ -193,11 +193,9 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
 fh_copy(&resp->fh, &argp->fh);
 resp->committed = argp->stable;
-nfserr = nfsd_write(rqstp, &resp->fh, NULL,
-argp->offset,
-rqstp->rq_vec, argp->vlen,
-&cnt,
-&resp->committed);
+nfserr = nfsd_write(rqstp, &resp->fh, argp->offset,
+rqstp->rq_vec, argp->vlen,
+&cnt, resp->committed);
 resp->count = cnt;
 RETURN_STATUS(nfserr);
 }
...
@@ -303,6 +303,7 @@ static int decode_cb_compound4res(struct xdr_stream *xdr,
 p = xdr_inline_decode(xdr, length + 4);
 if (unlikely(p == NULL))
 goto out_overflow;
+p += XDR_QUADLEN(length);
 hdr->nops = be32_to_cpup(p);
 return 0;
 out_overflow:
@@ -396,13 +397,10 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
 struct nfsd4_callback *cb)
 {
 struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
-struct nfs4_sessionid id;
-int status;
+int status = -ESERVERFAULT;
 __be32 *p;
 u32 dummy;
-status = -ESERVERFAULT;
 /*
 * If the server returns different values for sessionID, slotID or
 * sequence number, the server is looney tunes.
@@ -410,9 +408,8 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
 p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4 + 4 + 4);
 if (unlikely(p == NULL))
 goto out_overflow;
-memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
-if (memcmp(id.data, session->se_sessionid.data,
-NFS4_MAX_SESSIONID_LEN) != 0) {
+if (memcmp(p, session->se_sessionid.data, NFS4_MAX_SESSIONID_LEN)) {
 dprintk("NFS: %s Invalid session id\n", __func__);
 goto out;
 }
@@ -753,6 +750,14 @@ int set_callback_cred(void)
 return 0;
 }
+void cleanup_callback_cred(void)
+{
+if (callback_cred) {
+put_rpccred(callback_cred);
+callback_cred = NULL;
+}
+}
 static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses)
 {
 if (clp->cl_minorversion == 0) {
...
@@ -628,6 +628,10 @@ nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen,
 {
 __be32 status;
 u32 id = -1;
+if (name == NULL || namelen == 0)
+return nfserr_inval;
 status = do_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, &id);
 *uid = make_kuid(&init_user_ns, id);
 if (!uid_valid(*uid))
@@ -641,6 +645,10 @@ nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen,
 {
 __be32 status;
 u32 id = -1;
+if (name == NULL || namelen == 0)
+return nfserr_inval;
 status = do_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, &id);
 *gid = make_kgid(&init_user_ns, id);
 if (!gid_valid(*gid))
...
@@ -95,11 +95,15 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 u32 *bmval, u32 *writable)
 {
 struct dentry *dentry = cstate->current_fh.fh_dentry;
+struct svc_export *exp = cstate->current_fh.fh_export;
 if (!nfsd_attrs_supported(cstate->minorversion, bmval))
 return nfserr_attrnotsupp;
 if ((bmval[0] & FATTR4_WORD0_ACL) && !IS_POSIXACL(d_inode(dentry)))
 return nfserr_attrnotsupp;
+if ((bmval[2] & FATTR4_WORD2_SECURITY_LABEL) &&
+!(exp->ex_flags & NFSEXP_SECURITY_LABEL))
+return nfserr_attrnotsupp;
 if (writable && !bmval_is_subset(bmval, writable))
 return nfserr_inval;
 if (writable && (bmval[2] & FATTR4_WORD2_MODE_UMASK) &&
@@ -983,7 +987,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
 write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
-&write->wr_how_written);
+write->wr_how_written);
 fput(filp);
 write->wr_bytes_written = cnt;
@@ -1838,6 +1842,12 @@ static inline u32 nfsd4_status_stateid_rsize(struct svc_rqst *rqstp, struct nfsd
 return (op_encode_hdr_size + op_encode_stateid_maxsz)* sizeof(__be32);
 }
+static inline u32 nfsd4_access_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+/* ac_supported, ac_resp_access */
+return (op_encode_hdr_size + 2)* sizeof(__be32);
+}
 static inline u32 nfsd4_commit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32);
@@ -1892,6 +1902,11 @@ static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp,
 return ret;
 }
+static inline u32 nfsd4_getfh_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+return (op_encode_hdr_size + 1) * sizeof(__be32) + NFS4_FHSIZE;
+}
 static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 return (op_encode_hdr_size + op_encode_change_info_maxsz)
@@ -1933,6 +1948,11 @@ static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *o
 XDR_QUADLEN(rlen)) * sizeof(__be32);
 }
+static inline u32 nfsd4_readlink_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+return (op_encode_hdr_size + 1) * sizeof(__be32) + PAGE_SIZE;
+}
 static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 return (op_encode_hdr_size + op_encode_change_info_maxsz)
@@ -1952,11 +1972,23 @@ static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp,
 + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) * sizeof(__be32);
 }
+static inline u32 nfsd4_test_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+return (op_encode_hdr_size + 1 + op->u.test_stateid.ts_num_ids)
+* sizeof(__be32);
+}
 static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
 }
+static inline u32 nfsd4_secinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+return (op_encode_hdr_size + RPC_AUTH_MAXFLAVOR *
+(4 + XDR_QUADLEN(GSS_OID_MAX_LEN))) * sizeof(__be32);
+}
 static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) *
@@ -2011,6 +2043,19 @@ static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 }
 #ifdef CONFIG_NFSD_PNFS
+static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+u32 maxcount = 0, rlen = 0;
+maxcount = svc_max_payload(rqstp);
+rlen = min(op->u.getdeviceinfo.gd_maxcount, maxcount);
+return (op_encode_hdr_size +
+1 /* gd_layout_type*/ +
+XDR_QUADLEN(rlen) +
+2 /* gd_notify_types */) * sizeof(__be32);
+}
 /*
 * At this stage we don't really know what layout driver will handle the request,
 * so we need to define an arbitrary upper bound here.
@@ -2040,10 +2085,17 @@ static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_
 }
 #endif /* CONFIG_NFSD_PNFS */
+static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+return (op_encode_hdr_size + 3) * sizeof(__be32);
+}
 static struct nfsd4_operation nfsd4_ops[] = {
 [OP_ACCESS] = {
 .op_func = (nfsd4op_func)nfsd4_access,
 .op_name = "OP_ACCESS",
+.op_rsize_bop = (nfsd4op_rsize)nfsd4_access_rsize,
 },
 [OP_CLOSE] = {
 .op_func = (nfsd4op_func)nfsd4_close,
@@ -2081,6 +2133,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 [OP_GETFH] = {
 .op_func = (nfsd4op_func)nfsd4_getfh,
 .op_name = "OP_GETFH",
+.op_rsize_bop = (nfsd4op_rsize)nfsd4_getfh_rsize,
 },
 [OP_LINK] = {
 .op_func = (nfsd4op_func)nfsd4_link,
@@ -2099,6 +2152,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 [OP_LOCKT] = {
 .op_func = (nfsd4op_func)nfsd4_lockt,
 .op_name = "OP_LOCKT",
+.op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize,
 },
 [OP_LOCKU] = {
 .op_func = (nfsd4op_func)nfsd4_locku,
@@ -2111,15 +2165,18 @@ static struct nfsd4_operation nfsd4_ops[] = {
 .op_func = (nfsd4op_func)nfsd4_lookup,
 .op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID,
 .op_name = "OP_LOOKUP",
+.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
 },
 [OP_LOOKUPP] = {
 .op_func = (nfsd4op_func)nfsd4_lookupp,
 .op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID,
 .op_name = "OP_LOOKUPP",
+.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
 },
 [OP_NVERIFY] = {
 .op_func = (nfsd4op_func)nfsd4_nverify,
 .op_name = "OP_NVERIFY",
+.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
 },
 [OP_OPEN] = {
 .op_func = (nfsd4op_func)nfsd4_open,
@@ -2177,6 +2234,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 [OP_READLINK] = {
 .op_func = (nfsd4op_func)nfsd4_readlink,
 .op_name = "OP_READLINK",
+.op_rsize_bop = (nfsd4op_rsize)nfsd4_readlink_rsize,
 },
 [OP_REMOVE] = {
 .op_func = (nfsd4op_func)nfsd4_remove,
@@ -2215,6 +2273,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 .op_func = (nfsd4op_func)nfsd4_secinfo,
 .op_flags = OP_HANDLES_WRONGSEC,
 .op_name = "OP_SECINFO",
+.op_rsize_bop = (nfsd4op_rsize)nfsd4_secinfo_rsize,
 },
 [OP_SETATTR] = {
 .op_func = (nfsd4op_func)nfsd4_setattr,
@@ -2240,6 +2299,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 [OP_VERIFY] = {
 .op_func = (nfsd4op_func)nfsd4_verify,
 .op_name = "OP_VERIFY",
+.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
 },
 [OP_WRITE] = {
 .op_func = (nfsd4op_func)nfsd4_write,
@@ -2314,11 +2374,13 @@ static struct nfsd4_operation nfsd4_ops[] = {
 .op_func = (nfsd4op_func)nfsd4_secinfo_no_name,
 .op_flags = OP_HANDLES_WRONGSEC,
 .op_name = "OP_SECINFO_NO_NAME",
+.op_rsize_bop = (nfsd4op_rsize)nfsd4_secinfo_rsize,
 },
 [OP_TEST_STATEID] = {
 .op_func = (nfsd4op_func)nfsd4_test_stateid,
 .op_flags = ALLOWED_WITHOUT_FH,
 .op_name = "OP_TEST_STATEID",
+.op_rsize_bop = (nfsd4op_rsize)nfsd4_test_stateid_rsize,
 },
 [OP_FREE_STATEID] = {
 .op_func = (nfsd4op_func)nfsd4_free_stateid,
@@ -2332,6 +2394,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 .op_func = (nfsd4op_func)nfsd4_getdeviceinfo,
 .op_flags = ALLOWED_WITHOUT_FH,
 .op_name = "OP_GETDEVICEINFO",
+.op_rsize_bop = (nfsd4op_rsize)nfsd4_getdeviceinfo_rsize,
 },
 [OP_LAYOUTGET] = {
 .op_func = (nfsd4op_func)nfsd4_layoutget,
@@ -2381,6 +2444,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 [OP_SEEK] = {
 .op_func = (nfsd4op_func)nfsd4_seek,
 .op_name = "OP_SEEK",
+.op_rsize_bop = (nfsd4op_rsize)nfsd4_seek_rsize,
 },
 };
@@ -2425,14 +2489,11 @@ bool nfsd4_spo_must_allow(struct svc_rqst *rqstp)
 int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
-struct nfsd4_operation *opdesc;
-nfsd4op_rsize estimator;
 if (op->opnum == OP_ILLEGAL)
 return op_encode_hdr_size * sizeof(__be32);
-opdesc = OPDESC(op);
-estimator = opdesc->op_rsize_bop;
-return estimator ? estimator(rqstp, op) : PAGE_SIZE;
+BUG_ON(OPDESC(op)->op_rsize_bop == NULL);
+return OPDESC(op)->op_rsize_bop(rqstp, op);
 }
 void warn_on_nonidempotent_op(struct nfsd4_op *op)
@@ -2481,7 +2542,8 @@ struct svc_version nfsd_version4 = {
 .vs_proc = nfsd_procedures4,
 .vs_dispatch = nfsd_dispatch,
 .vs_xdrsize = NFS4_SVC_XDRSIZE,
-.vs_rpcb_optnl = 1,
+.vs_rpcb_optnl = true,
+.vs_need_cong_ctrl = true,
 };
 /*
...
@@ -2281,7 +2281,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r
 out_err:
 conn->cb_addr.ss_family = AF_UNSPEC;
 conn->cb_addrlen = 0;
-dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
+dprintk("NFSD: this client (clientid %08x/%08x) "
 "will not receive delegations\n",
 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
@@ -7012,23 +7012,24 @@ nfs4_state_start(void)
 ret = set_callback_cred();
 if (ret)
-return -ENOMEM;
+return ret;
 laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4");
 if (laundry_wq == NULL) {
 ret = -ENOMEM;
-goto out_recovery;
+goto out_cleanup_cred;
 }
 ret = nfsd4_create_callback_queue();
 if (ret)
 goto out_free_laundry;
 set_max_delegations();
 return 0;
 out_free_laundry:
 destroy_workqueue(laundry_wq);
-out_recovery:
+out_cleanup_cred:
+cleanup_callback_cred();
 return ret;
 }
@@ -7086,6 +7087,7 @@ nfs4_state_shutdown(void)
 {
 destroy_workqueue(laundry_wq);
 nfsd4_destroy_callback_queue();
+cleanup_callback_cred();
 }
 static void
...
@@ -58,7 +58,7 @@
 #define NFSDDBG_FACILITY NFSDDBG_XDR
-u32 nfsd_suppattrs[3][3] = {
+const u32 nfsd_suppattrs[3][3] = {
 {NFSD4_SUPPORTED_ATTRS_WORD0,
 NFSD4_SUPPORTED_ATTRS_WORD1,
 NFSD4_SUPPORTED_ATTRS_WORD2},
@@ -1250,7 +1250,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
 READ_BUF(16);
 p = xdr_decode_hyper(p, &write->wr_offset);
 write->wr_stable_how = be32_to_cpup(p++);
-if (write->wr_stable_how > 2)
+if (write->wr_stable_how > NFS_FILE_SYNC)
 goto xdr_error;
 write->wr_buflen = be32_to_cpup(p++);
@@ -1941,12 +1941,12 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 } else
 max_reply += nfsd4_max_reply(argp->rqstp, op);
 /*
-* OP_LOCK may return a conflicting lock. (Special case
-* because it will just skip encoding this if it runs
-* out of xdr buffer space, and it is the only operation
-* that behaves this way.)
+* OP_LOCK and OP_LOCKT may return a conflicting lock.
+* (Special case because it will just skip encoding this
+* if it runs out of xdr buffer space, and it is the only
+* operation that behaves this way.)
 */
-if (op->opnum == OP_LOCK)
+if (op->opnum == OP_LOCK || op->opnum == OP_LOCKT)
 max_reply += NFS4_OPAQUE_LIMIT;
 if (op->status) {
@@ -1966,9 +1966,13 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 DECODE_TAIL;
 }
-static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode)
+static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
+struct svc_export *exp)
 {
-if (IS_I_VERSION(inode)) {
+if (exp->ex_flags & NFSEXP_V4ROOT) {
+*p++ = cpu_to_be32(convert_to_wallclock(exp->cd->flush_time));
+*p++ = 0;
+} else if (IS_I_VERSION(inode)) {
 p = xdr_encode_hyper(p, inode->i_version);
 } else {
 *p++ = cpu_to_be32(stat->ctime.tv_sec);
@@ -2417,8 +2421,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
 if ((bmval2 & FATTR4_WORD2_SECURITY_LABEL) ||
 bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
+if (exp->ex_flags & NFSEXP_SECURITY_LABEL)
 err = security_inode_getsecctx(d_inode(dentry),
 &context, &contextlen);
+else
+err = -EOPNOTSUPP;
 contextsupport = (err == 0);
 if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
 if (err == -EOPNOTSUPP)
@@ -2490,7 +2497,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
 p = xdr_reserve_space(xdr, 8);
 if (!p)
 goto out_resource;
-p = encode_change(p, &stat, d_inode(dentry));
+p = encode_change(p, &stat, d_inode(dentry), exp);
 }
 if (bmval0 & FATTR4_WORD0_SIZE) {
 p = xdr_reserve_space(xdr, 8);
...
@@ -536,6 +536,19 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
 return rv;
 }
+static ssize_t
+nfsd_print_version_support(char *buf, int remaining, const char *sep,
+unsigned vers, unsigned minor)
+{
+const char *format = (minor == 0) ? "%s%c%u" : "%s%c%u.%u";
+bool supported = !!nfsd_vers(vers, NFSD_TEST);
+if (vers == 4 && !nfsd_minorversion(minor, NFSD_TEST))
+supported = false;
+return snprintf(buf, remaining, format, sep,
+supported ? '+' : '-', vers, minor);
+}
 static ssize_t __write_versions(struct file *file, char *buf, size_t size)
 {
 char *mesg = buf;
@@ -561,6 +574,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
 len = qword_get(&mesg, vers, size);
 if (len <= 0) return -EINVAL;
 do {
+enum vers_op cmd;
 sign = *vers;
 if (sign == '+' || sign == '-')
 num = simple_strtol((vers+1), &minorp, 0);
@@ -569,24 +583,22 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
 if (*minorp == '.') {
 if (num != 4)
 return -EINVAL;
-minor = simple_strtoul(minorp+1, NULL, 0);
-if (minor == 0)
-return -EINVAL;
-if (nfsd_minorversion(minor, sign == '-' ?
-NFSD_CLEAR : NFSD_SET) < 0)
-return -EINVAL;
-goto next;
-}
+if (kstrtouint(minorp+1, 0, &minor) < 0)
+return -EINVAL;
+} else
+minor = 0;
+cmd = sign == '-' ? NFSD_CLEAR : NFSD_SET;
 switch(num) {
 case 2:
 case 3:
+nfsd_vers(num, cmd);
+break;
 case 4:
-nfsd_vers(num, sign == '-' ? NFSD_CLEAR : NFSD_SET);
+if (nfsd_minorversion(minor, cmd) >= 0)
 break;
 default:
 return -EINVAL;
 }
-next:
 vers += len + 1;
 } while ((len = qword_get(&mesg, vers, size)) > 0);
 /* If all get turned off, turn them back on, as
@@ -599,35 +611,23 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
 len = 0;
 sep = "";
 remaining = SIMPLE_TRANSACTION_LIMIT;
-for (num=2 ; num <= 4 ; num++)
-if (nfsd_vers(num, NFSD_AVAIL)) {
-len = snprintf(buf, remaining, "%s%c%d", sep,
-nfsd_vers(num, NFSD_TEST)?'+':'-',
-num);
-sep = " ";
-if (len >= remaining)
-break;
-remaining -= len;
-buf += len;
-tlen += len;
-}
-if (nfsd_vers(4, NFSD_AVAIL))
-for (minor = 1; minor <= NFSD_SUPPORTED_MINOR_VERSION;
-minor++) {
-len = snprintf(buf, remaining, " %c4.%u",
-(nfsd_vers(4, NFSD_TEST) &&
-nfsd_minorversion(minor, NFSD_TEST)) ?
-'+' : '-',
-minor);
-if (len >= remaining)
-break;
-remaining -= len;
-buf += len;
-tlen += len;
+for (num=2 ; num <= 4 ; num++) {
+if (!nfsd_vers(num, NFSD_AVAIL))
+continue;
+minor = 0;
+do {
+len = nfsd_print_version_support(buf, remaining,
+sep, num, minor);
+if (len >= remaining)
+goto out;
+remaining -= len;
+buf += len;
+tlen += len;
+minor++;
+sep = " ";
+} while (num == 4 && minor <= NFSD_SUPPORTED_MINOR_VERSION);
+}
+out:
 len = snprintf(buf, remaining, "\n");
 if (len >= remaining)
 return -EINVAL;
...
@@ -362,16 +362,16 @@ void nfsd_lockd_shutdown(void);
 FATTR4_WORD2_MODE_UMASK | \
 NFSD4_2_SECURITY_ATTRS)
-extern u32 nfsd_suppattrs[3][3];
+extern const u32 nfsd_suppattrs[3][3];
-static inline bool bmval_is_subset(u32 *bm1, u32 *bm2)
+static inline bool bmval_is_subset(const u32 *bm1, const u32 *bm2)
 {
 return !((bm1[0] & ~bm2[0]) ||
 (bm1[1] & ~bm2[1]) ||
 (bm1[2] & ~bm2[2]));
 }
-static inline bool nfsd_attrs_supported(u32 minorversion, u32 *bmval)
+static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval)
 {
 return bmval_is_subset(bmval, nfsd_suppattrs[minorversion]);
 }
...
@@ -204,18 +204,14 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
 struct nfsd_attrstat *resp)
 {
 __be32 nfserr;
-int stable = 1;
 unsigned long cnt = argp->len;
 dprintk("nfsd: WRITE %s %d bytes at %d\n",
 SVCFH_fmt(&argp->fh),
 argp->len, argp->offset);
-nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL,
-argp->offset,
-rqstp->rq_vec, argp->vlen,
-&cnt,
-&stable);
+nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), argp->offset,
+rqstp->rq_vec, argp->vlen, &cnt, NFS_DATA_SYNC);
 return nfsd_return_attrs(nfserr, resp);
 }
...
@@ -153,6 +153,18 @@ int nfsd_vers(int vers, enum vers_op change)
 return 0;
 }
+static void
+nfsd_adjust_nfsd_versions4(void)
+{
+unsigned i;
+for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++) {
+if (nfsd_supported_minorversions[i])
+return;
+}
+nfsd_vers(4, NFSD_CLEAR);
+}
 int nfsd_minorversion(u32 minorversion, enum vers_op change)
 {
 if (minorversion > NFSD_SUPPORTED_MINOR_VERSION)
@@ -160,9 +172,11 @@ int nfsd_minorversion(u32 minorversion, enum vers_op change)
 switch(change) {
 case NFSD_SET:
 nfsd_supported_minorversions[minorversion] = true;
+nfsd_vers(4, NFSD_SET);
 break;
 case NFSD_CLEAR:
 nfsd_supported_minorversions[minorversion] = false;
+nfsd_adjust_nfsd_versions4();
 break;
 case NFSD_TEST:
 return nfsd_supported_minorversions[minorversion];
@@ -354,6 +368,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this,
 dprintk("nfsd_inet6addr_event: removed %pI6\n", &ifa->addr);
 sin6.sin6_family = AF_INET6;
 sin6.sin6_addr = ifa->addr;
+if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
+sin6.sin6_scope_id = ifa->idev->dev->ifindex;
 svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6);
 }
...
@@ -615,6 +615,7 @@ extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
 extern __be32 nfs4_check_open_reclaim(clientid_t *clid,
 struct nfsd4_compound_state *cstate, struct nfsd_net *nn);
 extern int set_callback_cred(void);
+extern void cleanup_callback_cred(void);
 extern void nfsd4_probe_callback(struct nfs4_client *clp);
 extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
 extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
...
@@ -377,7 +377,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
 __be32 err;
 int host_err;
 bool get_write_count;
-int size_change = 0;
+bool size_change = (iap->ia_valid & ATTR_SIZE);
 if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
 accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
@@ -390,11 +390,11 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
 /* Get inode */
 err = fh_verify(rqstp, fhp, ftype, accmode);
 if (err)
-goto out;
+return err;
 if (get_write_count) {
 host_err = fh_want_write(fhp);
 if (host_err)
-return nfserrno(host_err);
+goto out;
 }
 dentry = fhp->fh_dentry;
@@ -405,20 +405,28 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
 iap->ia_valid &= ~ATTR_MODE;
 if (!iap->ia_valid)
-goto out;
+return 0;
 nfsd_sanitize_attrs(inode, iap);
+if (check_guard && guardtime != inode->i_ctime.tv_sec)
+return nfserr_notsync;
 /*
 * The size case is special, it changes the file in addition to the
-* attributes.
+* attributes, and file systems don't expect it to be mixed with
+* "random" attribute changes. We thus split out the size change
+* into a separate call to ->setattr, and do the rest as a separate
+* setattr call.
 */
-if (iap->ia_valid & ATTR_SIZE) {
+if (size_change) {
 err = nfsd_get_write_access(rqstp, fhp, iap);
 if (err)
-goto out;
-size_change = 1;
+return err;
+}
+fh_lock(fhp);
+if (size_change) {
 /*
 * RFC5661, Section 18.30.4:
 * Changing the size of a file with SETATTR indirectly
@@ -426,29 +434,36 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
 *
 * (and similar for the older RFCs)
 */
-if (iap->ia_size != i_size_read(inode))
-iap->ia_valid |= ATTR_MTIME;
-}
-iap->ia_valid |= ATTR_CTIME;
-if (check_guard && guardtime != inode->i_ctime.tv_sec) {
-err = nfserr_notsync;
-goto out_put_write_access;
-}
-fh_lock(fhp);
-host_err = notify_change(dentry, iap, NULL);
-fh_unlock(fhp);
-err = nfserrno(host_err);
-out_put_write_access:
-if (size_change)
-put_write_access(inode);
-if (!err)
-err = nfserrno(commit_metadata(fhp));
-out:
-return err;
-}
+struct iattr size_attr = {
+.ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME,
+.ia_size = iap->ia_size,
+};
+host_err = notify_change(dentry, &size_attr, NULL);
+if (host_err)
+goto out_unlock;
+iap->ia_valid &= ~ATTR_SIZE;
+/*
+* Avoid the additional setattr call below if the only other
+* attribute that the client sends is the mtime, as we update
+* it as part of the size change above.
+*/
+if ((iap->ia_valid & ~ATTR_MTIME) == 0)
+goto out_unlock;
+}
+iap->ia_valid |= ATTR_CTIME;
+host_err = notify_change(dentry, iap, NULL);
+out_unlock:
+fh_unlock(fhp);
+if (size_change)
+put_write_access(inode);
+out:
+if (!host_err)
+host_err = commit_metadata(fhp);
+return nfserrno(host_err);
+}
 #if defined(CONFIG_NFSD_V4)
@@ -940,14 +955,12 @@ static int wait_for_concurrent_writes(struct file *file)
 __be32
 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 loff_t offset, struct kvec *vec, int vlen,
-unsigned long *cnt, int *stablep)
+unsigned long *cnt, int stable)
 {
 struct svc_export *exp;
-struct inode *inode;
 mm_segment_t oldfs;
 __be32 err = 0;
 int host_err;
-int stable = *stablep;
 int use_wgather;
 loff_t pos = offset;
 unsigned int pflags = current->flags;
@@ -962,13 +975,11 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 */
 current->flags |= PF_LESS_THROTTLE;
-inode = file_inode(file);
 exp = fhp->fh_export;
 use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
 if (!EX_ISSYNC(exp))
-stable = 0;
+stable = NFS_UNSTABLE;
 if (stable && !use_wgather)
 flags |= RWF_SYNC;
@@ -1035,35 +1046,22 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
 * N.B. After this call fhp needs an fh_put
 */
 __be32
-nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
-loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt,
-int *stablep)
+nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
+struct kvec *vec, int vlen, unsigned long *cnt, int stable)
 {
+struct file *file = NULL;
 __be32 err = 0;
 trace_write_start(rqstp, fhp, offset, vlen);
-if (file) {
-err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
-NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE);
-if (err)
-goto out;
-trace_write_opened(rqstp, fhp, offset, vlen);
-err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt,
-stablep);
-trace_write_io_done(rqstp, fhp, offset, vlen);
-} else {
-err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
-if (err)
-goto out;
-trace_write_opened(rqstp, fhp, offset, vlen);
-if (cnt)
-err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen,
-cnt, stablep);
-trace_write_io_done(rqstp, fhp, offset, vlen);
-fput(file);
-}
+err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
+if (err)
+goto out;
+trace_write_opened(rqstp, fhp, offset, vlen);
+err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable);
+trace_write_io_done(rqstp, fhp, offset, vlen);
+fput(file);
 out:
 trace_write_done(rqstp, fhp, offset, vlen);
 return err;
...
@@ -83,12 +83,12 @@ __be32 nfsd_readv(struct file *, loff_t, struct kvec *, int,
 unsigned long *);
 __be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
 loff_t, struct kvec *, int, unsigned long *);
-__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
-loff_t, struct kvec *,int, unsigned long *, int *);
+__be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
+struct kvec *, int, unsigned long *, int);
 __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
 struct file *file, loff_t offset,
 struct kvec *vec, int vlen, unsigned long *cnt,
-int *stablep);
+int stable);
 __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
 char *, int *);
 __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
...
@@ -204,8 +204,11 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd)
 kref_put(&h->ref, cd->cache_put);
 }
-static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h)
+static inline bool cache_is_expired(struct cache_detail *detail, struct cache_head *h)
 {
+if (!test_bit(CACHE_VALID, &h->flags))
+return false;
 return (h->expiry_time < seconds_since_boot()) ||
 (detail->flush_time >= h->last_refresh);
 }
@@ -227,6 +230,7 @@ extern void sunrpc_destroy_cache_detail(struct cache_detail *cd);
 extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *,
 umode_t, struct cache_detail *);
 extern void sunrpc_cache_unregister_pipefs(struct cache_detail *);
+extern void sunrpc_cache_unhash(struct cache_detail *, struct cache_head *);
 /* Must store cache_detail in seq_file->private if using next three functions */
 extern void *cache_seq_start(struct seq_file *file, loff_t *pos);
...
@@ -109,6 +109,15 @@ struct rpcrdma_msg {
 } rm_body;
 };
+/*
+* XDR sizes, in quads
+*/
+enum {
+rpcrdma_fixed_maxsz = 4,
+rpcrdma_segment_maxsz = 4,
+rpcrdma_readchunk_maxsz = 2 + rpcrdma_segment_maxsz,
+};
 /*
 * Smallest RPC/RDMA header: rm_xid through rm_type, then rm_nochunks
 */
...
@@ -400,10 +400,14 @@ struct svc_version {
 struct svc_procedure * vs_proc; /* per-procedure info */
 u32 vs_xdrsize; /* xdrsize needed for this version */
-unsigned int vs_hidden : 1, /* Don't register with portmapper.
-* Only used for nfsacl so far. */
-vs_rpcb_optnl:1;/* Don't care the result of register.
-* Only used for nfsv4. */
+/* Don't register with rpcbind */
+bool vs_hidden;
+/* Don't care if the rpcbind registration fails */
+bool vs_rpcb_optnl;
+/* Need xprt with congestion control */
+bool vs_need_cong_ctrl;
 /* Override dispatch function (e.g. when caching replies).
 * A return value of 0 means drop the request.
...
@@ -70,7 +70,7 @@ extern atomic_t rdma_stat_sq_prod;
 * completes.
 */
 struct svc_rdma_op_ctxt {
-struct list_head free;
+struct list_head list;
 struct svc_rdma_op_ctxt *read_hdr;
 struct svc_rdma_fastreg_mr *frmr;
 int hdr_count;
@@ -78,7 +78,6 @@ struct svc_rdma_op_ctxt {
 struct ib_cqe cqe;
 struct ib_cqe reg_cqe;
 struct ib_cqe inv_cqe;
-struct list_head dto_q;
 u32 byte_len;
 u32 position;
 struct svcxprt_rdma *xprt;
@@ -141,7 +140,8 @@ struct svcxprt_rdma {
 atomic_t sc_sq_avail; /* SQEs ready to be consumed */
 unsigned int sc_sq_depth; /* Depth of SQ */
 unsigned int sc_rq_depth; /* Depth of RQ */
-u32 sc_max_requests; /* Forward credits */
+__be32 sc_fc_credits; /* Forward credits */
+u32 sc_max_requests; /* Max requests */
 u32 sc_max_bc_requests;/* Backward credits */
 int sc_max_req_size; /* Size of each RQ WR buf */
@@ -171,7 +171,6 @@ struct svcxprt_rdma {
 wait_queue_head_t sc_send_wait; /* SQ exhaustion waitlist */
 unsigned long sc_flags;
-struct list_head sc_dto_q; /* DTO tasklet I/O pending Q */
 struct list_head sc_read_complete_q;
 struct work_struct sc_work;
 };
@@ -214,11 +213,7 @@ extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int);
 extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int);
 extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int,
 __be32, __be64, u32);
-extern void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *,
-struct rpcrdma_msg *,
-struct rpcrdma_msg *,
-enum rpcrdma_proc);
-extern int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *);
+extern unsigned int svc_rdma_xdr_get_reply_hdr_len(__be32 *rdma_resp);
 /* svc_rdma_recvfrom.c */
 extern int svc_rdma_recvfrom(struct svc_rqst *);
...
@@ -67,6 +67,7 @@ struct svc_xprt {
 #define XPT_CACHE_AUTH 11 /* cache auth info */
 #define XPT_LOCAL 12 /* connection from loopback interface */
 #define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */
+#define XPT_CONG_CTRL 14 /* has congestion control */
 struct svc_serv *xpt_server; /* service for transport */
 atomic_t xpt_reserved; /* space on outq that is rsvd */
...
@@ -32,7 +32,8 @@
 #define NFSEXP_ASYNC 0x0010
 #define NFSEXP_GATHERED_WRITES 0x0020
 #define NFSEXP_NOREADDIRPLUS 0x0040
-/* 80 100 currently unused */
+#define NFSEXP_SECURITY_LABEL 0x0080
+/* 0x100 currently unused */
 #define NFSEXP_NOHIDE 0x0200
 #define NFSEXP_NOSUBTREECHECK 0x0400
 #define NFSEXP_NOAUTHNLM 0x0800 /* Don't authenticate NLM requests - just trust */
@@ -53,7 +54,7 @@
 #define NFSEXP_PNFS 0x20000
 /* All flags that we claim to support. (Note we don't support NOACL.) */
-#define NFSEXP_ALLFLAGS 0x3FE7F
+#define NFSEXP_ALLFLAGS 0x3FEFF
 /* The flags that may vary depending on security flavor: */
 #define NFSEXP_SECINFO_FLAGS (NFSEXP_READONLY | NFSEXP_ROOTSQUASH \
...
@@ -1489,8 +1489,8 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
 case RPC_GSS_PROC_DESTROY:
 if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
 goto auth_err;
-rsci->h.expiry_time = seconds_since_boot();
-set_bit(CACHE_NEGATIVE, &rsci->h.flags);
+/* Delete the entry from the cache_list and call cache_put */
+sunrpc_cache_unhash(sn->rsc_cache, &rsci->h);
 if (resv->iov_len + 4 > PAGE_SIZE)
 goto drop;
 svc_putnl(resv, RPC_SUCCESS);
...
@@ -362,11 +362,6 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd)
 cache_purge(cd);
 spin_lock(&cache_list_lock);
 write_lock(&cd->hash_lock);
-if (cd->entries) {
-write_unlock(&cd->hash_lock);
-spin_unlock(&cache_list_lock);
-goto out;
-}
 if (current_detail == cd)
 current_detail = NULL;
 list_del_init(&cd->others);
@@ -376,9 +371,6 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd)
 /* module must be being unloaded so its safe to kill the worker */
 cancel_delayed_work_sync(&cache_cleaner);
 }
-return;
-out:
-printk(KERN_ERR "RPC: failed to unregister %s cache\n", cd->name);
 }
 EXPORT_SYMBOL_GPL(sunrpc_destroy_cache_detail);
@@ -497,13 +489,32 @@ EXPORT_SYMBOL_GPL(cache_flush);
 void cache_purge(struct cache_detail *detail)
 {
-time_t now = seconds_since_boot();
-if (detail->flush_time >= now)
-now = detail->flush_time + 1;
-/* 'now' is the maximum value any 'last_refresh' can have */
-detail->flush_time = now;
-detail->nextcheck = seconds_since_boot();
-cache_flush();
+struct cache_head *ch = NULL;
+struct hlist_head *head = NULL;
+struct hlist_node *tmp = NULL;
+int i = 0;
+write_lock(&detail->hash_lock);
+if (!detail->entries) {
+write_unlock(&detail->hash_lock);
+return;
+}
+dprintk("RPC: %d entries in %s cache\n", detail->entries, detail->name);
+for (i = 0; i < detail->hash_size; i++) {
+head = &detail->hash_table[i];
+hlist_for_each_entry_safe(ch, tmp, head, cache_list) {
+hlist_del_init(&ch->cache_list);
+detail->entries--;
+set_bit(CACHE_CLEANED, &ch->flags);
+write_unlock(&detail->hash_lock);
+cache_fresh_unlocked(ch, detail);
+cache_put(ch, detail);
+write_lock(&detail->hash_lock);
+}
+}
+write_unlock(&detail->hash_lock);
 }
 EXPORT_SYMBOL_GPL(cache_purge);
@@ -1855,3 +1866,15 @@ void sunrpc_cache_unregister_pipefs(struct cache_detail *cd)
 }
 EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs);
+void sunrpc_cache_unhash(struct cache_detail *cd, struct cache_head *h)
+{
+write_lock(&cd->hash_lock);
+if (!hlist_unhashed(&h->cache_list)){
+hlist_del_init(&h->cache_list);
+cd->entries--;
+write_unlock(&cd->hash_lock);
+cache_put(h, cd);
+} else
+write_unlock(&cd->hash_lock);
+}
+EXPORT_SYMBOL_GPL(sunrpc_cache_unhash);
...@@ -385,7 +385,7 @@ static int svc_uses_rpcbind(struct svc_serv *serv) ...@@ -385,7 +385,7 @@ static int svc_uses_rpcbind(struct svc_serv *serv)
for (i = 0; i < progp->pg_nvers; i++) { for (i = 0; i < progp->pg_nvers; i++) {
if (progp->pg_vers[i] == NULL) if (progp->pg_vers[i] == NULL)
continue; continue;
if (progp->pg_vers[i]->vs_hidden == 0) if (!progp->pg_vers[i]->vs_hidden)
return 1; return 1;
} }
} }
...@@ -976,6 +976,13 @@ int svc_register(const struct svc_serv *serv, struct net *net, ...@@ -976,6 +976,13 @@ int svc_register(const struct svc_serv *serv, struct net *net,
if (vers->vs_hidden) if (vers->vs_hidden)
continue; continue;
/*
* Don't register a UDP port if we need congestion
* control.
*/
if (vers->vs_need_cong_ctrl && proto == IPPROTO_UDP)
continue;
error = __svc_register(net, progp->pg_name, progp->pg_prog, error = __svc_register(net, progp->pg_name, progp->pg_prog,
i, family, proto, port); i, family, proto, port);
...@@ -1169,6 +1176,21 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) ...@@ -1169,6 +1176,21 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
!(versp = progp->pg_vers[vers])) !(versp = progp->pg_vers[vers]))
goto err_bad_vers; goto err_bad_vers;
/*
* Some protocol versions (namely NFSv4) require some form of
* congestion control. (See RFC 7530 section 3.1 paragraph 2)
* In other words, UDP is not allowed. We mark those when setting
* up the svc_xprt, and verify that here.
*
* The spec is not very clear about what error should be returned
* when someone tries to access a server that is listening on UDP
* for lower versions. RPC_PROG_MISMATCH seems to be the closest
* fit.
*/
if (versp->vs_need_cong_ctrl &&
!test_bit(XPT_CONG_CTRL, &rqstp->rq_xprt->xpt_flags))
goto err_bad_vers;
procp = versp->vs_proc + proc; procp = versp->vs_proc + proc;
if (proc >= versp->vs_nproc || !procp->pc_func) if (proc >= versp->vs_nproc || !procp->pc_func)
goto err_bad_proc; goto err_bad_proc;
......
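
Taken together, the hunks above enforce the NFSv4-over-UDP restriction at both ends: svc_register() skips the rpcbind registration of a UDP port for any version flagged vs_need_cong_ctrl, and svc_process_common() rejects a call for such a version when the transport lacks XPT_CONG_CTRL (surfacing as RPC_PROG_MISMATCH). A small user-space sketch of that two-sided policy, with illustrative types and names:

/* Hypothetical user-space sketch of the policy enforced above. */
#include <stdbool.h>
#include <stdio.h>

struct version   { bool need_cong_ctrl; };	/* e.g. NFSv4.x */
struct transport { bool cong_ctrl; };		/* TCP/RDMA: true, UDP: false */

/* Registration side: never advertise a UDP port for a version that
 * requires congestion control. */
static bool should_register(const struct version *v, bool is_udp)
{
	return !(v->need_cong_ctrl && is_udp);
}

/* Request-processing side: refuse the call (RPC_PROG_MISMATCH in the
 * real code) if the version needs congestion control but the
 * transport cannot provide it. */
static bool accept_call(const struct version *v, const struct transport *t)
{
	return !v->need_cong_ctrl || t->cong_ctrl;
}

int main(void)
{
	struct version nfsv4 = { .need_cong_ctrl = true };
	struct transport udp = { .cong_ctrl = false };
	struct transport tcp = { .cong_ctrl = true };

	printf("register v4 on UDP: %d\n", should_register(&nfsv4, true));	/* 0 */
	printf("accept v4 over UDP: %d\n", accept_call(&nfsv4, &udp));		/* 0 */
	printf("accept v4 over TCP: %d\n", accept_call(&nfsv4, &tcp));		/* 1 */
	return 0;
}

Compiled and run, this prints 0, 0, 1: a congestion-controlled version is neither advertised on UDP nor accepted over it, but is served normally over TCP (or RDMA, which sets XPT_CONG_CTRL further down).
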
...@@ -1306,6 +1306,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) ...@@ -1306,6 +1306,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_tcp_class, svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_tcp_class,
&svsk->sk_xprt, serv); &svsk->sk_xprt, serv);
set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags);
if (sk->sk_state == TCP_LISTEN) { if (sk->sk_state == TCP_LISTEN) {
dprintk("setting up TCP socket for listening\n"); dprintk("setting up TCP socket for listening\n");
set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags); set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags);
......
...@@ -201,19 +201,20 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst) ...@@ -201,19 +201,20 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
{ {
struct rpc_xprt *xprt = rqst->rq_xprt; struct rpc_xprt *xprt = rqst->rq_xprt;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)rqst->rq_buffer; __be32 *p;
int rc; int rc;
/* Space in the send buffer for an RPC/RDMA header is reserved /* Space in the send buffer for an RPC/RDMA header is reserved
* via xprt->tsh_size. * via xprt->tsh_size.
*/ */
headerp->rm_xid = rqst->rq_xid; p = rqst->rq_buffer;
headerp->rm_vers = rpcrdma_version; *p++ = rqst->rq_xid;
headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests); *p++ = rpcrdma_version;
headerp->rm_type = rdma_msg; *p++ = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests);
headerp->rm_body.rm_chunks[0] = xdr_zero; *p++ = rdma_msg;
headerp->rm_body.rm_chunks[1] = xdr_zero; *p++ = xdr_zero;
headerp->rm_body.rm_chunks[2] = xdr_zero; *p++ = xdr_zero;
*p = xdr_zero;
#ifdef SVCRDMA_BACKCHANNEL_DEBUG #ifdef SVCRDMA_BACKCHANNEL_DEBUG
pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer); pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer);
......
/* /*
* Copyright (c) 2016 Oracle. All rights reserved.
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
* *
* This software is available to you under a choice of one of two * This software is available to you under a choice of one of two
...@@ -47,102 +48,43 @@ ...@@ -47,102 +48,43 @@
#define RPCDBG_FACILITY RPCDBG_SVCXPRT #define RPCDBG_FACILITY RPCDBG_SVCXPRT
/* static __be32 *xdr_check_read_list(__be32 *p, __be32 *end)
* Decodes a read chunk list. The expected format is as follows:
* descrim : xdr_one
* position : __be32 offset into XDR stream
* handle : __be32 RKEY
* . . .
* end-of-list: xdr_zero
*/
static __be32 *decode_read_list(__be32 *va, __be32 *vaend)
{ {
struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va; __be32 *next;
while (ch->rc_discrim != xdr_zero) { while (*p++ != xdr_zero) {
if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) > next = p + rpcrdma_readchunk_maxsz - 1;
(unsigned long)vaend) { if (next > end)
dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch);
return NULL; return NULL;
p = next;
} }
ch++; return p;
}
return &ch->rc_position;
} }
/* static __be32 *xdr_check_write_list(__be32 *p, __be32 *end)
* Decodes a write chunk list. The expected format is as follows:
* descrim : xdr_one
* nchunks : <count>
* handle : __be32 RKEY ---+
* length : __be32 <len of segment> |
* offset : remove va + <count>
* . . . |
* ---+
*/
static __be32 *decode_write_list(__be32 *va, __be32 *vaend)
{ {
unsigned long start, end; __be32 *next;
int nchunks;
struct rpcrdma_write_array *ary =
(struct rpcrdma_write_array *)va;
/* Check for not write-array */
if (ary->wc_discrim == xdr_zero)
return &ary->wc_nchunks;
if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > while (*p++ != xdr_zero) {
(unsigned long)vaend) { next = p + 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend); if (next > end)
return NULL; return NULL;
p = next;
} }
nchunks = be32_to_cpu(ary->wc_nchunks); return p;
start = (unsigned long)&ary->wc_array[0];
end = (unsigned long)vaend;
if (nchunks < 0 ||
nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
(start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
ary, nchunks, vaend);
return NULL;
}
/*
* rs_length is the 2nd 4B field in wc_target and taking its
* address skips the list terminator
*/
return &ary->wc_array[nchunks].wc_target.rs_length;
} }
static __be32 *decode_reply_array(__be32 *va, __be32 *vaend) static __be32 *xdr_check_reply_chunk(__be32 *p, __be32 *end)
{ {
unsigned long start, end; __be32 *next;
int nchunks;
struct rpcrdma_write_array *ary = if (*p++ != xdr_zero) {
(struct rpcrdma_write_array *)va; next = p + 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
if (next > end)
/* Check for no reply-array */
if (ary->wc_discrim == xdr_zero)
return &ary->wc_nchunks;
if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
(unsigned long)vaend) {
dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
return NULL;
}
nchunks = be32_to_cpu(ary->wc_nchunks);
start = (unsigned long)&ary->wc_array[0];
end = (unsigned long)vaend;
if (nchunks < 0 ||
nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
(start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
ary, nchunks, vaend);
return NULL; return NULL;
p = next;
} }
return (__be32 *)&ary->wc_array[nchunks]; return p;
} }
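
The three xdr_check_* helpers above replace the struct-cast parsers with plain pointer walks over the received header: each chunk contributes a fixed number of 32-bit XDR words, and parsing fails as soon as the next chunk would run past the end of the buffer. A hedged user-space sketch of the same bounded walk for a counted chunk list (SEGMENT_WORDS and the helper name are assumptions, not kernel identifiers):

/*
 * User-space sketch of a bounded chunk-list walk, in the spirit of
 * xdr_check_write_list() above.  Each present chunk is a discriminator
 * word, a segment count, and count * SEGMENT_WORDS words; the list is
 * terminated by an xdr_zero discriminator.
 */
#include <arpa/inet.h>	/* ntohl() */
#include <stddef.h>
#include <stdint.h>

#define SEGMENT_WORDS 4	/* handle, length, 64-bit offset */

static const uint32_t *check_chunk_list(const uint32_t *p, const uint32_t *end)
{
	uint32_t nsegs;

	for (;;) {
		if (p >= end)			/* room for the discriminator? */
			return NULL;
		if (ntohl(*p++) == 0)		/* xdr_zero ends the list */
			return p;
		if (p >= end)			/* room for the segment count? */
			return NULL;
		nsegs = ntohl(*p++);
		if (nsegs > (size_t)(end - p) / SEGMENT_WORDS)
			return NULL;		/* segments would overrun the header */
		p += (size_t)nsegs * SEGMENT_WORDS;
	}
}

On success the helpers return a pointer just past the list they checked, which svc_rdma_xdr_decode_req() below uses to compute the header length and advance head[0].iov_base past the transport header.
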
/** /**
...@@ -158,87 +100,71 @@ static __be32 *decode_reply_array(__be32 *va, __be32 *vaend) ...@@ -158,87 +100,71 @@ static __be32 *decode_reply_array(__be32 *va, __be32 *vaend)
*/ */
int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg) int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
{ {
struct rpcrdma_msg *rmsgp; __be32 *p, *end, *rdma_argp;
__be32 *va, *vaend; unsigned int hdr_len;
unsigned int len;
u32 hdr_len;
/* Verify that there's enough bytes for header + something */ /* Verify that there's enough bytes for header + something */
if (rq_arg->len <= RPCRDMA_HDRLEN_ERR) { if (rq_arg->len <= RPCRDMA_HDRLEN_ERR)
dprintk("svcrdma: header too short = %d\n", goto out_short;
rq_arg->len);
return -EINVAL;
}
rmsgp = (struct rpcrdma_msg *)rq_arg->head[0].iov_base; rdma_argp = rq_arg->head[0].iov_base;
if (rmsgp->rm_vers != rpcrdma_version) { if (*(rdma_argp + 1) != rpcrdma_version)
dprintk("%s: bad version %u\n", __func__, goto out_version;
be32_to_cpu(rmsgp->rm_vers));
return -EPROTONOSUPPORT;
}
switch (be32_to_cpu(rmsgp->rm_type)) { switch (*(rdma_argp + 3)) {
case RDMA_MSG: case rdma_msg:
case RDMA_NOMSG: case rdma_nomsg:
break; break;
case RDMA_DONE: case rdma_done:
/* Just drop it */ goto out_drop;
dprintk("svcrdma: dropping RDMA_DONE message\n");
return 0;
case RDMA_ERROR: case rdma_error:
/* Possible if this is a backchannel reply. goto out_drop;
* XXX: We should cancel this XID, though.
*/
dprintk("svcrdma: dropping RDMA_ERROR message\n");
return 0;
case RDMA_MSGP:
/* Pull in the extra for the padded case, bump our pointer */
rmsgp->rm_body.rm_padded.rm_align =
be32_to_cpu(rmsgp->rm_body.rm_padded.rm_align);
rmsgp->rm_body.rm_padded.rm_thresh =
be32_to_cpu(rmsgp->rm_body.rm_padded.rm_thresh);
va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
rq_arg->head[0].iov_base = va;
len = (u32)((unsigned long)va - (unsigned long)rmsgp);
rq_arg->head[0].iov_len -= len;
if (len > rq_arg->len)
return -EINVAL;
return len;
default: default:
dprintk("svcrdma: bad rdma procedure (%u)\n", goto out_proc;
be32_to_cpu(rmsgp->rm_type));
return -EINVAL;
} }
/* The chunk list may contain either a read chunk list or a write end = (__be32 *)((unsigned long)rdma_argp + rq_arg->len);
* chunk list and a reply chunk list. p = xdr_check_read_list(rdma_argp + 4, end);
*/ if (!p)
va = &rmsgp->rm_body.rm_chunks[0]; goto out_inval;
vaend = (__be32 *)((unsigned long)rmsgp + rq_arg->len); p = xdr_check_write_list(p, end);
va = decode_read_list(va, vaend); if (!p)
if (!va) { goto out_inval;
dprintk("svcrdma: failed to decode read list\n"); p = xdr_check_reply_chunk(p, end);
return -EINVAL; if (!p)
} goto out_inval;
va = decode_write_list(va, vaend); if (p > end)
if (!va) { goto out_inval;
dprintk("svcrdma: failed to decode write list\n");
rq_arg->head[0].iov_base = p;
hdr_len = (unsigned long)p - (unsigned long)rdma_argp;
rq_arg->head[0].iov_len -= hdr_len;
return hdr_len;
out_short:
dprintk("svcrdma: header too short = %d\n", rq_arg->len);
return -EINVAL; return -EINVAL;
}
va = decode_reply_array(va, vaend); out_version:
if (!va) { dprintk("svcrdma: bad xprt version: %u\n",
dprintk("svcrdma: failed to decode reply chunk\n"); be32_to_cpup(rdma_argp + 1));
return -EPROTONOSUPPORT;
out_drop:
dprintk("svcrdma: dropping RDMA_DONE/ERROR message\n");
return 0;
out_proc:
dprintk("svcrdma: bad rdma procedure (%u)\n",
be32_to_cpup(rdma_argp + 3));
return -EINVAL; return -EINVAL;
}
rq_arg->head[0].iov_base = va; out_inval:
hdr_len = (unsigned long)va - (unsigned long)rmsgp; dprintk("svcrdma: failed to parse transport header\n");
rq_arg->head[0].iov_len -= hdr_len; return -EINVAL;
return hdr_len;
} }
int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt, int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
...@@ -249,7 +175,7 @@ int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt, ...@@ -249,7 +175,7 @@ int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
*va++ = rmsgp->rm_xid; *va++ = rmsgp->rm_xid;
*va++ = rmsgp->rm_vers; *va++ = rmsgp->rm_vers;
*va++ = cpu_to_be32(xprt->sc_max_requests); *va++ = xprt->sc_fc_credits;
*va++ = rdma_error; *va++ = rdma_error;
*va++ = cpu_to_be32(err); *va++ = cpu_to_be32(err);
if (err == ERR_VERS) { if (err == ERR_VERS) {
...@@ -260,32 +186,35 @@ int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt, ...@@ -260,32 +186,35 @@ int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
return (int)((unsigned long)va - (unsigned long)startp); return (int)((unsigned long)va - (unsigned long)startp);
} }
int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp) /**
 * svc_rdma_xdr_get_reply_hdr_len - Get length of Reply transport header
* @rdma_resp: buffer containing Reply transport header
*
* Returns length of transport header, in bytes.
*/
unsigned int svc_rdma_xdr_get_reply_hdr_len(__be32 *rdma_resp)
{ {
struct rpcrdma_write_array *wr_ary; unsigned int nsegs;
__be32 *p;
/* There is no read-list in a reply */ p = rdma_resp;
/* skip write list */ /* RPC-over-RDMA V1 replies never have a Read list. */
wr_ary = (struct rpcrdma_write_array *) p += rpcrdma_fixed_maxsz + 1;
&rmsgp->rm_body.rm_chunks[1];
if (wr_ary->wc_discrim) /* Skip Write list. */
wr_ary = (struct rpcrdma_write_array *) while (*p++ != xdr_zero) {
&wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)]. nsegs = be32_to_cpup(p++);
wc_target.rs_length; p += nsegs * rpcrdma_segment_maxsz;
else }
wr_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_nchunks; /* Skip Reply chunk. */
if (*p++ != xdr_zero) {
/* skip reply array */ nsegs = be32_to_cpup(p++);
if (wr_ary->wc_discrim) p += nsegs * rpcrdma_segment_maxsz;
wr_ary = (struct rpcrdma_write_array *) }
&wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)];
else return (unsigned long)p - (unsigned long)rdma_resp;
wr_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_nchunks;
return (unsigned long) wr_ary - (unsigned long) rmsgp;
} }
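
As a worked example of the computation above (assuming the usual RPC-over-RDMA sizes of rpcrdma_fixed_maxsz = 4 words and rpcrdma_segment_maxsz = 4 words): a reply header carrying one Write chunk with two segments and no Reply chunk spans 4 fixed words, 1 word for the empty Read list, 1 + 1 + 2 × 4 = 10 words for the Write chunk, 1 word terminating the Write list, and 1 word for the absent Reply chunk, 17 words in total, so svc_rdma_xdr_get_reply_hdr_len() returns 68 bytes, which send_reply() uses as sge[0].length.
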
void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks) void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
...@@ -326,19 +255,3 @@ void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary, ...@@ -326,19 +255,3 @@ void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
seg->rs_offset = rs_offset; seg->rs_offset = rs_offset;
seg->rs_length = cpu_to_be32(write_len); seg->rs_length = cpu_to_be32(write_len);
} }
void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rdma_argp,
struct rpcrdma_msg *rdma_resp,
enum rpcrdma_proc rdma_type)
{
rdma_resp->rm_xid = rdma_argp->rm_xid;
rdma_resp->rm_vers = rdma_argp->rm_vers;
rdma_resp->rm_credit = cpu_to_be32(xprt->sc_max_requests);
rdma_resp->rm_type = cpu_to_be32(rdma_type);
/* Encode <nul> chunks lists */
rdma_resp->rm_body.rm_chunks[0] = xdr_zero;
rdma_resp->rm_body.rm_chunks[1] = xdr_zero;
rdma_resp->rm_body.rm_chunks[2] = xdr_zero;
}
...@@ -606,26 +606,24 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) ...@@ -606,26 +606,24 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
dprintk("svcrdma: rqstp=%p\n", rqstp); dprintk("svcrdma: rqstp=%p\n", rqstp);
spin_lock_bh(&rdma_xprt->sc_rq_dto_lock); spin_lock(&rdma_xprt->sc_rq_dto_lock);
if (!list_empty(&rdma_xprt->sc_read_complete_q)) { if (!list_empty(&rdma_xprt->sc_read_complete_q)) {
ctxt = list_entry(rdma_xprt->sc_read_complete_q.next, ctxt = list_first_entry(&rdma_xprt->sc_read_complete_q,
struct svc_rdma_op_ctxt, struct svc_rdma_op_ctxt, list);
dto_q); list_del(&ctxt->list);
list_del_init(&ctxt->dto_q); spin_unlock(&rdma_xprt->sc_rq_dto_lock);
spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
rdma_read_complete(rqstp, ctxt); rdma_read_complete(rqstp, ctxt);
goto complete; goto complete;
} else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) { } else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next, ctxt = list_first_entry(&rdma_xprt->sc_rq_dto_q,
struct svc_rdma_op_ctxt, struct svc_rdma_op_ctxt, list);
dto_q); list_del(&ctxt->list);
list_del_init(&ctxt->dto_q);
} else { } else {
atomic_inc(&rdma_stat_rq_starve); atomic_inc(&rdma_stat_rq_starve);
clear_bit(XPT_DATA, &xprt->xpt_flags); clear_bit(XPT_DATA, &xprt->xpt_flags);
ctxt = NULL; ctxt = NULL;
} }
spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock); spin_unlock(&rdma_xprt->sc_rq_dto_lock);
if (!ctxt) { if (!ctxt) {
/* This is the EAGAIN path. The svc_recv routine will /* This is the EAGAIN path. The svc_recv routine will
* return -EAGAIN, the nfsd thread will go to call into * return -EAGAIN, the nfsd thread will go to call into
......
...@@ -476,7 +476,8 @@ static int send_reply(struct svcxprt_rdma *rdma, ...@@ -476,7 +476,8 @@ static int send_reply(struct svcxprt_rdma *rdma,
/* Prepare the SGE for the RPCRDMA Header */ /* Prepare the SGE for the RPCRDMA Header */
ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey; ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); ctxt->sge[0].length =
svc_rdma_xdr_get_reply_hdr_len((__be32 *)rdma_resp);
ctxt->sge[0].addr = ctxt->sge[0].addr =
ib_dma_map_page(rdma->sc_cm_id->device, page, 0, ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
ctxt->sge[0].length, DMA_TO_DEVICE); ctxt->sge[0].length, DMA_TO_DEVICE);
...@@ -559,12 +560,12 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) ...@@ -559,12 +560,12 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
struct rpcrdma_msg *rdma_argp; struct rpcrdma_msg *rdma_argp;
struct rpcrdma_msg *rdma_resp; struct rpcrdma_msg *rdma_resp;
struct rpcrdma_write_array *wr_ary, *rp_ary; struct rpcrdma_write_array *wr_ary, *rp_ary;
enum rpcrdma_proc reply_type;
int ret; int ret;
int inline_bytes; int inline_bytes;
struct page *res_page; struct page *res_page;
struct svc_rdma_req_map *vec; struct svc_rdma_req_map *vec;
u32 inv_rkey; u32 inv_rkey;
__be32 *p;
dprintk("svcrdma: sending response for rqstp=%p\n", rqstp); dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
...@@ -596,12 +597,17 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) ...@@ -596,12 +597,17 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
if (!res_page) if (!res_page)
goto err0; goto err0;
rdma_resp = page_address(res_page); rdma_resp = page_address(res_page);
if (rp_ary)
reply_type = RDMA_NOMSG; p = &rdma_resp->rm_xid;
else *p++ = rdma_argp->rm_xid;
reply_type = RDMA_MSG; *p++ = rdma_argp->rm_vers;
svc_rdma_xdr_encode_reply_header(rdma, rdma_argp, *p++ = rdma->sc_fc_credits;
rdma_resp, reply_type); *p++ = rp_ary ? rdma_nomsg : rdma_msg;
/* Start with empty chunks */
*p++ = xdr_zero;
*p++ = xdr_zero;
*p = xdr_zero;
/* Send any write-chunk data and build resp write-list */ /* Send any write-chunk data and build resp write-list */
if (wr_ary) { if (wr_ary) {
......
...@@ -157,8 +157,7 @@ static struct svc_rdma_op_ctxt *alloc_ctxt(struct svcxprt_rdma *xprt, ...@@ -157,8 +157,7 @@ static struct svc_rdma_op_ctxt *alloc_ctxt(struct svcxprt_rdma *xprt,
ctxt = kmalloc(sizeof(*ctxt), flags); ctxt = kmalloc(sizeof(*ctxt), flags);
if (ctxt) { if (ctxt) {
ctxt->xprt = xprt; ctxt->xprt = xprt;
INIT_LIST_HEAD(&ctxt->free); INIT_LIST_HEAD(&ctxt->list);
INIT_LIST_HEAD(&ctxt->dto_q);
} }
return ctxt; return ctxt;
} }
...@@ -180,7 +179,7 @@ static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt) ...@@ -180,7 +179,7 @@ static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt)
dprintk("svcrdma: No memory for RDMA ctxt\n"); dprintk("svcrdma: No memory for RDMA ctxt\n");
return false; return false;
} }
list_add(&ctxt->free, &xprt->sc_ctxts); list_add(&ctxt->list, &xprt->sc_ctxts);
} }
return true; return true;
} }
...@@ -189,15 +188,15 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) ...@@ -189,15 +188,15 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
{ {
struct svc_rdma_op_ctxt *ctxt = NULL; struct svc_rdma_op_ctxt *ctxt = NULL;
spin_lock_bh(&xprt->sc_ctxt_lock); spin_lock(&xprt->sc_ctxt_lock);
xprt->sc_ctxt_used++; xprt->sc_ctxt_used++;
if (list_empty(&xprt->sc_ctxts)) if (list_empty(&xprt->sc_ctxts))
goto out_empty; goto out_empty;
ctxt = list_first_entry(&xprt->sc_ctxts, ctxt = list_first_entry(&xprt->sc_ctxts,
struct svc_rdma_op_ctxt, free); struct svc_rdma_op_ctxt, list);
list_del_init(&ctxt->free); list_del(&ctxt->list);
spin_unlock_bh(&xprt->sc_ctxt_lock); spin_unlock(&xprt->sc_ctxt_lock);
out: out:
ctxt->count = 0; ctxt->count = 0;
...@@ -209,15 +208,15 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) ...@@ -209,15 +208,15 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
/* Either pre-allocation missed the mark, or send /* Either pre-allocation missed the mark, or send
* queue accounting is broken. * queue accounting is broken.
*/ */
spin_unlock_bh(&xprt->sc_ctxt_lock); spin_unlock(&xprt->sc_ctxt_lock);
ctxt = alloc_ctxt(xprt, GFP_NOIO); ctxt = alloc_ctxt(xprt, GFP_NOIO);
if (ctxt) if (ctxt)
goto out; goto out;
spin_lock_bh(&xprt->sc_ctxt_lock); spin_lock(&xprt->sc_ctxt_lock);
xprt->sc_ctxt_used--; xprt->sc_ctxt_used--;
spin_unlock_bh(&xprt->sc_ctxt_lock); spin_unlock(&xprt->sc_ctxt_lock);
WARN_ONCE(1, "svcrdma: empty RDMA ctxt list?\n"); WARN_ONCE(1, "svcrdma: empty RDMA ctxt list?\n");
return NULL; return NULL;
} }
...@@ -254,10 +253,10 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) ...@@ -254,10 +253,10 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
for (i = 0; i < ctxt->count; i++) for (i = 0; i < ctxt->count; i++)
put_page(ctxt->pages[i]); put_page(ctxt->pages[i]);
spin_lock_bh(&xprt->sc_ctxt_lock); spin_lock(&xprt->sc_ctxt_lock);
xprt->sc_ctxt_used--; xprt->sc_ctxt_used--;
list_add(&ctxt->free, &xprt->sc_ctxts); list_add(&ctxt->list, &xprt->sc_ctxts);
spin_unlock_bh(&xprt->sc_ctxt_lock); spin_unlock(&xprt->sc_ctxt_lock);
} }
static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt) static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt)
...@@ -266,8 +265,8 @@ static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt) ...@@ -266,8 +265,8 @@ static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt)
struct svc_rdma_op_ctxt *ctxt; struct svc_rdma_op_ctxt *ctxt;
ctxt = list_first_entry(&xprt->sc_ctxts, ctxt = list_first_entry(&xprt->sc_ctxts,
struct svc_rdma_op_ctxt, free); struct svc_rdma_op_ctxt, list);
list_del(&ctxt->free); list_del(&ctxt->list);
kfree(ctxt); kfree(ctxt);
} }
} }
...@@ -404,7 +403,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) ...@@ -404,7 +403,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
/* All wc fields are now known to be valid */ /* All wc fields are now known to be valid */
ctxt->byte_len = wc->byte_len; ctxt->byte_len = wc->byte_len;
spin_lock(&xprt->sc_rq_dto_lock); spin_lock(&xprt->sc_rq_dto_lock);
list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q); list_add_tail(&ctxt->list, &xprt->sc_rq_dto_q);
spin_unlock(&xprt->sc_rq_dto_lock); spin_unlock(&xprt->sc_rq_dto_lock);
set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
...@@ -525,7 +524,7 @@ void svc_rdma_wc_read(struct ib_cq *cq, struct ib_wc *wc) ...@@ -525,7 +524,7 @@ void svc_rdma_wc_read(struct ib_cq *cq, struct ib_wc *wc)
read_hdr = ctxt->read_hdr; read_hdr = ctxt->read_hdr;
spin_lock(&xprt->sc_rq_dto_lock); spin_lock(&xprt->sc_rq_dto_lock);
list_add_tail(&read_hdr->dto_q, list_add_tail(&read_hdr->list,
&xprt->sc_read_complete_q); &xprt->sc_read_complete_q);
spin_unlock(&xprt->sc_rq_dto_lock); spin_unlock(&xprt->sc_rq_dto_lock);
...@@ -557,7 +556,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, ...@@ -557,7 +556,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
return NULL; return NULL;
svc_xprt_init(&init_net, &svc_rdma_class, &cma_xprt->sc_xprt, serv); svc_xprt_init(&init_net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
INIT_LIST_HEAD(&cma_xprt->sc_accept_q); INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
INIT_LIST_HEAD(&cma_xprt->sc_dto_q);
INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
INIT_LIST_HEAD(&cma_xprt->sc_frmr_q); INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
...@@ -571,6 +569,14 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, ...@@ -571,6 +569,14 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
spin_lock_init(&cma_xprt->sc_ctxt_lock); spin_lock_init(&cma_xprt->sc_ctxt_lock);
spin_lock_init(&cma_xprt->sc_map_lock); spin_lock_init(&cma_xprt->sc_map_lock);
/*
* Note that this implies that the underlying transport support
* has some form of congestion control (see RFC 7530 section 3.1
* paragraph 2). For now, we assume that all supported RDMA
* transports are suitable here.
*/
set_bit(XPT_CONG_CTRL, &cma_xprt->sc_xprt.xpt_flags);
if (listener) if (listener)
set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
...@@ -923,14 +929,14 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma) ...@@ -923,14 +929,14 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
{ {
struct svc_rdma_fastreg_mr *frmr = NULL; struct svc_rdma_fastreg_mr *frmr = NULL;
spin_lock_bh(&rdma->sc_frmr_q_lock); spin_lock(&rdma->sc_frmr_q_lock);
if (!list_empty(&rdma->sc_frmr_q)) { if (!list_empty(&rdma->sc_frmr_q)) {
frmr = list_entry(rdma->sc_frmr_q.next, frmr = list_entry(rdma->sc_frmr_q.next,
struct svc_rdma_fastreg_mr, frmr_list); struct svc_rdma_fastreg_mr, frmr_list);
list_del_init(&frmr->frmr_list); list_del_init(&frmr->frmr_list);
frmr->sg_nents = 0; frmr->sg_nents = 0;
} }
spin_unlock_bh(&rdma->sc_frmr_q_lock); spin_unlock(&rdma->sc_frmr_q_lock);
if (frmr) if (frmr)
return frmr; return frmr;
...@@ -943,10 +949,10 @@ void svc_rdma_put_frmr(struct svcxprt_rdma *rdma, ...@@ -943,10 +949,10 @@ void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
if (frmr) { if (frmr) {
ib_dma_unmap_sg(rdma->sc_cm_id->device, ib_dma_unmap_sg(rdma->sc_cm_id->device,
frmr->sg, frmr->sg_nents, frmr->direction); frmr->sg, frmr->sg_nents, frmr->direction);
spin_lock_bh(&rdma->sc_frmr_q_lock); spin_lock(&rdma->sc_frmr_q_lock);
WARN_ON_ONCE(!list_empty(&frmr->frmr_list)); WARN_ON_ONCE(!list_empty(&frmr->frmr_list));
list_add(&frmr->frmr_list, &rdma->sc_frmr_q); list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
spin_unlock_bh(&rdma->sc_frmr_q_lock); spin_unlock(&rdma->sc_frmr_q_lock);
} }
} }
...@@ -1002,6 +1008,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) ...@@ -1002,6 +1008,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
newxprt->sc_max_req_size = svcrdma_max_req_size; newxprt->sc_max_req_size = svcrdma_max_req_size;
newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr, newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr,
svcrdma_max_requests); svcrdma_max_requests);
newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests);
newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr, newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr,
svcrdma_max_bc_requests); svcrdma_max_bc_requests);
newxprt->sc_rq_depth = newxprt->sc_max_requests + newxprt->sc_rq_depth = newxprt->sc_max_requests +
...@@ -1027,13 +1034,13 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) ...@@ -1027,13 +1034,13 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
goto errout; goto errout;
} }
newxprt->sc_sq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_sq_depth, newxprt->sc_sq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_sq_depth,
0, IB_POLL_SOFTIRQ); 0, IB_POLL_WORKQUEUE);
if (IS_ERR(newxprt->sc_sq_cq)) { if (IS_ERR(newxprt->sc_sq_cq)) {
dprintk("svcrdma: error creating SQ CQ for connect request\n"); dprintk("svcrdma: error creating SQ CQ for connect request\n");
goto errout; goto errout;
} }
newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_rq_depth, newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_rq_depth,
0, IB_POLL_SOFTIRQ); 0, IB_POLL_WORKQUEUE);
if (IS_ERR(newxprt->sc_rq_cq)) { if (IS_ERR(newxprt->sc_rq_cq)) {
dprintk("svcrdma: error creating RQ CQ for connect request\n"); dprintk("svcrdma: error creating RQ CQ for connect request\n");
goto errout; goto errout;
...@@ -1213,20 +1220,18 @@ static void __svc_rdma_free(struct work_struct *work) ...@@ -1213,20 +1220,18 @@ static void __svc_rdma_free(struct work_struct *work)
*/ */
while (!list_empty(&rdma->sc_read_complete_q)) { while (!list_empty(&rdma->sc_read_complete_q)) {
struct svc_rdma_op_ctxt *ctxt; struct svc_rdma_op_ctxt *ctxt;
ctxt = list_entry(rdma->sc_read_complete_q.next, ctxt = list_first_entry(&rdma->sc_read_complete_q,
struct svc_rdma_op_ctxt, struct svc_rdma_op_ctxt, list);
dto_q); list_del(&ctxt->list);
list_del_init(&ctxt->dto_q);
svc_rdma_put_context(ctxt, 1); svc_rdma_put_context(ctxt, 1);
} }
/* Destroy queued, but not processed recv completions */ /* Destroy queued, but not processed recv completions */
while (!list_empty(&rdma->sc_rq_dto_q)) { while (!list_empty(&rdma->sc_rq_dto_q)) {
struct svc_rdma_op_ctxt *ctxt; struct svc_rdma_op_ctxt *ctxt;
ctxt = list_entry(rdma->sc_rq_dto_q.next, ctxt = list_first_entry(&rdma->sc_rq_dto_q,
struct svc_rdma_op_ctxt, struct svc_rdma_op_ctxt, list);
dto_q); list_del(&ctxt->list);
list_del_init(&ctxt->dto_q);
svc_rdma_put_context(ctxt, 1); svc_rdma_put_context(ctxt, 1);
} }
......