Commit 8313064c authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'nfsd-4.11' of git://linux-nfs.org/~bfields/linux

Pull nfsd updates from Bruce Fields:
 "The nfsd update this round is mainly a lot of miscellaneous cleanups
  and bugfixes.

  A couple changes could theoretically break working setups on upgrade.
  I don't expect complaints in practice, but they seem worth calling out
  just in case:

   - NFS security labels are now off by default; a new security_label
     export flag reenables it per export. But, having them on by default
     is a disaster, as it generally only makes sense if all your clients
     and servers have similar enough selinux policies. Thanks to Jason
     Tibbitts for pointing this out.

   - NFSv4/UDP support is off. It was never really supported, and the
     spec explicitly forbids it. We only ever left it on out of
     laziness; thanks to Jeff Layton for finally fixing that"

* tag 'nfsd-4.11' of git://linux-nfs.org/~bfields/linux: (34 commits)
  nfsd: Fix display of the version string
  nfsd: fix configuration of supported minor versions
  sunrpc: don't register UDP port with rpcbind when version needs congestion control
  nfs/nfsd/sunrpc: enforce transport requirements for NFSv4
  sunrpc: flag transports as having congestion control
  sunrpc: turn bitfield flags in svc_version into bools
  nfsd: remove superfluous KERN_INFO
  nfsd: special case truncates some more
  nfsd: minor nfsd_setattr cleanup
  NFSD: Reserve adequate space for LOCKT operation
  NFSD: Get response size before operation for all RPCs
  nfsd/callback: Drop a useless data copy when comparing sessionid
  nfsd/callback: skip the callback tag
  nfsd/callback: Cleanup callback cred on shutdown
  nfsd/idmap: return nfserr_inval for 0-length names
  SUNRPC/Cache: Always treat the invalid cache as unexpired
  SUNRPC: Drop all entries from cache_detail when cache_purge()
  svcrdma: Poll CQs in "workqueue" mode
  svcrdma: Combine list fields in struct svc_rdma_op_ctxt
  svcrdma: Remove unused sc_dto_q field
  ...
parents b2deee2d ff7d1179
......@@ -322,6 +322,8 @@ static int lockd_inet6addr_event(struct notifier_block *this,
dprintk("lockd_inet6addr_event: removed %pI6\n", &ifa->addr);
sin6.sin6_family = AF_INET6;
sin6.sin6_addr = ifa->addr;
if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
sin6.sin6_scope_id = ifa->idev->dev->ifindex;
svc_age_temp_xprts_now(nlmsvc_rqst->rq_server,
(struct sockaddr *)&sin6);
}
......
......@@ -1083,7 +1083,8 @@ struct svc_version nfs4_callback_version1 = {
.vs_proc = nfs4_callback_procedures1,
.vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
.vs_dispatch = NULL,
.vs_hidden = 1,
.vs_hidden = true,
.vs_need_cong_ctrl = true,
};
struct svc_version nfs4_callback_version4 = {
......@@ -1092,5 +1093,6 @@ struct svc_version nfs4_callback_version4 = {
.vs_proc = nfs4_callback_procedures1,
.vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
.vs_dispatch = NULL,
.vs_hidden = 1,
.vs_hidden = true,
.vs_need_cong_ctrl = true,
};
......@@ -1102,6 +1102,7 @@ static struct flags {
{ NFSEXP_NOAUTHNLM, {"insecure_locks", ""}},
{ NFSEXP_V4ROOT, {"v4root", ""}},
{ NFSEXP_PNFS, {"pnfs", ""}},
{ NFSEXP_SECURITY_LABEL, {"security_label", ""}},
{ 0, {"", ""}}
};
......
......@@ -376,5 +376,4 @@ struct svc_version nfsd_acl_version2 = {
.vs_proc = nfsd_acl_procedures2,
.vs_dispatch = nfsd_dispatch,
.vs_xdrsize = NFS3_SVC_XDRSIZE,
.vs_hidden = 0,
};
......@@ -266,6 +266,5 @@ struct svc_version nfsd_acl_version3 = {
.vs_proc = nfsd_acl_procedures3,
.vs_dispatch = nfsd_dispatch,
.vs_xdrsize = NFS3_SVC_XDRSIZE,
.vs_hidden = 0,
};
......@@ -193,11 +193,9 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
fh_copy(&resp->fh, &argp->fh);
resp->committed = argp->stable;
nfserr = nfsd_write(rqstp, &resp->fh, NULL,
argp->offset,
rqstp->rq_vec, argp->vlen,
&cnt,
&resp->committed);
nfserr = nfsd_write(rqstp, &resp->fh, argp->offset,
rqstp->rq_vec, argp->vlen,
&cnt, resp->committed);
resp->count = cnt;
RETURN_STATUS(nfserr);
}
......
......@@ -303,6 +303,7 @@ static int decode_cb_compound4res(struct xdr_stream *xdr,
p = xdr_inline_decode(xdr, length + 4);
if (unlikely(p == NULL))
goto out_overflow;
p += XDR_QUADLEN(length);
hdr->nops = be32_to_cpup(p);
return 0;
out_overflow:
......@@ -396,13 +397,10 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
struct nfsd4_callback *cb)
{
struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
struct nfs4_sessionid id;
int status;
int status = -ESERVERFAULT;
__be32 *p;
u32 dummy;
status = -ESERVERFAULT;
/*
* If the server returns different values for sessionID, slotID or
* sequence number, the server is looney tunes.
......@@ -410,9 +408,8 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4 + 4 + 4);
if (unlikely(p == NULL))
goto out_overflow;
memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
if (memcmp(id.data, session->se_sessionid.data,
NFS4_MAX_SESSIONID_LEN) != 0) {
if (memcmp(p, session->se_sessionid.data, NFS4_MAX_SESSIONID_LEN)) {
dprintk("NFS: %s Invalid session id\n", __func__);
goto out;
}
......@@ -753,6 +750,14 @@ int set_callback_cred(void)
return 0;
}
void cleanup_callback_cred(void)
{
if (callback_cred) {
put_rpccred(callback_cred);
callback_cred = NULL;
}
}
static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses)
{
if (clp->cl_minorversion == 0) {
......
......@@ -628,6 +628,10 @@ nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen,
{
__be32 status;
u32 id = -1;
if (name == NULL || namelen == 0)
return nfserr_inval;
status = do_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, &id);
*uid = make_kuid(&init_user_ns, id);
if (!uid_valid(*uid))
......@@ -641,6 +645,10 @@ nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen,
{
__be32 status;
u32 id = -1;
if (name == NULL || namelen == 0)
return nfserr_inval;
status = do_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, &id);
*gid = make_kgid(&init_user_ns, id);
if (!gid_valid(*gid))
......
......@@ -95,11 +95,15 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
u32 *bmval, u32 *writable)
{
struct dentry *dentry = cstate->current_fh.fh_dentry;
struct svc_export *exp = cstate->current_fh.fh_export;
if (!nfsd_attrs_supported(cstate->minorversion, bmval))
return nfserr_attrnotsupp;
if ((bmval[0] & FATTR4_WORD0_ACL) && !IS_POSIXACL(d_inode(dentry)))
return nfserr_attrnotsupp;
if ((bmval[2] & FATTR4_WORD2_SECURITY_LABEL) &&
!(exp->ex_flags & NFSEXP_SECURITY_LABEL))
return nfserr_attrnotsupp;
if (writable && !bmval_is_subset(bmval, writable))
return nfserr_inval;
if (writable && (bmval[2] & FATTR4_WORD2_MODE_UMASK) &&
......@@ -983,7 +987,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
&write->wr_how_written);
write->wr_how_written);
fput(filp);
write->wr_bytes_written = cnt;
......@@ -1838,6 +1842,12 @@ static inline u32 nfsd4_status_stateid_rsize(struct svc_rqst *rqstp, struct nfsd
return (op_encode_hdr_size + op_encode_stateid_maxsz)* sizeof(__be32);
}
static inline u32 nfsd4_access_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
/* ac_supported, ac_resp_access */
return (op_encode_hdr_size + 2)* sizeof(__be32);
}
static inline u32 nfsd4_commit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32);
......@@ -1892,6 +1902,11 @@ static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp,
return ret;
}
static inline u32 nfsd4_getfh_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + 1) * sizeof(__be32) + NFS4_FHSIZE;
}
static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_change_info_maxsz)
......@@ -1933,6 +1948,11 @@ static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *o
XDR_QUADLEN(rlen)) * sizeof(__be32);
}
static inline u32 nfsd4_readlink_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + 1) * sizeof(__be32) + PAGE_SIZE;
}
static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_change_info_maxsz)
......@@ -1952,11 +1972,23 @@ static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp,
+ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) * sizeof(__be32);
}
static inline u32 nfsd4_test_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + 1 + op->u.test_stateid.ts_num_ids)
* sizeof(__be32);
}
static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
}
static inline u32 nfsd4_secinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + RPC_AUTH_MAXFLAVOR *
(4 + XDR_QUADLEN(GSS_OID_MAX_LEN))) * sizeof(__be32);
}
static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) *
......@@ -2011,6 +2043,19 @@ static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
}
#ifdef CONFIG_NFSD_PNFS
static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
u32 maxcount = 0, rlen = 0;
maxcount = svc_max_payload(rqstp);
rlen = min(op->u.getdeviceinfo.gd_maxcount, maxcount);
return (op_encode_hdr_size +
1 /* gd_layout_type*/ +
XDR_QUADLEN(rlen) +
2 /* gd_notify_types */) * sizeof(__be32);
}
/*
* At this stage we don't really know what layout driver will handle the request,
* so we need to define an arbitrary upper bound here.
......@@ -2040,10 +2085,17 @@ static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_
}
#endif /* CONFIG_NFSD_PNFS */
static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + 3) * sizeof(__be32);
}
static struct nfsd4_operation nfsd4_ops[] = {
[OP_ACCESS] = {
.op_func = (nfsd4op_func)nfsd4_access,
.op_name = "OP_ACCESS",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_access_rsize,
},
[OP_CLOSE] = {
.op_func = (nfsd4op_func)nfsd4_close,
......@@ -2081,6 +2133,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
[OP_GETFH] = {
.op_func = (nfsd4op_func)nfsd4_getfh,
.op_name = "OP_GETFH",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_getfh_rsize,
},
[OP_LINK] = {
.op_func = (nfsd4op_func)nfsd4_link,
......@@ -2099,6 +2152,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
[OP_LOCKT] = {
.op_func = (nfsd4op_func)nfsd4_lockt,
.op_name = "OP_LOCKT",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize,
},
[OP_LOCKU] = {
.op_func = (nfsd4op_func)nfsd4_locku,
......@@ -2111,15 +2165,18 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_func = (nfsd4op_func)nfsd4_lookup,
.op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID,
.op_name = "OP_LOOKUP",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_LOOKUPP] = {
.op_func = (nfsd4op_func)nfsd4_lookupp,
.op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID,
.op_name = "OP_LOOKUPP",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_NVERIFY] = {
.op_func = (nfsd4op_func)nfsd4_nverify,
.op_name = "OP_NVERIFY",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_OPEN] = {
.op_func = (nfsd4op_func)nfsd4_open,
......@@ -2177,6 +2234,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
[OP_READLINK] = {
.op_func = (nfsd4op_func)nfsd4_readlink,
.op_name = "OP_READLINK",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_readlink_rsize,
},
[OP_REMOVE] = {
.op_func = (nfsd4op_func)nfsd4_remove,
......@@ -2215,6 +2273,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_func = (nfsd4op_func)nfsd4_secinfo,
.op_flags = OP_HANDLES_WRONGSEC,
.op_name = "OP_SECINFO",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_secinfo_rsize,
},
[OP_SETATTR] = {
.op_func = (nfsd4op_func)nfsd4_setattr,
......@@ -2240,6 +2299,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
[OP_VERIFY] = {
.op_func = (nfsd4op_func)nfsd4_verify,
.op_name = "OP_VERIFY",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_WRITE] = {
.op_func = (nfsd4op_func)nfsd4_write,
......@@ -2314,11 +2374,13 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_func = (nfsd4op_func)nfsd4_secinfo_no_name,
.op_flags = OP_HANDLES_WRONGSEC,
.op_name = "OP_SECINFO_NO_NAME",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_secinfo_rsize,
},
[OP_TEST_STATEID] = {
.op_func = (nfsd4op_func)nfsd4_test_stateid,
.op_flags = ALLOWED_WITHOUT_FH,
.op_name = "OP_TEST_STATEID",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_test_stateid_rsize,
},
[OP_FREE_STATEID] = {
.op_func = (nfsd4op_func)nfsd4_free_stateid,
......@@ -2332,6 +2394,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_func = (nfsd4op_func)nfsd4_getdeviceinfo,
.op_flags = ALLOWED_WITHOUT_FH,
.op_name = "OP_GETDEVICEINFO",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_getdeviceinfo_rsize,
},
[OP_LAYOUTGET] = {
.op_func = (nfsd4op_func)nfsd4_layoutget,
......@@ -2381,6 +2444,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
[OP_SEEK] = {
.op_func = (nfsd4op_func)nfsd4_seek,
.op_name = "OP_SEEK",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_seek_rsize,
},
};
......@@ -2425,14 +2489,11 @@ bool nfsd4_spo_must_allow(struct svc_rqst *rqstp)
int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
struct nfsd4_operation *opdesc;
nfsd4op_rsize estimator;
if (op->opnum == OP_ILLEGAL)
return op_encode_hdr_size * sizeof(__be32);
opdesc = OPDESC(op);
estimator = opdesc->op_rsize_bop;
return estimator ? estimator(rqstp, op) : PAGE_SIZE;
BUG_ON(OPDESC(op)->op_rsize_bop == NULL);
return OPDESC(op)->op_rsize_bop(rqstp, op);
}
void warn_on_nonidempotent_op(struct nfsd4_op *op)
......@@ -2476,12 +2537,13 @@ static struct svc_procedure nfsd_procedures4[2] = {
};
struct svc_version nfsd_version4 = {
.vs_vers = 4,
.vs_nproc = 2,
.vs_proc = nfsd_procedures4,
.vs_dispatch = nfsd_dispatch,
.vs_xdrsize = NFS4_SVC_XDRSIZE,
.vs_rpcb_optnl = 1,
.vs_vers = 4,
.vs_nproc = 2,
.vs_proc = nfsd_procedures4,
.vs_dispatch = nfsd_dispatch,
.vs_xdrsize = NFS4_SVC_XDRSIZE,
.vs_rpcb_optnl = true,
.vs_need_cong_ctrl = true,
};
/*
......
......@@ -2281,7 +2281,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r
out_err:
conn->cb_addr.ss_family = AF_UNSPEC;
conn->cb_addrlen = 0;
dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
dprintk("NFSD: this client (clientid %08x/%08x) "
"will not receive delegations\n",
clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
......@@ -7012,23 +7012,24 @@ nfs4_state_start(void)
ret = set_callback_cred();
if (ret)
return -ENOMEM;
return ret;
laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4");
if (laundry_wq == NULL) {
ret = -ENOMEM;
goto out_recovery;
goto out_cleanup_cred;
}
ret = nfsd4_create_callback_queue();
if (ret)
goto out_free_laundry;
set_max_delegations();
return 0;
out_free_laundry:
destroy_workqueue(laundry_wq);
out_recovery:
out_cleanup_cred:
cleanup_callback_cred();
return ret;
}
......@@ -7086,6 +7087,7 @@ nfs4_state_shutdown(void)
{
destroy_workqueue(laundry_wq);
nfsd4_destroy_callback_queue();
cleanup_callback_cred();
}
static void
......
......@@ -58,7 +58,7 @@
#define NFSDDBG_FACILITY NFSDDBG_XDR
u32 nfsd_suppattrs[3][3] = {
const u32 nfsd_suppattrs[3][3] = {
{NFSD4_SUPPORTED_ATTRS_WORD0,
NFSD4_SUPPORTED_ATTRS_WORD1,
NFSD4_SUPPORTED_ATTRS_WORD2},
......@@ -1250,7 +1250,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
READ_BUF(16);
p = xdr_decode_hyper(p, &write->wr_offset);
write->wr_stable_how = be32_to_cpup(p++);
if (write->wr_stable_how > 2)
if (write->wr_stable_how > NFS_FILE_SYNC)
goto xdr_error;
write->wr_buflen = be32_to_cpup(p++);
......@@ -1941,12 +1941,12 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
} else
max_reply += nfsd4_max_reply(argp->rqstp, op);
/*
* OP_LOCK may return a conflicting lock. (Special case
* because it will just skip encoding this if it runs
* out of xdr buffer space, and it is the only operation
* that behaves this way.)
* OP_LOCK and OP_LOCKT may return a conflicting lock.
* (Special case because it will just skip encoding this
* if it runs out of xdr buffer space, and it is the only
* operation that behaves this way.)
*/
if (op->opnum == OP_LOCK)
if (op->opnum == OP_LOCK || op->opnum == OP_LOCKT)
max_reply += NFS4_OPAQUE_LIMIT;
if (op->status) {
......@@ -1966,9 +1966,13 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
DECODE_TAIL;
}
static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode)
static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
struct svc_export *exp)
{
if (IS_I_VERSION(inode)) {
if (exp->ex_flags & NFSEXP_V4ROOT) {
*p++ = cpu_to_be32(convert_to_wallclock(exp->cd->flush_time));
*p++ = 0;
} else if (IS_I_VERSION(inode)) {
p = xdr_encode_hyper(p, inode->i_version);
} else {
*p++ = cpu_to_be32(stat->ctime.tv_sec);
......@@ -2417,8 +2421,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
if ((bmval2 & FATTR4_WORD2_SECURITY_LABEL) ||
bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
err = security_inode_getsecctx(d_inode(dentry),
if (exp->ex_flags & NFSEXP_SECURITY_LABEL)
err = security_inode_getsecctx(d_inode(dentry),
&context, &contextlen);
else
err = -EOPNOTSUPP;
contextsupport = (err == 0);
if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
if (err == -EOPNOTSUPP)
......@@ -2490,7 +2497,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
p = xdr_reserve_space(xdr, 8);
if (!p)
goto out_resource;
p = encode_change(p, &stat, d_inode(dentry));
p = encode_change(p, &stat, d_inode(dentry), exp);
}
if (bmval0 & FATTR4_WORD0_SIZE) {
p = xdr_reserve_space(xdr, 8);
......
......@@ -536,6 +536,19 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
return rv;
}
static ssize_t
nfsd_print_version_support(char *buf, int remaining, const char *sep,
unsigned vers, unsigned minor)
{
const char *format = (minor == 0) ? "%s%c%u" : "%s%c%u.%u";
bool supported = !!nfsd_vers(vers, NFSD_TEST);
if (vers == 4 && !nfsd_minorversion(minor, NFSD_TEST))
supported = false;
return snprintf(buf, remaining, format, sep,
supported ? '+' : '-', vers, minor);
}
static ssize_t __write_versions(struct file *file, char *buf, size_t size)
{
char *mesg = buf;
......@@ -561,6 +574,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
len = qword_get(&mesg, vers, size);
if (len <= 0) return -EINVAL;
do {
enum vers_op cmd;
sign = *vers;
if (sign == '+' || sign == '-')
num = simple_strtol((vers+1), &minorp, 0);
......@@ -569,24 +583,22 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
if (*minorp == '.') {
if (num != 4)
return -EINVAL;
minor = simple_strtoul(minorp+1, NULL, 0);
if (minor == 0)
return -EINVAL;
if (nfsd_minorversion(minor, sign == '-' ?
NFSD_CLEAR : NFSD_SET) < 0)
if (kstrtouint(minorp+1, 0, &minor) < 0)
return -EINVAL;
goto next;
}
} else
minor = 0;
cmd = sign == '-' ? NFSD_CLEAR : NFSD_SET;
switch(num) {
case 2:
case 3:
case 4:
nfsd_vers(num, sign == '-' ? NFSD_CLEAR : NFSD_SET);
nfsd_vers(num, cmd);
break;
case 4:
if (nfsd_minorversion(minor, cmd) >= 0)
break;
default:
return -EINVAL;
}
next:
vers += len + 1;
} while ((len = qword_get(&mesg, vers, size)) > 0);
/* If all get turned off, turn them back on, as
......@@ -599,35 +611,23 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
len = 0;
sep = "";
remaining = SIMPLE_TRANSACTION_LIMIT;
for (num=2 ; num <= 4 ; num++)
if (nfsd_vers(num, NFSD_AVAIL)) {
len = snprintf(buf, remaining, "%s%c%d", sep,
nfsd_vers(num, NFSD_TEST)?'+':'-',
num);
sep = " ";
if (len >= remaining)
break;
remaining -= len;
buf += len;
tlen += len;
}
if (nfsd_vers(4, NFSD_AVAIL))
for (minor = 1; minor <= NFSD_SUPPORTED_MINOR_VERSION;
minor++) {
len = snprintf(buf, remaining, " %c4.%u",
(nfsd_vers(4, NFSD_TEST) &&
nfsd_minorversion(minor, NFSD_TEST)) ?
'+' : '-',
minor);
for (num=2 ; num <= 4 ; num++) {
if (!nfsd_vers(num, NFSD_AVAIL))
continue;
minor = 0;
do {
len = nfsd_print_version_support(buf, remaining,
sep, num, minor);
if (len >= remaining)
break;
goto out;
remaining -= len;
buf += len;
tlen += len;
}
minor++;
sep = " ";
} while (num == 4 && minor <= NFSD_SUPPORTED_MINOR_VERSION);
}
out:
len = snprintf(buf, remaining, "\n");
if (len >= remaining)
return -EINVAL;
......
......@@ -362,16 +362,16 @@ void nfsd_lockd_shutdown(void);
FATTR4_WORD2_MODE_UMASK | \
NFSD4_2_SECURITY_ATTRS)
extern u32 nfsd_suppattrs[3][3];
extern const u32 nfsd_suppattrs[3][3];
static inline bool bmval_is_subset(u32 *bm1, u32 *bm2)
static inline bool bmval_is_subset(const u32 *bm1, const u32 *bm2)
{
return !((bm1[0] & ~bm2[0]) ||
(bm1[1] & ~bm2[1]) ||
(bm1[2] & ~bm2[2]));
}
static inline bool nfsd_attrs_supported(u32 minorversion, u32 *bmval)
static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval)
{
return bmval_is_subset(bmval, nfsd_suppattrs[minorversion]);
}
......
......@@ -204,18 +204,14 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
struct nfsd_attrstat *resp)
{
__be32 nfserr;
int stable = 1;
unsigned long cnt = argp->len;
dprintk("nfsd: WRITE %s %d bytes at %d\n",
SVCFH_fmt(&argp->fh),
argp->len, argp->offset);
nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL,
argp->offset,
rqstp->rq_vec, argp->vlen,
&cnt,
&stable);
nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), argp->offset,
rqstp->rq_vec, argp->vlen, &cnt, NFS_DATA_SYNC);
return nfsd_return_attrs(nfserr, resp);
}
......
......@@ -153,6 +153,18 @@ int nfsd_vers(int vers, enum vers_op change)
return 0;
}
static void
nfsd_adjust_nfsd_versions4(void)
{
unsigned i;
for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++) {
if (nfsd_supported_minorversions[i])
return;
}
nfsd_vers(4, NFSD_CLEAR);
}
int nfsd_minorversion(u32 minorversion, enum vers_op change)
{
if (minorversion > NFSD_SUPPORTED_MINOR_VERSION)
......@@ -160,9 +172,11 @@ int nfsd_minorversion(u32 minorversion, enum vers_op change)
switch(change) {
case NFSD_SET:
nfsd_supported_minorversions[minorversion] = true;
nfsd_vers(4, NFSD_SET);
break;
case NFSD_CLEAR:
nfsd_supported_minorversions[minorversion] = false;
nfsd_adjust_nfsd_versions4();
break;
case NFSD_TEST:
return nfsd_supported_minorversions[minorversion];
......@@ -354,6 +368,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this,
dprintk("nfsd_inet6addr_event: removed %pI6\n", &ifa->addr);
sin6.sin6_family = AF_INET6;
sin6.sin6_addr = ifa->addr;
if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
sin6.sin6_scope_id = ifa->idev->dev->ifindex;
svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6);
}
......
......@@ -615,6 +615,7 @@ extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
extern __be32 nfs4_check_open_reclaim(clientid_t *clid,
struct nfsd4_compound_state *cstate, struct nfsd_net *nn);
extern int set_callback_cred(void);
extern void cleanup_callback_cred(void);
extern void nfsd4_probe_callback(struct nfs4_client *clp);
extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
......
......@@ -377,7 +377,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
__be32 err;
int host_err;
bool get_write_count;
int size_change = 0;
bool size_change = (iap->ia_valid & ATTR_SIZE);
if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
......@@ -390,11 +390,11 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
/* Get inode */
err = fh_verify(rqstp, fhp, ftype, accmode);
if (err)
goto out;
return err;
if (get_write_count) {
host_err = fh_want_write(fhp);
if (host_err)
return nfserrno(host_err);
goto out;
}
dentry = fhp->fh_dentry;
......@@ -405,20 +405,28 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
iap->ia_valid &= ~ATTR_MODE;
if (!iap->ia_valid)
goto out;
return 0;
nfsd_sanitize_attrs(inode, iap);
if (check_guard && guardtime != inode->i_ctime.tv_sec)
return nfserr_notsync;
/*
* The size case is special, it changes the file in addition to the
* attributes.
* attributes, and file systems don't expect it to be mixed with
* "random" attribute changes. We thus split out the size change
* into a separate call to ->setattr, and do the rest as a separate
* setattr call.
*/
if (iap->ia_valid & ATTR_SIZE) {
if (size_change) {
err = nfsd_get_write_access(rqstp, fhp, iap);
if (err)
goto out;
size_change = 1;
return err;
}
fh_lock(fhp);
if (size_change) {
/*
* RFC5661, Section 18.30.4:
* Changing the size of a file with SETATTR indirectly
......@@ -426,29 +434,36 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
*
* (and similar for the older RFCs)
*/
if (iap->ia_size != i_size_read(inode))
iap->ia_valid |= ATTR_MTIME;
}
struct iattr size_attr = {
.ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME,
.ia_size = iap->ia_size,
};
iap->ia_valid |= ATTR_CTIME;
host_err = notify_change(dentry, &size_attr, NULL);
if (host_err)
goto out_unlock;
iap->ia_valid &= ~ATTR_SIZE;
if (check_guard && guardtime != inode->i_ctime.tv_sec) {
err = nfserr_notsync;
goto out_put_write_access;
/*
* Avoid the additional setattr call below if the only other
* attribute that the client sends is the mtime, as we update
* it as part of the size change above.
*/
if ((iap->ia_valid & ~ATTR_MTIME) == 0)
goto out_unlock;
}
fh_lock(fhp);
iap->ia_valid |= ATTR_CTIME;
host_err = notify_change(dentry, iap, NULL);
fh_unlock(fhp);
err = nfserrno(host_err);
out_put_write_access:
out_unlock:
fh_unlock(fhp);
if (size_change)
put_write_access(inode);
if (!err)
err = nfserrno(commit_metadata(fhp));
out:
return err;
if (!host_err)
host_err = commit_metadata(fhp);
return nfserrno(host_err);
}
#if defined(CONFIG_NFSD_V4)
......@@ -940,14 +955,12 @@ static int wait_for_concurrent_writes(struct file *file)
__be32
nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
loff_t offset, struct kvec *vec, int vlen,
unsigned long *cnt, int *stablep)
unsigned long *cnt, int stable)
{
struct svc_export *exp;
struct inode *inode;
mm_segment_t oldfs;
__be32 err = 0;
int host_err;
int stable = *stablep;
int use_wgather;
loff_t pos = offset;
unsigned int pflags = current->flags;
......@@ -962,13 +975,11 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
*/
current->flags |= PF_LESS_THROTTLE;
inode = file_inode(file);
exp = fhp->fh_export;
exp = fhp->fh_export;
use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
if (!EX_ISSYNC(exp))
stable = 0;
stable = NFS_UNSTABLE;
if (stable && !use_wgather)
flags |= RWF_SYNC;
......@@ -1035,35 +1046,22 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
* N.B. After this call fhp needs an fh_put
*/
__be32
nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt,
int *stablep)
nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
struct kvec *vec, int vlen, unsigned long *cnt, int stable)
{
__be32 err = 0;
struct file *file = NULL;
__be32 err = 0;
trace_write_start(rqstp, fhp, offset, vlen);
if (file) {
err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE);
if (err)
goto out;
trace_write_opened(rqstp, fhp, offset, vlen);
err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt,
stablep);
trace_write_io_done(rqstp, fhp, offset, vlen);
} else {
err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
if (err)
goto out;
err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
if (err)
goto out;
trace_write_opened(rqstp, fhp, offset, vlen);
if (cnt)
err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen,
cnt, stablep);
trace_write_io_done(rqstp, fhp, offset, vlen);
fput(file);
}
trace_write_opened(rqstp, fhp, offset, vlen);
err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable);
trace_write_io_done(rqstp, fhp, offset, vlen);
fput(file);
out:
trace_write_done(rqstp, fhp, offset, vlen);
return err;
......
......@@ -83,12 +83,12 @@ __be32 nfsd_readv(struct file *, loff_t, struct kvec *, int,
unsigned long *);
__be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
loff_t, struct kvec *, int, unsigned long *);
__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
loff_t, struct kvec *,int, unsigned long *, int *);
__be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
struct kvec *, int, unsigned long *, int);
__be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
struct kvec *vec, int vlen, unsigned long *cnt,
int *stablep);
int stable);
__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
char *, int *);
__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
......
......@@ -204,8 +204,11 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd)
kref_put(&h->ref, cd->cache_put);
}
static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h)
static inline bool cache_is_expired(struct cache_detail *detail, struct cache_head *h)
{
if (!test_bit(CACHE_VALID, &h->flags))
return false;
return (h->expiry_time < seconds_since_boot()) ||
(detail->flush_time >= h->last_refresh);
}
......@@ -227,6 +230,7 @@ extern void sunrpc_destroy_cache_detail(struct cache_detail *cd);
extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *,
umode_t, struct cache_detail *);
extern void sunrpc_cache_unregister_pipefs(struct cache_detail *);
extern void sunrpc_cache_unhash(struct cache_detail *, struct cache_head *);
/* Must store cache_detail in seq_file->private if using next three functions */
extern void *cache_seq_start(struct seq_file *file, loff_t *pos);
......
......@@ -109,6 +109,15 @@ struct rpcrdma_msg {
} rm_body;
};
/*
* XDR sizes, in quads
*/
enum {
rpcrdma_fixed_maxsz = 4,
rpcrdma_segment_maxsz = 4,
rpcrdma_readchunk_maxsz = 2 + rpcrdma_segment_maxsz,
};
/*
* Smallest RPC/RDMA header: rm_xid through rm_type, then rm_nochunks
*/
......
......@@ -400,10 +400,14 @@ struct svc_version {
struct svc_procedure * vs_proc; /* per-procedure info */
u32 vs_xdrsize; /* xdrsize needed for this version */
unsigned int vs_hidden : 1, /* Don't register with portmapper.
* Only used for nfsacl so far. */
vs_rpcb_optnl:1;/* Don't care the result of register.
* Only used for nfsv4. */
/* Don't register with rpcbind */
bool vs_hidden;
/* Don't care if the rpcbind registration fails */
bool vs_rpcb_optnl;
/* Need xprt with congestion control */
bool vs_need_cong_ctrl;
/* Override dispatch function (e.g. when caching replies).
* A return value of 0 means drop the request.
......
......@@ -70,7 +70,7 @@ extern atomic_t rdma_stat_sq_prod;
* completes.
*/
struct svc_rdma_op_ctxt {
struct list_head free;
struct list_head list;
struct svc_rdma_op_ctxt *read_hdr;
struct svc_rdma_fastreg_mr *frmr;
int hdr_count;
......@@ -78,7 +78,6 @@ struct svc_rdma_op_ctxt {
struct ib_cqe cqe;
struct ib_cqe reg_cqe;
struct ib_cqe inv_cqe;
struct list_head dto_q;
u32 byte_len;
u32 position;
struct svcxprt_rdma *xprt;
......@@ -141,7 +140,8 @@ struct svcxprt_rdma {
atomic_t sc_sq_avail; /* SQEs ready to be consumed */
unsigned int sc_sq_depth; /* Depth of SQ */
unsigned int sc_rq_depth; /* Depth of RQ */
u32 sc_max_requests; /* Forward credits */
__be32 sc_fc_credits; /* Forward credits */
u32 sc_max_requests; /* Max requests */
u32 sc_max_bc_requests;/* Backward credits */
int sc_max_req_size; /* Size of each RQ WR buf */
......@@ -171,7 +171,6 @@ struct svcxprt_rdma {
wait_queue_head_t sc_send_wait; /* SQ exhaustion waitlist */
unsigned long sc_flags;
struct list_head sc_dto_q; /* DTO tasklet I/O pending Q */
struct list_head sc_read_complete_q;
struct work_struct sc_work;
};
......@@ -214,11 +213,7 @@ extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int);
extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int);
extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int,
__be32, __be64, u32);
extern void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *,
struct rpcrdma_msg *,
struct rpcrdma_msg *,
enum rpcrdma_proc);
extern int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *);
extern unsigned int svc_rdma_xdr_get_reply_hdr_len(__be32 *rdma_resp);
/* svc_rdma_recvfrom.c */
extern int svc_rdma_recvfrom(struct svc_rqst *);
......
......@@ -67,6 +67,7 @@ struct svc_xprt {
#define XPT_CACHE_AUTH 11 /* cache auth info */
#define XPT_LOCAL 12 /* connection from loopback interface */
#define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */
#define XPT_CONG_CTRL 14 /* has congestion control */
struct svc_serv *xpt_server; /* service for transport */
atomic_t xpt_reserved; /* space on outq that is rsvd */
......
......@@ -32,7 +32,8 @@
#define NFSEXP_ASYNC 0x0010
#define NFSEXP_GATHERED_WRITES 0x0020
#define NFSEXP_NOREADDIRPLUS 0x0040
/* 80 100 currently unused */
#define NFSEXP_SECURITY_LABEL 0x0080
/* 0x100 currently unused */
#define NFSEXP_NOHIDE 0x0200
#define NFSEXP_NOSUBTREECHECK 0x0400
#define NFSEXP_NOAUTHNLM 0x0800 /* Don't authenticate NLM requests - just trust */
......@@ -53,7 +54,7 @@
#define NFSEXP_PNFS 0x20000
/* All flags that we claim to support. (Note we don't support NOACL.) */
#define NFSEXP_ALLFLAGS 0x3FE7F
#define NFSEXP_ALLFLAGS 0x3FEFF
/* The flags that may vary depending on security flavor: */
#define NFSEXP_SECINFO_FLAGS (NFSEXP_READONLY | NFSEXP_ROOTSQUASH \
......
......@@ -1489,8 +1489,8 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
case RPC_GSS_PROC_DESTROY:
if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
goto auth_err;
rsci->h.expiry_time = seconds_since_boot();
set_bit(CACHE_NEGATIVE, &rsci->h.flags);
/* Delete the entry from the cache_list and call cache_put */
sunrpc_cache_unhash(sn->rsc_cache, &rsci->h);
if (resv->iov_len + 4 > PAGE_SIZE)
goto drop;
svc_putnl(resv, RPC_SUCCESS);
......
......@@ -362,11 +362,6 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd)
cache_purge(cd);
spin_lock(&cache_list_lock);
write_lock(&cd->hash_lock);
if (cd->entries) {
write_unlock(&cd->hash_lock);
spin_unlock(&cache_list_lock);
goto out;
}
if (current_detail == cd)
current_detail = NULL;
list_del_init(&cd->others);
......@@ -376,9 +371,6 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd)
/* module must be being unloaded so its safe to kill the worker */
cancel_delayed_work_sync(&cache_cleaner);
}
return;
out:
printk(KERN_ERR "RPC: failed to unregister %s cache\n", cd->name);
}
EXPORT_SYMBOL_GPL(sunrpc_destroy_cache_detail);
......@@ -497,13 +489,32 @@ EXPORT_SYMBOL_GPL(cache_flush);
void cache_purge(struct cache_detail *detail)
{
time_t now = seconds_since_boot();
if (detail->flush_time >= now)
now = detail->flush_time + 1;
/* 'now' is the maximum value any 'last_refresh' can have */
detail->flush_time = now;
detail->nextcheck = seconds_since_boot();
cache_flush();
struct cache_head *ch = NULL;
struct hlist_head *head = NULL;
struct hlist_node *tmp = NULL;
int i = 0;
write_lock(&detail->hash_lock);
if (!detail->entries) {
write_unlock(&detail->hash_lock);
return;
}
dprintk("RPC: %d entries in %s cache\n", detail->entries, detail->name);
for (i = 0; i < detail->hash_size; i++) {
head = &detail->hash_table[i];
hlist_for_each_entry_safe(ch, tmp, head, cache_list) {
hlist_del_init(&ch->cache_list);
detail->entries--;
set_bit(CACHE_CLEANED, &ch->flags);
write_unlock(&detail->hash_lock);
cache_fresh_unlocked(ch, detail);
cache_put(ch, detail);
write_lock(&detail->hash_lock);
}
}
write_unlock(&detail->hash_lock);
}
EXPORT_SYMBOL_GPL(cache_purge);
......@@ -1855,3 +1866,15 @@ void sunrpc_cache_unregister_pipefs(struct cache_detail *cd)
}
EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs);
void sunrpc_cache_unhash(struct cache_detail *cd, struct cache_head *h)
{
write_lock(&cd->hash_lock);
if (!hlist_unhashed(&h->cache_list)){
hlist_del_init(&h->cache_list);
cd->entries--;
write_unlock(&cd->hash_lock);
cache_put(h, cd);
} else
write_unlock(&cd->hash_lock);
}
EXPORT_SYMBOL_GPL(sunrpc_cache_unhash);
......@@ -385,7 +385,7 @@ static int svc_uses_rpcbind(struct svc_serv *serv)
for (i = 0; i < progp->pg_nvers; i++) {
if (progp->pg_vers[i] == NULL)
continue;
if (progp->pg_vers[i]->vs_hidden == 0)
if (!progp->pg_vers[i]->vs_hidden)
return 1;
}
}
......@@ -976,6 +976,13 @@ int svc_register(const struct svc_serv *serv, struct net *net,
if (vers->vs_hidden)
continue;
/*
* Don't register a UDP port if we need congestion
* control.
*/
if (vers->vs_need_cong_ctrl && proto == IPPROTO_UDP)
continue;
error = __svc_register(net, progp->pg_name, progp->pg_prog,
i, family, proto, port);
......@@ -1169,6 +1176,21 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
!(versp = progp->pg_vers[vers]))
goto err_bad_vers;
/*
* Some protocol versions (namely NFSv4) require some form of
* congestion control. (See RFC 7530 section 3.1 paragraph 2)
* In other words, UDP is not allowed. We mark those when setting
* up the svc_xprt, and verify that here.
*
* The spec is not very clear about what error should be returned
* when someone tries to access a server that is listening on UDP
* for lower versions. RPC_PROG_MISMATCH seems to be the closest
* fit.
*/
if (versp->vs_need_cong_ctrl &&
!test_bit(XPT_CONG_CTRL, &rqstp->rq_xprt->xpt_flags))
goto err_bad_vers;
procp = versp->vs_proc + proc;
if (proc >= versp->vs_nproc || !procp->pc_func)
goto err_bad_proc;
......
......@@ -1306,6 +1306,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_tcp_class,
&svsk->sk_xprt, serv);
set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags);
if (sk->sk_state == TCP_LISTEN) {
dprintk("setting up TCP socket for listening\n");
set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags);
......
......@@ -201,19 +201,20 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
{
struct rpc_xprt *xprt = rqst->rq_xprt;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)rqst->rq_buffer;
__be32 *p;
int rc;
/* Space in the send buffer for an RPC/RDMA header is reserved
* via xprt->tsh_size.
*/
headerp->rm_xid = rqst->rq_xid;
headerp->rm_vers = rpcrdma_version;
headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests);
headerp->rm_type = rdma_msg;
headerp->rm_body.rm_chunks[0] = xdr_zero;
headerp->rm_body.rm_chunks[1] = xdr_zero;
headerp->rm_body.rm_chunks[2] = xdr_zero;
p = rqst->rq_buffer;
*p++ = rqst->rq_xid;
*p++ = rpcrdma_version;
*p++ = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests);
*p++ = rdma_msg;
*p++ = xdr_zero;
*p++ = xdr_zero;
*p = xdr_zero;
#ifdef SVCRDMA_BACKCHANNEL_DEBUG
pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer);
......
/*
* Copyright (c) 2016 Oracle. All rights reserved.
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
......@@ -47,102 +48,43 @@
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
/*
* Decodes a read chunk list. The expected format is as follows:
* descrim : xdr_one
* position : __be32 offset into XDR stream
* handle : __be32 RKEY
* . . .
* end-of-list: xdr_zero
*/
static __be32 *decode_read_list(__be32 *va, __be32 *vaend)
static __be32 *xdr_check_read_list(__be32 *p, __be32 *end)
{
struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va;
__be32 *next;
while (ch->rc_discrim != xdr_zero) {
if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) >
(unsigned long)vaend) {
dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch);
while (*p++ != xdr_zero) {
next = p + rpcrdma_readchunk_maxsz - 1;
if (next > end)
return NULL;
}
ch++;
p = next;
}
return &ch->rc_position;
return p;
}
/*
* Decodes a write chunk list. The expected format is as follows:
* descrim : xdr_one
* nchunks : <count>
* handle : __be32 RKEY ---+
* length : __be32 <len of segment> |
* offset : remove va + <count>
* . . . |
* ---+
*/
static __be32 *decode_write_list(__be32 *va, __be32 *vaend)
static __be32 *xdr_check_write_list(__be32 *p, __be32 *end)
{
unsigned long start, end;
int nchunks;
struct rpcrdma_write_array *ary =
(struct rpcrdma_write_array *)va;
__be32 *next;
/* Check for not write-array */
if (ary->wc_discrim == xdr_zero)
return &ary->wc_nchunks;
if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
(unsigned long)vaend) {
dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
return NULL;
}
nchunks = be32_to_cpu(ary->wc_nchunks);
start = (unsigned long)&ary->wc_array[0];
end = (unsigned long)vaend;
if (nchunks < 0 ||
nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
(start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
ary, nchunks, vaend);
return NULL;
while (*p++ != xdr_zero) {
next = p + 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
if (next > end)
return NULL;
p = next;
}
/*
* rs_length is the 2nd 4B field in wc_target and taking its
* address skips the list terminator
*/
return &ary->wc_array[nchunks].wc_target.rs_length;
return p;
}
static __be32 *decode_reply_array(__be32 *va, __be32 *vaend)
static __be32 *xdr_check_reply_chunk(__be32 *p, __be32 *end)
{
unsigned long start, end;
int nchunks;
struct rpcrdma_write_array *ary =
(struct rpcrdma_write_array *)va;
/* Check for no reply-array */
if (ary->wc_discrim == xdr_zero)
return &ary->wc_nchunks;
if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
(unsigned long)vaend) {
dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
return NULL;
}
nchunks = be32_to_cpu(ary->wc_nchunks);
start = (unsigned long)&ary->wc_array[0];
end = (unsigned long)vaend;
if (nchunks < 0 ||
nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
(start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
ary, nchunks, vaend);
return NULL;
__be32 *next;
if (*p++ != xdr_zero) {
next = p + 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
if (next > end)
return NULL;
p = next;
}
return (__be32 *)&ary->wc_array[nchunks];
return p;
}
/**
......@@ -158,87 +100,71 @@ static __be32 *decode_reply_array(__be32 *va, __be32 *vaend)
*/
int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
{
struct rpcrdma_msg *rmsgp;
__be32 *va, *vaend;
unsigned int len;
u32 hdr_len;
__be32 *p, *end, *rdma_argp;
unsigned int hdr_len;
/* Verify that there's enough bytes for header + something */
if (rq_arg->len <= RPCRDMA_HDRLEN_ERR) {
dprintk("svcrdma: header too short = %d\n",
rq_arg->len);
return -EINVAL;
}
if (rq_arg->len <= RPCRDMA_HDRLEN_ERR)
goto out_short;
rmsgp = (struct rpcrdma_msg *)rq_arg->head[0].iov_base;
if (rmsgp->rm_vers != rpcrdma_version) {
dprintk("%s: bad version %u\n", __func__,
be32_to_cpu(rmsgp->rm_vers));
return -EPROTONOSUPPORT;
}
rdma_argp = rq_arg->head[0].iov_base;
if (*(rdma_argp + 1) != rpcrdma_version)
goto out_version;
switch (be32_to_cpu(rmsgp->rm_type)) {
case RDMA_MSG:
case RDMA_NOMSG:
switch (*(rdma_argp + 3)) {
case rdma_msg:
case rdma_nomsg:
break;
case RDMA_DONE:
/* Just drop it */
dprintk("svcrdma: dropping RDMA_DONE message\n");
return 0;
case RDMA_ERROR:
/* Possible if this is a backchannel reply.
* XXX: We should cancel this XID, though.
*/
dprintk("svcrdma: dropping RDMA_ERROR message\n");
return 0;
case RDMA_MSGP:
/* Pull in the extra for the padded case, bump our pointer */
rmsgp->rm_body.rm_padded.rm_align =
be32_to_cpu(rmsgp->rm_body.rm_padded.rm_align);
rmsgp->rm_body.rm_padded.rm_thresh =
be32_to_cpu(rmsgp->rm_body.rm_padded.rm_thresh);
va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
rq_arg->head[0].iov_base = va;
len = (u32)((unsigned long)va - (unsigned long)rmsgp);
rq_arg->head[0].iov_len -= len;
if (len > rq_arg->len)
return -EINVAL;
return len;
default:
dprintk("svcrdma: bad rdma procedure (%u)\n",
be32_to_cpu(rmsgp->rm_type));
return -EINVAL;
}
case rdma_done:
goto out_drop;
/* The chunk list may contain either a read chunk list or a write
* chunk list and a reply chunk list.
*/
va = &rmsgp->rm_body.rm_chunks[0];
vaend = (__be32 *)((unsigned long)rmsgp + rq_arg->len);
va = decode_read_list(va, vaend);
if (!va) {
dprintk("svcrdma: failed to decode read list\n");
return -EINVAL;
}
va = decode_write_list(va, vaend);
if (!va) {
dprintk("svcrdma: failed to decode write list\n");
return -EINVAL;
}
va = decode_reply_array(va, vaend);
if (!va) {
dprintk("svcrdma: failed to decode reply chunk\n");
return -EINVAL;
case rdma_error:
goto out_drop;
default:
goto out_proc;
}
rq_arg->head[0].iov_base = va;
hdr_len = (unsigned long)va - (unsigned long)rmsgp;
end = (__be32 *)((unsigned long)rdma_argp + rq_arg->len);
p = xdr_check_read_list(rdma_argp + 4, end);
if (!p)
goto out_inval;
p = xdr_check_write_list(p, end);
if (!p)
goto out_inval;
p = xdr_check_reply_chunk(p, end);
if (!p)
goto out_inval;
if (p > end)
goto out_inval;
rq_arg->head[0].iov_base = p;
hdr_len = (unsigned long)p - (unsigned long)rdma_argp;
rq_arg->head[0].iov_len -= hdr_len;
return hdr_len;
out_short:
dprintk("svcrdma: header too short = %d\n", rq_arg->len);
return -EINVAL;
out_version:
dprintk("svcrdma: bad xprt version: %u\n",
be32_to_cpup(rdma_argp + 1));
return -EPROTONOSUPPORT;
out_drop:
dprintk("svcrdma: dropping RDMA_DONE/ERROR message\n");
return 0;
out_proc:
dprintk("svcrdma: bad rdma procedure (%u)\n",
be32_to_cpup(rdma_argp + 3));
return -EINVAL;
out_inval:
dprintk("svcrdma: failed to parse transport header\n");
return -EINVAL;
}
int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
......@@ -249,7 +175,7 @@ int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
*va++ = rmsgp->rm_xid;
*va++ = rmsgp->rm_vers;
*va++ = cpu_to_be32(xprt->sc_max_requests);
*va++ = xprt->sc_fc_credits;
*va++ = rdma_error;
*va++ = cpu_to_be32(err);
if (err == ERR_VERS) {
......@@ -260,32 +186,35 @@ int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
return (int)((unsigned long)va - (unsigned long)startp);
}
int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
/**
* svc_rdma_xdr_get_reply_hdr_length - Get length of Reply transport header
* @rdma_resp: buffer containing Reply transport header
*
* Returns length of transport header, in bytes.
*/
unsigned int svc_rdma_xdr_get_reply_hdr_len(__be32 *rdma_resp)
{
struct rpcrdma_write_array *wr_ary;
unsigned int nsegs;
__be32 *p;
/* There is no read-list in a reply */
p = rdma_resp;
/* skip write list */
wr_ary = (struct rpcrdma_write_array *)
&rmsgp->rm_body.rm_chunks[1];
if (wr_ary->wc_discrim)
wr_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)].
wc_target.rs_length;
else
wr_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_nchunks;
/* skip reply array */
if (wr_ary->wc_discrim)
wr_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)];
else
wr_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_nchunks;
return (unsigned long) wr_ary - (unsigned long) rmsgp;
/* RPC-over-RDMA V1 replies never have a Read list. */
p += rpcrdma_fixed_maxsz + 1;
/* Skip Write list. */
while (*p++ != xdr_zero) {
nsegs = be32_to_cpup(p++);
p += nsegs * rpcrdma_segment_maxsz;
}
/* Skip Reply chunk. */
if (*p++ != xdr_zero) {
nsegs = be32_to_cpup(p++);
p += nsegs * rpcrdma_segment_maxsz;
}
return (unsigned long)p - (unsigned long)rdma_resp;
}
void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
......@@ -326,19 +255,3 @@ void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
seg->rs_offset = rs_offset;
seg->rs_length = cpu_to_be32(write_len);
}
void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rdma_argp,
struct rpcrdma_msg *rdma_resp,
enum rpcrdma_proc rdma_type)
{
rdma_resp->rm_xid = rdma_argp->rm_xid;
rdma_resp->rm_vers = rdma_argp->rm_vers;
rdma_resp->rm_credit = cpu_to_be32(xprt->sc_max_requests);
rdma_resp->rm_type = cpu_to_be32(rdma_type);
/* Encode <nul> chunks lists */
rdma_resp->rm_body.rm_chunks[0] = xdr_zero;
rdma_resp->rm_body.rm_chunks[1] = xdr_zero;
rdma_resp->rm_body.rm_chunks[2] = xdr_zero;
}
......@@ -606,26 +606,24 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
dprintk("svcrdma: rqstp=%p\n", rqstp);
spin_lock_bh(&rdma_xprt->sc_rq_dto_lock);
spin_lock(&rdma_xprt->sc_rq_dto_lock);
if (!list_empty(&rdma_xprt->sc_read_complete_q)) {
ctxt = list_entry(rdma_xprt->sc_read_complete_q.next,
struct svc_rdma_op_ctxt,
dto_q);
list_del_init(&ctxt->dto_q);
spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
ctxt = list_first_entry(&rdma_xprt->sc_read_complete_q,
struct svc_rdma_op_ctxt, list);
list_del(&ctxt->list);
spin_unlock(&rdma_xprt->sc_rq_dto_lock);
rdma_read_complete(rqstp, ctxt);
goto complete;
} else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next,
struct svc_rdma_op_ctxt,
dto_q);
list_del_init(&ctxt->dto_q);
ctxt = list_first_entry(&rdma_xprt->sc_rq_dto_q,
struct svc_rdma_op_ctxt, list);
list_del(&ctxt->list);
} else {
atomic_inc(&rdma_stat_rq_starve);
clear_bit(XPT_DATA, &xprt->xpt_flags);
ctxt = NULL;
}
spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
spin_unlock(&rdma_xprt->sc_rq_dto_lock);
if (!ctxt) {
/* This is the EAGAIN path. The svc_recv routine will
* return -EAGAIN, the nfsd thread will go to call into
......
......@@ -476,7 +476,8 @@ static int send_reply(struct svcxprt_rdma *rdma,
/* Prepare the SGE for the RPCRDMA Header */
ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
ctxt->sge[0].length =
svc_rdma_xdr_get_reply_hdr_len((__be32 *)rdma_resp);
ctxt->sge[0].addr =
ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
ctxt->sge[0].length, DMA_TO_DEVICE);
......@@ -559,12 +560,12 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
struct rpcrdma_msg *rdma_argp;
struct rpcrdma_msg *rdma_resp;
struct rpcrdma_write_array *wr_ary, *rp_ary;
enum rpcrdma_proc reply_type;
int ret;
int inline_bytes;
struct page *res_page;
struct svc_rdma_req_map *vec;
u32 inv_rkey;
__be32 *p;
dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
......@@ -596,12 +597,17 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
if (!res_page)
goto err0;
rdma_resp = page_address(res_page);
if (rp_ary)
reply_type = RDMA_NOMSG;
else
reply_type = RDMA_MSG;
svc_rdma_xdr_encode_reply_header(rdma, rdma_argp,
rdma_resp, reply_type);
p = &rdma_resp->rm_xid;
*p++ = rdma_argp->rm_xid;
*p++ = rdma_argp->rm_vers;
*p++ = rdma->sc_fc_credits;
*p++ = rp_ary ? rdma_nomsg : rdma_msg;
/* Start with empty chunks */
*p++ = xdr_zero;
*p++ = xdr_zero;
*p = xdr_zero;
/* Send any write-chunk data and build resp write-list */
if (wr_ary) {
......
......@@ -157,8 +157,7 @@ static struct svc_rdma_op_ctxt *alloc_ctxt(struct svcxprt_rdma *xprt,
ctxt = kmalloc(sizeof(*ctxt), flags);
if (ctxt) {
ctxt->xprt = xprt;
INIT_LIST_HEAD(&ctxt->free);
INIT_LIST_HEAD(&ctxt->dto_q);
INIT_LIST_HEAD(&ctxt->list);
}
return ctxt;
}
......@@ -180,7 +179,7 @@ static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt)
dprintk("svcrdma: No memory for RDMA ctxt\n");
return false;
}
list_add(&ctxt->free, &xprt->sc_ctxts);
list_add(&ctxt->list, &xprt->sc_ctxts);
}
return true;
}
......@@ -189,15 +188,15 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
{
struct svc_rdma_op_ctxt *ctxt = NULL;
spin_lock_bh(&xprt->sc_ctxt_lock);
spin_lock(&xprt->sc_ctxt_lock);
xprt->sc_ctxt_used++;
if (list_empty(&xprt->sc_ctxts))
goto out_empty;
ctxt = list_first_entry(&xprt->sc_ctxts,
struct svc_rdma_op_ctxt, free);
list_del_init(&ctxt->free);
spin_unlock_bh(&xprt->sc_ctxt_lock);
struct svc_rdma_op_ctxt, list);
list_del(&ctxt->list);
spin_unlock(&xprt->sc_ctxt_lock);
out:
ctxt->count = 0;
......@@ -209,15 +208,15 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
/* Either pre-allocation missed the mark, or send
* queue accounting is broken.
*/
spin_unlock_bh(&xprt->sc_ctxt_lock);
spin_unlock(&xprt->sc_ctxt_lock);
ctxt = alloc_ctxt(xprt, GFP_NOIO);
if (ctxt)
goto out;
spin_lock_bh(&xprt->sc_ctxt_lock);
spin_lock(&xprt->sc_ctxt_lock);
xprt->sc_ctxt_used--;
spin_unlock_bh(&xprt->sc_ctxt_lock);
spin_unlock(&xprt->sc_ctxt_lock);
WARN_ONCE(1, "svcrdma: empty RDMA ctxt list?\n");
return NULL;
}
......@@ -254,10 +253,10 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
for (i = 0; i < ctxt->count; i++)
put_page(ctxt->pages[i]);
spin_lock_bh(&xprt->sc_ctxt_lock);
spin_lock(&xprt->sc_ctxt_lock);
xprt->sc_ctxt_used--;
list_add(&ctxt->free, &xprt->sc_ctxts);
spin_unlock_bh(&xprt->sc_ctxt_lock);
list_add(&ctxt->list, &xprt->sc_ctxts);
spin_unlock(&xprt->sc_ctxt_lock);
}
static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt)
......@@ -266,8 +265,8 @@ static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt)
struct svc_rdma_op_ctxt *ctxt;
ctxt = list_first_entry(&xprt->sc_ctxts,
struct svc_rdma_op_ctxt, free);
list_del(&ctxt->free);
struct svc_rdma_op_ctxt, list);
list_del(&ctxt->list);
kfree(ctxt);
}
}
......@@ -404,7 +403,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
/* All wc fields are now known to be valid */
ctxt->byte_len = wc->byte_len;
spin_lock(&xprt->sc_rq_dto_lock);
list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q);
list_add_tail(&ctxt->list, &xprt->sc_rq_dto_q);
spin_unlock(&xprt->sc_rq_dto_lock);
set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
......@@ -525,7 +524,7 @@ void svc_rdma_wc_read(struct ib_cq *cq, struct ib_wc *wc)
read_hdr = ctxt->read_hdr;
spin_lock(&xprt->sc_rq_dto_lock);
list_add_tail(&read_hdr->dto_q,
list_add_tail(&read_hdr->list,
&xprt->sc_read_complete_q);
spin_unlock(&xprt->sc_rq_dto_lock);
......@@ -557,7 +556,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
return NULL;
svc_xprt_init(&init_net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
INIT_LIST_HEAD(&cma_xprt->sc_dto_q);
INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
......@@ -571,6 +569,14 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
spin_lock_init(&cma_xprt->sc_ctxt_lock);
spin_lock_init(&cma_xprt->sc_map_lock);
/*
* Note that this implies that the underlying transport support
* has some form of congestion control (see RFC 7530 section 3.1
* paragraph 2). For now, we assume that all supported RDMA
* transports are suitable here.
*/
set_bit(XPT_CONG_CTRL, &cma_xprt->sc_xprt.xpt_flags);
if (listener)
set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
......@@ -923,14 +929,14 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
{
struct svc_rdma_fastreg_mr *frmr = NULL;
spin_lock_bh(&rdma->sc_frmr_q_lock);
spin_lock(&rdma->sc_frmr_q_lock);
if (!list_empty(&rdma->sc_frmr_q)) {
frmr = list_entry(rdma->sc_frmr_q.next,
struct svc_rdma_fastreg_mr, frmr_list);
list_del_init(&frmr->frmr_list);
frmr->sg_nents = 0;
}
spin_unlock_bh(&rdma->sc_frmr_q_lock);
spin_unlock(&rdma->sc_frmr_q_lock);
if (frmr)
return frmr;
......@@ -943,10 +949,10 @@ void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
if (frmr) {
ib_dma_unmap_sg(rdma->sc_cm_id->device,
frmr->sg, frmr->sg_nents, frmr->direction);
spin_lock_bh(&rdma->sc_frmr_q_lock);
spin_lock(&rdma->sc_frmr_q_lock);
WARN_ON_ONCE(!list_empty(&frmr->frmr_list));
list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
spin_unlock_bh(&rdma->sc_frmr_q_lock);
spin_unlock(&rdma->sc_frmr_q_lock);
}
}
......@@ -1002,6 +1008,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
newxprt->sc_max_req_size = svcrdma_max_req_size;
newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr,
svcrdma_max_requests);
newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests);
newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr,
svcrdma_max_bc_requests);
newxprt->sc_rq_depth = newxprt->sc_max_requests +
......@@ -1027,13 +1034,13 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
goto errout;
}
newxprt->sc_sq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_sq_depth,
0, IB_POLL_SOFTIRQ);
0, IB_POLL_WORKQUEUE);
if (IS_ERR(newxprt->sc_sq_cq)) {
dprintk("svcrdma: error creating SQ CQ for connect request\n");
goto errout;
}
newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_rq_depth,
0, IB_POLL_SOFTIRQ);
0, IB_POLL_WORKQUEUE);
if (IS_ERR(newxprt->sc_rq_cq)) {
dprintk("svcrdma: error creating RQ CQ for connect request\n");
goto errout;
......@@ -1213,20 +1220,18 @@ static void __svc_rdma_free(struct work_struct *work)
*/
while (!list_empty(&rdma->sc_read_complete_q)) {
struct svc_rdma_op_ctxt *ctxt;
ctxt = list_entry(rdma->sc_read_complete_q.next,
struct svc_rdma_op_ctxt,
dto_q);
list_del_init(&ctxt->dto_q);
ctxt = list_first_entry(&rdma->sc_read_complete_q,
struct svc_rdma_op_ctxt, list);
list_del(&ctxt->list);
svc_rdma_put_context(ctxt, 1);
}
/* Destroy queued, but not processed recv completions */
while (!list_empty(&rdma->sc_rq_dto_q)) {
struct svc_rdma_op_ctxt *ctxt;
ctxt = list_entry(rdma->sc_rq_dto_q.next,
struct svc_rdma_op_ctxt,
dto_q);
list_del_init(&ctxt->dto_q);
ctxt = list_first_entry(&rdma->sc_rq_dto_q,
struct svc_rdma_op_ctxt, list);
list_del(&ctxt->list);
svc_rdma_put_context(ctxt, 1);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment