Commit ad9a19d0 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'nfsd-4.14' of git://linux-nfs.org/~bfields/linux

Pull nfsd updates from Bruce Fields:
 "More RDMA work and some op-structure constification from Chuck Lever,
  and a small cleanup to our xdr encoding"

* tag 'nfsd-4.14' of git://linux-nfs.org/~bfields/linux:
  svcrdma: Estimate Send Queue depth properly
  rdma core: Add rdma_rw_mr_payload()
  svcrdma: Limit RQ depth
  svcrdma: Populate tail iovec when receiving
  nfsd: Incoming xdr_bufs may have content in tail buffer
  svcrdma: Clean up svc_rdma_build_read_chunk()
  sunrpc: Const-ify struct sv_serv_ops
  nfsd: Const-ify NFSv4 encoding and decoding ops arrays
  sunrpc: Const-ify instances of struct svc_xprt_ops
  nfsd4: individual encoders no longer see error cases
  nfsd4: skip encoder in trivial error cases
  nfsd4: define ->op_release for compound ops
  nfsd4: opdesc will be useful outside nfs4proc.c
  nfsd4: move some nfsd4 op definitions to xdr4.h
parents 66ba772e 26fb2254
......@@ -643,6 +643,30 @@ void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
}
EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature);
/**
* rdma_rw_mr_factor - return number of MRs required for a payload
* @device: device handling the connection
* @port_num: port num to which the connection is bound
* @maxpages: maximum payload pages per rdma_rw_ctx
*
* Returns the number of MRs the device requires to move @maxpayload
* bytes. The returned value is used during transport creation to
* compute max_rdma_ctxts and the size of the transport's Send and
* Send Completion Queues.
*/
unsigned int rdma_rw_mr_factor(struct ib_device *device, u8 port_num,
unsigned int maxpages)
{
unsigned int mr_pages;
if (rdma_rw_can_use_mr(device, port_num))
mr_pages = rdma_rw_fr_page_list_len(device);
else
mr_pages = device->attrs.max_sge_rd;
return DIV_ROUND_UP(maxpages, mr_pages);
}
EXPORT_SYMBOL(rdma_rw_mr_factor);
void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr)
{
u32 factor;
......
......@@ -396,7 +396,7 @@ static int lockd_start_svc(struct svc_serv *serv)
return error;
}
static struct svc_serv_ops lockd_sv_ops = {
static const struct svc_serv_ops lockd_sv_ops = {
.svo_shutdown = svc_rpcb_cleanup,
.svo_enqueue_xprt = svc_xprt_do_enqueue,
};
......
......@@ -226,26 +226,26 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
return ret;
}
static struct svc_serv_ops nfs40_cb_sv_ops = {
static const struct svc_serv_ops nfs40_cb_sv_ops = {
.svo_function = nfs4_callback_svc,
.svo_enqueue_xprt = svc_xprt_do_enqueue,
.svo_setup = svc_set_num_threads_sync,
.svo_module = THIS_MODULE,
};
#if defined(CONFIG_NFS_V4_1)
static struct svc_serv_ops nfs41_cb_sv_ops = {
static const struct svc_serv_ops nfs41_cb_sv_ops = {
.svo_function = nfs41_callback_svc,
.svo_enqueue_xprt = svc_xprt_do_enqueue,
.svo_setup = svc_set_num_threads_sync,
.svo_module = THIS_MODULE,
};
static struct svc_serv_ops *nfs4_cb_sv_ops[] = {
static const struct svc_serv_ops *nfs4_cb_sv_ops[] = {
[0] = &nfs40_cb_sv_ops,
[1] = &nfs41_cb_sv_ops,
};
#else
static struct svc_serv_ops *nfs4_cb_sv_ops[] = {
static const struct svc_serv_ops *nfs4_cb_sv_ops[] = {
[0] = &nfs40_cb_sv_ops,
[1] = NULL,
};
......@@ -254,8 +254,8 @@ static struct svc_serv_ops *nfs4_cb_sv_ops[] = {
static struct svc_serv *nfs_callback_create_svc(int minorversion)
{
struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
const struct svc_serv_ops *sv_ops;
struct svc_serv *serv;
struct svc_serv_ops *sv_ops;
/*
* Check whether we're already up and running.
......
......@@ -784,6 +784,14 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return status;
}
static void
nfsd4_read_release(union nfsd4_op_u *u)
{
if (u->read.rd_filp)
fput(u->read.rd_filp);
}
static __be32
nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
......@@ -912,6 +920,13 @@ nfsd4_secinfo_no_name(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstat
return nfs_ok;
}
static void
nfsd4_secinfo_release(union nfsd4_op_u *u)
{
if (u->secinfo.si_exp)
exp_put(u->secinfo.si_exp);
}
static __be32
nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
......@@ -1335,6 +1350,12 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp,
return nfserr;
}
static void
nfsd4_getdeviceinfo_release(union nfsd4_op_u *u)
{
kfree(u->getdeviceinfo.gd_device);
}
static __be32
nfsd4_layoutget(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
......@@ -1415,6 +1436,12 @@ nfsd4_layoutget(struct svc_rqst *rqstp,
return nfserr;
}
static void
nfsd4_layoutget_release(union nfsd4_op_u *u)
{
kfree(u->layoutget.lg_content);
}
static __be32
nfsd4_layoutcommit(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
......@@ -1541,49 +1568,6 @@ static inline void nfsd4_increment_op_stats(u32 opnum)
nfsdstats.nfs4_opcount[opnum]++;
}
enum nfsd4_op_flags {
ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */
ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */
ALLOWED_AS_FIRST_OP = 1 << 2, /* ops reqired first in compound */
/* For rfc 5661 section 2.6.3.1.1: */
OP_HANDLES_WRONGSEC = 1 << 3,
OP_IS_PUTFH_LIKE = 1 << 4,
/*
* These are the ops whose result size we estimate before
* encoding, to avoid performing an op then not being able to
* respond or cache a response. This includes writes and setattrs
* as well as the operations usually called "nonidempotent":
*/
OP_MODIFIES_SOMETHING = 1 << 5,
/*
* Cache compounds containing these ops in the xid-based drc:
* We use the DRC for compounds containing non-idempotent
* operations, *except* those that are 4.1-specific (since
* sessions provide their own EOS), and except for stateful
* operations other than setclientid and setclientid_confirm
* (since sequence numbers provide EOS for open, lock, etc in
* the v4.0 case).
*/
OP_CACHEME = 1 << 6,
/*
* These are ops which clear current state id.
*/
OP_CLEAR_STATEID = 1 << 7,
};
struct nfsd4_operation {
__be32 (*op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
union nfsd4_op_u *);
u32 op_flags;
char *op_name;
/* Try to get response size before operation */
u32 (*op_rsize_bop)(struct svc_rqst *, struct nfsd4_op *);
void (*op_get_currentstateid)(struct nfsd4_compound_state *,
union nfsd4_op_u *);
void (*op_set_currentstateid)(struct nfsd4_compound_state *,
union nfsd4_op_u *);
};
static const struct nfsd4_operation nfsd4_ops[];
static const char *nfsd4_op_name(unsigned opnum);
......@@ -1621,7 +1605,7 @@ static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args)
return nfs_ok;
}
static inline const struct nfsd4_operation *OPDESC(struct nfsd4_op *op)
const struct nfsd4_operation *OPDESC(struct nfsd4_op *op)
{
return &nfsd4_ops[op->opnum];
}
......@@ -1694,7 +1678,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
struct nfsd4_compoundargs *args = rqstp->rq_argp;
struct nfsd4_compoundres *resp = rqstp->rq_resp;
struct nfsd4_op *op;
const struct nfsd4_operation *opdesc;
struct nfsd4_compound_state *cstate = &resp->cstate;
struct svc_fh *current_fh = &cstate->current_fh;
struct svc_fh *save_fh = &cstate->save_fh;
......@@ -1747,15 +1730,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
goto encode_op;
}
opdesc = OPDESC(op);
if (!current_fh->fh_dentry) {
if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) {
if (!(op->opdesc->op_flags & ALLOWED_WITHOUT_FH)) {
op->status = nfserr_nofilehandle;
goto encode_op;
}
} else if (current_fh->fh_export->ex_fslocs.migrated &&
!(opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) {
!(op->opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) {
op->status = nfserr_moved;
goto encode_op;
}
......@@ -1763,12 +1744,12 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
fh_clear_wcc(current_fh);
/* If op is non-idempotent */
if (opdesc->op_flags & OP_MODIFIES_SOMETHING) {
if (op->opdesc->op_flags & OP_MODIFIES_SOMETHING) {
/*
* Don't execute this op if we couldn't encode a
* succesful reply:
*/
u32 plen = opdesc->op_rsize_bop(rqstp, op);
u32 plen = op->opdesc->op_rsize_bop(rqstp, op);
/*
* Plus if there's another operation, make sure
* we'll have space to at least encode an error:
......@@ -1781,9 +1762,9 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
if (op->status)
goto encode_op;
if (opdesc->op_get_currentstateid)
opdesc->op_get_currentstateid(cstate, &op->u);
op->status = opdesc->op_func(rqstp, cstate, &op->u);
if (op->opdesc->op_get_currentstateid)
op->opdesc->op_get_currentstateid(cstate, &op->u);
op->status = op->opdesc->op_func(rqstp, cstate, &op->u);
/* Only from SEQUENCE */
if (cstate->status == nfserr_replay_cache) {
......@@ -1792,10 +1773,10 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
goto out;
}
if (!op->status) {
if (opdesc->op_set_currentstateid)
opdesc->op_set_currentstateid(cstate, &op->u);
if (op->opdesc->op_set_currentstateid)
op->opdesc->op_set_currentstateid(cstate, &op->u);
if (opdesc->op_flags & OP_CLEAR_STATEID)
if (op->opdesc->op_flags & OP_CLEAR_STATEID)
clear_current_stateid(cstate);
if (need_wrongsec_check(rqstp))
......@@ -2160,13 +2141,15 @@ static const struct nfsd4_operation nfsd4_ops[] = {
},
[OP_LOCK] = {
.op_func = nfsd4_lock,
.op_flags = OP_MODIFIES_SOMETHING,
.op_flags = OP_MODIFIES_SOMETHING |
OP_NONTRIVIAL_ERROR_ENCODE,
.op_name = "OP_LOCK",
.op_rsize_bop = nfsd4_lock_rsize,
.op_set_currentstateid = nfsd4_set_lockstateid,
},
[OP_LOCKT] = {
.op_func = nfsd4_lockt,
.op_flags = OP_NONTRIVIAL_ERROR_ENCODE,
.op_name = "OP_LOCKT",
.op_rsize_bop = nfsd4_lock_rsize,
},
......@@ -2238,6 +2221,7 @@ static const struct nfsd4_operation nfsd4_ops[] = {
},
[OP_READ] = {
.op_func = nfsd4_read,
.op_release = nfsd4_read_release,
.op_name = "OP_READ",
.op_rsize_bop = nfsd4_read_rsize,
.op_get_currentstateid = nfsd4_get_readstateid,
......@@ -2287,6 +2271,7 @@ static const struct nfsd4_operation nfsd4_ops[] = {
},
[OP_SECINFO] = {
.op_func = nfsd4_secinfo,
.op_release = nfsd4_secinfo_release,
.op_flags = OP_HANDLES_WRONGSEC,
.op_name = "OP_SECINFO",
.op_rsize_bop = nfsd4_secinfo_rsize,
......@@ -2294,14 +2279,16 @@ static const struct nfsd4_operation nfsd4_ops[] = {
[OP_SETATTR] = {
.op_func = nfsd4_setattr,
.op_name = "OP_SETATTR",
.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME
| OP_NONTRIVIAL_ERROR_ENCODE,
.op_rsize_bop = nfsd4_setattr_rsize,
.op_get_currentstateid = nfsd4_get_setattrstateid,
},
[OP_SETCLIENTID] = {
.op_func = nfsd4_setclientid,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
| OP_MODIFIES_SOMETHING | OP_CACHEME,
| OP_MODIFIES_SOMETHING | OP_CACHEME
| OP_NONTRIVIAL_ERROR_ENCODE,
.op_name = "OP_SETCLIENTID",
.op_rsize_bop = nfsd4_setclientid_rsize,
},
......@@ -2388,6 +2375,7 @@ static const struct nfsd4_operation nfsd4_ops[] = {
},
[OP_SECINFO_NO_NAME] = {
.op_func = nfsd4_secinfo_no_name,
.op_release = nfsd4_secinfo_release,
.op_flags = OP_HANDLES_WRONGSEC,
.op_name = "OP_SECINFO_NO_NAME",
.op_rsize_bop = nfsd4_secinfo_rsize,
......@@ -2408,12 +2396,14 @@ static const struct nfsd4_operation nfsd4_ops[] = {
#ifdef CONFIG_NFSD_PNFS
[OP_GETDEVICEINFO] = {
.op_func = nfsd4_getdeviceinfo,
.op_release = nfsd4_getdeviceinfo_release,
.op_flags = ALLOWED_WITHOUT_FH,
.op_name = "OP_GETDEVICEINFO",
.op_rsize_bop = nfsd4_getdeviceinfo_rsize,
},
[OP_LAYOUTGET] = {
.op_func = nfsd4_layoutget,
.op_release = nfsd4_layoutget_release,
.op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_LAYOUTGET",
.op_rsize_bop = nfsd4_layoutget_rsize,
......
This diff is collapsed.
......@@ -475,7 +475,7 @@ static int nfsd_get_default_max_blksize(void)
return ret;
}
static struct svc_serv_ops nfsd_thread_sv_ops = {
static const struct svc_serv_ops nfsd_thread_sv_ops = {
.svo_shutdown = nfsd_last_thread,
.svo_function = nfsd,
.svo_enqueue_xprt = svc_xprt_do_enqueue,
......
......@@ -538,6 +538,7 @@ struct nfsd4_seek {
struct nfsd4_op {
int opnum;
const struct nfsd4_operation * opdesc;
__be32 status;
union nfsd4_op_u {
struct nfsd4_access access;
......@@ -614,6 +615,7 @@ struct nfsd4_compoundargs {
__be32 * end;
struct page ** pagelist;
int pagelen;
bool tail;
__be32 tmp[8];
__be32 * tmpp;
struct svcxdr_tmpbuf *to_free;
......@@ -661,6 +663,7 @@ static inline bool nfsd4_last_compound_op(struct svc_rqst *rqstp)
return argp->opcnt == resp->opcnt;
}
const struct nfsd4_operation *OPDESC(struct nfsd4_op *op);
int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op);
void warn_on_nonidempotent_op(struct nfsd4_op *op);
......@@ -748,6 +751,53 @@ extern __be32 nfsd4_free_stateid(struct svc_rqst *rqstp,
struct nfsd4_compound_state *, union nfsd4_op_u *);
extern void nfsd4_bump_seqid(struct nfsd4_compound_state *, __be32 nfserr);
enum nfsd4_op_flags {
ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */
ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */
ALLOWED_AS_FIRST_OP = 1 << 2, /* ops reqired first in compound */
/* For rfc 5661 section 2.6.3.1.1: */
OP_HANDLES_WRONGSEC = 1 << 3,
OP_IS_PUTFH_LIKE = 1 << 4,
/*
* These are the ops whose result size we estimate before
* encoding, to avoid performing an op then not being able to
* respond or cache a response. This includes writes and setattrs
* as well as the operations usually called "nonidempotent":
*/
OP_MODIFIES_SOMETHING = 1 << 5,
/*
* Cache compounds containing these ops in the xid-based drc:
* We use the DRC for compounds containing non-idempotent
* operations, *except* those that are 4.1-specific (since
* sessions provide their own EOS), and except for stateful
* operations other than setclientid and setclientid_confirm
* (since sequence numbers provide EOS for open, lock, etc in
* the v4.0 case).
*/
OP_CACHEME = 1 << 6,
/*
* These are ops which clear current state id.
*/
OP_CLEAR_STATEID = 1 << 7,
/* Most ops return only an error on failure; some may do more: */
OP_NONTRIVIAL_ERROR_ENCODE = 1 << 8,
};
struct nfsd4_operation {
__be32 (*op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
union nfsd4_op_u *);
void (*op_release)(union nfsd4_op_u *);
u32 op_flags;
char *op_name;
/* Try to get response size before operation */
u32 (*op_rsize_bop)(struct svc_rqst *, struct nfsd4_op *);
void (*op_get_currentstateid)(struct nfsd4_compound_state *,
union nfsd4_op_u *);
void (*op_set_currentstateid)(struct nfsd4_compound_state *,
union nfsd4_op_u *);
};
#endif
/*
......
......@@ -99,7 +99,7 @@ struct svc_serv {
unsigned int sv_nrpools; /* number of thread pools */
struct svc_pool * sv_pools; /* array of thread pools */
struct svc_serv_ops *sv_ops; /* server operations */
const struct svc_serv_ops *sv_ops; /* server operations */
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
struct list_head sv_cb_list; /* queue for callback requests
* that arrive over the same
......@@ -465,7 +465,7 @@ int svc_rpcb_setup(struct svc_serv *serv, struct net *net);
void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net);
int svc_bind(struct svc_serv *serv, struct net *net);
struct svc_serv *svc_create(struct svc_program *, unsigned int,
struct svc_serv_ops *);
const struct svc_serv_ops *);
struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv,
struct svc_pool *pool, int node);
struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
......@@ -475,7 +475,7 @@ void svc_exit_thread(struct svc_rqst *);
unsigned int svc_pool_map_get(void);
void svc_pool_map_put(void);
struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int,
struct svc_serv_ops *);
const struct svc_serv_ops *);
int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
int svc_set_num_threads_sync(struct svc_serv *, struct svc_pool *, int);
int svc_pool_stats_open(struct svc_serv *serv, struct file *file);
......
......@@ -31,7 +31,7 @@ struct svc_xprt_ops {
struct svc_xprt_class {
const char *xcl_name;
struct module *xcl_owner;
struct svc_xprt_ops *xcl_ops;
const struct svc_xprt_ops *xcl_ops;
struct list_head xcl_list;
u32 xcl_max_payload;
int xcl_ident;
......@@ -49,7 +49,7 @@ struct svc_xpt_user {
struct svc_xprt {
struct svc_xprt_class *xpt_class;
struct svc_xprt_ops *xpt_ops;
const struct svc_xprt_ops *xpt_ops;
struct kref xpt_ref;
struct list_head xpt_list;
struct list_head xpt_ready;
......
......@@ -81,6 +81,8 @@ struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
struct ib_cqe *cqe, struct ib_send_wr *chain_wr);
unsigned int rdma_rw_mr_factor(struct ib_device *device, u8 port_num,
unsigned int maxpages);
void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr);
int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr);
void rdma_rw_cleanup_mrs(struct ib_qp *qp);
......
......@@ -421,7 +421,7 @@ __svc_init_bc(struct svc_serv *serv)
*/
static struct svc_serv *
__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
struct svc_serv_ops *ops)
const struct svc_serv_ops *ops)
{
struct svc_serv *serv;
unsigned int vers;
......@@ -486,7 +486,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
struct svc_serv *
svc_create(struct svc_program *prog, unsigned int bufsize,
struct svc_serv_ops *ops)
const struct svc_serv_ops *ops)
{
return __svc_create(prog, bufsize, /*npools*/1, ops);
}
......@@ -494,7 +494,7 @@ EXPORT_SYMBOL_GPL(svc_create);
struct svc_serv *
svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
struct svc_serv_ops *ops)
const struct svc_serv_ops *ops)
{
struct svc_serv *serv;
unsigned int npools = svc_pool_map_get();
......
......@@ -693,7 +693,7 @@ static struct svc_xprt *svc_udp_create(struct svc_serv *serv,
return svc_create_socket(serv, IPPROTO_UDP, net, sa, salen, flags);
}
static struct svc_xprt_ops svc_udp_ops = {
static const struct svc_xprt_ops svc_udp_ops = {
.xpo_create = svc_udp_create,
.xpo_recvfrom = svc_udp_recvfrom,
.xpo_sendto = svc_udp_sendto,
......@@ -1241,7 +1241,7 @@ static void svc_bc_tcp_sock_detach(struct svc_xprt *xprt)
{
}
static struct svc_xprt_ops svc_tcp_bc_ops = {
static const struct svc_xprt_ops svc_tcp_bc_ops = {
.xpo_create = svc_bc_tcp_create,
.xpo_detach = svc_bc_tcp_sock_detach,
.xpo_free = svc_bc_sock_free,
......@@ -1275,7 +1275,7 @@ static void svc_cleanup_bc_xprt_sock(void)
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
static struct svc_xprt_ops svc_tcp_ops = {
static const struct svc_xprt_ops svc_tcp_ops = {
.xpo_create = svc_tcp_create,
.xpo_recvfrom = svc_tcp_recvfrom,
.xpo_sendto = svc_tcp_sendto,
......
......@@ -660,19 +660,21 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
return -EIO;
}
/* Walk the segments in the Read chunk starting at @p and construct
* RDMA Read operations to pull the chunk to the server.
*/
static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp,
struct svc_rdma_read_info *info,
__be32 *p)
{
int ret;
ret = -EINVAL;
info->ri_chunklen = 0;
while (*p++ != xdr_zero) {
while (*p++ != xdr_zero && be32_to_cpup(p++) == info->ri_position) {
u32 rs_handle, rs_length;
u64 rs_offset;
if (be32_to_cpup(p++) != info->ri_position)
break;
rs_handle = be32_to_cpup(p++);
rs_length = be32_to_cpup(p++);
p = xdr_decode_hyper(p, &rs_offset);
......@@ -689,78 +691,6 @@ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp,
return ret;
}
/* If there is inline content following the Read chunk, append it to
* the page list immediately following the data payload. This has to
* be done after the reader function has determined how many pages
* were consumed for RDMA Read.
*
* On entry, ri_pageno and ri_pageoff point directly to the end of the
* page list. On exit, both have been updated to the new "next byte".
*
* Assumptions:
* - Inline content fits entirely in rq_pages[0]
* - Trailing content is only a handful of bytes
*/
static int svc_rdma_copy_tail(struct svc_rqst *rqstp,
struct svc_rdma_read_info *info)
{
struct svc_rdma_op_ctxt *head = info->ri_readctxt;
unsigned int tail_length, remaining;
u8 *srcp, *destp;
/* Assert that all inline content fits in page 0. This is an
* implementation limit, not a protocol limit.
*/
if (head->arg.head[0].iov_len > PAGE_SIZE) {
pr_warn_once("svcrdma: too much trailing inline content\n");
return -EINVAL;
}
srcp = head->arg.head[0].iov_base;
srcp += info->ri_position;
tail_length = head->arg.head[0].iov_len - info->ri_position;
remaining = tail_length;
/* If there is room on the last page in the page list, try to
* fit the trailing content there.
*/
if (info->ri_pageoff > 0) {
unsigned int len;
len = min_t(unsigned int, remaining,
PAGE_SIZE - info->ri_pageoff);
destp = page_address(rqstp->rq_pages[info->ri_pageno]);
destp += info->ri_pageoff;
memcpy(destp, srcp, len);
srcp += len;
destp += len;
info->ri_pageoff += len;
remaining -= len;
if (info->ri_pageoff == PAGE_SIZE) {
info->ri_pageno++;
info->ri_pageoff = 0;
}
}
/* Otherwise, a fresh page is needed. */
if (remaining) {
head->arg.pages[info->ri_pageno] =
rqstp->rq_pages[info->ri_pageno];
head->count++;
destp = page_address(rqstp->rq_pages[info->ri_pageno]);
memcpy(destp, srcp, remaining);
info->ri_pageoff += remaining;
}
head->arg.page_len += tail_length;
head->arg.len += tail_length;
head->arg.buflen += tail_length;
return 0;
}
/* Construct RDMA Reads to pull over a normal Read chunk. The chunk
* data lands in the page list of head->arg.pages.
*
......@@ -785,34 +715,28 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp,
if (ret < 0)
goto out;
/* Read chunk may need XDR round-up (see RFC 5666, s. 3.7).
/* Split the Receive buffer between the head and tail
* buffers at Read chunk's position. XDR roundup of the
* chunk is not included in either the pagelist or in
* the tail.
*/
if (info->ri_chunklen & 3) {
u32 padlen = 4 - (info->ri_chunklen & 3);
info->ri_chunklen += padlen;
head->arg.tail[0].iov_base =
head->arg.head[0].iov_base + info->ri_position;
head->arg.tail[0].iov_len =
head->arg.head[0].iov_len - info->ri_position;
head->arg.head[0].iov_len = info->ri_position;
/* NB: data payload always starts on XDR alignment,
* thus the pad can never contain a page boundary.
*/
info->ri_pageoff += padlen;
if (info->ri_pageoff == PAGE_SIZE) {
info->ri_pageno++;
info->ri_pageoff = 0;
}
}
/* Read chunk may need XDR roundup (see RFC 5666, s. 3.7).
*
* NFSv2/3 write decoders need the length of the tail to
* contain the size of the roundup padding.
*/
head->arg.tail[0].iov_len += 4 - (info->ri_chunklen & 3);
head->arg.page_len = info->ri_chunklen;
head->arg.len += info->ri_chunklen;
head->arg.buflen += info->ri_chunklen;
if (info->ri_position < head->arg.head[0].iov_len) {
ret = svc_rdma_copy_tail(rqstp, info);
if (ret < 0)
goto out;
}
head->arg.head[0].iov_len = info->ri_position;
out:
return ret;
}
......
......@@ -51,6 +51,7 @@
#include <linux/workqueue.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <rdma/rw.h>
#include <linux/sunrpc/svc_rdma.h>
#include <linux/export.h>
#include "xprt_rdma.h"
......@@ -70,7 +71,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt);
static int svc_rdma_secure_port(struct svc_rqst *);
static void svc_rdma_kill_temp_xprt(struct svc_xprt *);
static struct svc_xprt_ops svc_rdma_ops = {
static const struct svc_xprt_ops svc_rdma_ops = {
.xpo_create = svc_rdma_create,
.xpo_recvfrom = svc_rdma_recvfrom,
.xpo_sendto = svc_rdma_sendto,
......@@ -98,7 +99,7 @@ static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *, struct net *,
static void svc_rdma_bc_detach(struct svc_xprt *);
static void svc_rdma_bc_free(struct svc_xprt *);
static struct svc_xprt_ops svc_rdma_bc_ops = {
static const struct svc_xprt_ops svc_rdma_bc_ops = {
.xpo_create = svc_rdma_bc_create,
.xpo_detach = svc_rdma_bc_detach,
.xpo_free = svc_rdma_bc_free,
......@@ -167,8 +168,8 @@ static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt)
{
unsigned int i;
/* Each RPC/RDMA credit can consume a number of send
* and receive WQEs. One ctxt is allocated for each.
/* Each RPC/RDMA credit can consume one Receive and
* one Send WQE at the same time.
*/
i = xprt->sc_sq_depth + xprt->sc_rq_depth;
......@@ -713,7 +714,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
struct ib_qp_init_attr qp_attr;
struct ib_device *dev;
struct sockaddr *sap;
unsigned int i;
unsigned int i, ctxts;
int ret = 0;
listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
......@@ -742,14 +743,26 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
newxprt->sc_max_sge = min((size_t)dev->attrs.max_sge,
(size_t)RPCSVC_MAXPAGES);
newxprt->sc_max_req_size = svcrdma_max_req_size;
newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr,
svcrdma_max_requests);
newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests);
newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr,
svcrdma_max_bc_requests);
newxprt->sc_max_requests = svcrdma_max_requests;
newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;
newxprt->sc_rq_depth = newxprt->sc_max_requests +
newxprt->sc_max_bc_requests;
newxprt->sc_sq_depth = newxprt->sc_rq_depth;
if (newxprt->sc_rq_depth > dev->attrs.max_qp_wr) {
pr_warn("svcrdma: reducing receive depth to %d\n",
dev->attrs.max_qp_wr);
newxprt->sc_rq_depth = dev->attrs.max_qp_wr;
newxprt->sc_max_requests = newxprt->sc_rq_depth - 2;
newxprt->sc_max_bc_requests = 2;
}
newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests);
ctxts = rdma_rw_mr_factor(dev, newxprt->sc_port_num, RPCSVC_MAXPAGES);
ctxts *= newxprt->sc_max_requests;
newxprt->sc_sq_depth = newxprt->sc_rq_depth + ctxts;
if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr) {
pr_warn("svcrdma: reducing send depth to %d\n",
dev->attrs.max_qp_wr);
newxprt->sc_sq_depth = dev->attrs.max_qp_wr;
}
atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth);
if (!svc_rdma_prealloc_ctxts(newxprt))
......@@ -784,8 +797,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
qp_attr.event_handler = qp_event_handler;
qp_attr.qp_context = &newxprt->sc_xprt;
qp_attr.port_num = newxprt->sc_port_num;
qp_attr.cap.max_rdma_ctxs = newxprt->sc_max_requests;
qp_attr.cap.max_send_wr = newxprt->sc_sq_depth;
qp_attr.cap.max_rdma_ctxs = ctxts;
qp_attr.cap.max_send_wr = newxprt->sc_sq_depth - ctxts;
qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth;
qp_attr.cap.max_send_sge = newxprt->sc_max_sge;
qp_attr.cap.max_recv_sge = newxprt->sc_max_sge;
......@@ -853,6 +866,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dprintk(" remote address : %pIS:%u\n", sap, rpc_get_port(sap));
dprintk(" max_sge : %d\n", newxprt->sc_max_sge);
dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth);
dprintk(" rdma_rw_ctxs : %d\n", ctxts);
dprintk(" max_requests : %d\n", newxprt->sc_max_requests);
dprintk(" ord : %d\n", newxprt->sc_ord);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment