Commit f9773b22 authored by Trond Myklebust's avatar Trond Myklebust

Merge tag 'nfs-rdma-for-4.14-1' of git://git.linux-nfs.org/projects/anna/linux-nfs into linux-next

NFS-over-RDMA client updates for Linux 4.14

Bugfixes and cleanups:
- Constify rpc_xprt_ops
- Harden RPC call encoding and decoding
- Clean up rpc call decoding to use xdr_streams
- Remove unused variables from various structures
- Refactor code to remove imul instructions
- Rearrange rx_stats structure for better cacheline sharing
parents 7af7a596 67af6f65
......@@ -239,6 +239,19 @@ extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data);
/**
* xdr_stream_remaining - Return the number of bytes remaining in the stream
* @xdr: pointer to struct xdr_stream
*
* Return value:
* Number of bytes remaining in @xdr before xdr->end
*/
static inline size_t
xdr_stream_remaining(const struct xdr_stream *xdr)
{
return xdr->nwords << 2;
}
ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str,
size_t maxlen, gfp_t gfp_flags);
/**
......
......@@ -174,7 +174,7 @@ enum xprt_transports {
struct rpc_xprt {
struct kref kref; /* Reference count */
struct rpc_xprt_ops * ops; /* transport methods */
const struct rpc_xprt_ops *ops; /* transport methods */
const struct rpc_timeout *timeout; /* timeout parms */
struct sockaddr_storage addr; /* server address */
......
......@@ -49,6 +49,7 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
if (IS_ERR(rb))
goto out_fail;
req->rl_rdmabuf = rb;
xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb));
size = r_xprt->rx_data.inline_rsize;
rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL);
......@@ -202,20 +203,24 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
*/
int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
{
struct rpc_xprt *xprt = rqst->rq_xprt;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
struct rpcrdma_msg *headerp;
headerp = rdmab_to_msg(req->rl_rdmabuf);
headerp->rm_xid = rqst->rq_xid;
headerp->rm_vers = rpcrdma_version;
headerp->rm_credit =
cpu_to_be32(r_xprt->rx_buf.rb_bc_srv_max_requests);
headerp->rm_type = rdma_msg;
headerp->rm_body.rm_chunks[0] = xdr_zero;
headerp->rm_body.rm_chunks[1] = xdr_zero;
headerp->rm_body.rm_chunks[2] = xdr_zero;
__be32 *p;
rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
xdr_init_encode(&req->rl_stream, &req->rl_hdrbuf,
req->rl_rdmabuf->rg_base);
p = xdr_reserve_space(&req->rl_stream, 28);
if (unlikely(!p))
return -EIO;
*p++ = rqst->rq_xid;
*p++ = rpcrdma_version;
*p++ = cpu_to_be32(r_xprt->rx_buf.rb_bc_srv_max_requests);
*p++ = rdma_msg;
*p++ = xdr_zero;
*p++ = xdr_zero;
*p = xdr_zero;
if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, RPCRDMA_HDRLEN_MIN,
&rqst->rq_snd_buf, rpcrdma_noch))
......@@ -271,9 +276,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
* @xprt: transport receiving the call
* @rep: receive buffer containing the call
*
* Called in the RPC reply handler, which runs in a tasklet.
* Be quick about it.
*
* Operational assumptions:
* o Backchannel credits are ignored, just as the NFS server
* forechannel currently does
......@@ -284,7 +286,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_rep *rep)
{
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
struct rpcrdma_msg *headerp;
struct svc_serv *bc_serv;
struct rpcrdma_req *req;
struct rpc_rqst *rqst;
......@@ -292,24 +293,15 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
size_t size;
__be32 *p;
headerp = rdmab_to_msg(rep->rr_rdmabuf);
p = xdr_inline_decode(&rep->rr_stream, 0);
size = xdr_stream_remaining(&rep->rr_stream);
#ifdef RPCRDMA_BACKCHANNEL_DEBUG
pr_info("RPC: %s: callback XID %08x, length=%u\n",
__func__, be32_to_cpu(headerp->rm_xid), rep->rr_len);
pr_info("RPC: %s: %*ph\n", __func__, rep->rr_len, headerp);
__func__, be32_to_cpup(p), size);
pr_info("RPC: %s: %*ph\n", __func__, size, p);
#endif
/* Sanity check:
* Need at least enough bytes for RPC/RDMA header, as code
* here references the header fields by array offset. Also,
* backward calls are always inline, so ensure there
* are some bytes beyond the RPC/RDMA header.
*/
if (rep->rr_len < RPCRDMA_HDRLEN_MIN + 24)
goto out_short;
p = (__be32 *)((unsigned char *)headerp + RPCRDMA_HDRLEN_MIN);
size = rep->rr_len - RPCRDMA_HDRLEN_MIN;
/* Grab a free bc rqst */
spin_lock(&xprt->bc_pa_lock);
if (list_empty(&xprt->bc_pa_list)) {
......@@ -325,7 +317,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
/* Prepare rqst */
rqst->rq_reply_bytes_recvd = 0;
rqst->rq_bytes_sent = 0;
rqst->rq_xid = headerp->rm_xid;
rqst->rq_xid = *p;
rqst->rq_private_buf.len = size;
set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
......@@ -337,9 +329,9 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
buf->len = size;
/* The receive buffer has to be hooked to the rpcrdma_req
* so that it can be reposted after the server is done
* parsing it but just before sending the backward
* direction reply.
* so that it is not released while the req is pointing
* to its buffer, and so that it can be reposted after
* the Upper Layer is done decoding it.
*/
req = rpcr_to_rdmar(rqst);
dprintk("RPC: %s: attaching rep %p to req %p\n",
......@@ -367,13 +359,4 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
* when the connection is re-established.
*/
return;
out_short:
pr_warn("RPC/RDMA short backward direction call\n");
if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep))
xprt_disconnect_done(xprt);
else
pr_warn("RPC: %s: reposting rep %p\n",
__func__, rep);
}
......@@ -177,7 +177,7 @@ fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
/* Use the ib_map_phys_fmr() verb to register a memory region
* for remote access via RDMA READ or RDMA WRITE.
*/
static int
static struct rpcrdma_mr_seg *
fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
int nsegs, bool writing, struct rpcrdma_mw **out)
{
......@@ -188,7 +188,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
mw = rpcrdma_get_mw(r_xprt);
if (!mw)
return -ENOBUFS;
return ERR_PTR(-ENOBUFS);
pageoff = offset_in_page(seg1->mr_offset);
seg1->mr_offset -= pageoff; /* start of page */
......@@ -232,13 +232,13 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
mw->mw_offset = dma_pages[0] + pageoff;
*out = mw;
return mw->mw_nents;
return seg;
out_dmamap_err:
pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
mw->mw_sg, i);
rpcrdma_put_mw(r_xprt, mw);
return -EIO;
return ERR_PTR(-EIO);
out_maperr:
pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
......@@ -247,7 +247,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
mw->mw_sg, mw->mw_nents, mw->mw_dir);
rpcrdma_put_mw(r_xprt, mw);
return -EIO;
return ERR_PTR(-EIO);
}
/* Invalidate all memory regions that were registered for "req".
......
......@@ -344,7 +344,7 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
/* Post a REG_MR Work Request to register a memory region
* for remote access via RDMA READ or RDMA WRITE.
*/
static int
static struct rpcrdma_mr_seg *
frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
int nsegs, bool writing, struct rpcrdma_mw **out)
{
......@@ -364,7 +364,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
rpcrdma_defer_mr_recovery(mw);
mw = rpcrdma_get_mw(r_xprt);
if (!mw)
return -ENOBUFS;
return ERR_PTR(-ENOBUFS);
} while (mw->frmr.fr_state != FRMR_IS_INVALID);
frmr = &mw->frmr;
frmr->fr_state = FRMR_IS_VALID;
......@@ -429,25 +429,25 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
mw->mw_offset = mr->iova;
*out = mw;
return mw->mw_nents;
return seg;
out_dmamap_err:
pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
mw->mw_sg, i);
frmr->fr_state = FRMR_IS_INVALID;
rpcrdma_put_mw(r_xprt, mw);
return -EIO;
return ERR_PTR(-EIO);
out_mapmr_err:
pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
frmr->fr_mr, n, mw->mw_nents);
rpcrdma_defer_mr_recovery(mw);
return -EIO;
return ERR_PTR(-EIO);
out_senderr:
pr_err("rpcrdma: FRMR registration ib_post_send returned %i\n", rc);
rpcrdma_defer_mr_recovery(mw);
return -ENOTCONN;
return ERR_PTR(-ENOTCONN);
}
/* Invalidate all memory regions that were registered for "req".
......
This diff is collapsed.
......@@ -269,7 +269,7 @@ xprt_rdma_bc_put(struct rpc_xprt *xprt)
module_put(THIS_MODULE);
}
static struct rpc_xprt_ops xprt_rdma_bc_procs = {
static const struct rpc_xprt_ops xprt_rdma_bc_procs = {
.reserve_xprt = xprt_reserve_xprt_cong,
.release_xprt = xprt_release_xprt_cong,
.alloc_slot = xprt_alloc_slot,
......
......@@ -149,7 +149,7 @@ static struct ctl_table sunrpc_table[] = {
#endif
static struct rpc_xprt_ops xprt_rdma_procs; /*forward reference */
static const struct rpc_xprt_ops xprt_rdma_procs;
static void
xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap)
......@@ -559,6 +559,7 @@ rpcrdma_get_rdmabuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
r_xprt->rx_stats.hardway_register_count += size;
req->rl_rdmabuf = rb;
xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb));
return true;
}
......@@ -730,7 +731,7 @@ xprt_rdma_send_request(struct rpc_task *task)
if (unlikely(!list_empty(&req->rl_registered)))
r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false);
rc = rpcrdma_marshal_req(rqst);
rc = rpcrdma_marshal_req(r_xprt, rqst);
if (rc < 0)
goto failed_marshal;
......@@ -811,7 +812,7 @@ xprt_rdma_disable_swap(struct rpc_xprt *xprt)
* Plumbing for rpc transport switch and kernel module
*/
static struct rpc_xprt_ops xprt_rdma_procs = {
static const struct rpc_xprt_ops xprt_rdma_procs = {
.reserve_xprt = xprt_reserve_xprt_cong,
.release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */
.alloc_slot = xprt_alloc_slot,
......
......@@ -139,14 +139,11 @@ rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
static void
rpcrdma_update_granted_credits(struct rpcrdma_rep *rep)
{
struct rpcrdma_msg *rmsgp = rdmab_to_msg(rep->rr_rdmabuf);
struct rpcrdma_buffer *buffer = &rep->rr_rxprt->rx_buf;
__be32 *p = rep->rr_rdmabuf->rg_base;
u32 credits;
if (rep->rr_len < RPCRDMA_HDRLEN_ERR)
return;
credits = be32_to_cpu(rmsgp->rm_credit);
credits = be32_to_cpup(p + 2);
if (credits == 0)
credits = 1; /* don't deadlock */
else if (credits > buffer->rb_max_requests)
......@@ -173,21 +170,19 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
goto out_fail;
/* status == SUCCESS means all fields in wc are trustworthy */
if (wc->opcode != IB_WC_RECV)
return;
dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n",
__func__, rep, wc->byte_len);
rep->rr_len = wc->byte_len;
rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len);
rep->rr_wc_flags = wc->wc_flags;
rep->rr_inv_rkey = wc->ex.invalidate_rkey;
ib_dma_sync_single_for_cpu(rdmab_device(rep->rr_rdmabuf),
rdmab_addr(rep->rr_rdmabuf),
rep->rr_len, DMA_FROM_DEVICE);
wc->byte_len, DMA_FROM_DEVICE);
rpcrdma_update_granted_credits(rep);
if (wc->byte_len >= RPCRDMA_HDRLEN_ERR)
rpcrdma_update_granted_credits(rep);
out_schedule:
queue_work(rpcrdma_receive_wq, &rep->rr_work);
......@@ -198,7 +193,7 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
pr_err("rpcrdma: Recv: %s (%u/0x%x)\n",
ib_wc_status_msg(wc->status),
wc->status, wc->vendor_err);
rep->rr_len = RPCRDMA_BAD_LEN;
rpcrdma_set_xdrlen(&rep->rr_hdrbuf, 0);
goto out_schedule;
}
......@@ -974,6 +969,8 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
rc = PTR_ERR(rep->rr_rdmabuf);
goto out_free;
}
xdr_buf_init(&rep->rr_hdrbuf, rep->rr_rdmabuf->rg_base,
rdmab_length(rep->rr_rdmabuf));
rep->rr_cqe.done = rpcrdma_wc_receive;
rep->rr_rxprt = r_xprt;
......
......@@ -218,18 +218,17 @@ enum {
struct rpcrdma_rep {
struct ib_cqe rr_cqe;
unsigned int rr_len;
int rr_wc_flags;
u32 rr_inv_rkey;
struct rpcrdma_regbuf *rr_rdmabuf;
struct rpcrdma_xprt *rr_rxprt;
struct work_struct rr_work;
struct xdr_buf rr_hdrbuf;
struct xdr_stream rr_stream;
struct list_head rr_list;
struct ib_recv_wr rr_recv_wr;
struct rpcrdma_regbuf *rr_rdmabuf;
};
#define RPCRDMA_BAD_LEN (~0U)
/*
* struct rpcrdma_mw - external memory region metadata
*
......@@ -346,6 +345,8 @@ struct rpcrdma_req {
unsigned int rl_connect_cookie;
struct rpcrdma_buffer *rl_buffer;
struct rpcrdma_rep *rl_reply;
struct xdr_stream rl_stream;
struct xdr_buf rl_hdrbuf;
struct ib_send_wr rl_send_wr;
struct ib_sge rl_send_sge[RPCRDMA_MAX_SEND_SGES];
struct rpcrdma_regbuf *rl_rdmabuf; /* xprt header */
......@@ -440,24 +441,27 @@ struct rpcrdma_create_data_internal {
* Statistics for RPCRDMA
*/
struct rpcrdma_stats {
/* accessed when sending a call */
unsigned long read_chunk_count;
unsigned long write_chunk_count;
unsigned long reply_chunk_count;
unsigned long long total_rdma_request;
unsigned long long total_rdma_reply;
/* rarely accessed error counters */
unsigned long long pullup_copy_count;
unsigned long long fixup_copy_count;
unsigned long hardway_register_count;
unsigned long failed_marshal_count;
unsigned long bad_reply_count;
unsigned long nomsg_call_count;
unsigned long bcall_count;
unsigned long mrs_recovered;
unsigned long mrs_orphaned;
unsigned long mrs_allocated;
/* accessed when receiving a reply */
unsigned long long total_rdma_reply;
unsigned long long fixup_copy_count;
unsigned long local_inv_needed;
unsigned long nomsg_call_count;
unsigned long bcall_count;
};
/*
......@@ -465,7 +469,8 @@ struct rpcrdma_stats {
*/
struct rpcrdma_xprt;
struct rpcrdma_memreg_ops {
int (*ro_map)(struct rpcrdma_xprt *,
struct rpcrdma_mr_seg *
(*ro_map)(struct rpcrdma_xprt *,
struct rpcrdma_mr_seg *, int, bool,
struct rpcrdma_mw **);
void (*ro_unmap_sync)(struct rpcrdma_xprt *,
......@@ -638,10 +643,16 @@ enum rpcrdma_chunktype {
bool rpcrdma_prepare_send_sges(struct rpcrdma_ia *, struct rpcrdma_req *,
u32, struct xdr_buf *, enum rpcrdma_chunktype);
void rpcrdma_unmap_sges(struct rpcrdma_ia *, struct rpcrdma_req *);
int rpcrdma_marshal_req(struct rpc_rqst *);
int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst);
void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *);
void rpcrdma_reply_handler(struct work_struct *work);
static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
{
xdr->head[0].iov_len = len;
xdr->len = len;
}
/* RPC/RDMA module init - xprtrdma/transport.c
*/
extern unsigned int xprt_rdma_max_inline_read;
......
......@@ -2728,7 +2728,7 @@ static void bc_destroy(struct rpc_xprt *xprt)
module_put(THIS_MODULE);
}
static struct rpc_xprt_ops xs_local_ops = {
static const struct rpc_xprt_ops xs_local_ops = {
.reserve_xprt = xprt_reserve_xprt,
.release_xprt = xs_tcp_release_xprt,
.alloc_slot = xprt_alloc_slot,
......@@ -2746,7 +2746,7 @@ static struct rpc_xprt_ops xs_local_ops = {
.disable_swap = xs_disable_swap,
};
static struct rpc_xprt_ops xs_udp_ops = {
static const struct rpc_xprt_ops xs_udp_ops = {
.set_buffer_size = xs_udp_set_buffer_size,
.reserve_xprt = xprt_reserve_xprt_cong,
.release_xprt = xprt_release_xprt_cong,
......@@ -2768,7 +2768,7 @@ static struct rpc_xprt_ops xs_udp_ops = {
.inject_disconnect = xs_inject_disconnect,
};
static struct rpc_xprt_ops xs_tcp_ops = {
static const struct rpc_xprt_ops xs_tcp_ops = {
.reserve_xprt = xprt_reserve_xprt,
.release_xprt = xs_tcp_release_xprt,
.alloc_slot = xprt_lock_and_alloc_slot,
......@@ -2799,7 +2799,7 @@ static struct rpc_xprt_ops xs_tcp_ops = {
* The rpc_xprt_ops for the server backchannel
*/
static struct rpc_xprt_ops bc_tcp_ops = {
static const struct rpc_xprt_ops bc_tcp_ops = {
.reserve_xprt = xprt_reserve_xprt,
.release_xprt = xprt_release_xprt,
.alloc_slot = xprt_alloc_slot,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment