Commit 8f39fce8 authored by Trond Myklebust's avatar Trond Myklebust

Merge tag 'nfs-rdma-for-4.16-1' of git://git.linux-nfs.org/projects/anna/linux-nfs

NFS-over-RDMA client updates for Linux 4.16

New features:
- xprtrdma tracepoints

Bugfixes and cleanups:
- Fix memory leak if rpcrdma_buffer_create() fails
- Fix allocating extra rpcrdma_reps for the backchannel
- Remove various unused and redundant variables and lock cycles
- Fix IPv6 support in xprt_rdma_set_port()
- Fix memory leak by calling buf_free for callback replies
- Fix "bytes registered" accounting
- Fix kernel-doc comments
- SUNRPC tracepoint cleanups for consistent information
- Optimizations for __rpc_execute()
parents 0be283f6 21ead9ff
......@@ -64,7 +64,7 @@ enum rpcrdma_memreg {
RPCRDMA_MEMWINDOWS,
RPCRDMA_MEMWINDOWS_ASYNC,
RPCRDMA_MTHCAFMR,
RPCRDMA_FRMR,
RPCRDMA_FRWR,
RPCRDMA_ALLPHYSICAL,
RPCRDMA_LAST
};
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2017 Oracle. All rights reserved.
*/
/*
* enum ib_event_type, from include/rdma/ib_verbs.h
*/
#define IB_EVENT_LIST \
ib_event(CQ_ERR) \
ib_event(QP_FATAL) \
ib_event(QP_REQ_ERR) \
ib_event(QP_ACCESS_ERR) \
ib_event(COMM_EST) \
ib_event(SQ_DRAINED) \
ib_event(PATH_MIG) \
ib_event(PATH_MIG_ERR) \
ib_event(DEVICE_FATAL) \
ib_event(PORT_ACTIVE) \
ib_event(PORT_ERR) \
ib_event(LID_CHANGE) \
ib_event(PKEY_CHANGE) \
ib_event(SM_CHANGE) \
ib_event(SRQ_ERR) \
ib_event(SRQ_LIMIT_REACHED) \
ib_event(QP_LAST_WQE_REACHED) \
ib_event(CLIENT_REREGISTER) \
ib_event(GID_CHANGE) \
ib_event_end(WQ_FATAL)
#undef ib_event
#undef ib_event_end
#define ib_event(x) TRACE_DEFINE_ENUM(IB_EVENT_##x);
#define ib_event_end(x) TRACE_DEFINE_ENUM(IB_EVENT_##x);
IB_EVENT_LIST
#undef ib_event
#undef ib_event_end
#define ib_event(x) { IB_EVENT_##x, #x },
#define ib_event_end(x) { IB_EVENT_##x, #x }
#define rdma_show_ib_event(x) \
__print_symbolic(x, IB_EVENT_LIST)
/*
* enum ib_wc_status type, from include/rdma/ib_verbs.h
*/
#define IB_WC_STATUS_LIST \
ib_wc_status(SUCCESS) \
ib_wc_status(LOC_LEN_ERR) \
ib_wc_status(LOC_QP_OP_ERR) \
ib_wc_status(LOC_EEC_OP_ERR) \
ib_wc_status(LOC_PROT_ERR) \
ib_wc_status(WR_FLUSH_ERR) \
ib_wc_status(MW_BIND_ERR) \
ib_wc_status(BAD_RESP_ERR) \
ib_wc_status(LOC_ACCESS_ERR) \
ib_wc_status(REM_INV_REQ_ERR) \
ib_wc_status(REM_ACCESS_ERR) \
ib_wc_status(REM_OP_ERR) \
ib_wc_status(RETRY_EXC_ERR) \
ib_wc_status(RNR_RETRY_EXC_ERR) \
ib_wc_status(LOC_RDD_VIOL_ERR) \
ib_wc_status(REM_INV_RD_REQ_ERR) \
ib_wc_status(REM_ABORT_ERR) \
ib_wc_status(INV_EECN_ERR) \
ib_wc_status(INV_EEC_STATE_ERR) \
ib_wc_status(FATAL_ERR) \
ib_wc_status(RESP_TIMEOUT_ERR) \
ib_wc_status_end(GENERAL_ERR)
#undef ib_wc_status
#undef ib_wc_status_end
#define ib_wc_status(x) TRACE_DEFINE_ENUM(IB_WC_##x);
#define ib_wc_status_end(x) TRACE_DEFINE_ENUM(IB_WC_##x);
IB_WC_STATUS_LIST
#undef ib_wc_status
#undef ib_wc_status_end
#define ib_wc_status(x) { IB_WC_##x, #x },
#define ib_wc_status_end(x) { IB_WC_##x, #x }
#define rdma_show_wc_status(x) \
__print_symbolic(x, IB_WC_STATUS_LIST)
/*
* enum rdma_cm_event_type, from include/rdma/rdma_cm.h
*/
#define RDMA_CM_EVENT_LIST \
rdma_cm_event(ADDR_RESOLVED) \
rdma_cm_event(ADDR_ERROR) \
rdma_cm_event(ROUTE_RESOLVED) \
rdma_cm_event(ROUTE_ERROR) \
rdma_cm_event(CONNECT_REQUEST) \
rdma_cm_event(CONNECT_RESPONSE) \
rdma_cm_event(CONNECT_ERROR) \
rdma_cm_event(UNREACHABLE) \
rdma_cm_event(REJECTED) \
rdma_cm_event(ESTABLISHED) \
rdma_cm_event(DISCONNECTED) \
rdma_cm_event(DEVICE_REMOVAL) \
rdma_cm_event(MULTICAST_JOIN) \
rdma_cm_event(MULTICAST_ERROR) \
rdma_cm_event(ADDR_CHANGE) \
rdma_cm_event_end(TIMEWAIT_EXIT)
#undef rdma_cm_event
#undef rdma_cm_event_end
#define rdma_cm_event(x) TRACE_DEFINE_ENUM(RDMA_CM_EVENT_##x);
#define rdma_cm_event_end(x) TRACE_DEFINE_ENUM(RDMA_CM_EVENT_##x);
RDMA_CM_EVENT_LIST
#undef rdma_cm_event
#undef rdma_cm_event_end
#define rdma_cm_event(x) { RDMA_CM_EVENT_##x, #x },
#define rdma_cm_event_end(x) { RDMA_CM_EVENT_##x, #x }
#define rdma_show_cm_event(x) \
__print_symbolic(x, RDMA_CM_EVENT_LIST)
This diff is collapsed.
......@@ -32,7 +32,7 @@ DECLARE_EVENT_CLASS(rpc_task_status,
__entry->status = task->tk_status;
),
TP_printk("task:%u@%u, status %d",
TP_printk("task:%u@%u status=%d",
__entry->task_id, __entry->client_id,
__entry->status)
);
......@@ -66,7 +66,7 @@ TRACE_EVENT(rpc_connect_status,
__entry->status = status;
),
TP_printk("task:%u@%u, status %d",
TP_printk("task:%u@%u status=%d",
__entry->task_id, __entry->client_id,
__entry->status)
);
......@@ -390,6 +390,10 @@ DECLARE_EVENT_CLASS(rpc_xprt_event,
__entry->status)
);
DEFINE_EVENT(rpc_xprt_event, xprt_timer,
TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status),
TP_ARGS(xprt, xid, status));
DEFINE_EVENT(rpc_xprt_event, xprt_lookup_rqst,
TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status),
TP_ARGS(xprt, xid, status));
......
......@@ -755,22 +755,20 @@ static void __rpc_execute(struct rpc_task *task)
void (*do_action)(struct rpc_task *);
/*
* Execute any pending callback first.
* Perform the next FSM step or a pending callback.
*
* tk_action may be NULL if the task has been killed.
* In particular, note that rpc_killall_tasks may
* do this at any time, so beware when dereferencing.
*/
do_action = task->tk_callback;
task->tk_callback = NULL;
if (do_action == NULL) {
/*
* Perform the next FSM step.
* tk_action may be NULL if the task has been killed.
* In particular, note that rpc_killall_tasks may
* do this at any time, so beware when dereferencing.
*/
do_action = task->tk_action;
if (do_action == NULL)
break;
do_action = task->tk_action;
if (task->tk_callback) {
do_action = task->tk_callback;
task->tk_callback = NULL;
}
trace_rpc_task_run_action(task->tk_client, task, task->tk_action);
if (!do_action)
break;
trace_rpc_task_run_action(task->tk_client, task, do_action);
do_action(task);
/*
......
......@@ -940,8 +940,8 @@ static void xprt_timer(struct rpc_task *task)
if (task->tk_status != -ETIMEDOUT)
return;
dprintk("RPC: %5u xprt_timer\n", task->tk_pid);
trace_xprt_timer(xprt, req->rq_xid, task->tk_status);
if (!req->rq_reply_bytes_recvd) {
if (xprt->ops->timer)
xprt->ops->timer(xprt, task);
......
......@@ -43,7 +43,6 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
req = rpcrdma_create_req(r_xprt);
if (IS_ERR(req))
return PTR_ERR(req);
__set_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags);
rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
DMA_TO_DEVICE, GFP_KERNEL);
......@@ -74,21 +73,13 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
unsigned int count)
{
struct rpcrdma_rep *rep;
int rc = 0;
while (count--) {
rep = rpcrdma_create_rep(r_xprt);
if (IS_ERR(rep)) {
pr_err("RPC: %s: reply buffer alloc failed\n",
__func__);
rc = PTR_ERR(rep);
rc = rpcrdma_create_rep(r_xprt);
if (rc)
break;
}
rpcrdma_recv_buffer_put(rep);
}
return rc;
}
......@@ -129,6 +120,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
rqst->rq_xprt = &r_xprt->rx_xprt;
INIT_LIST_HEAD(&rqst->rq_list);
INIT_LIST_HEAD(&rqst->rq_bc_list);
__set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
if (rpcrdma_bc_setup_rqst(r_xprt, rqst))
goto out_free;
......@@ -148,7 +140,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
buffer->rb_bc_srv_max_requests = reqs;
request_module("svcrdma");
trace_xprtrdma_cb_setup(r_xprt, reqs);
return 0;
out_free:
......@@ -196,13 +188,7 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
return maxmsg - RPCRDMA_HDRLEN_MIN;
}
/**
* rpcrdma_bc_marshal_reply - Send backwards direction reply
* @rqst: buffer containing RPC reply data
*
* Returns zero on success.
*/
int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
......@@ -226,7 +212,46 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN,
&rqst->rq_snd_buf, rpcrdma_noch))
return -EIO;
trace_xprtrdma_cb_reply(rqst);
return 0;
}
/**
* xprt_rdma_bc_send_reply - marshal and send a backchannel reply
* @rqst: RPC rqst with a backchannel RPC reply in rq_snd_buf
*
* Caller holds the transport's write lock.
*
* Returns:
* %0 if the RPC message has been sent
* %-ENOTCONN if the caller should reconnect and call again
* %-EIO if a permanent error occurred and the request was not
* sent. Do not try to send this message again.
*/
int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
int rc;
if (!xprt_connected(rqst->rq_xprt))
goto drop_connection;
rc = rpcrdma_bc_marshal_reply(rqst);
if (rc < 0)
goto failed_marshal;
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
goto drop_connection;
return 0;
failed_marshal:
if (rc != -ENOTCONN)
return rc;
drop_connection:
xprt_disconnect_done(rqst->rq_xprt);
return -ENOTCONN;
}
/**
......@@ -262,11 +287,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
dprintk("RPC: %s: freeing rqst %p (req %p)\n",
__func__, rqst, rpcr_to_rdmar(rqst));
smp_mb__before_atomic();
WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state));
clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
smp_mb__after_atomic();
spin_lock_bh(&xprt->bc_pa_lock);
list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
spin_unlock_bh(&xprt->bc_pa_lock);
......@@ -274,7 +294,7 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
/**
* rpcrdma_bc_receive_call - Handle a backward direction call
* @xprt: transport receiving the call
* @r_xprt: transport receiving the call
* @rep: receive buffer containing the call
*
* Operational assumptions:
......@@ -313,7 +333,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
struct rpc_rqst, rq_bc_pa_list);
list_del(&rqst->rq_bc_pa_list);
spin_unlock(&xprt->bc_pa_lock);
dprintk("RPC: %s: using rqst %p\n", __func__, rqst);
/* Prepare rqst */
rqst->rq_reply_bytes_recvd = 0;
......@@ -321,7 +340,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
rqst->rq_xid = *p;
rqst->rq_private_buf.len = size;
set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
buf = &rqst->rq_rcv_buf;
memset(buf, 0, sizeof(*buf));
......@@ -335,12 +353,8 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
* the Upper Layer is done decoding it.
*/
req = rpcr_to_rdmar(rqst);
dprintk("RPC: %s: attaching rep %p to req %p\n",
__func__, rep, req);
req->rl_reply = rep;
/* Defeat the retransmit detection logic in send_request */
req->rl_connect_cookie = 0;
trace_xprtrdma_cb_call(rqst);
/* Queue rqst for ULP's callback service */
bc_serv = xprt->bc_serv;
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2015 Oracle. All rights reserved.
* Copyright (c) 2015, 2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*/
......@@ -47,7 +47,7 @@ fmr_is_supported(struct rpcrdma_ia *ia)
}
static int
fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw)
fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{
static struct ib_fmr_attr fmr_attr = {
.max_pages = RPCRDMA_MAX_FMR_SGES,
......@@ -55,106 +55,108 @@ fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw)
.page_shift = PAGE_SHIFT
};
mw->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
sizeof(u64), GFP_KERNEL);
if (!mw->fmr.fm_physaddrs)
if (!mr->fmr.fm_physaddrs)
goto out_free;
mw->mw_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
sizeof(*mw->mw_sg), GFP_KERNEL);
if (!mw->mw_sg)
mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
sizeof(*mr->mr_sg), GFP_KERNEL);
if (!mr->mr_sg)
goto out_free;
sg_init_table(mw->mw_sg, RPCRDMA_MAX_FMR_SGES);
sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES);
mw->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
&fmr_attr);
if (IS_ERR(mw->fmr.fm_mr))
if (IS_ERR(mr->fmr.fm_mr))
goto out_fmr_err;
return 0;
out_fmr_err:
dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__,
PTR_ERR(mw->fmr.fm_mr));
PTR_ERR(mr->fmr.fm_mr));
out_free:
kfree(mw->mw_sg);
kfree(mw->fmr.fm_physaddrs);
kfree(mr->mr_sg);
kfree(mr->fmr.fm_physaddrs);
return -ENOMEM;
}
static int
__fmr_unmap(struct rpcrdma_mw *mw)
__fmr_unmap(struct rpcrdma_mr *mr)
{
LIST_HEAD(l);
int rc;
list_add(&mw->fmr.fm_mr->list, &l);
list_add(&mr->fmr.fm_mr->list, &l);
rc = ib_unmap_fmr(&l);
list_del(&mw->fmr.fm_mr->list);
list_del(&mr->fmr.fm_mr->list);
return rc;
}
static void
fmr_op_release_mr(struct rpcrdma_mw *r)
fmr_op_release_mr(struct rpcrdma_mr *mr)
{
LIST_HEAD(unmap_list);
int rc;
/* Ensure MW is not on any rl_registered list */
if (!list_empty(&r->mw_list))
list_del(&r->mw_list);
if (!list_empty(&mr->mr_list))
list_del(&mr->mr_list);
kfree(r->fmr.fm_physaddrs);
kfree(r->mw_sg);
kfree(mr->fmr.fm_physaddrs);
kfree(mr->mr_sg);
/* In case this one was left mapped, try to unmap it
* to prevent dealloc_fmr from failing with EBUSY
*/
rc = __fmr_unmap(r);
rc = __fmr_unmap(mr);
if (rc)
pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n",
r, rc);
mr, rc);
rc = ib_dealloc_fmr(r->fmr.fm_mr);
rc = ib_dealloc_fmr(mr->fmr.fm_mr);
if (rc)
pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n",
r, rc);
mr, rc);
kfree(r);
kfree(mr);
}
/* Reset of a single FMR.
*/
static void
fmr_op_recover_mr(struct rpcrdma_mw *mw)
fmr_op_recover_mr(struct rpcrdma_mr *mr)
{
struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
int rc;
/* ORDER: invalidate first */
rc = __fmr_unmap(mw);
/* ORDER: then DMA unmap */
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
mw->mw_sg, mw->mw_nents, mw->mw_dir);
rc = __fmr_unmap(mr);
if (rc)
goto out_release;
rpcrdma_put_mw(r_xprt, mw);
/* ORDER: then DMA unmap */
rpcrdma_mr_unmap_and_put(mr);
r_xprt->rx_stats.mrs_recovered++;
return;
out_release:
pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mw);
pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mr);
r_xprt->rx_stats.mrs_orphaned++;
spin_lock(&r_xprt->rx_buf.rb_mwlock);
list_del(&mw->mw_all);
spin_unlock(&r_xprt->rx_buf.rb_mwlock);
trace_xprtrdma_dma_unmap(mr);
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
mr->mr_sg, mr->mr_nents, mr->mr_dir);
spin_lock(&r_xprt->rx_buf.rb_mrlock);
list_del(&mr->mr_all);
spin_unlock(&r_xprt->rx_buf.rb_mrlock);
fmr_op_release_mr(mw);
fmr_op_release_mr(mr);
}
static int
......@@ -180,15 +182,15 @@ fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
*/
static struct rpcrdma_mr_seg *
fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
int nsegs, bool writing, struct rpcrdma_mw **out)
int nsegs, bool writing, struct rpcrdma_mr **out)
{
struct rpcrdma_mr_seg *seg1 = seg;
int len, pageoff, i, rc;
struct rpcrdma_mw *mw;
struct rpcrdma_mr *mr;
u64 *dma_pages;
mw = rpcrdma_get_mw(r_xprt);
if (!mw)
mr = rpcrdma_mr_get(r_xprt);
if (!mr)
return ERR_PTR(-ENOBUFS);
pageoff = offset_in_page(seg1->mr_offset);
......@@ -199,12 +201,12 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
nsegs = RPCRDMA_MAX_FMR_SGES;
for (i = 0; i < nsegs;) {
if (seg->mr_page)
sg_set_page(&mw->mw_sg[i],
sg_set_page(&mr->mr_sg[i],
seg->mr_page,
seg->mr_len,
offset_in_page(seg->mr_offset));
else
sg_set_buf(&mw->mw_sg[i], seg->mr_offset,
sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
seg->mr_len);
len += seg->mr_len;
++seg;
......@@ -214,40 +216,38 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break;
}
mw->mw_dir = rpcrdma_data_dir(writing);
mr->mr_dir = rpcrdma_data_dir(writing);
mw->mw_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device,
mw->mw_sg, i, mw->mw_dir);
if (!mw->mw_nents)
mr->mr_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device,
mr->mr_sg, i, mr->mr_dir);
if (!mr->mr_nents)
goto out_dmamap_err;
for (i = 0, dma_pages = mw->fmr.fm_physaddrs; i < mw->mw_nents; i++)
dma_pages[i] = sg_dma_address(&mw->mw_sg[i]);
rc = ib_map_phys_fmr(mw->fmr.fm_mr, dma_pages, mw->mw_nents,
for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++)
dma_pages[i] = sg_dma_address(&mr->mr_sg[i]);
rc = ib_map_phys_fmr(mr->fmr.fm_mr, dma_pages, mr->mr_nents,
dma_pages[0]);
if (rc)
goto out_maperr;
mw->mw_handle = mw->fmr.fm_mr->rkey;
mw->mw_length = len;
mw->mw_offset = dma_pages[0] + pageoff;
mr->mr_handle = mr->fmr.fm_mr->rkey;
mr->mr_length = len;
mr->mr_offset = dma_pages[0] + pageoff;
*out = mw;
*out = mr;
return seg;
out_dmamap_err:
pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
mw->mw_sg, i);
rpcrdma_put_mw(r_xprt, mw);
mr->mr_sg, i);
rpcrdma_mr_put(mr);
return ERR_PTR(-EIO);
out_maperr:
pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
len, (unsigned long long)dma_pages[0],
pageoff, mw->mw_nents, rc);
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
mw->mw_sg, mw->mw_nents, mw->mw_dir);
rpcrdma_put_mw(r_xprt, mw);
pageoff, mr->mr_nents, rc);
rpcrdma_mr_unmap_and_put(mr);
return ERR_PTR(-EIO);
}
......@@ -256,13 +256,13 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
* Sleeps until it is safe for the host CPU to access the
* previously mapped memory regions.
*
* Caller ensures that @mws is not empty before the call. This
* Caller ensures that @mrs is not empty before the call. This
* function empties the list.
*/
static void
fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
{
struct rpcrdma_mw *mw;
struct rpcrdma_mr *mr;
LIST_HEAD(unmap_list);
int rc;
......@@ -271,10 +271,11 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
* ib_unmap_fmr() is slow, so use a single call instead
* of one call per mapped FMR.
*/
list_for_each_entry(mw, mws, mw_list) {
list_for_each_entry(mr, mrs, mr_list) {
dprintk("RPC: %s: unmapping fmr %p\n",
__func__, &mw->fmr);
list_add_tail(&mw->fmr.fm_mr->list, &unmap_list);
__func__, &mr->fmr);
trace_xprtrdma_localinv(mr);
list_add_tail(&mr->fmr.fm_mr->list, &unmap_list);
}
r_xprt->rx_stats.local_inv_needed++;
rc = ib_unmap_fmr(&unmap_list);
......@@ -284,14 +285,10 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
/* ORDER: Now DMA unmap all of the req's MRs, and return
* them to the free MW list.
*/
while (!list_empty(mws)) {
mw = rpcrdma_pop_mw(mws);
dprintk("RPC: %s: DMA unmapping fmr %p\n",
__func__, &mw->fmr);
list_del(&mw->fmr.fm_mr->list);
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
mw->mw_sg, mw->mw_nents, mw->mw_dir);
rpcrdma_put_mw(r_xprt, mw);
while (!list_empty(mrs)) {
mr = rpcrdma_mr_pop(mrs);
list_del(&mr->fmr.fm_mr->list);
rpcrdma_mr_unmap_and_put(mr);
}
return;
......@@ -299,10 +296,10 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
out_reset:
pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);
while (!list_empty(mws)) {
mw = rpcrdma_pop_mw(mws);
list_del(&mw->fmr.fm_mr->list);
fmr_op_recover_mr(mw);
while (!list_empty(mrs)) {
mr = rpcrdma_mr_pop(mrs);
list_del(&mr->fmr.fm_mr->list);
fmr_op_recover_mr(mr);
}
}
......
This diff is collapsed.
/*
* Copyright (c) 2015 Oracle. All rights reserved.
* Copyright (c) 2015, 2017 Oracle. All rights reserved.
*/
/* rpcrdma.ko module initialization
*/
#include <linux/types.h>
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sunrpc/svc_rdma.h>
#include "xprt_rdma.h"
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif
#include <asm/swab.h>
#define CREATE_TRACE_POINTS
#include "xprt_rdma.h"
MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc.");
MODULE_DESCRIPTION("RPC/RDMA Transport");
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -73,11 +73,10 @@ struct rpcrdma_ia {
struct completion ri_remove_done;
int ri_async_rc;
unsigned int ri_max_segs;
unsigned int ri_max_frmr_depth;
unsigned int ri_max_frwr_depth;
unsigned int ri_max_inline_write;
unsigned int ri_max_inline_read;
unsigned int ri_max_send_sges;
bool ri_reminv_expected;
bool ri_implicit_roundup;
enum ib_mr_type ri_mrtype;
unsigned long ri_flags;
......@@ -101,7 +100,6 @@ struct rpcrdma_ep {
wait_queue_head_t rep_connect_wait;
struct rpcrdma_connect_private rep_cm_private;
struct rdma_conn_param rep_remote_cma;
struct sockaddr_storage rep_remote_addr;
struct delayed_work rep_connect_worker;
};
......@@ -232,29 +230,29 @@ enum {
};
/*
* struct rpcrdma_mw - external memory region metadata
* struct rpcrdma_mr - external memory region metadata
*
* An external memory region is any buffer or page that is registered
* on the fly (ie, not pre-registered).
*
* Each rpcrdma_buffer has a list of free MWs anchored in rb_mws. During
* Each rpcrdma_buffer has a list of free MWs anchored in rb_mrs. During
* call_allocate, rpcrdma_buffer_get() assigns one to each segment in
* an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep
* track of registration metadata while each RPC is pending.
* rpcrdma_deregister_external() uses this metadata to unmap and
* release these resources when an RPC is complete.
*/
enum rpcrdma_frmr_state {
FRMR_IS_INVALID, /* ready to be used */
FRMR_IS_VALID, /* in use */
FRMR_FLUSHED_FR, /* flushed FASTREG WR */
FRMR_FLUSHED_LI, /* flushed LOCALINV WR */
enum rpcrdma_frwr_state {
FRWR_IS_INVALID, /* ready to be used */
FRWR_IS_VALID, /* in use */
FRWR_FLUSHED_FR, /* flushed FASTREG WR */
FRWR_FLUSHED_LI, /* flushed LOCALINV WR */
};
struct rpcrdma_frmr {
struct rpcrdma_frwr {
struct ib_mr *fr_mr;
struct ib_cqe fr_cqe;
enum rpcrdma_frmr_state fr_state;
enum rpcrdma_frwr_state fr_state;
struct completion fr_linv_done;
union {
struct ib_reg_wr fr_regwr;
......@@ -267,26 +265,20 @@ struct rpcrdma_fmr {
u64 *fm_physaddrs;
};
struct rpcrdma_mw {
struct list_head mw_list;
struct scatterlist *mw_sg;
int mw_nents;
enum dma_data_direction mw_dir;
unsigned long mw_flags;
struct rpcrdma_mr {
struct list_head mr_list;
struct scatterlist *mr_sg;
int mr_nents;
enum dma_data_direction mr_dir;
union {
struct rpcrdma_fmr fmr;
struct rpcrdma_frmr frmr;
struct rpcrdma_frwr frwr;
};
struct rpcrdma_xprt *mw_xprt;
u32 mw_handle;
u32 mw_length;
u64 mw_offset;
struct list_head mw_all;
};
/* mw_flags */
enum {
RPCRDMA_MW_F_RI = 1,
struct rpcrdma_xprt *mr_xprt;
u32 mr_handle;
u32 mr_length;
u64 mr_offset;
struct list_head mr_all;
};
/*
......@@ -362,8 +354,7 @@ struct rpcrdma_req {
/* rl_flags */
enum {
RPCRDMA_REQ_F_BACKCHANNEL = 0,
RPCRDMA_REQ_F_PENDING,
RPCRDMA_REQ_F_PENDING = 0,
RPCRDMA_REQ_F_TX_RESOURCES,
};
......@@ -374,25 +365,25 @@ rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req)
}
static inline struct rpcrdma_req *
rpcr_to_rdmar(struct rpc_rqst *rqst)
rpcr_to_rdmar(const struct rpc_rqst *rqst)
{
return rqst->rq_xprtdata;
}
static inline void
rpcrdma_push_mw(struct rpcrdma_mw *mw, struct list_head *list)
rpcrdma_mr_push(struct rpcrdma_mr *mr, struct list_head *list)
{
list_add_tail(&mw->mw_list, list);
list_add_tail(&mr->mr_list, list);
}
static inline struct rpcrdma_mw *
rpcrdma_pop_mw(struct list_head *list)
static inline struct rpcrdma_mr *
rpcrdma_mr_pop(struct list_head *list)
{
struct rpcrdma_mw *mw;
struct rpcrdma_mr *mr;
mw = list_first_entry(list, struct rpcrdma_mw, mw_list);
list_del(&mw->mw_list);
return mw;
mr = list_first_entry(list, struct rpcrdma_mr, mr_list);
list_del(&mr->mr_list);
return mr;
}
/*
......@@ -402,8 +393,8 @@ rpcrdma_pop_mw(struct list_head *list)
* One of these is associated with a transport instance
*/
struct rpcrdma_buffer {
spinlock_t rb_mwlock; /* protect rb_mws list */
struct list_head rb_mws;
spinlock_t rb_mrlock; /* protect rb_mrs list */
struct list_head rb_mrs;
struct list_head rb_all;
unsigned long rb_sc_head;
......@@ -438,13 +429,11 @@ struct rpcrdma_buffer {
* This data should be set with mount options
*/
struct rpcrdma_create_data_internal {
struct sockaddr_storage addr; /* RDMA server address */
unsigned int max_requests; /* max requests (slots) in flight */
unsigned int rsize; /* mount rsize - max read hdr+data */
unsigned int wsize; /* mount wsize - max write hdr+data */
unsigned int inline_rsize; /* max non-rdma read data payload */
unsigned int inline_wsize; /* max non-rdma write data payload */
unsigned int padding; /* non-rdma write header padding */
};
/*
......@@ -484,17 +473,19 @@ struct rpcrdma_memreg_ops {
struct rpcrdma_mr_seg *
(*ro_map)(struct rpcrdma_xprt *,
struct rpcrdma_mr_seg *, int, bool,
struct rpcrdma_mw **);
struct rpcrdma_mr **);
void (*ro_reminv)(struct rpcrdma_rep *rep,
struct list_head *mrs);
void (*ro_unmap_sync)(struct rpcrdma_xprt *,
struct list_head *);
void (*ro_recover_mr)(struct rpcrdma_mw *);
void (*ro_recover_mr)(struct rpcrdma_mr *mr);
int (*ro_open)(struct rpcrdma_ia *,
struct rpcrdma_ep *,
struct rpcrdma_create_data_internal *);
size_t (*ro_maxpages)(struct rpcrdma_xprt *);
int (*ro_init_mr)(struct rpcrdma_ia *,
struct rpcrdma_mw *);
void (*ro_release_mr)(struct rpcrdma_mw *);
struct rpcrdma_mr *);
void (*ro_release_mr)(struct rpcrdma_mr *mr);
const char *ro_displayname;
const int ro_send_w_inv_ok;
};
......@@ -525,6 +516,18 @@ struct rpcrdma_xprt {
#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt)
#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
static inline const char *
rpcrdma_addrstr(const struct rpcrdma_xprt *r_xprt)
{
return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR];
}
static inline const char *
rpcrdma_portstr(const struct rpcrdma_xprt *r_xprt)
{
return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_PORT];
}
/* Setting this to 0 ensures interoperability with early servers.
* Setting this to 1 enhances certain unaligned read/write performance.
* Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */
......@@ -538,7 +541,7 @@ extern unsigned int xprt_rdma_memreg_strategy;
/*
* Interface Adapter calls - xprtrdma/verbs.c
*/
int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr);
int rpcrdma_ia_open(struct rpcrdma_xprt *xprt);
void rpcrdma_ia_remove(struct rpcrdma_ia *ia);
void rpcrdma_ia_close(struct rpcrdma_ia *);
bool frwr_is_supported(struct rpcrdma_ia *);
......@@ -564,22 +567,23 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_rep *);
* Buffer calls - xprtrdma/verbs.c
*/
struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *);
void rpcrdma_destroy_req(struct rpcrdma_req *);
int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt);
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *);
void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
void rpcrdma_mr_put(struct rpcrdma_mr *mr);
void rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr);
void rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr);
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
void rpcrdma_buffer_put(struct rpcrdma_req *);
void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
void rpcrdma_defer_mr_recovery(struct rpcrdma_mw *);
struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction,
gfp_t);
bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *);
......@@ -663,7 +667,7 @@ int xprt_rdma_bc_up(struct svc_serv *, struct net *);
size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *);
int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
int rpcrdma_bc_marshal_reply(struct rpc_rqst *);
int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst);
void xprt_rdma_bc_free_rqst(struct rpc_rqst *);
void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
......@@ -671,3 +675,5 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
extern struct xprt_class xprt_rdma_bc;
#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */
#include <trace/events/rpcrdma.h>
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment