Commit d3d9428d authored by Chuck Lever, committed by Greg Kroah-Hartman

xprtrdma: Fix DMAR failure in frwr_op_map() after reconnect

commit 62bdf94a upstream.

When a LOCALINV WR is flushed, the frmr is marked STALE, then
frwr_op_unmap_sync DMA-unmaps the frmr's SGL. These STALE frmrs
are then recovered when frwr_op_map hunts for an INVALID frmr to
use.

All other cases that need frmr recovery leave that SGL DMA-mapped.
The FRMR recovery path unconditionally DMA-unmaps the frmr's SGL.

To avoid DMA unmapping the SGL twice for flushed LOCAL_INV WRs,
alter the recovery logic (rather than the hot frwr_op_unmap_sync
path) to distinguish among these cases. This solution also takes
care of the case where multiple LOCAL_INV WRs are issued for the
same rpcrdma_req, some complete successfully, but some are flushed.
Reported-by: Vasco Steinmetz <linux@kyberraum.net>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: Vasco Steinmetz <linux@kyberraum.net>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 31c749be
...@@ -44,18 +44,20 @@ ...@@ -44,18 +44,20 @@
* being done. * being done.
* *
* When the underlying transport disconnects, MRs are left in one of * When the underlying transport disconnects, MRs are left in one of
* three states: * four states:
* *
* INVALID: The MR was not in use before the QP entered ERROR state. * INVALID: The MR was not in use before the QP entered ERROR state.
* (Or, the LOCAL_INV WR has not completed or flushed yet).
*
* STALE: The MR was being registered or unregistered when the QP
* entered ERROR state, and the pending WR was flushed.
* *
* VALID: The MR was registered before the QP entered ERROR state. * VALID: The MR was registered before the QP entered ERROR state.
* *
* When frwr_op_map encounters STALE and VALID MRs, they are recovered * FLUSHED_FR: The MR was being registered when the QP entered ERROR
* with ib_dereg_mr and then are re-initialized. Beause MR recovery * state, and the pending WR was flushed.
*
* FLUSHED_LI: The MR was being invalidated when the QP entered ERROR
* state, and the pending WR was flushed.
*
* When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered
* with ib_dereg_mr and then are re-initialized. Because MR recovery
* allocates fresh resources, it is deferred to a workqueue, and the * allocates fresh resources, it is deferred to a workqueue, and the
* recovered MRs are placed back on the rb_mws list when recovery is * recovered MRs are placed back on the rb_mws list when recovery is
* complete. frwr_op_map allocates another MR for the current RPC while * complete. frwr_op_map allocates another MR for the current RPC while
...@@ -175,12 +177,15 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) ...@@ -175,12 +177,15 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
static void static void
frwr_op_recover_mr(struct rpcrdma_mw *mw) frwr_op_recover_mr(struct rpcrdma_mw *mw)
{ {
enum rpcrdma_frmr_state state = mw->frmr.fr_state;
struct rpcrdma_xprt *r_xprt = mw->mw_xprt; struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_ia *ia = &r_xprt->rx_ia;
int rc; int rc;
rc = __frwr_reset_mr(ia, mw); rc = __frwr_reset_mr(ia, mw);
ib_dma_unmap_sg(ia->ri_device, mw->mw_sg, mw->mw_nents, mw->mw_dir); if (state != FRMR_FLUSHED_LI)
ib_dma_unmap_sg(ia->ri_device,
mw->mw_sg, mw->mw_nents, mw->mw_dir);
if (rc) if (rc)
goto out_release; goto out_release;
...@@ -261,10 +266,8 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt) ...@@ -261,10 +266,8 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
} }
static void static void
__frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_frmr *frmr, __frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
const char *wr)
{ {
frmr->fr_state = FRMR_IS_STALE;
if (wc->status != IB_WC_WR_FLUSH_ERR) if (wc->status != IB_WC_WR_FLUSH_ERR)
pr_err("rpcrdma: %s: %s (%u/0x%x)\n", pr_err("rpcrdma: %s: %s (%u/0x%x)\n",
wr, ib_wc_status_msg(wc->status), wr, ib_wc_status_msg(wc->status),
...@@ -287,7 +290,8 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) ...@@ -287,7 +290,8 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
if (wc->status != IB_WC_SUCCESS) { if (wc->status != IB_WC_SUCCESS) {
cqe = wc->wr_cqe; cqe = wc->wr_cqe;
frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe); frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
__frwr_sendcompletion_flush(wc, frmr, "fastreg"); frmr->fr_state = FRMR_FLUSHED_FR;
__frwr_sendcompletion_flush(wc, "fastreg");
} }
} }
...@@ -307,7 +311,8 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc) ...@@ -307,7 +311,8 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
if (wc->status != IB_WC_SUCCESS) { if (wc->status != IB_WC_SUCCESS) {
cqe = wc->wr_cqe; cqe = wc->wr_cqe;
frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe); frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
__frwr_sendcompletion_flush(wc, frmr, "localinv"); frmr->fr_state = FRMR_FLUSHED_LI;
__frwr_sendcompletion_flush(wc, "localinv");
} }
} }
...@@ -327,8 +332,10 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) ...@@ -327,8 +332,10 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
/* WARNING: Only wr_cqe and status are reliable at this point */ /* WARNING: Only wr_cqe and status are reliable at this point */
cqe = wc->wr_cqe; cqe = wc->wr_cqe;
frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe); frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
if (wc->status != IB_WC_SUCCESS) if (wc->status != IB_WC_SUCCESS) {
__frwr_sendcompletion_flush(wc, frmr, "localinv"); frmr->fr_state = FRMR_FLUSHED_LI;
__frwr_sendcompletion_flush(wc, "localinv");
}
complete(&frmr->fr_linv_done); complete(&frmr->fr_linv_done);
} }
......
...@@ -207,7 +207,8 @@ struct rpcrdma_rep { ...@@ -207,7 +207,8 @@ struct rpcrdma_rep {
enum rpcrdma_frmr_state { enum rpcrdma_frmr_state {
FRMR_IS_INVALID, /* ready to be used */ FRMR_IS_INVALID, /* ready to be used */
FRMR_IS_VALID, /* in use */ FRMR_IS_VALID, /* in use */
FRMR_IS_STALE, /* failed completion */ FRMR_FLUSHED_FR, /* flushed FASTREG WR */
FRMR_FLUSHED_LI, /* flushed LOCALINV WR */
}; };
struct rpcrdma_frmr { struct rpcrdma_frmr {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment