Commit b4e2901c authored by Steve Wise, committed by Roland Dreier

RDMA/cxgb4: SQ flush fix

There is a race when moving a QP from RTS->CLOSING where a SQ work
request could be posted after the FW receives the RDMA_RI/FINI WR.
The SQ work request will never get processed, and should be completed
with FLUSHED status.  Function c4iw_flush_sq(), however, was dropping
the oldest SQ work request when in CLOSING or IDLE states, instead of
completing the pending work request.  If that oldest pending work
request was actually complete and had a CQE in the CQ, then when that
CQE is processed in poll_cq, we'll BUG_ON() due to the inconsistent
SQ/CQ state.
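
To make the failure mode concrete, here is a minimal userspace model
of the SQ/CQ bookkeeping (a sketch, not driver code: the ring layout
and the helpers post_wr(), hw_complete(), flush_dropping() and
flush_completing() are invented for illustration).  Dropping a pending
SQ entry whose CQE is already in the CQ leaves the CQ naming a slot
the SQ no longer tracks, which is the inconsistency the driver's
BUG_ON() catches:

#include <assert.h>
#include <stdio.h>

#define SQ_SIZE 4

/* Software SQ entry: 'pending' models a WR the SQ still tracks. */
struct swsqe { int pending; int flushed; };

static struct swsqe sq[SQ_SIZE];
static int cq[SQ_SIZE];		/* each CQE names the SQ slot it completes */
static int cq_head, cq_tail;

static void post_wr(int idx)		{ sq[idx].pending = 1; }

/* HW finished the WR: its CQE lands in the CQ. */
static void hw_complete(int idx)	{ cq[cq_tail++ % SQ_SIZE] = idx; }

/* Old behavior in CLOSING/IDLE: silently consume the oldest WR. */
static void flush_dropping(int idx)	{ sq[idx].pending = 0; }

/* Fixed behavior: keep tracking the WR, completing it as FLUSHED. */
static void flush_completing(int idx)	{ sq[idx].flushed = 1; }

/* poll_cq analogue: every CQE must name a slot the SQ still tracks. */
static void poll(void)
{
	while (cq_head != cq_tail) {
		int idx = cq[cq_head++ % SQ_SIZE];
		assert(sq[idx].pending);	/* the driver's BUG_ON() */
		sq[idx].pending = 0;
		printf("completed slot %d\n", idx);
	}
}

int main(void)
{
	post_wr(0);
	hw_complete(0);		/* CQE for slot 0 is already in the CQ...   */
	flush_dropping(0);	/* ...but the old flush drops slot 0 anyway */
	poll();			/* ...so the consistency check here fires   */
	return 0;
}

Replacing flush_dropping(0) with flush_completing(0) keeps slot 0
tracked, so the poll above passes; that is the behavior fix #1 below
restores unconditionally.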

This is a very small timing hole and has only been hit once so far.

The fix is twofold:

1) c4iw_flush_sq() MUST always flush all non-completed WRs with FLUSHED
   status regardless of the QP state.

2) In c4iw_modify_qp(), always set the "in error" bit on the queue
   before moving the state out of RTS.  This ensures that the state
   transition will not happen while another thread is in
   post_rc_send(), because set_state() and post_rc_send() both acquire
   the qp spinlock.  Also, once we transition the state out of RTS,
   subsequent calls to post_rc_send() will fail because the "in error"
   bit is set.  I don't think this fully closes the race where the FW
   can get a FINI followed by a SQ work request being posted (because
   they are posted to different EQs), but the #1 fix will handle the
   issue by flushing the SQ work request.
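
For fix #2, the locking pattern reduces to the sketch below (an
assumed simplification, not the cxgb4 source: the struct and function
names are invented, and the driver's real post path carries far more
state).  The error bit is raised under the same spinlock that
serializes posting, so a post either finishes before the transition
or observes the bit and fails:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

enum qp_state { QP_RTS, QP_CLOSING };

struct qp {
	pthread_spinlock_t lock;	/* the "qp spinlock" from the text above */
	enum qp_state state;
	bool in_error;			/* the "in error" bit on the queue */
};

/* set_state() analogue: raise the error bit before leaving RTS. */
static void move_to_closing(struct qp *qp)
{
	pthread_spin_lock(&qp->lock);
	qp->in_error = true;		/* t4_set_wq_in_error() analogue */
	qp->state = QP_CLOSING;
	pthread_spin_unlock(&qp->lock);
}

/* Post-path analogue: refuse new SQ work once in_error is set. */
static int post_send(struct qp *qp)
{
	int ret = 0;

	pthread_spin_lock(&qp->lock);
	if (qp->in_error)
		ret = -1;		/* the driver would return an errno */
	/* else: build the WR and ring the SQ doorbell */
	pthread_spin_unlock(&qp->lock);
	return ret;
}

int main(void)
{
	struct qp qp = { .state = QP_RTS, .in_error = false };

	pthread_spin_init(&qp.lock, PTHREAD_PROCESS_PRIVATE);
	printf("post in RTS: %d\n", post_send(&qp));		/* 0 */
	move_to_closing(&qp);
	printf("post in CLOSING: %d\n", post_send(&qp));	/* -1 */
	pthread_spin_destroy(&qp.lock);
	return 0;
}

As the message notes, this ordering alone does not close the window
where the FW sees a FINI followed by a posted SQ WR (they travel on
different EQs); any WR that slips through is left for fix #1 to flush.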
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
parent def4771f
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -235,15 +235,12 @@ int c4iw_flush_sq(struct c4iw_qp *qhp)
 	struct t4_cq *cq = &chp->cq;
 	int idx;
 	struct t4_swsqe *swsqe;
-	int error = (qhp->attr.state != C4IW_QP_STATE_CLOSING &&
-			qhp->attr.state != C4IW_QP_STATE_IDLE);
 
 	if (wq->sq.flush_cidx == -1)
 		wq->sq.flush_cidx = wq->sq.cidx;
 	idx = wq->sq.flush_cidx;
 	BUG_ON(idx >= wq->sq.size);
 	while (idx != wq->sq.pidx) {
-		if (error) {
-			swsqe = &wq->sq.sw_sq[idx];
-			BUG_ON(swsqe->flushed);
-			swsqe->flushed = 1;
+		swsqe = &wq->sq.sw_sq[idx];
+		BUG_ON(swsqe->flushed);
+		swsqe->flushed = 1;
@@ -253,9 +250,6 @@ int c4iw_flush_sq(struct c4iw_qp *qhp)
-				advance_oldest_read(wq);
-			}
-			flushed++;
-		} else {
-			t4_sq_consume(wq);
-		}
+			advance_oldest_read(wq);
+		}
+		flushed++;
 		if (++idx == wq->sq.size)
 			idx = 0;
 	}
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -1367,6 +1367,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
 	switch (attrs->next_state) {
 	case C4IW_QP_STATE_CLOSING:
 		BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2);
+		t4_set_wq_in_error(&qhp->wq);
 		set_state(qhp, C4IW_QP_STATE_CLOSING);
 		ep = qhp->ep;
 		if (!internal) {
@@ -1374,16 +1375,15 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
 			disconnect = 1;
 			c4iw_get_ep(&qhp->ep->com);
 		}
-		t4_set_wq_in_error(&qhp->wq);
 		ret = rdma_fini(rhp, qhp, ep);
 		if (ret)
 			goto err;
 		break;
 	case C4IW_QP_STATE_TERMINATE:
+		t4_set_wq_in_error(&qhp->wq);
 		set_state(qhp, C4IW_QP_STATE_TERMINATE);
 		qhp->attr.layer_etype = attrs->layer_etype;
 		qhp->attr.ecode = attrs->ecode;
-		t4_set_wq_in_error(&qhp->wq);
 		ep = qhp->ep;
 		disconnect = 1;
 		if (!internal)
@@ -1396,8 +1396,8 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
 		c4iw_get_ep(&qhp->ep->com);
 		break;
 	case C4IW_QP_STATE_ERROR:
-		set_state(qhp, C4IW_QP_STATE_ERROR);
 		t4_set_wq_in_error(&qhp->wq);
+		set_state(qhp, C4IW_QP_STATE_ERROR);
 		if (!internal) {
 			abort = 1;
 			disconnect = 1;