Commit d9d1f5e7 authored by Kaike Wan's avatar Kaike Wan Committed by Doug Ledford

IB/hfi1: Drop stale TID RDMA packets that cause TIDErr

In a congested fabric with adaptive routing enabled, traces show that
packets could be delivered out of order. A stale TID RDMA data packet
could lead to TidErr if the TID entries have been released by duplicate
data packets generated from retries, and subsequently erroneously force
the qp into error state in the current implementation.

Since the payload has already been dropped by hardware, the packet can
be simply dropped and it is no longer necessary to put the qp into
error state.

Fixes: 9905bf06 ("IB/hfi1: Add functions to receive TID RDMA READ response")
Cc: <stable@vger.kernel.org>
Reviewed-by: default avatarMike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: default avatarKaike Wan <kaike.wan@intel.com>
Signed-off-by: default avatarDennis Dalessandro <dennis.dalessandro@intel.com>
Link: https://lore.kernel.org/r/20190815192058.105923.72324.stgit@awfm-01.aw.intel.comSigned-off-by: default avatarDoug Ledford <dledford@redhat.com>
parent 90fdae66
...@@ -2574,18 +2574,9 @@ void hfi1_kern_read_tid_flow_free(struct rvt_qp *qp) ...@@ -2574,18 +2574,9 @@ void hfi1_kern_read_tid_flow_free(struct rvt_qp *qp)
hfi1_kern_clear_hw_flow(priv->rcd, qp); hfi1_kern_clear_hw_flow(priv->rcd, qp);
} }
static bool tid_rdma_tid_err(struct hfi1_ctxtdata *rcd, static bool tid_rdma_tid_err(struct hfi1_packet *packet, u8 rcv_type)
struct hfi1_packet *packet, u8 rcv_type,
u8 opcode)
{ {
struct rvt_qp *qp = packet->qp; struct rvt_qp *qp = packet->qp;
struct hfi1_qp_priv *qpriv = qp->priv;
u32 ipsn;
struct ib_other_headers *ohdr = packet->ohdr;
struct rvt_ack_entry *e;
struct tid_rdma_request *req;
struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
u32 i;
if (rcv_type >= RHF_RCV_TYPE_IB) if (rcv_type >= RHF_RCV_TYPE_IB)
goto done; goto done;
...@@ -2602,41 +2593,9 @@ static bool tid_rdma_tid_err(struct hfi1_ctxtdata *rcd, ...@@ -2602,41 +2593,9 @@ static bool tid_rdma_tid_err(struct hfi1_ctxtdata *rcd,
if (rcv_type == RHF_RCV_TYPE_EAGER) { if (rcv_type == RHF_RCV_TYPE_EAGER) {
hfi1_restart_rc(qp, qp->s_last_psn + 1, 1); hfi1_restart_rc(qp, qp->s_last_psn + 1, 1);
hfi1_schedule_send(qp); hfi1_schedule_send(qp);
goto done_unlock;
} }
/* /* Since no payload is delivered, just drop the packet */
* For TID READ response, error out QP after freeing the tid
* resources.
*/
if (opcode == TID_OP(READ_RESP)) {
ipsn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_psn));
if (cmp_psn(ipsn, qp->s_last_psn) > 0 &&
cmp_psn(ipsn, qp->s_psn) < 0) {
hfi1_kern_read_tid_flow_free(qp);
spin_unlock(&qp->s_lock);
rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
goto done;
}
goto done_unlock;
}
/*
* Error out the qp for TID RDMA WRITE
*/
hfi1_kern_clear_hw_flow(qpriv->rcd, qp);
for (i = 0; i < rvt_max_atomic(rdi); i++) {
e = &qp->s_ack_queue[i];
if (e->opcode == TID_OP(WRITE_REQ)) {
req = ack_to_tid_req(e);
hfi1_kern_exp_rcv_clear_all(req);
}
}
spin_unlock(&qp->s_lock);
rvt_rc_error(qp, IB_WC_LOC_LEN_ERR);
goto done;
done_unlock:
spin_unlock(&qp->s_lock); spin_unlock(&qp->s_lock);
done: done:
return true; return true;
...@@ -2925,7 +2884,7 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, ...@@ -2925,7 +2884,7 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
if (lnh == HFI1_LRH_GRH) if (lnh == HFI1_LRH_GRH)
goto r_unlock; goto r_unlock;
if (tid_rdma_tid_err(rcd, packet, rcv_type, opcode)) if (tid_rdma_tid_err(packet, rcv_type))
goto r_unlock; goto r_unlock;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment