Commit d58c1834 authored by Kaike Wan, committed by Doug Ledford

IB/hfi1: Drop stale TID RDMA packets

In a congested fabric with adaptive routing enabled, traces show that
the sender could receive stale TID RDMA NAK packets that contain newer
KDETH PSNs and older Verbs PSNs. If not dropped, these packets could
cause the incorrect rewinding of the software flows and the incorrect
completion of TID RDMA WRITE requests, eventually leading to memory
corruption and a kernel crash.

The current code drops stale TID RDMA ACK/NAK packets solely based
on KDETH PSNs, which may lead to erroneous processing. This patch
fixes the issue by also checking the Verbs PSN. Additional checks are
added before rewinding the TID RDMA WRITE DATA packets.

Fixes: 9e93e967 ("IB/hfi1: Add a function to receive TID RDMA ACK packet")
Cc: <stable@vger.kernel.org>
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Link: https://lore.kernel.org/r/20190815192033.105923.44192.stgit@awfm-01.aw.intel.com
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent 9b440078
...@@ -4509,7 +4509,7 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet) ...@@ -4509,7 +4509,7 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet)
struct rvt_swqe *wqe; struct rvt_swqe *wqe;
struct tid_rdma_request *req; struct tid_rdma_request *req;
struct tid_rdma_flow *flow; struct tid_rdma_flow *flow;
u32 aeth, psn, req_psn, ack_psn, resync_psn, ack_kpsn; u32 aeth, psn, req_psn, ack_psn, flpsn, resync_psn, ack_kpsn;
unsigned long flags; unsigned long flags;
u16 fidx; u16 fidx;
...@@ -4538,6 +4538,9 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet) ...@@ -4538,6 +4538,9 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet)
ack_kpsn--; ack_kpsn--;
} }
if (unlikely(qp->s_acked == qp->s_tail))
goto ack_op_err;
wqe = rvt_get_swqe_ptr(qp, qp->s_acked); wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE) if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
...@@ -4550,7 +4553,8 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet) ...@@ -4550,7 +4553,8 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet)
trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, flow); trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, flow);
/* Drop stale ACK/NAK */ /* Drop stale ACK/NAK */
if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.spsn)) < 0) if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.spsn)) < 0 ||
cmp_psn(req_psn, flow->flow_state.resp_ib_psn) < 0)
goto ack_op_err; goto ack_op_err;
while (cmp_psn(ack_kpsn, while (cmp_psn(ack_kpsn,
...@@ -4712,7 +4716,12 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet) ...@@ -4712,7 +4716,12 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet)
switch ((aeth >> IB_AETH_CREDIT_SHIFT) & switch ((aeth >> IB_AETH_CREDIT_SHIFT) &
IB_AETH_CREDIT_MASK) { IB_AETH_CREDIT_MASK) {
case 0: /* PSN sequence error */ case 0: /* PSN sequence error */
if (!req->flows)
break;
flow = &req->flows[req->acked_tail]; flow = &req->flows[req->acked_tail];
flpsn = full_flow_psn(flow, flow->flow_state.lpsn);
if (cmp_psn(psn, flpsn) > 0)
break;
trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail,
flow); flow);
req->r_ack_psn = mask_psn(be32_to_cpu(ohdr->bth[2])); req->r_ack_psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment