Commit 445fd4f4 authored by Bob Pearson, committed by Jason Gunthorpe

RDMA/rxe: Fix rnr retry behavior

Currently, in the completer tasklet, the same flag (qp->req.need_retry) is
set whether the retransmit timer or the rnr timer fires, so that if either
timer fires it will attempt to perform a retry flow on the send queue. This
has the effect of responding to an RNR NAK at the first retransmit timer
event, which might not allow the requested rnr timeout to elapse.

This patch adds a new flag (qp->req.wait_for_rnr_timer) which, if set,
prevents a retry flow until the rnr nak timer fires.

This patch fixes rnr retry errors which can be observed by running the
pyverbs test_rdmacm_async_traffic_external_qp multiple times. With this
patch applied they do not occur.

Link: https://lore.kernel.org/linux-rdma/a8287823-1408-4273-bc22-99a0678db640@gmail.com/
Link: https://lore.kernel.org/linux-rdma/2bafda9e-2bb6-186d-12a1-179e8f6a2678@talpey.com/
Fixes: 8700e3e7 ("Soft RoCE driver")
Link: https://lore.kernel.org/r/20220630190425.2251-6-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent 930119a1
...@@ -114,6 +114,8 @@ void retransmit_timer(struct timer_list *t) ...@@ -114,6 +114,8 @@ void retransmit_timer(struct timer_list *t)
{ {
struct rxe_qp *qp = from_timer(qp, t, retrans_timer); struct rxe_qp *qp = from_timer(qp, t, retrans_timer);
pr_debug("%s: fired for qp#%d\n", __func__, qp->elem.index);
if (qp->valid) { if (qp->valid) {
qp->comp.timeout = 1; qp->comp.timeout = 1;
rxe_run_task(&qp->comp.task, 1); rxe_run_task(&qp->comp.task, 1);
...@@ -730,11 +732,15 @@ int rxe_completer(void *arg) ...@@ -730,11 +732,15 @@ int rxe_completer(void *arg)
break; break;
case COMPST_RNR_RETRY: case COMPST_RNR_RETRY:
/* we come here if we received an RNR NAK */
if (qp->comp.rnr_retry > 0) { if (qp->comp.rnr_retry > 0) {
if (qp->comp.rnr_retry != 7) if (qp->comp.rnr_retry != 7)
qp->comp.rnr_retry--; qp->comp.rnr_retry--;
qp->req.need_retry = 1; /* don't start a retry flow until the
* rnr timer has fired
*/
qp->req.wait_for_rnr_timer = 1;
pr_debug("qp#%d set rnr nak timer\n", pr_debug("qp#%d set rnr nak timer\n",
qp_num(qp)); qp_num(qp));
mod_timer(&qp->rnr_nak_timer, mod_timer(&qp->rnr_nak_timer,
......
...@@ -505,6 +505,7 @@ static void rxe_qp_reset(struct rxe_qp *qp) ...@@ -505,6 +505,7 @@ static void rxe_qp_reset(struct rxe_qp *qp)
atomic_set(&qp->ssn, 0); atomic_set(&qp->ssn, 0);
qp->req.opcode = -1; qp->req.opcode = -1;
qp->req.need_retry = 0; qp->req.need_retry = 0;
qp->req.wait_for_rnr_timer = 0;
qp->req.noack_pkts = 0; qp->req.noack_pkts = 0;
qp->resp.msn = 0; qp->resp.msn = 0;
qp->resp.opcode = -1; qp->resp.opcode = -1;
......
...@@ -100,7 +100,11 @@ void rnr_nak_timer(struct timer_list *t) ...@@ -100,7 +100,11 @@ void rnr_nak_timer(struct timer_list *t)
{ {
struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer); struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer);
pr_debug("qp#%d rnr nak timer fired\n", qp_num(qp)); pr_debug("%s: fired for qp#%d\n", __func__, qp_num(qp));
/* request a send queue retry */
qp->req.need_retry = 1;
qp->req.wait_for_rnr_timer = 0;
rxe_run_task(&qp->req.task, 1); rxe_run_task(&qp->req.task, 1);
} }
...@@ -641,10 +645,17 @@ int rxe_requester(void *arg) ...@@ -641,10 +645,17 @@ int rxe_requester(void *arg)
qp->req.need_rd_atomic = 0; qp->req.need_rd_atomic = 0;
qp->req.wait_psn = 0; qp->req.wait_psn = 0;
qp->req.need_retry = 0; qp->req.need_retry = 0;
qp->req.wait_for_rnr_timer = 0;
goto exit; goto exit;
} }
if (unlikely(qp->req.need_retry)) { /* we come here if the retransmit timer has fired
* or if the rnr timer has fired. If the retransmit
* timer fires while we are processing an RNR NAK wait
* until the rnr timer has fired before starting the
* retry flow
*/
if (unlikely(qp->req.need_retry && !qp->req.wait_for_rnr_timer)) {
req_retry(qp); req_retry(qp);
qp->req.need_retry = 0; qp->req.need_retry = 0;
} }
......
...@@ -123,6 +123,7 @@ struct rxe_req_info { ...@@ -123,6 +123,7 @@ struct rxe_req_info {
int need_rd_atomic; int need_rd_atomic;
int wait_psn; int wait_psn;
int need_retry; int need_retry;
int wait_for_rnr_timer;
int noack_pkts; int noack_pkts;
struct rxe_task task; struct rxe_task task;
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment