Commit 4dd72636 authored by Amrani, Ram's avatar Amrani, Ram Committed by Doug Ledford

RDMA/qedr: destroy CQ only after HW releases it

Wait for all relevant CNQ interrupts before freeing the CQ.
Don't invoke completion handlers for a destroyed CQ.
Signed-off-by: default avatarRam Amrani <Ram.Amrani@cavium.com>
Signed-off-by: default avatarDoug Ledford <dledford@redhat.com>
parent 942b3b2c
...@@ -438,14 +438,21 @@ static irqreturn_t qedr_irq_handler(int irq, void *handle) ...@@ -438,14 +438,21 @@ static irqreturn_t qedr_irq_handler(int irq, void *handle)
cq->arm_flags = 0; cq->arm_flags = 0;
if (cq->ibcq.comp_handler) if (!cq->destroyed && cq->ibcq.comp_handler)
(*cq->ibcq.comp_handler) (*cq->ibcq.comp_handler)
(&cq->ibcq, cq->ibcq.cq_context); (&cq->ibcq, cq->ibcq.cq_context);
/* The CQ's CNQ notification counter is checked before
* destroying the CQ in a busy-wait loop that waits for all of
* the CQ's CNQ interrupts to be processed. It is increased
* here, only after the completion handler, to ensure that the
* the handler is not running when the CQ is destroyed.
*/
cq->cnq_notif++;
sw_comp_cons = qed_chain_get_cons_idx(&cnq->pbl); sw_comp_cons = qed_chain_get_cons_idx(&cnq->pbl);
cnq->n_comp++; cnq->n_comp++;
} }
qed_ops->rdma_cnq_prod_update(cnq->dev->rdma_ctx, cnq->index, qed_ops->rdma_cnq_prod_update(cnq->dev->rdma_ctx, cnq->index,
......
...@@ -272,6 +272,8 @@ struct qedr_cq { ...@@ -272,6 +272,8 @@ struct qedr_cq {
u32 cq_cons; u32 cq_cons;
struct qedr_userq q; struct qedr_userq q;
u8 destroyed;
u16 cnq_notif;
}; };
struct qedr_pd { struct qedr_pd {
......
...@@ -822,6 +822,17 @@ int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) ...@@ -822,6 +822,17 @@ int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{ {
struct qedr_cq *cq = get_qedr_cq(ibcq); struct qedr_cq *cq = get_qedr_cq(ibcq);
unsigned long sflags; unsigned long sflags;
struct qedr_dev *dev;
dev = get_qedr_dev(ibcq->device);
if (cq->destroyed) {
DP_ERR(dev,
"warning: arm was invoked after destroy for cq %p (icid=%d)\n",
cq, cq->icid);
return -EINVAL;
}
if (cq->cq_type == QEDR_CQ_TYPE_GSI) if (cq->cq_type == QEDR_CQ_TYPE_GSI)
return 0; return 0;
...@@ -987,16 +998,22 @@ int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata) ...@@ -987,16 +998,22 @@ int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
return 0; return 0;
} }
#define QEDR_DESTROY_CQ_MAX_ITERATIONS (10)
#define QEDR_DESTROY_CQ_ITER_DURATION (10)
int qedr_destroy_cq(struct ib_cq *ibcq) int qedr_destroy_cq(struct ib_cq *ibcq)
{ {
struct qedr_dev *dev = get_qedr_dev(ibcq->device); struct qedr_dev *dev = get_qedr_dev(ibcq->device);
struct qed_rdma_destroy_cq_out_params oparams; struct qed_rdma_destroy_cq_out_params oparams;
struct qed_rdma_destroy_cq_in_params iparams; struct qed_rdma_destroy_cq_in_params iparams;
struct qedr_cq *cq = get_qedr_cq(ibcq); struct qedr_cq *cq = get_qedr_cq(ibcq);
int iter;
int rc; int rc;
DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid); DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
cq->destroyed = 1;
/* GSIs CQs are handled by driver, so they don't exist in the FW */ /* GSIs CQs are handled by driver, so they don't exist in the FW */
if (cq->cq_type == QEDR_CQ_TYPE_GSI) if (cq->cq_type == QEDR_CQ_TYPE_GSI)
goto done; goto done;
...@@ -1013,10 +1030,50 @@ int qedr_destroy_cq(struct ib_cq *ibcq) ...@@ -1013,10 +1030,50 @@ int qedr_destroy_cq(struct ib_cq *ibcq)
ib_umem_release(cq->q.umem); ib_umem_release(cq->q.umem);
} }
/* We don't want the IRQ handler to handle a non-existing CQ so we
* wait until all CNQ interrupts, if any, are received. This will always
* happen and will always happen very fast. If not, then a serious error
* has occured. That is why we can use a long delay.
* We spin for a short time so we don’t lose time on context switching
* in case all the completions are handled in that span. Otherwise
* we sleep for a while and check again. Since the CNQ may be
* associated with (only) the current CPU we use msleep to allow the
* current CPU to be freed.
* The CNQ notification is increased in qedr_irq_handler().
*/
iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
udelay(QEDR_DESTROY_CQ_ITER_DURATION);
iter--;
}
iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
msleep(QEDR_DESTROY_CQ_ITER_DURATION);
iter--;
}
if (oparams.num_cq_notif != cq->cnq_notif)
goto err;
/* Note that we don't need to have explicit code to wait for the
* completion of the event handler because it is invoked from the EQ.
* Since the destroy CQ ramrod has also been received on the EQ we can
* be certain that there's no event handler in process.
*/
done: done:
cq->sig = ~cq->sig;
kfree(cq); kfree(cq);
return 0; return 0;
err:
DP_ERR(dev,
"CQ %p (icid=%d) not freed, expecting %d ints but got %d ints\n",
cq, cq->icid, oparams.num_cq_notif, cq->cnq_notif);
return -EINVAL;
} }
static inline int get_gid_info_from_table(struct ib_qp *ibqp, static inline int get_gid_info_from_table(struct ib_qp *ibqp,
...@@ -3419,6 +3476,13 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) ...@@ -3419,6 +3476,13 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
int update = 0; int update = 0;
int done = 0; int done = 0;
if (cq->destroyed) {
DP_ERR(dev,
"warning: poll was invoked after destroy for cq %p (icid=%d)\n",
cq, cq->icid);
return 0;
}
if (cq->cq_type == QEDR_CQ_TYPE_GSI) if (cq->cq_type == QEDR_CQ_TYPE_GSI)
return qedr_gsi_poll_cq(ibcq, num_entries, wc); return qedr_gsi_poll_cq(ibcq, num_entries, wc);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment