Commit d0e84c0a authored by Yishai Hadas, committed by Jason Gunthorpe

IB/mlx5: Add support for drain SQ & RQ

This patch follows the logic of ib_core but also takes the internal
device state into account when executing the involved commands.

Specifically, when the device is in an internal error state, modifying the
QP to the error state can be treated as a success: every in-progress WR is
going to be flushed in error anyway, which is exactly what that modify
command expects.

In addition, since a drain must never fail, the driver makes sure that
post_send/post_recv succeed even if the device is already in an internal
error state. Once the driver supplies the simulated/SW CQEs, the CQE for
the drain WR is handled as well.

In the internal error case, the CQE for the drain WR may be completed
either by the main task that handles the error state or by the task that
issued the drain WR.

Because this depends on scheduling, the code takes the relevant locks and
actions to guarantee that the completion handler for that WR is always
called after post_send/post_recv have been issued, and never in parallel
with the task that handles the error flow.
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
parent ea8c2d8f
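
For context: the contract described above is that a drain must always complete, even after the device has entered an internal error state. The sketch below is illustrative only and not part of this patch; it shows how a ULP typically uses the drain API, assuming its CQs were allocated through ib_alloc_cq() (the teardown_qp() helper is hypothetical, while ib_drain_qp() and ib_destroy_qp() are the existing ib_core entry points).

#include <rdma/ib_verbs.h>

/* Hypothetical ULP helper, for illustration only -- not part of this patch. */
static void teardown_qp(struct ib_qp *qp)
{
	/*
	 * Drains both the SQ and the RQ. After this patch the call reaches
	 * the mlx5-specific handlers and completes even when the device is
	 * in an internal error state.
	 */
	ib_drain_qp(qp);

	/* Safe now: no WR posted on this QP is still in flight. */
	ib_destroy_qp(qp);
}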
@@ -5601,6 +5601,8 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
 	dev->ib_dev.modify_qp = mlx5_ib_modify_qp;
 	dev->ib_dev.query_qp = mlx5_ib_query_qp;
 	dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp;
+	dev->ib_dev.drain_sq = mlx5_ib_drain_sq;
+	dev->ib_dev.drain_rq = mlx5_ib_drain_rq;
 	dev->ib_dev.post_send = mlx5_ib_post_send;
 	dev->ib_dev.post_recv = mlx5_ib_post_recv;
 	dev->ib_dev.create_cq = mlx5_ib_create_cq;
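
The two callbacks registered above matter because ib_core's drain helpers prefer a driver-provided implementation over the generic one. A paraphrased sketch of that dispatch, for orientation only (based on the ib_core verbs code of this era, not part of this diff; ib_drain_rq() is symmetric):

/* Paraphrased from ib_core, for orientation only. */
void ib_drain_sq(struct ib_qp *qp)
{
	if (qp->device->drain_sq)
		qp->device->drain_sq(qp);	/* mlx5_ib_drain_sq() after this patch */
	else
		__ib_drain_sq(qp);		/* generic post-and-wait fallback */
}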
@@ -1016,6 +1016,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
		     struct ib_qp_init_attr *qp_init_attr);
 int mlx5_ib_destroy_qp(struct ib_qp *qp);
+void mlx5_ib_drain_sq(struct ib_qp *qp);
+void mlx5_ib_drain_rq(struct ib_qp *qp);
 int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
		      struct ib_send_wr **bad_wr);
 int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
@@ -4361,9 +4361,8 @@ static void finish_wqe(struct mlx5_ib_qp *qp,
 	qp->sq.w_list[idx].next = qp->sq.cur_post;
 }

-int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		      struct ib_send_wr **bad_wr)
+static int _mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+			      struct ib_send_wr **bad_wr, bool drain)
 {
 	struct mlx5_wqe_ctrl_seg *ctrl = NULL;  /* compiler warning */
 	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
@@ -4394,7 +4393,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	spin_lock_irqsave(&qp->sq.lock, flags);

-	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) {
 		err = -EIO;
 		*bad_wr = wr;
 		nreq = 0;
@@ -4691,13 +4690,19 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	return err;
 }

+int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+		      struct ib_send_wr **bad_wr)
+{
+	return _mlx5_ib_post_send(ibqp, wr, bad_wr, false);
+}
+
 static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size)
 {
 	sig->signature = calc_sig(sig, size);
 }

-int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		      struct ib_recv_wr **bad_wr)
+static int _mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+			      struct ib_recv_wr **bad_wr, bool drain)
 {
 	struct mlx5_ib_qp *qp = to_mqp(ibqp);
 	struct mlx5_wqe_data_seg *scat;
@@ -4715,7 +4720,7 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 	spin_lock_irqsave(&qp->rq.lock, flags);

-	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) {
 		err = -EIO;
 		*bad_wr = wr;
 		nreq = 0;
@@ -4777,6 +4782,12 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 	return err;
 }

+int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+		      struct ib_recv_wr **bad_wr)
+{
+	return _mlx5_ib_post_recv(ibqp, wr, bad_wr, false);
+}
+
 static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state)
 {
 	switch (mlx5_state) {
@@ -5698,3 +5709,131 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
 	kvfree(in);
 	return err;
 }
+
+struct mlx5_ib_drain_cqe {
+	struct ib_cqe cqe;
+	struct completion done;
+};
+
+static void mlx5_ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct mlx5_ib_drain_cqe *cqe = container_of(wc->wr_cqe,
+						     struct mlx5_ib_drain_cqe,
+						     cqe);
+
+	complete(&cqe->done);
+}
+
+/* This function returns only once the drained WR was completed */
+static void handle_drain_completion(struct ib_cq *cq,
+				    struct mlx5_ib_drain_cqe *sdrain,
+				    struct mlx5_ib_dev *dev)
+{
+	struct mlx5_core_dev *mdev = dev->mdev;
+
+	if (cq->poll_ctx == IB_POLL_DIRECT) {
+		while (wait_for_completion_timeout(&sdrain->done, HZ / 10) <= 0)
+			ib_process_cq_direct(cq, -1);
+		return;
+	}
+
+	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+		struct mlx5_ib_cq *mcq = to_mcq(cq);
+		bool triggered = false;
+		unsigned long flags;
+
+		spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
+		/* Make sure that the CQ handler won't run if wasn't run yet */
+		if (!mcq->mcq.reset_notify_added)
+			mcq->mcq.reset_notify_added = 1;
+		else
+			triggered = true;
+		spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
+
+		if (triggered) {
+			/* Wait for any scheduled/running task to be ended */
+			switch (cq->poll_ctx) {
+			case IB_POLL_SOFTIRQ:
+				irq_poll_disable(&cq->iop);
+				irq_poll_enable(&cq->iop);
+				break;
+			case IB_POLL_WORKQUEUE:
+				cancel_work_sync(&cq->work);
+				break;
+			default:
+				WARN_ON_ONCE(1);
+			}
+		}
+
+		/* Run the CQ handler - this makes sure that the drain WR will
+		 * be processed if wasn't processed yet.
+		 */
+		mcq->mcq.comp(&mcq->mcq);
+	}
+
+	wait_for_completion(&sdrain->done);
+}
+
+void mlx5_ib_drain_sq(struct ib_qp *qp)
+{
+	struct ib_cq *cq = qp->send_cq;
+	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+	struct mlx5_ib_drain_cqe sdrain;
+	struct ib_send_wr *bad_swr;
+	struct ib_rdma_wr swr = {
+		.wr = {
+			.next = NULL,
+			{ .wr_cqe = &sdrain.cqe, },
+			.opcode = IB_WR_RDMA_WRITE,
+		},
+	};
+	int ret;
+	struct mlx5_ib_dev *dev = to_mdev(qp->device);
+	struct mlx5_core_dev *mdev = dev->mdev;
+
+	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+	if (ret && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+		WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
+		return;
+	}
+
+	sdrain.cqe.done = mlx5_ib_drain_qp_done;
+	init_completion(&sdrain.done);
+
+	ret = _mlx5_ib_post_send(qp, &swr.wr, &bad_swr, true);
+	if (ret) {
+		WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
+		return;
+	}
+
+	handle_drain_completion(cq, &sdrain, dev);
+}
+
+void mlx5_ib_drain_rq(struct ib_qp *qp)
+{
+	struct ib_cq *cq = qp->recv_cq;
+	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+	struct mlx5_ib_drain_cqe rdrain;
+	struct ib_recv_wr rwr = {}, *bad_rwr;
+	int ret;
+	struct mlx5_ib_dev *dev = to_mdev(qp->device);
+	struct mlx5_core_dev *mdev = dev->mdev;
+
+	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+	if (ret && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+		WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
+		return;
+	}
+
+	rwr.wr_cqe = &rdrain.cqe;
+	rdrain.cqe.done = mlx5_ib_drain_qp_done;
+	init_completion(&rdrain.done);
+
+	ret = _mlx5_ib_post_recv(qp, &rwr, &bad_rwr, true);
+	if (ret) {
+		WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
+		return;
+	}
+
+	handle_drain_completion(cq, &rdrain, dev);
+}