Commit 39646d9b authored by Dragos Tatulea's avatar Dragos Tatulea Committed by Saeed Mahameed

net/mlx5e: xsk: Fix crash on regular rq reactivation

When the regular rq is reactivated after the XSK socket is closed
it could be reading stale cqes which eventually corrupts the rq.
This leads to no more traffic being received on the regular rq and a
crash on the next close or deactivation of the rq.

Kal Cuttler Conely reported this issue as a crash on the release
path when the xdpsock sample program is stopped (killed) and restarted
in sequence while traffic is running.

This patch flushes all cqes when during the rq flush. The cqe flushing
is done in the reset state of the rq. mlx5e_rq_to_ready code is moved
into the flush function to allow for this.

Fixes: 082a9edf ("net/mlx5e: xsk: Flush RQ on XSK activation to save memory")
Reported-by: default avatarKal Cutter Conley <kal.conley@dectris.com>
Closes: https://lore.kernel.org/xdp-newbies/CAHApi-nUAs4TeFWUDV915CZJo07XVg2Vp63-no7UDfj6wur9nQ@mail.gmail.comSigned-off-by: default avatarDragos Tatulea <dtatulea@nvidia.com>
Reviewed-by: default avatarTariq Toukan <tariqt@nvidia.com>
Signed-off-by: default avatarSaeed Mahameed <saeedm@nvidia.com>
parent e0f52298
......@@ -1036,7 +1036,23 @@ static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_s
return err;
}
static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
static void mlx5e_flush_rq_cq(struct mlx5e_rq *rq)
{
struct mlx5_cqwq *cqwq = &rq->cq.wq;
struct mlx5_cqe64 *cqe;
if (test_bit(MLX5E_RQ_STATE_MINI_CQE_ENHANCED, &rq->state)) {
while ((cqe = mlx5_cqwq_get_cqe_enahnced_comp(cqwq)))
mlx5_cqwq_pop(cqwq);
} else {
while ((cqe = mlx5_cqwq_get_cqe(cqwq)))
mlx5_cqwq_pop(cqwq);
}
mlx5_cqwq_update_db_record(cqwq);
}
int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state)
{
struct net_device *dev = rq->netdev;
int err;
......@@ -1046,6 +1062,10 @@ static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn);
return err;
}
mlx5e_free_rx_descs(rq);
mlx5e_flush_rq_cq(rq);
err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
if (err) {
netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn);
......@@ -1055,13 +1075,6 @@ static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
return 0;
}
int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state)
{
mlx5e_free_rx_descs(rq);
return mlx5e_rq_to_ready(rq, curr_state);
}
static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd)
{
struct mlx5_core_dev *mdev = rq->mdev;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment