Commit 063bd355 authored by Moshe Shemesh's avatar Moshe Shemesh Committed by Saeed Mahameed

net/mlx5: Fix a race on command flush flow

Fix a refcount use after free warning due to a race on command entry.
Such race occurs when one of the commands releases its last refcount and
frees its index and entry while another process running command flush
flow takes refcount to this command entry. The process which handles
commands flush may see this command as needed to be flushed if the other
process released its refcount but didn't release the index yet. Fix it
by adding the needed spin lock.

It fixes the following warning trace:

refcount_t: addition on 0; use-after-free.
WARNING: CPU: 11 PID: 540311 at lib/refcount.c:25 refcount_warn_saturate+0x80/0xe0
...
RIP: 0010:refcount_warn_saturate+0x80/0xe0
...
Call Trace:
 <TASK>
 mlx5_cmd_trigger_completions+0x293/0x340 [mlx5_core]
 mlx5_cmd_flush+0x3a/0xf0 [mlx5_core]
 enter_error_state+0x44/0x80 [mlx5_core]
 mlx5_fw_fatal_reporter_err_work+0x37/0xe0 [mlx5_core]
 process_one_work+0x1be/0x390
 worker_thread+0x4d/0x3d0
 ? rescuer_thread+0x350/0x350
 kthread+0x141/0x160
 ? set_kthread_struct+0x40/0x40
 ret_from_fork+0x1f/0x30
 </TASK>

Fixes: 50b2412b ("net/mlx5: Avoid possible free of command entry while timeout comp handler")
Signed-off-by: default avatarMoshe Shemesh <moshe@nvidia.com>
Reviewed-by: default avatarEran Ben Elisha <eranbe@nvidia.com>
Signed-off-by: default avatarSaeed Mahameed <saeedm@nvidia.com>
parent ac77998b
...@@ -131,11 +131,8 @@ static int cmd_alloc_index(struct mlx5_cmd *cmd) ...@@ -131,11 +131,8 @@ static int cmd_alloc_index(struct mlx5_cmd *cmd)
static void cmd_free_index(struct mlx5_cmd *cmd, int idx) static void cmd_free_index(struct mlx5_cmd *cmd, int idx)
{ {
unsigned long flags; lockdep_assert_held(&cmd->alloc_lock);
spin_lock_irqsave(&cmd->alloc_lock, flags);
set_bit(idx, &cmd->bitmask); set_bit(idx, &cmd->bitmask);
spin_unlock_irqrestore(&cmd->alloc_lock, flags);
} }
static void cmd_ent_get(struct mlx5_cmd_work_ent *ent) static void cmd_ent_get(struct mlx5_cmd_work_ent *ent)
...@@ -145,17 +142,21 @@ static void cmd_ent_get(struct mlx5_cmd_work_ent *ent) ...@@ -145,17 +142,21 @@ static void cmd_ent_get(struct mlx5_cmd_work_ent *ent)
static void cmd_ent_put(struct mlx5_cmd_work_ent *ent) static void cmd_ent_put(struct mlx5_cmd_work_ent *ent)
{ {
struct mlx5_cmd *cmd = ent->cmd;
unsigned long flags;
spin_lock_irqsave(&cmd->alloc_lock, flags);
if (!refcount_dec_and_test(&ent->refcnt)) if (!refcount_dec_and_test(&ent->refcnt))
return; goto out;
if (ent->idx >= 0) { if (ent->idx >= 0) {
struct mlx5_cmd *cmd = ent->cmd;
cmd_free_index(cmd, ent->idx); cmd_free_index(cmd, ent->idx);
up(ent->page_queue ? &cmd->pages_sem : &cmd->sem); up(ent->page_queue ? &cmd->pages_sem : &cmd->sem);
} }
cmd_free_ent(ent); cmd_free_ent(ent);
out:
spin_unlock_irqrestore(&cmd->alloc_lock, flags);
} }
static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx) static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment