Commit d965465b authored by Ido Schimmel's avatar Ido Schimmel Committed by David S. Miller

mlxsw: core: Fix possible deadlock

When an EMAD is transmitted, a timeout work item is scheduled with a
delay of 200ms, so that another EMAD will be retried until a maximum of
five retries.

In certain situations, it's possible for the function waiting on the
EMAD to be associated with a work item that is queued on the same
workqueue (`mlxsw_core`) as the timeout work item. This results in
flushing a work item on the same workqueue.

According to commit e159489b ("workqueue: relax lockdep annotation
on flush_work()") the above may lead to a deadlock in case the workqueue
has only one worker active or if the system in under memory pressure and
the rescue worker is in use. The latter explains the very rare and
random nature of the lockdep splats we have been seeing:

[   52.730240] ============================================
[   52.736179] WARNING: possible recursive locking detected
[   52.742119] 4.14.0-rc3jiri+ #4 Not tainted
[   52.746697] --------------------------------------------
[   52.752635] kworker/1:3/599 is trying to acquire lock:
[   52.758378]  (mlxsw_core_driver_name){+.+.}, at: [<ffffffff811c4fa4>] flush_work+0x3a4/0x5e0
[   52.767837]
               but task is already holding lock:
[   52.774360]  (mlxsw_core_driver_name){+.+.}, at: [<ffffffff811c65c4>] process_one_work+0x7d4/0x12f0
[   52.784495]
               other info that might help us debug this:
[   52.791794]  Possible unsafe locking scenario:
[   52.798413]        CPU0
[   52.801144]        ----
[   52.803875]   lock(mlxsw_core_driver_name);
[   52.808556]   lock(mlxsw_core_driver_name);
[   52.813236]
                *** DEADLOCK ***
[   52.819857]  May be due to missing lock nesting notation
[   52.827450] 3 locks held by kworker/1:3/599:
[   52.832221]  #0:  (mlxsw_core_driver_name){+.+.}, at: [<ffffffff811c65c4>] process_one_work+0x7d4/0x12f0
[   52.842846]  #1:  ((&(&bridge->fdb_notify.dw)->work)){+.+.}, at: [<ffffffff811c65c4>] process_one_work+0x7d4/0x12f0
[   52.854537]  #2:  (rtnl_mutex){+.+.}, at: [<ffffffff822ad8e7>] rtnl_lock+0x17/0x20
[   52.863021]
               stack backtrace:
[   52.867890] CPU: 1 PID: 599 Comm: kworker/1:3 Not tainted 4.14.0-rc3jiri+ #4
[   52.875773] Hardware name: Mellanox Technologies Ltd. "MSN2100-CB2F"/"SA001017", BIOS 5.6.5 06/07/2016
[   52.886267] Workqueue: mlxsw_core mlxsw_sp_fdb_notify_work [mlxsw_spectrum]
[   52.894060] Call Trace:
[   52.909122]  __lock_acquire+0xf6f/0x2a10
[   53.025412]  lock_acquire+0x158/0x440
[   53.047557]  flush_work+0x3c4/0x5e0
[   53.087571]  __cancel_work_timer+0x3ca/0x5e0
[   53.177051]  cancel_delayed_work_sync+0x13/0x20
[   53.182142]  mlxsw_reg_trans_bulk_wait+0x12d/0x7a0 [mlxsw_core]
[   53.194571]  mlxsw_core_reg_access+0x586/0x990 [mlxsw_core]
[   53.225365]  mlxsw_reg_query+0x10/0x20 [mlxsw_core]
[   53.230882]  mlxsw_sp_fdb_notify_work+0x2a3/0x9d0 [mlxsw_spectrum]
[   53.237801]  process_one_work+0x8f1/0x12f0
[   53.321804]  worker_thread+0x1fd/0x10c0
[   53.435158]  kthread+0x28e/0x370
[   53.448703]  ret_from_fork+0x2a/0x40
[   53.453017] mlxsw_spectrum 0000:01:00.0: EMAD retries (2/5) (tid=bf4549b100000774)
[   53.453119] mlxsw_spectrum 0000:01:00.0: EMAD retries (5/5) (tid=bf4549b100000770)
[   53.453132] mlxsw_spectrum 0000:01:00.0: EMAD reg access failed (tid=bf4549b100000770,reg_id=200b(sfn),type=query,status=0(operation performed))
[   53.453143] mlxsw_spectrum 0000:01:00.0: Failed to get FDB notifications

Fix this by creating another workqueue for EMAD timeouts, thereby
preventing the situation of a work item trying to flush a work item
queued on the same workqueue.

Fixes: caf7297e ("mlxsw: core: Introduce support for asynchronous EMAD register access")
Signed-off-by: default avatarIdo Schimmel <idosch@mellanox.com>
Reported-by: default avatarJiri Pirko <jiri@mellanox.com>
Signed-off-by: default avatarJiri Pirko <jiri@mellanox.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 1b72bf5a
...@@ -96,6 +96,7 @@ struct mlxsw_core { ...@@ -96,6 +96,7 @@ struct mlxsw_core {
const struct mlxsw_bus *bus; const struct mlxsw_bus *bus;
void *bus_priv; void *bus_priv;
const struct mlxsw_bus_info *bus_info; const struct mlxsw_bus_info *bus_info;
struct workqueue_struct *emad_wq;
struct list_head rx_listener_list; struct list_head rx_listener_list;
struct list_head event_listener_list; struct list_head event_listener_list;
struct { struct {
...@@ -465,7 +466,7 @@ static void mlxsw_emad_trans_timeout_schedule(struct mlxsw_reg_trans *trans) ...@@ -465,7 +466,7 @@ static void mlxsw_emad_trans_timeout_schedule(struct mlxsw_reg_trans *trans)
{ {
unsigned long timeout = msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_MS); unsigned long timeout = msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_MS);
mlxsw_core_schedule_dw(&trans->timeout_dw, timeout); queue_delayed_work(trans->core->emad_wq, &trans->timeout_dw, timeout);
} }
static int mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core, static int mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core,
...@@ -587,12 +588,18 @@ static const struct mlxsw_listener mlxsw_emad_rx_listener = ...@@ -587,12 +588,18 @@ static const struct mlxsw_listener mlxsw_emad_rx_listener =
static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core) static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core)
{ {
struct workqueue_struct *emad_wq;
u64 tid; u64 tid;
int err; int err;
if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX))
return 0; return 0;
emad_wq = alloc_workqueue("mlxsw_core_emad", WQ_MEM_RECLAIM, 0);
if (!emad_wq)
return -ENOMEM;
mlxsw_core->emad_wq = emad_wq;
/* Set the upper 32 bits of the transaction ID field to a random /* Set the upper 32 bits of the transaction ID field to a random
* number. This allows us to discard EMADs addressed to other * number. This allows us to discard EMADs addressed to other
* devices. * devices.
...@@ -619,6 +626,7 @@ static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core) ...@@ -619,6 +626,7 @@ static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core)
err_emad_trap_set: err_emad_trap_set:
mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_emad_rx_listener, mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_emad_rx_listener,
mlxsw_core); mlxsw_core);
destroy_workqueue(mlxsw_core->emad_wq);
return err; return err;
} }
...@@ -631,6 +639,7 @@ static void mlxsw_emad_fini(struct mlxsw_core *mlxsw_core) ...@@ -631,6 +639,7 @@ static void mlxsw_emad_fini(struct mlxsw_core *mlxsw_core)
mlxsw_core->emad.use_emad = false; mlxsw_core->emad.use_emad = false;
mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_emad_rx_listener, mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_emad_rx_listener,
mlxsw_core); mlxsw_core);
destroy_workqueue(mlxsw_core->emad_wq);
} }
static struct sk_buff *mlxsw_emad_alloc(const struct mlxsw_core *mlxsw_core, static struct sk_buff *mlxsw_emad_alloc(const struct mlxsw_core *mlxsw_core,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment