Commit c1d4d2e9 authored by Mohamad Haj Yahia's avatar Mohamad Haj Yahia Committed by David S. Miller

net/mlx5: Avoid calling sleeping function by the health poll thread

In internal error state the health poll thread will eventually call
synchronize_irq() (to safely trigger command completions) which might
sleep, so we are calling sleeping function from atomic context which is
invalid.
Here we move trigger_cmd_completions(dev) to enter error state which is
the earliest stage in error state handling.
This way we won't need to wait for next health poll to trigger command
completions and will solve the scheduling while atomic issue.
mlx5_enter_error_state can be called from two contexts, protect it with
dev->intf_state_lock

Fixes: 89d44f0a ('net/mlx5_core: Add pci error handlers to mlx5_core driver')
Signed-off-by: default avatarMohamad Haj Yahia <mohamad@mellanox.com>
Signed-off-by: default avatarSaeed Mahameed <saeedm@mellanox.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 0d834442
...@@ -108,15 +108,21 @@ static int in_fatal(struct mlx5_core_dev *dev) ...@@ -108,15 +108,21 @@ static int in_fatal(struct mlx5_core_dev *dev)
void mlx5_enter_error_state(struct mlx5_core_dev *dev) void mlx5_enter_error_state(struct mlx5_core_dev *dev)
{ {
mutex_lock(&dev->intf_state_mutex);
if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
return; goto unlock;
mlx5_core_err(dev, "start\n"); mlx5_core_err(dev, "start\n");
if (pci_channel_offline(dev->pdev) || in_fatal(dev)) if (pci_channel_offline(dev->pdev) || in_fatal(dev)) {
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
trigger_cmd_completions(dev);
}
mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 0); mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 0);
mlx5_core_err(dev, "end\n"); mlx5_core_err(dev, "end\n");
unlock:
mutex_unlock(&dev->intf_state_mutex);
} }
static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
...@@ -245,7 +251,6 @@ static void poll_health(unsigned long data) ...@@ -245,7 +251,6 @@ static void poll_health(unsigned long data)
u32 count; u32 count;
if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
trigger_cmd_completions(dev);
mod_timer(&health->timer, get_next_poll_jiffies()); mod_timer(&health->timer, get_next_poll_jiffies());
return; return;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment