Commit e1ad07b9 authored by Moshe Shemesh's avatar Moshe Shemesh Committed by Saeed Mahameed

net/mlx5: Fix sync reset event handler error flow

When sync reset now event handling fails on mlx5_pci_link_toggle() then
no reset was done. However, since mlx5_cmd_fast_teardown_hca() was
already done, the firmware function is closed and the driver is left
without firmware functionality.

Fix it by setting device error state and reopen the firmware resources.
Reopening is done by the thread that was called for devlink reload
fw_activate as it already holds the devlink lock.

Fixes: 5ec69744 ("net/mlx5: Add support for devlink reload action fw activate")
Signed-off-by: default avatarMoshe Shemesh <moshe@nvidia.com>
Reviewed-by: default avatarAya Levin <ayal@nvidia.com>
Signed-off-by: default avatarSaeed Mahameed <saeedm@nvidia.com>
parent 6d942e40
......@@ -9,7 +9,8 @@ enum {
MLX5_FW_RESET_FLAGS_RESET_REQUESTED,
MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST,
MLX5_FW_RESET_FLAGS_PENDING_COMP,
MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS
MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS,
MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED
};
struct mlx5_fw_reset {
......@@ -406,7 +407,7 @@ static void mlx5_sync_reset_now_event(struct work_struct *work)
err = mlx5_pci_link_toggle(dev);
if (err) {
mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, no reset done, err %d\n", err);
goto done;
set_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags);
}
mlx5_enter_error_state(dev, true);
......@@ -482,6 +483,10 @@ int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev)
goto out;
}
err = fw_reset->ret;
if (test_and_clear_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags)) {
mlx5_unload_one_devl_locked(dev);
mlx5_load_one_devl_locked(dev, false);
}
out:
clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
return err;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment