Commit a9f168e4 authored by Moshe Shemesh's avatar Moshe Shemesh Committed by Saeed Mahameed

net/mlx5: Check with FW that sync reset completed successfully

Even if the PF driver had no error on his part of the sync reset flow,
the firmware can see wider picture as it syncs all the PFs in the flow.
So add at end of sync reset flow check with firmware by reading MFRL
register and initialization segment that the flow had no issue from
firmware point of view too.
Signed-off-by: default avatarMoshe Shemesh <moshe@nvidia.com>
Reviewed-by: default avatarShay Drory <shayd@nvidia.com>
Signed-off-by: default avatarSaeed Mahameed <saeedm@nvidia.com>
parent 6486c0f4
......@@ -212,6 +212,9 @@ static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_a
/* On fw_activate action, also driver is reloaded and reinit performed */
*actions_performed |= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
ret = mlx5_load_one_devl_locked(dev, true);
if (ret)
return ret;
ret = mlx5_fw_reset_verify_fw_complete(dev, extack);
break;
default:
/* Unsupported action should not get to this function */
......
......@@ -127,17 +127,23 @@ static int mlx5_fw_reset_get_reset_state_err(struct mlx5_core_dev *dev,
if (mlx5_reg_mfrl_query(dev, NULL, NULL, &reset_state))
goto out;
if (!reset_state)
return 0;
switch (reset_state) {
case MLX5_MFRL_REG_RESET_STATE_IN_NEGOTIATION:
case MLX5_MFRL_REG_RESET_STATE_RESET_IN_PROGRESS:
NL_SET_ERR_MSG_MOD(extack, "Sync reset was already triggered");
NL_SET_ERR_MSG_MOD(extack, "Sync reset still in progress");
return -EBUSY;
case MLX5_MFRL_REG_RESET_STATE_TIMEOUT:
NL_SET_ERR_MSG_MOD(extack, "Sync reset got timeout");
case MLX5_MFRL_REG_RESET_STATE_NEG_TIMEOUT:
NL_SET_ERR_MSG_MOD(extack, "Sync reset negotiation timeout");
return -ETIMEDOUT;
case MLX5_MFRL_REG_RESET_STATE_NACK:
NL_SET_ERR_MSG_MOD(extack, "One of the hosts disabled reset");
return -EPERM;
case MLX5_MFRL_REG_RESET_STATE_UNLOAD_TIMEOUT:
NL_SET_ERR_MSG_MOD(extack, "Sync reset unload timeout");
return -ETIMEDOUT;
}
out:
......@@ -151,7 +157,7 @@ int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel,
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {};
u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {};
int err;
int err, rst_res;
set_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
......@@ -164,13 +170,34 @@ int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel,
return 0;
clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
if (err == -EREMOTEIO && MLX5_CAP_MCAM_FEATURE(dev, reset_state))
return mlx5_fw_reset_get_reset_state_err(dev, extack);
if (err == -EREMOTEIO && MLX5_CAP_MCAM_FEATURE(dev, reset_state)) {
rst_res = mlx5_fw_reset_get_reset_state_err(dev, extack);
return rst_res ? rst_res : err;
}
NL_SET_ERR_MSG_MOD(extack, "Sync reset command failed");
return mlx5_cmd_check(dev, err, in, out);
}
int mlx5_fw_reset_verify_fw_complete(struct mlx5_core_dev *dev,
struct netlink_ext_ack *extack)
{
u8 rst_state;
int err;
err = mlx5_fw_reset_get_reset_state_err(dev, extack);
if (err)
return err;
rst_state = mlx5_get_fw_rst_state(dev);
if (!rst_state)
return 0;
mlx5_core_err(dev, "Sync reset did not complete, state=%d\n", rst_state);
NL_SET_ERR_MSG_MOD(extack, "Sync reset did not complete successfully");
return rst_state;
}
int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev)
{
return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL0, 0, 0, false);
......
......@@ -12,6 +12,8 @@ int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel,
int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev);
int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev);
int mlx5_fw_reset_verify_fw_complete(struct mlx5_core_dev *dev,
struct netlink_ext_ack *extack);
void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev);
void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev);
void mlx5_drain_fw_reset(struct mlx5_core_dev *dev);
......
......@@ -10858,8 +10858,9 @@ enum {
MLX5_MFRL_REG_RESET_STATE_IDLE = 0,
MLX5_MFRL_REG_RESET_STATE_IN_NEGOTIATION = 1,
MLX5_MFRL_REG_RESET_STATE_RESET_IN_PROGRESS = 2,
MLX5_MFRL_REG_RESET_STATE_TIMEOUT = 3,
MLX5_MFRL_REG_RESET_STATE_NEG_TIMEOUT = 3,
MLX5_MFRL_REG_RESET_STATE_NACK = 4,
MLX5_MFRL_REG_RESET_STATE_UNLOAD_TIMEOUT = 5,
};
enum {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment