Commit eabe8e5e authored by Moshe Shemesh's avatar Moshe Shemesh Committed by Jakub Kicinski

net/mlx5: Handle sync reset now event

On sync_reset_now event the driver does reload and PCI link toggle to
activate firmware upgrade reset. When the firmware sends this event it
syncs the event on all PFs, so all PFs will do PCI link toggle at once.
To do PCI link toggle, the driver ensures that no other device ID under
the same bridge by checking that all the PF functions under the same PCI
bridge have same device ID. If no other device it uses PCI bridge link
control to turn link down and up.
Signed-off-by: default avatarMoshe Shemesh <moshe@mellanox.com>
Reviewed-by: default avatarSaeed Mahameed <saeedm@nvidia.com>
Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parent 38b9f903
...@@ -13,6 +13,7 @@ struct mlx5_fw_reset { ...@@ -13,6 +13,7 @@ struct mlx5_fw_reset {
struct workqueue_struct *wq; struct workqueue_struct *wq;
struct work_struct reset_request_work; struct work_struct reset_request_work;
struct work_struct reset_reload_work; struct work_struct reset_reload_work;
struct work_struct reset_now_work;
unsigned long reset_flags; unsigned long reset_flags;
struct timer_list timer; struct timer_list timer;
}; };
...@@ -156,6 +157,120 @@ static void mlx5_sync_reset_request_event(struct work_struct *work) ...@@ -156,6 +157,120 @@ static void mlx5_sync_reset_request_event(struct work_struct *work)
mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack. Device reset is expected.\n"); mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack. Device reset is expected.\n");
} }
#define MLX5_PCI_LINK_UP_TIMEOUT 2000
static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev)
{
struct pci_bus *bridge_bus = dev->pdev->bus;
struct pci_dev *bridge = bridge_bus->self;
u16 reg16, dev_id, sdev_id;
unsigned long timeout;
struct pci_dev *sdev;
int cap, err;
u32 reg32;
/* Check that all functions under the pci bridge are PFs of
* this device otherwise fail this function.
*/
err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id);
if (err)
return err;
list_for_each_entry(sdev, &bridge_bus->devices, bus_list) {
err = pci_read_config_word(sdev, PCI_DEVICE_ID, &sdev_id);
if (err)
return err;
if (sdev_id != dev_id)
return -EPERM;
}
cap = pci_find_capability(bridge, PCI_CAP_ID_EXP);
if (!cap)
return -EOPNOTSUPP;
list_for_each_entry(sdev, &bridge_bus->devices, bus_list) {
pci_save_state(sdev);
pci_cfg_access_lock(sdev);
}
/* PCI link toggle */
err = pci_read_config_word(bridge, cap + PCI_EXP_LNKCTL, &reg16);
if (err)
return err;
reg16 |= PCI_EXP_LNKCTL_LD;
err = pci_write_config_word(bridge, cap + PCI_EXP_LNKCTL, reg16);
if (err)
return err;
msleep(500);
reg16 &= ~PCI_EXP_LNKCTL_LD;
err = pci_write_config_word(bridge, cap + PCI_EXP_LNKCTL, reg16);
if (err)
return err;
/* Check link */
err = pci_read_config_dword(bridge, cap + PCI_EXP_LNKCAP, &reg32);
if (err)
return err;
if (!(reg32 & PCI_EXP_LNKCAP_DLLLARC)) {
mlx5_core_warn(dev, "No PCI link reporting capability (0x%08x)\n", reg32);
msleep(1000);
goto restore;
}
timeout = jiffies + msecs_to_jiffies(MLX5_PCI_LINK_UP_TIMEOUT);
do {
err = pci_read_config_word(bridge, cap + PCI_EXP_LNKSTA, &reg16);
if (err)
return err;
if (reg16 & PCI_EXP_LNKSTA_DLLLA)
break;
msleep(20);
} while (!time_after(jiffies, timeout));
if (reg16 & PCI_EXP_LNKSTA_DLLLA) {
mlx5_core_info(dev, "PCI Link up\n");
} else {
mlx5_core_err(dev, "PCI link not ready (0x%04x) after %d ms\n",
reg16, MLX5_PCI_LINK_UP_TIMEOUT);
err = -ETIMEDOUT;
}
restore:
list_for_each_entry(sdev, &bridge_bus->devices, bus_list) {
pci_cfg_access_unlock(sdev);
pci_restore_state(sdev);
}
return err;
}
static void mlx5_sync_reset_now_event(struct work_struct *work)
{
struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
reset_now_work);
struct mlx5_core_dev *dev = fw_reset->dev;
int err;
mlx5_sync_reset_clear_reset_requested(dev, false);
mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n");
err = mlx5_cmd_fast_teardown_hca(dev);
if (err) {
mlx5_core_warn(dev, "Fast teardown failed, no reset done, err %d\n", err);
goto done;
}
err = mlx5_pci_link_toggle(dev);
if (err) {
mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, no reset done, err %d\n", err);
goto done;
}
mlx5_enter_error_state(dev, true);
mlx5_unload_one(dev, false);
done:
mlx5_load_one(dev, false);
}
static void mlx5_sync_reset_events_handle(struct mlx5_fw_reset *fw_reset, struct mlx5_eqe *eqe) static void mlx5_sync_reset_events_handle(struct mlx5_fw_reset *fw_reset, struct mlx5_eqe *eqe)
{ {
struct mlx5_eqe_sync_fw_update *sync_fw_update_eqe; struct mlx5_eqe_sync_fw_update *sync_fw_update_eqe;
...@@ -167,6 +282,9 @@ static void mlx5_sync_reset_events_handle(struct mlx5_fw_reset *fw_reset, struct ...@@ -167,6 +282,9 @@ static void mlx5_sync_reset_events_handle(struct mlx5_fw_reset *fw_reset, struct
case MLX5_SYNC_RST_STATE_RESET_REQUEST: case MLX5_SYNC_RST_STATE_RESET_REQUEST:
queue_work(fw_reset->wq, &fw_reset->reset_request_work); queue_work(fw_reset->wq, &fw_reset->reset_request_work);
break; break;
case MLX5_SYNC_RST_STATE_RESET_NOW:
queue_work(fw_reset->wq, &fw_reset->reset_now_work);
break;
} }
} }
...@@ -216,6 +334,7 @@ int mlx5_fw_reset_init(struct mlx5_core_dev *dev) ...@@ -216,6 +334,7 @@ int mlx5_fw_reset_init(struct mlx5_core_dev *dev)
INIT_WORK(&fw_reset->reset_request_work, mlx5_sync_reset_request_event); INIT_WORK(&fw_reset->reset_request_work, mlx5_sync_reset_request_event);
INIT_WORK(&fw_reset->reset_reload_work, mlx5_sync_reset_reload_work); INIT_WORK(&fw_reset->reset_reload_work, mlx5_sync_reset_reload_work);
INIT_WORK(&fw_reset->reset_now_work, mlx5_sync_reset_now_event);
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment