Commit 32def412 authored by Amir Tzin's avatar Amir Tzin Committed by Saeed Mahameed

net/mlx5: Read timeout values from DTOR

Replace hard coded timeouts with values stored by firmware in default
timeouts register (DTOR). Timeouts are read during driver load. If DTOR
is not supported by firmware then fallback to hard coded defaults
instead.
Signed-off-by: default avatarAmir Tzin <amirtz@nvidia.com>
Reviewed-by: default avatarMoshe Shemesh <moshe@nvidia.com>
Signed-off-by: default avatarSaeed Mahameed <saeedm@nvidia.com>
parent 5945e1ad
......@@ -32,7 +32,6 @@ void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq);
void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq);
#define MLX5E_REPORTER_PER_Q_MAX_LEN 256
#define MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC 2000
struct mlx5e_err_ctx {
int (*recover)(void *ctx);
......
......@@ -6,6 +6,7 @@
#include "txrx.h"
#include "devlink.h"
#include "ptp.h"
#include "lib/tout.h"
static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state)
{
......@@ -32,8 +33,10 @@ static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state)
static int mlx5e_wait_for_icosq_flush(struct mlx5e_icosq *icosq)
{
unsigned long exp_time = jiffies +
msecs_to_jiffies(MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC);
struct mlx5_core_dev *dev = icosq->channel->mdev;
unsigned long exp_time;
exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR));
while (time_before(jiffies, exp_time)) {
if (icosq->cc == icosq->pc)
......
......@@ -4,11 +4,14 @@
#include "health.h"
#include "en/ptp.h"
#include "en/devlink.h"
#include "lib/tout.h"
static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
{
unsigned long exp_time = jiffies +
msecs_to_jiffies(MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC);
struct mlx5_core_dev *dev = sq->mdev;
unsigned long exp_time;
exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR));
while (time_before(jiffies, exp_time)) {
if (sq->cc == sq->pc)
......
......@@ -35,6 +35,7 @@
#include <linux/module.h>
#include "mlx5_core.h"
#include "../../mlxfw/mlxfw.h"
#include "lib/tout.h"
#include "accel/tls.h"
enum {
......@@ -317,10 +318,9 @@ int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev)
return 0;
}
#define MLX5_FAST_TEARDOWN_WAIT_MS 3000
int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev)
{
unsigned long end, delay_ms = MLX5_FAST_TEARDOWN_WAIT_MS;
unsigned long end, delay_ms = mlx5_tout_ms(dev, TEARDOWN);
u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {};
u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {};
int state;
......@@ -618,17 +618,18 @@ static void mlx5_fsm_release(struct mlxfw_dev *mlxfw_dev, u32 fwhandle)
fwhandle, 0);
}
#define MLX5_FSM_REACTIVATE_TOUT 5000 /* msecs */
static int mlx5_fsm_reactivate(struct mlxfw_dev *mlxfw_dev, u8 *status)
{
unsigned long exp_time = jiffies + msecs_to_jiffies(MLX5_FSM_REACTIVATE_TOUT);
struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
u32 out[MLX5_ST_SZ_DW(mirc_reg)];
u32 in[MLX5_ST_SZ_DW(mirc_reg)];
unsigned long exp_time;
int err;
exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FSM_REACTIVATE));
if (!MLX5_CAP_MCAM_REG2(dev, mirc))
return -EOPNOTSUPP;
......
......@@ -3,6 +3,7 @@
#include "fw_reset.h"
#include "diag/fw_tracer.h"
#include "lib/tout.h"
enum {
MLX5_FW_RESET_FLAGS_RESET_REQUESTED,
......@@ -228,8 +229,6 @@ static void mlx5_sync_reset_request_event(struct work_struct *work)
mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack. Device reset is expected.\n");
}
#define MLX5_PCI_LINK_UP_TIMEOUT 2000
static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev)
{
struct pci_bus *bridge_bus = dev->pdev->bus;
......@@ -286,7 +285,7 @@ static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev)
goto restore;
}
timeout = jiffies + msecs_to_jiffies(MLX5_PCI_LINK_UP_TIMEOUT);
timeout = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, PCI_TOGGLE));
do {
err = pci_read_config_word(bridge, cap + PCI_EXP_LNKSTA, &reg16);
if (err)
......@@ -299,8 +298,8 @@ static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev)
if (reg16 & PCI_EXP_LNKSTA_DLLLA) {
mlx5_core_info(dev, "PCI Link up\n");
} else {
mlx5_core_err(dev, "PCI link not ready (0x%04x) after %d ms\n",
reg16, MLX5_PCI_LINK_UP_TIMEOUT);
mlx5_core_err(dev, "PCI link not ready (0x%04x) after %llu ms\n",
reg16, mlx5_tout_ms(dev, PCI_TOGGLE));
err = -ETIMEDOUT;
}
......@@ -395,16 +394,15 @@ static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long acti
return NOTIFY_OK;
}
#define MLX5_FW_RESET_TIMEOUT_MSEC 5000
int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev)
{
unsigned long timeout = msecs_to_jiffies(MLX5_FW_RESET_TIMEOUT_MSEC);
unsigned long timeout = msecs_to_jiffies(mlx5_tout_ms(dev, PCI_SYNC_UPDATE));
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
int err;
if (!wait_for_completion_timeout(&fw_reset->done, timeout)) {
mlx5_core_warn(dev, "FW sync reset timeout after %d seconds\n",
MLX5_FW_RESET_TIMEOUT_MSEC / 1000);
mlx5_core_warn(dev, "FW sync reset timeout after %llu seconds\n",
mlx5_tout_ms(dev, PCI_SYNC_UPDATE) / 1000);
err = -ETIMEDOUT;
goto out;
}
......
......@@ -40,10 +40,10 @@
#include "lib/eq.h"
#include "lib/mlx5.h"
#include "lib/pci_vsc.h"
#include "lib/tout.h"
#include "diag/fw_tracer.h"
enum {
MLX5_HEALTH_POLL_INTERVAL = 2 * HZ,
MAX_MISSES = 3,
};
......@@ -219,11 +219,9 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
mutex_unlock(&dev->intf_state_mutex);
}
#define MLX5_CRDUMP_WAIT_MS 60000
#define MLX5_FW_RESET_WAIT_MS 1000
void mlx5_error_sw_reset(struct mlx5_core_dev *dev)
{
unsigned long end, delay_ms = MLX5_FW_RESET_WAIT_MS;
unsigned long end, delay_ms = mlx5_tout_ms(dev, PCI_TOGGLE);
int lock = -EBUSY;
mutex_lock(&dev->intf_state_mutex);
......@@ -237,7 +235,7 @@ void mlx5_error_sw_reset(struct mlx5_core_dev *dev)
lock = lock_sem_sw_reset(dev, true);
if (lock == -EBUSY) {
delay_ms = MLX5_CRDUMP_WAIT_MS;
delay_ms = mlx5_tout_ms(dev, FULL_CRDUMP);
goto recover_from_sw_reset;
}
/* Execute SW reset */
......@@ -307,13 +305,11 @@ static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
mlx5_disable_device(dev);
}
/* How much time to wait until health resetting the driver (in msecs) */
#define MLX5_RECOVERY_WAIT_MSECS 60000
int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev)
{
unsigned long end;
end = jiffies + msecs_to_jiffies(MLX5_RECOVERY_WAIT_MSECS);
end = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FW_RESET));
while (sensor_pci_not_working(dev)) {
if (time_after(jiffies, end))
return -ETIMEDOUT;
......@@ -674,13 +670,13 @@ static void mlx5_fw_reporters_destroy(struct mlx5_core_dev *dev)
devlink_health_reporter_destroy(health->fw_fatal_reporter);
}
static unsigned long get_next_poll_jiffies(void)
static unsigned long get_next_poll_jiffies(struct mlx5_core_dev *dev)
{
unsigned long next;
get_random_bytes(&next, sizeof(next));
next %= HZ;
next += jiffies + MLX5_HEALTH_POLL_INTERVAL;
next += jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL));
return next;
}
......@@ -740,11 +736,12 @@ static void poll_health(struct timer_list *t)
queue_work(health->wq, &health->report_work);
out:
mod_timer(&health->timer, get_next_poll_jiffies());
mod_timer(&health->timer, get_next_poll_jiffies(dev));
}
void mlx5_start_health_poll(struct mlx5_core_dev *dev)
{
u64 poll_interval_ms = mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL);
struct mlx5_core_health *health = &dev->priv.health;
timer_setup(&health->timer, poll_health, 0);
......@@ -753,7 +750,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev)
health->health = &dev->iseg->health;
health->health_counter = &dev->iseg->health_counter;
health->timer.expires = round_jiffies(jiffies + MLX5_HEALTH_POLL_INTERVAL);
health->timer.expires = jiffies + msecs_to_jiffies(poll_interval_ms);
add_timer(&health->timer);
}
......
......@@ -13,7 +13,17 @@ static const u32 tout_def_sw_val[MAX_TIMEOUT_TYPES] = {
[MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS] = 20000,
[MLX5_TO_FW_PRE_INIT_WAIT_MS] = 2,
[MLX5_TO_FW_INIT_MS] = 2000,
[MLX5_TO_CMD_MS] = 60000
[MLX5_TO_CMD_MS] = 60000,
[MLX5_TO_PCI_TOGGLE_MS] = 2000,
[MLX5_TO_HEALTH_POLL_INTERVAL_MS] = 2000,
[MLX5_TO_FULL_CRDUMP_MS] = 60000,
[MLX5_TO_FW_RESET_MS] = 60000,
[MLX5_TO_FLUSH_ON_ERROR_MS] = 2000,
[MLX5_TO_PCI_SYNC_UPDATE_MS] = 5000,
[MLX5_TO_TEARDOWN_MS] = 3000,
[MLX5_TO_FSM_REACTIVATE_MS] = 5000,
[MLX5_TO_RECLAIM_PAGES_MS] = 5000,
[MLX5_TO_RECLAIM_VFS_PAGES_MS] = 120000
};
static void tout_set(struct mlx5_core_dev *dev, u64 val, enum mlx5_timeouts_types type)
......@@ -94,3 +104,59 @@ u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type)
{
return dev->timeouts->to[type];
}
#define MLX5_TIMEOUT_QUERY(fld, reg_out) \
({ \
struct mlx5_ifc_default_timeout_bits *time_field; \
u32 to_multi, to_value; \
u64 to_val_ms; \
\
time_field = MLX5_ADDR_OF(dtor_reg, reg_out, fld); \
to_multi = MLX5_GET(default_timeout, time_field, to_multiplier); \
to_value = MLX5_GET(default_timeout, time_field, to_value); \
to_val_ms = tout_convert_reg_field_to_ms(to_multi, to_value); \
to_val_ms; \
})
#define MLX5_TIMEOUT_FILL(fld, reg_out, dev, to_type, to_extra) \
({ \
u64 fw_to = MLX5_TIMEOUT_QUERY(fld, reg_out); \
tout_set(dev, fw_to + (to_extra), to_type); \
fw_to; \
})
static int tout_query_dtor(struct mlx5_core_dev *dev)
{
u64 pcie_toggle_to_val, tear_down_to_val;
u32 out[MLX5_ST_SZ_DW(dtor_reg)] = {};
u32 in[MLX5_ST_SZ_DW(dtor_reg)] = {};
int err;
err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_DTOR, 0, 0);
if (err)
return err;
pcie_toggle_to_val = MLX5_TIMEOUT_FILL(pcie_toggle_to, out, dev, MLX5_TO_PCI_TOGGLE_MS, 0);
MLX5_TIMEOUT_FILL(fw_reset_to, out, dev, MLX5_TO_FW_RESET_MS, pcie_toggle_to_val);
tear_down_to_val = MLX5_TIMEOUT_FILL(tear_down_to, out, dev, MLX5_TO_TEARDOWN_MS, 0);
MLX5_TIMEOUT_FILL(pci_sync_update_to, out, dev, MLX5_TO_PCI_SYNC_UPDATE_MS,
tear_down_to_val);
MLX5_TIMEOUT_FILL(health_poll_to, out, dev, MLX5_TO_HEALTH_POLL_INTERVAL_MS, 0);
MLX5_TIMEOUT_FILL(full_crdump_to, out, dev, MLX5_TO_FULL_CRDUMP_MS, 0);
MLX5_TIMEOUT_FILL(flush_on_err_to, out, dev, MLX5_TO_FLUSH_ON_ERROR_MS, 0);
MLX5_TIMEOUT_FILL(fsm_reactivate_to, out, dev, MLX5_TO_FSM_REACTIVATE_MS, 0);
MLX5_TIMEOUT_FILL(reclaim_pages_to, out, dev, MLX5_TO_RECLAIM_PAGES_MS, 0);
MLX5_TIMEOUT_FILL(reclaim_vfs_pages_to, out, dev, MLX5_TO_RECLAIM_VFS_PAGES_MS, 0);
return 0;
}
int mlx5_tout_query_dtor(struct mlx5_core_dev *dev)
{
if (tout_is_supported(dev))
return tout_query_dtor(dev);
return 0;
}
......@@ -14,6 +14,18 @@ enum mlx5_timeouts_types {
MLX5_TO_FW_INIT_MS,
MLX5_TO_CMD_MS,
/* DTOR timeouts */
MLX5_TO_PCI_TOGGLE_MS,
MLX5_TO_HEALTH_POLL_INTERVAL_MS,
MLX5_TO_FULL_CRDUMP_MS,
MLX5_TO_FW_RESET_MS,
MLX5_TO_FLUSH_ON_ERROR_MS,
MLX5_TO_PCI_SYNC_UPDATE_MS,
MLX5_TO_TEARDOWN_MS,
MLX5_TO_FSM_REACTIVATE_MS,
MLX5_TO_RECLAIM_PAGES_MS,
MLX5_TO_RECLAIM_VFS_PAGES_MS,
MAX_TIMEOUT_TYPES
};
......@@ -21,6 +33,7 @@ struct mlx5_core_dev;
int mlx5_tout_init(struct mlx5_core_dev *dev);
void mlx5_tout_cleanup(struct mlx5_core_dev *dev);
void mlx5_tout_query_iseg(struct mlx5_core_dev *dev);
int mlx5_tout_query_dtor(struct mlx5_core_dev *dev);
u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type);
#define mlx5_tout_ms(dev, type) _mlx5_tout_ms(dev, MLX5_TO_##type##_MS)
......
......@@ -1020,6 +1020,12 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
goto err_disable_hca;
}
err = mlx5_tout_query_dtor(dev);
if (err) {
mlx5_core_err(dev, "failed to read dtor\n");
goto reclaim_boot_pages;
}
err = set_hca_ctrl(dev);
if (err) {
mlx5_core_err(dev, "set_hca_ctrl failed\n");
......
......@@ -38,6 +38,7 @@
#include <linux/xarray.h>
#include "mlx5_core.h"
#include "lib/eq.h"
#include "lib/tout.h"
enum {
MLX5_PAGES_CANT_GIVE = 0,
......@@ -64,11 +65,6 @@ struct fw_page {
unsigned int free_count;
};
enum {
MAX_RECLAIM_TIME_MSECS = 5000,
MAX_RECLAIM_VFS_PAGES_TIME_MSECS = 2 * 1000 * 60,
};
enum {
MLX5_MAX_RECLAIM_TIME_MILI = 5000,
MLX5_NUM_4K_IN_PAGE = PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE,
......@@ -641,7 +637,8 @@ static int optimal_reclaimed_pages(void)
static int mlx5_reclaim_root_pages(struct mlx5_core_dev *dev,
struct rb_root *root, u16 func_id)
{
unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
u64 recl_pages_to_jiffies = msecs_to_jiffies(mlx5_tout_ms(dev, RECLAIM_PAGES));
unsigned long end = jiffies + recl_pages_to_jiffies;
while (!RB_EMPTY_ROOT(root)) {
int nclaimed;
......@@ -656,7 +653,7 @@ static int mlx5_reclaim_root_pages(struct mlx5_core_dev *dev,
}
if (nclaimed)
end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
end = jiffies + recl_pages_to_jiffies;
if (time_after(jiffies, end)) {
mlx5_core_warn(dev, "FW did not return all pages. giving up...\n");
......@@ -727,7 +724,8 @@ void mlx5_pagealloc_stop(struct mlx5_core_dev *dev)
int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages)
{
unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
u64 recl_vf_pages_to_jiffies = msecs_to_jiffies(mlx5_tout_ms(dev, RECLAIM_VFS_PAGES));
unsigned long end = jiffies + recl_vf_pages_to_jiffies;
int prev_pages = *pages;
/* In case of internal error we will free the pages manually later */
......@@ -743,7 +741,7 @@ int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages)
return -ETIMEDOUT;
}
if (*pages < prev_pages) {
end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
end = jiffies + recl_vf_pages_to_jiffies;
prev_pages = *pages;
}
msleep(50);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment