Commit 6d70cd2a authored by Jakub Kicinski

Merge branch 'bnxt_en-error-recovery-improvements'

Michael Chan says:

====================
bnxt_en: Error recovery improvements.

This series contains a number of improvements in the area of error
recovery.  Most error recovery scenarios are tightly coordinated with
the firmware.  A number of patches add retry logic to establish
connection with the firmware if there are indications that the
firmware is still alive and will likely transition back to the
normal state.  Some patches speed up the recovery process and make
it more reliable.  There are some cleanup patches as well.
====================

Link: https://lore.kernel.org/r/1611558501-11022-1-git-send-email-michael.chan@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents ae189ccb 0da65f49
This diff is collapsed.
...@@ -656,6 +656,7 @@ struct nqe_cn { ...@@ -656,6 +656,7 @@ struct nqe_cn {
#define BNXT_HWRM_MAX_REQ_LEN (bp->hwrm_max_req_len) #define BNXT_HWRM_MAX_REQ_LEN (bp->hwrm_max_req_len)
#define BNXT_HWRM_SHORT_REQ_LEN sizeof(struct hwrm_short_input) #define BNXT_HWRM_SHORT_REQ_LEN sizeof(struct hwrm_short_input)
#define DFLT_HWRM_CMD_TIMEOUT 500 #define DFLT_HWRM_CMD_TIMEOUT 500
#define HWRM_CMD_MAX_TIMEOUT 40000
#define SHORT_HWRM_CMD_TIMEOUT 20 #define SHORT_HWRM_CMD_TIMEOUT 20
#define HWRM_CMD_TIMEOUT (bp->hwrm_cmd_timeout) #define HWRM_CMD_TIMEOUT (bp->hwrm_cmd_timeout)
#define HWRM_RESET_TIMEOUT ((HWRM_CMD_TIMEOUT) * 4) #define HWRM_RESET_TIMEOUT ((HWRM_CMD_TIMEOUT) * 4)
...@@ -1345,9 +1346,14 @@ struct bnxt_test_info { ...@@ -1345,9 +1346,14 @@ struct bnxt_test_info {
#define BNXT_CAG_REG_LEGACY_INT_STATUS 0x4014 #define BNXT_CAG_REG_LEGACY_INT_STATUS 0x4014
#define BNXT_CAG_REG_BASE 0x300000 #define BNXT_CAG_REG_BASE 0x300000
#define BNXT_GRC_REG_STATUS_P5 0x520
#define BNXT_GRCPF_REG_KONG_COMM 0xA00 #define BNXT_GRCPF_REG_KONG_COMM 0xA00
#define BNXT_GRCPF_REG_KONG_COMM_TRIGGER 0xB00 #define BNXT_GRCPF_REG_KONG_COMM_TRIGGER 0xB00
#define BNXT_GRC_REG_CHIP_NUM 0x48
#define BNXT_GRC_REG_BASE 0x260000
#define BNXT_GRC_BASE_MASK 0xfffff000 #define BNXT_GRC_BASE_MASK 0xfffff000
#define BNXT_GRC_OFFSET_MASK 0x00000ffc #define BNXT_GRC_OFFSET_MASK 0x00000ffc
...@@ -1441,6 +1447,8 @@ struct bnxt_ctx_pg_info { ...@@ -1441,6 +1447,8 @@ struct bnxt_ctx_pg_info {
#define BNXT_MAX_TQM_RINGS \ #define BNXT_MAX_TQM_RINGS \
(BNXT_MAX_TQM_SP_RINGS + BNXT_MAX_TQM_FP_RINGS) (BNXT_MAX_TQM_SP_RINGS + BNXT_MAX_TQM_FP_RINGS)
#define BNXT_BACKING_STORE_CFG_LEGACY_LEN 256
struct bnxt_ctx_mem_info { struct bnxt_ctx_mem_info {
u32 qp_max_entries; u32 qp_max_entries;
u16 qp_min_qp1_entries; u16 qp_min_qp1_entries;
...@@ -1532,9 +1540,22 @@ struct bnxt_fw_reporter_ctx { ...@@ -1532,9 +1540,22 @@ struct bnxt_fw_reporter_ctx {
#define BNXT_FW_HEALTH_WIN_OFF(reg) (BNXT_FW_HEALTH_WIN_BASE + \ #define BNXT_FW_HEALTH_WIN_OFF(reg) (BNXT_FW_HEALTH_WIN_BASE + \
((reg) & BNXT_GRC_OFFSET_MASK)) ((reg) & BNXT_GRC_OFFSET_MASK))
#define BNXT_FW_STATUS_HEALTH_MSK 0xffff
#define BNXT_FW_STATUS_HEALTHY 0x8000 #define BNXT_FW_STATUS_HEALTHY 0x8000
#define BNXT_FW_STATUS_SHUTDOWN 0x100000 #define BNXT_FW_STATUS_SHUTDOWN 0x100000
#define BNXT_FW_IS_HEALTHY(sts) (((sts) & BNXT_FW_STATUS_HEALTH_MSK) ==\
BNXT_FW_STATUS_HEALTHY)
#define BNXT_FW_IS_BOOTING(sts) (((sts) & BNXT_FW_STATUS_HEALTH_MSK) < \
BNXT_FW_STATUS_HEALTHY)
#define BNXT_FW_IS_ERR(sts) (((sts) & BNXT_FW_STATUS_HEALTH_MSK) > \
BNXT_FW_STATUS_HEALTHY)
#define BNXT_FW_RETRY 5
#define BNXT_FW_IF_RETRY 10
struct bnxt { struct bnxt {
void __iomem *bar0; void __iomem *bar0;
void __iomem *bar1; void __iomem *bar1;
...@@ -1788,6 +1809,7 @@ struct bnxt { ...@@ -1788,6 +1809,7 @@ struct bnxt {
#define BNXT_STATE_FW_FATAL_COND 6 #define BNXT_STATE_FW_FATAL_COND 6
#define BNXT_STATE_DRV_REGISTERED 7 #define BNXT_STATE_DRV_REGISTERED 7
#define BNXT_STATE_PCI_CHANNEL_IO_FROZEN 8 #define BNXT_STATE_PCI_CHANNEL_IO_FROZEN 8
#define BNXT_STATE_NAPI_DISABLED 9
#define BNXT_NO_FW_ACCESS(bp) \ #define BNXT_NO_FW_ACCESS(bp) \
(test_bit(BNXT_STATE_FW_FATAL_COND, &(bp)->state) || \ (test_bit(BNXT_STATE_FW_FATAL_COND, &(bp)->state) || \
......
...@@ -44,21 +44,20 @@ static int bnxt_fw_reporter_diagnose(struct devlink_health_reporter *reporter, ...@@ -44,21 +44,20 @@ static int bnxt_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
struct netlink_ext_ack *extack) struct netlink_ext_ack *extack)
{ {
struct bnxt *bp = devlink_health_reporter_priv(reporter); struct bnxt *bp = devlink_health_reporter_priv(reporter);
u32 val, health_status; u32 val;
int rc; int rc;
if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
return 0; return 0;
val = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG); val = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG);
health_status = val & 0xffff;
if (health_status < BNXT_FW_STATUS_HEALTHY) { if (BNXT_FW_IS_BOOTING(val)) {
rc = devlink_fmsg_string_pair_put(fmsg, "Description", rc = devlink_fmsg_string_pair_put(fmsg, "Description",
"Not yet completed initialization"); "Not yet completed initialization");
if (rc) if (rc)
return rc; return rc;
} else if (health_status > BNXT_FW_STATUS_HEALTHY) { } else if (BNXT_FW_IS_ERR(val)) {
rc = devlink_fmsg_string_pair_put(fmsg, "Description", rc = devlink_fmsg_string_pair_put(fmsg, "Description",
"Encountered fatal error and cannot recover"); "Encountered fatal error and cannot recover");
if (rc) if (rc)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment