Commit 8718d60e authored by Jakub Kicinski

Merge branch 'add-devlink-and-devlink-health-reporters-to'

George Cherian says:

====================
Add devlink and devlink health reporters to octeontx2

Add basic devlink and devlink health reporters.
Devlink health reporters are added for NPA block.

Address Jakub's comment to add devlink support for error reporting.
https://www.spinics.net/lists/netdev/msg670712.html

For now, I have dropped the NIX block health reporters.
This series attempts to add health reporters only for the NPA block.
As per Jakub's suggestion, separate reporters are used per event and the
counters have been removed.
====================

Link: https://lore.kernel.org/r/20201211062526.2302643-1-george.cherian@marvell.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 0e12c027 80b94148
...@@ -12,6 +12,7 @@ Contents ...@@ -12,6 +12,7 @@ Contents
- `Overview`_ - `Overview`_
- `Drivers`_ - `Drivers`_
- `Basic packet flow`_ - `Basic packet flow`_
- `Devlink health reporters`_
Overview Overview
======== ========
...@@ -157,3 +158,52 @@ Egress ...@@ -157,3 +158,52 @@ Egress
3. The SQ descriptor ring is maintained in buffers allocated from SQ mapped pool of NPA block LF. 3. The SQ descriptor ring is maintained in buffers allocated from SQ mapped pool of NPA block LF.
4. NIX block transmits the pkt on the designated channel. 4. NIX block transmits the pkt on the designated channel.
5. NPC MCAM entries can be installed to divert pkt onto a different channel. 5. NPC MCAM entries can be installed to divert pkt onto a different channel.
Devlink health reporters
========================
NPA Reporters
-------------
The NPA reporters are responsible for reporting and recovering the following groups of errors:
1. GENERAL events
- Error due to operation of unmapped PF.
- Error due to disabled alloc/free for other HW blocks (NIX, SSO, TIM, DPI and AURA).
2. ERROR events
- Fault due to NPA_AQ_INST_S read or NPA_AQ_RES_S write.
- AQ Doorbell Error.
3. RAS events
- RAS Error Reporting for NPA_AQ_INST_S/NPA_AQ_RES_S.
4. RVU events
- Error due to unmapped slot.
Sample Output
-------------
~# devlink health
pci/0002:01:00.0:
reporter hw_npa_intr
state healthy error 2872 recover 2872 last_dump_date 2020-12-10 last_dump_time 09:39:09 grace_period 0 auto_recover true auto_dump true
reporter hw_npa_gen
state healthy error 2872 recover 2872 last_dump_date 2020-12-11 last_dump_time 04:43:04 grace_period 0 auto_recover true auto_dump true
reporter hw_npa_err
state healthy error 2871 recover 2871 last_dump_date 2020-12-10 last_dump_time 09:39:17 grace_period 0 auto_recover true auto_dump true
reporter hw_npa_ras
state healthy error 0 recover 0 last_dump_date 2020-12-10 last_dump_time 09:32:40 grace_period 0 auto_recover true auto_dump true
Each reporter dumps the following:
- Error Type
- Error Register value
- Reason in words
For example:
~# devlink health dump show pci/0002:01:00.0 reporter hw_npa_gen
NPA_AF_GENERAL:
NPA General Interrupt Reg : 1
NIX0: free disabled RX
~# devlink health dump show pci/0002:01:00.0 reporter hw_npa_intr
NPA_AF_RVU:
NPA RVU Interrupt Reg : 1
Unmap Slot Error
~# devlink health dump show pci/0002:01:00.0 reporter hw_npa_err
NPA_AF_ERR:
NPA Error Interrupt Reg : 4096
AQ Doorbell Error
...@@ -9,6 +9,7 @@ config OCTEONTX2_MBOX ...@@ -9,6 +9,7 @@ config OCTEONTX2_MBOX
config OCTEONTX2_AF config OCTEONTX2_AF
tristate "Marvell OcteonTX2 RVU Admin Function driver" tristate "Marvell OcteonTX2 RVU Admin Function driver"
select OCTEONTX2_MBOX select OCTEONTX2_MBOX
select NET_DEVLINK
depends on (64BIT && COMPILE_TEST) || ARM64 depends on (64BIT && COMPILE_TEST) || ARM64
depends on PCI depends on PCI
help help
......
...@@ -10,4 +10,4 @@ obj-$(CONFIG_OCTEONTX2_AF) += octeontx2_af.o ...@@ -10,4 +10,4 @@ obj-$(CONFIG_OCTEONTX2_AF) += octeontx2_af.o
octeontx2_mbox-y := mbox.o rvu_trace.o octeontx2_mbox-y := mbox.o rvu_trace.o
octeontx2_af-y := cgx.o rvu.o rvu_cgx.o rvu_npa.o rvu_nix.o \ octeontx2_af-y := cgx.o rvu.o rvu_cgx.o rvu_npa.o rvu_nix.o \
rvu_reg.o rvu_npc.o rvu_debugfs.o ptp.o rvu_npc_fs.o \ rvu_reg.o rvu_npc.o rvu_debugfs.o ptp.o rvu_npc_fs.o \
rvu_cpt.o rvu_cpt.o rvu_devlink.o
...@@ -2826,17 +2826,23 @@ static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id) ...@@ -2826,17 +2826,23 @@ static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (err) if (err)
goto err_flr; goto err_flr;
err = rvu_register_dl(rvu);
if (err)
goto err_irq;
rvu_setup_rvum_blk_revid(rvu); rvu_setup_rvum_blk_revid(rvu);
/* Enable AF's VFs (if any) */ /* Enable AF's VFs (if any) */
err = rvu_enable_sriov(rvu); err = rvu_enable_sriov(rvu);
if (err) if (err)
goto err_irq; goto err_dl;
/* Initialize debugfs */ /* Initialize debugfs */
rvu_dbg_init(rvu); rvu_dbg_init(rvu);
return 0; return 0;
err_dl:
rvu_unregister_dl(rvu);
err_irq: err_irq:
rvu_unregister_interrupts(rvu); rvu_unregister_interrupts(rvu);
err_flr: err_flr:
...@@ -2868,6 +2874,7 @@ static void rvu_remove(struct pci_dev *pdev) ...@@ -2868,6 +2874,7 @@ static void rvu_remove(struct pci_dev *pdev)
rvu_dbg_exit(rvu); rvu_dbg_exit(rvu);
rvu_unregister_interrupts(rvu); rvu_unregister_interrupts(rvu);
rvu_unregister_dl(rvu);
rvu_flr_wq_destroy(rvu); rvu_flr_wq_destroy(rvu);
rvu_cgx_exit(rvu); rvu_cgx_exit(rvu);
rvu_fwdata_exit(rvu); rvu_fwdata_exit(rvu);
......
...@@ -12,7 +12,10 @@ ...@@ -12,7 +12,10 @@
#define RVU_H #define RVU_H
#include <linux/pci.h> #include <linux/pci.h>
#include <net/devlink.h>
#include "rvu_struct.h" #include "rvu_struct.h"
#include "rvu_devlink.h"
#include "common.h" #include "common.h"
#include "mbox.h" #include "mbox.h"
#include "npc.h" #include "npc.h"
...@@ -422,6 +425,7 @@ struct rvu { ...@@ -422,6 +425,7 @@ struct rvu {
#ifdef CONFIG_DEBUG_FS #ifdef CONFIG_DEBUG_FS
struct rvu_debugfs rvu_dbg; struct rvu_debugfs rvu_dbg;
#endif #endif
struct rvu_devlink *rvu_dl;
}; };
static inline void rvu_write64(struct rvu *rvu, u64 block, u64 offset, u64 val) static inline void rvu_write64(struct rvu *rvu, u64 block, u64 offset, u64 val)
......
This diff is collapsed.
/* SPDX-License-Identifier: GPL-2.0 */
/* Marvell OcteonTx2 RVU Devlink
*
* Copyright (C) 2020 Marvell.
*
*/
#ifndef RVU_DEVLINK_H
#define RVU_DEVLINK_H
/*
 * RVU_REPORTERS(_name) - define the devlink health reporter ops for _name.
 *
 * Expands to a static const struct devlink_health_reporter_ops object named
 * rvu_<_name>_reporter_ops, with:
 *   .name    = the stringified _name
 *   .recover = rvu_<_name>_recover
 *   .dump    = rvu_<_name>_dump
 *
 * Both callbacks must be declared before the macro is used. The expansion
 * carries no trailing semicolon; the user of the macro supplies it.
 */
#define RVU_REPORTERS(_name) \
static const struct devlink_health_reporter_ops rvu_ ## _name ## _reporter_ops = { \
.name = #_name, \
.recover = rvu_ ## _name ## _recover, \
.dump = rvu_ ## _name ## _dump, \
}
/*
 * Identifiers for the four NPA AF health event kinds (interrupt, general,
 * error, RAS). The numeric values are spelled out explicitly; they match the
 * implicit 0..3 numbering of the declaration order.
 */
enum npa_af_rvu_health {
	NPA_AF_RVU_INTR = 0,
	NPA_AF_RVU_GEN  = 1,
	NPA_AF_RVU_ERR  = 2,
	NPA_AF_RVU_RAS  = 3,
};
/*
 * Per-event 64-bit values for the four NPA AF event kinds (int, gen, err,
 * ras).
 * NOTE(review): presumably each field holds the interrupt register value
 * captured when the corresponding event fired, for use by the reporter's
 * dump callback - confirm against rvu_devlink.c.
 */
struct rvu_npa_event_ctx {
u64 npa_af_rvu_int;
u64 npa_af_rvu_gen;
u64 npa_af_rvu_err;
u64 npa_af_rvu_ras;
};
/*
 * State for the NPA health reporters: one shared event context plus, for
 * each of the four event kinds (intr, gen, err, ras), a devlink health
 * reporter handle paired with a work item.
 * NOTE(review): the work items are presumably queued on
 * rvu_devlink.devlink_wq to report the event outside interrupt context -
 * confirm against rvu_devlink.c.
 */
struct rvu_npa_health_reporters {
/* Event values shared by all four reporters */
struct rvu_npa_event_ctx *npa_event_ctx;
/* hw_npa_intr reporter and its work item */
struct devlink_health_reporter *rvu_hw_npa_intr_reporter;
struct work_struct intr_work;
/* hw_npa_gen reporter and its work item */
struct devlink_health_reporter *rvu_hw_npa_gen_reporter;
struct work_struct gen_work;
/* hw_npa_err reporter and its work item */
struct devlink_health_reporter *rvu_hw_npa_err_reporter;
struct work_struct err_work;
/* hw_npa_ras reporter and its work item */
struct devlink_health_reporter *rvu_hw_npa_ras_reporter;
struct work_struct ras_work;
};
/*
 * Devlink state of one RVU AF device; struct rvu refers to it through its
 * rvu_dl pointer.
 */
struct rvu_devlink {
/* devlink instance handle */
struct devlink *dl;
/* Back-pointer to the owning RVU device */
struct rvu *rvu;
/* Workqueue for devlink work; NOTE(review): presumably runs the per-reporter work items - confirm */
struct workqueue_struct *devlink_wq;
/* NPA health reporter state */
struct rvu_npa_health_reporters *rvu_npa_health_reporter;
};
/* Devlink APIs */
/*
 * rvu_register_dl() - set up devlink support for @rvu.
 * Called from rvu_probe(), which treats any non-zero return value as a
 * failure and unwinds; returns 0 on success.
 */
int rvu_register_dl(struct rvu *rvu);
/*
 * rvu_unregister_dl() - tear down what rvu_register_dl() set up for @rvu.
 * Called from rvu_remove() and from the rvu_probe() error path.
 */
void rvu_unregister_dl(struct rvu *rvu);
#endif /* RVU_DEVLINK_H */
...@@ -64,6 +64,16 @@ enum rvu_af_int_vec_e { ...@@ -64,6 +64,16 @@ enum rvu_af_int_vec_e {
RVU_AF_INT_VEC_CNT = 0x5, RVU_AF_INT_VEC_CNT = 0x5,
}; };
/*
 * Interrupt vector numbers of the NPA admin function (AF).
 * NPA_AF_INT_VEC_CNT is the number of vectors, not a vector itself.
 */
enum npa_af_int_vec_e {
	NPA_AF_INT_VEC_RVU	= 0x0,
	NPA_AF_INT_VEC_GEN	= 0x1,
	NPA_AF_INT_VEC_AQ_DONE	= 0x2,
	NPA_AF_INT_VEC_AF_ERR	= 0x3,
	NPA_AF_INT_VEC_POISON	= 0x4,
	NPA_AF_INT_VEC_CNT	= 0x5,
};
/** /**
* RVU PF Interrupt Vector Enumeration * RVU PF Interrupt Vector Enumeration
*/ */
...@@ -104,6 +114,19 @@ enum npa_aq_instop { ...@@ -104,6 +114,19 @@ enum npa_aq_instop {
NPA_AQ_INSTOP_UNLOCK = 0x5, NPA_AQ_INSTOP_UNLOCK = 0x5,
}; };
/*
 * NPA ALLOC/FREE input queues, one per requesting coprocessor.
 * Values 0x7 through 0xd are not assigned in this enumeration.
 */
enum npa_inpq {
	NPA_INPQ_NIX0_RX	= 0x0,
	NPA_INPQ_NIX0_TX	= 0x1,
	NPA_INPQ_NIX1_RX	= 0x2,
	NPA_INPQ_NIX1_TX	= 0x3,
	NPA_INPQ_SSO		= 0x4,
	NPA_INPQ_TIM		= 0x5,
	NPA_INPQ_DPI		= 0x6,
	NPA_INPQ_AURA_OP	= 0xe,
	NPA_INPQ_INTERNAL_RSV	= 0xf,
};
/* NPA admin queue instruction structure */ /* NPA admin queue instruction structure */
struct npa_aq_inst_s { struct npa_aq_inst_s {
#if defined(__BIG_ENDIAN_BITFIELD) #if defined(__BIG_ENDIAN_BITFIELD)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment