Commit 89d1cf89 authored by Linus Torvalds

Merge tag 'edac_for_4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp

Pull EDAC updates from Borislav Petkov:

 - an EDAC driver for Cavium ThunderX RAS IP (Sergey Temerkhanov)

 - removal of DRAM error reporting through PCI SERR NMI (Borislav
   Petkov)

 - misc small fixes (Jan Glauber, Thor Thayer)

* tag 'edac_for_4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp:
  EDAC, ghes: Do not enable it by default
  EDAC: Rename report status accessors
  EDAC: Delete edac_stub.c
  EDAC: Update Kconfig help text
  EDAC: Remove EDAC_MM_EDAC
  EDAC: Issue tracepoint only when it is defined
  ACPI/extlog: Add EDAC dependency
  EDAC: Move edac_op_state to edac_mc.c
  EDAC: Remove edac_err_assert
  EDAC: Get rid of edac_handlers
  x86/nmi, EDAC: Get rid of DRAM error reporting thru PCI SERR NMI
  EDAC, highbank: Align Makefile directives
  EDAC, thunderx: Remove unused code
  EDAC, thunderx: Change LMC index calculation
  EDAC, altera: Fix peripheral warnings for Cyclone5
  EDAC, thunderx: Fix L2C MCI interrupt disable
  EDAC, thunderx: Add Cavium ThunderX EDAC driver
parents 08c521a2 f8d5549d
......@@ -4715,6 +4715,7 @@ L: linux-edac@vger.kernel.org
L: linux-mips@linux-mips.org
S: Supported
F: drivers/edac/octeon_edac*
F: drivers/edac/thunderx_edac*
EDAC-E752X
M: Mark Gross <mark.gross@intel.com>
......
......@@ -748,7 +748,6 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
CONFIG_LEDS_TRIGGER_TRANSIENT=y
CONFIG_LEDS_TRIGGER_CAMERA=y
CONFIG_EDAC=y
CONFIG_EDAC_MM_EDAC=y
CONFIG_EDAC_HIGHBANK_MC=y
CONFIG_EDAC_HIGHBANK_L2=y
CONFIG_RTC_CLASS=y
......
......@@ -635,8 +635,7 @@ CONFIG_LEDS_TRIGGER_GPIO=m
CONFIG_LEDS_TRIGGER_DEFAULT_ON=m
CONFIG_LEDS_TRIGGER_TRANSIENT=m
CONFIG_LEDS_TRIGGER_CAMERA=m
CONFIG_EDAC=y
CONFIG_EDAC_MM_EDAC=m
CONFIG_EDAC=m
CONFIG_RTC_CLASS=y
CONFIG_RTC_DEBUG=y
CONFIG_RTC_DRV_DS1307=m
......
......@@ -16,9 +16,8 @@ CONFIG_DAVICOM_PHY=y
CONFIG_DMADEVICES=y
CONFIG_E1000E=y
CONFIG_E1000=y
CONFIG_EDAC_MM_EDAC=y
CONFIG_EDAC_MPC85XX=y
CONFIG_EDAC=y
CONFIG_EDAC_MPC85XX=y
CONFIG_EEPROM_AT24=y
CONFIG_EEPROM_LEGACY=y
CONFIG_FB_FSL_DIU=y
......
......@@ -155,7 +155,6 @@ CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
CONFIG_USB_OHCI_HCD_PPC_OF_LE=y
CONFIG_USB_STORAGE=y
CONFIG_EDAC=y
CONFIG_EDAC_MM_EDAC=y
CONFIG_EDAC_MPC85XX=y
CONFIG_RTC_CLASS=y
# CONFIG_RTC_INTF_PROC is not set
......
......@@ -116,7 +116,6 @@ CONFIG_LEDS_TRIGGERS=y
CONFIG_LEDS_TRIGGER_TIMER=y
CONFIG_LEDS_TRIGGER_HEARTBEAT=y
CONFIG_EDAC=y
CONFIG_EDAC_MM_EDAC=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_DS1307=y
CONFIG_RTC_DRV_CMOS=y
......
......@@ -179,7 +179,6 @@ CONFIG_INFINIBAND_MTHCA=m
CONFIG_INFINIBAND_IPOIB=m
CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=y
CONFIG_EDAC=y
CONFIG_EDAC_MM_EDAC=y
CONFIG_EDAC_CELL=y
CONFIG_UIO=m
CONFIG_EXT2_FS=y
......
......@@ -142,7 +142,6 @@ CONFIG_USB_UHCI_HCD=y
CONFIG_USB_SL811_HCD=y
CONFIG_USB_STORAGE=y
CONFIG_EDAC=y
CONFIG_EDAC_MM_EDAC=y
CONFIG_EDAC_PASEMI=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_DS1307=y
......
......@@ -262,7 +262,6 @@ CONFIG_INFINIBAND_IPOIB_CM=y
CONFIG_INFINIBAND_SRP=m
CONFIG_INFINIBAND_ISER=m
CONFIG_EDAC=y
CONFIG_EDAC_MM_EDAC=y
CONFIG_EDAC_PASEMI=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_DS1307=y
......
......@@ -173,7 +173,6 @@ CONFIG_INFINIBAND_MTHCA=m
CONFIG_INFINIBAND_IPOIB=m
CONFIG_INFINIBAND_ISER=m
CONFIG_EDAC=y
CONFIG_EDAC_MM_EDAC=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_DS1307=y
CONFIG_FS_DAX=y
......
......@@ -988,8 +988,7 @@ CONFIG_LEDS_TRIGGER_BACKLIGHT=m
CONFIG_LEDS_TRIGGER_DEFAULT_ON=m
CONFIG_ACCESSIBILITY=y
CONFIG_A11Y_BRAILLE_CONSOLE=y
CONFIG_EDAC=y
CONFIG_EDAC_MM_EDAC=m
CONFIG_EDAC=m
CONFIG_RTC_CLASS=y
# CONFIG_RTC_HCTOSYS is not set
CONFIG_RTC_DRV_DS1307=m
......
......@@ -249,7 +249,6 @@ CONFIG_USB_EHCI_HCD=y
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_STORAGE=y
CONFIG_EDAC=y
CONFIG_EDAC_MM_EDAC=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_TILE=y
CONFIG_EXT2_FS=y
......
......@@ -358,7 +358,6 @@ CONFIG_WATCHDOG_NOWAYOUT=y
# CONFIG_VGA_ARB is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_EDAC=y
CONFIG_EDAC_MM_EDAC=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_TILE=y
CONFIG_EXT2_FS=y
......
......@@ -222,17 +222,6 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs)
pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
reason, smp_processor_id());
/*
* On some machines, PCI SERR line is used to report memory
* errors. EDAC makes use of it.
*/
#if defined(CONFIG_EDAC)
if (edac_handler_set()) {
edac_atomic_assert_error();
return;
}
#endif
if (panic_on_unrecovered_nmi)
nmi_panic(regs, "NMI: Not continuing");
......
......@@ -469,9 +469,8 @@ config ACPI_WATCHDOG
config ACPI_EXTLOG
tristate "Extended Error Log support"
depends on X86_MCE && X86_LOCAL_APIC
depends on X86_MCE && X86_LOCAL_APIC && EDAC
select UEFI_CPER
select RAS
default n
help
Certain usages such as Predictive Failure Analysis (PFA) require
......
......@@ -229,7 +229,7 @@ static int __init extlog_init(void)
if (!(cap & MCG_ELOG_P) || !extlog_get_l1addr())
return -ENODEV;
if (get_edac_report_status() == EDAC_REPORTING_FORCE) {
if (edac_get_report_status() == EDAC_REPORTING_FORCE) {
pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n");
return -EPERM;
}
......@@ -285,8 +285,8 @@ static int __init extlog_init(void)
* eMCA event report method has higher priority than EDAC method,
* unless EDAC event report method is mandatory.
*/
old_edac_report_status = get_edac_report_status();
set_edac_report_status(EDAC_REPORTING_DISABLED);
old_edac_report_status = edac_get_report_status();
edac_set_report_status(EDAC_REPORTING_DISABLED);
mce_register_decode_chain(&extlog_mce_dec);
/* enable OS to be involved to take over management from BIOS */
((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN;
......@@ -308,7 +308,7 @@ static int __init extlog_init(void)
static void __exit extlog_exit(void)
{
set_edac_report_status(old_edac_report_status);
edac_set_report_status(old_edac_report_status);
mce_unregister_decode_chain(&extlog_mce_dec);
((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN;
if (extlog_l1_addr)
......
......@@ -10,26 +10,16 @@ config EDAC_SUPPORT
bool
menuconfig EDAC
bool "EDAC (Error Detection And Correction) reporting"
depends on HAS_IOMEM && EDAC_SUPPORT
tristate "EDAC (Error Detection And Correction) reporting"
depends on HAS_IOMEM && EDAC_SUPPORT && RAS
help
EDAC is designed to report errors in the core system.
These are low-level errors that are reported in the CPU or
supporting chipset or other subsystems:
EDAC is a subsystem along with hardware-specific drivers designed to
report hardware errors. These are low-level errors that are reported
in the CPU or supporting chipset or other subsystems:
memory errors, cache errors, PCI errors, thermal throttling, etc..
If unsure, select 'Y'.
If this code is reporting problems on your system, please
see the EDAC project web pages for more information at:
<http://bluesmoke.sourceforge.net/>
and:
<http://buttersideup.com/edacwiki>
There is also a mailing list for the EDAC project, which can
be found via the sourceforge page.
The mailing list for the EDAC project is linux-edac@vger.kernel.org.
if EDAC
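# A minimal sketch of the resulting driver entry shape, taken from the
# EDAC_AMD64 hunk further below: the explicit EDAC_MM_EDAC dependency is gone
# and each driver now sits under this "if EDAC" block with only its
# platform-specific dependencies, e.g.:
#
#	config EDAC_AMD64
#		tristate "AMD64 (Opteron, Athlon64)"
#		depends on AMD_NB && EDAC_DECODE_MCE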
......@@ -62,21 +52,9 @@ config EDAC_DECODE_MCE
which occur really early upon boot, before the module infrastructure
has been initialized.
config EDAC_MM_EDAC
tristate "Main Memory EDAC (Error Detection And Correction) reporting"
select RAS
help
Some systems are able to detect and correct errors in main
memory. EDAC can report statistics on memory error
detection and correction (EDAC - or commonly referred to ECC
errors). EDAC will also try to decode where these errors
occurred so that a particular failing memory module can be
replaced. If unsure, select 'Y'.
config EDAC_GHES
bool "Output ACPI APEI/GHES BIOS detected errors via EDAC"
depends on ACPI_APEI_GHES && (EDAC_MM_EDAC=y)
default y
depends on ACPI_APEI_GHES && (EDAC=y)
help
Not all machines support hardware-driven error report. Some of those
provide a BIOS-driven error report mechanism via ACPI, using the
......@@ -98,7 +76,7 @@ config EDAC_GHES
config EDAC_AMD64
tristate "AMD64 (Opteron, Athlon64)"
depends on EDAC_MM_EDAC && AMD_NB && EDAC_DECODE_MCE
depends on AMD_NB && EDAC_DECODE_MCE
help
Support for error detection and correction of DRAM ECC errors on
the AMD64 families (>= K8) of memory controllers.
......@@ -124,28 +102,28 @@ config EDAC_AMD64_ERROR_INJECTION
config EDAC_AMD76X
tristate "AMD 76x (760, 762, 768)"
depends on EDAC_MM_EDAC && PCI && X86_32
depends on PCI && X86_32
help
Support for error detection and correction on the AMD 76x
series of chipsets used with the Athlon processor.
config EDAC_E7XXX
tristate "Intel e7xxx (e7205, e7500, e7501, e7505)"
depends on EDAC_MM_EDAC && PCI && X86_32
depends on PCI && X86_32
help
Support for error detection and correction on the Intel
E7205, E7500, E7501 and E7505 server chipsets.
config EDAC_E752X
tristate "Intel e752x (e7520, e7525, e7320) and 3100"
depends on EDAC_MM_EDAC && PCI && X86
depends on PCI && X86
help
Support for error detection and correction on the Intel
E7520, E7525, E7320 server chipsets.
config EDAC_I82443BXGX
tristate "Intel 82443BX/GX (440BX/GX)"
depends on EDAC_MM_EDAC && PCI && X86_32
depends on PCI && X86_32
depends on BROKEN
help
Support for error detection and correction on the Intel
......@@ -153,56 +131,56 @@ config EDAC_I82443BXGX
config EDAC_I82875P
tristate "Intel 82875p (D82875P, E7210)"
depends on EDAC_MM_EDAC && PCI && X86_32
depends on PCI && X86_32
help
Support for error detection and correction on the Intel
DP82785P and E7210 server chipsets.
config EDAC_I82975X
tristate "Intel 82975x (D82975x)"
depends on EDAC_MM_EDAC && PCI && X86
depends on PCI && X86
help
Support for error detection and correction on the Intel
DP82975x server chipsets.
config EDAC_I3000
tristate "Intel 3000/3010"
depends on EDAC_MM_EDAC && PCI && X86
depends on PCI && X86
help
Support for error detection and correction on the Intel
3000 and 3010 server chipsets.
config EDAC_I3200
tristate "Intel 3200"
depends on EDAC_MM_EDAC && PCI && X86
depends on PCI && X86
help
Support for error detection and correction on the Intel
3200 and 3210 server chipsets.
config EDAC_IE31200
tristate "Intel e312xx"
depends on EDAC_MM_EDAC && PCI && X86
depends on PCI && X86
help
Support for error detection and correction on the Intel
E3-1200 based DRAM controllers.
config EDAC_X38
tristate "Intel X38"
depends on EDAC_MM_EDAC && PCI && X86
depends on PCI && X86
help
Support for error detection and correction on the Intel
X38 server chipsets.
config EDAC_I5400
tristate "Intel 5400 (Seaburg) chipsets"
depends on EDAC_MM_EDAC && PCI && X86
depends on PCI && X86
help
Support for error detection and correction the Intel
i5400 MCH chipset (Seaburg).
config EDAC_I7CORE
tristate "Intel i7 Core (Nehalem) processors"
depends on EDAC_MM_EDAC && PCI && X86 && X86_MCE_INTEL
depends on PCI && X86 && X86_MCE_INTEL
help
Support for error detection and correction the Intel
i7 Core (Nehalem) Integrated Memory Controller that exists on
......@@ -211,58 +189,56 @@ config EDAC_I7CORE
config EDAC_I82860
tristate "Intel 82860"
depends on EDAC_MM_EDAC && PCI && X86_32
depends on PCI && X86_32
help
Support for error detection and correction on the Intel
82860 chipset.
config EDAC_R82600
tristate "Radisys 82600 embedded chipset"
depends on EDAC_MM_EDAC && PCI && X86_32
depends on PCI && X86_32
help
Support for error detection and correction on the Radisys
82600 embedded chipset.
config EDAC_I5000
tristate "Intel Greencreek/Blackford chipset"
depends on EDAC_MM_EDAC && X86 && PCI
depends on X86 && PCI
help
Support for error detection and correction the Intel
Greekcreek/Blackford chipsets.
config EDAC_I5100
tristate "Intel San Clemente MCH"
depends on EDAC_MM_EDAC && X86 && PCI
depends on X86 && PCI
help
Support for error detection and correction the Intel
San Clemente MCH.
config EDAC_I7300
tristate "Intel Clarksboro MCH"
depends on EDAC_MM_EDAC && X86 && PCI
depends on X86 && PCI
help
Support for error detection and correction the Intel
Clarksboro MCH (Intel 7300 chipset).
config EDAC_SBRIDGE
tristate "Intel Sandy-Bridge/Ivy-Bridge/Haswell Integrated MC"
depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL
depends on PCI_MMCONFIG
depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG
help
Support for error detection and correction the Intel
Sandy Bridge, Ivy Bridge and Haswell Integrated Memory Controllers.
config EDAC_SKX
tristate "Intel Skylake server Integrated MC"
depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL
depends on PCI_MMCONFIG
depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG
help
Support for error detection and correction the Intel
Skylake server Integrated Memory Controllers.
config EDAC_PND2
tristate "Intel Pondicherry2"
depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL
depends on PCI && X86_64 && X86_MCE_INTEL
help
Support for error detection and correction on the Intel
Pondicherry2 Integrated Memory Controller. This SoC IP is
......@@ -271,36 +247,35 @@ config EDAC_PND2
config EDAC_MPC85XX
tristate "Freescale MPC83xx / MPC85xx"
depends on EDAC_MM_EDAC && FSL_SOC
depends on FSL_SOC
help
Support for error detection and correction on the Freescale
MPC8349, MPC8560, MPC8540, MPC8548, T4240
config EDAC_LAYERSCAPE
tristate "Freescale Layerscape DDR"
depends on EDAC_MM_EDAC && ARCH_LAYERSCAPE
depends on ARCH_LAYERSCAPE
help
Support for error detection and correction on Freescale memory
controllers on Layerscape SoCs.
config EDAC_MV64X60
tristate "Marvell MV64x60"
depends on EDAC_MM_EDAC && MV64X60
depends on MV64X60
help
Support for error detection and correction on the Marvell
MV64360 and MV64460 chipsets.
config EDAC_PASEMI
tristate "PA Semi PWRficient"
depends on EDAC_MM_EDAC && PCI
depends on PPC_PASEMI
depends on PPC_PASEMI && PCI
help
Support for error detection and correction on PA Semi
PWRficient.
config EDAC_CELL
tristate "Cell Broadband Engine memory controller"
depends on EDAC_MM_EDAC && PPC_CELL_COMMON
depends on PPC_CELL_COMMON
help
Support for error detection and correction on the
Cell Broadband Engine internal memory controller
......@@ -308,7 +283,7 @@ config EDAC_CELL
config EDAC_PPC4XX
tristate "PPC4xx IBM DDR2 Memory Controller"
depends on EDAC_MM_EDAC && 4xx
depends on 4xx
help
This enables support for EDAC on the ECC memory used
with the IBM DDR2 memory controller found in various
......@@ -317,7 +292,7 @@ config EDAC_PPC4XX
config EDAC_AMD8131
tristate "AMD8131 HyperTransport PCI-X Tunnel"
depends on EDAC_MM_EDAC && PCI && PPC_MAPLE
depends on PCI && PPC_MAPLE
help
Support for error detection and correction on the
AMD8131 HyperTransport PCI-X Tunnel chip.
......@@ -326,7 +301,7 @@ config EDAC_AMD8131
config EDAC_AMD8111
tristate "AMD8111 HyperTransport I/O Hub"
depends on EDAC_MM_EDAC && PCI && PPC_MAPLE
depends on PCI && PPC_MAPLE
help
Support for error detection and correction on the
AMD8111 HyperTransport I/O Hub chip.
......@@ -335,7 +310,7 @@ config EDAC_AMD8111
config EDAC_CPC925
tristate "IBM CPC925 Memory Controller (PPC970FX)"
depends on EDAC_MM_EDAC && PPC64
depends on PPC64
help
Support for error detection and correction on the
IBM CPC925 Bridge and Memory Controller, which is
......@@ -344,7 +319,7 @@ config EDAC_CPC925
config EDAC_TILE
tristate "Tilera Memory Controller"
depends on EDAC_MM_EDAC && TILE
depends on TILE
default y
help
Support for error detection and correction on the
......@@ -352,49 +327,59 @@ config EDAC_TILE
config EDAC_HIGHBANK_MC
tristate "Highbank Memory Controller"
depends on EDAC_MM_EDAC && ARCH_HIGHBANK
depends on ARCH_HIGHBANK
help
Support for error detection and correction on the
Calxeda Highbank memory controller.
config EDAC_HIGHBANK_L2
tristate "Highbank L2 Cache"
depends on EDAC_MM_EDAC && ARCH_HIGHBANK
depends on ARCH_HIGHBANK
help
Support for error detection and correction on the
Calxeda Highbank memory controller.
config EDAC_OCTEON_PC
tristate "Cavium Octeon Primary Caches"
depends on EDAC_MM_EDAC && CPU_CAVIUM_OCTEON
depends on CPU_CAVIUM_OCTEON
help
Support for error detection and correction on the primary caches of
the cnMIPS cores of Cavium Octeon family SOCs.
config EDAC_OCTEON_L2C
tristate "Cavium Octeon Secondary Caches (L2C)"
depends on EDAC_MM_EDAC && CAVIUM_OCTEON_SOC
depends on CAVIUM_OCTEON_SOC
help
Support for error detection and correction on the
Cavium Octeon family of SOCs.
config EDAC_OCTEON_LMC
tristate "Cavium Octeon DRAM Memory Controller (LMC)"
depends on EDAC_MM_EDAC && CAVIUM_OCTEON_SOC
depends on CAVIUM_OCTEON_SOC
help
Support for error detection and correction on the
Cavium Octeon family of SOCs.
config EDAC_OCTEON_PCI
tristate "Cavium Octeon PCI Controller"
depends on EDAC_MM_EDAC && PCI && CAVIUM_OCTEON_SOC
depends on PCI && CAVIUM_OCTEON_SOC
help
Support for error detection and correction on the
Cavium Octeon family of SOCs.
config EDAC_THUNDERX
tristate "Cavium ThunderX EDAC"
depends on ARM64
depends on PCI
help
Support for error detection and correction on the
Cavium ThunderX memory controllers (LMC), Cache
Coherent Processor Interconnect (CCPI) and L2 cache
blocks (TAD, CBC, MCI).
config EDAC_ALTERA
bool "Altera SOCFPGA ECC"
depends on EDAC_MM_EDAC=y && ARCH_SOCFPGA
depends on EDAC=y && ARCH_SOCFPGA
help
Support for error detection and correction on the
Altera SOCs. This must be selected for SDRAM ECC.
......@@ -460,14 +445,14 @@ config EDAC_ALTERA_SDMMC
config EDAC_SYNOPSYS
tristate "Synopsys DDR Memory Controller"
depends on EDAC_MM_EDAC && ARCH_ZYNQ
depends on ARCH_ZYNQ
help
Support for error detection and correction on the Synopsys DDR
memory controller.
config EDAC_XGENE
tristate "APM X-Gene SoC"
depends on EDAC_MM_EDAC && (ARM64 || COMPILE_TEST)
depends on (ARM64 || COMPILE_TEST)
help
Support for error detection and correction on the
APM X-Gene family of SOCs.
......
......@@ -6,8 +6,7 @@
# GNU General Public License.
#
obj-$(CONFIG_EDAC) := edac_stub.o
obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o
obj-$(CONFIG_EDAC) := edac_core.o
edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o
edac_core-y += edac_module.o edac_device_sysfs.o wq.o
......@@ -74,6 +73,7 @@ obj-$(CONFIG_EDAC_OCTEON_PC) += octeon_edac-pc.o
obj-$(CONFIG_EDAC_OCTEON_L2C) += octeon_edac-l2c.o
obj-$(CONFIG_EDAC_OCTEON_LMC) += octeon_edac-lmc.o
obj-$(CONFIG_EDAC_OCTEON_PCI) += octeon_edac-pci.o
obj-$(CONFIG_EDAC_THUNDERX) += thunderx_edac.o
obj-$(CONFIG_EDAC_ALTERA) += altera_edac.o
obj-$(CONFIG_EDAC_SYNOPSYS) += synopsys_edac.o
......
......@@ -1023,12 +1023,22 @@ altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask,
return ret;
}
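/*
 * The Arria10-only init paths below use this check to bail out early with
 * -ENODEV on other SoCFPGA parts such as Cyclone5, which is what the
 * "Fix peripheral warnings for Cyclone5" change in this merge addresses.
 */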
static int socfpga_is_a10(void)
{
return of_machine_is_compatible("altr,socfpga-arria10");
}
static int validate_parent_available(struct device_node *np);
static const struct of_device_id altr_edac_a10_device_of_match[];
static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat)
{
int irq;
struct device_node *child, *np = of_find_compatible_node(NULL, NULL,
struct device_node *child, *np;
if (!socfpga_is_a10())
return -ENODEV;
np = of_find_compatible_node(NULL, NULL,
"altr,socfpga-a10-ecc-manager");
if (!np) {
edac_printk(KERN_ERR, EDAC_DEVICE, "ECC Manager not found\n");
......@@ -1545,8 +1555,12 @@ static const struct edac_device_prv_data a10_sdmmceccb_data = {
static int __init socfpga_init_sdmmc_ecc(void)
{
int rc = -ENODEV;
struct device_node *child = of_find_compatible_node(NULL, NULL,
"altr,socfpga-sdmmc-ecc");
struct device_node *child;
if (!socfpga_is_a10())
return -ENODEV;
child = of_find_compatible_node(NULL, NULL, "altr,socfpga-sdmmc-ecc");
if (!child) {
edac_printk(KERN_WARNING, EDAC_DEVICE, "SDMMC node not found\n");
return -ENODEV;
......
......@@ -40,6 +40,11 @@
#define edac_atomic_scrub(va, size) do { } while (0)
#endif
int edac_op_state = EDAC_OPSTATE_INVAL;
EXPORT_SYMBOL_GPL(edac_op_state);
static int edac_report = EDAC_REPORTING_ENABLED;
/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);
......@@ -52,6 +57,65 @@ static void const *edac_mc_owner;
static struct bus_type mc_bus[EDAC_MAX_MCS];
int edac_get_report_status(void)
{
return edac_report;
}
EXPORT_SYMBOL_GPL(edac_get_report_status);
void edac_set_report_status(int new)
{
if (new == EDAC_REPORTING_ENABLED ||
new == EDAC_REPORTING_DISABLED ||
new == EDAC_REPORTING_FORCE)
edac_report = new;
}
EXPORT_SYMBOL_GPL(edac_set_report_status);
static int edac_report_set(const char *str, const struct kernel_param *kp)
{
if (!str)
return -EINVAL;
if (!strncmp(str, "on", 2))
edac_report = EDAC_REPORTING_ENABLED;
else if (!strncmp(str, "off", 3))
edac_report = EDAC_REPORTING_DISABLED;
else if (!strncmp(str, "force", 5))
edac_report = EDAC_REPORTING_FORCE;
return 0;
}
static int edac_report_get(char *buffer, const struct kernel_param *kp)
{
int ret = 0;
switch (edac_report) {
case EDAC_REPORTING_ENABLED:
ret = sprintf(buffer, "on");
break;
case EDAC_REPORTING_DISABLED:
ret = sprintf(buffer, "off");
break;
case EDAC_REPORTING_FORCE:
ret = sprintf(buffer, "force");
break;
default:
ret = -EINVAL;
break;
}
return ret;
}
static const struct kernel_param_ops edac_report_ops = {
.set = edac_report_set,
.get = edac_report_get,
};
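/*
 * Usage sketch, assuming the combined object keeps the "edac_core" name from
 * the Makefile: the edac_report parameter registered below should then be
 * reachable as edac_core.edac_report=on|off|force on the kernel command line
 * and via /sys/module/edac_core/parameters/edac_report, taking over from the
 * "edac_report=" __setup() handler removed together with edac_stub.c.
 */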
module_param_cb(edac_report, &edac_report_ops, &edac_report, 0644);
unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
unsigned len)
{
......@@ -504,22 +568,6 @@ struct mem_ctl_info *find_mci_by_dev(struct device *dev)
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);
/*
* handler for EDAC to check if NMI type handler has asserted interrupt
*/
static int edac_mc_assert_error_check_and_clear(void)
{
int old_state;
if (edac_op_state == EDAC_OPSTATE_POLL)
return 1;
old_state = edac_err_assert;
edac_err_assert = 0;
return old_state;
}
/*
* edac_mc_workq_function
* performs the operation scheduled by a workq request
......@@ -536,7 +584,7 @@ static void edac_mc_workq_function(struct work_struct *work_req)
return;
}
if (edac_mc_assert_error_check_and_clear())
if (edac_op_state == EDAC_OPSTATE_POLL)
mci->edac_check(mci);
mutex_unlock(&mem_ctls_mutex);
......@@ -601,7 +649,6 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci)
}
list_add_tail_rcu(&mci->link, insert_before);
atomic_inc(&edac_handlers);
return 0;
fail0:
......@@ -619,7 +666,6 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci)
static int del_mc_from_global_list(struct mem_ctl_info *mci)
{
int handlers = atomic_dec_return(&edac_handlers);
list_del_rcu(&mci->link);
/* these are for safe removal of devices from global list while
......@@ -628,7 +674,7 @@ static int del_mc_from_global_list(struct mem_ctl_info *mci)
synchronize_rcu();
INIT_LIST_HEAD(&mci->link);
return handlers;
return list_empty(&mc_devices);
}
struct mem_ctl_info *edac_mc_find(int idx)
......@@ -763,7 +809,7 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
/* mark MCI offline: */
mci->op_state = OP_OFFLINE;
if (!del_mc_from_global_list(mci))
if (del_mc_from_global_list(mci))
edac_mc_owner = NULL;
mutex_unlock(&mem_ctls_mutex);
......@@ -1195,8 +1241,11 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
/* Report the error via the trace interface */
grain_bits = fls_long(e->grain) + 1;
if (IS_ENABLED(CONFIG_RAS))
trace_mc_event(type, e->msg, e->label, e->error_count,
mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
mci->mc_idx, e->top_layer, e->mid_layer,
e->low_layer,
(e->page_frame_number << PAGE_SHIFT) | e->offset_in_page,
grain_bits, e->syndrome, e->other_detail);
......
/*
* common EDAC components that must be in kernel
*
* Author: Dave Jiang <djiang@mvista.com>
*
* 2007 (c) MontaVista Software, Inc.
* 2010 (c) Advanced Micro Devices Inc.
* Borislav Petkov <bp@alien8.de>
*
* This file is licensed under the terms of the GNU General Public
* License version 2. This program is licensed "as is" without any
* warranty of any kind, whether express or implied.
*
*/
#include <linux/module.h>
#include <linux/edac.h>
#include <linux/atomic.h>
#include <linux/device.h>
int edac_op_state = EDAC_OPSTATE_INVAL;
EXPORT_SYMBOL_GPL(edac_op_state);
atomic_t edac_handlers = ATOMIC_INIT(0);
EXPORT_SYMBOL_GPL(edac_handlers);
int edac_err_assert = 0;
EXPORT_SYMBOL_GPL(edac_err_assert);
int edac_report_status = EDAC_REPORTING_ENABLED;
EXPORT_SYMBOL_GPL(edac_report_status);
static int __init edac_report_setup(char *str)
{
if (!str)
return -EINVAL;
if (!strncmp(str, "on", 2))
set_edac_report_status(EDAC_REPORTING_ENABLED);
else if (!strncmp(str, "off", 3))
set_edac_report_status(EDAC_REPORTING_DISABLED);
else if (!strncmp(str, "force", 5))
set_edac_report_status(EDAC_REPORTING_FORCE);
return 0;
}
__setup("edac_report=", edac_report_setup);
/*
* called to determine if there is an EDAC driver interested in
* knowing an event (such as NMI) occurred
*/
int edac_handler_set(void)
{
if (edac_op_state == EDAC_OPSTATE_POLL)
return 0;
return atomic_read(&edac_handlers);
}
EXPORT_SYMBOL_GPL(edac_handler_set);
/*
* handler for NMI type of interrupts to assert error
*/
void edac_atomic_assert_error(void)
{
edac_err_assert++;
}
EXPORT_SYMBOL_GPL(edac_atomic_assert_error);
......@@ -1349,7 +1349,7 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo
struct dram_addr daddr;
char *type;
if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
return NOTIFY_DONE;
mci = pnd2_mci;
......
......@@ -3075,7 +3075,7 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
struct sbridge_pvt *pvt;
char *type;
if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
return NOTIFY_DONE;
mci = get_mci_for_node_id(mce->socketid);
......@@ -3441,7 +3441,7 @@ static int __init sbridge_init(void)
if (rc >= 0) {
mce_register_decode_chain(&sbridge_mce_dec);
if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n");
return 0;
}
......
......@@ -971,7 +971,7 @@ static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
struct mem_ctl_info *mci;
char *type;
if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
return NOTIFY_DONE;
/* ignore unless this is memory related with an address */
......
/*
* Cavium ThunderX memory controller kernel module
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright Cavium, Inc. (C) 2015-2017. All rights reserved.
*
*/
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/edac.h>
#include <linux/interrupt.h>
#include <linux/string.h>
#include <linux/stop_machine.h>
#include <linux/delay.h>
#include <linux/sizes.h>
#include <linux/atomic.h>
#include <linux/bitfield.h>
#include <linux/circ_buf.h>
#include <asm/page.h>
#include "edac_module.h"
#define phys_to_pfn(phys) (PFN_DOWN(phys))
#define THUNDERX_NODE GENMASK(45, 44)
enum {
ERR_CORRECTED = 1,
ERR_UNCORRECTED = 2,
ERR_UNKNOWN = 3,
};
#define MAX_SYNDROME_REGS 4
struct error_syndrome {
u64 reg[MAX_SYNDROME_REGS];
};
struct error_descr {
int type;
u64 mask;
char *descr;
};
static void decode_register(char *str, size_t size,
const struct error_descr *descr,
const uint64_t reg)
{
int ret = 0;
while (descr->type && descr->mask && descr->descr) {
if (reg & descr->mask) {
ret = snprintf(str, size, "\n\t%s, %s",
descr->type == ERR_CORRECTED ?
"Corrected" : "Uncorrected",
descr->descr);
str += ret;
size -= ret;
}
descr++;
}
}
static unsigned long get_bits(unsigned long data, int pos, int width)
{
return (data >> pos) & ((1 << width) - 1);
}
#define L2C_CTL 0x87E080800000
#define L2C_CTL_DISIDXALIAS BIT(0)
#define PCI_DEVICE_ID_THUNDER_LMC 0xa022
#define LMC_FADR 0x20
#define LMC_FADR_FDIMM(x) ((x >> 37) & 0x1)
#define LMC_FADR_FBUNK(x) ((x >> 36) & 0x1)
#define LMC_FADR_FBANK(x) ((x >> 32) & 0xf)
#define LMC_FADR_FROW(x) ((x >> 14) & 0xffff)
#define LMC_FADR_FCOL(x) ((x >> 0) & 0x1fff)
#define LMC_NXM_FADR 0x28
#define LMC_ECC_SYND 0x38
#define LMC_ECC_PARITY_TEST 0x108
#define LMC_INT_W1S 0x150
#define LMC_INT_ENA_W1C 0x158
#define LMC_INT_ENA_W1S 0x160
#define LMC_CONFIG 0x188
#define LMC_CONFIG_BG2 BIT(62)
#define LMC_CONFIG_RANK_ENA BIT(42)
#define LMC_CONFIG_PBANK_LSB(x) (((x) >> 5) & 0xF)
#define LMC_CONFIG_ROW_LSB(x) (((x) >> 2) & 0x7)
#define LMC_CONTROL 0x190
#define LMC_CONTROL_XOR_BANK BIT(16)
#define LMC_INT 0x1F0
#define LMC_INT_DDR_ERR BIT(11)
#define LMC_INT_DED_ERR (0xFUL << 5)
#define LMC_INT_SEC_ERR (0xFUL << 1)
#define LMC_INT_NXM_WR_MASK BIT(0)
#define LMC_DDR_PLL_CTL 0x258
#define LMC_DDR_PLL_CTL_DDR4 BIT(29)
#define LMC_FADR_SCRAMBLED 0x330
#define LMC_INT_UE (LMC_INT_DDR_ERR | LMC_INT_DED_ERR | \
LMC_INT_NXM_WR_MASK)
#define LMC_INT_CE (LMC_INT_SEC_ERR)
static const struct error_descr lmc_errors[] = {
{
.type = ERR_CORRECTED,
.mask = LMC_INT_SEC_ERR,
.descr = "Single-bit ECC error",
},
{
.type = ERR_UNCORRECTED,
.mask = LMC_INT_DDR_ERR,
.descr = "DDR chip error",
},
{
.type = ERR_UNCORRECTED,
.mask = LMC_INT_DED_ERR,
.descr = "Double-bit ECC error",
},
{
.type = ERR_UNCORRECTED,
.mask = LMC_INT_NXM_WR_MASK,
.descr = "Non-existent memory write",
},
{0, 0, NULL},
};
#define LMC_INT_EN_DDR_ERROR_ALERT_ENA BIT(5)
#define LMC_INT_EN_DLCRAM_DED_ERR BIT(4)
#define LMC_INT_EN_DLCRAM_SEC_ERR BIT(3)
#define LMC_INT_INTR_DED_ENA BIT(2)
#define LMC_INT_INTR_SEC_ENA BIT(1)
#define LMC_INT_INTR_NXM_WR_ENA BIT(0)
#define LMC_INT_ENA_ALL GENMASK(5, 0)
#define LMC_DDR_PLL_CTL 0x258
#define LMC_DDR_PLL_CTL_DDR4 BIT(29)
#define LMC_CONTROL 0x190
#define LMC_CONTROL_RDIMM BIT(0)
#define LMC_SCRAM_FADR 0x330
#define LMC_CHAR_MASK0 0x228
#define LMC_CHAR_MASK2 0x238
#define RING_ENTRIES 8
struct debugfs_entry {
const char *name;
umode_t mode;
const struct file_operations fops;
};
struct lmc_err_ctx {
u64 reg_int;
u64 reg_fadr;
u64 reg_nxm_fadr;
u64 reg_scram_fadr;
u64 reg_ecc_synd;
};
struct thunderx_lmc {
void __iomem *regs;
struct pci_dev *pdev;
struct msix_entry msix_ent;
atomic_t ecc_int;
u64 mask0;
u64 mask2;
u64 parity_test;
u64 node;
int xbits;
int bank_width;
int pbank_lsb;
int dimm_lsb;
int rank_lsb;
int bank_lsb;
int row_lsb;
int col_hi_lsb;
int xor_bank;
int l2c_alias;
struct page *mem;
struct lmc_err_ctx err_ctx[RING_ENTRIES];
unsigned long ring_head;
unsigned long ring_tail;
};
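/*
 * The ring head/tail counters above are free-running; ring_pos() only masks
 * them down to an index, which requires the ring size to be a power of two
 * (RING_ENTRIES is 8, so e.g. ring_pos(9, 8) == 1).
 */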
#define ring_pos(pos, size) ((pos) & (size - 1))
#define DEBUGFS_STRUCT(_name, _mode, _write, _read) \
static struct debugfs_entry debugfs_##_name = { \
.name = __stringify(_name), \
.mode = VERIFY_OCTAL_PERMISSIONS(_mode), \
.fops = { \
.open = simple_open, \
.write = _write, \
.read = _read, \
.llseek = generic_file_llseek, \
}, \
}
#define DEBUGFS_FIELD_ATTR(_type, _field) \
static ssize_t thunderx_##_type##_##_field##_read(struct file *file, \
char __user *data, \
size_t count, loff_t *ppos) \
{ \
struct thunderx_##_type *pdata = file->private_data; \
char buf[20]; \
\
snprintf(buf, count, "0x%016llx", pdata->_field); \
return simple_read_from_buffer(data, count, ppos, \
buf, sizeof(buf)); \
} \
\
static ssize_t thunderx_##_type##_##_field##_write(struct file *file, \
const char __user *data, \
size_t count, loff_t *ppos) \
{ \
struct thunderx_##_type *pdata = file->private_data; \
int res; \
\
res = kstrtoull_from_user(data, count, 0, &pdata->_field); \
\
return res ? res : count; \
} \
\
DEBUGFS_STRUCT(_field, 0600, \
thunderx_##_type##_##_field##_write, \
thunderx_##_type##_##_field##_read) \
#define DEBUGFS_REG_ATTR(_type, _name, _reg) \
static ssize_t thunderx_##_type##_##_name##_read(struct file *file, \
char __user *data, \
size_t count, loff_t *ppos) \
{ \
struct thunderx_##_type *pdata = file->private_data; \
char buf[20]; \
\
sprintf(buf, "0x%016llx", readq(pdata->regs + _reg)); \
return simple_read_from_buffer(data, count, ppos, \
buf, sizeof(buf)); \
} \
\
static ssize_t thunderx_##_type##_##_name##_write(struct file *file, \
const char __user *data, \
size_t count, loff_t *ppos) \
{ \
struct thunderx_##_type *pdata = file->private_data; \
u64 val; \
int res; \
\
res = kstrtoull_from_user(data, count, 0, &val); \
\
if (!res) { \
writeq(val, pdata->regs + _reg); \
res = count; \
} \
\
return res; \
} \
\
DEBUGFS_STRUCT(_name, 0600, \
thunderx_##_type##_##_name##_write, \
thunderx_##_type##_##_name##_read)
#define LMC_DEBUGFS_ENT(_field) DEBUGFS_FIELD_ATTR(lmc, _field)
/*
* To get an ECC error injected, the following steps are needed:
* - Setup the ECC injection by writing the appropriate parameters:
* echo <bit mask value> > /sys/kernel/debug/<device number>/ecc_mask0
* echo <bit mask value> > /sys/kernel/debug/<device number>/ecc_mask2
* echo 0x802 > /sys/kernel/debug/<device number>/ecc_parity_test
* - Do the actual injection:
* echo 1 > /sys/kernel/debug/<device number>/inject_ecc
*/
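/*
 * Writing inject_ecc runs inject_ecc_fn() under stop_machine(): the rigged
 * page is filled with TEST_PATTERN, flushed out of L1 and L2 while the
 * corruption masks are armed, then invalidated so that the follow-up reads
 * in thunderx_lmc_inject_ecc_write() fetch from DRAM and raise the error
 * interrupt handled below.
 */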
static ssize_t thunderx_lmc_inject_int_write(struct file *file,
const char __user *data,
size_t count, loff_t *ppos)
{
struct thunderx_lmc *lmc = file->private_data;
u64 val;
int res;
res = kstrtoull_from_user(data, count, 0, &val);
if (!res) {
/* Trigger the interrupt */
writeq(val, lmc->regs + LMC_INT_W1S);
res = count;
}
return res;
}
static ssize_t thunderx_lmc_int_read(struct file *file,
char __user *data,
size_t count, loff_t *ppos)
{
struct thunderx_lmc *lmc = file->private_data;
char buf[20];
u64 lmc_int = readq(lmc->regs + LMC_INT);
snprintf(buf, sizeof(buf), "0x%016llx", lmc_int);
return simple_read_from_buffer(data, count, ppos, buf, sizeof(buf));
}
#define TEST_PATTERN 0xa5
static int inject_ecc_fn(void *arg)
{
struct thunderx_lmc *lmc = arg;
uintptr_t addr, phys;
unsigned int cline_size = cache_line_size();
const unsigned int lines = PAGE_SIZE / cline_size;
unsigned int i, cl_idx;
addr = (uintptr_t)page_address(lmc->mem);
phys = (uintptr_t)page_to_phys(lmc->mem);
cl_idx = (phys & 0x7f) >> 4;
lmc->parity_test &= ~(7ULL << 8);
lmc->parity_test |= (cl_idx << 8);
writeq(lmc->mask0, lmc->regs + LMC_CHAR_MASK0);
writeq(lmc->mask2, lmc->regs + LMC_CHAR_MASK2);
writeq(lmc->parity_test, lmc->regs + LMC_ECC_PARITY_TEST);
readq(lmc->regs + LMC_CHAR_MASK0);
readq(lmc->regs + LMC_CHAR_MASK2);
readq(lmc->regs + LMC_ECC_PARITY_TEST);
for (i = 0; i < lines; i++) {
memset((void *)addr, TEST_PATTERN, cline_size);
barrier();
/*
* Flush L1 cachelines to the PoC (L2).
* This will cause cacheline eviction to the L2.
*/
asm volatile("dc civac, %0\n"
"dsb sy\n"
: : "r"(addr + i * cline_size));
}
for (i = 0; i < lines; i++) {
/*
* Flush L2 cachelines to the DRAM.
* This will cause cacheline eviction to the DRAM
* and ECC corruption according to the masks set.
*/
__asm__ volatile("sys #0,c11,C1,#2, %0\n"
: : "r"(phys + i * cline_size));
}
for (i = 0; i < lines; i++) {
/*
* Invalidate L2 cachelines.
* The subsequent load will cause cacheline fetch
* from the DRAM and an error interrupt
*/
__asm__ volatile("sys #0,c11,C1,#1, %0"
: : "r"(phys + i * cline_size));
}
for (i = 0; i < lines; i++) {
/*
* Invalidate L1 cachelines.
* The subsequent load will cause cacheline fetch
* from the L2 and/or DRAM
*/
asm volatile("dc ivac, %0\n"
"dsb sy\n"
: : "r"(addr + i * cline_size));
}
return 0;
}
static ssize_t thunderx_lmc_inject_ecc_write(struct file *file,
const char __user *data,
size_t count, loff_t *ppos)
{
struct thunderx_lmc *lmc = file->private_data;
unsigned int cline_size = cache_line_size();
u8 tmp[cline_size];
void __iomem *addr;
unsigned int offs, timeout = 100000;
atomic_set(&lmc->ecc_int, 0);
lmc->mem = alloc_pages_node(lmc->node, GFP_KERNEL, 0);
if (!lmc->mem)
return -ENOMEM;
addr = page_address(lmc->mem);
while (!atomic_read(&lmc->ecc_int) && timeout--) {
stop_machine(inject_ecc_fn, lmc, NULL);
for (offs = 0; offs < PAGE_SIZE; offs += sizeof(tmp)) {
/*
* Do a load from the previously rigged location
* This should generate an error interrupt.
*/
memcpy(tmp, addr + offs, cline_size);
asm volatile("dsb ld\n");
}
}
__free_pages(lmc->mem, 0);
return count;
}
LMC_DEBUGFS_ENT(mask0);
LMC_DEBUGFS_ENT(mask2);
LMC_DEBUGFS_ENT(parity_test);
DEBUGFS_STRUCT(inject_int, 0200, thunderx_lmc_inject_int_write, NULL);
DEBUGFS_STRUCT(inject_ecc, 0200, thunderx_lmc_inject_ecc_write, NULL);
DEBUGFS_STRUCT(int_w1c, 0400, NULL, thunderx_lmc_int_read);
struct debugfs_entry *lmc_dfs_ents[] = {
&debugfs_mask0,
&debugfs_mask2,
&debugfs_parity_test,
&debugfs_inject_ecc,
&debugfs_inject_int,
&debugfs_int_w1c,
};
static int thunderx_create_debugfs_nodes(struct dentry *parent,
struct debugfs_entry *attrs[],
void *data,
size_t num)
{
int i;
struct dentry *ent;
if (!IS_ENABLED(CONFIG_EDAC_DEBUG))
return 0;
if (!parent)
return -ENOENT;
for (i = 0; i < num; i++) {
ent = edac_debugfs_create_file(attrs[i]->name, attrs[i]->mode,
parent, data, &attrs[i]->fops);
if (!ent)
break;
}
return i;
}
static phys_addr_t thunderx_faddr_to_phys(u64 faddr, struct thunderx_lmc *lmc)
{
phys_addr_t addr = 0;
int bank, xbits;
addr |= lmc->node << 40;
addr |= LMC_FADR_FDIMM(faddr) << lmc->dimm_lsb;
addr |= LMC_FADR_FBUNK(faddr) << lmc->rank_lsb;
addr |= LMC_FADR_FROW(faddr) << lmc->row_lsb;
addr |= (LMC_FADR_FCOL(faddr) >> 4) << lmc->col_hi_lsb;
bank = LMC_FADR_FBANK(faddr) << lmc->bank_lsb;
if (lmc->xor_bank)
bank ^= get_bits(addr, 12 + lmc->xbits, lmc->bank_width);
addr |= bank << lmc->bank_lsb;
xbits = PCI_FUNC(lmc->pdev->devfn);
if (lmc->l2c_alias)
xbits ^= get_bits(addr, 20, lmc->xbits) ^
get_bits(addr, 12, lmc->xbits);
addr |= xbits << 7;
return addr;
}
static unsigned int thunderx_get_num_lmcs(unsigned int node)
{
unsigned int number = 0;
struct pci_dev *pdev = NULL;
do {
pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM,
PCI_DEVICE_ID_THUNDER_LMC,
pdev);
if (pdev) {
#ifdef CONFIG_NUMA
if (pdev->dev.numa_node == node)
number++;
#else
number++;
#endif
}
} while (pdev);
return number;
}
#define LMC_MESSAGE_SIZE 120
#define LMC_OTHER_SIZE (50 * ARRAY_SIZE(lmc_errors))
static irqreturn_t thunderx_lmc_err_isr(int irq, void *dev_id)
{
struct mem_ctl_info *mci = dev_id;
struct thunderx_lmc *lmc = mci->pvt_info;
unsigned long head = ring_pos(lmc->ring_head, ARRAY_SIZE(lmc->err_ctx));
struct lmc_err_ctx *ctx = &lmc->err_ctx[head];
writeq(0, lmc->regs + LMC_CHAR_MASK0);
writeq(0, lmc->regs + LMC_CHAR_MASK2);
writeq(0x2, lmc->regs + LMC_ECC_PARITY_TEST);
ctx->reg_int = readq(lmc->regs + LMC_INT);
ctx->reg_fadr = readq(lmc->regs + LMC_FADR);
ctx->reg_nxm_fadr = readq(lmc->regs + LMC_NXM_FADR);
ctx->reg_scram_fadr = readq(lmc->regs + LMC_SCRAM_FADR);
ctx->reg_ecc_synd = readq(lmc->regs + LMC_ECC_SYND);
lmc->ring_head++;
atomic_set(&lmc->ecc_int, 1);
/* Clear the interrupt */
writeq(ctx->reg_int, lmc->regs + LMC_INT);
return IRQ_WAKE_THREAD;
}
static irqreturn_t thunderx_lmc_threaded_isr(int irq, void *dev_id)
{
struct mem_ctl_info *mci = dev_id;
struct thunderx_lmc *lmc = mci->pvt_info;
phys_addr_t phys_addr;
unsigned long tail;
struct lmc_err_ctx *ctx;
irqreturn_t ret = IRQ_NONE;
char *msg;
char *other;
msg = kmalloc(LMC_MESSAGE_SIZE, GFP_KERNEL);
other = kmalloc(LMC_OTHER_SIZE, GFP_KERNEL);
if (!msg || !other)
goto err_free;
while (CIRC_CNT(lmc->ring_head, lmc->ring_tail,
ARRAY_SIZE(lmc->err_ctx))) {
tail = ring_pos(lmc->ring_tail, ARRAY_SIZE(lmc->err_ctx));
ctx = &lmc->err_ctx[tail];
dev_dbg(&lmc->pdev->dev, "LMC_INT: %016llx\n",
ctx->reg_int);
dev_dbg(&lmc->pdev->dev, "LMC_FADR: %016llx\n",
ctx->reg_fadr);
dev_dbg(&lmc->pdev->dev, "LMC_NXM_FADR: %016llx\n",
ctx->reg_nxm_fadr);
dev_dbg(&lmc->pdev->dev, "LMC_SCRAM_FADR: %016llx\n",
ctx->reg_scram_fadr);
dev_dbg(&lmc->pdev->dev, "LMC_ECC_SYND: %016llx\n",
ctx->reg_ecc_synd);
snprintf(msg, LMC_MESSAGE_SIZE,
"DIMM %lld rank %lld bank %lld row %lld col %lld",
LMC_FADR_FDIMM(ctx->reg_scram_fadr),
LMC_FADR_FBUNK(ctx->reg_scram_fadr),
LMC_FADR_FBANK(ctx->reg_scram_fadr),
LMC_FADR_FROW(ctx->reg_scram_fadr),
LMC_FADR_FCOL(ctx->reg_scram_fadr));
decode_register(other, LMC_OTHER_SIZE, lmc_errors,
ctx->reg_int);
phys_addr = thunderx_faddr_to_phys(ctx->reg_fadr, lmc);
if (ctx->reg_int & LMC_INT_UE)
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
phys_to_pfn(phys_addr),
offset_in_page(phys_addr),
0, -1, -1, -1, msg, other);
else if (ctx->reg_int & LMC_INT_CE)
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
phys_to_pfn(phys_addr),
offset_in_page(phys_addr),
0, -1, -1, -1, msg, other);
lmc->ring_tail++;
}
ret = IRQ_HANDLED;
err_free:
kfree(msg);
kfree(other);
return ret;
}
#ifdef CONFIG_PM
static int thunderx_lmc_suspend(struct pci_dev *pdev, pm_message_t state)
{
pci_save_state(pdev);
pci_disable_device(pdev);
pci_set_power_state(pdev, pci_choose_state(pdev, state));
return 0;
}
static int thunderx_lmc_resume(struct pci_dev *pdev)
{
pci_set_power_state(pdev, PCI_D0);
pci_enable_wake(pdev, PCI_D0, 0);
pci_restore_state(pdev);
return 0;
}
#endif
static const struct pci_device_id thunderx_lmc_pci_tbl[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_LMC) },
{ 0, },
};
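/*
 * Derive a unique MC index from the LMC's PCI function and its NUMA node,
 * e.g. function 2 on node 1 gives 2 + (1 << 3) = 10.
 */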
static inline int pci_dev_to_mc_idx(struct pci_dev *pdev)
{
int node = dev_to_node(&pdev->dev);
int ret = PCI_FUNC(pdev->devfn);
ret += max(node, 0) << 3;
return ret;
}
static int thunderx_lmc_probe(struct pci_dev *pdev,
const struct pci_device_id *id)
{
struct thunderx_lmc *lmc;
struct edac_mc_layer layer;
struct mem_ctl_info *mci;
u64 lmc_control, lmc_ddr_pll_ctl, lmc_config;
int ret;
u64 lmc_int;
void *l2c_ioaddr;
layer.type = EDAC_MC_LAYER_SLOT;
layer.size = 2;
layer.is_virt_csrow = false;
ret = pcim_enable_device(pdev);
if (ret) {
dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
return ret;
}
ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_lmc");
if (ret) {
dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
return ret;
}
mci = edac_mc_alloc(pci_dev_to_mc_idx(pdev), 1, &layer,
sizeof(struct thunderx_lmc));
if (!mci)
return -ENOMEM;
mci->pdev = &pdev->dev;
lmc = mci->pvt_info;
pci_set_drvdata(pdev, mci);
lmc->regs = pcim_iomap_table(pdev)[0];
lmc_control = readq(lmc->regs + LMC_CONTROL);
lmc_ddr_pll_ctl = readq(lmc->regs + LMC_DDR_PLL_CTL);
lmc_config = readq(lmc->regs + LMC_CONFIG);
if (lmc_control & LMC_CONTROL_RDIMM) {
mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4,
lmc_ddr_pll_ctl) ?
MEM_RDDR4 : MEM_RDDR3;
} else {
mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4,
lmc_ddr_pll_ctl) ?
MEM_DDR4 : MEM_DDR3;
}
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
mci->edac_cap = EDAC_FLAG_SECDED;
mci->mod_name = "thunderx-lmc";
mci->mod_ver = "1";
mci->ctl_name = "thunderx-lmc";
mci->dev_name = dev_name(&pdev->dev);
mci->scrub_mode = SCRUB_NONE;
lmc->pdev = pdev;
lmc->msix_ent.entry = 0;
lmc->ring_head = 0;
lmc->ring_tail = 0;
ret = pci_enable_msix_exact(pdev, &lmc->msix_ent, 1);
if (ret) {
dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
goto err_free;
}
ret = devm_request_threaded_irq(&pdev->dev, lmc->msix_ent.vector,
thunderx_lmc_err_isr,
thunderx_lmc_threaded_isr, 0,
"[EDAC] ThunderX LMC", mci);
if (ret) {
dev_err(&pdev->dev, "Cannot set ISR: %d\n", ret);
goto err_free;
}
lmc->node = FIELD_GET(THUNDERX_NODE, pci_resource_start(pdev, 0));
lmc->xbits = thunderx_get_num_lmcs(lmc->node) >> 1;
lmc->bank_width = (FIELD_GET(LMC_DDR_PLL_CTL_DDR4, lmc_ddr_pll_ctl) &&
FIELD_GET(LMC_CONFIG_BG2, lmc_config)) ? 4 : 3;
lmc->pbank_lsb = (lmc_config >> 5) & 0xf;
lmc->dimm_lsb = 28 + lmc->pbank_lsb + lmc->xbits;
lmc->rank_lsb = lmc->dimm_lsb;
lmc->rank_lsb -= FIELD_GET(LMC_CONFIG_RANK_ENA, lmc_config) ? 1 : 0;
lmc->bank_lsb = 7 + lmc->xbits;
lmc->row_lsb = 14 + LMC_CONFIG_ROW_LSB(lmc_config) + lmc->xbits;
lmc->col_hi_lsb = lmc->bank_lsb + lmc->bank_width;
lmc->xor_bank = lmc_control & LMC_CONTROL_XOR_BANK;
l2c_ioaddr = ioremap(L2C_CTL | FIELD_PREP(THUNDERX_NODE, lmc->node),
PAGE_SIZE);
if (!l2c_ioaddr) {
dev_err(&pdev->dev, "Cannot map L2C_CTL\n");
goto err_free;
}
lmc->l2c_alias = !(readq(l2c_ioaddr) & L2C_CTL_DISIDXALIAS);
iounmap(l2c_ioaddr);
ret = edac_mc_add_mc(mci);
if (ret) {
dev_err(&pdev->dev, "Cannot add the MC: %d\n", ret);
goto err_free;
}
lmc_int = readq(lmc->regs + LMC_INT);
writeq(lmc_int, lmc->regs + LMC_INT);
writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1S);
if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
ret = thunderx_create_debugfs_nodes(mci->debugfs,
lmc_dfs_ents,
lmc,
ARRAY_SIZE(lmc_dfs_ents));
if (ret != ARRAY_SIZE(lmc_dfs_ents)) {
dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
ret, ret >= 0 ? " created" : "");
}
}
return 0;
err_free:
pci_set_drvdata(pdev, NULL);
edac_mc_free(mci);
return ret;
}
static void thunderx_lmc_remove(struct pci_dev *pdev)
{
struct mem_ctl_info *mci = pci_get_drvdata(pdev);
struct thunderx_lmc *lmc = mci->pvt_info;
writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1C);
edac_mc_del_mc(&pdev->dev);
edac_mc_free(mci);
}
MODULE_DEVICE_TABLE(pci, thunderx_lmc_pci_tbl);
static struct pci_driver thunderx_lmc_driver = {
.name = "thunderx_lmc_edac",
.probe = thunderx_lmc_probe,
.remove = thunderx_lmc_remove,
#ifdef CONFIG_PM
.suspend = thunderx_lmc_suspend,
.resume = thunderx_lmc_resume,
#endif
.id_table = thunderx_lmc_pci_tbl,
};
/*---------------------- OCX driver ---------------------------------*/
#define PCI_DEVICE_ID_THUNDER_OCX 0xa013
#define OCX_LINK_INTS 3
#define OCX_INTS (OCX_LINK_INTS + 1)
#define OCX_RX_LANES 24
#define OCX_RX_LANE_STATS 15
#define OCX_COM_INT 0x100
#define OCX_COM_INT_W1S 0x108
#define OCX_COM_INT_ENA_W1S 0x110
#define OCX_COM_INT_ENA_W1C 0x118
#define OCX_COM_IO_BADID BIT(54)
#define OCX_COM_MEM_BADID BIT(53)
#define OCX_COM_COPR_BADID BIT(52)
#define OCX_COM_WIN_REQ_BADID BIT(51)
#define OCX_COM_WIN_REQ_TOUT BIT(50)
#define OCX_COM_RX_LANE GENMASK(23, 0)
#define OCX_COM_INT_CE (OCX_COM_IO_BADID | \
OCX_COM_MEM_BADID | \
OCX_COM_COPR_BADID | \
OCX_COM_WIN_REQ_BADID | \
OCX_COM_WIN_REQ_TOUT)
static const struct error_descr ocx_com_errors[] = {
{
.type = ERR_CORRECTED,
.mask = OCX_COM_IO_BADID,
.descr = "Invalid IO transaction node ID",
},
{
.type = ERR_CORRECTED,
.mask = OCX_COM_MEM_BADID,
.descr = "Invalid memory transaction node ID",
},
{
.type = ERR_CORRECTED,
.mask = OCX_COM_COPR_BADID,
.descr = "Invalid coprocessor transaction node ID",
},
{
.type = ERR_CORRECTED,
.mask = OCX_COM_WIN_REQ_BADID,
.descr = "Invalid SLI transaction node ID",
},
{
.type = ERR_CORRECTED,
.mask = OCX_COM_WIN_REQ_TOUT,
.descr = "Window/core request timeout",
},
{0, 0, NULL},
};
#define OCX_COM_LINKX_INT(x) (0x120 + (x) * 8)
#define OCX_COM_LINKX_INT_W1S(x) (0x140 + (x) * 8)
#define OCX_COM_LINKX_INT_ENA_W1S(x) (0x160 + (x) * 8)
#define OCX_COM_LINKX_INT_ENA_W1C(x) (0x180 + (x) * 8)
#define OCX_COM_LINK_BAD_WORD BIT(13)
#define OCX_COM_LINK_ALIGN_FAIL BIT(12)
#define OCX_COM_LINK_ALIGN_DONE BIT(11)
#define OCX_COM_LINK_UP BIT(10)
#define OCX_COM_LINK_STOP BIT(9)
#define OCX_COM_LINK_BLK_ERR BIT(8)
#define OCX_COM_LINK_REINIT BIT(7)
#define OCX_COM_LINK_LNK_DATA BIT(6)
#define OCX_COM_LINK_RXFIFO_DBE BIT(5)
#define OCX_COM_LINK_RXFIFO_SBE BIT(4)
#define OCX_COM_LINK_TXFIFO_DBE BIT(3)
#define OCX_COM_LINK_TXFIFO_SBE BIT(2)
#define OCX_COM_LINK_REPLAY_DBE BIT(1)
#define OCX_COM_LINK_REPLAY_SBE BIT(0)
static const struct error_descr ocx_com_link_errors[] = {
{
.type = ERR_CORRECTED,
.mask = OCX_COM_LINK_REPLAY_SBE,
.descr = "Replay buffer single-bit error",
},
{
.type = ERR_CORRECTED,
.mask = OCX_COM_LINK_TXFIFO_SBE,
.descr = "TX FIFO single-bit error",
},
{
.type = ERR_CORRECTED,
.mask = OCX_COM_LINK_RXFIFO_SBE,
.descr = "RX FIFO single-bit error",
},
{
.type = ERR_CORRECTED,
.mask = OCX_COM_LINK_BLK_ERR,
.descr = "Block code error",
},
{
.type = ERR_CORRECTED,
.mask = OCX_COM_LINK_ALIGN_FAIL,
.descr = "Link alignment failure",
},
{
.type = ERR_CORRECTED,
.mask = OCX_COM_LINK_BAD_WORD,
.descr = "Bad code word",
},
{
.type = ERR_UNCORRECTED,
.mask = OCX_COM_LINK_REPLAY_DBE,
.descr = "Replay buffer double-bit error",
},
{
.type = ERR_UNCORRECTED,
.mask = OCX_COM_LINK_TXFIFO_DBE,
.descr = "TX FIFO double-bit error",
},
{
.type = ERR_UNCORRECTED,
.mask = OCX_COM_LINK_RXFIFO_DBE,
.descr = "RX FIFO double-bit error",
},
{
.type = ERR_UNCORRECTED,
.mask = OCX_COM_LINK_STOP,
.descr = "Link stopped",
},
{0, 0, NULL},
};
#define OCX_COM_LINK_INT_UE (OCX_COM_LINK_REPLAY_DBE | \
OCX_COM_LINK_TXFIFO_DBE | \
OCX_COM_LINK_RXFIFO_DBE | \
OCX_COM_LINK_STOP)
#define OCX_COM_LINK_INT_CE (OCX_COM_LINK_REPLAY_SBE | \
OCX_COM_LINK_TXFIFO_SBE | \
OCX_COM_LINK_RXFIFO_SBE | \
OCX_COM_LINK_BLK_ERR | \
OCX_COM_LINK_ALIGN_FAIL | \
OCX_COM_LINK_BAD_WORD)
#define OCX_LNE_INT(x) (0x8018 + (x) * 0x100)
#define OCX_LNE_INT_EN(x) (0x8020 + (x) * 0x100)
#define OCX_LNE_BAD_CNT(x) (0x8028 + (x) * 0x100)
#define OCX_LNE_CFG(x) (0x8000 + (x) * 0x100)
#define OCX_LNE_STAT(x, y) (0x8040 + (x) * 0x100 + (y) * 8)
#define OCX_LNE_CFG_RX_BDRY_LOCK_DIS BIT(8)
#define OCX_LNE_CFG_RX_STAT_WRAP_DIS BIT(2)
#define OCX_LNE_CFG_RX_STAT_RDCLR BIT(1)
#define OCX_LNE_CFG_RX_STAT_ENA BIT(0)
#define OCX_LANE_BAD_64B67B BIT(8)
#define OCX_LANE_DSKEW_FIFO_OVFL BIT(5)
#define OCX_LANE_SCRM_SYNC_LOSS BIT(4)
#define OCX_LANE_UKWN_CNTL_WORD BIT(3)
#define OCX_LANE_CRC32_ERR BIT(2)
#define OCX_LANE_BDRY_SYNC_LOSS BIT(1)
#define OCX_LANE_SERDES_LOCK_LOSS BIT(0)
#define OCX_COM_LANE_INT_UE (0)
#define OCX_COM_LANE_INT_CE (OCX_LANE_SERDES_LOCK_LOSS | \
OCX_LANE_BDRY_SYNC_LOSS | \
OCX_LANE_CRC32_ERR | \
OCX_LANE_UKWN_CNTL_WORD | \
OCX_LANE_SCRM_SYNC_LOSS | \
OCX_LANE_DSKEW_FIFO_OVFL | \
OCX_LANE_BAD_64B67B)
static const struct error_descr ocx_lane_errors[] = {
{
.type = ERR_CORRECTED,
.mask = OCX_LANE_SERDES_LOCK_LOSS,
.descr = "RX SerDes lock lost",
},
{
.type = ERR_CORRECTED,
.mask = OCX_LANE_BDRY_SYNC_LOSS,
.descr = "RX word boundary lost",
},
{
.type = ERR_CORRECTED,
.mask = OCX_LANE_CRC32_ERR,
.descr = "CRC32 error",
},
{
.type = ERR_CORRECTED,
.mask = OCX_LANE_UKWN_CNTL_WORD,
.descr = "Unknown control word",
},
{
.type = ERR_CORRECTED,
.mask = OCX_LANE_SCRM_SYNC_LOSS,
.descr = "Scrambler synchronization lost",
},
{
.type = ERR_CORRECTED,
.mask = OCX_LANE_DSKEW_FIFO_OVFL,
.descr = "RX deskew FIFO overflow",
},
{
.type = ERR_CORRECTED,
.mask = OCX_LANE_BAD_64B67B,
.descr = "Bad 64B/67B codeword",
},
{0, 0, NULL},
};
#define OCX_LNE_INT_ENA_ALL (GENMASK(9, 8) | GENMASK(6, 0))
#define OCX_COM_INT_ENA_ALL (GENMASK(54, 50) | GENMASK(23, 0))
#define OCX_COM_LINKX_INT_ENA_ALL (GENMASK(13, 12) | \
GENMASK(9, 7) | GENMASK(5, 0))
#define OCX_TLKX_ECC_CTL(x) (0x10018 + (x) * 0x2000)
#define OCX_RLKX_ECC_CTL(x) (0x18018 + (x) * 0x2000)
struct ocx_com_err_ctx {
u64 reg_com_int;
u64 reg_lane_int[OCX_RX_LANES];
u64 reg_lane_stat11[OCX_RX_LANES];
};
struct ocx_link_err_ctx {
u64 reg_com_link_int;
int link;
};
struct thunderx_ocx {
void __iomem *regs;
int com_link;
struct pci_dev *pdev;
struct edac_device_ctl_info *edac_dev;
struct dentry *debugfs;
struct msix_entry msix_ent[OCX_INTS];
struct ocx_com_err_ctx com_err_ctx[RING_ENTRIES];
struct ocx_link_err_ctx link_err_ctx[RING_ENTRIES];
unsigned long com_ring_head;
unsigned long com_ring_tail;
unsigned long link_ring_head;
unsigned long link_ring_tail;
};
#define OCX_MESSAGE_SIZE SZ_1K
#define OCX_OTHER_SIZE (50 * ARRAY_SIZE(ocx_com_link_errors))
/* This handler is threaded */
static irqreturn_t thunderx_ocx_com_isr(int irq, void *irq_id)
{
struct msix_entry *msix = irq_id;
struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
msix_ent[msix->entry]);
int lane;
unsigned long head = ring_pos(ocx->com_ring_head,
ARRAY_SIZE(ocx->com_err_ctx));
struct ocx_com_err_ctx *ctx = &ocx->com_err_ctx[head];
ctx->reg_com_int = readq(ocx->regs + OCX_COM_INT);
for (lane = 0; lane < OCX_RX_LANES; lane++) {
ctx->reg_lane_int[lane] =
readq(ocx->regs + OCX_LNE_INT(lane));
ctx->reg_lane_stat11[lane] =
readq(ocx->regs + OCX_LNE_STAT(lane, 11));
writeq(ctx->reg_lane_int[lane], ocx->regs + OCX_LNE_INT(lane));
}
writeq(ctx->reg_com_int, ocx->regs + OCX_COM_INT);
ocx->com_ring_head++;
return IRQ_WAKE_THREAD;
}
static irqreturn_t thunderx_ocx_com_threaded_isr(int irq, void *irq_id)
{
struct msix_entry *msix = irq_id;
struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
msix_ent[msix->entry]);
irqreturn_t ret = IRQ_NONE;
unsigned long tail;
struct ocx_com_err_ctx *ctx;
int lane;
char *msg;
char *other;
msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL);
other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL);
if (!msg || !other)
goto err_free;
while (CIRC_CNT(ocx->com_ring_head, ocx->com_ring_tail,
ARRAY_SIZE(ocx->com_err_ctx))) {
tail = ring_pos(ocx->com_ring_tail,
ARRAY_SIZE(ocx->com_err_ctx));
ctx = &ocx->com_err_ctx[tail];
snprintf(msg, OCX_MESSAGE_SIZE, "%s: OCX_COM_INT: %016llx",
ocx->edac_dev->ctl_name, ctx->reg_com_int);
decode_register(other, OCX_OTHER_SIZE,
ocx_com_errors, ctx->reg_com_int);
strncat(msg, other, OCX_MESSAGE_SIZE);
for (lane = 0; lane < OCX_RX_LANES; lane++)
if (ctx->reg_com_int & BIT(lane)) {
snprintf(other, OCX_OTHER_SIZE,
"\n\tOCX_LNE_INT[%02d]: %016llx OCX_LNE_STAT11[%02d]: %016llx",
lane, ctx->reg_lane_int[lane],
lane, ctx->reg_lane_stat11[lane]);
strncat(msg, other, OCX_MESSAGE_SIZE);
decode_register(other, OCX_OTHER_SIZE,
ocx_lane_errors,
ctx->reg_lane_int[lane]);
strncat(msg, other, OCX_MESSAGE_SIZE);
}
if (ctx->reg_com_int & OCX_COM_INT_CE)
edac_device_handle_ce(ocx->edac_dev, 0, 0, msg);
ocx->com_ring_tail++;
}
ret = IRQ_HANDLED;
err_free:
kfree(other);
kfree(msg);
return ret;
}
static irqreturn_t thunderx_ocx_lnk_isr(int irq, void *irq_id)
{
struct msix_entry *msix = irq_id;
struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
msix_ent[msix->entry]);
unsigned long head = ring_pos(ocx->link_ring_head,
ARRAY_SIZE(ocx->link_err_ctx));
struct ocx_link_err_ctx *ctx = &ocx->link_err_ctx[head];
ctx->link = msix->entry;
ctx->reg_com_link_int = readq(ocx->regs + OCX_COM_LINKX_INT(ctx->link));
writeq(ctx->reg_com_link_int, ocx->regs + OCX_COM_LINKX_INT(ctx->link));
ocx->link_ring_head++;
return IRQ_WAKE_THREAD;
}
static irqreturn_t thunderx_ocx_lnk_threaded_isr(int irq, void *irq_id)
{
struct msix_entry *msix = irq_id;
struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
msix_ent[msix->entry]);
irqreturn_t ret = IRQ_NONE;
unsigned long tail;
struct ocx_link_err_ctx *ctx;
char *msg;
char *other;
msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL);
other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL);
if (!msg || !other)
goto err_free;
while (CIRC_CNT(ocx->link_ring_head, ocx->link_ring_tail,
ARRAY_SIZE(ocx->link_err_ctx))) {
tail = ring_pos(ocx->link_ring_tail,
ARRAY_SIZE(ocx->link_err_ctx));
ctx = &ocx->link_err_ctx[tail];
snprintf(msg, OCX_MESSAGE_SIZE,
"%s: OCX_COM_LINK_INT[%d]: %016llx",
ocx->edac_dev->ctl_name,
ctx->link, ctx->reg_com_link_int);
decode_register(other, OCX_OTHER_SIZE,
ocx_com_link_errors, ctx->reg_com_link_int);
strncat(msg, other, OCX_MESSAGE_SIZE);
if (ctx->reg_com_link_int & OCX_COM_LINK_INT_UE)
edac_device_handle_ue(ocx->edac_dev, 0, 0, msg);
else if (ctx->reg_com_link_int & OCX_COM_LINK_INT_CE)
edac_device_handle_ce(ocx->edac_dev, 0, 0, msg);
ocx->link_ring_tail++;
}
ret = IRQ_HANDLED;
err_free:
kfree(other);
kfree(msg);
return ret;
}
#define OCX_DEBUGFS_ATTR(_name, _reg) DEBUGFS_REG_ATTR(ocx, _name, _reg)
OCX_DEBUGFS_ATTR(tlk0_ecc_ctl, OCX_TLKX_ECC_CTL(0));
OCX_DEBUGFS_ATTR(tlk1_ecc_ctl, OCX_TLKX_ECC_CTL(1));
OCX_DEBUGFS_ATTR(tlk2_ecc_ctl, OCX_TLKX_ECC_CTL(2));
OCX_DEBUGFS_ATTR(rlk0_ecc_ctl, OCX_RLKX_ECC_CTL(0));
OCX_DEBUGFS_ATTR(rlk1_ecc_ctl, OCX_RLKX_ECC_CTL(1));
OCX_DEBUGFS_ATTR(rlk2_ecc_ctl, OCX_RLKX_ECC_CTL(2));
OCX_DEBUGFS_ATTR(com_link0_int, OCX_COM_LINKX_INT_W1S(0));
OCX_DEBUGFS_ATTR(com_link1_int, OCX_COM_LINKX_INT_W1S(1));
OCX_DEBUGFS_ATTR(com_link2_int, OCX_COM_LINKX_INT_W1S(2));
OCX_DEBUGFS_ATTR(lne00_badcnt, OCX_LNE_BAD_CNT(0));
OCX_DEBUGFS_ATTR(lne01_badcnt, OCX_LNE_BAD_CNT(1));
OCX_DEBUGFS_ATTR(lne02_badcnt, OCX_LNE_BAD_CNT(2));
OCX_DEBUGFS_ATTR(lne03_badcnt, OCX_LNE_BAD_CNT(3));
OCX_DEBUGFS_ATTR(lne04_badcnt, OCX_LNE_BAD_CNT(4));
OCX_DEBUGFS_ATTR(lne05_badcnt, OCX_LNE_BAD_CNT(5));
OCX_DEBUGFS_ATTR(lne06_badcnt, OCX_LNE_BAD_CNT(6));
OCX_DEBUGFS_ATTR(lne07_badcnt, OCX_LNE_BAD_CNT(7));
OCX_DEBUGFS_ATTR(lne08_badcnt, OCX_LNE_BAD_CNT(8));
OCX_DEBUGFS_ATTR(lne09_badcnt, OCX_LNE_BAD_CNT(9));
OCX_DEBUGFS_ATTR(lne10_badcnt, OCX_LNE_BAD_CNT(10));
OCX_DEBUGFS_ATTR(lne11_badcnt, OCX_LNE_BAD_CNT(11));
OCX_DEBUGFS_ATTR(lne12_badcnt, OCX_LNE_BAD_CNT(12));
OCX_DEBUGFS_ATTR(lne13_badcnt, OCX_LNE_BAD_CNT(13));
OCX_DEBUGFS_ATTR(lne14_badcnt, OCX_LNE_BAD_CNT(14));
OCX_DEBUGFS_ATTR(lne15_badcnt, OCX_LNE_BAD_CNT(15));
OCX_DEBUGFS_ATTR(lne16_badcnt, OCX_LNE_BAD_CNT(16));
OCX_DEBUGFS_ATTR(lne17_badcnt, OCX_LNE_BAD_CNT(17));
OCX_DEBUGFS_ATTR(lne18_badcnt, OCX_LNE_BAD_CNT(18));
OCX_DEBUGFS_ATTR(lne19_badcnt, OCX_LNE_BAD_CNT(19));
OCX_DEBUGFS_ATTR(lne20_badcnt, OCX_LNE_BAD_CNT(20));
OCX_DEBUGFS_ATTR(lne21_badcnt, OCX_LNE_BAD_CNT(21));
OCX_DEBUGFS_ATTR(lne22_badcnt, OCX_LNE_BAD_CNT(22));
OCX_DEBUGFS_ATTR(lne23_badcnt, OCX_LNE_BAD_CNT(23));
OCX_DEBUGFS_ATTR(com_int, OCX_COM_INT_W1S);
struct debugfs_entry *ocx_dfs_ents[] = {
&debugfs_tlk0_ecc_ctl,
&debugfs_tlk1_ecc_ctl,
&debugfs_tlk2_ecc_ctl,
&debugfs_rlk0_ecc_ctl,
&debugfs_rlk1_ecc_ctl,
&debugfs_rlk2_ecc_ctl,
&debugfs_com_link0_int,
&debugfs_com_link1_int,
&debugfs_com_link2_int,
&debugfs_lne00_badcnt,
&debugfs_lne01_badcnt,
&debugfs_lne02_badcnt,
&debugfs_lne03_badcnt,
&debugfs_lne04_badcnt,
&debugfs_lne05_badcnt,
&debugfs_lne06_badcnt,
&debugfs_lne07_badcnt,
&debugfs_lne08_badcnt,
&debugfs_lne09_badcnt,
&debugfs_lne10_badcnt,
&debugfs_lne11_badcnt,
&debugfs_lne12_badcnt,
&debugfs_lne13_badcnt,
&debugfs_lne14_badcnt,
&debugfs_lne15_badcnt,
&debugfs_lne16_badcnt,
&debugfs_lne17_badcnt,
&debugfs_lne18_badcnt,
&debugfs_lne19_badcnt,
&debugfs_lne20_badcnt,
&debugfs_lne21_badcnt,
&debugfs_lne22_badcnt,
&debugfs_lne23_badcnt,
&debugfs_com_int,
};
static const struct pci_device_id thunderx_ocx_pci_tbl[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_OCX) },
{ 0, },
};
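/*
 * Clear the per-lane RX statistics: enable read-to-clear, stop statistics
 * collection, then read every lane statistic register back so the counters
 * restart from zero.
 */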
static void thunderx_ocx_clearstats(struct thunderx_ocx *ocx)
{
int lane, stat;
u64 cfg;
for (lane = 0; lane < OCX_RX_LANES; lane++) {
cfg = readq(ocx->regs + OCX_LNE_CFG(lane));
cfg |= OCX_LNE_CFG_RX_STAT_RDCLR;
cfg &= ~OCX_LNE_CFG_RX_STAT_ENA;
writeq(cfg, ocx->regs + OCX_LNE_CFG(lane));
for (stat = 0; stat < OCX_RX_LANE_STATS; stat++)
readq(ocx->regs + OCX_LNE_STAT(lane, stat));
}
}
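/*
 * OCX probe: map the device BAR, allocate an edac_device for the CCPI block,
 * request the per-link and common MSI-X interrupts, then clear any stale
 * interrupt state and unmask the lane, link and common error interrupts.
 */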
static int thunderx_ocx_probe(struct pci_dev *pdev,
const struct pci_device_id *id)
{
struct thunderx_ocx *ocx;
struct edac_device_ctl_info *edac_dev;
char name[32];
int idx;
int i;
int ret;
u64 reg;
ret = pcim_enable_device(pdev);
if (ret) {
dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
return ret;
}
ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_ocx");
if (ret) {
dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
return ret;
}
idx = edac_device_alloc_index();
snprintf(name, sizeof(name), "OCX%d", idx);
edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_ocx),
name, 1, "CCPI", 1,
0, NULL, 0, idx);
if (!edac_dev) {
dev_err(&pdev->dev, "Cannot allocate EDAC device: %d\n", ret);
return -ENOMEM;
}
ocx = edac_dev->pvt_info;
ocx->edac_dev = edac_dev;
ocx->com_ring_head = 0;
ocx->com_ring_tail = 0;
ocx->link_ring_head = 0;
ocx->link_ring_tail = 0;
ocx->regs = pcim_iomap_table(pdev)[0];
if (!ocx->regs) {
dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
ret = -ENODEV;
goto err_free;
}
ocx->pdev = pdev;
for (i = 0; i < OCX_INTS; i++) {
ocx->msix_ent[i].entry = i;
ocx->msix_ent[i].vector = 0;
}
ret = pci_enable_msix_exact(pdev, ocx->msix_ent, OCX_INTS);
if (ret) {
dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
goto err_free;
}
for (i = 0; i < OCX_INTS; i++) {
ret = devm_request_threaded_irq(&pdev->dev,
ocx->msix_ent[i].vector,
(i == 3) ?
thunderx_ocx_com_isr :
thunderx_ocx_lnk_isr,
(i == 3) ?
thunderx_ocx_com_threaded_isr :
thunderx_ocx_lnk_threaded_isr,
0, "[EDAC] ThunderX OCX",
&ocx->msix_ent[i]);
if (ret)
goto err_free;
}
edac_dev->dev = &pdev->dev;
edac_dev->dev_name = dev_name(&pdev->dev);
edac_dev->mod_name = "thunderx-ocx";
edac_dev->ctl_name = "thunderx-ocx";
ret = edac_device_add_device(edac_dev);
if (ret) {
dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret);
goto err_free;
}
if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
ocx->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name);
ret = thunderx_create_debugfs_nodes(ocx->debugfs,
ocx_dfs_ents,
ocx,
ARRAY_SIZE(ocx_dfs_ents));
if (ret != ARRAY_SIZE(ocx_dfs_ents)) {
dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
ret, ret >= 0 ? " created" : "");
}
}
pci_set_drvdata(pdev, edac_dev);
thunderx_ocx_clearstats(ocx);
for (i = 0; i < OCX_RX_LANES; i++) {
writeq(OCX_LNE_INT_ENA_ALL,
ocx->regs + OCX_LNE_INT_EN(i));
reg = readq(ocx->regs + OCX_LNE_INT(i));
writeq(reg, ocx->regs + OCX_LNE_INT(i));
}
for (i = 0; i < OCX_LINK_INTS; i++) {
reg = readq(ocx->regs + OCX_COM_LINKX_INT(i));
writeq(reg, ocx->regs + OCX_COM_LINKX_INT(i));
writeq(OCX_COM_LINKX_INT_ENA_ALL,
ocx->regs + OCX_COM_LINKX_INT_ENA_W1S(i));
}
reg = readq(ocx->regs + OCX_COM_INT);
writeq(reg, ocx->regs + OCX_COM_INT);
writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1S);
return 0;
err_free:
edac_device_free_ctl_info(edac_dev);
return ret;
}
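/* Mask the common and per-link interrupts before tearing the device down. */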
static void thunderx_ocx_remove(struct pci_dev *pdev)
{
struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev);
struct thunderx_ocx *ocx = edac_dev->pvt_info;
int i;
writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1C);
for (i = 0; i < OCX_INTS; i++) {
writeq(OCX_COM_LINKX_INT_ENA_ALL,
ocx->regs + OCX_COM_LINKX_INT_ENA_W1C(i));
}
edac_debugfs_remove_recursive(ocx->debugfs);
edac_device_del_device(&pdev->dev);
edac_device_free_ctl_info(edac_dev);
}
MODULE_DEVICE_TABLE(pci, thunderx_ocx_pci_tbl);
static struct pci_driver thunderx_ocx_driver = {
.name = "thunderx_ocx_edac",
.probe = thunderx_ocx_probe,
.remove = thunderx_ocx_remove,
.id_table = thunderx_ocx_pci_tbl,
};
/*---------------------- L2C driver ---------------------------------*/
#define PCI_DEVICE_ID_THUNDER_L2C_TAD 0xa02e
#define PCI_DEVICE_ID_THUNDER_L2C_CBC 0xa02f
#define PCI_DEVICE_ID_THUNDER_L2C_MCI 0xa030
#define L2C_TAD_INT_W1C 0x40000
#define L2C_TAD_INT_W1S 0x40008
#define L2C_TAD_INT_ENA_W1C 0x40020
#define L2C_TAD_INT_ENA_W1S 0x40028
#define L2C_TAD_INT_L2DDBE BIT(1)
#define L2C_TAD_INT_SBFSBE BIT(2)
#define L2C_TAD_INT_SBFDBE BIT(3)
#define L2C_TAD_INT_FBFSBE BIT(4)
#define L2C_TAD_INT_FBFDBE BIT(5)
#define L2C_TAD_INT_TAGDBE BIT(9)
#define L2C_TAD_INT_RDDISLMC BIT(15)
#define L2C_TAD_INT_WRDISLMC BIT(16)
#define L2C_TAD_INT_LFBTO BIT(17)
#define L2C_TAD_INT_GSYNCTO BIT(18)
#define L2C_TAD_INT_RTGSBE BIT(32)
#define L2C_TAD_INT_RTGDBE BIT(33)
#define L2C_TAD_INT_RDDISOCI BIT(34)
#define L2C_TAD_INT_WRDISOCI BIT(35)
#define L2C_TAD_INT_ECC (L2C_TAD_INT_L2DDBE | \
L2C_TAD_INT_SBFSBE | L2C_TAD_INT_SBFDBE | \
L2C_TAD_INT_FBFSBE | L2C_TAD_INT_FBFDBE)
#define L2C_TAD_INT_CE (L2C_TAD_INT_SBFSBE | \
L2C_TAD_INT_FBFSBE)
#define L2C_TAD_INT_UE (L2C_TAD_INT_L2DDBE | \
L2C_TAD_INT_SBFDBE | \
L2C_TAD_INT_FBFDBE | \
L2C_TAD_INT_TAGDBE | \
L2C_TAD_INT_RTGDBE | \
L2C_TAD_INT_WRDISOCI | \
L2C_TAD_INT_RDDISOCI | \
L2C_TAD_INT_WRDISLMC | \
L2C_TAD_INT_RDDISLMC | \
L2C_TAD_INT_LFBTO | \
L2C_TAD_INT_GSYNCTO)
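/*
 * Decode table for L2C_TAD_INT: maps each interrupt bit to a corrected or
 * uncorrected error type and a description, consumed by decode_register()
 * in the threaded handler.
 */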
static const struct error_descr l2_tad_errors[] = {
{
.type = ERR_CORRECTED,
.mask = L2C_TAD_INT_SBFSBE,
.descr = "SBF single-bit error",
},
{
.type = ERR_CORRECTED,
.mask = L2C_TAD_INT_FBFSBE,
.descr = "FBF single-bit error",
},
{
.type = ERR_UNCORRECTED,
.mask = L2C_TAD_INT_L2DDBE,
.descr = "L2D double-bit error",
},
{
.type = ERR_UNCORRECTED,
.mask = L2C_TAD_INT_SBFDBE,
.descr = "SBF double-bit error",
},
{
.type = ERR_UNCORRECTED,
.mask = L2C_TAD_INT_FBFDBE,
.descr = "FBF double-bit error",
},
{
.type = ERR_UNCORRECTED,
.mask = L2C_TAD_INT_TAGDBE,
.descr = "TAG double-bit error",
},
{
.type = ERR_UNCORRECTED,
.mask = L2C_TAD_INT_RTGDBE,
.descr = "RTG double-bit error",
},
{
.type = ERR_UNCORRECTED,
.mask = L2C_TAD_INT_WRDISOCI,
.descr = "Write to a disabled CCPI",
},
{
.type = ERR_UNCORRECTED,
.mask = L2C_TAD_INT_RDDISOCI,
.descr = "Read from a disabled CCPI",
},
{
.type = ERR_UNCORRECTED,
.mask = L2C_TAD_INT_WRDISLMC,
.descr = "Write to a disabled LMC",
},
{
.type = ERR_UNCORRECTED,
.mask = L2C_TAD_INT_RDDISLMC,
.descr = "Read from a disabled LMC",
},
{
.type = ERR_UNCORRECTED,
.mask = L2C_TAD_INT_LFBTO,
.descr = "LFB entry timeout",
},
{
.type = ERR_UNCORRECTED,
.mask = L2C_TAD_INT_GSYNCTO,
.descr = "Global sync CCPI timeout",
},
{0, 0, NULL},
};
#define L2C_TAD_INT_TAG (L2C_TAD_INT_TAGDBE)
#define L2C_TAD_INT_RTG (L2C_TAD_INT_RTGDBE)
#define L2C_TAD_INT_DISLMC (L2C_TAD_INT_WRDISLMC | L2C_TAD_INT_RDDISLMC)
#define L2C_TAD_INT_DISOCI (L2C_TAD_INT_WRDISOCI | L2C_TAD_INT_RDDISOCI)
#define L2C_TAD_INT_ENA_ALL (L2C_TAD_INT_ECC | L2C_TAD_INT_TAG | \
L2C_TAD_INT_RTG | \
L2C_TAD_INT_DISLMC | L2C_TAD_INT_DISOCI | \
L2C_TAD_INT_LFBTO)
#define L2C_TAD_TIMETWO 0x50000
#define L2C_TAD_TIMEOUT 0x50100
#define L2C_TAD_ERR 0x60000
#define L2C_TAD_TQD_ERR 0x60100
#define L2C_TAD_TTG_ERR 0x60200
#define L2C_CBC_INT_W1C 0x60000
#define L2C_CBC_INT_RSDSBE BIT(0)
#define L2C_CBC_INT_RSDDBE BIT(1)
#define L2C_CBC_INT_RSD (L2C_CBC_INT_RSDSBE | L2C_CBC_INT_RSDDBE)
#define L2C_CBC_INT_MIBSBE BIT(4)
#define L2C_CBC_INT_MIBDBE BIT(5)
#define L2C_CBC_INT_MIB (L2C_CBC_INT_MIBSBE | L2C_CBC_INT_MIBDBE)
#define L2C_CBC_INT_IORDDISOCI BIT(6)
#define L2C_CBC_INT_IOWRDISOCI BIT(7)
#define L2C_CBC_INT_IODISOCI (L2C_CBC_INT_IORDDISOCI | \
L2C_CBC_INT_IOWRDISOCI)
#define L2C_CBC_INT_CE (L2C_CBC_INT_RSDSBE | L2C_CBC_INT_MIBSBE)
#define L2C_CBC_INT_UE (L2C_CBC_INT_RSDDBE | L2C_CBC_INT_MIBDBE)
static const struct error_descr l2_cbc_errors[] = {
{
.type = ERR_CORRECTED,
.mask = L2C_CBC_INT_RSDSBE,
.descr = "RSD single-bit error",
},
{
.type = ERR_CORRECTED,
.mask = L2C_CBC_INT_MIBSBE,
.descr = "MIB single-bit error",
},
{
.type = ERR_UNCORRECTED,
.mask = L2C_CBC_INT_RSDDBE,
.descr = "RSD double-bit error",
},
{
.type = ERR_UNCORRECTED,
.mask = L2C_CBC_INT_MIBDBE,
.descr = "MIB double-bit error",
},
{
.type = ERR_UNCORRECTED,
.mask = L2C_CBC_INT_IORDDISOCI,
.descr = "Read from a disabled CCPI",
},
{
.type = ERR_UNCORRECTED,
.mask = L2C_CBC_INT_IOWRDISOCI,
.descr = "Write to a disabled CCPI",
},
{0, 0, NULL},
};
#define L2C_CBC_INT_W1S 0x60008
#define L2C_CBC_INT_ENA_W1C 0x60020
#define L2C_CBC_INT_ENA_ALL (L2C_CBC_INT_RSD | L2C_CBC_INT_MIB | \
L2C_CBC_INT_IODISOCI)
#define L2C_CBC_INT_ENA_W1S 0x60028
#define L2C_CBC_IODISOCIERR 0x80008
#define L2C_CBC_IOCERR 0x80010
#define L2C_CBC_RSDERR 0x80018
#define L2C_CBC_MIBERR 0x80020
#define L2C_MCI_INT_W1C 0x0
#define L2C_MCI_INT_VBFSBE BIT(0)
#define L2C_MCI_INT_VBFDBE BIT(1)
static const struct error_descr l2_mci_errors[] = {
{
.type = ERR_CORRECTED,
.mask = L2C_MCI_INT_VBFSBE,
.descr = "VBF single-bit error",
},
{
.type = ERR_UNCORRECTED,
.mask = L2C_MCI_INT_VBFDBE,
.descr = "VBF double-bit error",
},
{0, 0, NULL},
};
#define L2C_MCI_INT_W1S 0x8
#define L2C_MCI_INT_ENA_W1C 0x20
#define L2C_MCI_INT_ENA_ALL (L2C_MCI_INT_VBFSBE | L2C_MCI_INT_VBFDBE)
#define L2C_MCI_INT_ENA_W1S 0x28
#define L2C_MCI_ERR 0x10000
#define L2C_MESSAGE_SIZE SZ_1K
#define L2C_OTHER_SIZE (50 * ARRAY_SIZE(l2_tad_errors))
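/*
 * Per-block L2C state: the interrupt and extended-error register values
 * captured by the hard IRQ handlers are queued in err_ctx[] and consumed by
 * the threaded handler, with ring_head/ring_tail forming a circular buffer.
 */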
struct l2c_err_ctx {
char *reg_ext_name;
u64 reg_int;
u64 reg_ext;
};
struct thunderx_l2c {
void __iomem *regs;
struct pci_dev *pdev;
struct edac_device_ctl_info *edac_dev;
struct dentry *debugfs;
int index;
struct msix_entry msix_ent;
struct l2c_err_ctx err_ctx[RING_ENTRIES];
unsigned long ring_head;
unsigned long ring_tail;
};
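/*
 * Hard IRQ handler for the L2C TAD block: snapshot L2C_TAD_INT together with
 * the matching extended error register, acknowledge the interrupt (W1C) and
 * defer reporting to the threaded handler.
 */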
static irqreturn_t thunderx_l2c_tad_isr(int irq, void *irq_id)
{
struct msix_entry *msix = irq_id;
struct thunderx_l2c *tad = container_of(msix, struct thunderx_l2c,
msix_ent);
unsigned long head = ring_pos(tad->ring_head, ARRAY_SIZE(tad->err_ctx));
struct l2c_err_ctx *ctx = &tad->err_ctx[head];
ctx->reg_int = readq(tad->regs + L2C_TAD_INT_W1C);
if (ctx->reg_int & L2C_TAD_INT_ECC) {
ctx->reg_ext_name = "TQD_ERR";
ctx->reg_ext = readq(tad->regs + L2C_TAD_TQD_ERR);
} else if (ctx->reg_int & L2C_TAD_INT_TAG) {
ctx->reg_ext_name = "TTG_ERR";
ctx->reg_ext = readq(tad->regs + L2C_TAD_TTG_ERR);
} else if (ctx->reg_int & L2C_TAD_INT_LFBTO) {
ctx->reg_ext_name = "TIMEOUT";
ctx->reg_ext = readq(tad->regs + L2C_TAD_TIMEOUT);
} else if (ctx->reg_int & L2C_TAD_INT_DISOCI) {
ctx->reg_ext_name = "ERR";
ctx->reg_ext = readq(tad->regs + L2C_TAD_ERR);
}
writeq(ctx->reg_int, tad->regs + L2C_TAD_INT_W1C);
tad->ring_head++;
return IRQ_WAKE_THREAD;
}
static irqreturn_t thunderx_l2c_cbc_isr(int irq, void *irq_id)
{
struct msix_entry *msix = irq_id;
struct thunderx_l2c *cbc = container_of(msix, struct thunderx_l2c,
msix_ent);
unsigned long head = ring_pos(cbc->ring_head, ARRAY_SIZE(cbc->err_ctx));
struct l2c_err_ctx *ctx = &cbc->err_ctx[head];
ctx->reg_int = readq(cbc->regs + L2C_CBC_INT_W1C);
if (ctx->reg_int & L2C_CBC_INT_RSD) {
ctx->reg_ext_name = "RSDERR";
ctx->reg_ext = readq(cbc->regs + L2C_CBC_RSDERR);
} else if (ctx->reg_int & L2C_CBC_INT_MIB) {
ctx->reg_ext_name = "MIBERR";
ctx->reg_ext = readq(cbc->regs + L2C_CBC_MIBERR);
} else if (ctx->reg_int & L2C_CBC_INT_IODISOCI) {
ctx->reg_ext_name = "IODISOCIERR";
ctx->reg_ext = readq(cbc->regs + L2C_CBC_IODISOCIERR);
}
writeq(ctx->reg_int, cbc->regs + L2C_CBC_INT_W1C);
cbc->ring_head++;
return IRQ_WAKE_THREAD;
}
static irqreturn_t thunderx_l2c_mci_isr(int irq, void *irq_id)
{
struct msix_entry *msix = irq_id;
struct thunderx_l2c *mci = container_of(msix, struct thunderx_l2c,
msix_ent);
unsigned long head = ring_pos(mci->ring_head, ARRAY_SIZE(mci->err_ctx));
struct l2c_err_ctx *ctx = &mci->err_ctx[head];
ctx->reg_int = readq(mci->regs + L2C_MCI_INT_W1C);
ctx->reg_ext = readq(mci->regs + L2C_MCI_ERR);
writeq(ctx->reg_int, mci->regs + L2C_MCI_INT_W1C);
ctx->reg_ext_name = "ERR";
mci->ring_head++;
return IRQ_WAKE_THREAD;
}
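/*
 * Common threaded handler for the TAD, CBC and MCI blocks: pick the decode
 * table and CE/UE masks based on the PCI device ID, then drain the error
 * ring and report each queued event to the EDAC core.
 */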
static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id)
{
struct msix_entry *msix = irq_id;
struct thunderx_l2c *l2c = container_of(msix, struct thunderx_l2c,
msix_ent);
unsigned long tail;
struct l2c_err_ctx *ctx;
irqreturn_t ret = IRQ_NONE;
u64 mask_ue, mask_ce;
const struct error_descr *l2_errors;
char *reg_int_name;
char *msg;
char *other;
msg = kmalloc(L2C_MESSAGE_SIZE, GFP_KERNEL);
other = kmalloc(L2C_OTHER_SIZE, GFP_KERNEL);
if (!msg || !other)
goto err_free;
switch (l2c->pdev->device) {
case PCI_DEVICE_ID_THUNDER_L2C_TAD:
reg_int_name = "L2C_TAD_INT";
mask_ue = L2C_TAD_INT_UE;
mask_ce = L2C_TAD_INT_CE;
l2_errors = l2_tad_errors;
break;
case PCI_DEVICE_ID_THUNDER_L2C_CBC:
reg_int_name = "L2C_CBC_INT";
mask_ue = L2C_CBC_INT_UE;
mask_ce = L2C_CBC_INT_CE;
l2_errors = l2_cbc_errors;
break;
case PCI_DEVICE_ID_THUNDER_L2C_MCI:
reg_int_name = "L2C_MCI_INT";
mask_ue = L2C_MCI_INT_VBFDBE;
mask_ce = L2C_MCI_INT_VBFSBE;
l2_errors = l2_mci_errors;
break;
default:
dev_err(&l2c->pdev->dev, "Unsupported device: %04x\n",
l2c->pdev->device);
goto err_free;
}
while (CIRC_CNT(l2c->ring_head, l2c->ring_tail,
 ARRAY_SIZE(l2c->err_ctx))) {
tail = ring_pos(l2c->ring_tail, ARRAY_SIZE(l2c->err_ctx));
ctx = &l2c->err_ctx[tail];
snprintf(msg, L2C_MESSAGE_SIZE,
"%s: %s: %016llx, %s: %016llx",
l2c->edac_dev->ctl_name, reg_int_name, ctx->reg_int,
ctx->reg_ext_name, ctx->reg_ext);
decode_register(other, L2C_OTHER_SIZE, l2_errors, ctx->reg_int);
strlcat(msg, other, L2C_MESSAGE_SIZE);
if (ctx->reg_int & mask_ue)
edac_device_handle_ue(l2c->edac_dev, 0, 0, msg);
else if (ctx->reg_int & mask_ce)
edac_device_handle_ce(l2c->edac_dev, 0, 0, msg);
l2c->ring_tail++;
}
ret = IRQ_HANDLED;
err_free:
kfree(other);
kfree(msg);
return ret;
}
#define L2C_DEBUGFS_ATTR(_name, _reg) DEBUGFS_REG_ATTR(l2c, _name, _reg)
L2C_DEBUGFS_ATTR(tad_int, L2C_TAD_INT_W1S);
struct debugfs_entry *l2c_tad_dfs_ents[] = {
&debugfs_tad_int,
};
L2C_DEBUGFS_ATTR(cbc_int, L2C_CBC_INT_W1S);
struct debugfs_entry *l2c_cbc_dfs_ents[] = {
&debugfs_cbc_int,
};
L2C_DEBUGFS_ATTR(mci_int, L2C_MCI_INT_W1S);
struct debugfs_entry *l2c_mci_dfs_ents[] = {
&debugfs_mci_int,
};
static const struct pci_device_id thunderx_l2c_pci_tbl[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_TAD), },
{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_CBC), },
{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_MCI), },
{ 0, },
};
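/*
 * L2C probe, shared by the TAD, CBC and MCI blocks: select the hard IRQ
 * handler, debugfs nodes, name format and interrupt-enable register per
 * device ID, register the edac_device, request the single MSI-X vector and
 * finally unmask the block's error interrupts.
 */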
static int thunderx_l2c_probe(struct pci_dev *pdev,
const struct pci_device_id *id)
{
struct thunderx_l2c *l2c;
struct edac_device_ctl_info *edac_dev;
struct debugfs_entry **l2c_devattr;
size_t dfs_entries;
irqreturn_t (*thunderx_l2c_isr)(int, void *) = NULL;
char name[32];
const char *fmt;
u64 reg_en_offs, reg_en_mask;
int idx;
int ret;
ret = pcim_enable_device(pdev);
if (ret) {
dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
return ret;
}
ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_l2c");
if (ret) {
dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
return ret;
}
switch (pdev->device) {
case PCI_DEVICE_ID_THUNDER_L2C_TAD:
thunderx_l2c_isr = thunderx_l2c_tad_isr;
l2c_devattr = l2c_tad_dfs_ents;
dfs_entries = ARRAY_SIZE(l2c_tad_dfs_ents);
fmt = "L2C-TAD%d";
reg_en_offs = L2C_TAD_INT_ENA_W1S;
reg_en_mask = L2C_TAD_INT_ENA_ALL;
break;
case PCI_DEVICE_ID_THUNDER_L2C_CBC:
thunderx_l2c_isr = thunderx_l2c_cbc_isr;
l2c_devattr = l2c_cbc_dfs_ents;
dfs_entries = ARRAY_SIZE(l2c_cbc_dfs_ents);
fmt = "L2C-CBC%d";
reg_en_offs = L2C_CBC_INT_ENA_W1S;
reg_en_mask = L2C_CBC_INT_ENA_ALL;
break;
case PCI_DEVICE_ID_THUNDER_L2C_MCI:
thunderx_l2c_isr = thunderx_l2c_mci_isr;
l2c_devattr = l2c_mci_dfs_ents;
dfs_entries = ARRAY_SIZE(l2c_mci_dfs_ents);
fmt = "L2C-MCI%d";
reg_en_offs = L2C_MCI_INT_ENA_W1S;
reg_en_mask = L2C_MCI_INT_ENA_ALL;
break;
default:
/* Should never get here. */
dev_err(&pdev->dev, "Unsupported PCI device: %04x\n",
pdev->device);
return -EINVAL;
}
idx = edac_device_alloc_index();
snprintf(name, sizeof(name), fmt, idx);
edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_l2c),
name, 1, "L2C", 1, 0,
NULL, 0, idx);
if (!edac_dev) {
dev_err(&pdev->dev, "Cannot allocate EDAC device\n");
return -ENOMEM;
}
l2c = edac_dev->pvt_info;
l2c->edac_dev = edac_dev;
l2c->regs = pcim_iomap_table(pdev)[0];
if (!l2c->regs) {
dev_err(&pdev->dev, "Cannot map PCI resources\n");
ret = -ENODEV;
goto err_free;
}
l2c->pdev = pdev;
l2c->ring_head = 0;
l2c->ring_tail = 0;
l2c->msix_ent.entry = 0;
l2c->msix_ent.vector = 0;
ret = pci_enable_msix_exact(pdev, &l2c->msix_ent, 1);
if (ret) {
dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
goto err_free;
}
ret = devm_request_threaded_irq(&pdev->dev, l2c->msix_ent.vector,
thunderx_l2c_isr,
thunderx_l2c_threaded_isr,
0, "[EDAC] ThunderX L2C",
&l2c->msix_ent);
if (ret)
goto err_free;
edac_dev->dev = &pdev->dev;
edac_dev->dev_name = dev_name(&pdev->dev);
edac_dev->mod_name = "thunderx-l2c";
edac_dev->ctl_name = "thunderx-l2c";
ret = edac_device_add_device(edac_dev);
if (ret) {
dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret);
goto err_free;
}
if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
l2c->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name);
ret = thunderx_create_debugfs_nodes(l2c->debugfs, l2c_devattr,
 l2c, dfs_entries);
if (ret != dfs_entries) {
dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
ret, ret >= 0 ? " created" : "");
}
}
pci_set_drvdata(pdev, edac_dev);
writeq(reg_en_mask, l2c->regs + reg_en_offs);
return 0;
err_free:
edac_device_free_ctl_info(edac_dev);
return ret;
}
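/* Mask the block's error interrupts, then unregister and free the EDAC device. */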
static void thunderx_l2c_remove(struct pci_dev *pdev)
{
struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev);
struct thunderx_l2c *l2c = edac_dev->pvt_info;
switch (pdev->device) {
case PCI_DEVICE_ID_THUNDER_L2C_TAD:
writeq(L2C_TAD_INT_ENA_ALL, l2c->regs + L2C_TAD_INT_ENA_W1C);
break;
case PCI_DEVICE_ID_THUNDER_L2C_CBC:
writeq(L2C_CBC_INT_ENA_ALL, l2c->regs + L2C_CBC_INT_ENA_W1C);
break;
case PCI_DEVICE_ID_THUNDER_L2C_MCI:
writeq(L2C_MCI_INT_ENA_ALL, l2c->regs + L2C_MCI_INT_ENA_W1C);
break;
}
edac_debugfs_remove_recursive(l2c->debugfs);
edac_device_del_device(&pdev->dev);
edac_device_free_ctl_info(edac_dev);
}
MODULE_DEVICE_TABLE(pci, thunderx_l2c_pci_tbl);
static struct pci_driver thunderx_l2c_driver = {
.name = "thunderx_l2c_edac",
.probe = thunderx_l2c_probe,
.remove = thunderx_l2c_remove,
.id_table = thunderx_l2c_pci_tbl,
};
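/*
 * Register the LMC, OCX and L2C PCI drivers; unwind the already-registered
 * drivers if a later registration fails.
 */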
static int __init thunderx_edac_init(void)
{
int rc = 0;
rc = pci_register_driver(&thunderx_lmc_driver);
if (rc)
return rc;
rc = pci_register_driver(&thunderx_ocx_driver);
if (rc)
goto err_lmc;
rc = pci_register_driver(&thunderx_l2c_driver);
if (rc)
goto err_ocx;
return rc;
err_ocx:
pci_unregister_driver(&thunderx_ocx_driver);
err_lmc:
pci_unregister_driver(&thunderx_lmc_driver);
return rc;
}
static void __exit thunderx_edac_exit(void)
{
pci_unregister_driver(&thunderx_l2c_driver);
pci_unregister_driver(&thunderx_ocx_driver);
pci_unregister_driver(&thunderx_lmc_driver);
}
module_init(thunderx_edac_init);
module_exit(thunderx_edac_exit);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Cavium, Inc.");
MODULE_DESCRIPTION("EDAC Driver for Cavium ThunderX");
......@@ -28,12 +28,10 @@ struct device;
#define EDAC_OPSTATE_INT 2
extern int edac_op_state;
extern int edac_err_assert;
extern atomic_t edac_handlers;
extern int edac_handler_set(void);
extern void edac_atomic_assert_error(void);
extern struct bus_type *edac_get_sysfs_subsys(void);
struct bus_type *edac_get_sysfs_subsys(void);
int edac_get_report_status(void);
void edac_set_report_status(int new);
enum {
EDAC_REPORTING_ENABLED,
......@@ -41,28 +39,6 @@ enum {
EDAC_REPORTING_FORCE
};
extern int edac_report_status;
#ifdef CONFIG_EDAC
static inline int get_edac_report_status(void)
{
return edac_report_status;
}
static inline void set_edac_report_status(int new)
{
edac_report_status = new;
}
#else
static inline int get_edac_report_status(void)
{
return EDAC_REPORTING_DISABLED;
}
static inline void set_edac_report_status(int new)
{
}
#endif
static inline void opstate_init(void)
{
switch (edac_op_state) {
......