Commit ff881842 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'edac_for_5.4' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras

Pull EDAC updates from Borislav Petkov:
 "The new thing this time around is that we have three maintainers now
  and a new, old repo. New because it is new for the EDAC tree which is
  hosted there from now on and old because it is Tony's and mine's old
  RAS repo which we still use occasionally when the stuff isn't in tip.

  Summary:

   -  EDAC tree has three maintainers and one new designated reviewer
      now, so that the work can scale better.

   -  New driver for Mellanox' BlueField SoC DDR controller (Shravan
      Kumar Ramani)

   -  AMD Rome support in amd64_edac (Yazen Ghannam and Isaac Vaughn)

   -  Misc fixes, cleanups and code improvements"

* tag 'edac_for_5.4' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
  EDAC/amd64: Add PCI device IDs for family 17h, model 70h
  MAINTAINERS: Add Robert as a EDAC reviewer
  EDAC/mc_sysfs: Make debug messages consistent
  EDAC/mc_sysfs: Remove pointless gotos
  EDAC: Prefer 'unsigned int' to bare use of 'unsigned'
  EDAC/amd64: Support asymmetric dual-rank DIMMs
  EDAC/amd64: Cache secondary Chip Select registers
  EDAC/amd64: Decode syndrome before translating address
  EDAC/amd64: Find Chip Select memory size using Address Mask
  EDAC/amd64: Initialize DIMM info for systems with more than two channels
  EDAC/amd64: Recognize DRAM device type ECC capability
  EDAC/amd64: Support more than two controllers for chip selects handling
  EDAC/mc: Cleanup _edac_mc_free() code
  EDAC, pnd2: Fix ioremap() size in dnv_rd_reg()
  EDAC, mellanox: Add ECC support for BlueField DDR4
  EDAC/altera: Use the proper type for the IRQ status bits
  EDAC/mc: Fix grain_bits calculation
  edac: altera: Move Stratix10 SDRAM ECC to peripheral
  MAINTAINERS: update EDAC entry to reflect current tree and maintainers
parents a7bd4bcf 3e443eb3
......@@ -5761,6 +5761,11 @@ S: Supported
F: drivers/edac/aspeed_edac.c
F: Documentation/devicetree/bindings/edac/aspeed-sdram-edac.txt
EDAC-BLUEFIELD
M: Shravan Kumar Ramani <sramani@mellanox.com>
S: Supported
F: drivers/edac/bluefield_edac.c
EDAC-CALXEDA
M: Robert Richter <rric@kernel.org>
L: linux-edac@vger.kernel.org
......@@ -5785,10 +5790,11 @@ F: drivers/edac/thunderx_edac*
EDAC-CORE
M: Borislav Petkov <bp@alien8.de>
M: Mauro Carvalho Chehab <mchehab@kernel.org>
M: Tony Luck <tony.luck@intel.com>
R: James Morse <james.morse@arm.com>
R: Robert Richter <rrichter@marvell.com>
L: linux-edac@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git for-next
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac.git linux_next
T: git git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras.git edac-for-next
S: Supported
F: Documentation/admin-guide/ras.rst
F: Documentation/driver-api/edac.rst
......
......@@ -510,4 +510,11 @@ config EDAC_ASPEED
First, ECC must be configured in the bootloader. Then, this driver
will expose error counters via the EDAC kernel framework.
config EDAC_BLUEFIELD
tristate "Mellanox BlueField Memory ECC"
depends on ARM64 && ((MELLANOX_PLATFORM && ACPI) || COMPILE_TEST)
help
Support for error detection and correction on the
Mellanox BlueField SoCs.
endif # EDAC
......@@ -85,3 +85,4 @@ obj-$(CONFIG_EDAC_XGENE) += xgene_edac.o
obj-$(CONFIG_EDAC_TI) += ti_edac.o
obj-$(CONFIG_EDAC_QCOM) += qcom_edac.o
obj-$(CONFIG_EDAC_ASPEED) += aspeed_edac.o
obj-$(CONFIG_EDAC_BLUEFIELD) += bluefield_edac.o
......@@ -222,7 +222,6 @@ static unsigned long get_total_mem(void)
static const struct of_device_id altr_sdram_ctrl_of_match[] = {
{ .compatible = "altr,sdram-edac", .data = &c5_data},
{ .compatible = "altr,sdram-edac-a10", .data = &a10_data},
{ .compatible = "altr,sdram-edac-s10", .data = &a10_data},
{},
};
MODULE_DEVICE_TABLE(of, altr_sdram_ctrl_of_match);
......@@ -1170,6 +1169,24 @@ static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat)
return 0;
}
/*********************** SDRAM EDAC Device Functions *********************/
#ifdef CONFIG_EDAC_ALTERA_SDRAM
static const struct edac_device_prv_data s10_sdramecc_data = {
.setup = altr_check_ecc_deps,
.ce_clear_mask = ALTR_S10_ECC_SERRPENA,
.ue_clear_mask = ALTR_S10_ECC_DERRPENA,
.ecc_enable_mask = ALTR_S10_ECC_EN,
.ecc_en_ofst = ALTR_S10_ECC_CTRL_SDRAM_OFST,
.ce_set_mask = ALTR_S10_ECC_TSERRA,
.ue_set_mask = ALTR_S10_ECC_TDERRA,
.set_err_ofst = ALTR_S10_ECC_INTTEST_OFST,
.ecc_irq_handler = altr_edac_a10_ecc_irq,
.inject_fops = &altr_edac_a10_device_inject_fops,
};
#endif /* CONFIG_EDAC_ALTERA_SDRAM */
/*********************** OCRAM EDAC Device Functions *********************/
#ifdef CONFIG_EDAC_ALTERA_OCRAM
......@@ -1758,6 +1775,9 @@ static const struct of_device_id altr_edac_a10_device_of_match[] = {
#endif
#ifdef CONFIG_EDAC_ALTERA_SDMMC
{ .compatible = "altr,socfpga-sdmmc-ecc", .data = &a10_sdmmcecca_data },
#endif
#ifdef CONFIG_EDAC_ALTERA_SDRAM
{ .compatible = "altr,sdram-edac-s10", .data = &s10_sdramecc_data },
#endif
{},
};
......@@ -1866,6 +1886,7 @@ static void altr_edac_a10_irq_handler(struct irq_desc *desc)
struct altr_arria10_edac *edac = irq_desc_get_handler_data(desc);
struct irq_chip *chip = irq_desc_get_chip(desc);
int irq = irq_desc_get_irq(desc);
unsigned long bits;
dberr = (irq == edac->db_irq) ? 1 : 0;
sm_offset = dberr ? A10_SYSMGR_ECC_INTSTAT_DERR_OFST :
......@@ -1875,7 +1896,8 @@ static void altr_edac_a10_irq_handler(struct irq_desc *desc)
regmap_read(edac->ecc_mgr_map, sm_offset, &irq_status);
for_each_set_bit(bit, (unsigned long *)&irq_status, 32) {
bits = irq_status;
for_each_set_bit(bit, &bits, 32) {
irq = irq_linear_revmap(edac->domain, dberr * 32 + bit);
if (irq)
generic_handle_irq(irq);
......@@ -1889,6 +1911,10 @@ static int validate_parent_available(struct device_node *np)
struct device_node *parent;
int ret = 0;
/* SDRAM must be present for Linux (implied parent) */
if (of_device_is_compatible(np, "altr,sdram-edac-s10"))
return 0;
/* Ensure parent device is enabled if parent node exists */
parent = of_parse_phandle(np, "altr,ecc-parent", 0);
if (parent && !of_device_is_available(parent))
......@@ -1898,6 +1924,22 @@ static int validate_parent_available(struct device_node *np)
return ret;
}
static int get_s10_sdram_edac_resource(struct device_node *np,
struct resource *res)
{
struct device_node *parent;
int ret;
parent = of_parse_phandle(np, "altr,sdr-syscon", 0);
if (!parent)
return -ENODEV;
ret = of_address_to_resource(parent, 0, res);
of_node_put(parent);
return ret;
}
static int altr_edac_a10_device_add(struct altr_arria10_edac *edac,
struct device_node *np)
{
......@@ -1925,7 +1967,11 @@ static int altr_edac_a10_device_add(struct altr_arria10_edac *edac,
if (!devres_open_group(edac->dev, altr_edac_a10_device_add, GFP_KERNEL))
return -ENOMEM;
rc = of_address_to_resource(np, 0, &res);
if (of_device_is_compatible(np, "altr,sdram-edac-s10"))
rc = get_s10_sdram_edac_resource(np, &res);
else
rc = of_address_to_resource(np, 0, &res);
if (rc < 0) {
edac_printk(KERN_ERR, EDAC_DEVICE,
"%s: no resource address\n", ecc_name);
......@@ -2231,13 +2277,15 @@ static int altr_edac_a10_probe(struct platform_device *pdev)
of_device_is_compatible(child, "altr,socfpga-dma-ecc") ||
of_device_is_compatible(child, "altr,socfpga-usb-ecc") ||
of_device_is_compatible(child, "altr,socfpga-qspi-ecc") ||
#ifdef CONFIG_EDAC_ALTERA_SDRAM
of_device_is_compatible(child, "altr,sdram-edac-s10") ||
#endif
of_device_is_compatible(child, "altr,socfpga-sdmmc-ecc"))
altr_edac_a10_device_add(edac, child);
#ifdef CONFIG_EDAC_ALTERA_SDRAM
else if ((of_device_is_compatible(child, "altr,sdram-edac-a10")) ||
(of_device_is_compatible(child, "altr,sdram-edac-s10")))
else if (of_device_is_compatible(child, "altr,sdram-edac-a10"))
of_platform_populate(pdev->dev.of_node,
altr_sdram_ctrl_of_match,
NULL, &pdev->dev);
......
......@@ -289,6 +289,29 @@ struct altr_sdram_mc_data {
#define ALTR_A10_ECC_INIT_WATCHDOG_10US 10000
/************* Stratix10 Defines **************/
#define ALTR_S10_ECC_CTRL_SDRAM_OFST 0x00
#define ALTR_S10_ECC_EN BIT(0)
#define ALTR_S10_ECC_ERRINTEN_OFST 0x10
#define ALTR_S10_ECC_ERRINTENS_OFST 0x14
#define ALTR_S10_ECC_ERRINTENR_OFST 0x18
#define ALTR_S10_ECC_SERRINTEN BIT(0)
#define ALTR_S10_ECC_INTMODE_OFST 0x1C
#define ALTR_S10_ECC_INTMODE BIT(0)
#define ALTR_S10_ECC_INTSTAT_OFST 0x20
#define ALTR_S10_ECC_SERRPENA BIT(0)
#define ALTR_S10_ECC_DERRPENA BIT(8)
#define ALTR_S10_ECC_ERRPENA_MASK (ALTR_S10_ECC_SERRPENA | \
ALTR_S10_ECC_DERRPENA)
#define ALTR_S10_ECC_INTTEST_OFST 0x24
#define ALTR_S10_ECC_TSERRA BIT(0)
#define ALTR_S10_ECC_TDERRA BIT(8)
#define ALTR_S10_ECC_TSERRB BIT(16)
#define ALTR_S10_ECC_TDERRB BIT(24)
#define ALTR_S10_DERR_ADDRA_OFST 0x2C
/* Stratix10 ECC Manager Defines */
......@@ -300,7 +323,7 @@ struct altr_sdram_mc_data {
#define S10_SYSMGR_UE_ADDR_OFST 0x224
#define S10_DDR0_IRQ_MASK BIT(16)
#define S10_DBE_IRQ_MASK 0x3FE
#define S10_DBE_IRQ_MASK 0x3FFFE
/* Define ECC Block Offsets for peripherals */
#define ECC_BLK_ADDRESS_OFST 0x40
......
This diff is collapsed.
......@@ -96,6 +96,7 @@
/* Hardware limit on ChipSelect rows per MC and processors per system */
#define NUM_CHIPSELECTS 8
#define DRAM_RANGES 8
#define NUM_CONTROLLERS 8
#define ON true
#define OFF false
......@@ -119,6 +120,8 @@
#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F6 0x15ee
#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F0 0x1490
#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F6 0x1496
#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F0 0x1440
#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F6 0x1446
/*
* Function 1 - Address Map
......@@ -168,7 +171,8 @@
#define DCSM0 0x60
#define DCSM1 0x160
#define csrow_enabled(i, dct, pvt) ((pvt)->csels[(dct)].csbases[(i)] & DCSB_CS_ENABLE)
#define csrow_enabled(i, dct, pvt) ((pvt)->csels[(dct)].csbases[(i)] & DCSB_CS_ENABLE)
#define csrow_sec_enabled(i, dct, pvt) ((pvt)->csels[(dct)].csbases_sec[(i)] & DCSB_CS_ENABLE)
#define DRAM_CONTROL 0x78
......@@ -258,7 +262,9 @@
/* UMC CH register offsets */
#define UMCCH_BASE_ADDR 0x0
#define UMCCH_BASE_ADDR_SEC 0x10
#define UMCCH_ADDR_MASK 0x20
#define UMCCH_ADDR_MASK_SEC 0x28
#define UMCCH_ADDR_CFG 0x30
#define UMCCH_DIMM_CFG 0x80
#define UMCCH_UMC_CFG 0x100
......@@ -285,6 +291,7 @@ enum amd_families {
F17_CPUS,
F17_M10H_CPUS,
F17_M30H_CPUS,
F17_M70H_CPUS,
NUM_FAMILIES,
};
......@@ -311,9 +318,11 @@ struct dram_range {
/* A DCT chip selects collection */
struct chip_select {
u32 csbases[NUM_CHIPSELECTS];
u32 csbases_sec[NUM_CHIPSELECTS];
u8 b_cnt;
u32 csmasks[NUM_CHIPSELECTS];
u32 csmasks_sec[NUM_CHIPSELECTS];
u8 m_cnt;
};
......@@ -351,8 +360,8 @@ struct amd64_pvt {
u32 dbam0; /* DRAM Base Address Mapping reg for DCT0 */
u32 dbam1; /* DRAM Base Address Mapping reg for DCT1 */
/* one for each DCT */
struct chip_select csels[2];
/* one for each DCT/UMC */
struct chip_select csels[NUM_CONTROLLERS];
/* DRAM base and limit pairs F1x[78,70,68,60,58,50,48,40] */
struct dram_range ranges[DRAM_RANGES];
......
// SPDX-License-Identifier: GPL-2.0
/*
* Bluefield-specific EDAC driver.
*
* Copyright (c) 2019 Mellanox Technologies.
*/
#include <linux/acpi.h>
#include <linux/arm-smccc.h>
#include <linux/bitfield.h>
#include <linux/edac.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include "edac_module.h"
#define DRIVER_NAME "bluefield-edac"
/*
* Mellanox BlueField EMI (External Memory Interface) register definitions.
*/
#define MLXBF_ECC_CNT 0x340
#define MLXBF_ECC_CNT__SERR_CNT GENMASK(15, 0)
#define MLXBF_ECC_CNT__DERR_CNT GENMASK(31, 16)
#define MLXBF_ECC_ERR 0x348
#define MLXBF_ECC_ERR__SECC BIT(0)
#define MLXBF_ECC_ERR__DECC BIT(16)
#define MLXBF_ECC_LATCH_SEL 0x354
#define MLXBF_ECC_LATCH_SEL__START BIT(24)
#define MLXBF_ERR_ADDR_0 0x358
#define MLXBF_ERR_ADDR_1 0x37c
#define MLXBF_SYNDROM 0x35c
#define MLXBF_SYNDROM__DERR BIT(0)
#define MLXBF_SYNDROM__SERR BIT(1)
#define MLXBF_SYNDROM__SYN GENMASK(25, 16)
#define MLXBF_ADD_INFO 0x364
#define MLXBF_ADD_INFO__ERR_PRANK GENMASK(9, 8)
#define MLXBF_EDAC_MAX_DIMM_PER_MC 2
#define MLXBF_EDAC_ERROR_GRAIN 8
/*
* Request MLNX_SIP_GET_DIMM_INFO
*
* Retrieve information about DIMM on a certain slot.
*
* Call register usage:
* a0: MLNX_SIP_GET_DIMM_INFO
* a1: (Memory controller index) << 16 | (Dimm index in memory controller)
* a2-7: not used.
*
* Return status:
* a0: MLXBF_DIMM_INFO defined below describing the DIMM.
* a1-3: not used.
*/
#define MLNX_SIP_GET_DIMM_INFO 0x82000008
/* Format for the SMC response about the memory information */
#define MLXBF_DIMM_INFO__SIZE_GB GENMASK_ULL(15, 0)
#define MLXBF_DIMM_INFO__IS_RDIMM BIT(16)
#define MLXBF_DIMM_INFO__IS_LRDIMM BIT(17)
#define MLXBF_DIMM_INFO__IS_NVDIMM BIT(18)
#define MLXBF_DIMM_INFO__RANKS GENMASK_ULL(23, 21)
#define MLXBF_DIMM_INFO__PACKAGE_X GENMASK_ULL(31, 24)
struct bluefield_edac_priv {
int dimm_ranks[MLXBF_EDAC_MAX_DIMM_PER_MC];
void __iomem *emi_base;
int dimm_per_mc;
};
static u64 smc_call1(u64 smc_op, u64 smc_arg)
{
struct arm_smccc_res res;
arm_smccc_smc(smc_op, smc_arg, 0, 0, 0, 0, 0, 0, &res);
return res.a0;
}
/*
* Gather the ECC information from the External Memory Interface registers
* and report it to the edac handler.
*/
static void bluefield_gather_report_ecc(struct mem_ctl_info *mci,
int error_cnt,
int is_single_ecc)
{
struct bluefield_edac_priv *priv = mci->pvt_info;
u32 dram_additional_info, err_prank, edea0, edea1;
u32 ecc_latch_select, dram_syndrom, serr, derr, syndrom;
enum hw_event_mc_err_type ecc_type;
u64 ecc_dimm_addr;
int ecc_dimm;
ecc_type = is_single_ecc ? HW_EVENT_ERR_CORRECTED :
HW_EVENT_ERR_UNCORRECTED;
/*
* Tell the External Memory Interface to populate the relevant
* registers with information about the last ECC error occurrence.
*/
ecc_latch_select = MLXBF_ECC_LATCH_SEL__START;
writel(ecc_latch_select, priv->emi_base + MLXBF_ECC_LATCH_SEL);
/*
* Verify that the ECC reported info in the registers is of the
* same type as the one asked to report. If not, just report the
* error without the detailed information.
*/
dram_syndrom = readl(priv->emi_base + MLXBF_SYNDROM);
serr = FIELD_GET(MLXBF_SYNDROM__SERR, dram_syndrom);
derr = FIELD_GET(MLXBF_SYNDROM__DERR, dram_syndrom);
syndrom = FIELD_GET(MLXBF_SYNDROM__SYN, dram_syndrom);
if ((is_single_ecc && !serr) || (!is_single_ecc && !derr)) {
edac_mc_handle_error(ecc_type, mci, error_cnt, 0, 0, 0,
0, 0, -1, mci->ctl_name, "");
return;
}
dram_additional_info = readl(priv->emi_base + MLXBF_ADD_INFO);
err_prank = FIELD_GET(MLXBF_ADD_INFO__ERR_PRANK, dram_additional_info);
ecc_dimm = (err_prank >= 2 && priv->dimm_ranks[0] <= 2) ? 1 : 0;
edea0 = readl(priv->emi_base + MLXBF_ERR_ADDR_0);
edea1 = readl(priv->emi_base + MLXBF_ERR_ADDR_1);
ecc_dimm_addr = ((u64)edea1 << 32) | edea0;
edac_mc_handle_error(ecc_type, mci, error_cnt,
PFN_DOWN(ecc_dimm_addr),
offset_in_page(ecc_dimm_addr),
syndrom, ecc_dimm, 0, 0, mci->ctl_name, "");
}
static void bluefield_edac_check(struct mem_ctl_info *mci)
{
struct bluefield_edac_priv *priv = mci->pvt_info;
u32 ecc_count, single_error_count, double_error_count, ecc_error = 0;
/*
* The memory controller might not be initialized by the firmware
* when there isn't memory, which may lead to bad register readings.
*/
if (mci->edac_cap == EDAC_FLAG_NONE)
return;
ecc_count = readl(priv->emi_base + MLXBF_ECC_CNT);
single_error_count = FIELD_GET(MLXBF_ECC_CNT__SERR_CNT, ecc_count);
double_error_count = FIELD_GET(MLXBF_ECC_CNT__DERR_CNT, ecc_count);
if (single_error_count) {
ecc_error |= MLXBF_ECC_ERR__SECC;
bluefield_gather_report_ecc(mci, single_error_count, 1);
}
if (double_error_count) {
ecc_error |= MLXBF_ECC_ERR__DECC;
bluefield_gather_report_ecc(mci, double_error_count, 0);
}
/* Write to clear reported errors. */
if (ecc_count)
writel(ecc_error, priv->emi_base + MLXBF_ECC_ERR);
}
/* Initialize the DIMMs information for the given memory controller. */
static void bluefield_edac_init_dimms(struct mem_ctl_info *mci)
{
struct bluefield_edac_priv *priv = mci->pvt_info;
int mem_ctrl_idx = mci->mc_idx;
struct dimm_info *dimm;
u64 smc_info, smc_arg;
int is_empty = 1, i;
for (i = 0; i < priv->dimm_per_mc; i++) {
dimm = mci->dimms[i];
smc_arg = mem_ctrl_idx << 16 | i;
smc_info = smc_call1(MLNX_SIP_GET_DIMM_INFO, smc_arg);
if (!FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info)) {
dimm->mtype = MEM_EMPTY;
continue;
}
is_empty = 0;
dimm->edac_mode = EDAC_SECDED;
if (FIELD_GET(MLXBF_DIMM_INFO__IS_NVDIMM, smc_info))
dimm->mtype = MEM_NVDIMM;
else if (FIELD_GET(MLXBF_DIMM_INFO__IS_LRDIMM, smc_info))
dimm->mtype = MEM_LRDDR4;
else if (FIELD_GET(MLXBF_DIMM_INFO__IS_RDIMM, smc_info))
dimm->mtype = MEM_RDDR4;
else
dimm->mtype = MEM_DDR4;
dimm->nr_pages =
FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info) *
(SZ_1G / PAGE_SIZE);
dimm->grain = MLXBF_EDAC_ERROR_GRAIN;
/* Mem controller for BlueField only supports x4, x8 and x16 */
switch (FIELD_GET(MLXBF_DIMM_INFO__PACKAGE_X, smc_info)) {
case 4:
dimm->dtype = DEV_X4;
break;
case 8:
dimm->dtype = DEV_X8;
break;
case 16:
dimm->dtype = DEV_X16;
break;
default:
dimm->dtype = DEV_UNKNOWN;
}
priv->dimm_ranks[i] =
FIELD_GET(MLXBF_DIMM_INFO__RANKS, smc_info);
}
if (is_empty)
mci->edac_cap = EDAC_FLAG_NONE;
else
mci->edac_cap = EDAC_FLAG_SECDED;
}
static int bluefield_edac_mc_probe(struct platform_device *pdev)
{
struct bluefield_edac_priv *priv;
struct device *dev = &pdev->dev;
struct edac_mc_layer layers[1];
struct mem_ctl_info *mci;
struct resource *emi_res;
unsigned int mc_idx, dimm_count;
int rc, ret;
/* Read the MSS (Memory SubSystem) index from ACPI table. */
if (device_property_read_u32(dev, "mss_number", &mc_idx)) {
dev_warn(dev, "bf_edac: MSS number unknown\n");
return -EINVAL;
}
/* Read the DIMMs per MC from ACPI table. */
if (device_property_read_u32(dev, "dimm_per_mc", &dimm_count)) {
dev_warn(dev, "bf_edac: DIMMs per MC unknown\n");
return -EINVAL;
}
if (dimm_count > MLXBF_EDAC_MAX_DIMM_PER_MC) {
dev_warn(dev, "bf_edac: DIMMs per MC not valid\n");
return -EINVAL;
}
emi_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!emi_res)
return -EINVAL;
layers[0].type = EDAC_MC_LAYER_SLOT;
layers[0].size = dimm_count;
layers[0].is_virt_csrow = true;
mci = edac_mc_alloc(mc_idx, ARRAY_SIZE(layers), layers, sizeof(*priv));
if (!mci)
return -ENOMEM;
priv = mci->pvt_info;
priv->dimm_per_mc = dimm_count;
priv->emi_base = devm_ioremap_resource(dev, emi_res);
if (IS_ERR(priv->emi_base)) {
dev_err(dev, "failed to map EMI IO resource\n");
ret = PTR_ERR(priv->emi_base);
goto err;
}
mci->pdev = dev;
mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_RDDR4 |
MEM_FLAG_LRDDR4 | MEM_FLAG_NVDIMM;
mci->edac_ctl_cap = EDAC_FLAG_SECDED;
mci->mod_name = DRIVER_NAME;
mci->ctl_name = "BlueField_Memory_Controller";
mci->dev_name = dev_name(dev);
mci->edac_check = bluefield_edac_check;
/* Initialize mci with the actual populated DIMM information. */
bluefield_edac_init_dimms(mci);
platform_set_drvdata(pdev, mci);
/* Register with EDAC core */
rc = edac_mc_add_mc(mci);
if (rc) {
dev_err(dev, "failed to register with EDAC core\n");
ret = rc;
goto err;
}
/* Only POLL mode supported so far. */
edac_op_state = EDAC_OPSTATE_POLL;
return 0;
err:
edac_mc_free(mci);
return ret;
}
static int bluefield_edac_mc_remove(struct platform_device *pdev)
{
struct mem_ctl_info *mci = platform_get_drvdata(pdev);
edac_mc_del_mc(&pdev->dev);
edac_mc_free(mci);
return 0;
}
static const struct acpi_device_id bluefield_mc_acpi_ids[] = {
{"MLNXBF08", 0},
{}
};
MODULE_DEVICE_TABLE(acpi, bluefield_mc_acpi_ids);
static struct platform_driver bluefield_edac_mc_driver = {
.driver = {
.name = DRIVER_NAME,
.acpi_match_table = bluefield_mc_acpi_ids,
},
.probe = bluefield_edac_mc_probe,
.remove = bluefield_edac_mc_remove,
};
module_platform_driver(bluefield_edac_mc_driver);
MODULE_DESCRIPTION("Mellanox BlueField memory edac driver");
MODULE_AUTHOR("Mellanox Technologies");
MODULE_LICENSE("GPL v2");
......@@ -114,8 +114,8 @@ static const struct kernel_param_ops edac_report_ops = {
module_param_cb(edac_report, &edac_report_ops, &edac_report, 0644);
unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
unsigned len)
unsigned int edac_dimm_info_location(struct dimm_info *dimm, char *buf,
unsigned int len)
{
struct mem_ctl_info *mci = dimm->mci;
int i, n, count = 0;
......@@ -236,9 +236,9 @@ EXPORT_SYMBOL_GPL(edac_mem_types);
* At return, the pointer 'p' will be incremented to be used on a next call
* to this function.
*/
void *edac_align_ptr(void **p, unsigned size, int n_elems)
void *edac_align_ptr(void **p, unsigned int size, int n_elems)
{
unsigned align, r;
unsigned int align, r;
void *ptr = *p;
*p += size * n_elems;
......@@ -275,38 +275,37 @@ void *edac_align_ptr(void **p, unsigned size, int n_elems)
static void _edac_mc_free(struct mem_ctl_info *mci)
{
int i, chn, row;
struct csrow_info *csr;
const unsigned int tot_dimms = mci->tot_dimms;
const unsigned int tot_channels = mci->num_cschannel;
const unsigned int tot_csrows = mci->nr_csrows;
int i, chn, row;
if (mci->dimms) {
for (i = 0; i < tot_dimms; i++)
for (i = 0; i < mci->tot_dimms; i++)
kfree(mci->dimms[i]);
kfree(mci->dimms);
}
if (mci->csrows) {
for (row = 0; row < tot_csrows; row++) {
for (row = 0; row < mci->nr_csrows; row++) {
csr = mci->csrows[row];
if (csr) {
if (csr->channels) {
for (chn = 0; chn < tot_channels; chn++)
kfree(csr->channels[chn]);
kfree(csr->channels);
}
kfree(csr);
if (!csr)
continue;
if (csr->channels) {
for (chn = 0; chn < mci->num_cschannel; chn++)
kfree(csr->channels[chn]);
kfree(csr->channels);
}
kfree(csr);
}
kfree(mci->csrows);
}
kfree(mci);
}
struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
unsigned n_layers,
struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
unsigned int n_layers,
struct edac_mc_layer *layers,
unsigned sz_pvt)
unsigned int sz_pvt)
{
struct mem_ctl_info *mci;
struct edac_mc_layer *layer;
......@@ -314,9 +313,9 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
struct rank_info *chan;
struct dimm_info *dimm;
u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
unsigned pos[EDAC_MAX_LAYERS];
unsigned size, tot_dimms = 1, count = 1;
unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
unsigned int pos[EDAC_MAX_LAYERS];
unsigned int size, tot_dimms = 1, count = 1;
unsigned int tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
void *pvt, *p, *ptr = NULL;
int i, j, row, chn, n, len, off;
bool per_rank = false;
......@@ -1235,9 +1234,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
if (p > e->location)
*(p - 1) = '\0';
/* Report the error via the trace interface */
grain_bits = fls_long(e->grain) + 1;
/* Sanity-check driver-supplied grain value. */
if (WARN_ON_ONCE(!e->grain))
e->grain = 1;
grain_bits = fls_long(e->grain - 1);
/* Report the error via the trace interface */
if (IS_ENABLED(CONFIG_RAS))
trace_mc_event(type, e->msg, e->label, e->error_count,
mci->mc_idx, e->top_layer, e->mid_layer,
......
......@@ -122,10 +122,10 @@ do { \
* On success, return a pointer to struct mem_ctl_info pointer;
* %NULL otherwise
*/
struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
unsigned n_layers,
struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
unsigned int n_layers,
struct edac_mc_layer *layers,
unsigned sz_pvt);
unsigned int sz_pvt);
/**
* edac_get_owner - Return the owner's mod_name of EDAC MC
......
......@@ -131,7 +131,7 @@ static const char * const edac_caps[] = {
struct dev_ch_attribute {
struct device_attribute attr;
int channel;
unsigned int channel;
};
#define DEVICE_CHANNEL(_name, _mode, _show, _store, _var) \
......@@ -200,7 +200,7 @@ static ssize_t channel_dimm_label_show(struct device *dev,
char *data)
{
struct csrow_info *csrow = to_csrow(dev);
unsigned chan = to_channel(mattr);
unsigned int chan = to_channel(mattr);
struct rank_info *rank = csrow->channels[chan];
/* if field has not been initialized, there is nothing to send */
......@@ -216,7 +216,7 @@ static ssize_t channel_dimm_label_store(struct device *dev,
const char *data, size_t count)
{
struct csrow_info *csrow = to_csrow(dev);
unsigned chan = to_channel(mattr);
unsigned int chan = to_channel(mattr);
struct rank_info *rank = csrow->channels[chan];
size_t copy_count = count;
......@@ -240,7 +240,7 @@ static ssize_t channel_ce_count_show(struct device *dev,
struct device_attribute *mattr, char *data)
{
struct csrow_info *csrow = to_csrow(dev);
unsigned chan = to_channel(mattr);
unsigned int chan = to_channel(mattr);
struct rank_info *rank = csrow->channels[chan];
return sprintf(data, "%u\n", rank->ce_count);
......@@ -278,7 +278,7 @@ static void csrow_attr_release(struct device *dev)
{
struct csrow_info *csrow = container_of(dev, struct csrow_info, dev);
edac_dbg(1, "Releasing csrow device %s\n", dev_name(dev));
edac_dbg(1, "device %s released\n", dev_name(dev));
kfree(csrow);
}
......@@ -414,14 +414,16 @@ static int edac_create_csrow_object(struct mem_ctl_info *mci,
dev_set_name(&csrow->dev, "csrow%d", index);
dev_set_drvdata(&csrow->dev, csrow);
edac_dbg(0, "creating (virtual) csrow node %s\n",
dev_name(&csrow->dev));
err = device_add(&csrow->dev);
if (err)
if (err) {
edac_dbg(1, "failure: create device %s\n", dev_name(&csrow->dev));
put_device(&csrow->dev);
return err;
}
return err;
edac_dbg(0, "device %s created\n", dev_name(&csrow->dev));
return 0;
}
/* Create a CSROW object under specifed edac_mc_device */
......@@ -435,12 +437,8 @@ static int edac_create_csrow_objects(struct mem_ctl_info *mci)
if (!nr_pages_per_csrow(csrow))
continue;
err = edac_create_csrow_object(mci, mci->csrows[i], i);
if (err < 0) {
edac_dbg(1,
"failure: create csrow objects for csrow %d\n",
i);
if (err < 0)
goto error;
}
}
return 0;
......@@ -624,7 +622,7 @@ static void dimm_attr_release(struct device *dev)
{
struct dimm_info *dimm = container_of(dev, struct dimm_info, dev);
edac_dbg(1, "Releasing dimm device %s\n", dev_name(dev));
edac_dbg(1, "device %s released\n", dev_name(dev));
kfree(dimm);
}
......@@ -653,12 +651,21 @@ static int edac_create_dimm_object(struct mem_ctl_info *mci,
pm_runtime_forbid(&mci->dev);
err = device_add(&dimm->dev);
if (err)
if (err) {
edac_dbg(1, "failure: create device %s\n", dev_name(&dimm->dev));
put_device(&dimm->dev);
return err;
}
edac_dbg(0, "created rank/dimm device %s\n", dev_name(&dimm->dev));
if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
char location[80];
return err;
edac_dimm_info_location(dimm, location, sizeof(location));
edac_dbg(0, "device %s created at location %s\n",
dev_name(&dimm->dev), location);
}
return 0;
}
/*
......@@ -901,7 +908,7 @@ static void mci_attr_release(struct device *dev)
{
struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev);
edac_dbg(1, "Releasing csrow device %s\n", dev_name(dev));
edac_dbg(1, "device %s released\n", dev_name(dev));
kfree(mci);
}
......@@ -933,14 +940,15 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
dev_set_drvdata(&mci->dev, mci);
pm_runtime_forbid(&mci->dev);
edac_dbg(0, "creating device %s\n", dev_name(&mci->dev));
err = device_add(&mci->dev);
if (err < 0) {
edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev));
put_device(&mci->dev);
goto out;
return err;
}
edac_dbg(0, "device %s created\n", dev_name(&mci->dev));
/*
* Create the dimm/rank devices
*/
......@@ -950,22 +958,9 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
if (!dimm->nr_pages)
continue;
#ifdef CONFIG_EDAC_DEBUG
edac_dbg(1, "creating dimm%d, located at ", i);
if (edac_debug_level >= 1) {
int lay;
for (lay = 0; lay < mci->n_layers; lay++)
printk(KERN_CONT "%s %d ",
edac_layer_name[mci->layers[lay].type],
dimm->location[lay]);
printk(KERN_CONT "\n");
}
#endif
err = edac_create_dimm_object(mci, dimm, i);
if (err) {
edac_dbg(1, "failure: create dimm %d obj\n", i);
if (err)
goto fail_unregister_dimm;
}
}
#ifdef CONFIG_EDAC_LEGACY_SYSFS
......@@ -987,7 +982,6 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
}
device_unregister(&mci->dev);
out:
return err;
}
......@@ -1011,14 +1005,14 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
struct dimm_info *dimm = mci->dimms[i];
if (dimm->nr_pages == 0)
continue;
edac_dbg(0, "removing device %s\n", dev_name(&dimm->dev));
edac_dbg(1, "unregistering device %s\n", dev_name(&dimm->dev));
device_unregister(&dimm->dev);
}
}
void edac_unregister_sysfs(struct mem_ctl_info *mci)
{
edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev));
edac_dbg(1, "unregistering device %s\n", dev_name(&mci->dev));
device_unregister(&mci->dev);
}
......@@ -1029,7 +1023,7 @@ static void mc_attr_release(struct device *dev)
* parent device, used to create the /sys/devices/mc sysfs node.
* So, there are no attributes on it.
*/
edac_dbg(1, "Releasing device %s\n", dev_name(dev));
edac_dbg(1, "device %s released\n", dev_name(dev));
kfree(dev);
}
......@@ -1044,10 +1038,8 @@ int __init edac_mc_sysfs_init(void)
int err;
mci_pdev = kzalloc(sizeof(*mci_pdev), GFP_KERNEL);
if (!mci_pdev) {
err = -ENOMEM;
goto out;
}
if (!mci_pdev)
return -ENOMEM;
mci_pdev->bus = edac_get_sysfs_subsys();
mci_pdev->type = &mc_attr_type;
......@@ -1055,17 +1047,15 @@ int __init edac_mc_sysfs_init(void)
dev_set_name(mci_pdev, "mc");
err = device_add(mci_pdev);
if (err < 0)
goto out_put_device;
if (err < 0) {
edac_dbg(1, "failure: create device %s\n", dev_name(mci_pdev));
put_device(mci_pdev);
return err;
}
edac_dbg(0, "device %s created\n", dev_name(mci_pdev));
return 0;
out_put_device:
put_device(mci_pdev);
out:
return err;
}
void edac_mc_sysfs_exit(void)
......
......@@ -68,7 +68,7 @@ struct memdev_dmi_entry {
struct ghes_edac_dimm_fill {
struct mem_ctl_info *mci;
unsigned count;
unsigned int count;
};
static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg)
......
......@@ -417,7 +417,8 @@ static const char *i5100_err_msg(unsigned err)
}
/* convert csrow index into a rank (per channel -- 0..5) */
static int i5100_csrow_to_rank(const struct mem_ctl_info *mci, int csrow)
static unsigned int i5100_csrow_to_rank(const struct mem_ctl_info *mci,
unsigned int csrow)
{
const struct i5100_priv *priv = mci->pvt_info;
......@@ -425,7 +426,8 @@ static int i5100_csrow_to_rank(const struct mem_ctl_info *mci, int csrow)
}
/* convert csrow index into a channel (0..1) */
static int i5100_csrow_to_chan(const struct mem_ctl_info *mci, int csrow)
static unsigned int i5100_csrow_to_chan(const struct mem_ctl_info *mci,
unsigned int csrow)
{
const struct i5100_priv *priv = mci->pvt_info;
......@@ -653,11 +655,11 @@ static struct pci_dev *pci_get_device_func(unsigned vendor,
return ret;
}
static unsigned long i5100_npages(struct mem_ctl_info *mci, int csrow)
static unsigned long i5100_npages(struct mem_ctl_info *mci, unsigned int csrow)
{
struct i5100_priv *priv = mci->pvt_info;
const unsigned chan_rank = i5100_csrow_to_rank(mci, csrow);
const unsigned chan = i5100_csrow_to_chan(mci, csrow);
const unsigned int chan_rank = i5100_csrow_to_rank(mci, csrow);
const unsigned int chan = i5100_csrow_to_chan(mci, csrow);
unsigned addr_lines;
/* dimm present? */
......@@ -852,8 +854,8 @@ static void i5100_init_csrows(struct mem_ctl_info *mci)
for (i = 0; i < mci->tot_dimms; i++) {
struct dimm_info *dimm;
const unsigned long npages = i5100_npages(mci, i);
const unsigned chan = i5100_csrow_to_chan(mci, i);
const unsigned rank = i5100_csrow_to_rank(mci, i);
const unsigned int chan = i5100_csrow_to_chan(mci, i);
const unsigned int rank = i5100_csrow_to_rank(mci, i);
if (!npages)
continue;
......
......@@ -260,11 +260,14 @@ static u64 get_sideband_reg_base_addr(void)
}
}
#define DNV_MCHBAR_SIZE 0x8000
#define DNV_SB_PORT_SIZE 0x10000
static int dnv_rd_reg(int port, int off, int op, void *data, size_t sz, char *name)
{
struct pci_dev *pdev;
char *base;
u64 addr;
unsigned long size;
if (op == 4) {
pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x1980, NULL);
......@@ -279,15 +282,17 @@ static int dnv_rd_reg(int port, int off, int op, void *data, size_t sz, char *na
addr = get_mem_ctrl_hub_base_addr();
if (!addr)
return -ENODEV;
size = DNV_MCHBAR_SIZE;
} else {
/* MMIO via sideband register base address */
addr = get_sideband_reg_base_addr();
if (!addr)
return -ENODEV;
addr += (port << 16);
size = DNV_SB_PORT_SIZE;
}
base = ioremap((resource_size_t)addr, 0x10000);
base = ioremap((resource_size_t)addr, size);
if (!base)
return -ENODEV;
......
......@@ -440,7 +440,7 @@ struct dimm_info {
char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */
/* Memory location data */
unsigned location[EDAC_MAX_LAYERS];
unsigned int location[EDAC_MAX_LAYERS];
struct mem_ctl_info *mci; /* the parent */
......@@ -451,7 +451,7 @@ struct dimm_info {
u32 nr_pages; /* number of pages on this dimm */
unsigned csrow, cschannel; /* Points to the old API data */
unsigned int csrow, cschannel; /* Points to the old API data */
u16 smbios_handle; /* Handle for SMBIOS type 17 */
};
......@@ -597,7 +597,7 @@ struct mem_ctl_info {
unsigned long page);
int mc_idx;
struct csrow_info **csrows;
unsigned nr_csrows, num_cschannel;
unsigned int nr_csrows, num_cschannel;
/*
* Memory Controller hierarchy
......@@ -608,14 +608,14 @@ struct mem_ctl_info {
* of the recent drivers enumerate memories per DIMM, instead.
* When the memory controller is per rank, csbased is true.
*/
unsigned n_layers;
unsigned int n_layers;
struct edac_mc_layer *layers;
bool csbased;
/*
* DIMM info. Will eventually remove the entire csrows_info some day
*/
unsigned tot_dimms;
unsigned int tot_dimms;
struct dimm_info **dimms;
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment