Commit 0b21f21a authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'edac_for_4.20_2' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp

Pull more EDAC updates from Borislav Petkov:
 "The second part of the EDAC pile which contains the ADXL user and a
  build fix which addresses a not-so-sensical .config but fixes
  randconfig builds people do:

   - skx_edac: Address translation for NVDIMMs (Tony Luck and Qiuxu Zhuo)

   - ACPI_ADXL build fix"

[ I don't think "sensical" is a word, particularly when used in the
  context of actually meaning "nonsensical", but I like it   - Linus ]

* tag 'edac_for_4.20_2' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp:
  EDAC, skx: Fix randconfig builds
  EDAC, skx_edac: Add address translation for non-volatile DIMMs
parents 54480aa7 a324e939
...@@ -234,6 +234,7 @@ config EDAC_SKX ...@@ -234,6 +234,7 @@ config EDAC_SKX
depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG
depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_SKX can't be y depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_SKX can't be y
select DMI select DMI
select ACPI_ADXL if ACPI
help help
Support for error detection and correction the Intel Support for error detection and correction the Intel
Skylake server Integrated Memory Controllers. If your Skylake server Integrated Memory Controllers. If your
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include <linux/bitmap.h> #include <linux/bitmap.h>
#include <linux/math64.h> #include <linux/math64.h>
#include <linux/mod_devicetable.h> #include <linux/mod_devicetable.h>
#include <linux/adxl.h>
#include <acpi/nfit.h> #include <acpi/nfit.h>
#include <asm/cpu_device_id.h> #include <asm/cpu_device_id.h>
#include <asm/intel-family.h> #include <asm/intel-family.h>
...@@ -35,6 +36,7 @@ ...@@ -35,6 +36,7 @@
#include "edac_module.h" #include "edac_module.h"
#define EDAC_MOD_STR "skx_edac" #define EDAC_MOD_STR "skx_edac"
#define MSG_SIZE 1024
/* /*
* Debug macros * Debug macros
...@@ -54,6 +56,29 @@ ...@@ -54,6 +56,29 @@
static LIST_HEAD(skx_edac_list); static LIST_HEAD(skx_edac_list);
static u64 skx_tolm, skx_tohm; static u64 skx_tolm, skx_tohm;
static char *skx_msg;
static unsigned int nvdimm_count;
enum {
INDEX_SOCKET,
INDEX_MEMCTRL,
INDEX_CHANNEL,
INDEX_DIMM,
INDEX_MAX
};
static const char * const component_names[] = {
[INDEX_SOCKET] = "ProcessorSocketId",
[INDEX_MEMCTRL] = "MemoryControllerId",
[INDEX_CHANNEL] = "ChannelId",
[INDEX_DIMM] = "DimmSlotId",
};
static int component_indices[ARRAY_SIZE(component_names)];
static int adxl_component_count;
static const char * const *adxl_component_names;
static u64 *adxl_values;
static char *adxl_msg;
#define NUM_IMC 2 /* memory controllers per socket */ #define NUM_IMC 2 /* memory controllers per socket */
#define NUM_CHANNELS 3 /* channels per memory controller */ #define NUM_CHANNELS 3 /* channels per memory controller */
...@@ -393,6 +418,8 @@ static int get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc, ...@@ -393,6 +418,8 @@ static int get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
u16 flags; u16 flags;
u64 size = 0; u64 size = 0;
nvdimm_count++;
dev_handle = ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno, chan, imc->lmc, dev_handle = ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno, chan, imc->lmc,
imc->src_id, 0); imc->src_id, 0);
...@@ -941,12 +968,46 @@ static void teardown_skx_debug(void) ...@@ -941,12 +968,46 @@ static void teardown_skx_debug(void)
} }
#endif /*CONFIG_EDAC_DEBUG*/ #endif /*CONFIG_EDAC_DEBUG*/
static bool skx_adxl_decode(struct decoded_addr *res)
{
int i, len = 0;
if (res->addr >= skx_tohm || (res->addr >= skx_tolm &&
res->addr < BIT_ULL(32))) {
edac_dbg(0, "Address 0x%llx out of range\n", res->addr);
return false;
}
if (adxl_decode(res->addr, adxl_values)) {
edac_dbg(0, "Failed to decode 0x%llx\n", res->addr);
return false;
}
res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]];
res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]];
for (i = 0; i < adxl_component_count; i++) {
if (adxl_values[i] == ~0x0ull)
continue;
len += snprintf(adxl_msg + len, MSG_SIZE - len, " %s:0x%llx",
adxl_component_names[i], adxl_values[i]);
if (MSG_SIZE - len <= 0)
break;
}
return true;
}
static void skx_mce_output_error(struct mem_ctl_info *mci, static void skx_mce_output_error(struct mem_ctl_info *mci,
const struct mce *m, const struct mce *m,
struct decoded_addr *res) struct decoded_addr *res)
{ {
enum hw_event_mc_err_type tp_event; enum hw_event_mc_err_type tp_event;
char *type, *optype, msg[256]; char *type, *optype;
bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
bool overflow = GET_BITFIELD(m->status, 62, 62); bool overflow = GET_BITFIELD(m->status, 62, 62);
bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
...@@ -1007,22 +1068,47 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, ...@@ -1007,22 +1068,47 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
break; break;
} }
} }
if (adxl_component_count) {
snprintf(msg, sizeof(msg), snprintf(skx_msg, MSG_SIZE, "%s%s err_code:%04x:%04x %s",
overflow ? " OVERFLOW" : "",
(uncorrected_error && recoverable) ? " recoverable" : "",
mscod, errcode, adxl_msg);
} else {
snprintf(skx_msg, MSG_SIZE,
"%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x", "%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x",
overflow ? " OVERFLOW" : "", overflow ? " OVERFLOW" : "",
(uncorrected_error && recoverable) ? " recoverable" : "", (uncorrected_error && recoverable) ? " recoverable" : "",
mscod, errcode, mscod, errcode,
res->socket, res->imc, res->rank, res->socket, res->imc, res->rank,
res->bank_group, res->bank_address, res->row, res->column); res->bank_group, res->bank_address, res->row, res->column);
}
edac_dbg(0, "%s\n", msg); edac_dbg(0, "%s\n", skx_msg);
/* Call the helper to output message */ /* Call the helper to output message */
edac_mc_handle_error(tp_event, mci, core_err_cnt, edac_mc_handle_error(tp_event, mci, core_err_cnt,
m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0, m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
res->channel, res->dimm, -1, res->channel, res->dimm, -1,
optype, msg); optype, skx_msg);
}
static struct mem_ctl_info *get_mci(int src_id, int lmc)
{
struct skx_dev *d;
if (lmc > NUM_IMC - 1) {
skx_printk(KERN_ERR, "Bad lmc %d\n", lmc);
return NULL;
}
list_for_each_entry(d, &skx_edac_list, list) {
if (d->imc[0].src_id == src_id)
return d->imc[lmc].mci;
}
skx_printk(KERN_ERR, "No mci for src_id %d lmc %d\n", src_id, lmc);
return NULL;
} }
static int skx_mce_check_error(struct notifier_block *nb, unsigned long val, static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
...@@ -1040,10 +1126,23 @@ static int skx_mce_check_error(struct notifier_block *nb, unsigned long val, ...@@ -1040,10 +1126,23 @@ static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV)) if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
return NOTIFY_DONE; return NOTIFY_DONE;
memset(&res, 0, sizeof(res));
res.addr = mce->addr; res.addr = mce->addr;
if (adxl_component_count) {
if (!skx_adxl_decode(&res))
return NOTIFY_DONE;
mci = get_mci(res.socket, res.imc);
} else {
if (!skx_decode(&res)) if (!skx_decode(&res))
return NOTIFY_DONE; return NOTIFY_DONE;
mci = res.dev->imc[res.imc].mci; mci = res.dev->imc[res.imc].mci;
}
if (!mci)
return NOTIFY_DONE;
if (mce->mcgstatus & MCG_STATUS_MCIP) if (mce->mcgstatus & MCG_STATUS_MCIP)
type = "Exception"; type = "Exception";
...@@ -1094,6 +1193,62 @@ static void skx_remove(void) ...@@ -1094,6 +1193,62 @@ static void skx_remove(void)
} }
} }
static void __init skx_adxl_get(void)
{
const char * const *names;
int i, j;
names = adxl_get_component_names();
if (!names) {
skx_printk(KERN_NOTICE, "No firmware support for address translation.");
skx_printk(KERN_CONT, " Only decoding DDR4 address!\n");
return;
}
for (i = 0; i < INDEX_MAX; i++) {
for (j = 0; names[j]; j++) {
if (!strcmp(component_names[i], names[j])) {
component_indices[i] = j;
break;
}
}
if (!names[j])
goto err;
}
adxl_component_names = names;
while (*names++)
adxl_component_count++;
adxl_values = kcalloc(adxl_component_count, sizeof(*adxl_values),
GFP_KERNEL);
if (!adxl_values) {
adxl_component_count = 0;
return;
}
adxl_msg = kzalloc(MSG_SIZE, GFP_KERNEL);
if (!adxl_msg) {
adxl_component_count = 0;
kfree(adxl_values);
}
return;
err:
skx_printk(KERN_ERR, "'%s' is not matched from DSM parameters: ",
component_names[i]);
for (j = 0; names[j]; j++)
skx_printk(KERN_CONT, "%s ", names[j]);
skx_printk(KERN_CONT, "\n");
}
static void __exit skx_adxl_put(void)
{
kfree(adxl_values);
kfree(adxl_msg);
}
/* /*
* skx_init: * skx_init:
* make sure we are running on the correct cpu model * make sure we are running on the correct cpu model
...@@ -1158,6 +1313,15 @@ static int __init skx_init(void) ...@@ -1158,6 +1313,15 @@ static int __init skx_init(void)
} }
} }
skx_msg = kzalloc(MSG_SIZE, GFP_KERNEL);
if (!skx_msg) {
rc = -ENOMEM;
goto fail;
}
if (nvdimm_count)
skx_adxl_get();
/* Ensure that the OPSTATE is set correctly for POLL or NMI */ /* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init(); opstate_init();
...@@ -1176,6 +1340,9 @@ static void __exit skx_exit(void) ...@@ -1176,6 +1340,9 @@ static void __exit skx_exit(void)
edac_dbg(2, "\n"); edac_dbg(2, "\n");
mce_unregister_decode_chain(&skx_mce_dec); mce_unregister_decode_chain(&skx_mce_dec);
skx_remove(); skx_remove();
if (nvdimm_count)
skx_adxl_put();
kfree(skx_msg);
teardown_skx_debug(); teardown_skx_debug();
} }
......
...@@ -7,7 +7,12 @@ ...@@ -7,7 +7,12 @@
#ifndef _LINUX_ADXL_H #ifndef _LINUX_ADXL_H
#define _LINUX_ADXL_H #define _LINUX_ADXL_H
#ifdef CONFIG_ACPI_ADXL
const char * const *adxl_get_component_names(void); const char * const *adxl_get_component_names(void);
int adxl_decode(u64 addr, u64 component_values[]); int adxl_decode(u64 addr, u64 component_values[]);
#else
static inline const char * const *adxl_get_component_names(void) { return NULL; }
static inline int adxl_decode(u64 addr, u64 component_values[]) { return -EOPNOTSUPP; }
#endif
#endif /* _LINUX_ADXL_H */ #endif /* _LINUX_ADXL_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment