Commit df5f0f0a authored by Linus Torvalds

Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 RAS updates from Ingo Molnar:
 "The main changes in this development cycle were:

   - more AMD northbridge support work, mostly in preparation for Fam17h
     CPUs (Yazen Ghannam, Borislav Petkov)

   - cleanups/refactorings and fixes (Borislav Petkov, Tony Luck,
     Yinghai Lu)"

* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce: Include the PPIN in MCE records when available
  x86/mce/AMD: Add system physical address translation for AMD Fam17h
  x86/amd_nb: Add SMN and Indirect Data Fabric access for AMD Fam17h
  x86/amd_nb: Add Fam17h Data Fabric as "Northbridge"
  x86/amd_nb: Make all exports EXPORT_SYMBOL_GPL
  x86/amd_nb: Make amd_northbridges internal to amd_nb.c
  x86/mce/AMD: Reset Threshold Limit after logging error
  x86/mce/AMD: Fix HWID_MCATYPE calculation by grouping arguments
  x86/MCE: Correct TSC timestamping of error records
  x86/RAS: Hide SMCA bank names
  x86/RAS: Rename smca_bank_names to smca_names
  x86/RAS: Simplify SMCA HWID descriptor struct
  x86/RAS: Simplify SMCA bank descriptor struct
  x86/MCE: Dump MCE to dmesg if no consumers
  x86/RAS: Add TSC timestamp to the injected MCE
  x86/MCE: Do not look at panic_on_oops in the severity grading
parents cbaa1576 3f5a7896
...@@ -1046,7 +1046,7 @@ config X86_MCE_INTEL ...@@ -1046,7 +1046,7 @@ config X86_MCE_INTEL
config X86_MCE_AMD config X86_MCE_AMD
def_bool y def_bool y
prompt "AMD MCE features" prompt "AMD MCE features"
depends on X86_MCE && X86_LOCAL_APIC depends on X86_MCE && X86_LOCAL_APIC && AMD_NB
---help--- ---help---
Additional support for AMD specific MCE features such as Additional support for AMD specific MCE features such as
the DRAM Error Threshold. the DRAM Error Threshold.
......
...@@ -21,6 +21,10 @@ extern int amd_numa_init(void); ...@@ -21,6 +21,10 @@ extern int amd_numa_init(void);
extern int amd_get_subcaches(int); extern int amd_get_subcaches(int);
extern int amd_set_subcaches(int, unsigned long); extern int amd_set_subcaches(int, unsigned long);
extern int amd_smn_read(u16 node, u32 address, u32 *value);
extern int amd_smn_write(u16 node, u32 address, u32 value);
extern int amd_df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo);
struct amd_l3_cache { struct amd_l3_cache {
unsigned indices; unsigned indices;
u8 subcaches[4]; u8 subcaches[4];
...@@ -55,6 +59,7 @@ struct threshold_bank { ...@@ -55,6 +59,7 @@ struct threshold_bank {
}; };
struct amd_northbridge { struct amd_northbridge {
struct pci_dev *root;
struct pci_dev *misc; struct pci_dev *misc;
struct pci_dev *link; struct pci_dev *link;
struct amd_l3_cache l3_cache; struct amd_l3_cache l3_cache;
...@@ -66,7 +71,6 @@ struct amd_northbridge_info { ...@@ -66,7 +71,6 @@ struct amd_northbridge_info {
u64 flags; u64 flags;
struct amd_northbridge *nb; struct amd_northbridge *nb;
}; };
extern struct amd_northbridge_info amd_northbridges;
#define AMD_NB_GART BIT(0) #define AMD_NB_GART BIT(0)
#define AMD_NB_L3_INDEX_DISABLE BIT(1) #define AMD_NB_L3_INDEX_DISABLE BIT(1)
...@@ -74,20 +78,9 @@ extern struct amd_northbridge_info amd_northbridges; ...@@ -74,20 +78,9 @@ extern struct amd_northbridge_info amd_northbridges;
#ifdef CONFIG_AMD_NB #ifdef CONFIG_AMD_NB
static inline u16 amd_nb_num(void) u16 amd_nb_num(void);
{ bool amd_nb_has_feature(unsigned int feature);
return amd_northbridges.num; struct amd_northbridge *node_to_amd_nb(int node);
}
static inline bool amd_nb_has_feature(unsigned feature)
{
return ((amd_northbridges.flags & feature) == feature);
}
static inline struct amd_northbridge *node_to_amd_nb(int node)
{
return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL;
}
static inline u16 amd_pci_dev_to_node_id(struct pci_dev *pdev) static inline u16 amd_pci_dev_to_node_id(struct pci_dev *pdev)
{ {
......
...@@ -193,6 +193,7 @@ ...@@ -193,6 +193,7 @@
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
......
...@@ -252,8 +252,10 @@ static inline void cmci_recheck(void) {} ...@@ -252,8 +252,10 @@ static inline void cmci_recheck(void) {}
#ifdef CONFIG_X86_MCE_AMD #ifdef CONFIG_X86_MCE_AMD
void mce_amd_feature_init(struct cpuinfo_x86 *c); void mce_amd_feature_init(struct cpuinfo_x86 *c);
int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr);
#else #else
static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
/* Stub for builds without CONFIG_X86_MCE_AMD: no translation is available, so always fail with -EINVAL. */
static inline int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return -EINVAL; }
#endif #endif
int mce_available(struct cpuinfo_x86 *c); int mce_available(struct cpuinfo_x86 *c);
...@@ -356,28 +358,23 @@ enum smca_bank_types { ...@@ -356,28 +358,23 @@ enum smca_bank_types {
N_SMCA_BANK_TYPES N_SMCA_BANK_TYPES
}; };
struct smca_bank_name { #define HWID_MCATYPE(hwid, mcatype) (((hwid) << 16) | (mcatype))
const char *name; /* Short name for sysfs */
const char *long_name; /* Long name for pretty-printing */
};
extern struct smca_bank_name smca_bank_names[N_SMCA_BANK_TYPES];
#define HWID_MCATYPE(hwid, mcatype) ((hwid << 16) | mcatype)
struct smca_hwid_mcatype { struct smca_hwid {
unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */ unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */
u32 hwid_mcatype; /* (hwid,mcatype) tuple */ u32 hwid_mcatype; /* (hwid,mcatype) tuple */
u32 xec_bitmap; /* Bitmap of valid ExtErrorCodes; current max is 21. */ u32 xec_bitmap; /* Bitmap of valid ExtErrorCodes; current max is 21. */
}; };
struct smca_bank_info { struct smca_bank {
struct smca_hwid_mcatype *type; struct smca_hwid *hwid;
u32 type_instance; /* Instance ID */
u32 id;
}; };
extern struct smca_bank_info smca_banks[MAX_NR_BANKS]; extern struct smca_bank smca_banks[MAX_NR_BANKS];
extern const char *smca_get_long_name(enum smca_bank_types t);
#endif #endif
#endif /* _ASM_X86_MCE_H */ #endif /* _ASM_X86_MCE_H */
...@@ -37,6 +37,10 @@ ...@@ -37,6 +37,10 @@
#define EFER_FFXSR (1<<_EFER_FFXSR) #define EFER_FFXSR (1<<_EFER_FFXSR)
/* Intel MSRs. Some also available on other CPUs */ /* Intel MSRs. Some also available on other CPUs */
#define MSR_PPIN_CTL 0x0000004e
#define MSR_PPIN 0x0000004f
#define MSR_IA32_PERFCTR0 0x000000c1 #define MSR_IA32_PERFCTR0 0x000000c1
#define MSR_IA32_PERFCTR1 0x000000c2 #define MSR_IA32_PERFCTR1 0x000000c2
#define MSR_FSB_FREQ 0x000000cd #define MSR_FSB_FREQ 0x000000cd
......
...@@ -28,6 +28,7 @@ struct mce { ...@@ -28,6 +28,7 @@ struct mce {
__u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
__u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */ __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */
__u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */ __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */
__u64 ppin; /* Protected Processor Inventory Number */
}; };
#define MCE_GET_RECORD_LEN _IOR('M', 1, int) #define MCE_GET_RECORD_LEN _IOR('M', 1, int)
......
...@@ -13,8 +13,20 @@ ...@@ -13,8 +13,20 @@
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <asm/amd_nb.h> #include <asm/amd_nb.h>
#define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450
#define PCI_DEVICE_ID_AMD_17H_DF_F3 0x1463
#define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464
/* Protect the PCI config register pairs used for SMN and DF indirect access. */
static DEFINE_MUTEX(smn_mutex);
static u32 *flush_words; static u32 *flush_words;
/* PCI IDs passed to next_northbridge() when looking up each node's root device. */
static const struct pci_device_id amd_root_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_ROOT) },
{}
};
const struct pci_device_id amd_nb_misc_ids[] = { const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
...@@ -24,9 +36,10 @@ const struct pci_device_id amd_nb_misc_ids[] = { ...@@ -24,9 +36,10 @@ const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) },
{} {}
}; };
EXPORT_SYMBOL(amd_nb_misc_ids); EXPORT_SYMBOL_GPL(amd_nb_misc_ids);
static const struct pci_device_id amd_nb_link_ids[] = { static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) },
...@@ -34,6 +47,7 @@ static const struct pci_device_id amd_nb_link_ids[] = { ...@@ -34,6 +47,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) },
{} {}
}; };
...@@ -44,8 +58,25 @@ const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[] __initconst = { ...@@ -44,8 +58,25 @@ const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[] __initconst = {
{ } { }
}; };
struct amd_northbridge_info amd_northbridges; static struct amd_northbridge_info amd_northbridges;
EXPORT_SYMBOL(amd_northbridges);
u16 amd_nb_num(void)
{
return amd_northbridges.num;
}
EXPORT_SYMBOL_GPL(amd_nb_num);
bool amd_nb_has_feature(unsigned int feature)
{
return ((amd_northbridges.flags & feature) == feature);
}
EXPORT_SYMBOL_GPL(amd_nb_has_feature);
/*
 * Look up the northbridge descriptor for @node.
 *
 * Returns a pointer into amd_northbridges.nb[], or NULL when @node is not a
 * valid index.
 *
 * @node is a signed int while the count is a u16, so a bare "node < num"
 * comparison is done in signed arithmetic and lets negative values through,
 * which would index before the start of the array. Reject them explicitly.
 */
struct amd_northbridge *node_to_amd_nb(int node)
{
	if (node < 0 || node >= amd_northbridges.num)
		return NULL;

	return &amd_northbridges.nb[node];
}
EXPORT_SYMBOL_GPL(node_to_amd_nb);
static struct pci_dev *next_northbridge(struct pci_dev *dev, static struct pci_dev *next_northbridge(struct pci_dev *dev,
const struct pci_device_id *ids) const struct pci_device_id *ids)
...@@ -58,13 +89,106 @@ static struct pci_dev *next_northbridge(struct pci_dev *dev, ...@@ -58,13 +89,106 @@ static struct pci_dev *next_northbridge(struct pci_dev *dev,
return dev; return dev;
} }
/*
 * Common helper behind amd_smn_read() and amd_smn_write().
 *
 * Writes @address to config offset 0x60 of the node's root device, then
 * either writes *@value to, or reads *@value from, config offset 0x64.
 * Both accesses happen under smn_mutex.
 *
 * Returns -ENODEV if @node is out of range or has no root device,
 * otherwise the result of the last PCI config access performed.
 */
static int __amd_smn_rw(u16 node, u32 address, u32 *value, bool write)
{
	struct pci_dev *root_dev;
	int ret;

	if (node >= amd_northbridges.num)
		return -ENODEV;

	root_dev = node_to_amd_nb(node)->root;
	if (!root_dev)
		return -ENODEV;

	mutex_lock(&smn_mutex);

	ret = pci_write_config_dword(root_dev, 0x60, address);
	if (ret) {
		pr_warn("Error programming SMN address 0x%x.\n", address);
		goto unlock;
	}

	if (write)
		ret = pci_write_config_dword(root_dev, 0x64, *value);
	else
		ret = pci_read_config_dword(root_dev, 0x64, value);

	if (ret)
		pr_warn("Error %s SMN address 0x%x.\n",
			write ? "writing to" : "reading from", address);

unlock:
	mutex_unlock(&smn_mutex);
	return ret;
}
/*
 * Read the SMN register at @address on @node into *@value.
 * Thin wrapper that calls __amd_smn_rw() in read mode and returns its result.
 */
int amd_smn_read(u16 node, u32 address, u32 *value)
{
	const bool is_write = false;

	return __amd_smn_rw(node, address, value, is_write);
}
EXPORT_SYMBOL_GPL(amd_smn_read);
/*
 * Write @value to the SMN register at @address on @node.
 * Thin wrapper that calls __amd_smn_rw() in write mode and returns its result.
 */
int amd_smn_write(u16 node, u32 address, u32 value)
{
	u32 data = value;

	return __amd_smn_rw(node, address, &data, true);
}
EXPORT_SYMBOL_GPL(amd_smn_write);
/*
 * Data Fabric indirect register read through the FICAA/FICAD pair.
 *
 * FICAA (Fabric Indirect Configuration Access Address) is assembled from
 * the target device's Instance Id together with the PCI function and
 * register offset of the register being requested.
 *
 * FICAD (Fabric Indirect Configuration Access Data) exists as LO and HI
 * registers; only the LO register has been needed so far, so that is the
 * only one read back into @lo.
 *
 * Returns -ENODEV if @node is out of range or has no link device,
 * otherwise the result of the last PCI config access performed.
 */
int amd_df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo)
{
	struct pci_dev *df_f4;
	u32 addr;
	int ret;

	if (node >= amd_northbridges.num)
		return -ENODEV;

	df_f4 = node_to_amd_nb(node)->link;
	if (!df_f4)
		return -ENODEV;

	/* Bit 0 set, then register offset, function and instance id fields. */
	addr = 1 | (reg & 0x3FC) | ((func & 0x7) << 11) | (instance_id << 16);

	mutex_lock(&smn_mutex);

	ret = pci_write_config_dword(df_f4, 0x5C, addr);
	if (ret) {
		pr_warn("Error writing DF Indirect FICAA, FICAA=0x%x\n", addr);
		goto unlock;
	}

	ret = pci_read_config_dword(df_f4, 0x98, lo);
	if (ret)
		pr_warn("Error reading DF Indirect FICAD LO, FICAA=0x%x.\n", addr);

unlock:
	mutex_unlock(&smn_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(amd_df_indirect_read);
int amd_cache_northbridges(void) int amd_cache_northbridges(void)
{ {
u16 i = 0; u16 i = 0;
struct amd_northbridge *nb; struct amd_northbridge *nb;
struct pci_dev *misc, *link; struct pci_dev *root, *misc, *link;
if (amd_nb_num()) if (amd_northbridges.num)
return 0; return 0;
misc = NULL; misc = NULL;
...@@ -74,15 +198,17 @@ int amd_cache_northbridges(void) ...@@ -74,15 +198,17 @@ int amd_cache_northbridges(void)
if (!i) if (!i)
return -ENODEV; return -ENODEV;
nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL); nb = kcalloc(i, sizeof(struct amd_northbridge), GFP_KERNEL);
if (!nb) if (!nb)
return -ENOMEM; return -ENOMEM;
amd_northbridges.nb = nb; amd_northbridges.nb = nb;
amd_northbridges.num = i; amd_northbridges.num = i;
link = misc = NULL; link = misc = root = NULL;
for (i = 0; i != amd_nb_num(); i++) { for (i = 0; i != amd_northbridges.num; i++) {
node_to_amd_nb(i)->root = root =
next_northbridge(root, amd_root_ids);
node_to_amd_nb(i)->misc = misc = node_to_amd_nb(i)->misc = misc =
next_northbridge(misc, amd_nb_misc_ids); next_northbridge(misc, amd_nb_misc_ids);
node_to_amd_nb(i)->link = link = node_to_amd_nb(i)->link = link =
...@@ -139,7 +265,7 @@ struct resource *amd_get_mmconfig_range(struct resource *res) ...@@ -139,7 +265,7 @@ struct resource *amd_get_mmconfig_range(struct resource *res)
{ {
u32 address; u32 address;
u64 base, msr; u64 base, msr;
unsigned segn_busn_bits; unsigned int segn_busn_bits;
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
return NULL; return NULL;
...@@ -226,14 +352,14 @@ static void amd_cache_gart(void) ...@@ -226,14 +352,14 @@ static void amd_cache_gart(void)
if (!amd_nb_has_feature(AMD_NB_GART)) if (!amd_nb_has_feature(AMD_NB_GART))
return; return;
flush_words = kmalloc(amd_nb_num() * sizeof(u32), GFP_KERNEL); flush_words = kmalloc_array(amd_northbridges.num, sizeof(u32), GFP_KERNEL);
if (!flush_words) { if (!flush_words) {
amd_northbridges.flags &= ~AMD_NB_GART; amd_northbridges.flags &= ~AMD_NB_GART;
pr_notice("Cannot initialize GART flush words, GART support disabled\n"); pr_notice("Cannot initialize GART flush words, GART support disabled\n");
return; return;
} }
for (i = 0; i != amd_nb_num(); i++) for (i = 0; i != amd_northbridges.num; i++)
pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c, &flush_words[i]); pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c, &flush_words[i]);
} }
...@@ -246,18 +372,20 @@ void amd_flush_garts(void) ...@@ -246,18 +372,20 @@ void amd_flush_garts(void)
if (!amd_nb_has_feature(AMD_NB_GART)) if (!amd_nb_has_feature(AMD_NB_GART))
return; return;
/* Avoid races between AGP and IOMMU. In theory it's not needed /*
but I'm not sure if the hardware won't lose flush requests * Avoid races between AGP and IOMMU. In theory it's not needed
when another is pending. This whole thing is so expensive anyways * but I'm not sure if the hardware won't lose flush requests
that it doesn't matter to serialize more. -AK */ * when another is pending. This whole thing is so expensive anyways
* that it doesn't matter to serialize more. -AK
*/
spin_lock_irqsave(&gart_lock, flags); spin_lock_irqsave(&gart_lock, flags);
flushed = 0; flushed = 0;
for (i = 0; i < amd_nb_num(); i++) { for (i = 0; i < amd_northbridges.num; i++) {
pci_write_config_dword(node_to_amd_nb(i)->misc, 0x9c, pci_write_config_dword(node_to_amd_nb(i)->misc, 0x9c,
flush_words[i] | 1); flush_words[i] | 1);
flushed++; flushed++;
} }
for (i = 0; i < amd_nb_num(); i++) { for (i = 0; i < amd_northbridges.num; i++) {
u32 w; u32 w;
/* Make sure the hardware actually executed the flush*/ /* Make sure the hardware actually executed the flush*/
for (;;) { for (;;) {
......
...@@ -311,7 +311,7 @@ static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_e ...@@ -311,7 +311,7 @@ static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_e
*msg = s->msg; *msg = s->msg;
s->covered = 1; s->covered = 1;
if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) { if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) {
if (panic_on_oops || tolerant < 1) if (tolerant < 1)
return MCE_PANIC_SEVERITY; return MCE_PANIC_SEVERITY;
} }
return s->sev; return s->sev;
......
...@@ -43,6 +43,7 @@ ...@@ -43,6 +43,7 @@
#include <linux/export.h> #include <linux/export.h>
#include <linux/jump_label.h> #include <linux/jump_label.h>
#include <asm/intel-family.h>
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/traps.h> #include <asm/traps.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
...@@ -135,6 +136,9 @@ void mce_setup(struct mce *m) ...@@ -135,6 +136,9 @@ void mce_setup(struct mce *m)
m->socketid = cpu_data(m->extcpu).phys_proc_id; m->socketid = cpu_data(m->extcpu).phys_proc_id;
m->apicid = cpu_data(m->extcpu).initial_apicid; m->apicid = cpu_data(m->extcpu).initial_apicid;
rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap); rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap);
if (this_cpu_has(X86_FEATURE_INTEL_PPIN))
rdmsrl(MSR_PPIN, m->ppin);
} }
DEFINE_PER_CPU(struct mce, injectm); DEFINE_PER_CPU(struct mce, injectm);
...@@ -207,8 +211,12 @@ EXPORT_SYMBOL_GPL(mce_inject_log); ...@@ -207,8 +211,12 @@ EXPORT_SYMBOL_GPL(mce_inject_log);
static struct notifier_block mce_srao_nb; static struct notifier_block mce_srao_nb;
static atomic_t num_notifiers;
void mce_register_decode_chain(struct notifier_block *nb) void mce_register_decode_chain(struct notifier_block *nb)
{ {
atomic_inc(&num_notifiers);
/* Ensure SRAO notifier has the highest priority in the decode chain. */ /* Ensure SRAO notifier has the highest priority in the decode chain. */
if (nb != &mce_srao_nb && nb->priority == INT_MAX) if (nb != &mce_srao_nb && nb->priority == INT_MAX)
nb->priority -= 1; nb->priority -= 1;
...@@ -219,6 +227,8 @@ EXPORT_SYMBOL_GPL(mce_register_decode_chain); ...@@ -219,6 +227,8 @@ EXPORT_SYMBOL_GPL(mce_register_decode_chain);
void mce_unregister_decode_chain(struct notifier_block *nb) void mce_unregister_decode_chain(struct notifier_block *nb)
{ {
atomic_dec(&num_notifiers);
atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb); atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb);
} }
EXPORT_SYMBOL_GPL(mce_unregister_decode_chain); EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
...@@ -270,12 +280,12 @@ struct mca_msr_regs msr_ops = { ...@@ -270,12 +280,12 @@ struct mca_msr_regs msr_ops = {
.misc = misc_reg .misc = misc_reg
}; };
static void print_mce(struct mce *m) static void __print_mce(struct mce *m)
{ {
int ret = 0; pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n",
m->extcpu,
pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", (m->mcgstatus & MCG_STATUS_MCIP ? " Exception" : ""),
m->extcpu, m->mcgstatus, m->bank, m->status); m->mcgstatus, m->bank, m->status);
if (m->ip) { if (m->ip) {
pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ", pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ",
...@@ -308,6 +318,13 @@ static void print_mce(struct mce *m) ...@@ -308,6 +318,13 @@ static void print_mce(struct mce *m)
pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n", pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n",
m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid, m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
cpu_data(m->extcpu).microcode); cpu_data(m->extcpu).microcode);
}
static void print_mce(struct mce *m)
{
int ret = 0;
__print_mce(m);
/* /*
* Print out human-readable details about the MCE error, * Print out human-readable details about the MCE error,
...@@ -569,6 +586,32 @@ static struct notifier_block mce_srao_nb = { ...@@ -569,6 +586,32 @@ static struct notifier_block mce_srao_nb = {
.priority = INT_MAX, .priority = INT_MAX,
}; };
/*
 * Fallback decode-chain callback: print the MCE record via __print_mce()
 * when no consumers beyond the SRAO notifier and this one are registered.
 * Always returns NOTIFY_DONE.
 */
static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
				void *data)
{
	struct mce *err = data;

	/*
	 * More than two registered notifiers means somebody besides the SRAO
	 * notifier and ourselves is on the chain, so stay quiet in that case.
	 */
	if (err && atomic_read(&num_notifiers) <= 2)
		__print_mce(err);

	return NOTIFY_DONE;
}
/* Notifier block wrapping mce_default_notifier(). */
static struct notifier_block mce_default_nb = {
	/* Lowest priority: this callback is meant to run last. */
	.priority	= 0,
	.notifier_call	= mce_default_notifier,
};
/* /*
* Read ADDR and MISC registers. * Read ADDR and MISC registers.
*/ */
...@@ -667,6 +710,15 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) ...@@ -667,6 +710,15 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
mce_gather_info(&m, NULL); mce_gather_info(&m, NULL);
/*
* m.tsc was set in mce_setup(). Clear it if not requested.
*
* FIXME: Propagate @flags to mce_gather_info/mce_setup() to avoid
* that dance.
*/
if (!(flags & MCP_TIMESTAMP))
m.tsc = 0;
for (i = 0; i < mca_cfg.banks; i++) { for (i = 0; i < mca_cfg.banks; i++) {
if (!mce_banks[i].ctl || !test_bit(i, *b)) if (!mce_banks[i].ctl || !test_bit(i, *b))
continue; continue;
...@@ -674,14 +726,12 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) ...@@ -674,14 +726,12 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
m.misc = 0; m.misc = 0;
m.addr = 0; m.addr = 0;
m.bank = i; m.bank = i;
m.tsc = 0;
barrier(); barrier();
m.status = mce_rdmsrl(msr_ops.status(i)); m.status = mce_rdmsrl(msr_ops.status(i));
if (!(m.status & MCI_STATUS_VAL)) if (!(m.status & MCI_STATUS_VAL))
continue; continue;
/* /*
* Uncorrected or signalled events are handled by the exception * Uncorrected or signalled events are handled by the exception
* handler when it is enabled, so don't process those here. * handler when it is enabled, so don't process those here.
...@@ -696,9 +746,6 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) ...@@ -696,9 +746,6 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
mce_read_aux(&m, i); mce_read_aux(&m, i);
if (!(flags & MCP_TIMESTAMP))
m.tsc = 0;
severity = mce_severity(&m, mca_cfg.tolerant, NULL, false); severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m))
...@@ -1355,7 +1402,7 @@ static void mce_timer_fn(unsigned long data) ...@@ -1355,7 +1402,7 @@ static void mce_timer_fn(unsigned long data)
iv = __this_cpu_read(mce_next_interval); iv = __this_cpu_read(mce_next_interval);
if (mce_available(this_cpu_ptr(&cpu_info))) { if (mce_available(this_cpu_ptr(&cpu_info))) {
machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_poll_banks)); machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
if (mce_intel_cmci_poll()) { if (mce_intel_cmci_poll()) {
iv = mce_adjust_timer(iv); iv = mce_adjust_timer(iv);
...@@ -2138,6 +2185,7 @@ int __init mcheck_init(void) ...@@ -2138,6 +2185,7 @@ int __init mcheck_init(void)
{ {
mcheck_intel_therm_init(); mcheck_intel_therm_init();
mce_register_decode_chain(&mce_srao_nb); mce_register_decode_chain(&mce_srao_nb);
mce_register_decode_chain(&mce_default_nb);
mcheck_vendor_init_severity(); mcheck_vendor_init_severity();
INIT_WORK(&mce_work, mce_process_work); INIT_WORK(&mce_work, mce_process_work);
......
...@@ -69,7 +69,12 @@ static const char * const smca_umc_block_names[] = { ...@@ -69,7 +69,12 @@ static const char * const smca_umc_block_names[] = {
"misc_umc" "misc_umc"
}; };
struct smca_bank_name smca_bank_names[] = { struct smca_bank_name {
const char *name; /* Short name for sysfs */
const char *long_name; /* Long name for pretty-printing */
};
static struct smca_bank_name smca_names[] = {
[SMCA_LS] = { "load_store", "Load Store Unit" }, [SMCA_LS] = { "load_store", "Load Store Unit" },
[SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" }, [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" },
[SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" }, [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" },
...@@ -84,9 +89,25 @@ struct smca_bank_name smca_bank_names[] = { ...@@ -84,9 +89,25 @@ struct smca_bank_name smca_bank_names[] = {
[SMCA_PSP] = { "psp", "Platform Security Processor" }, [SMCA_PSP] = { "psp", "Platform Security Processor" },
[SMCA_SMU] = { "smu", "System Management Unit" }, [SMCA_SMU] = { "smu", "System Management Unit" },
}; };
EXPORT_SYMBOL_GPL(smca_bank_names);
static struct smca_hwid_mcatype smca_hwid_mcatypes[] = { const char *smca_get_name(enum smca_bank_types t)
{
if (t >= N_SMCA_BANK_TYPES)
return NULL;
return smca_names[t].name;
}
/*
 * Return the long (pretty-printing) name for SMCA bank type @t, or NULL
 * when @t is not below N_SMCA_BANK_TYPES.
 */
const char *smca_get_long_name(enum smca_bank_types t)
{
	return (t < N_SMCA_BANK_TYPES) ? smca_names[t].long_name : NULL;
}
EXPORT_SYMBOL_GPL(smca_get_long_name);
static struct smca_hwid smca_hwid_mcatypes[] = {
/* { bank_type, hwid_mcatype, xec_bitmap } */ /* { bank_type, hwid_mcatype, xec_bitmap } */
/* ZN Core (HWID=0xB0) MCA types */ /* ZN Core (HWID=0xB0) MCA types */
...@@ -116,7 +137,7 @@ static struct smca_hwid_mcatype smca_hwid_mcatypes[] = { ...@@ -116,7 +137,7 @@ static struct smca_hwid_mcatype smca_hwid_mcatypes[] = {
{ SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 }, { SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 },
}; };
struct smca_bank_info smca_banks[MAX_NR_BANKS]; struct smca_bank smca_banks[MAX_NR_BANKS];
EXPORT_SYMBOL_GPL(smca_banks); EXPORT_SYMBOL_GPL(smca_banks);
/* /*
...@@ -142,35 +163,34 @@ static void default_deferred_error_interrupt(void) ...@@ -142,35 +163,34 @@ static void default_deferred_error_interrupt(void)
} }
void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt; void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
/*
* CPU Initialization
*/
static void get_smca_bank_info(unsigned int bank) static void get_smca_bank_info(unsigned int bank)
{ {
unsigned int i, hwid_mcatype, cpu = smp_processor_id(); unsigned int i, hwid_mcatype, cpu = smp_processor_id();
struct smca_hwid_mcatype *type; struct smca_hwid *s_hwid;
u32 high, instanceId; u32 high, instance_id;
u16 hwid, mcatype;
/* Collect bank_info using CPU 0 for now. */ /* Collect bank_info using CPU 0 for now. */
if (cpu) if (cpu)
return; return;
if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &instanceId, &high)) { if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &instance_id, &high)) {
pr_warn("Failed to read MCA_IPID for bank %d\n", bank); pr_warn("Failed to read MCA_IPID for bank %d\n", bank);
return; return;
} }
hwid = high & MCI_IPID_HWID; hwid_mcatype = HWID_MCATYPE(high & MCI_IPID_HWID,
mcatype = (high & MCI_IPID_MCATYPE) >> 16; (high & MCI_IPID_MCATYPE) >> 16);
hwid_mcatype = HWID_MCATYPE(hwid, mcatype);
for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) { for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) {
type = &smca_hwid_mcatypes[i]; s_hwid = &smca_hwid_mcatypes[i];
if (hwid_mcatype == type->hwid_mcatype) { if (hwid_mcatype == s_hwid->hwid_mcatype) {
smca_banks[bank].type = type;
smca_banks[bank].type_instance = instanceId; WARN(smca_banks[bank].hwid,
"Bank %s already initialized!\n",
smca_get_name(s_hwid->bank_type));
smca_banks[bank].hwid = s_hwid;
smca_banks[bank].id = instance_id;
break; break;
} }
} }
...@@ -533,6 +553,206 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) ...@@ -533,6 +553,206 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
deferred_error_interrupt_enable(c); deferred_error_interrupt_enable(c);
} }
/*
 * Translate a UMC normalized address into a system physical address.
 *
 * @norm_addr: normalized address to translate
 * @nid:       node id handed to amd_df_indirect_read()
 * @umc:       UMC/channel number, used as the instance id for the indirect reads
 * @sys_addr:  receives the translated address; written on success only
 *
 * Returns 0 on success. Returns -EINVAL if a Data Fabric indirect read
 * fails, if a register field holds a value this code does not accept, or
 * if the computed address lies above the DRAM limit address.
 */
int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)
{
u64 dram_base_addr, dram_limit_addr, dram_hole_base;
/* We start from the normalized address */
u64 ret_addr = norm_addr;
u32 tmp;
u8 die_id_shift, die_id_mask, socket_id_shift, socket_id_mask;
u8 intlv_num_dies, intlv_num_chan, intlv_num_sockets;
u8 intlv_addr_sel, intlv_addr_bit;
u8 num_intlv_bits, hashed_bit;
u8 lgcy_mmio_hole_en, base = 0;
u8 cs_mask, cs_id = 0;
bool hash_enabled = false;
/* Read D18F0x1B4 (DramOffset), check if base 1 is used. */
if (amd_df_indirect_read(nid, 0, 0x1B4, umc, &tmp))
goto out_err;
/* Remove HiAddrOffset from normalized address, if enabled: */
if (tmp & BIT(0)) {
u64 hi_addr_offset = (tmp & GENMASK_ULL(31, 20)) << 8;
if (norm_addr >= hi_addr_offset) {
ret_addr -= hi_addr_offset;
base = 1;
}
}
/* Read D18F0x110 (DramBaseAddress). */
if (amd_df_indirect_read(nid, 0, 0x110 + (8 * base), umc, &tmp))
goto out_err;
/* Check if address range is valid. */
if (!(tmp & BIT(0))) {
pr_err("%s: Invalid DramBaseAddress range: 0x%x.\n",
__func__, tmp);
goto out_err;
}
lgcy_mmio_hole_en = tmp & BIT(1);
intlv_num_chan = (tmp >> 4) & 0xF;
intlv_addr_sel = (tmp >> 8) & 0x7;
dram_base_addr = (tmp & GENMASK_ULL(31, 12)) << 16;
/* {0, 1, 2, 3} map to address bits {8, 9, 10, 11} respectively */
if (intlv_addr_sel > 3) {
pr_err("%s: Invalid interleave address select %d.\n",
__func__, intlv_addr_sel);
goto out_err;
}
/* Read D18F0x114 (DramLimitAddress). */
if (amd_df_indirect_read(nid, 0, 0x114 + (8 * base), umc, &tmp))
goto out_err;
intlv_num_sockets = (tmp >> 8) & 0x1;
intlv_num_dies = (tmp >> 10) & 0x3;
dram_limit_addr = ((tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0);
intlv_addr_bit = intlv_addr_sel + 8;
/* Re-use intlv_num_chan by setting it equal to log2(#channels) */
switch (intlv_num_chan) {
case 0: intlv_num_chan = 0; break;
case 1: intlv_num_chan = 1; break;
case 3: intlv_num_chan = 2; break;
case 5: intlv_num_chan = 3; break;
case 7: intlv_num_chan = 4; break;
case 8: intlv_num_chan = 1;
hash_enabled = true;
break;
default:
pr_err("%s: Invalid number of interleaved channels %d.\n",
__func__, intlv_num_chan);
goto out_err;
}
num_intlv_bits = intlv_num_chan;
if (intlv_num_dies > 2) {
pr_err("%s: Invalid number of interleaved nodes/dies %d.\n",
__func__, intlv_num_dies);
goto out_err;
}
num_intlv_bits += intlv_num_dies;
/* Add a bit if sockets are interleaved. */
num_intlv_bits += intlv_num_sockets;
/* Assert num_intlv_bits <= 4 */
if (num_intlv_bits > 4) {
pr_err("%s: Invalid interleave bits %d.\n",
__func__, num_intlv_bits);
goto out_err;
}
if (num_intlv_bits > 0) {
u64 temp_addr_x, temp_addr_i, temp_addr_y;
u8 die_id_bit, sock_id_bit, cs_fabric_id;
/*
* Read FabricBlockInstanceInformation3_CS[BlockFabricID].
* This is the fabric id for this coherent slave. Use
* umc/channel# as instance id of the coherent slave
* for FICAA.
*/
if (amd_df_indirect_read(nid, 0, 0x50, umc, &tmp))
goto out_err;
cs_fabric_id = (tmp >> 8) & 0xFF;
die_id_bit = 0;
/* If interleaved over more than 1 channel: */
if (intlv_num_chan) {
die_id_bit = intlv_num_chan;
cs_mask = (1 << die_id_bit) - 1;
cs_id = cs_fabric_id & cs_mask;
}
sock_id_bit = die_id_bit;
/* Read D18F1x208 (SystemFabricIdMask). */
if (intlv_num_dies || intlv_num_sockets)
if (amd_df_indirect_read(nid, 1, 0x208, umc, &tmp))
goto out_err;
/* If interleaved over more than 1 die. */
if (intlv_num_dies) {
sock_id_bit = die_id_bit + intlv_num_dies;
die_id_shift = (tmp >> 24) & 0xF;
die_id_mask = (tmp >> 8) & 0xFF;
cs_id |= ((cs_fabric_id & die_id_mask) >> die_id_shift) << die_id_bit;
}
/* If interleaved over more than 1 socket. */
if (intlv_num_sockets) {
socket_id_shift = (tmp >> 28) & 0xF;
socket_id_mask = (tmp >> 16) & 0xFF;
cs_id |= ((cs_fabric_id & socket_id_mask) >> socket_id_shift) << sock_id_bit;
}
/*
* The pre-interleaved address consists of XXXXXXIIIYYYYY
* where III is the ID for this CS, and XXXXXXYYYYY are the
* address bits from the post-interleaved address.
* "num_intlv_bits" has been calculated to tell us how many "I"
* bits there are. "intlv_addr_bit" tells us how many "Y" bits
* there are (where "I" starts).
*/
temp_addr_y = ret_addr & GENMASK_ULL(intlv_addr_bit-1, 0);
temp_addr_i = (cs_id << intlv_addr_bit);
temp_addr_x = (ret_addr & GENMASK_ULL(63, intlv_addr_bit)) << num_intlv_bits;
ret_addr = temp_addr_x | temp_addr_i | temp_addr_y;
}
/* Add dram base address */
ret_addr += dram_base_addr;
/* If legacy MMIO hole enabled */
if (lgcy_mmio_hole_en) {
if (amd_df_indirect_read(nid, 0, 0x104, umc, &tmp))
goto out_err;
dram_hole_base = tmp & GENMASK(31, 24);
if (ret_addr >= dram_hole_base)
ret_addr += (BIT_ULL(32) - dram_hole_base);
}
if (hash_enabled) {
/* Save some parentheses and grab ls-bit at the end. */
hashed_bit = (ret_addr >> 12) ^
(ret_addr >> 18) ^
(ret_addr >> 21) ^
(ret_addr >> 30) ^
cs_id;
hashed_bit &= BIT(0);
/* Flip the interleave address bit when it disagrees with the hash. */
if (hashed_bit != ((ret_addr >> intlv_addr_bit) & BIT(0)))
ret_addr ^= BIT(intlv_addr_bit);
}
/* Is the calculated system address above the DRAM limit address? */
if (ret_addr > dram_limit_addr)
goto out_err;
*sys_addr = ret_addr;
return 0;
out_err:
return -EINVAL;
}
EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr);
static void static void
__log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc) __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
{ {
...@@ -645,6 +865,7 @@ static void amd_threshold_interrupt(void) ...@@ -645,6 +865,7 @@ static void amd_threshold_interrupt(void)
{ {
u32 low = 0, high = 0, address = 0; u32 low = 0, high = 0, address = 0;
unsigned int bank, block, cpu = smp_processor_id(); unsigned int bank, block, cpu = smp_processor_id();
struct thresh_restart tr;
/* assume first bank caused it */ /* assume first bank caused it */
for (bank = 0; bank < mca_cfg.banks; ++bank) { for (bank = 0; bank < mca_cfg.banks; ++bank) {
...@@ -681,6 +902,11 @@ static void amd_threshold_interrupt(void) ...@@ -681,6 +902,11 @@ static void amd_threshold_interrupt(void)
log: log:
__log_error(bank, false, true, ((u64)high << 32) | low); __log_error(bank, false, true, ((u64)high << 32) | low);
/* Reset threshold block after logging error. */
memset(&tr, 0, sizeof(tr));
tr.b = &per_cpu(threshold_banks, cpu)[bank]->blocks[block];
threshold_restart_bank(&tr);
} }
/* /*
...@@ -826,10 +1052,10 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) ...@@ -826,10 +1052,10 @@ static const char *get_name(unsigned int bank, struct threshold_block *b)
return th_names[bank]; return th_names[bank];
} }
if (!smca_banks[bank].type) if (!smca_banks[bank].hwid)
return NULL; return NULL;
bank_type = smca_banks[bank].type->bank_type; bank_type = smca_banks[bank].hwid->bank_type;
if (b && bank_type == SMCA_UMC) { if (b && bank_type == SMCA_UMC) {
if (b->block < ARRAY_SIZE(smca_umc_block_names)) if (b->block < ARRAY_SIZE(smca_umc_block_names))
...@@ -838,8 +1064,8 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) ...@@ -838,8 +1064,8 @@ static const char *get_name(unsigned int bank, struct threshold_block *b)
} }
snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN,
"%s_%x", smca_bank_names[bank_type].name, "%s_%x", smca_get_name(bank_type),
smca_banks[bank].type_instance); smca_banks[bank].id);
return buf_mcatype; return buf_mcatype;
} }
......
...@@ -11,6 +11,8 @@ ...@@ -11,6 +11,8 @@
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/cpumask.h> #include <linux/cpumask.h>
#include <asm/apic.h> #include <asm/apic.h>
#include <asm/cpufeature.h>
#include <asm/intel-family.h>
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/msr.h> #include <asm/msr.h>
#include <asm/mce.h> #include <asm/mce.h>
...@@ -130,7 +132,7 @@ bool mce_intel_cmci_poll(void) ...@@ -130,7 +132,7 @@ bool mce_intel_cmci_poll(void)
* Reset the counter if we've logged an error in the last poll * Reset the counter if we've logged an error in the last poll
* during the storm. * during the storm.
*/ */
if (machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned))) if (machine_check_poll(0, this_cpu_ptr(&mce_banks_owned)))
this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL); this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
else else
this_cpu_dec(cmci_backoff_cnt); this_cpu_dec(cmci_backoff_cnt);
...@@ -342,7 +344,7 @@ void cmci_recheck(void) ...@@ -342,7 +344,7 @@ void cmci_recheck(void)
return; return;
local_irq_save(flags); local_irq_save(flags);
machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)); machine_check_poll(0, this_cpu_ptr(&mce_banks_owned));
local_irq_restore(flags); local_irq_restore(flags);
} }
...@@ -464,11 +466,46 @@ static void intel_clear_lmce(void) ...@@ -464,11 +466,46 @@ static void intel_clear_lmce(void)
wrmsrl(MSR_IA32_MCG_EXT_CTL, val); wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
} }
/*
 * Probe MSR_PPIN_CTL on a fixed list of CPU models and, when the PPIN ends
 * up enabled, advertise X86_FEATURE_INTEL_PPIN on @c.
 *
 * @c: CPU descriptor whose model is checked and whose capability bit is set.
 *
 * Nothing is returned; on any failure the capability is simply left unset.
 */
static void intel_ppin_init(struct cpuinfo_x86 *c)
{
	unsigned long long ctl;
	unsigned long long state;

	/*
	 * Probing the MSR alone would be sufficient to detect the feature,
	 * but some other model might reuse the same MSR number for an
	 * unrelated purpose. Restrict ourselves to the models listed here.
	 */
	switch (c->x86_model) {
	case INTEL_FAM6_IVYBRIDGE_X:
	case INTEL_FAM6_HASWELL_X:
	case INTEL_FAM6_BROADWELL_XEON_D:
	case INTEL_FAM6_BROADWELL_X:
	case INTEL_FAM6_SKYLAKE_X:
		break;
	default:
		return;
	}

	/* A faulting read means there is no usable control MSR. */
	if (rdmsrl_safe(MSR_PPIN_CTL, &ctl))
		return;

	/* Only the two low bits of the control MSR are examined. */
	state = ctl & 3UL;

	/* PPIN exists but is disabled; no attempt is made to enable it. */
	if (state == 1UL)
		return;

	/*
	 * Disabled but not locked: try to switch it on, then read the MSR
	 * back so the final check sees what the hardware actually accepted.
	 * Both accesses are best-effort; their results are not checked.
	 */
	if (!state) {
		wrmsrl_safe(MSR_PPIN_CTL, ctl | 2UL);
		rdmsrl_safe(MSR_PPIN_CTL, &ctl);
		state = ctl & 3UL;
	}

	/* Advertise the feature only when the enable bit alone is set. */
	if (state == 2UL)
		set_cpu_cap(c, X86_FEATURE_INTEL_PPIN);
}
void mce_intel_feature_init(struct cpuinfo_x86 *c) void mce_intel_feature_init(struct cpuinfo_x86 *c)
{ {
intel_init_thermal(c); intel_init_thermal(c);
intel_init_cmci(); intel_init_cmci();
intel_init_lmce(); intel_init_lmce();
intel_ppin_init(c);
} }
void mce_intel_feature_clear(struct cpuinfo_x86 *c) void mce_intel_feature_clear(struct cpuinfo_x86 *c)
......
...@@ -275,6 +275,8 @@ static void do_inject(void) ...@@ -275,6 +275,8 @@ static void do_inject(void)
unsigned int cpu = i_mce.extcpu; unsigned int cpu = i_mce.extcpu;
u8 b = i_mce.bank; u8 b = i_mce.bank;
rdtscll(i_mce.tsc);
if (i_mce.misc) if (i_mce.misc)
i_mce.status |= MCI_STATUS_MISCV; i_mce.status |= MCI_STATUS_MISCV;
......
...@@ -851,7 +851,7 @@ static void decode_mc6_mce(struct mce *m) ...@@ -851,7 +851,7 @@ static void decode_mc6_mce(struct mce *m)
/* Decode errors according to Scalable MCA specification */ /* Decode errors according to Scalable MCA specification */
static void decode_smca_errors(struct mce *m) static void decode_smca_errors(struct mce *m)
{ {
struct smca_hwid_mcatype *type; struct smca_hwid *hwid;
unsigned int bank_type; unsigned int bank_type;
const char *ip_name; const char *ip_name;
u8 xec = XEC(m->status, xec_mask); u8 xec = XEC(m->status, xec_mask);
...@@ -862,18 +862,18 @@ static void decode_smca_errors(struct mce *m) ...@@ -862,18 +862,18 @@ static void decode_smca_errors(struct mce *m)
if (boot_cpu_data.x86 >= 0x17 && m->bank == 4) if (boot_cpu_data.x86 >= 0x17 && m->bank == 4)
pr_emerg(HW_ERR "Bank 4 is reserved on Fam17h.\n"); pr_emerg(HW_ERR "Bank 4 is reserved on Fam17h.\n");
type = smca_banks[m->bank].type; hwid = smca_banks[m->bank].hwid;
if (!type) if (!hwid)
return; return;
bank_type = type->bank_type; bank_type = hwid->bank_type;
ip_name = smca_bank_names[bank_type].long_name; ip_name = smca_get_long_name(bank_type);
pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec); pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec);
/* Only print the decode of valid error codes */ /* Only print the decode of valid error codes */
if (xec < smca_mce_descs[bank_type].num_descs && if (xec < smca_mce_descs[bank_type].num_descs &&
(type->xec_bitmap & BIT_ULL(xec))) { (hwid->xec_bitmap & BIT_ULL(xec))) {
pr_emerg(HW_ERR "%s Error: ", ip_name); pr_emerg(HW_ERR "%s Error: ", ip_name);
pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]); pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment