Commit cf6ed9a6 authored by Linus Torvalds

Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS updates from Ingo Molnar:
 "Main changes in this cycle were:

   - AMD MCE/RAS handling updates (Yazen Ghannam, Aravind
     Gopalakrishnan)

   - Cleanups (Borislav Petkov)

   - logging fix (Tony Luck)"

* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/RAS: Add SMCA support to AMD Error Injector
  EDAC, mce_amd: Detect SMCA using X86_FEATURE_SMCA
  x86/mce: Update AMD mcheck init to use cpu_has() facilities
  x86/cpu: Add detection of AMD RAS Capabilities
  x86/mce/AMD: Save an indentation level in prepare_threshold_block()
  x86/mce/AMD: Disable LogDeferredInMcaStat for SMCA systems
  x86/mce/AMD: Log Deferred Errors using SMCA MCA_DE{STAT,ADDR} registers
  x86/mce: Detect local MCEs properly
  x86/mce: Look in genpool instead of mcelog for pending error records
  x86/mce: Detect and use SMCA-specific msr_ops
  x86/mce: Define vendor-specific MSR accessors
  x86/mce: Carve out writes to MCx_STATUS and MCx_CTL
  x86/mce: Grade uncorrected errors for SMCA-enabled systems
  x86/mce: Log MCEs after a warm reset on AMD, Fam17h and later
  x86/mce: Remove explicit smp_rmb() when starting CPUs sync
  x86/RAS: Rename AMD MCE injector config item
parents 36db171c 754a9230
@@ -27,6 +27,7 @@ enum cpuid_leafs
     CPUID_6_EAX,
     CPUID_8000_000A_EDX,
     CPUID_7_ECX,
+    CPUID_8000_0007_EBX,
 };

 #ifdef CONFIG_X86_FEATURE_NAMES
...
@@ -12,7 +12,7 @@
 /*
  * Defines x86 CPU feature bits
  */
-#define NCAPINTS    17    /* N 32-bit words worth of info */
+#define NCAPINTS    18    /* N 32-bit words worth of info */
 #define NBUGINTS    1     /* N 32-bit bug flags */

 /*
@@ -282,6 +282,11 @@
 #define X86_FEATURE_PKU   (16*32+ 3) /* Protection Keys for Userspace */
 #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */

+/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
+#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
+#define X86_FEATURE_SUCCOR         (17*32+1) /* Uncorrectable error containment and recovery */
+#define X86_FEATURE_SMCA           (17*32+3) /* Scalable MCA */
+
 /*
  * BUG word(s)
  */
...
@@ -104,13 +104,23 @@
 #define MCE_LOG_SIGNATURE "MACHINECHECK"

 /* AMD Scalable MCA */
+#define MSR_AMD64_SMCA_MC0_CTL     0xc0002000
+#define MSR_AMD64_SMCA_MC0_STATUS  0xc0002001
+#define MSR_AMD64_SMCA_MC0_ADDR    0xc0002002
 #define MSR_AMD64_SMCA_MC0_MISC0   0xc0002003
 #define MSR_AMD64_SMCA_MC0_CONFIG  0xc0002004
 #define MSR_AMD64_SMCA_MC0_IPID    0xc0002005
+#define MSR_AMD64_SMCA_MC0_DESTAT  0xc0002008
+#define MSR_AMD64_SMCA_MC0_DEADDR  0xc0002009
 #define MSR_AMD64_SMCA_MC0_MISC1   0xc000200a
+#define MSR_AMD64_SMCA_MCx_CTL(x)      (MSR_AMD64_SMCA_MC0_CTL + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_STATUS(x)   (MSR_AMD64_SMCA_MC0_STATUS + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_ADDR(x)     (MSR_AMD64_SMCA_MC0_ADDR + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_MISC(x)     (MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_CONFIG(x)   (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_IPID(x)     (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_DESTAT(x)   (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_DEADDR(x)   (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))

 /*
@@ -168,9 +178,18 @@ struct mce_vendor_flags {
           __reserved_0 : 61;
 };

+struct mca_msr_regs {
+    u32 (*ctl)    (int bank);
+    u32 (*status) (int bank);
+    u32 (*addr)   (int bank);
+    u32 (*misc)   (int bank);
+};
+
 extern struct mce_vendor_flags mce_flags;

 extern struct mca_config mca_cfg;
+extern struct mca_msr_regs msr_ops;
+
 extern void mce_register_decode_chain(struct notifier_block *nb);
 extern void mce_unregister_decode_chain(struct notifier_block *nb);
...
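Note on the new MSR map (illustration only, not part of the patch): each SMCA bank occupies a 0x10-wide window of MSRs starting at 0xc0002000, so the MCx_* macros above simply add a 0x10*bank stride to the MC0 base addresses. Worked example with an arbitrary bank number:

    MSR_AMD64_SMCA_MCx_STATUS(5)   /* 0xc0002001 + 0x10*5 = 0xc0002051 */
    MSR_AMD64_SMCA_MCx_DESTAT(5)   /* 0xc0002008 + 0x10*5 = 0xc0002058 */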
@@ -717,6 +717,13 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
         }
     }

+    if (c->extended_cpuid_level >= 0x80000007) {
+        cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
+
+        c->x86_capability[CPUID_8000_0007_EBX] = ebx;
+        c->x86_power = edx;
+    }
+
     if (c->extended_cpuid_level >= 0x80000008) {
         cpuid(0x80000008, &eax, &ebx, &ecx, &edx);

@@ -729,9 +736,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
         c->x86_phys_bits = 36;
 #endif

-    if (c->extended_cpuid_level >= 0x80000007)
-        c->x86_power = cpuid_edx(0x80000007);
-
     if (c->extended_cpuid_level >= 0x8000000a)
         c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
...
@@ -26,6 +26,52 @@ static struct gen_pool *mce_evt_pool;
 static LLIST_HEAD(mce_event_llist);
 static char gen_pool_buf[MCE_POOLSZ];

+/*
+ * Compare the record "t" with each of the records on list "l" to see if
+ * an equivalent one is present in the list.
+ */
+static bool is_duplicate_mce_record(struct mce_evt_llist *t, struct mce_evt_llist *l)
+{
+    struct mce_evt_llist *node;
+    struct mce *m1, *m2;
+
+    m1 = &t->mce;
+
+    llist_for_each_entry(node, &l->llnode, llnode) {
+        m2 = &node->mce;
+
+        if (!mce_cmp(m1, m2))
+            return true;
+    }
+    return false;
+}
+
+/*
+ * The system has panicked - we'd like to peruse the list of MCE records
+ * that have been queued, but not seen by anyone yet. The list is in
+ * reverse time order, so we need to reverse it. While doing that we can
+ * also drop duplicate records (these were logged because some banks are
+ * shared between cores or by all threads on a socket).
+ */
+struct llist_node *mce_gen_pool_prepare_records(void)
+{
+    struct llist_node *head;
+    LLIST_HEAD(new_head);
+    struct mce_evt_llist *node, *t;
+
+    head = llist_del_all(&mce_event_llist);
+    if (!head)
+        return NULL;
+
+    /* squeeze out duplicates while reversing order */
+    llist_for_each_entry_safe(node, t, head, llnode) {
+        if (!is_duplicate_mce_record(node, t))
+            llist_add(&node->llnode, &new_head);
+    }
+
+    return new_head.first;
+}
+
 void mce_gen_pool_process(void)
 {
     struct llist_node *head;
...
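Worked example of the ordering (not part of the patch): llist_add() pushes at the head, so if records A, B and C were queued in that time order the pending list reads C -> B -> A. Walking it with llist_for_each_entry_safe() and re-pushing each non-duplicate node onto new_head therefore yields A -> B -> C, i.e. oldest first, which is the order mce_panic() wants to print.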
@@ -35,6 +35,7 @@ void mce_gen_pool_process(void);
 bool mce_gen_pool_empty(void);
 int mce_gen_pool_add(struct mce *mce);
 int mce_gen_pool_init(void);
+struct llist_node *mce_gen_pool_prepare_records(void);

 extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
 struct dentry *mce_get_debugfs_dir(void);
@@ -81,3 +82,17 @@ static inline int apei_clear_mce(u64 record_id)
 #endif

 void mce_inject_log(struct mce *m);
+
+/*
+ * We consider records to be equivalent if bank+status+addr+misc all match.
+ * This is only used when the system is going down because of a fatal error
+ * to avoid cluttering the console log with essentially repeated information.
+ * In normal processing all errors seen are logged.
+ */
+static inline bool mce_cmp(struct mce *m1, struct mce *m2)
+{
+    return m1->bank != m2->bank ||
+           m1->status != m2->status ||
+           m1->addr != m2->addr ||
+           m1->misc != m2->misc;
+}
@@ -204,6 +204,33 @@ static int error_context(struct mce *m)
     return IN_KERNEL;
 }

+static int mce_severity_amd_smca(struct mce *m, int err_ctx)
+{
+    u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
+    u32 low, high;
+
+    /*
+     * We need to look at the following bits:
+     * - "succor" bit (data poisoning support), and
+     * - TCC bit (Task Context Corrupt)
+     * in MCi_STATUS to determine error severity.
+     */
+    if (!mce_flags.succor)
+        return MCE_PANIC_SEVERITY;
+
+    if (rdmsr_safe(addr, &low, &high))
+        return MCE_PANIC_SEVERITY;
+
+    /* TCC (Task context corrupt). If set and if IN_KERNEL, panic. */
+    if ((low & MCI_CONFIG_MCAX) &&
+        (m->status & MCI_STATUS_TCC) &&
+        (err_ctx == IN_KERNEL))
+        return MCE_PANIC_SEVERITY;
+
+    /* ...otherwise invoke hwpoison handler. */
+    return MCE_AR_SEVERITY;
+}
+
 /*
  * See AMD Error Scope Hierarchy table in a newer BKDG. For example
  * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
@@ -225,6 +252,9 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_excp)
      * to at least kill process to prolong system operation.
      */
     if (mce_flags.overflow_recov) {
+        if (mce_flags.smca)
+            return mce_severity_amd_smca(m, ctx);
+
         /* software can try to contain */
         if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == IN_KERNEL))
             return MCE_PANIC_SEVERITY;
...
@@ -161,7 +161,6 @@ void mce_log(struct mce *mce)
     if (!mce_gen_pool_add(mce))
         irq_work_queue(&mce_irq_work);

-    mce->finished = 0;
     wmb();
     for (;;) {
         entry = mce_log_get_idx_check(mcelog.next);
@@ -194,7 +193,6 @@ void mce_log(struct mce *mce)
     mcelog.entry[entry].finished = 1;
     wmb();

-    mce->finished = 1;
     set_bit(0, &mce_need_notify);
 }
@@ -224,6 +222,53 @@ void mce_unregister_decode_chain(struct notifier_block *nb)
 }
 EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);

+static inline u32 ctl_reg(int bank)
+{
+    return MSR_IA32_MCx_CTL(bank);
+}
+
+static inline u32 status_reg(int bank)
+{
+    return MSR_IA32_MCx_STATUS(bank);
+}
+
+static inline u32 addr_reg(int bank)
+{
+    return MSR_IA32_MCx_ADDR(bank);
+}
+
+static inline u32 misc_reg(int bank)
+{
+    return MSR_IA32_MCx_MISC(bank);
+}
+
+static inline u32 smca_ctl_reg(int bank)
+{
+    return MSR_AMD64_SMCA_MCx_CTL(bank);
+}
+
+static inline u32 smca_status_reg(int bank)
+{
+    return MSR_AMD64_SMCA_MCx_STATUS(bank);
+}
+
+static inline u32 smca_addr_reg(int bank)
+{
+    return MSR_AMD64_SMCA_MCx_ADDR(bank);
+}
+
+static inline u32 smca_misc_reg(int bank)
+{
+    return MSR_AMD64_SMCA_MCx_MISC(bank);
+}
+
+struct mca_msr_regs msr_ops = {
+    .ctl    = ctl_reg,
+    .status = status_reg,
+    .addr   = addr_reg,
+    .misc   = misc_reg
+};
+
 static void print_mce(struct mce *m)
 {
     int ret = 0;
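Illustration of the indirection (not part of the patch): the default ops above resolve to the legacy MSR_IA32_MCx_* addresses, and __mcheck_cpu_init_vendor() further down swaps in the smca_* accessors when X86_FEATURE_SMCA is set, so call sites stay vendor-agnostic. With an arbitrary example bank:

    int bank = 3;
    u64 status = mce_rdmsrl(msr_ops.status(bank));   /* reads MSR 0x40d (legacy) or 0xc0002031 (SMCA) */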
@@ -290,7 +335,9 @@ static void wait_for_panic(void)

 static void mce_panic(const char *msg, struct mce *final, char *exp)
 {
-    int i, apei_err = 0;
+    int apei_err = 0;
+    struct llist_node *pending;
+    struct mce_evt_llist *l;

     if (!fake_panic) {
         /*
@@ -307,11 +354,10 @@ static void mce_panic(const char *msg, struct mce *final, char *exp)
         if (atomic_inc_return(&mce_fake_panicked) > 1)
             return;
     }
+    pending = mce_gen_pool_prepare_records();
     /* First print corrected ones that are still unlogged */
-    for (i = 0; i < MCE_LOG_LEN; i++) {
-        struct mce *m = &mcelog.entry[i];
-        if (!(m->status & MCI_STATUS_VAL))
-            continue;
+    llist_for_each_entry(l, pending, llnode) {
+        struct mce *m = &l->mce;
         if (!(m->status & MCI_STATUS_UC)) {
             print_mce(m);
             if (!apei_err)
@@ -319,13 +365,11 @@ static void mce_panic(const char *msg, struct mce *final, char *exp)
         }
     }
     /* Now print uncorrected but with the final one last */
-    for (i = 0; i < MCE_LOG_LEN; i++) {
-        struct mce *m = &mcelog.entry[i];
-        if (!(m->status & MCI_STATUS_VAL))
-            continue;
+    llist_for_each_entry(l, pending, llnode) {
+        struct mce *m = &l->mce;
         if (!(m->status & MCI_STATUS_UC))
             continue;
-        if (!final || memcmp(m, final, sizeof(struct mce))) {
+        if (!final || mce_cmp(m, final)) {
             print_mce(m);
             if (!apei_err)
                 apei_err = apei_write_mce(m);
@@ -356,11 +400,11 @@ static int msr_to_offset(u32 msr)

     if (msr == mca_cfg.rip_msr)
         return offsetof(struct mce, ip);
-    if (msr == MSR_IA32_MCx_STATUS(bank))
+    if (msr == msr_ops.status(bank))
         return offsetof(struct mce, status);
-    if (msr == MSR_IA32_MCx_ADDR(bank))
+    if (msr == msr_ops.addr(bank))
         return offsetof(struct mce, addr);
-    if (msr == MSR_IA32_MCx_MISC(bank))
+    if (msr == msr_ops.misc(bank))
         return offsetof(struct mce, misc);
     if (msr == MSR_IA32_MCG_STATUS)
         return offsetof(struct mce, mcgstatus);
@@ -523,9 +567,9 @@ static struct notifier_block mce_srao_nb = {
 static void mce_read_aux(struct mce *m, int i)
 {
     if (m->status & MCI_STATUS_MISCV)
-        m->misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
+        m->misc = mce_rdmsrl(msr_ops.misc(i));
     if (m->status & MCI_STATUS_ADDRV) {
-        m->addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
+        m->addr = mce_rdmsrl(msr_ops.addr(i));

         /*
          * Mask the reported address by the reported granularity.
@@ -607,7 +651,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
         m.tsc = 0;

         barrier();
-        m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
+        m.status = mce_rdmsrl(msr_ops.status(i));
         if (!(m.status & MCI_STATUS_VAL))
             continue;
@@ -654,7 +698,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
         /*
          * Clear state for this bank.
          */
-        mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
+        mce_wrmsrl(msr_ops.status(i), 0);
     }

     /*
@@ -679,7 +723,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
     char *tmp;

     for (i = 0; i < mca_cfg.banks; i++) {
-        m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
+        m->status = mce_rdmsrl(msr_ops.status(i));
         if (m->status & MCI_STATUS_VAL) {
             __set_bit(i, validp);
             if (quirk_no_way_out)
@@ -830,9 +874,9 @@ static int mce_start(int *no_way_out)
     atomic_add(*no_way_out, &global_nwo);
     /*
-     * global_nwo should be updated before mce_callin
+     * Rely on the implied barrier below, such that global_nwo
+     * is updated before mce_callin.
      */
-    smp_wmb();
     order = atomic_inc_return(&mce_callin);

     /*
@@ -957,7 +1001,7 @@ static void mce_clear_state(unsigned long *toclear)

     for (i = 0; i < mca_cfg.banks; i++) {
         if (test_bit(i, toclear))
-            mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
+            mce_wrmsrl(msr_ops.status(i), 0);
     }
 }
@@ -994,11 +1038,12 @@ void do_machine_check(struct pt_regs *regs, long error_code)
     int i;
     int worst = 0;
     int severity;
+
     /*
      * Establish sequential order between the CPUs entering the machine
      * check handler.
      */
-    int order;
+    int order = -1;
     /*
      * If no_way_out gets set, there is no safe way to recover from this
      * MCE. If mca_cfg.tolerant is cranked up, we'll try anyway.
@@ -1012,7 +1057,12 @@ void do_machine_check(struct pt_regs *regs, long error_code)
     DECLARE_BITMAP(toclear, MAX_NR_BANKS);
     DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
     char *msg = "Unknown";
-    int lmce = 0;
+
+    /*
+     * MCEs are always local on AMD. Same is determined by MCG_STATUS_LMCES
+     * on Intel.
+     */
+    int lmce = 1;

     /* If this CPU is offline, just bail out. */
     if (cpu_is_offline(smp_processor_id())) {
@@ -1051,19 +1101,20 @@ void do_machine_check(struct pt_regs *regs, long error_code)
         kill_it = 1;

     /*
-     * Check if this MCE is signaled to only this logical processor
+     * Check if this MCE is signaled to only this logical processor,
+     * on Intel only.
      */
-    if (m.mcgstatus & MCG_STATUS_LMCES)
-        lmce = 1;
-    else {
-        /*
-         * Go through all the banks in exclusion of the other CPUs.
-         * This way we don't report duplicated events on shared banks
-         * because the first one to see it will clear it.
-         * If this is a Local MCE, then no need to perform rendezvous.
-         */
-        order = mce_start(&no_way_out);
-    }
+    if (m.cpuvendor == X86_VENDOR_INTEL)
+        lmce = m.mcgstatus & MCG_STATUS_LMCES;
+
+    /*
+     * Go through all banks in exclusion of the other CPUs. This way we
+     * don't report duplicated events on shared banks because the first one
+     * to see it will clear it. If this is a Local MCE, then no need to
+     * perform rendezvous.
+     */
+    if (!lmce)
+        order = mce_start(&no_way_out);

     for (i = 0; i < cfg->banks; i++) {
         __clear_bit(i, toclear);
@@ -1076,7 +1127,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
         m.addr = 0;
         m.bank = i;

-        m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
+        m.status = mce_rdmsrl(msr_ops.status(i));
         if ((m.status & MCI_STATUS_VAL) == 0)
             continue;
@@ -1420,7 +1471,6 @@ static void __mcheck_cpu_init_generic(void)
     enum mcp_flags m_fl = 0;
     mce_banks_t all_banks;
     u64 cap;
-    int i;

     if (!mca_cfg.bootlog)
         m_fl = MCP_DONTLOG;
@@ -1436,14 +1486,19 @@ static void __mcheck_cpu_init_generic(void)
     rdmsrl(MSR_IA32_MCG_CAP, cap);
     if (cap & MCG_CTL_P)
         wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+}
+
+static void __mcheck_cpu_init_clear_banks(void)
+{
+    int i;

     for (i = 0; i < mca_cfg.banks; i++) {
         struct mce_bank *b = &mce_banks[i];

         if (!b->init)
             continue;
-        wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
-        wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
+        wrmsrl(msr_ops.ctl(i), b->ctl);
+        wrmsrl(msr_ops.status(i), 0);
     }
 }
@@ -1495,7 +1550,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
          */
         clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
     }
-    if (c->x86 <= 17 && cfg->bootlog < 0) {
+    if (c->x86 < 17 && cfg->bootlog < 0) {
         /*
          * Lots of broken BIOS around that don't clear them
          * by default and leave crap in there. Don't log:
@@ -1628,11 +1683,19 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
         break;

     case X86_VENDOR_AMD: {
-        u32 ebx = cpuid_ebx(0x80000007);
-
-        mce_flags.overflow_recov = !!(ebx & BIT(0));
-        mce_flags.succor         = !!(ebx & BIT(1));
-        mce_flags.smca           = !!(ebx & BIT(3));
+        mce_flags.overflow_recov = !!cpu_has(c, X86_FEATURE_OVERFLOW_RECOV);
+        mce_flags.succor         = !!cpu_has(c, X86_FEATURE_SUCCOR);
+        mce_flags.smca           = !!cpu_has(c, X86_FEATURE_SMCA);
+
+        /*
+         * Install proper ops for Scalable MCA enabled processors
+         */
+        if (mce_flags.smca) {
+            msr_ops.ctl    = smca_ctl_reg;
+            msr_ops.status = smca_status_reg;
+            msr_ops.addr   = smca_addr_reg;
+            msr_ops.misc   = smca_misc_reg;
+        }
         mce_amd_feature_init(c);
         break;
@@ -1717,6 +1780,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
     __mcheck_cpu_init_generic();
     __mcheck_cpu_init_vendor(c);
+    __mcheck_cpu_init_clear_banks();
     __mcheck_cpu_init_timer();
 }
@@ -2082,7 +2146,7 @@ static void mce_disable_error_reporting(void)
         struct mce_bank *b = &mce_banks[i];

         if (b->init)
-            wrmsrl(MSR_IA32_MCx_CTL(i), 0);
+            wrmsrl(msr_ops.ctl(i), 0);
     }
     return;
 }
@@ -2121,6 +2185,7 @@ static void mce_syscore_resume(void)
 {
     __mcheck_cpu_init_generic();
     __mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info));
+    __mcheck_cpu_init_clear_banks();
 }

 static struct syscore_ops mce_syscore_ops = {
@@ -2138,6 +2203,7 @@ static void mce_cpu_restart(void *data)
     if (!mce_available(raw_cpu_ptr(&cpu_info)))
         return;
     __mcheck_cpu_init_generic();
+    __mcheck_cpu_init_clear_banks();
     __mcheck_cpu_init_timer();
 }
@@ -2413,7 +2479,7 @@ static void mce_reenable_cpu(void *h)
         struct mce_bank *b = &mce_banks[i];

         if (b->init)
-            wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
+            wrmsrl(msr_ops.ctl(i), b->ctl);
     }
 }
...
@@ -54,14 +54,6 @@
 /* Threshold LVT offset is at MSR0xC0000410[15:12] */
 #define SMCA_THR_LVT_OFF 0xF000

-/*
- * OS is required to set the MCAX bit to acknowledge that it is now using the
- * new MSR ranges and new registers under each bank. It also means that the OS
- * will configure deferred errors in the new MCx_CONFIG register. If the bit is
- * not set, uncorrectable errors will cause a system panic.
- */
-#define SMCA_MCAX_EN_OFF 0x1
-
 static const char * const th_names[] = {
     "load_store",
     "insn_fetch",
@@ -333,7 +325,7 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high,
     /* Fall back to method we used for older processors: */
     switch (block) {
     case 0:
-        addr = MSR_IA32_MCx_MISC(bank);
+        addr = msr_ops.misc(bank);
         break;
     case 1:
         offset = ((low & MASK_BLKPTR_LO) >> 21);
@@ -351,6 +343,7 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
                         int offset, u32 misc_high)
 {
     unsigned int cpu = smp_processor_id();
+    u32 smca_low, smca_high, smca_addr;
     struct threshold_block b;
     int new;
@@ -369,24 +362,49 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
     b.interrupt_enable = 1;

-    if (mce_flags.smca) {
-        u32 smca_low, smca_high;
-        u32 smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);
-
-        if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
-            smca_high |= SMCA_MCAX_EN_OFF;
-            wrmsr(smca_addr, smca_low, smca_high);
-        }
-
-        /* Gather LVT offset for thresholding: */
-        if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
-            goto out;
-
-        new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
-    } else {
+    if (!mce_flags.smca) {
         new = (misc_high & MASK_LVTOFF_HI) >> 20;
+        goto set_offset;
     }

+    smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);
+
+    if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
+        /*
+         * OS is required to set the MCAX bit to acknowledge that it is
+         * now using the new MSR ranges and new registers under each
+         * bank. It also means that the OS will configure deferred
+         * errors in the new MCx_CONFIG register. If the bit is not set,
+         * uncorrectable errors will cause a system panic.
+         *
+         * MCA_CONFIG[MCAX] is bit 32 (0 in the high portion of the MSR.)
+         */
+        smca_high |= BIT(0);
+
+        /*
+         * SMCA logs Deferred Error information in MCA_DE{STAT,ADDR}
+         * registers with the option of additionally logging to
+         * MCA_{STATUS,ADDR} if MCA_CONFIG[LogDeferredInMcaStat] is set.
+         *
+         * This bit is usually set by BIOS to retain the old behavior
+         * for OSes that don't use the new registers. Linux supports the
+         * new registers so let's disable that additional logging here.
+         *
+         * MCA_CONFIG[LogDeferredInMcaStat] is bit 34 (bit 2 in the high
+         * portion of the MSR).
+         */
+        smca_high &= ~BIT(2);
+
+        wrmsr(smca_addr, smca_low, smca_high);
+    }
+
+    /* Gather LVT offset for thresholding: */
+    if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
+        goto out;
+
+    new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
+
+set_offset:
     offset = setup_APIC_mce_threshold(offset, new);

     if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
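The two MCA_CONFIG updates above operate on the high half returned by rdmsr_safe(), so the 64-bit bit positions named in the comments land in smca_high. An equivalent 64-bit view, for illustration only (not part of the patch):

    u64 config = ((u64)smca_high << 32) | smca_low;
    config |=  BIT_ULL(32);   /* MCAX: acknowledge use of the new SMCA register space */
    config &= ~BIT_ULL(34);   /* LogDeferredInMcaStat: log deferred errors in MCA_DE{STAT,ADDR} only */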
@@ -430,12 +448,23 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
         deferred_error_interrupt_enable(c);
 }

-static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
+static void
+__log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
 {
+    u32 msr_status = msr_ops.status(bank);
+    u32 msr_addr = msr_ops.addr(bank);
     struct mce m;
     u64 status;

-    rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
+    WARN_ON_ONCE(deferred_err && threshold_err);
+
+    if (deferred_err && mce_flags.smca) {
+        msr_status = MSR_AMD64_SMCA_MCx_DESTAT(bank);
+        msr_addr = MSR_AMD64_SMCA_MCx_DEADDR(bank);
+    }
+
+    rdmsrl(msr_status, status);
+
     if (!(status & MCI_STATUS_VAL))
         return;
@@ -448,10 +477,11 @@ static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
     m.misc = misc;

     if (m.status & MCI_STATUS_ADDRV)
-        rdmsrl(MSR_IA32_MCx_ADDR(bank), m.addr);
+        rdmsrl(msr_addr, m.addr);

     mce_log(&m);
-    wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
+
+    wrmsrl(msr_status, 0);
 }

 static inline void __smp_deferred_error_interrupt(void)
@@ -479,17 +509,21 @@ asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
 /* APIC interrupt handler for deferred errors */
 static void amd_deferred_error_interrupt(void)
 {
-    u64 status;
     unsigned int bank;
+    u32 msr_status;
+    u64 status;

     for (bank = 0; bank < mca_cfg.banks; ++bank) {
-        rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
+        msr_status = (mce_flags.smca) ? MSR_AMD64_SMCA_MCx_DESTAT(bank)
+                                      : msr_ops.status(bank);
+
+        rdmsrl(msr_status, status);

         if (!(status & MCI_STATUS_VAL) ||
             !(status & MCI_STATUS_DEFERRED))
             continue;

-        __log_error(bank, false, 0);
+        __log_error(bank, true, false, 0);
         break;
     }
 }
@@ -544,7 +578,7 @@ static void amd_threshold_interrupt(void)
     return;

 log:
-    __log_error(bank, true, ((u64)high << 32) | low);
+    __log_error(bank, false, true, ((u64)high << 32) | low);
 }

 /*
...
-config AMD_MCE_INJ
+config MCE_AMD_INJ
     tristate "Simple MCE injection interface for AMD processors"
     depends on RAS && EDAC_DECODE_MCE && DEBUG_FS && AMD_NB
     default n
...
-obj-$(CONFIG_AMD_MCE_INJ) += mce_amd_inj.o
+obj-$(CONFIG_MCE_AMD_INJ) += mce_amd_inj.o
@@ -290,14 +290,33 @@ static void do_inject(void)
     wrmsr_on_cpu(cpu, MSR_IA32_MCG_STATUS,
                  (u32)mcg_status, (u32)(mcg_status >> 32));

-    wrmsr_on_cpu(cpu, MSR_IA32_MCx_STATUS(b),
-                 (u32)i_mce.status, (u32)(i_mce.status >> 32));
+    if (boot_cpu_has(X86_FEATURE_SMCA)) {
+        if (inj_type == DFR_INT_INJ) {
+            wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_DESTAT(b),
+                         (u32)i_mce.status, (u32)(i_mce.status >> 32));
+            wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_DEADDR(b),
+                         (u32)i_mce.addr, (u32)(i_mce.addr >> 32));
+        } else {
+            wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_STATUS(b),
+                         (u32)i_mce.status, (u32)(i_mce.status >> 32));
+            wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_ADDR(b),
+                         (u32)i_mce.addr, (u32)(i_mce.addr >> 32));
+        }
+
+        wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(b),
+                     (u32)i_mce.misc, (u32)(i_mce.misc >> 32));
+    } else {
+        wrmsr_on_cpu(cpu, MSR_IA32_MCx_STATUS(b),
+                     (u32)i_mce.status, (u32)(i_mce.status >> 32));

-    wrmsr_on_cpu(cpu, MSR_IA32_MCx_ADDR(b),
-                 (u32)i_mce.addr, (u32)(i_mce.addr >> 32));
+        wrmsr_on_cpu(cpu, MSR_IA32_MCx_ADDR(b),
+                     (u32)i_mce.addr, (u32)(i_mce.addr >> 32));

-    wrmsr_on_cpu(cpu, MSR_IA32_MCx_MISC(b),
-                 (u32)i_mce.misc, (u32)(i_mce.misc >> 32));
+        wrmsr_on_cpu(cpu, MSR_IA32_MCx_MISC(b),
+                     (u32)i_mce.misc, (u32)(i_mce.misc >> 32));
+    }

     toggle_hw_mce_inject(cpu, false);
...
@@ -1052,7 +1052,6 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
     struct mce *m = (struct mce *)data;
     struct cpuinfo_x86 *c = &cpu_data(m->extcpu);
     int ecc;
-    u32 ebx = cpuid_ebx(0x80000007);

     if (amd_filter_mce(m))
         return NOTIFY_STOP;
@@ -1075,7 +1074,7 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
         ((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
         ((m->status & MCI_STATUS_POISON) ? "Poison" : "-"));

-    if (!!(ebx & BIT(3))) {
+    if (boot_cpu_has(X86_FEATURE_SMCA)) {
         u32 low, high;
         u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
@@ -1094,7 +1093,7 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
     if (m->status & MCI_STATUS_ADDRV)
         pr_emerg(HW_ERR "MC%d Error Address: 0x%016llx\n", m->bank, m->addr);

-    if (!!(ebx & BIT(3))) {
+    if (boot_cpu_has(X86_FEATURE_SMCA)) {
         decode_smca_errors(m);
         goto err_code;
     }
@@ -1149,7 +1148,6 @@ static struct notifier_block amd_mce_dec_nb = {
 static int __init mce_amd_init(void)
 {
     struct cpuinfo_x86 *c = &boot_cpu_data;
-    u32 ebx;

     if (c->x86_vendor != X86_VENDOR_AMD)
         return -ENODEV;
@@ -1205,9 +1203,8 @@ static int __init mce_amd_init(void)
         break;

     case 0x17:
-        ebx = cpuid_ebx(0x80000007);
         xec_mask = 0x3f;
-        if (!(ebx & BIT(3))) {
+        if (!boot_cpu_has(X86_FEATURE_SMCA)) {
             printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n");
             goto err_out;
         }
...