Commit 07f2d8c6 authored by Linus Torvalds

Merge branch 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 RAS changes from Ingo Molnar:
 "The main changes in this cycle were:

   - Simplify the CMCI storm logic on Intel CPUs after yet another
     report about a race in the code (Borislav Petkov)

   - Enable the MCE threshold irq on AMD CPUs by default (Aravind
     Gopalakrishnan)

   - Add AMD-specific MCE-severity grading function.  Further error
     recovery actions will be based on its output (Aravind Gopalakrishnan)

   - Documentation updates (Borislav Petkov)

   - ... assorted fixes and cleanups"

* 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce/severity: Fix warning about indented braces
  x86/mce: Define mce_severity function pointer
  x86/mce: Add an AMD severities-grading function
  x86/mce: Reindent __mcheck_cpu_apply_quirks() properly
  x86/mce: Use safe MSR accesses for AMD quirk
  x86/MCE/AMD: Enable thresholding interrupts by default if supported
  x86/MCE: Make mce_panic() fatal machine check msg in the same pattern
  x86/MCE/intel: Cleanup CMCI storm logic
  Documentation/acpi/einj: Correct and streamline text
  x86/MCE/AMD: Drop bogus const modifier from AMD's bank4_names()
parents ee799f41 cee8f5a6
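
One of the changes pulled here replaces the single mce_severity() routine with a function pointer: it defaults to the Intel grader and is switched to an AMD-specific grader at init time (see the mce-severity.c hunks below). The following standalone sketch models only that dispatch pattern; it is not kernel code, and the vendor argument, the status bit and the grader bodies are simplified stand-ins for boot_cpu_data and the real severity tables.

/* Illustrative model of the vendor-dispatched severity grading (not kernel code). */
#include <stdio.h>

enum vendor   { VENDOR_INTEL, VENDOR_AMD };
enum severity { SEV_KEEP, SEV_UC, SEV_PANIC };

static int severity_intel(unsigned long long status)
{
	/* stand-in: the real Intel grader walks a table of severity rules */
	return (status >> 63) ? SEV_UC : SEV_KEEP;
}

static int severity_amd(unsigned long long status)
{
	/* stand-in: the real AMD grader checks PCC/UC/DEFERRED status bits */
	return (status >> 63) ? SEV_PANIC : SEV_KEEP;
}

/* Default to the Intel grader, as the diff below does for mce_severity. */
static int (*severity)(unsigned long long status) = severity_intel;

static void vendor_init_severity(enum vendor v)
{
	/* mirrors mcheck_vendor_init_severity(): swap the pointer for AMD */
	if (v == VENDOR_AMD)
		severity = severity_amd;
}

int main(void)
{
	vendor_init_severity(VENDOR_AMD);
	printf("graded severity: %d\n", severity(1ULL << 63));
	return 0;
}
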
@@ -116,6 +116,12 @@ struct mca_config {
 	u32 rip_msr;
 };
 
+struct mce_vendor_flags {
+	__u64 overflow_recov	: 1, /* cpuid_ebx(80000007) */
+	      __reserved_0	: 63;
+};
+extern struct mce_vendor_flags mce_flags;
+
 extern struct mca_config mca_cfg;
 extern void mce_register_decode_chain(struct notifier_block *nb);
 extern void mce_unregister_decode_chain(struct notifier_block *nb);
@@ -128,9 +134,11 @@ extern int mce_p5_enabled;
 #ifdef CONFIG_X86_MCE
 int mcheck_init(void);
 void mcheck_cpu_init(struct cpuinfo_x86 *c);
+void mcheck_vendor_init_severity(void);
 #else
 static inline int mcheck_init(void) { return 0; }
 static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
+static inline void mcheck_vendor_init_severity(void) {}
 #endif
 
 #ifdef CONFIG_X86_ANCIENT_MCE
@@ -183,11 +191,11 @@ typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
 DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
 
 enum mcp_flags {
-	MCP_TIMESTAMP	= (1 << 0),	/* log time stamp */
-	MCP_UC		= (1 << 1),	/* log uncorrected errors */
-	MCP_DONTLOG	= (1 << 2),	/* only clear, don't log */
+	MCP_TIMESTAMP	= BIT(0),	/* log time stamp */
+	MCP_UC		= BIT(1),	/* log uncorrected errors */
+	MCP_DONTLOG	= BIT(2),	/* only clear, don't log */
 };
-void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
+bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
 
 int mce_notify_irq(void);
...
@@ -14,6 +14,7 @@ enum severity_level {
 };
 
 #define ATTR_LEN		16
+#define INITIAL_CHECK_INTERVAL	5 * 60 /* 5 minutes */
 
 /* One object for each MCE bank, shared by all CPUs */
 struct mce_bank {
@@ -23,20 +24,20 @@ struct mce_bank {
 	char			attrname[ATTR_LEN];	/* attribute name */
 };
 
-int mce_severity(struct mce *a, int tolerant, char **msg, bool is_excp);
+extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
 struct dentry *mce_get_debugfs_dir(void);
 
 extern struct mce_bank *mce_banks;
 extern mce_banks_t mce_banks_ce_disabled;
 
 #ifdef CONFIG_X86_MCE_INTEL
-unsigned long mce_intel_adjust_timer(unsigned long interval);
-void mce_intel_cmci_poll(void);
+unsigned long cmci_intel_adjust_timer(unsigned long interval);
+bool mce_intel_cmci_poll(void);
 void mce_intel_hcpu_update(unsigned long cpu);
 void cmci_disable_bank(int bank);
 #else
-# define mce_intel_adjust_timer mce_adjust_timer_default
-static inline void mce_intel_cmci_poll(void) { }
+# define cmci_intel_adjust_timer mce_adjust_timer_default
+static inline bool mce_intel_cmci_poll(void) { return false; }
 static inline void mce_intel_hcpu_update(unsigned long cpu) { }
 static inline void cmci_disable_bank(int bank) { }
 #endif
...
@@ -186,7 +186,61 @@ static int error_context(struct mce *m)
 	return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
 }
 
-int mce_severity(struct mce *m, int tolerant, char **msg, bool is_excp)
+/*
+ * See AMD Error Scope Hierarchy table in a newer BKDG. For example
+ * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
+ */
+static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_excp)
+{
+	enum context ctx = error_context(m);
+
+	/* Processor Context Corrupt, no need to fumble too much, die! */
+	if (m->status & MCI_STATUS_PCC)
+		return MCE_PANIC_SEVERITY;
+
+	if (m->status & MCI_STATUS_UC) {
+		/*
+		 * On older systems where overflow_recov flag is not present, we
+		 * should simply panic if an error overflow occurs. If
+		 * overflow_recov flag is present and set, then software can try
+		 * to at least kill process to prolong system operation.
+		 */
+		if (mce_flags.overflow_recov) {
+			/* software can try to contain */
+			if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == IN_KERNEL))
+				return MCE_PANIC_SEVERITY;
+
+			/* kill current process */
+			return MCE_AR_SEVERITY;
+		} else {
+			/* at least one error was not logged */
+			if (m->status & MCI_STATUS_OVER)
+				return MCE_PANIC_SEVERITY;
+		}
+
+		/*
+		 * For any other case, return MCE_UC_SEVERITY so that we log the
+		 * error and exit #MC handler.
+		 */
+		return MCE_UC_SEVERITY;
+	}
+
+	/*
+	 * deferred error: poll handler catches these and adds to mce_ring so
+	 * memory-failure can take recovery actions.
+	 */
+	if (m->status & MCI_STATUS_DEFERRED)
+		return MCE_DEFERRED_SEVERITY;
+
+	/*
+	 * corrected error: poll handler catches these and passes responsibility
+	 * of decoding the error to EDAC
+	 */
+	return MCE_KEEP_SEVERITY;
+}
+
+static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_excp)
 {
 	enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
 	enum context ctx = error_context(m);
@@ -216,6 +270,16 @@ int mce_severity(struct mce *m, int tolerant, char **msg, bool is_excp)
 	}
 }
 
+/* Default to mce_severity_intel */
+int (*mce_severity)(struct mce *m, int tolerant, char **msg, bool is_excp) =
+		    mce_severity_intel;
+
+void __init mcheck_vendor_init_severity(void)
+{
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+		mce_severity = mce_severity_amd;
+}
+
 #ifdef CONFIG_DEBUG_FS
 static void *s_start(struct seq_file *f, loff_t *pos)
 {
...
@@ -60,11 +60,12 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
 #define CREATE_TRACE_POINTS
 #include <trace/events/mce.h>
 
 #define SPINUNIT		100	/* 100ns */
 
 DEFINE_PER_CPU(unsigned, mce_exception_count);
 
 struct mce_bank *mce_banks __read_mostly;
+struct mce_vendor_flags mce_flags __read_mostly;
 
 struct mca_config mca_cfg __read_mostly = {
 	.bootlog  = -1,
@@ -89,9 +90,6 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
 static DEFINE_PER_CPU(struct mce, mces_seen);
 static int			cpu_missing;
 
-/* CMCI storm detection filter */
-static DEFINE_PER_CPU(unsigned long, mce_polled_error);
-
 /*
  * MCA banks polled by the period polling timer for corrected events.
  * With Intel CMCI, this only has MCA banks which do not support CMCI (if any).
@@ -622,8 +620,9 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
  * is already totally * confused. In this case it's likely it will
  * not fully execute the machine check handler either.
  */
-void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
+bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 {
+	bool error_logged = false;
 	struct mce m;
 	int severity;
 	int i;
@@ -646,7 +645,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		if (!(m.status & MCI_STATUS_VAL))
 			continue;
 
-		this_cpu_write(mce_polled_error, 1);
 		/*
 		 * Uncorrected or signalled events are handled by the exception
 		 * handler when it is enabled, so don't process those here.
@@ -679,8 +678,10 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		 * Don't get the IP here because it's unlikely to
 		 * have anything to do with the actual error location.
 		 */
-		if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce)
+		if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce) {
+			error_logged = true;
 			mce_log(&m);
+		}
 
 		/*
 		 * Clear state for this bank.
@@ -694,6 +695,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 	 */
 	sync_core();
+
+	return error_logged;
 }
 EXPORT_SYMBOL_GPL(machine_check_poll);
@@ -813,7 +816,7 @@ static void mce_reign(void)
 	 * other CPUs.
 	 */
 	if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
-		mce_panic("Fatal Machine check", m, msg);
+		mce_panic("Fatal machine check", m, msg);
 
 	/*
 	 * For UC somewhere we let the CPU who detects it handle it.
@@ -826,7 +829,7 @@ static void mce_reign(void)
 	 * source or one CPU is hung. Panic.
 	 */
 	if (global_worst <= MCE_KEEP_SEVERITY && mca_cfg.tolerant < 3)
-		mce_panic("Machine check from unknown source", NULL, NULL);
+		mce_panic("Fatal machine check from unknown source", NULL, NULL);
 
 	/*
 	 * Now clear all the mces_seen so that they don't reappear on
@@ -1258,7 +1261,7 @@ void mce_log_therm_throt_event(__u64 status)
  * poller finds an MCE, poll 2x faster. When the poller finds no more
  * errors, poll 2x slower (up to check_interval seconds).
  */
-static unsigned long check_interval = 5 * 60; /* 5 minutes */
+static unsigned long check_interval = INITIAL_CHECK_INTERVAL;
 
 static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
 static DEFINE_PER_CPU(struct timer_list, mce_timer);
@@ -1268,49 +1271,57 @@ static unsigned long mce_adjust_timer_default(unsigned long interval)
 	return interval;
 }
 
-static unsigned long (*mce_adjust_timer)(unsigned long interval) =
-	mce_adjust_timer_default;
+static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
 
-static int cmc_error_seen(void)
+static void __restart_timer(struct timer_list *t, unsigned long interval)
 {
-	unsigned long *v = this_cpu_ptr(&mce_polled_error);
+	unsigned long when = jiffies + interval;
+	unsigned long flags;
 
-	return test_and_clear_bit(0, v);
+	local_irq_save(flags);
+
+	if (timer_pending(t)) {
+		if (time_before(when, t->expires))
+			mod_timer_pinned(t, when);
+	} else {
+		t->expires = round_jiffies(when);
+		add_timer_on(t, smp_processor_id());
+	}
+
+	local_irq_restore(flags);
 }
 
 static void mce_timer_fn(unsigned long data)
 {
 	struct timer_list *t = this_cpu_ptr(&mce_timer);
+	int cpu = smp_processor_id();
 	unsigned long iv;
-	int notify;
 
-	WARN_ON(smp_processor_id() != data);
+	WARN_ON(cpu != data);
+
+	iv = __this_cpu_read(mce_next_interval);
 
 	if (mce_available(this_cpu_ptr(&cpu_info))) {
-		machine_check_poll(MCP_TIMESTAMP,
-				this_cpu_ptr(&mce_poll_banks));
-		mce_intel_cmci_poll();
+		machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_poll_banks));
+
+		if (mce_intel_cmci_poll()) {
+			iv = mce_adjust_timer(iv);
+			goto done;
+		}
 	}
 
 	/*
-	 * Alert userspace if needed. If we logged an MCE, reduce the
-	 * polling interval, otherwise increase the polling interval.
+	 * Alert userspace if needed. If we logged an MCE, reduce the polling
+	 * interval, otherwise increase the polling interval.
 	 */
-	iv = __this_cpu_read(mce_next_interval);
-	notify = mce_notify_irq();
-	notify |= cmc_error_seen();
-	if (notify) {
+	if (mce_notify_irq())
 		iv = max(iv / 2, (unsigned long) HZ/100);
-	} else {
+	else
 		iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
-		iv = mce_adjust_timer(iv);
-	}
+
+done:
 	__this_cpu_write(mce_next_interval, iv);
-	/* Might have become 0 after CMCI storm subsided */
-	if (iv) {
-		t->expires = jiffies + iv;
-		add_timer_on(t, smp_processor_id());
-	}
+	__restart_timer(t, iv);
 }
 
 /*
@@ -1319,16 +1330,10 @@ static void mce_timer_fn(unsigned long data)
 void mce_timer_kick(unsigned long interval)
 {
 	struct timer_list *t = this_cpu_ptr(&mce_timer);
-	unsigned long when = jiffies + interval;
 	unsigned long iv = __this_cpu_read(mce_next_interval);
 
-	if (timer_pending(t)) {
-		if (time_before(when, t->expires))
-			mod_timer_pinned(t, when);
-	} else {
-		t->expires = round_jiffies(when);
-		add_timer_on(t, smp_processor_id());
-	}
+	__restart_timer(t, interval);
+
 	if (interval < iv)
 		__this_cpu_write(mce_next_interval, interval);
 }
@@ -1525,45 +1530,46 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
 		 * Various K7s with broken bank 0 around. Always disable
 		 * by default.
 		 */
 		if (c->x86 == 6 && cfg->banks > 0)
 			mce_banks[0].ctl = 0;
 
-		/*
-		 * Turn off MC4_MISC thresholding banks on those models since
-		 * they're not supported there.
-		 */
-		if (c->x86 == 0x15 &&
-		    (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) {
-			int i;
-			u64 val, hwcr;
-			bool need_toggle;
-			u32 msrs[] = {
+		/*
+		 * overflow_recov is supported for F15h Models 00h-0fh
+		 * even though we don't have a CPUID bit for it.
+		 */
+		if (c->x86 == 0x15 && c->x86_model <= 0xf)
+			mce_flags.overflow_recov = 1;
+
+		/*
+		 * Turn off MC4_MISC thresholding banks on those models since
+		 * they're not supported there.
+		 */
+		if (c->x86 == 0x15 &&
+		    (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) {
+			int i;
+			u64 hwcr;
+			bool need_toggle;
+			u32 msrs[] = {
 				0x00000413, /* MC4_MISC0 */
 				0xc0000408, /* MC4_MISC1 */
 			};
 
 			rdmsrl(MSR_K7_HWCR, hwcr);
 
 			/* McStatusWrEn has to be set */
 			need_toggle = !(hwcr & BIT(18));
 
 			if (need_toggle)
 				wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
 
-			for (i = 0; i < ARRAY_SIZE(msrs); i++) {
-				rdmsrl(msrs[i], val);
-
-				/* CntP bit set? */
-				if (val & BIT_64(62)) {
-					val &= ~BIT_64(62);
-					wrmsrl(msrs[i], val);
-				}
-			}
-
-			/* restore old settings */
-			if (need_toggle)
-				wrmsrl(MSR_K7_HWCR, hwcr);
-		}
+			/* Clear CntP bit safely */
+			for (i = 0; i < ARRAY_SIZE(msrs); i++)
+				msr_clear_bit(msrs[i], 62);
+
+			/* restore old settings */
+			if (need_toggle)
+				wrmsrl(MSR_K7_HWCR, hwcr);
+		}
 	}
 
 	if (c->x86_vendor == X86_VENDOR_INTEL) {
@@ -1629,10 +1635,11 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 	switch (c->x86_vendor) {
 	case X86_VENDOR_INTEL:
 		mce_intel_feature_init(c);
-		mce_adjust_timer = mce_intel_adjust_timer;
+		mce_adjust_timer = cmci_intel_adjust_timer;
 		break;
 	case X86_VENDOR_AMD:
 		mce_amd_feature_init(c);
+		mce_flags.overflow_recov = cpuid_ebx(0x80000007) & 0x1;
 		break;
 	default:
 		break;
@@ -2017,6 +2024,7 @@ __setup("mce", mcheck_enable);
 int __init mcheck_init(void)
 {
 	mcheck_intel_therm_init();
+	mcheck_vendor_init_severity();
 
 	return 0;
 }
...
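
The mce.c changes above rework mce_timer_fn() so the polling interval follows what machine_check_poll() now reports: halve the interval (down to a floor) after a logged error, double it (up to check_interval) after a quiet poll. A standalone sketch of that backoff arithmetic follows; it is not kernel code, HZ is a stand-in jiffies rate, and the error_logged argument stands in for machine_check_poll()'s new bool return value.

/* Illustrative model of the adaptive MCE poll interval (not kernel code). */
#include <stdbool.h>
#include <stdio.h>

#define HZ			1000UL		/* stand-in ticks per second */
#define INITIAL_CHECK_INTERVAL	(5 * 60)	/* 5 minutes, as in the diff */

static unsigned long check_interval = INITIAL_CHECK_INTERVAL;

static unsigned long next_interval(unsigned long iv, bool error_logged)
{
	if (error_logged)	/* poll 2x faster, but not below HZ/100 */
		return iv / 2 > HZ / 100 ? iv / 2 : HZ / 100;

	iv *= 2;		/* poll 2x slower, capped at check_interval */
	return iv < check_interval * HZ ? iv : check_interval * HZ;
}

int main(void)
{
	unsigned long iv = check_interval * HZ;

	iv = next_interval(iv, true);
	printf("after a logged error: %lu ticks\n", iv);
	iv = next_interval(iv, false);
	printf("after a quiet poll:   %lu ticks\n", iv);
	return 0;
}
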
@@ -79,7 +79,7 @@ static inline bool is_shared_bank(int bank)
 	return (bank == 4);
 }
 
-static const char * const bank4_names(struct threshold_block *b)
+static const char *bank4_names(const struct threshold_block *b)
 {
 	switch (b->address) {
 	/* MSR4_MISC0 */
@@ -250,6 +250,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 		if (!b.interrupt_capable)
 			goto init;
 
+		b.interrupt_enable = 1;
 		new	= (high & MASK_LVTOFF_HI) >> 20;
 		offset  = setup_APIC_mce(offset, new);
@@ -322,6 +323,8 @@ static void amd_threshold_interrupt(void)
 log:
 	mce_setup(&m);
 	rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status);
+	if (!(m.status & MCI_STATUS_VAL))
+		return;
 	m.misc = ((u64)high << 32) | low;
 	m.bank = bank;
 	mce_log(&m);
@@ -497,10 +500,12 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
 	b->interrupt_capable	= lvt_interrupt_supported(bank, high);
 	b->threshold_limit	= THRESHOLD_MAX;
 
-	if (b->interrupt_capable)
+	if (b->interrupt_capable) {
 		threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
-	else
+		b->interrupt_enable = 1;
+	} else {
 		threshold_ktype.default_attrs[2] = NULL;
+	}
 
 	INIT_LIST_HEAD(&b->miscj);
...
@@ -38,6 +38,15 @@
  */
 static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
 
+/*
+ * CMCI storm detection backoff counter
+ *
+ * During storm, we reset this counter to INITIAL_CHECK_INTERVAL in case we've
+ * encountered an error. If not, we decrement it by one. We signal the end of
+ * the CMCI storm when it reaches 0.
+ */
+static DEFINE_PER_CPU(int, cmci_backoff_cnt);
+
 /*
  * cmci_discover_lock protects against parallel discovery attempts
  * which could race against each other.
@@ -46,7 +55,7 @@ static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
 
 #define CMCI_THRESHOLD		1
 #define CMCI_POLL_INTERVAL	(30 * HZ)
-#define CMCI_STORM_INTERVAL	(1 * HZ)
+#define CMCI_STORM_INTERVAL	(HZ)
 #define CMCI_STORM_THRESHOLD	15
 
 static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
@@ -82,11 +91,21 @@ static int cmci_supported(int *banks)
 	return !!(cap & MCG_CMCI_P);
 }
 
-void mce_intel_cmci_poll(void)
+bool mce_intel_cmci_poll(void)
 {
 	if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
-		return;
-	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
+		return false;
+
+	/*
+	 * Reset the counter if we've logged an error in the last poll
+	 * during the storm.
+	 */
+	if (machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)))
+		this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
+	else
+		this_cpu_dec(cmci_backoff_cnt);
+
+	return true;
 }
 
 void mce_intel_hcpu_update(unsigned long cpu)
@@ -97,31 +116,32 @@ void mce_intel_hcpu_update(unsigned long cpu)
 	per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
 }
 
-unsigned long mce_intel_adjust_timer(unsigned long interval)
+unsigned long cmci_intel_adjust_timer(unsigned long interval)
 {
-	int r;
-
-	if (interval < CMCI_POLL_INTERVAL)
-		return interval;
+	if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
+	    (__this_cpu_read(cmci_storm_state) == CMCI_STORM_ACTIVE)) {
+		mce_notify_irq();
+		return CMCI_STORM_INTERVAL;
+	}
 
 	switch (__this_cpu_read(cmci_storm_state)) {
 	case CMCI_STORM_ACTIVE:
 		/*
 		 * We switch back to interrupt mode once the poll timer has
-		 * silenced itself. That means no events recorded and the
-		 * timer interval is back to our poll interval.
+		 * silenced itself. That means no events recorded and the timer
+		 * interval is back to our poll interval.
 		 */
 		__this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
-		r = atomic_sub_return(1, &cmci_storm_on_cpus);
-		if (r == 0)
+		if (!atomic_sub_return(1, &cmci_storm_on_cpus))
 			pr_notice("CMCI storm subsided: switching to interrupt mode\n");
 		/* FALLTHROUGH */
 	case CMCI_STORM_SUBSIDED:
 		/*
-		 * We wait for all cpus to go back to SUBSIDED
-		 * state. When that happens we switch back to
-		 * interrupt mode.
+		 * We wait for all CPUs to go back to SUBSIDED state. When that
+		 * happens we switch back to interrupt mode.
 		 */
 		if (!atomic_read(&cmci_storm_on_cpus)) {
 			__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
@@ -130,10 +150,8 @@ unsigned long mce_intel_adjust_timer(unsigned long interval)
 		}
 		return CMCI_POLL_INTERVAL;
 	default:
-		/*
-		 * We have shiny weather. Let the poll do whatever it
-		 * thinks.
-		 */
+		/* We have shiny weather. Let the poll do whatever it thinks. */
 		return interval;
 	}
 }
@@ -178,7 +196,8 @@ static bool cmci_storm_detect(void)
 	cmci_storm_disable_banks();
 	__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
 	r = atomic_add_return(1, &cmci_storm_on_cpus);
-	mce_timer_kick(CMCI_POLL_INTERVAL);
+	mce_timer_kick(CMCI_STORM_INTERVAL);
+	this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
 
 	if (r == 1)
 		pr_notice("CMCI storm detected: switching to poll mode\n");
@@ -195,6 +214,7 @@ static void intel_threshold_interrupt(void)
 {
 	if (cmci_storm_detect())
 		return;
+
 	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
 	mce_notify_irq();
 }
@@ -286,6 +306,7 @@ void cmci_recheck(void)
 	if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
 		return;
+
 	local_irq_save(flags);
 	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
 	local_irq_restore(flags);
...
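
The mce_intel.c changes above end a CMCI storm through cmci_backoff_cnt: each storm-mode poll that logs an error resets the counter to INITIAL_CHECK_INTERVAL, each quiet poll decrements it, and polling mode is kept only while the counter stays positive. A standalone sketch of that counter behaviour follows; it is not kernel code, and start_storm()/storm_poll() are hypothetical stand-ins for cmci_storm_detect() and mce_intel_cmci_poll().

/* Illustrative model of the CMCI storm backoff counter (not kernel code). */
#include <stdbool.h>
#include <stdio.h>

#define INITIAL_CHECK_INTERVAL	(5 * 60)

static int cmci_backoff_cnt;

static void start_storm(void)
{
	/* mirrors cmci_storm_detect(): arm the backoff counter */
	cmci_backoff_cnt = INITIAL_CHECK_INTERVAL;
}

/* Returns true while the CPU should stay in polling (storm) mode. */
static bool storm_poll(bool error_logged)
{
	if (error_logged)
		cmci_backoff_cnt = INITIAL_CHECK_INTERVAL;
	else
		cmci_backoff_cnt--;

	return cmci_backoff_cnt > 0;
}

int main(void)
{
	int polls = 1;

	start_storm();
	while (storm_poll(false))	/* no further errors: count down */
		polls++;

	printf("storm subsided after %d quiet polls\n", polls);
	return 0;
}
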