Commit 786f02b7 authored by Linus Torvalds

Merge tag 'x86-mce-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras

Pull x86/mce merge window patches from Tony Luck:
 "Including two that make error_context() checks less sucky"

* tag 'x86-mce-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
  x86/mce: Add instruction recovery signatures to mce-severity table
  x86/mce: Fix check for processor context when machine check was taken.
  MCE: Fix vm86 handling for 32bit mce handler
  x86/mce Add validation check before GHES error is recorded
  x86/mce: Avoid reading every machine check bank register twice.
parents fa2af6e4 37c3459b
...@@ -42,7 +42,8 @@ void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err) ...@@ -42,7 +42,8 @@ void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err)
struct mce m; struct mce m;
/* Only corrected MC is reported */ /* Only corrected MC is reported */
if (!corrected) if (!corrected || !(mem_err->validation_bits &
CPER_MEM_VALID_PHYSICAL_ADDRESS))
return; return;
mce_setup(&m); mce_setup(&m);
......
...@@ -126,6 +126,16 @@ static struct severity { ...@@ -126,6 +126,16 @@ static struct severity {
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
USER USER
), ),
MCESEV(
KEEP, "HT thread notices Action required: instruction fetch error",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
MCGMASK(MCG_STATUS_EIPV, 0)
),
MCESEV(
AR, "Action required: instruction fetch error",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
USER
),
#endif #endif
MCESEV( MCESEV(
PANIC, "Action required: unknown MCACOD", PANIC, "Action required: unknown MCACOD",
...@@ -165,15 +175,19 @@ static struct severity { ...@@ -165,15 +175,19 @@ static struct severity {
}; };
/* /*
* If the EIPV bit is set, it means the saved IP is the * If mcgstatus indicated that ip/cs on the stack were
* instruction which caused the MCE. * no good, then "m->cs" will be zero and we will have
* to assume the worst case (IN_KERNEL) as we actually
* have no idea what we were executing when the machine
* check hit.
* If we do have a good "m->cs" (or a faked one in the
* case we were executing in VM86 mode) we can use it to
* distinguish an exception taken in user mode from one
* taken in the kernel.
*/ */
static int error_context(struct mce *m) static int error_context(struct mce *m)
{ {
if (m->mcgstatus & MCG_STATUS_EIPV) return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
return (m->ip && (m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
/* Unknown, assume kernel */
return IN_KERNEL;
} }
int mce_severity(struct mce *m, int tolerant, char **msg) int mce_severity(struct mce *m, int tolerant, char **msg)
......
...@@ -437,6 +437,14 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs) ...@@ -437,6 +437,14 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
if (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) { if (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) {
m->ip = regs->ip; m->ip = regs->ip;
m->cs = regs->cs; m->cs = regs->cs;
/*
* When in VM86 mode make the cs look like ring 3
* always. This is a lie, but it's better than passing
* the additional vm86 bit around everywhere.
*/
if (v8086_mode(regs))
m->cs |= 3;
} }
/* Use accurate RIP reporting if available. */ /* Use accurate RIP reporting if available. */
if (rip_msr) if (rip_msr)
...@@ -641,16 +649,18 @@ EXPORT_SYMBOL_GPL(machine_check_poll); ...@@ -641,16 +649,18 @@ EXPORT_SYMBOL_GPL(machine_check_poll);
* Do a quick check if any of the events requires a panic. * Do a quick check if any of the events requires a panic.
* This decides if we keep the events around or clear them. * This decides if we keep the events around or clear them.
*/ */
static int mce_no_way_out(struct mce *m, char **msg) static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp)
{ {
int i; int i, ret = 0;
for (i = 0; i < banks; i++) { for (i = 0; i < banks; i++) {
m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
if (m->status & MCI_STATUS_VAL)
__set_bit(i, validp);
if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
return 1; ret = 1;
} }
return 0; return ret;
} }
/* /*
...@@ -1013,6 +1023,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) ...@@ -1013,6 +1023,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
*/ */
int kill_it = 0; int kill_it = 0;
DECLARE_BITMAP(toclear, MAX_NR_BANKS); DECLARE_BITMAP(toclear, MAX_NR_BANKS);
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
char *msg = "Unknown"; char *msg = "Unknown";
atomic_inc(&mce_entry); atomic_inc(&mce_entry);
...@@ -1027,7 +1038,8 @@ void do_machine_check(struct pt_regs *regs, long error_code) ...@@ -1027,7 +1038,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
final = &__get_cpu_var(mces_seen); final = &__get_cpu_var(mces_seen);
*final = m; *final = m;
no_way_out = mce_no_way_out(&m, &msg); memset(valid_banks, 0, sizeof(valid_banks));
no_way_out = mce_no_way_out(&m, &msg, valid_banks);
barrier(); barrier();
...@@ -1047,6 +1059,8 @@ void do_machine_check(struct pt_regs *regs, long error_code) ...@@ -1047,6 +1059,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
order = mce_start(&no_way_out); order = mce_start(&no_way_out);
for (i = 0; i < banks; i++) { for (i = 0; i < banks; i++) {
__clear_bit(i, toclear); __clear_bit(i, toclear);
if (!test_bit(i, valid_banks))
continue;
if (!mce_banks[i].ctl) if (!mce_banks[i].ctl)
continue; continue;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment