Commit d6e8a150, authored by Mahesh Salgaonkar and committed by Michael Ellerman

powerpc/powernv/mce: Reduce MCE console logs to fewer lines.

Also add the CPU number when displaying the MCE log. This keeps the logs
readable when MCEs hit multiple CPUs simultaneously.

Before the changes the MCE output was:

  Severe Machine check interrupt [Recovered]
    NIP [d00000000ba80280]: insert_slb_entry.constprop.0+0x278/0x2c0 [mcetest_slb]
    Initiator: CPU
    Error type: SLB [Multihit]
      Effective address: d00000000ba80280

After this patch series, the MCE output will be:

  MCE: CPU80: machine check (Warning) Host SLB Multihit [Recovered]
  MCE: CPU80: NIP: [d00000000b550280] insert_slb_entry.constprop.0+0x278/0x2c0 [mcetest_slb]
  MCE: CPU80: Probable software error (some chance of hardware cause)

UE in host application:

  MCE: CPU48: machine check (Severe) Host UE Load/Store DAR: 00007fffc6079a80 paddr: 0000000f8e260000 [Not recovered]
  MCE: CPU48: PID: 4584 Comm: find NIP: [0000000010023368]
  MCE: CPU48: Hardware error

and for MCE in Guest:

  MCE: CPU80: machine check (Warning) Guest SLB Multihit DAR: 000001001b6e0320 [Recovered]
  MCE: CPU80: PID: 24765 Comm: qemu-system-ppc Guest NIP: [00007fffa309dc60]
  MCE: CPU80: Probable software error (some chance of hardware cause)
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
parent 5b2a1529
......@@ -116,7 +116,7 @@ struct machine_check_event {
enum MCE_Initiator initiator:8; /* 0x03 */
enum MCE_ErrorType error_type:8; /* 0x04 */
enum MCE_Disposition disposition:8; /* 0x05 */
uint8_t reserved_1[2]; /* 0x06 */
uint16_t cpu; /* 0x06 */
uint64_t gpr3; /* 0x08 */
uint64_t srr0; /* 0x10 */
uint64_t srr1; /* 0x18 */
......
......@@ -112,6 +112,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
mce->srr1 = regs->msr;
mce->gpr3 = regs->gpr[3];
mce->in_use = 1;
mce->cpu = get_paca()->paca_index;
/* Mark it recovered if we have handled it and MSR(RI=1). */
if (handled && (regs->msr & MSR_RI))
......@@ -310,7 +311,11 @@ static void machine_check_process_queued_event(struct irq_work *work)
void machine_check_print_event_info(struct machine_check_event *evt,
bool user_mode, bool in_guest)
{
const char *level, *sevstr, *subtype;
const char *level, *sevstr, *subtype, *err_type;
uint64_t ea = 0, pa = 0;
int n = 0;
char dar_str[50];
char pa_str[50];
static const char *mc_ue_types[] = {
"Indeterminate",
"Instruction fetch",
......@@ -384,101 +389,103 @@ void machine_check_print_event_info(struct machine_check_event *evt,
break;
}
printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
evt->disposition == MCE_DISPOSITION_RECOVERED ?
"Recovered" : "Not recovered");
if (in_guest) {
printk("%s Guest NIP: %016llx\n", level, evt->srr0);
} else if (user_mode) {
printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level,
evt->srr0, current->pid, current->comm);
} else {
printk("%s NIP [%016llx]: %pS\n", level, evt->srr0,
(void *)evt->srr0);
}
printk("%s Initiator: %s\n", level,
evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
switch (evt->error_type) {
case MCE_ERROR_TYPE_UE:
err_type = "UE";
subtype = evt->u.ue_error.ue_error_type <
ARRAY_SIZE(mc_ue_types) ?
mc_ue_types[evt->u.ue_error.ue_error_type]
: "Unknown";
printk("%s Error type: UE [%s]\n", level, subtype);
if (evt->u.ue_error.effective_address_provided)
printk("%s Effective address: %016llx\n",
level, evt->u.ue_error.effective_address);
ea = evt->u.ue_error.effective_address;
if (evt->u.ue_error.physical_address_provided)
printk("%s Physical address: %016llx\n",
level, evt->u.ue_error.physical_address);
pa = evt->u.ue_error.physical_address;
break;
case MCE_ERROR_TYPE_SLB:
err_type = "SLB";
subtype = evt->u.slb_error.slb_error_type <
ARRAY_SIZE(mc_slb_types) ?
mc_slb_types[evt->u.slb_error.slb_error_type]
: "Unknown";
printk("%s Error type: SLB [%s]\n", level, subtype);
if (evt->u.slb_error.effective_address_provided)
printk("%s Effective address: %016llx\n",
level, evt->u.slb_error.effective_address);
ea = evt->u.slb_error.effective_address;
break;
case MCE_ERROR_TYPE_ERAT:
err_type = "ERAT";
subtype = evt->u.erat_error.erat_error_type <
ARRAY_SIZE(mc_erat_types) ?
mc_erat_types[evt->u.erat_error.erat_error_type]
: "Unknown";
printk("%s Error type: ERAT [%s]\n", level, subtype);
if (evt->u.erat_error.effective_address_provided)
printk("%s Effective address: %016llx\n",
level, evt->u.erat_error.effective_address);
ea = evt->u.erat_error.effective_address;
break;
case MCE_ERROR_TYPE_TLB:
err_type = "TLB";
subtype = evt->u.tlb_error.tlb_error_type <
ARRAY_SIZE(mc_tlb_types) ?
mc_tlb_types[evt->u.tlb_error.tlb_error_type]
: "Unknown";
printk("%s Error type: TLB [%s]\n", level, subtype);
if (evt->u.tlb_error.effective_address_provided)
printk("%s Effective address: %016llx\n",
level, evt->u.tlb_error.effective_address);
ea = evt->u.tlb_error.effective_address;
break;
case MCE_ERROR_TYPE_USER:
err_type = "User";
subtype = evt->u.user_error.user_error_type <
ARRAY_SIZE(mc_user_types) ?
mc_user_types[evt->u.user_error.user_error_type]
: "Unknown";
printk("%s Error type: User [%s]\n", level, subtype);
if (evt->u.user_error.effective_address_provided)
printk("%s Effective address: %016llx\n",
level, evt->u.user_error.effective_address);
ea = evt->u.user_error.effective_address;
break;
case MCE_ERROR_TYPE_RA:
err_type = "Real address";
subtype = evt->u.ra_error.ra_error_type <
ARRAY_SIZE(mc_ra_types) ?
mc_ra_types[evt->u.ra_error.ra_error_type]
: "Unknown";
printk("%s Error type: Real address [%s]\n", level, subtype);
if (evt->u.ra_error.effective_address_provided)
printk("%s Effective address: %016llx\n",
level, evt->u.ra_error.effective_address);
ea = evt->u.ra_error.effective_address;
break;
case MCE_ERROR_TYPE_LINK:
err_type = "Link";
subtype = evt->u.link_error.link_error_type <
ARRAY_SIZE(mc_link_types) ?
mc_link_types[evt->u.link_error.link_error_type]
: "Unknown";
printk("%s Error type: Link [%s]\n", level, subtype);
if (evt->u.link_error.effective_address_provided)
printk("%s Effective address: %016llx\n",
level, evt->u.link_error.effective_address);
ea = evt->u.link_error.effective_address;
break;
default:
case MCE_ERROR_TYPE_UNKNOWN:
printk("%s Error type: Unknown\n", level);
err_type = "Unknown";
subtype = "";
break;
}
dar_str[0] = pa_str[0] = '\0';
if (ea && evt->srr0 != ea) {
/* Load/Store address */
n = sprintf(dar_str, "DAR: %016llx ", ea);
if (pa)
sprintf(dar_str + n, "paddr: %016llx ", pa);
} else if (pa) {
sprintf(pa_str, " paddr: %016llx", pa);
}
printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
level, evt->cpu, sevstr, in_guest ? "Guest" : "Host",
err_type, subtype, dar_str,
evt->disposition == MCE_DISPOSITION_RECOVERED ?
"Recovered" : "Not recovered");
if (in_guest || user_mode) {
printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
level, evt->cpu, current->pid, current->comm,
in_guest ? "Guest " : "", evt->srr0, pa_str);
} else {
printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
}
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment