Commit cbd4ebcb authored by H. Peter Anvin's avatar H. Peter Anvin

Merge tag 'please-pull-extlog-trace' into x86/ras

Report extended error information ("extlog") using
a trace/event.  Provide a mechanism for a smart
daemon collecting this information to tell the kernel
to skip logging corrected errors to the console.
Signed-off-by: default avatarH. Peter Anvin <hpa@zytor.com>
parents 27c93415 7c76bb5f
......@@ -176,4 +176,6 @@ source "drivers/powercap/Kconfig"
source "drivers/mcb/Kconfig"
source "drivers/ras/Kconfig"
endmenu
......@@ -158,3 +158,4 @@ obj-$(CONFIG_NTB) += ntb/
obj-$(CONFIG_FMC) += fmc/
obj-$(CONFIG_POWERCAP) += powercap/
obj-$(CONFIG_MCB) += mcb/
obj-$(CONFIG_RAS) += ras/
......@@ -370,6 +370,7 @@ config ACPI_EXTLOG
tristate "Extended Error Log support"
depends on X86_MCE && X86_LOCAL_APIC
select UEFI_CPER
select RAS
default n
help
Certain usages such as Predictive Failure Analysis (PFA) require
......@@ -384,6 +385,7 @@ config ACPI_EXTLOG
Enhanced MCA Logging allows firmware to provide additional error
information to system software, synchronous with MCE or CMCI. This
driver adds support for that functionality.
driver adds support for that functionality with corresponding
tracepoint which carries that information to userspace.
endif # ACPI
......@@ -12,10 +12,12 @@
#include <linux/cper.h>
#include <linux/ratelimit.h>
#include <linux/edac.h>
#include <linux/ras.h>
#include <asm/cpu.h>
#include <asm/mce.h>
#include "apei/apei-internal.h"
#include <ras/ras_event.h>
#define EXT_ELOG_ENTRY_MASK GENMASK_ULL(51, 0) /* elog entry address mask */
......@@ -137,8 +139,12 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
struct mce *mce = (struct mce *)data;
int bank = mce->bank;
int cpu = mce->extcpu;
struct acpi_generic_status *estatus;
int rc;
struct acpi_generic_status *estatus, *tmp;
struct acpi_generic_data *gdata;
const uuid_le *fru_id = &NULL_UUID_LE;
char *fru_text = "";
uuid_le *sec_type;
static u32 err_seq;
estatus = extlog_elog_entry_check(cpu, bank);
if (estatus == NULL)
......@@ -148,8 +154,29 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
/* clear record status to enable BIOS to update it again */
estatus->block_status = 0;
rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu);
tmp = (struct acpi_generic_status *)elog_buf;
if (!ras_userspace_consumers()) {
print_extlog_rcd(NULL, tmp, cpu);
goto out;
}
/* log event via trace */
err_seq++;
gdata = (struct acpi_generic_data *)(tmp + 1);
if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
fru_id = (uuid_le *)gdata->fru_id;
if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
fru_text = gdata->fru_text;
sec_type = (uuid_le *)gdata->section_type;
if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
struct cper_sec_mem_err *mem = (void *)(gdata + 1);
if (gdata->error_data_length >= sizeof(*mem))
trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
(u8)gdata->error_severity);
}
out:
return NOTIFY_STOP;
}
......@@ -196,19 +223,16 @@ static int __init extlog_init(void)
u64 cap;
int rc;
rdmsrl(MSR_IA32_MCG_CAP, cap);
if (!(cap & MCG_ELOG_P) || !extlog_get_l1addr())
return -ENODEV;
if (get_edac_report_status() == EDAC_REPORTING_FORCE) {
pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n");
return -EPERM;
}
rc = -ENODEV;
rdmsrl(MSR_IA32_MCG_CAP, cap);
if (!(cap & MCG_ELOG_P))
return rc;
if (!extlog_get_l1addr())
return rc;
rc = -EINVAL;
/* get L1 header to fetch necessary information */
l1_hdr_size = sizeof(struct extlog_l1_head);
......
......@@ -72,6 +72,7 @@ config EDAC_MCE_INJ
config EDAC_MM_EDAC
tristate "Main Memory EDAC (Error Detection And Correction) reporting"
select RAS
help
Some systems are able to detect and correct errors in main
memory. EDAC can report statistics on memory error
......
......@@ -33,9 +33,6 @@
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"
#define CREATE_TRACE_POINTS
#define TRACE_INCLUDE_PATH ../../include/ras
#include <ras/ras_event.h>
/* lock to memory controller's control array */
......
This diff is collapsed.
......@@ -5,6 +5,7 @@
config PCIEAER
boolean "Root Port Advanced Error Reporting support"
depends on PCIEPORTBUS
select RAS
default y
help
This enables PCI Express Root Port Advanced Error Reporting
......
......@@ -22,9 +22,7 @@
#include <linux/cper.h>
#include "aerdrv.h"
#define CREATE_TRACE_POINTS
#include <trace/events/ras.h>
#include <ras/ras_event.h>
#define AER_AGENT_RECEIVER 0
#define AER_AGENT_REQUESTER 1
......
config RAS
bool
obj-$(CONFIG_RAS) += ras.o debugfs.o
#include <linux/debugfs.h>
static struct dentry *ras_debugfs_dir;
static atomic_t trace_count = ATOMIC_INIT(0);
int ras_userspace_consumers(void)
{
return atomic_read(&trace_count);
}
EXPORT_SYMBOL_GPL(ras_userspace_consumers);
static int trace_show(struct seq_file *m, void *v)
{
return atomic_read(&trace_count);
}
static int trace_open(struct inode *inode, struct file *file)
{
atomic_inc(&trace_count);
return single_open(file, trace_show, NULL);
}
static int trace_release(struct inode *inode, struct file *file)
{
atomic_dec(&trace_count);
return single_release(inode, file);
}
static const struct file_operations trace_fops = {
.open = trace_open,
.read = seq_read,
.llseek = seq_lseek,
.release = trace_release,
};
int __init ras_add_daemon_trace(void)
{
struct dentry *fentry;
if (!ras_debugfs_dir)
return -ENOENT;
fentry = debugfs_create_file("daemon_active", S_IRUSR, ras_debugfs_dir,
NULL, &trace_fops);
if (!fentry)
return -ENODEV;
return 0;
}
void __init ras_debugfs_init(void)
{
ras_debugfs_dir = debugfs_create_dir("ras", NULL);
}
/*
* Copyright (C) 2014 Intel Corporation
*
* Authors:
* Chen, Gong <gong.chen@linux.intel.com>
*/
#include <linux/init.h>
#include <linux/ras.h>
#define CREATE_TRACE_POINTS
#define TRACE_INCLUDE_PATH ../../include/ras
#include <ras/ras_event.h>
static int __init ras_init(void)
{
int rc = 0;
ras_debugfs_init();
rc = ras_add_daemon_trace();
return rc;
}
subsys_initcall(ras_init);
#if defined(CONFIG_ACPI_EXTLOG) || defined(CONFIG_ACPI_EXTLOG_MODULE)
EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
#endif
EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
......@@ -22,6 +22,7 @@
#define LINUX_CPER_H
#include <linux/uuid.h>
#include <linux/trace_seq.h>
/* CPER record signature and the size */
#define CPER_SIG_RECORD "CPER"
......@@ -35,6 +36,13 @@
*/
#define CPER_RECORD_REV 0x0100
/*
* CPER record length contains the CPER fields which are relevant for further
* handling of a memory error in userspace (we don't carry all the fields
* defined in the UEFI spec because some of them don't make any sense.)
* Currently, a length of 256 should be more than enough.
*/
#define CPER_REC_LEN 256
/*
* Severity difinition for error_severity in struct cper_record_header
* and section_severity in struct cper_section_descriptor
......@@ -356,6 +364,24 @@ struct cper_sec_mem_err {
__u16 mem_dev_handle; /* module handle in UEFI 2.4 */
};
struct cper_mem_err_compact {
__u64 validation_bits;
__u16 node;
__u16 card;
__u16 module;
__u16 bank;
__u16 device;
__u16 row;
__u16 column;
__u16 bit_pos;
__u64 requestor_id;
__u64 responder_id;
__u64 target_id;
__u16 rank;
__u16 mem_array_handle;
__u16 mem_dev_handle;
};
struct cper_sec_pcie {
__u64 validation_bits;
__u32 port_type;
......@@ -395,7 +421,13 @@ struct cper_sec_pcie {
#pragma pack()
u64 cper_next_record_id(void);
const char *cper_severity_str(unsigned int);
const char *cper_mem_err_type_str(unsigned int);
void cper_print_bits(const char *prefix, unsigned int bits,
const char * const strs[], unsigned int strs_size);
void cper_mem_err_pack(const struct cper_sec_mem_err *,
struct cper_mem_err_compact *);
const char *cper_mem_err_unpack(struct trace_seq *,
struct cper_mem_err_compact *);
#endif
#ifndef __RAS_H__
#define __RAS_H__
#ifdef CONFIG_DEBUG_FS
int ras_userspace_consumers(void);
void ras_debugfs_init(void);
int ras_add_daemon_trace(void);
#else
static inline int ras_userspace_consumers(void) { return 0; }
static inline void ras_debugfs_init(void) { return; }
static inline int ras_add_daemon_trace(void) { return 0; }
#endif
#endif
......@@ -8,6 +8,71 @@
#include <linux/tracepoint.h>
#include <linux/edac.h>
#include <linux/ktime.h>
#include <linux/aer.h>
#include <linux/cper.h>
/*
* MCE Extended Error Log trace event
*
* These events are generated when hardware detects a corrected or
* uncorrected event.
*/
/* memory trace event */
#if defined(CONFIG_ACPI_EXTLOG) || defined(CONFIG_ACPI_EXTLOG_MODULE)
TRACE_EVENT(extlog_mem_event,
TP_PROTO(struct cper_sec_mem_err *mem,
u32 err_seq,
const uuid_le *fru_id,
const char *fru_text,
u8 sev),
TP_ARGS(mem, err_seq, fru_id, fru_text, sev),
TP_STRUCT__entry(
__field(u32, err_seq)
__field(u8, etype)
__field(u8, sev)
__field(u64, pa)
__field(u8, pa_mask_lsb)
__field_struct(uuid_le, fru_id)
__string(fru_text, fru_text)
__field_struct(struct cper_mem_err_compact, data)
),
TP_fast_assign(
__entry->err_seq = err_seq;
if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE)
__entry->etype = mem->error_type;
else
__entry->etype = ~0;
__entry->sev = sev;
if (mem->validation_bits & CPER_MEM_VALID_PA)
__entry->pa = mem->physical_addr;
else
__entry->pa = ~0ull;
if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
__entry->pa_mask_lsb = (u8)__ffs64(mem->physical_addr_mask);
else
__entry->pa_mask_lsb = ~0;
__entry->fru_id = *fru_id;
__assign_str(fru_text, fru_text);
cper_mem_err_pack(mem, &__entry->data);
),
TP_printk("{%d} %s error: %s physical addr: %016llx (mask lsb: %x) %sFRU: %pUl %.20s",
__entry->err_seq,
cper_severity_str(__entry->sev),
cper_mem_err_type_str(__entry->etype),
__entry->pa,
__entry->pa_mask_lsb,
cper_mem_err_unpack(p, &__entry->data),
&__entry->fru_id,
__get_str(fru_text))
);
#endif
/*
* Hardware Events Report
......@@ -94,6 +159,69 @@ TRACE_EVENT(mc_event,
__get_str(driver_detail))
);
/*
* PCIe AER Trace event
*
* These events are generated when hardware detects a corrected or
* uncorrected event on a PCIe device. The event report has
* the following structure:
*
* char * dev_name - The name of the slot where the device resides
* ([domain:]bus:device.function).
* u32 status - Either the correctable or uncorrectable register
* indicating what error or errors have been seen
* u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED
*/
#define aer_correctable_errors \
{BIT(0), "Receiver Error"}, \
{BIT(6), "Bad TLP"}, \
{BIT(7), "Bad DLLP"}, \
{BIT(8), "RELAY_NUM Rollover"}, \
{BIT(12), "Replay Timer Timeout"}, \
{BIT(13), "Advisory Non-Fatal"}
#define aer_uncorrectable_errors \
{BIT(4), "Data Link Protocol"}, \
{BIT(12), "Poisoned TLP"}, \
{BIT(13), "Flow Control Protocol"}, \
{BIT(14), "Completion Timeout"}, \
{BIT(15), "Completer Abort"}, \
{BIT(16), "Unexpected Completion"}, \
{BIT(17), "Receiver Overflow"}, \
{BIT(18), "Malformed TLP"}, \
{BIT(19), "ECRC"}, \
{BIT(20), "Unsupported Request"}
TRACE_EVENT(aer_event,
TP_PROTO(const char *dev_name,
const u32 status,
const u8 severity),
TP_ARGS(dev_name, status, severity),
TP_STRUCT__entry(
__string( dev_name, dev_name )
__field( u32, status )
__field( u8, severity )
),
TP_fast_assign(
__assign_str(dev_name, dev_name);
__entry->status = status;
__entry->severity = severity;
),
TP_printk("%s PCIe Bus Error: severity=%s, %s\n",
__get_str(dev_name),
__entry->severity == AER_CORRECTABLE ? "Corrected" :
__entry->severity == AER_FATAL ?
"Fatal" : "Uncorrected, non-fatal",
__entry->severity == AER_CORRECTABLE ?
__print_flags(__entry->status, "|", aer_correctable_errors) :
__print_flags(__entry->status, "|", aer_uncorrectable_errors))
);
#endif /* _TRACE_HW_EVENT_MC_H */
/* This part must be outside protection */
......
#undef TRACE_SYSTEM
#define TRACE_SYSTEM ras
#if !defined(_TRACE_AER_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_AER_H
#include <linux/tracepoint.h>
#include <linux/aer.h>
/*
* PCIe AER Trace event
*
* These events are generated when hardware detects a corrected or
* uncorrected event on a PCIe device. The event report has
* the following structure:
*
* char * dev_name - The name of the slot where the device resides
* ([domain:]bus:device.function).
* u32 status - Either the correctable or uncorrectable register
* indicating what error or errors have been seen
* u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED
*/
#define aer_correctable_errors \
{BIT(0), "Receiver Error"}, \
{BIT(6), "Bad TLP"}, \
{BIT(7), "Bad DLLP"}, \
{BIT(8), "RELAY_NUM Rollover"}, \
{BIT(12), "Replay Timer Timeout"}, \
{BIT(13), "Advisory Non-Fatal"}
#define aer_uncorrectable_errors \
{BIT(4), "Data Link Protocol"}, \
{BIT(12), "Poisoned TLP"}, \
{BIT(13), "Flow Control Protocol"}, \
{BIT(14), "Completion Timeout"}, \
{BIT(15), "Completer Abort"}, \
{BIT(16), "Unexpected Completion"}, \
{BIT(17), "Receiver Overflow"}, \
{BIT(18), "Malformed TLP"}, \
{BIT(19), "ECRC"}, \
{BIT(20), "Unsupported Request"}
TRACE_EVENT(aer_event,
TP_PROTO(const char *dev_name,
const u32 status,
const u8 severity),
TP_ARGS(dev_name, status, severity),
TP_STRUCT__entry(
__string( dev_name, dev_name )
__field( u32, status )
__field( u8, severity )
),
TP_fast_assign(
__assign_str(dev_name, dev_name);
__entry->status = status;
__entry->severity = severity;
),
TP_printk("%s PCIe Bus Error: severity=%s, %s\n",
__get_str(dev_name),
__entry->severity == AER_CORRECTABLE ? "Corrected" :
__entry->severity == AER_FATAL ?
"Fatal" : "Uncorrected, non-fatal",
__entry->severity == AER_CORRECTABLE ?
__print_flags(__entry->status, "|", aer_correctable_errors) :
__print_flags(__entry->status, "|", aer_uncorrectable_errors))
);
#endif /* _TRACE_AER_H */
/* This part must be outside protection */
#include <trace/define_trace.h>
......@@ -44,6 +44,12 @@
#undef __field_ext
#define __field_ext(type, item, filter_type) type item;
#undef __field_struct
#define __field_struct(type, item) type item;
#undef __field_struct_ext
#define __field_struct_ext(type, item, filter_type) type item;
#undef __array
#define __array(type, item, len) type item[len];
......@@ -122,6 +128,12 @@
#undef __field_ext
#define __field_ext(type, item, filter_type)
#undef __field_struct
#define __field_struct(type, item)
#undef __field_struct_ext
#define __field_struct_ext(type, item, filter_type)
#undef __array
#define __array(type, item, len)
......@@ -315,9 +327,21 @@ static struct trace_event_functions ftrace_event_type_funcs_##call = { \
if (ret) \
return ret;
#undef __field_struct_ext
#define __field_struct_ext(type, item, filter_type) \
ret = trace_define_field(event_call, #type, #item, \
offsetof(typeof(field), item), \
sizeof(field.item), \
0, filter_type); \
if (ret) \
return ret;
#undef __field
#define __field(type, item) __field_ext(type, item, FILTER_OTHER)
#undef __field_struct
#define __field_struct(type, item) __field_struct_ext(type, item, FILTER_OTHER)
#undef __array
#define __array(type, item, len) \
do { \
......@@ -379,6 +403,12 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \
#undef __field_ext
#define __field_ext(type, item, filter_type)
#undef __field_struct
#define __field_struct(type, item)
#undef __field_struct_ext
#define __field_struct_ext(type, item, filter_type)
#undef __array
#define __array(type, item, len)
......@@ -550,6 +580,9 @@ static inline notrace int ftrace_get_offsets_##call( \
#undef __field
#define __field(type, item)
#undef __field_struct
#define __field_struct(type, item)
#undef __array
#define __array(type, item, len)
......
......@@ -4,6 +4,7 @@
#include <linux/tracepoint.h>
#include <linux/unistd.h>
#include <linux/ftrace_event.h>
#include <linux/thread_info.h>
#include <asm/ptrace.h>
......@@ -32,4 +33,18 @@ struct syscall_metadata {
struct ftrace_event_call *exit_event;
};
#if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_HAVE_SYSCALL_TRACEPOINTS)
static inline void syscall_tracepoint_update(struct task_struct *p)
{
if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
set_tsk_thread_flag(p, TIF_SYSCALL_TRACEPOINT);
else
clear_tsk_thread_flag(p, TIF_SYSCALL_TRACEPOINT);
}
#else
static inline void syscall_tracepoint_update(struct task_struct *p)
{
}
#endif
#endif /* _TRACE_SYSCALL_H */
......@@ -1487,7 +1487,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
total_forks++;
spin_unlock(&current->sighand->siglock);
syscall_tracepoint_update(p);
write_unlock_irq(&tasklist_lock);
proc_fork_connector(p);
cgroup_post_fork(p);
if (clone_flags & CLONE_THREAD)
......
......@@ -492,33 +492,29 @@ static int sys_tracepoint_refcount;
void syscall_regfunc(void)
{
unsigned long flags;
struct task_struct *g, *t;
struct task_struct *p, *t;
if (!sys_tracepoint_refcount) {
read_lock_irqsave(&tasklist_lock, flags);
do_each_thread(g, t) {
/* Skip kernel threads. */
if (t->mm)
set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
} while_each_thread(g, t);
read_unlock_irqrestore(&tasklist_lock, flags);
read_lock(&tasklist_lock);
for_each_process_thread(p, t) {
set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
}
read_unlock(&tasklist_lock);
}
sys_tracepoint_refcount++;
}
void syscall_unregfunc(void)
{
unsigned long flags;
struct task_struct *g, *t;
struct task_struct *p, *t;
sys_tracepoint_refcount--;
if (!sys_tracepoint_refcount) {
read_lock_irqsave(&tasklist_lock, flags);
do_each_thread(g, t) {
read_lock(&tasklist_lock);
for_each_process_thread(p, t) {
clear_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
} while_each_thread(g, t);
read_unlock_irqrestore(&tasklist_lock, flags);
}
read_unlock(&tasklist_lock);
}
}
#endif
......@@ -56,7 +56,8 @@
* struct: This defines the way the data will be stored in the ring buffer.
* There are currently two types of elements. __field and __array.
* a __field is broken up into (type, name). Where type can be any
* type but an array.
* primitive type (integer, long or pointer). __field_struct() can
* be any static complex data value (struct, union, but not an array).
* For an array. there are three fields. (type, name, size). The
* type of elements in the array, the name of the field and the size
* of the array.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment