Commit c3481b6b, authored by Tony Luck, committed by Rafael J. Wysocki

ACPI: APEI: Better fix to avoid spamming the console with old error logs

The fix in commit 3f8dec11 ("ACPI/APEI: Limit printable size of BERT
table data") does not work as intended on systems where the BIOS has a
fixed-size block of memory for the BERT table and relies on software to
stop when it finds a record with estatus->block_status == 0. On these
systems all errors are suppressed because the check:

	if (region_len < ACPI_BERT_PRINT_MAX_LEN)

always fails.

The new scheme skips individual CPER records that are too large, and also
limits the total number of records that will be printed to 5.

Fixes: 3f8dec11 ("ACPI/APEI: Limit printable size of BERT table data")
Cc: All applicable <stable@vger.kernel.org>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
parent 55b35052
...@@ -29,16 +29,26 @@ ...@@ -29,16 +29,26 @@
#undef pr_fmt #undef pr_fmt
#define pr_fmt(fmt) "BERT: " fmt #define pr_fmt(fmt) "BERT: " fmt
#define ACPI_BERT_PRINT_MAX_RECORDS 5
#define ACPI_BERT_PRINT_MAX_LEN 1024 #define ACPI_BERT_PRINT_MAX_LEN 1024
static int bert_disable; static int bert_disable;
/*
* Print "all" the error records in the BERT table, but avoid huge spam to
* the console if the BIOS included oversize records, or too many records.
* Skipping some records here does not lose anything because the full
* data is available to user tools in:
* /sys/firmware/acpi/tables/data/BERT
*/
static void __init bert_print_all(struct acpi_bert_region *region, static void __init bert_print_all(struct acpi_bert_region *region,
unsigned int region_len) unsigned int region_len)
{ {
struct acpi_hest_generic_status *estatus = struct acpi_hest_generic_status *estatus =
(struct acpi_hest_generic_status *)region; (struct acpi_hest_generic_status *)region;
int remain = region_len; int remain = region_len;
int printed = 0, skipped = 0;
u32 estatus_len; u32 estatus_len;
while (remain >= sizeof(struct acpi_bert_region)) { while (remain >= sizeof(struct acpi_bert_region)) {
...@@ -46,24 +56,26 @@ static void __init bert_print_all(struct acpi_bert_region *region, ...@@ -46,24 +56,26 @@ static void __init bert_print_all(struct acpi_bert_region *region,
if (remain < estatus_len) { if (remain < estatus_len) {
pr_err(FW_BUG "Truncated status block (length: %u).\n", pr_err(FW_BUG "Truncated status block (length: %u).\n",
estatus_len); estatus_len);
return; break;
} }
/* No more error records. */ /* No more error records. */
if (!estatus->block_status) if (!estatus->block_status)
return; break;
if (cper_estatus_check(estatus)) { if (cper_estatus_check(estatus)) {
pr_err(FW_BUG "Invalid error record.\n"); pr_err(FW_BUG "Invalid error record.\n");
return; break;
} }
if (estatus_len < ACPI_BERT_PRINT_MAX_LEN &&
printed < ACPI_BERT_PRINT_MAX_RECORDS) {
pr_info_once("Error records from previous boot:\n"); pr_info_once("Error records from previous boot:\n");
if (region_len < ACPI_BERT_PRINT_MAX_LEN)
cper_estatus_print(KERN_INFO HW_ERR, estatus); cper_estatus_print(KERN_INFO HW_ERR, estatus);
else printed++;
pr_info_once("Max print length exceeded, table data is available at:\n" } else {
"/sys/firmware/acpi/tables/data/BERT"); skipped++;
}
/* /*
* Because the boot error source is "one-time polled" type, * Because the boot error source is "one-time polled" type,
...@@ -75,6 +87,9 @@ static void __init bert_print_all(struct acpi_bert_region *region, ...@@ -75,6 +87,9 @@ static void __init bert_print_all(struct acpi_bert_region *region,
estatus = (void *)estatus + estatus_len; estatus = (void *)estatus + estatus_len;
remain -= estatus_len; remain -= estatus_len;
} }
if (skipped)
pr_info(HW_ERR "Skipped %d error records\n", skipped);
} }
static int __init setup_bert_disable(char *str) static int __init setup_bert_disable(char *str)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment