Commit ea8f5fb8 authored by Huang Ying, committed by Len Brown

HWPoison: add memory_failure_queue()

memory_failure() is the entry point for HWPoison memory error
recovery.  It must be called in process context.  But commonly
hardware memory errors are notified via MCE or NMI, so some delayed
execution mechanism must be used.  In MCE handler, a work queue + ring
buffer mechanism is used.

In addition to MCE, now APEI (ACPI Platform Error Interface) GHES
(Generic Hardware Error Source) can be used to report memory errors
too.  To add support to APEI GHES memory recovery, a mechanism similar
to that of MCE is implemented.  memory_failure_queue() is the new
entry point that can be called in IRQ context.  The next step is to
make the MCE handler use this interface too.
Signed-off-by: Huang Ying <ying.huang@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
parent 152cef40
...@@ -1633,6 +1633,7 @@ enum mf_flags { ...@@ -1633,6 +1633,7 @@ enum mf_flags {
}; };
extern void memory_failure(unsigned long pfn, int trapno); extern void memory_failure(unsigned long pfn, int trapno);
extern int __memory_failure(unsigned long pfn, int trapno, int flags); extern int __memory_failure(unsigned long pfn, int trapno, int flags);
extern void memory_failure_queue(unsigned long pfn, int trapno, int flags);
extern int unpoison_memory(unsigned long pfn); extern int unpoison_memory(unsigned long pfn);
extern int sysctl_memory_failure_early_kill; extern int sysctl_memory_failure_early_kill;
extern int sysctl_memory_failure_recovery; extern int sysctl_memory_failure_recovery;
......
...@@ -53,6 +53,7 @@ ...@@ -53,6 +53,7 @@
#include <linux/hugetlb.h> #include <linux/hugetlb.h>
#include <linux/memory_hotplug.h> #include <linux/memory_hotplug.h>
#include <linux/mm_inline.h> #include <linux/mm_inline.h>
#include <linux/kfifo.h>
#include "internal.h" #include "internal.h"
int sysctl_memory_failure_early_kill __read_mostly = 0; int sysctl_memory_failure_early_kill __read_mostly = 0;
...@@ -1178,6 +1179,97 @@ void memory_failure(unsigned long pfn, int trapno) ...@@ -1178,6 +1179,97 @@ void memory_failure(unsigned long pfn, int trapno)
__memory_failure(pfn, trapno, 0); __memory_failure(pfn, trapno, 0);
} }
/* log2 of the capacity of each per-CPU memory-failure FIFO. */
#define MEMORY_FAILURE_FIFO_ORDER 4
/* Capacity, in entries, of each per-CPU memory-failure FIFO (1 << 4 == 16). */
#define MEMORY_FAILURE_FIFO_SIZE (1 << MEMORY_FAILURE_FIFO_ORDER)
/*
 * One queued memory-failure report.  The three fields are stored by
 * memory_failure_queue() and later handed, unchanged, to
 * __memory_failure() by memory_failure_work_func().
 */
struct memory_failure_entry {
/* Page frame number of the corrupted page. */
unsigned long pfn;
/* Trap number reported in the signal to user space. */
int trapno;
/* Flags for memory failure handling. */
int flags;
};
/*
 * Per-CPU deferred memory-failure queue: a fixed-size FIFO of pending
 * reports plus the work item that drains it.
 */
struct memory_failure_cpu {
/* Pending reports; holds at most MEMORY_FAILURE_FIFO_SIZE entries. */
DECLARE_KFIFO(fifo, struct memory_failure_entry,
MEMORY_FAILURE_FIFO_SIZE);
/* Held (with IRQs saved/disabled) around every kfifo_put()/kfifo_get() on @fifo. */
spinlock_t lock;
/* Work item whose handler is memory_failure_work_func(). */
struct work_struct work;
};
/* One queue instance per CPU; set up in memory_failure_init(). */
static DEFINE_PER_CPU(struct memory_failure_cpu, memory_failure_cpu);
/**
 * memory_failure_queue - Schedule handling memory failure of a page.
 * @pfn: Page Number of the corrupted page
 * @trapno: Trap number reported in the signal to user space.
 * @flags: Flags for memory failure handling
 *
 * This function is called by the low level hardware error handler
 * when it detects hardware memory corruption of a page. It schedules
 * the recovering of error page, including dropping pages, killing
 * processes etc.
 *
 * The report is appended to the calling CPU's FIFO and that CPU's work
 * item is scheduled; the work handler later passes each queued report
 * to __memory_failure().  If the FIFO is already full the report is
 * dropped and an error is logged.
 *
 * The function is primarily of use for corruptions that
 * happen outside the current execution context (e.g. when
 * detected by a background scrubber)
 *
 * Can run in IRQ context.
 */
void memory_failure_queue(unsigned long pfn, int trapno, int flags)
{
	struct memory_failure_cpu *mf_cpu;
	unsigned long proc_flags;
	struct memory_failure_entry entry = {
		.pfn = pfn,
		.trapno = trapno,
		.flags = flags,
	};

	/* get_cpu_var() pins us to this CPU until put_cpu_var(). */
	mf_cpu = &get_cpu_var(memory_failure_cpu);
	spin_lock_irqsave(&mf_cpu->lock, proc_flags);
	if (kfifo_put(&mf_cpu->fifo, &entry)) {
		schedule_work_on(smp_processor_id(), &mf_cpu->work);
	} else {
		/* "%#lx" already emits the "0x" prefix; do not add another. */
		pr_err("Memory failure: buffer overflow when queuing memory failure at %#lx\n",
		       pfn);
	}
	spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
	put_cpu_var(memory_failure_cpu);
}
EXPORT_SYMBOL_GPL(memory_failure_queue);
static void memory_failure_work_func(struct work_struct *work)
{
struct memory_failure_cpu *mf_cpu;
struct memory_failure_entry entry = { 0, };
unsigned long proc_flags;
int gotten;
mf_cpu = &__get_cpu_var(memory_failure_cpu);
for (;;) {
spin_lock_irqsave(&mf_cpu->lock, proc_flags);
gotten = kfifo_get(&mf_cpu->fifo, &entry);
spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
if (!gotten)
break;
__memory_failure(entry.pfn, entry.trapno, entry.flags);
}
}
/*
 * Boot-time setup of the deferred memory-failure queues: for every
 * possible CPU, initialise the FIFO, its lock, and the work item bound
 * to memory_failure_work_func().  Always returns 0.
 */
static int __init memory_failure_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct memory_failure_cpu *queue = &per_cpu(memory_failure_cpu, cpu);

		INIT_KFIFO(queue->fifo);
		spin_lock_init(&queue->lock);
		INIT_WORK(&queue->work, memory_failure_work_func);
	}

	return 0;
}
core_initcall(memory_failure_init);
/** /**
* unpoison_memory - Unpoison a previously poisoned page * unpoison_memory - Unpoison a previously poisoned page
* @pfn: Page number of the to be unpoisoned page * @pfn: Page number of the to be unpoisoned page
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment