Commit 92cb54a3 authored by Ingo Molnar

x86: make DEBUG_PAGEALLOC and CPA more robust

Use PF_MEMALLOC to prevent recursive calls in the DEBUG_PAGEALLOC
case. This makes the code simpler and more robust against allocation
failures.

This fixes the following fallback to non-mmconfig:

   http://lkml.org/lkml/2008/2/20/551
   http://bugzilla.kernel.org/show_bug.cgi?id=10083

Also, for DEBUG_PAGEALLOC=n reduce the pool size to one page.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
parent 1ce70c4f
...@@ -44,6 +44,12 @@ static inline unsigned long highmap_end_pfn(void) ...@@ -44,6 +44,12 @@ static inline unsigned long highmap_end_pfn(void)
#endif #endif
#ifdef CONFIG_DEBUG_PAGEALLOC
# define debug_pagealloc 1
#else
# define debug_pagealloc 0
#endif
static inline int static inline int
within(unsigned long addr, unsigned long start, unsigned long end) within(unsigned long addr, unsigned long start, unsigned long end)
{ {
...@@ -355,45 +361,48 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, ...@@ -355,45 +361,48 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
static LIST_HEAD(page_pool); static LIST_HEAD(page_pool);
static unsigned long pool_size, pool_pages, pool_low; static unsigned long pool_size, pool_pages, pool_low;
static unsigned long pool_used, pool_failed, pool_refill; static unsigned long pool_used, pool_failed;
static void cpa_fill_pool(void) static void cpa_fill_pool(struct page **ret)
{ {
struct page *p;
gfp_t gfp = GFP_KERNEL; gfp_t gfp = GFP_KERNEL;
unsigned long flags;
struct page *p;
/* Do not allocate from interrupt context */
if (in_irq() || irqs_disabled())
return;
/* /*
* Check unlocked. I does not matter when we have one more * Avoid recursion (on debug-pagealloc) and also signal
* page in the pool. The bit lock avoids recursive pool * our priority to get to these pagetables:
* allocations:
*/ */
if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill)) if (current->flags & PF_MEMALLOC)
return; return;
current->flags |= PF_MEMALLOC;
#ifdef CONFIG_DEBUG_PAGEALLOC
/* /*
* We could do: * Allocate atomically from atomic contexts:
* gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL;
* but this fails on !PREEMPT kernels
*/ */
gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN; if (in_atomic() || irqs_disabled() || debug_pagealloc)
#endif gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
while (pool_pages < pool_size) { while (pool_pages < pool_size || (ret && !*ret)) {
p = alloc_pages(gfp, 0); p = alloc_pages(gfp, 0);
if (!p) { if (!p) {
pool_failed++; pool_failed++;
break; break;
} }
spin_lock_irq(&pgd_lock); /*
* If the call site needs a page right now, provide it:
*/
if (ret && !*ret) {
*ret = p;
continue;
}
spin_lock_irqsave(&pgd_lock, flags);
list_add(&p->lru, &page_pool); list_add(&p->lru, &page_pool);
pool_pages++; pool_pages++;
spin_unlock_irq(&pgd_lock); spin_unlock_irqrestore(&pgd_lock, flags);
} }
clear_bit_unlock(0, &pool_refill);
current->flags &= ~PF_MEMALLOC;
} }
#define SHIFT_MB (20 - PAGE_SHIFT) #define SHIFT_MB (20 - PAGE_SHIFT)
...@@ -414,11 +423,15 @@ void __init cpa_init(void) ...@@ -414,11 +423,15 @@ void __init cpa_init(void)
* GiB. Shift MiB to Gib and multiply the result by * GiB. Shift MiB to Gib and multiply the result by
* POOL_PAGES_PER_GB: * POOL_PAGES_PER_GB:
*/ */
gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB; if (debug_pagealloc) {
pool_size = POOL_PAGES_PER_GB * gb; gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
pool_size = POOL_PAGES_PER_GB * gb;
} else {
pool_size = 1;
}
pool_low = pool_size; pool_low = pool_size;
cpa_fill_pool(); cpa_fill_pool(NULL);
printk(KERN_DEBUG printk(KERN_DEBUG
"CPA: page pool initialized %lu of %lu pages preallocated\n", "CPA: page pool initialized %lu of %lu pages preallocated\n",
pool_pages, pool_size); pool_pages, pool_size);
...@@ -440,16 +453,20 @@ static int split_large_page(pte_t *kpte, unsigned long address) ...@@ -440,16 +453,20 @@ static int split_large_page(pte_t *kpte, unsigned long address)
spin_lock_irqsave(&pgd_lock, flags); spin_lock_irqsave(&pgd_lock, flags);
if (list_empty(&page_pool)) { if (list_empty(&page_pool)) {
spin_unlock_irqrestore(&pgd_lock, flags); spin_unlock_irqrestore(&pgd_lock, flags);
return -ENOMEM; base = NULL;
cpa_fill_pool(&base);
if (!base)
return -ENOMEM;
spin_lock_irqsave(&pgd_lock, flags);
} else {
base = list_first_entry(&page_pool, struct page, lru);
list_del(&base->lru);
pool_pages--;
if (pool_pages < pool_low)
pool_low = pool_pages;
} }
base = list_first_entry(&page_pool, struct page, lru);
list_del(&base->lru);
pool_pages--;
if (pool_pages < pool_low)
pool_low = pool_pages;
/* /*
* Check for races, another CPU might have split this page * Check for races, another CPU might have split this page
* up for us already: * up for us already:
...@@ -734,7 +751,8 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages, ...@@ -734,7 +751,8 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
cpa_flush_all(cache); cpa_flush_all(cache);
out: out:
cpa_fill_pool(); cpa_fill_pool(NULL);
return ret; return ret;
} }
...@@ -897,7 +915,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable) ...@@ -897,7 +915,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
* Try to refill the page pool here. We can do this only after * Try to refill the page pool here. We can do this only after
* the tlb flush. * the tlb flush.
*/ */
cpa_fill_pool(); cpa_fill_pool(NULL);
} }
#ifdef CONFIG_HIBERNATION #ifdef CONFIG_HIBERNATION
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment