Commit a3a31a5e authored by Andrew Morton, committed by Linus Torvalds

[PATCH] infrastructure for handling pte_chain_alloc() failures

The VM allocates pte_chains with GFP_ATOMIC, under deep locking.  If that
allocation fails, we oops.

My approach to solving this is to require that the caller of page_add_rmap()
pass in a pte_chain structure for page_add_rmap() to use.  Then, callers can
arrange to allocate that structure outside locks with GFP_KERNEL.

This patch provides the base infrastructure.

A common case is that page_add_rmap() will in fact not consume the pte_chain,
because an empty slot was found within one of the page's existing pte_chain
structures.  So this patch provides for a special one-deep per-cpu pte_chain
cache to optimise this case of taking just one pte_chain and then immediately
putting it back.
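
As an illustration, the intended calling pattern looks like this (a sketch
only: the surrounding fault-path variables and the VM_FAULT_OOM error return
are illustrative, not part of this patch):

	struct pte_chain *pte_chain;

	/* Allocate outside the page_table_lock; GFP_KERNEL may sleep */
	pte_chain = pte_chain_alloc(GFP_KERNEL);
	if (!pte_chain)
		return VM_FAULT_OOM;

	spin_lock(&mm->page_table_lock);
	...
	/* Consumes the pte_chain and returns NULL, or returns it unused */
	pte_chain = page_add_rmap(page, ptep, pte_chain);
	spin_unlock(&mm->page_table_lock);

	/* No-op on NULL; otherwise refills the one-deep per-cpu cache */
	pte_chain_free(pte_chain);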

We end up adding maybe 20-30 instructions to the pagefault path to handle the
eventuality of pte_chain allocation failures.

Lots of other design ideas were considered.  This is the best I could come up
with.
parent 4d9ebed4
...
@@ -5,6 +5,11 @@
  * pte chain.
  */

+#include <linux/slab.h>
+
+struct pte_chain;
+extern kmem_cache_t *pte_chain_cache;
+
 static inline void pte_chain_lock(struct page *page)
 {
 	/*
@@ -31,3 +36,12 @@ static inline void pte_chain_unlock(struct page *page)
 #endif
 	preempt_enable();
 }
+
+struct pte_chain *pte_chain_alloc(int gfp_flags);
+void __pte_chain_free(struct pte_chain *pte_chain);
+
+static inline void pte_chain_free(struct pte_chain *pte_chain)
+{
+	if (pte_chain)
+		__pte_chain_free(pte_chain);
+}
...
@@ -169,11 +169,13 @@ extern int vm_swappiness;
 extern void out_of_memory(void);

 /* linux/mm/rmap.c */
-extern int FASTCALL(page_referenced(struct page *));
-extern void FASTCALL(page_add_rmap(struct page *, pte_t *));
-extern void FASTCALL(page_remove_rmap(struct page *, pte_t *));
-extern int FASTCALL(try_to_unmap(struct page *));
-extern int FASTCALL(page_over_rsslimit(struct page *));
+struct pte_chain;
+int FASTCALL(page_referenced(struct page *));
+struct pte_chain *FASTCALL(page_add_rmap(struct page *, pte_t *,
+					struct pte_chain *));
+void FASTCALL(page_remove_rmap(struct page *, pte_t *));
+int FASTCALL(try_to_unmap(struct page *));
+int FASTCALL(page_over_rsslimit(struct page *));

 /* return values of try_to_unmap */
 #define SWAP_SUCCESS	0
...
...
@@ -27,6 +27,7 @@
 #include <linux/init.h>
 #include <linux/rmap-locking.h>
 #include <linux/cache.h>
+#include <linux/percpu.h>

 #include <asm/pgalloc.h>
 #include <asm/rmap.h>
@@ -51,7 +52,7 @@ struct pte_chain {
 	pte_addr_t ptes[NRPTE];
 } ____cacheline_aligned;

-static kmem_cache_t *pte_chain_cache;
+kmem_cache_t *pte_chain_cache;

 /*
  * pte_chain list management policy:
@@ -70,39 +71,6 @@ static kmem_cache_t *pte_chain_cache;
  * victim pte and frees the head member if it became empty.
  */

-/**
- * pte_chain_alloc - allocate a pte_chain struct
- *
- * Returns a pointer to a fresh pte_chain structure.  Allocates new
- * pte_chain structures as required.
- * Caller needs to hold the page's pte_chain_lock.
- */
-static inline struct pte_chain *pte_chain_alloc(void)
-{
-	struct pte_chain *ret;
-
-	ret = kmem_cache_alloc(pte_chain_cache, GFP_ATOMIC);
-#ifdef DEBUG_RMAP
-	{
-		int i;
-		for (i = 0; i < NRPTE; i++)
-			BUG_ON(ret->ptes[i]);
-		BUG_ON(ret->next);
-	}
-#endif
-	return ret;
-}
-
-/**
- * pte_chain_free - free pte_chain structure
- * @pte_chain: pte_chain struct to free
- */
-static inline void pte_chain_free(struct pte_chain *pte_chain)
-{
-	pte_chain->next = NULL;
-	kmem_cache_free(pte_chain_cache, pte_chain);
-}
-
 /**
  ** VM stuff below this comment
  **/
@@ -156,7 +124,7 @@ int page_referenced(struct page * page)
 			page->pte.direct = pc->ptes[NRPTE-1];
 			SetPageDirect(page);
 			pc->ptes[NRPTE-1] = 0;
-			pte_chain_free(pc);
+			__pte_chain_free(pc);
 		}
 	}
 	return referenced;
@@ -170,10 +138,11 @@ int page_referenced(struct page * page)
 * Add a new pte reverse mapping to a page.
 * The caller needs to hold the mm->page_table_lock.
 */
-void page_add_rmap(struct page * page, pte_t * ptep)
+struct pte_chain *
+page_add_rmap(struct page *page, pte_t *ptep, struct pte_chain *pte_chain)
 {
 	pte_addr_t pte_paddr = ptep_to_paddr(ptep);
-	struct pte_chain *pte_chain;
+	struct pte_chain *cur_pte_chain;
 	int i;

 #ifdef DEBUG_RMAP
@@ -186,7 +155,7 @@ void page_add_rmap(struct page * page, pte_t * ptep)
 #endif

 	if (!pfn_valid(page_to_pfn(page)) || PageReserved(page))
-		return;
+		return pte_chain;

 	pte_chain_lock(page);

@@ -222,30 +191,28 @@ void page_add_rmap(struct page * page, pte_t * ptep)
 	if (PageDirect(page)) {
 		/* Convert a direct pointer into a pte_chain */
 		ClearPageDirect(page);
-		pte_chain = pte_chain_alloc();
 		pte_chain->ptes[NRPTE-1] = page->pte.direct;
 		pte_chain->ptes[NRPTE-2] = pte_paddr;
 		page->pte.direct = 0;
 		page->pte.chain = pte_chain;
+		pte_chain = NULL;	/* We consumed it */
 		goto out;
 	}

-	pte_chain = page->pte.chain;
-	if (pte_chain->ptes[0]) {	/* It's full */
-		struct pte_chain *new;
-
-		new = pte_chain_alloc();
-		new->next = pte_chain;
-		page->pte.chain = new;
-		new->ptes[NRPTE-1] = pte_paddr;
+	cur_pte_chain = page->pte.chain;
+	if (cur_pte_chain->ptes[0]) {	/* It's full */
+		pte_chain->next = cur_pte_chain;
+		page->pte.chain = pte_chain;
+		pte_chain->ptes[NRPTE-1] = pte_paddr;
+		pte_chain = NULL;	/* We consumed it */
 		goto out;
 	}

-	BUG_ON(!pte_chain->ptes[NRPTE-1]);
+	BUG_ON(!cur_pte_chain->ptes[NRPTE-1]);

 	for (i = NRPTE-2; i >= 0; i--) {
-		if (!pte_chain->ptes[i]) {
-			pte_chain->ptes[i] = pte_paddr;
+		if (!cur_pte_chain->ptes[i]) {
+			cur_pte_chain->ptes[i] = pte_paddr;
 			goto out;
 		}
 	}
@@ -253,7 +220,7 @@ void page_add_rmap(struct page * page, pte_t * ptep)

 out:
 	pte_chain_unlock(page);
 	inc_page_state(nr_reverse_maps);
-	return;
+	return pte_chain;
 }
@@ -311,7 +278,7 @@ void page_remove_rmap(struct page * page, pte_t * ptep)
 		if (victim_i == NRPTE-1) {
 			/* Emptied a pte_chain */
 			page->pte.chain = start->next;
-			pte_chain_free(start);
+			__pte_chain_free(start);
 		} else {
 			/* Do singleton->PageDirect here */
 		}
@@ -486,7 +453,7 @@ int try_to_unmap(struct page * page)
 		victim_i++;
 		if (victim_i == NRPTE) {
 			page->pte.chain = start->next;
-			pte_chain_free(start);
+			__pte_chain_free(start);
 			start = page->pte.chain;
 			victim_i = 0;
 		}
@@ -522,6 +489,58 @@ static void pte_chain_ctor(void *p, kmem_cache_t *cachep, unsigned long flags)
 	memset(pc, 0, sizeof(*pc));
 }
+
+DEFINE_PER_CPU(struct pte_chain *, local_pte_chain) = 0;
+
+/**
+ * __pte_chain_free - free pte_chain structure
+ * @pte_chain: pte_chain struct to free
+ */
+void __pte_chain_free(struct pte_chain *pte_chain)
+{
+	int cpu = get_cpu();
+	struct pte_chain **pte_chainp;
+
+	if (pte_chain->next)
+		pte_chain->next = NULL;
+	pte_chainp = &per_cpu(local_pte_chain, cpu);
+	if (*pte_chainp)
+		kmem_cache_free(pte_chain_cache, *pte_chainp);
+	*pte_chainp = pte_chain;
+	put_cpu();
+}
+
+/*
+ * pte_chain_alloc(): allocate a pte_chain structure for use by page_add_rmap().
+ *
+ * The caller of page_add_rmap() must perform the allocation because
+ * page_add_rmap() is invariably called under spinlock.  Often, page_add_rmap()
+ * will not actually use the pte_chain, because there is space available in one
+ * of the existing pte_chains which are attached to the page.  So the case of
+ * allocating and then freeing a single pte_chain is specially optimised here,
+ * with a one-deep per-cpu cache.
+ */
+struct pte_chain *pte_chain_alloc(int gfp_flags)
+{
+	int cpu;
+	struct pte_chain *ret;
+	struct pte_chain **pte_chainp;
+
+	if (gfp_flags & __GFP_WAIT)
+		might_sleep();
+
+	cpu = get_cpu();
+	pte_chainp = &per_cpu(local_pte_chain, cpu);
+	if (*pte_chainp) {
+		ret = *pte_chainp;
+		*pte_chainp = NULL;
+		put_cpu();
+	} else {
+		put_cpu();
+		ret = kmem_cache_alloc(pte_chain_cache, gfp_flags);
+	}
+	return ret;
+}

 void __init pte_chain_init(void)
 {
 	pte_chain_cache = kmem_cache_create(	"pte_chain",
...