Commit aaf2ef19 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] handle pte_chain_alloc() failures

Update page_add_rmap() callers to allocate their own pte_chain structures,
and to pass those into page_add_rmap().

The swapoff path has not yet been updated and is still oopsable.  The locking
there is tricky.
parent a3a31a5e
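
For orientation, the calling convention this patch introduces looks roughly like the sketch below. Only pte_chain_alloc(), page_add_rmap() and pte_chain_free() are interfaces touched by the patch; the helper itself is illustrative and not a function in the tree.

/* Illustrative only: shows the shape of the converted callers. */
static int example_map_one_page(struct mm_struct *mm, struct page *page,
                                pte_t *ptep)
{
        struct pte_chain *pte_chain;

        /* Allocate while sleeping is still allowed, before page_table_lock. */
        pte_chain = pte_chain_alloc(GFP_KERNEL);
        if (!pte_chain)
                return -ENOMEM;

        spin_lock(&mm->page_table_lock);
        /* ... pte setup (set_pte() etc.) omitted ... */
        /*
         * page_add_rmap() takes the preallocated chain and returns whatever
         * it did not consume (possibly NULL), so the caller can reuse it
         * for the next pte or release it.
         */
        pte_chain = page_add_rmap(page, ptep, pte_chain);
        spin_unlock(&mm->page_table_lock);

        pte_chain_free(pte_chain);      /* accepts NULL */
        return 0;
}

Callers that cannot allocate before taking the lock (copy_page_range(), do_anonymous_page()) try GFP_ATOMIC first and fall back to dropping the lock for a GFP_KERNEL allocation, as the hunks below show.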
@@ -45,6 +45,7 @@
#include <linux/ptrace.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/rmap-locking.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
@@ -292,12 +293,13 @@ void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long a
pgd_t * pgd;
pmd_t * pmd;
pte_t * pte;
struct pte_chain *pte_chain;
if (page_count(page) != 1)
printk(KERN_ERR "mem_map disagrees with %p at %08lx\n", page, address);
pgd = pgd_offset(tsk->mm, address);
pte_chain = pte_chain_alloc(GFP_KERNEL);
spin_lock(&tsk->mm->page_table_lock);
pmd = pmd_alloc(tsk->mm, pgd, address);
if (!pmd)
@@ -313,17 +315,19 @@ void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long a
flush_dcache_page(page);
flush_page_to_ram(page);
set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, PAGE_COPY))));
page_add_rmap(page, pte);
pte_chain = page_add_rmap(page, pte, pte_chain);
pte_unmap(pte);
tsk->mm->rss++;
spin_unlock(&tsk->mm->page_table_lock);
/* no need for flush_tlb */
pte_chain_free(pte_chain);
return;
out:
spin_unlock(&tsk->mm->page_table_lock);
__free_page(page);
force_sig(SIGKILL, tsk);
pte_chain_free(pte_chain);
return;
}
@@ -11,6 +11,7 @@
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swapops.h>
#include <linux/rmap-locking.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
@@ -52,6 +53,7 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
pte_t *pte, entry;
pgd_t *pgd;
pmd_t *pmd;
struct pte_chain *pte_chain = NULL;
pgd = pgd_offset(mm, addr);
spin_lock(&mm->page_table_lock);
@@ -60,6 +62,7 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (!pmd)
goto err_unlock;
pte_chain = pte_chain_alloc(GFP_KERNEL);
pte = pte_alloc_map(mm, pmd, addr);
if (!pte)
goto err_unlock;
@@ -73,16 +76,17 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (prot & PROT_WRITE)
entry = pte_mkwrite(pte_mkdirty(entry));
set_pte(pte, entry);
page_add_rmap(page, pte);
pte_chain = page_add_rmap(page, pte, pte_chain);
pte_unmap(pte);
flush_tlb_page(vma, addr);
spin_unlock(&mm->page_table_lock);
pte_chain_free(pte_chain);
return 0;
err_unlock:
spin_unlock(&mm->page_table_lock);
pte_chain_free(pte_chain);
return err;
}
@@ -44,6 +44,7 @@
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/vcache.h>
#include <linux/rmap-locking.h>
#include <asm/pgalloc.h>
#include <asm/rmap.h>
@@ -210,10 +211,20 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
unsigned long address = vma->vm_start;
unsigned long end = vma->vm_end;
unsigned long cow;
struct pte_chain *pte_chain = NULL;
if (is_vm_hugetlb_page(vma))
return copy_hugetlb_page_range(dst, src, vma);
pte_chain = pte_chain_alloc(GFP_ATOMIC);
if (!pte_chain) {
spin_unlock(&dst->page_table_lock);
pte_chain = pte_chain_alloc(GFP_KERNEL);
spin_lock(&dst->page_table_lock);
if (!pte_chain)
goto nomem;
}
cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
src_pgd = pgd_offset(src, address)-1;
dst_pgd = pgd_offset(dst, address)-1;
@@ -306,7 +317,30 @@ skip_copy_pmd_range: address = (address + PGDIR_SIZE) & PGDIR_MASK;
cont_copy_pte_range:
set_pte(dst_pte, pte);
page_add_rmap(page, dst_pte);
pte_chain = page_add_rmap(page, dst_pte,
pte_chain);
if (pte_chain)
goto cont_copy_pte_range_noset;
pte_chain = pte_chain_alloc(GFP_ATOMIC);
if (pte_chain)
goto cont_copy_pte_range_noset;
/*
* pte_chain allocation failed, and we need to
* run page reclaim.
*/
pte_unmap_nested(src_pte);
pte_unmap(dst_pte);
spin_unlock(&src->page_table_lock);
spin_unlock(&dst->page_table_lock);
pte_chain = pte_chain_alloc(GFP_KERNEL);
spin_lock(&dst->page_table_lock);
if (!pte_chain)
goto nomem;
spin_lock(&src->page_table_lock);
dst_pte = pte_offset_map(dst_pmd, address);
src_pte = pte_offset_map_nested(src_pmd,
address);
cont_copy_pte_range_noset:
address += PAGE_SIZE;
if (address >= end) {
@@ -329,12 +363,15 @@ skip_copy_pmd_range: address = (address + PGDIR_SIZE) & PGDIR_MASK;
out_unlock:
spin_unlock(&src->page_table_lock);
out:
pte_chain_free(pte_chain);
return 0;
nomem:
pte_chain_free(pte_chain);
return -ENOMEM;
}
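
The copy_page_range() hunk above, and do_anonymous_page() further down, handle the awkward case where a pte_chain is needed while page_table_lock is already held: an opportunistic GFP_ATOMIC allocation is tried first, and only if that fails are the pte kmaps and locks dropped so that a GFP_KERNEL allocation can run reclaim. Distilled into an illustrative helper (pte_chain_alloc_locked() is not a function in the patch; copy_page_range() open-codes the same steps with both mms' locks):

/*
 * Illustrative sketch: refill a pte_chain under mm->page_table_lock.
 * The caller must have released any atomic pte kmaps (pte_unmap())
 * before calling, since we may sleep after dropping the lock.
 */
static struct pte_chain *pte_chain_alloc_locked(struct mm_struct *mm)
{
        struct pte_chain *pte_chain;

        pte_chain = pte_chain_alloc(GFP_ATOMIC);
        if (pte_chain)
                return pte_chain;

        /* Cannot run reclaim under a spinlock: drop it, sleep, retake it. */
        spin_unlock(&mm->page_table_lock);
        pte_chain = pte_chain_alloc(GFP_KERNEL);
        spin_lock(&mm->page_table_lock);

        /*
         * Returns NULL if even the sleeping allocation failed.  Either way
         * the caller must re-establish its pte mappings with
         * pte_offset_map(), as the hunks in this patch do.
         */
        return pte_chain;
}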
static void zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, unsigned long size)
static void
zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, unsigned long size)
{
unsigned long offset;
pte_t *ptep;
@@ -816,6 +853,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
{
struct page *old_page, *new_page;
unsigned long pfn = pte_pfn(pte);
struct pte_chain *pte_chain = NULL;
if (!pfn_valid(pfn))
goto bad_wp_page;
@@ -844,6 +882,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
if (!new_page)
goto no_mem;
copy_cow_page(old_page,new_page,address);
pte_chain = pte_chain_alloc(GFP_KERNEL);
/*
* Re-check the pte - we dropped the lock
@@ -855,7 +894,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
++mm->rss;
page_remove_rmap(old_page, page_table);
break_cow(vma, new_page, address, page_table);
page_add_rmap(new_page, page_table);
pte_chain = page_add_rmap(new_page, page_table, pte_chain);
lru_cache_add_active(new_page);
/* Free the old page.. */
@@ -865,6 +904,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
spin_unlock(&mm->page_table_lock);
page_cache_release(new_page);
page_cache_release(old_page);
pte_chain_free(pte_chain);
return VM_FAULT_MINOR;
bad_wp_page:
@@ -1002,6 +1042,7 @@ static int do_swap_page(struct mm_struct * mm,
swp_entry_t entry = pte_to_swp_entry(orig_pte);
pte_t pte;
int ret = VM_FAULT_MINOR;
struct pte_chain *pte_chain = NULL;
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
@@ -1031,6 +1072,11 @@ static int do_swap_page(struct mm_struct * mm,
}
mark_page_accessed(page);
pte_chain = pte_chain_alloc(GFP_KERNEL);
if (!pte_chain) {
ret = -ENOMEM;
goto out;
}
lock_page(page);
/*
@@ -1063,13 +1109,14 @@ static int do_swap_page(struct mm_struct * mm,
flush_page_to_ram(page);
flush_icache_page(vma, page);
set_pte(page_table, pte);
page_add_rmap(page, page_table);
pte_chain = page_add_rmap(page, page_table, pte_chain);
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, address, pte);
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
out:
pte_chain_free(pte_chain);
return ret;
}
@@ -1078,10 +1125,26 @@ static int do_swap_page(struct mm_struct * mm,
* spinlock held to protect against concurrent faults in
* multithreaded programs.
*/
static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, pmd_t *pmd, int write_access, unsigned long addr)
static int
do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
pte_t *page_table, pmd_t *pmd, int write_access,
unsigned long addr)
{
pte_t entry;
struct page * page = ZERO_PAGE(addr);
struct pte_chain *pte_chain;
int ret;
pte_chain = pte_chain_alloc(GFP_ATOMIC);
if (!pte_chain) {
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
pte_chain = pte_chain_alloc(GFP_KERNEL);
if (!pte_chain)
goto no_mem;
spin_lock(&mm->page_table_lock);
page_table = pte_offset_map(pmd, addr);
}
/* Read-only mapping of ZERO_PAGE. */
entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));
@@ -1104,7 +1167,8 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma,
pte_unmap(page_table);
page_cache_release(page);
spin_unlock(&mm->page_table_lock);
return VM_FAULT_MINOR;
ret = VM_FAULT_MINOR;
goto out;
}
mm->rss++;
flush_page_to_ram(page);
@@ -1114,16 +1178,21 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma,
}
set_pte(page_table, entry);
page_add_rmap(page, page_table); /* ignores ZERO_PAGE */
/* ignores ZERO_PAGE */
pte_chain = page_add_rmap(page, page_table, pte_chain);
pte_unmap(page_table);
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, addr, entry);
spin_unlock(&mm->page_table_lock);
return VM_FAULT_MINOR;
ret = VM_FAULT_MINOR;
goto out;
no_mem:
return VM_FAULT_OOM;
ret = VM_FAULT_OOM;
out:
pte_chain_free(pte_chain);
return ret;
}
/*
@@ -1138,14 +1207,17 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma,
* This is called with the MM semaphore held and the page table
* spinlock held. Exit with the spinlock released.
*/
static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
static int
do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, int write_access, pte_t *page_table, pmd_t *pmd)
{
struct page * new_page;
pte_t entry;
struct pte_chain *pte_chain;
if (!vma->vm_ops || !vma->vm_ops->nopage)
return do_anonymous_page(mm, vma, page_table, pmd, write_access, address);
return do_anonymous_page(mm, vma, page_table,
pmd, write_access, address);
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
@@ -1172,6 +1244,7 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
new_page = page;
}
pte_chain = pte_chain_alloc(GFP_KERNEL);
spin_lock(&mm->page_table_lock);
page_table = pte_offset_map(pmd, address);
@@ -1194,19 +1267,21 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
if (write_access)
entry = pte_mkwrite(pte_mkdirty(entry));
set_pte(page_table, entry);
page_add_rmap(new_page, page_table);
pte_chain = page_add_rmap(new_page, page_table, pte_chain);
pte_unmap(page_table);
} else {
/* One of our sibling threads was faster, back out. */
pte_unmap(page_table);
page_cache_release(new_page);
spin_unlock(&mm->page_table_lock);
pte_chain_free(pte_chain);
return VM_FAULT_MINOR;
}
/* no need to invalidate: a not-present page shouldn't be cached */
update_mmu_cache(vma, address, entry);
spin_unlock(&mm->page_table_lock);
pte_chain_free(pte_chain);
return VM_FAULT_MAJOR;
}
@@ -15,6 +15,7 @@
#include <linux/swap.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/rmap-locking.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
@@ -81,7 +82,9 @@ static inline pte_t *alloc_one_pte_map(struct mm_struct *mm, unsigned long addr)
return pte;
}
static int copy_one_pte(struct mm_struct *mm, pte_t * src, pte_t * dst)
static int
copy_one_pte(struct mm_struct *mm, pte_t *src, pte_t *dst,
struct pte_chain **pte_chainp)
{
int error = 0;
pte_t pte;
@@ -101,17 +104,25 @@ static int copy_one_pte(struct mm_struct *mm, pte_t * src, pte_t * dst)
}
set_pte(dst, pte);
if (page)
page_add_rmap(page, dst);
*pte_chainp = page_add_rmap(page, dst, *pte_chainp);
}
return error;
}
static int move_one_page(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr)
static int
move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long new_addr)
{
struct mm_struct *mm = vma->vm_mm;
int error = 0;
pte_t *src, *dst;
struct pte_chain *pte_chain;
pte_chain = pte_chain_alloc(GFP_KERNEL);
if (!pte_chain) {
error = -ENOMEM;
goto out;
}
spin_lock(&mm->page_table_lock);
src = get_one_pte_map_nested(mm, old_addr);
if (src) {
@@ -127,12 +138,14 @@ static int move_one_page(struct vm_area_struct *vma, unsigned long old_addr, uns
dst = alloc_one_pte_map(mm, new_addr);
if (src == NULL)
src = get_one_pte_map_nested(mm, old_addr);
error = copy_one_pte(mm, src, dst);
error = copy_one_pte(mm, src, dst, &pte_chain);
pte_unmap_nested(src);
pte_unmap(dst);
}
flush_tlb_page(vma, old_addr);
spin_unlock(&mm->page_table_lock);
pte_chain_free(pte_chain);
out:
return error;
}
......
@@ -20,6 +20,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/rmap-locking.h>
#include <asm/pgtable.h>
#include <linux/swapops.h>
@@ -377,8 +378,9 @@ void free_swap_and_cache(swp_entry_t entry)
* what to do if a write is requested later.
*/
/* mmlist_lock and vma->vm_mm->page_table_lock are held */
static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address,
pte_t *dir, swp_entry_t entry, struct page* page)
static void
unuse_pte(struct vm_area_struct *vma, unsigned long address, pte_t *dir,
swp_entry_t entry, struct page *page, struct pte_chain **pte_chainp)
{
pte_t pte = *dir;
@@ -388,7 +390,7 @@ static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address,
return;
get_page(page);
set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot)));
page_add_rmap(page, dir);
*pte_chainp = page_add_rmap(page, dir, *pte_chainp);
swap_free(entry);
++vma->vm_mm->rss;
}
@@ -400,6 +402,7 @@ static void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
{
pte_t * pte;
unsigned long end;
struct pte_chain *pte_chain = NULL;
if (pmd_none(*dir))
return;
@@ -415,11 +418,18 @@ static void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
if (end > PMD_SIZE)
end = PMD_SIZE;
do {
unuse_pte(vma, offset+address-vma->vm_start, pte, entry, page);
/*
* FIXME: handle pte_chain_alloc() failures
*/
if (pte_chain == NULL)
pte_chain = pte_chain_alloc(GFP_ATOMIC);
unuse_pte(vma, offset+address-vma->vm_start,
pte, entry, page, &pte_chain);
address += PAGE_SIZE;
pte++;
} while (address && (address < end));
pte_unmap(pte - 1);
pte_chain_free(pte_chain);
}
/* mmlist_lock and vma->vm_mm->page_table_lock are held */