Commit b5810039, authored by Nick Piggin and committed by Linus Torvalds

[PATCH] core remove PageReserved

Remove PageReserved() calls from core code by tightening VM_RESERVED
handling in mm/ to cover PageReserved functionality.

PageReserved special casing is removed from get_page and put_page.

All setting and clearing of PageReserved is retained, and it is now flagged
in the page_alloc checks to help ensure we don't introduce any refcount
based freeing of Reserved pages.

MAP_PRIVATE, PROT_WRITE of VM_RESERVED regions is tentatively being
deprecated.  We never completely handled it correctly anyway, and it can be
reintroduced in future if required (Hugh has a proof of concept).

Once PageReserved() calls are removed from kernel/power/swsusp.c, and all
arch/ and driver code, the Set and Clear calls, and the PG_reserved bit can
be trivially removed.

Last real user of PageReserved is swsusp, which uses PageReserved to
determine whether a struct page points to valid memory or not.  This still
needs to be addressed (a generic page_is_ram() should work).

A last caveat: the ZERO_PAGE is now refcounted and managed with rmap (and
thus mapcounted, and it counts towards shared rss).  These writes to the
struct page could cause excessive cacheline bouncing on big systems.  There
are a number of ways this could be addressed if it is an issue.
Signed-off-by: Nick Piggin <npiggin@suse.de>

Refcount bug fix for filemap_xip.c
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent f9c98d02
...@@ -176,13 +176,13 @@ static struct page * vdso_vma_nopage(struct vm_area_struct * vma, ...@@ -176,13 +176,13 @@ static struct page * vdso_vma_nopage(struct vm_area_struct * vma,
return NOPAGE_SIGBUS; return NOPAGE_SIGBUS;
/* /*
* Last page is systemcfg, special handling here, no get_page() a * Last page is systemcfg.
* this is a reserved page
*/ */
if ((vma->vm_end - address) <= PAGE_SIZE) if ((vma->vm_end - address) <= PAGE_SIZE)
return virt_to_page(systemcfg); pg = virt_to_page(systemcfg);
else
pg = virt_to_page(vbase + offset);
pg = virt_to_page(vbase + offset);
get_page(pg); get_page(pg);
DBG(" ->page count: %d\n", page_count(pg)); DBG(" ->page count: %d\n", page_count(pg));
...@@ -259,7 +259,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack) ...@@ -259,7 +259,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack)
* gettimeofday will be totally dead. It's fine to use that for setting * gettimeofday will be totally dead. It's fine to use that for setting
* breakpoints in the vDSO code pages though * breakpoints in the vDSO code pages though
*/ */
vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | VM_RESERVED;
vma->vm_flags |= mm->def_flags; vma->vm_flags |= mm->def_flags;
vma->vm_page_prot = protection_map[vma->vm_flags & 0x7]; vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
vma->vm_ops = &vdso_vmops; vma->vm_ops = &vdso_vmops;
...@@ -603,6 +603,8 @@ void __init vdso_init(void) ...@@ -603,6 +603,8 @@ void __init vdso_init(void)
ClearPageReserved(pg); ClearPageReserved(pg);
get_page(pg); get_page(pg);
} }
get_page(virt_to_page(systemcfg));
} }
int in_gate_area_no_task(unsigned long addr) int in_gate_area_no_task(unsigned long addr)
......
...@@ -73,6 +73,9 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from, ...@@ -73,6 +73,9 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
int space = GET_IOSPACE(pfn); int space = GET_IOSPACE(pfn);
unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT; unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT;
/* See comment in mm/memory.c remap_pfn_range */
vma->vm_flags |= VM_IO | VM_RESERVED;
prot = __pgprot(pg_iobits); prot = __pgprot(pg_iobits);
offset -= from; offset -= from;
dir = pgd_offset(mm, from); dir = pgd_offset(mm, from);
......
...@@ -127,6 +127,9 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from, ...@@ -127,6 +127,9 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
int space = GET_IOSPACE(pfn); int space = GET_IOSPACE(pfn);
unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT; unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT;
/* See comment in mm/memory.c remap_pfn_range */
vma->vm_flags |= VM_IO | VM_RESERVED;
prot = __pgprot(pg_iobits); prot = __pgprot(pg_iobits);
offset -= from; offset -= from;
dir = pgd_offset(mm, from); dir = pgd_offset(mm, from);
......
...@@ -1886,13 +1886,17 @@ st_unmap_user_pages(struct scatterlist *sgl, const unsigned int nr_pages, ...@@ -1886,13 +1886,17 @@ st_unmap_user_pages(struct scatterlist *sgl, const unsigned int nr_pages,
int i; int i;
for (i=0; i < nr_pages; i++) { for (i=0; i < nr_pages; i++) {
if (dirtied && !PageReserved(sgl[i].page)) struct page *page = sgl[i].page;
SetPageDirty(sgl[i].page);
/* unlock_page(sgl[i].page); */ /* XXX: just for debug. Remove when PageReserved is removed */
BUG_ON(PageReserved(page));
if (dirtied)
SetPageDirty(page);
/* unlock_page(page); */
/* FIXME: cache flush missing for rw==READ /* FIXME: cache flush missing for rw==READ
* FIXME: call the correct reference counting function * FIXME: call the correct reference counting function
*/ */
page_cache_release(sgl[i].page); page_cache_release(page);
} }
return 0; return 0;
......
...@@ -4526,12 +4526,16 @@ static int sgl_unmap_user_pages(struct scatterlist *sgl, const unsigned int nr_p ...@@ -4526,12 +4526,16 @@ static int sgl_unmap_user_pages(struct scatterlist *sgl, const unsigned int nr_p
int i; int i;
for (i=0; i < nr_pages; i++) { for (i=0; i < nr_pages; i++) {
if (dirtied && !PageReserved(sgl[i].page)) struct page *page = sgl[i].page;
SetPageDirty(sgl[i].page);
/* XXX: just for debug. Remove when PageReserved is removed */
BUG_ON(PageReserved(page));
if (dirtied)
SetPageDirty(page);
/* FIXME: cache flush missing for rw==READ /* FIXME: cache flush missing for rw==READ
* FIXME: call the correct reference counting function * FIXME: call the correct reference counting function
*/ */
page_cache_release(sgl[i].page); page_cache_release(page);
} }
return 0; return 0;
......
...@@ -162,6 +162,7 @@ static int dio_refill_pages(struct dio *dio) ...@@ -162,6 +162,7 @@ static int dio_refill_pages(struct dio *dio)
up_read(&current->mm->mmap_sem); up_read(&current->mm->mmap_sem);
if (ret < 0 && dio->blocks_available && (dio->rw == WRITE)) { if (ret < 0 && dio->blocks_available && (dio->rw == WRITE)) {
struct page *page = ZERO_PAGE(dio->curr_user_address);
/* /*
* A memory fault, but the filesystem has some outstanding * A memory fault, but the filesystem has some outstanding
* mapped blocks. We need to use those blocks up to avoid * mapped blocks. We need to use those blocks up to avoid
...@@ -169,7 +170,8 @@ static int dio_refill_pages(struct dio *dio) ...@@ -169,7 +170,8 @@ static int dio_refill_pages(struct dio *dio)
*/ */
if (dio->page_errors == 0) if (dio->page_errors == 0)
dio->page_errors = ret; dio->page_errors = ret;
dio->pages[0] = ZERO_PAGE(dio->curr_user_address); page_cache_get(page);
dio->pages[0] = page;
dio->head = 0; dio->head = 0;
dio->tail = 1; dio->tail = 1;
ret = 0; ret = 0;
......
...@@ -157,7 +157,7 @@ extern unsigned int kobjsize(const void *objp); ...@@ -157,7 +157,7 @@ extern unsigned int kobjsize(const void *objp);
#define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */ #define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */
#define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */ #define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */
#define VM_RESERVED 0x00080000 /* Don't unmap it from swap_out */ #define VM_RESERVED 0x00080000 /* Pages managed in a special way */
#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */ #define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */
#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */
...@@ -338,7 +338,7 @@ static inline void get_page(struct page *page) ...@@ -338,7 +338,7 @@ static inline void get_page(struct page *page)
static inline void put_page(struct page *page) static inline void put_page(struct page *page)
{ {
if (!PageReserved(page) && put_page_testzero(page)) if (put_page_testzero(page))
__page_cache_release(page); __page_cache_release(page);
} }
...@@ -723,6 +723,7 @@ void install_arg_page(struct vm_area_struct *, struct page *, unsigned long); ...@@ -723,6 +723,7 @@ void install_arg_page(struct vm_area_struct *, struct page *, unsigned long);
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
int len, int write, int force, struct page **pages, struct vm_area_struct **vmas); int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
void print_bad_pte(struct vm_area_struct *, pte_t, unsigned long);
int __set_page_dirty_buffers(struct page *page); int __set_page_dirty_buffers(struct page *page);
int __set_page_dirty_nobuffers(struct page *page); int __set_page_dirty_nobuffers(struct page *page);
......
...@@ -578,15 +578,23 @@ static int save_highmem_zone(struct zone *zone) ...@@ -578,15 +578,23 @@ static int save_highmem_zone(struct zone *zone)
continue; continue;
page = pfn_to_page(pfn); page = pfn_to_page(pfn);
/* /*
* This condition results from rvmalloc() sans vmalloc_32() * PageReserved results from rvmalloc() sans vmalloc_32()
* and architectural memory reservations. This should be * and architectural memory reservations.
* corrected eventually when the cases giving rise to this *
* are better understood. * rvmalloc should not cause this, because all implementations
* appear to always be using vmalloc_32 on architectures with
* highmem. This is a good thing, because we would like to save
* rvmalloc pages.
*
* It appears to be triggered by pages which do not point to
* valid memory (see arch/i386/mm/init.c:one_highpage_init(),
* which sets PageReserved if the page does not point to valid
* RAM.
*
* XXX: must remove usage of PageReserved!
*/ */
if (PageReserved(page)) { if (PageReserved(page))
printk("highmem reserved page?!\n");
continue; continue;
}
BUG_ON(PageNosave(page)); BUG_ON(PageNosave(page));
if (PageNosaveFree(page)) if (PageNosaveFree(page))
continue; continue;
...@@ -672,10 +680,9 @@ static int saveable(struct zone * zone, unsigned long * zone_pfn) ...@@ -672,10 +680,9 @@ static int saveable(struct zone * zone, unsigned long * zone_pfn)
return 0; return 0;
page = pfn_to_page(pfn); page = pfn_to_page(pfn);
BUG_ON(PageReserved(page) && PageNosave(page));
if (PageNosave(page)) if (PageNosave(page))
return 0; return 0;
if (PageReserved(page) && pfn_is_nosave(pfn)) { if (pfn_is_nosave(pfn)) {
pr_debug("[nosave pfn 0x%lx]", pfn); pr_debug("[nosave pfn 0x%lx]", pfn);
return 0; return 0;
} }
......
...@@ -305,6 +305,7 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) ...@@ -305,6 +305,7 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
if (j + 16 < BITS_PER_LONG) if (j + 16 < BITS_PER_LONG)
prefetchw(page + j + 16); prefetchw(page + j + 16);
__ClearPageReserved(page + j); __ClearPageReserved(page + j);
set_page_count(page + j, 0);
} }
__free_pages(page, order); __free_pages(page, order);
i += BITS_PER_LONG; i += BITS_PER_LONG;
......
...@@ -174,6 +174,7 @@ __xip_unmap (struct address_space * mapping, ...@@ -174,6 +174,7 @@ __xip_unmap (struct address_space * mapping,
unsigned long address; unsigned long address;
pte_t *pte; pte_t *pte;
pte_t pteval; pte_t pteval;
struct page *page = ZERO_PAGE(address);
spin_lock(&mapping->i_mmap_lock); spin_lock(&mapping->i_mmap_lock);
vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
...@@ -185,15 +186,17 @@ __xip_unmap (struct address_space * mapping, ...@@ -185,15 +186,17 @@ __xip_unmap (struct address_space * mapping,
* We need the page_table_lock to protect us from page faults, * We need the page_table_lock to protect us from page faults,
* munmap, fork, etc... * munmap, fork, etc...
*/ */
pte = page_check_address(ZERO_PAGE(address), mm, pte = page_check_address(page, mm, address);
address);
if (!IS_ERR(pte)) { if (!IS_ERR(pte)) {
/* Nuke the page table entry. */ /* Nuke the page table entry. */
flush_cache_page(vma, address, pte_pfn(*pte)); flush_cache_page(vma, address, pte_pfn(*pte));
pteval = ptep_clear_flush(vma, address, pte); pteval = ptep_clear_flush(vma, address, pte);
page_remove_rmap(page);
dec_mm_counter(mm, file_rss);
BUG_ON(pte_dirty(pteval)); BUG_ON(pte_dirty(pteval));
pte_unmap(pte); pte_unmap(pte);
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
page_cache_release(page);
} }
} }
spin_unlock(&mapping->i_mmap_lock); spin_unlock(&mapping->i_mmap_lock);
...@@ -228,7 +231,7 @@ xip_file_nopage(struct vm_area_struct * area, ...@@ -228,7 +231,7 @@ xip_file_nopage(struct vm_area_struct * area,
page = mapping->a_ops->get_xip_page(mapping, pgoff*(PAGE_SIZE/512), 0); page = mapping->a_ops->get_xip_page(mapping, pgoff*(PAGE_SIZE/512), 0);
if (!IS_ERR(page)) { if (!IS_ERR(page)) {
return page; goto out;
} }
if (PTR_ERR(page) != -ENODATA) if (PTR_ERR(page) != -ENODATA)
return NULL; return NULL;
...@@ -249,6 +252,8 @@ xip_file_nopage(struct vm_area_struct * area, ...@@ -249,6 +252,8 @@ xip_file_nopage(struct vm_area_struct * area,
page = ZERO_PAGE(address); page = ZERO_PAGE(address);
} }
out:
page_cache_get(page);
return page; return page;
} }
......
...@@ -29,19 +29,20 @@ static inline void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -29,19 +29,20 @@ static inline void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
return; return;
if (pte_present(pte)) { if (pte_present(pte)) {
unsigned long pfn = pte_pfn(pte); unsigned long pfn = pte_pfn(pte);
struct page *page;
flush_cache_page(vma, addr, pfn); flush_cache_page(vma, addr, pfn);
pte = ptep_clear_flush(vma, addr, ptep); pte = ptep_clear_flush(vma, addr, ptep);
if (pfn_valid(pfn)) { if (unlikely(!pfn_valid(pfn))) {
struct page *page = pfn_to_page(pfn); print_bad_pte(vma, pte, addr);
if (!PageReserved(page)) { return;
if (pte_dirty(pte))
set_page_dirty(page);
page_remove_rmap(page);
page_cache_release(page);
dec_mm_counter(mm, file_rss);
}
} }
page = pfn_to_page(pfn);
if (pte_dirty(pte))
set_page_dirty(page);
page_remove_rmap(page);
page_cache_release(page);
dec_mm_counter(mm, file_rss);
} else { } else {
if (!pte_file(pte)) if (!pte_file(pte))
free_swap_and_cache(pte_to_swp_entry(pte)); free_swap_and_cache(pte_to_swp_entry(pte));
...@@ -65,6 +66,8 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -65,6 +66,8 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
pgd_t *pgd; pgd_t *pgd;
pte_t pte_val; pte_t pte_val;
BUG_ON(vma->vm_flags & VM_RESERVED);
pgd = pgd_offset(mm, addr); pgd = pgd_offset(mm, addr);
spin_lock(&mm->page_table_lock); spin_lock(&mm->page_table_lock);
...@@ -125,6 +128,8 @@ int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -125,6 +128,8 @@ int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
pgd_t *pgd; pgd_t *pgd;
pte_t pte_val; pte_t pte_val;
BUG_ON(vma->vm_flags & VM_RESERVED);
pgd = pgd_offset(mm, addr); pgd = pgd_offset(mm, addr);
spin_lock(&mm->page_table_lock); spin_lock(&mm->page_table_lock);
......
...@@ -126,7 +126,7 @@ static long madvise_dontneed(struct vm_area_struct * vma, ...@@ -126,7 +126,7 @@ static long madvise_dontneed(struct vm_area_struct * vma,
unsigned long start, unsigned long end) unsigned long start, unsigned long end)
{ {
*prev = vma; *prev = vma;
if ((vma->vm_flags & VM_LOCKED) || is_vm_hugetlb_page(vma)) if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_RESERVED))
return -EINVAL; return -EINVAL;
if (unlikely(vma->vm_flags & VM_NONLINEAR)) { if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
......
This diff is collapsed.
...@@ -223,13 +223,13 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes) ...@@ -223,13 +223,13 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
} }
/* Ensure all existing pages follow the policy. */ /* Ensure all existing pages follow the policy. */
static int check_pte_range(struct mm_struct *mm, pmd_t *pmd, static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end, nodemask_t *nodes) unsigned long addr, unsigned long end, nodemask_t *nodes)
{ {
pte_t *orig_pte; pte_t *orig_pte;
pte_t *pte; pte_t *pte;
spin_lock(&mm->page_table_lock); spin_lock(&vma->vm_mm->page_table_lock);
orig_pte = pte = pte_offset_map(pmd, addr); orig_pte = pte = pte_offset_map(pmd, addr);
do { do {
unsigned long pfn; unsigned long pfn;
...@@ -238,18 +238,20 @@ static int check_pte_range(struct mm_struct *mm, pmd_t *pmd, ...@@ -238,18 +238,20 @@ static int check_pte_range(struct mm_struct *mm, pmd_t *pmd,
if (!pte_present(*pte)) if (!pte_present(*pte))
continue; continue;
pfn = pte_pfn(*pte); pfn = pte_pfn(*pte);
if (!pfn_valid(pfn)) if (!pfn_valid(pfn)) {
print_bad_pte(vma, *pte, addr);
continue; continue;
}
nid = pfn_to_nid(pfn); nid = pfn_to_nid(pfn);
if (!node_isset(nid, *nodes)) if (!node_isset(nid, *nodes))
break; break;
} while (pte++, addr += PAGE_SIZE, addr != end); } while (pte++, addr += PAGE_SIZE, addr != end);
pte_unmap(orig_pte); pte_unmap(orig_pte);
spin_unlock(&mm->page_table_lock); spin_unlock(&vma->vm_mm->page_table_lock);
return addr != end; return addr != end;
} }
static inline int check_pmd_range(struct mm_struct *mm, pud_t *pud, static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
unsigned long addr, unsigned long end, nodemask_t *nodes) unsigned long addr, unsigned long end, nodemask_t *nodes)
{ {
pmd_t *pmd; pmd_t *pmd;
...@@ -260,13 +262,13 @@ static inline int check_pmd_range(struct mm_struct *mm, pud_t *pud, ...@@ -260,13 +262,13 @@ static inline int check_pmd_range(struct mm_struct *mm, pud_t *pud,
next = pmd_addr_end(addr, end); next = pmd_addr_end(addr, end);
if (pmd_none_or_clear_bad(pmd)) if (pmd_none_or_clear_bad(pmd))
continue; continue;
if (check_pte_range(mm, pmd, addr, next, nodes)) if (check_pte_range(vma, pmd, addr, next, nodes))
return -EIO; return -EIO;
} while (pmd++, addr = next, addr != end); } while (pmd++, addr = next, addr != end);
return 0; return 0;
} }
static inline int check_pud_range(struct mm_struct *mm, pgd_t *pgd, static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
unsigned long addr, unsigned long end, nodemask_t *nodes) unsigned long addr, unsigned long end, nodemask_t *nodes)
{ {
pud_t *pud; pud_t *pud;
...@@ -277,24 +279,24 @@ static inline int check_pud_range(struct mm_struct *mm, pgd_t *pgd, ...@@ -277,24 +279,24 @@ static inline int check_pud_range(struct mm_struct *mm, pgd_t *pgd,
next = pud_addr_end(addr, end); next = pud_addr_end(addr, end);
if (pud_none_or_clear_bad(pud)) if (pud_none_or_clear_bad(pud))
continue; continue;
if (check_pmd_range(mm, pud, addr, next, nodes)) if (check_pmd_range(vma, pud, addr, next, nodes))
return -EIO; return -EIO;
} while (pud++, addr = next, addr != end); } while (pud++, addr = next, addr != end);
return 0; return 0;
} }
static inline int check_pgd_range(struct mm_struct *mm, static inline int check_pgd_range(struct vm_area_struct *vma,
unsigned long addr, unsigned long end, nodemask_t *nodes) unsigned long addr, unsigned long end, nodemask_t *nodes)
{ {
pgd_t *pgd; pgd_t *pgd;
unsigned long next; unsigned long next;
pgd = pgd_offset(mm, addr); pgd = pgd_offset(vma->vm_mm, addr);
do { do {
next = pgd_addr_end(addr, end); next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd)) if (pgd_none_or_clear_bad(pgd))
continue; continue;
if (check_pud_range(mm, pgd, addr, next, nodes)) if (check_pud_range(vma, pgd, addr, next, nodes))
return -EIO; return -EIO;
} while (pgd++, addr = next, addr != end); } while (pgd++, addr = next, addr != end);
return 0; return 0;
...@@ -311,6 +313,8 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, ...@@ -311,6 +313,8 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
first = find_vma(mm, start); first = find_vma(mm, start);
if (!first) if (!first)
return ERR_PTR(-EFAULT); return ERR_PTR(-EFAULT);
if (first->vm_flags & VM_RESERVED)
return ERR_PTR(-EACCES);
prev = NULL; prev = NULL;
for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) { for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) {
if (!vma->vm_next && vma->vm_end < end) if (!vma->vm_next && vma->vm_end < end)
...@@ -323,8 +327,7 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, ...@@ -323,8 +327,7 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
endvma = end; endvma = end;
if (vma->vm_start > start) if (vma->vm_start > start)
start = vma->vm_start; start = vma->vm_start;
err = check_pgd_range(vma->vm_mm, err = check_pgd_range(vma, start, endvma, nodes);
start, endvma, nodes);
if (err) { if (err) {
first = ERR_PTR(err); first = ERR_PTR(err);
break; break;
......
...@@ -1088,6 +1088,17 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, ...@@ -1088,6 +1088,17 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
error = file->f_op->mmap(file, vma); error = file->f_op->mmap(file, vma);
if (error) if (error)
goto unmap_and_free_vma; goto unmap_and_free_vma;
if ((vma->vm_flags & (VM_SHARED | VM_WRITE | VM_RESERVED))
== (VM_WRITE | VM_RESERVED)) {
printk(KERN_WARNING "program %s is using MAP_PRIVATE, "
"PROT_WRITE mmap of VM_RESERVED memory, which "
"is deprecated. Please report this to "
"linux-kernel@vger.kernel.org\n",current->comm);
if (vma->vm_ops && vma->vm_ops->close)
vma->vm_ops->close(vma);
error = -EACCES;
goto unmap_and_free_vma;
}
} else if (vm_flags & VM_SHARED) { } else if (vm_flags & VM_SHARED) {
error = shmem_zero_setup(vma); error = shmem_zero_setup(vma);
if (error) if (error)
......
...@@ -125,6 +125,14 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, ...@@ -125,6 +125,14 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
* a MAP_NORESERVE private mapping to writable will now reserve. * a MAP_NORESERVE private mapping to writable will now reserve.
*/ */
if (newflags & VM_WRITE) { if (newflags & VM_WRITE) {
if (oldflags & VM_RESERVED) {
BUG_ON(oldflags & VM_WRITE);
printk(KERN_WARNING "program %s is using MAP_PRIVATE, "
"PROT_WRITE mprotect of VM_RESERVED memory, "
"which is deprecated. Please report this to "
"linux-kernel@vger.kernel.org\n",current->comm);
return -EACCES;
}
if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED|VM_HUGETLB))) { if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED|VM_HUGETLB))) {
charged = nrpages; charged = nrpages;
if (security_vm_enough_memory(charged)) if (security_vm_enough_memory(charged))
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
static void msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd, static void msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end) unsigned long addr, unsigned long end)
{ {
struct mm_struct *mm = vma->vm_mm;
pte_t *pte; pte_t *pte;
int progress = 0; int progress = 0;
...@@ -37,7 +38,7 @@ static void msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd, ...@@ -37,7 +38,7 @@ static void msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
if (progress >= 64) { if (progress >= 64) {
progress = 0; progress = 0;
if (need_resched() || if (need_resched() ||
need_lockbreak(&vma->vm_mm->page_table_lock)) need_lockbreak(&mm->page_table_lock))
break; break;
} }
progress++; progress++;
...@@ -46,11 +47,11 @@ static void msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd, ...@@ -46,11 +47,11 @@ static void msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
if (!pte_maybe_dirty(*pte)) if (!pte_maybe_dirty(*pte))
continue; continue;
pfn = pte_pfn(*pte); pfn = pte_pfn(*pte);
if (!pfn_valid(pfn)) if (unlikely(!pfn_valid(pfn))) {
print_bad_pte(vma, *pte, addr);
continue; continue;
}
page = pfn_to_page(pfn); page = pfn_to_page(pfn);
if (PageReserved(page))
continue;
if (ptep_clear_flush_dirty(vma, addr, pte) || if (ptep_clear_flush_dirty(vma, addr, pte) ||
page_test_and_clear_dirty(page)) page_test_and_clear_dirty(page))
...@@ -58,7 +59,7 @@ static void msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd, ...@@ -58,7 +59,7 @@ static void msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
progress += 3; progress += 3;
} while (pte++, addr += PAGE_SIZE, addr != end); } while (pte++, addr += PAGE_SIZE, addr != end);
pte_unmap(pte - 1); pte_unmap(pte - 1);
cond_resched_lock(&vma->vm_mm->page_table_lock); cond_resched_lock(&mm->page_table_lock);
if (addr != end) if (addr != end)
goto again; goto again;
} }
...@@ -102,8 +103,10 @@ static void msync_page_range(struct vm_area_struct *vma, ...@@ -102,8 +103,10 @@ static void msync_page_range(struct vm_area_struct *vma,
/* For hugepages we can't go walking the page table normally, /* For hugepages we can't go walking the page table normally,
* but that's ok, hugetlbfs is memory based, so we don't need * but that's ok, hugetlbfs is memory based, so we don't need
* to do anything more on an msync() */ * to do anything more on an msync().
if (is_vm_hugetlb_page(vma)) * Can't do anything with VM_RESERVED regions either.
*/
if (vma->vm_flags & (VM_HUGETLB|VM_RESERVED))
return; return;
BUG_ON(addr >= end); BUG_ON(addr >= end);
......
...@@ -114,7 +114,8 @@ static void bad_page(const char *function, struct page *page) ...@@ -114,7 +114,8 @@ static void bad_page(const char *function, struct page *page)
1 << PG_reclaim | 1 << PG_reclaim |
1 << PG_slab | 1 << PG_slab |
1 << PG_swapcache | 1 << PG_swapcache |
1 << PG_writeback); 1 << PG_writeback |
1 << PG_reserved );
set_page_count(page, 0); set_page_count(page, 0);
reset_page_mapcount(page); reset_page_mapcount(page);
page->mapping = NULL; page->mapping = NULL;
...@@ -244,7 +245,6 @@ static inline int page_is_buddy(struct page *page, int order) ...@@ -244,7 +245,6 @@ static inline int page_is_buddy(struct page *page, int order)
{ {
if (PagePrivate(page) && if (PagePrivate(page) &&
(page_order(page) == order) && (page_order(page) == order) &&
!PageReserved(page) &&
page_count(page) == 0) page_count(page) == 0)
return 1; return 1;
return 0; return 0;
...@@ -327,7 +327,8 @@ static inline void free_pages_check(const char *function, struct page *page) ...@@ -327,7 +327,8 @@ static inline void free_pages_check(const char *function, struct page *page)
1 << PG_reclaim | 1 << PG_reclaim |
1 << PG_slab | 1 << PG_slab |
1 << PG_swapcache | 1 << PG_swapcache |
1 << PG_writeback ))) 1 << PG_writeback |
1 << PG_reserved )))
bad_page(function, page); bad_page(function, page);
if (PageDirty(page)) if (PageDirty(page))
__ClearPageDirty(page); __ClearPageDirty(page);
...@@ -455,7 +456,8 @@ static void prep_new_page(struct page *page, int order) ...@@ -455,7 +456,8 @@ static void prep_new_page(struct page *page, int order)
1 << PG_reclaim | 1 << PG_reclaim |
1 << PG_slab | 1 << PG_slab |
1 << PG_swapcache | 1 << PG_swapcache |
1 << PG_writeback ))) 1 << PG_writeback |
1 << PG_reserved )))
bad_page(__FUNCTION__, page); bad_page(__FUNCTION__, page);
page->flags &= ~(1 << PG_uptodate | 1 << PG_error | page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
...@@ -1016,7 +1018,7 @@ void __pagevec_free(struct pagevec *pvec) ...@@ -1016,7 +1018,7 @@ void __pagevec_free(struct pagevec *pvec)
fastcall void __free_pages(struct page *page, unsigned int order) fastcall void __free_pages(struct page *page, unsigned int order)
{ {
if (!PageReserved(page) && put_page_testzero(page)) { if (put_page_testzero(page)) {
if (order == 0) if (order == 0)
free_hot_page(page); free_hot_page(page);
else else
...@@ -1674,7 +1676,7 @@ void __init memmap_init_zone(unsigned long size, int nid, unsigned long zone, ...@@ -1674,7 +1676,7 @@ void __init memmap_init_zone(unsigned long size, int nid, unsigned long zone,
continue; continue;
page = pfn_to_page(pfn); page = pfn_to_page(pfn);
set_page_links(page, zone, nid, pfn); set_page_links(page, zone, nid, pfn);
set_page_count(page, 0); set_page_count(page, 1);
reset_page_mapcount(page); reset_page_mapcount(page);
SetPageReserved(page); SetPageReserved(page);
INIT_LIST_HEAD(&page->lru); INIT_LIST_HEAD(&page->lru);
......
...@@ -443,8 +443,6 @@ int page_referenced(struct page *page, int is_locked, int ignore_token) ...@@ -443,8 +443,6 @@ int page_referenced(struct page *page, int is_locked, int ignore_token)
void page_add_anon_rmap(struct page *page, void page_add_anon_rmap(struct page *page,
struct vm_area_struct *vma, unsigned long address) struct vm_area_struct *vma, unsigned long address)
{ {
BUG_ON(PageReserved(page));
if (atomic_inc_and_test(&page->_mapcount)) { if (atomic_inc_and_test(&page->_mapcount)) {
struct anon_vma *anon_vma = vma->anon_vma; struct anon_vma *anon_vma = vma->anon_vma;
...@@ -468,8 +466,7 @@ void page_add_anon_rmap(struct page *page, ...@@ -468,8 +466,7 @@ void page_add_anon_rmap(struct page *page,
void page_add_file_rmap(struct page *page) void page_add_file_rmap(struct page *page)
{ {
BUG_ON(PageAnon(page)); BUG_ON(PageAnon(page));
if (!pfn_valid(page_to_pfn(page)) || PageReserved(page)) BUG_ON(!pfn_valid(page_to_pfn(page)));
return;
if (atomic_inc_and_test(&page->_mapcount)) if (atomic_inc_and_test(&page->_mapcount))
inc_page_state(nr_mapped); inc_page_state(nr_mapped);
...@@ -483,8 +480,6 @@ void page_add_file_rmap(struct page *page) ...@@ -483,8 +480,6 @@ void page_add_file_rmap(struct page *page)
*/ */
void page_remove_rmap(struct page *page) void page_remove_rmap(struct page *page)
{ {
BUG_ON(PageReserved(page));
if (atomic_add_negative(-1, &page->_mapcount)) { if (atomic_add_negative(-1, &page->_mapcount)) {
BUG_ON(page_mapcount(page) < 0); BUG_ON(page_mapcount(page) < 0);
/* /*
...@@ -640,13 +635,13 @@ static void try_to_unmap_cluster(unsigned long cursor, ...@@ -640,13 +635,13 @@ static void try_to_unmap_cluster(unsigned long cursor,
continue; continue;
pfn = pte_pfn(*pte); pfn = pte_pfn(*pte);
if (!pfn_valid(pfn)) if (unlikely(!pfn_valid(pfn))) {
print_bad_pte(vma, *pte, address);
continue; continue;
}
page = pfn_to_page(pfn); page = pfn_to_page(pfn);
BUG_ON(PageAnon(page)); BUG_ON(PageAnon(page));
if (PageReserved(page))
continue;
if (ptep_clear_flush_young(vma, address, pte)) if (ptep_clear_flush_young(vma, address, pte))
continue; continue;
...@@ -808,7 +803,6 @@ int try_to_unmap(struct page *page) ...@@ -808,7 +803,6 @@ int try_to_unmap(struct page *page)
{ {
int ret; int ret;
BUG_ON(PageReserved(page));
BUG_ON(!PageLocked(page)); BUG_ON(!PageLocked(page));
if (PageAnon(page)) if (PageAnon(page))
......
...@@ -1506,8 +1506,10 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_ ...@@ -1506,8 +1506,10 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
*/ */
if (!offset) if (!offset)
mark_page_accessed(page); mark_page_accessed(page);
} else } else {
page = ZERO_PAGE(0); page = ZERO_PAGE(0);
page_cache_get(page);
}
/* /*
* Ok, we have the page, and it's up-to-date, so * Ok, we have the page, and it's up-to-date, so
......
...@@ -48,7 +48,7 @@ void put_page(struct page *page) ...@@ -48,7 +48,7 @@ void put_page(struct page *page)
} }
return; return;
} }
if (!PageReserved(page) && put_page_testzero(page)) if (put_page_testzero(page))
__page_cache_release(page); __page_cache_release(page);
} }
EXPORT_SYMBOL(put_page); EXPORT_SYMBOL(put_page);
...@@ -215,7 +215,7 @@ void release_pages(struct page **pages, int nr, int cold) ...@@ -215,7 +215,7 @@ void release_pages(struct page **pages, int nr, int cold)
struct page *page = pages[i]; struct page *page = pages[i];
struct zone *pagezone; struct zone *pagezone;
if (PageReserved(page) || !put_page_testzero(page)) if (!put_page_testzero(page))
continue; continue;
pagezone = page_zone(page); pagezone = page_zone(page);
......
...@@ -2949,8 +2949,7 @@ static struct page * snd_pcm_mmap_status_nopage(struct vm_area_struct *area, uns ...@@ -2949,8 +2949,7 @@ static struct page * snd_pcm_mmap_status_nopage(struct vm_area_struct *area, uns
return NOPAGE_OOM; return NOPAGE_OOM;
runtime = substream->runtime; runtime = substream->runtime;
page = virt_to_page(runtime->status); page = virt_to_page(runtime->status);
if (!PageReserved(page)) get_page(page);
get_page(page);
if (type) if (type)
*type = VM_FAULT_MINOR; *type = VM_FAULT_MINOR;
return page; return page;
...@@ -2992,8 +2991,7 @@ static struct page * snd_pcm_mmap_control_nopage(struct vm_area_struct *area, un ...@@ -2992,8 +2991,7 @@ static struct page * snd_pcm_mmap_control_nopage(struct vm_area_struct *area, un
return NOPAGE_OOM; return NOPAGE_OOM;
runtime = substream->runtime; runtime = substream->runtime;
page = virt_to_page(runtime->control); page = virt_to_page(runtime->control);
if (!PageReserved(page)) get_page(page);
get_page(page);
if (type) if (type)
*type = VM_FAULT_MINOR; *type = VM_FAULT_MINOR;
return page; return page;
...@@ -3066,8 +3064,7 @@ static struct page *snd_pcm_mmap_data_nopage(struct vm_area_struct *area, unsign ...@@ -3066,8 +3064,7 @@ static struct page *snd_pcm_mmap_data_nopage(struct vm_area_struct *area, unsign
vaddr = runtime->dma_area + offset; vaddr = runtime->dma_area + offset;
page = virt_to_page(vaddr); page = virt_to_page(vaddr);
} }
if (!PageReserved(page)) get_page(page);
get_page(page);
if (type) if (type)
*type = VM_FAULT_MINOR; *type = VM_FAULT_MINOR;
return page; return page;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment