Commit dd9fd0e0 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] rmap: nonlinear truncation

From: Hugh Dickins <hugh@veritas.com>

The earlier changes introducing PageAnon left truncated pages mapped into
nonlinear vmas unswappable.  Once we go to object-based rmap, it's
impossible to find where a file page is mapped once page->mapping has been
cleared: switching such pages to anonymous is odd, and breaks strict commit
accounting.

So now handle truncation of nonlinear vmas correctly.  And factor in
Daniel's cluster filesystem needs while we're there: when invalidating
local cache, we do want to unmap shared pages from all mms, but we do not
want to discard private COWed modifications of those pages (which
truncation discards to satisfy the SIGBUS semantics demanded by specs).
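
As a rough illustration of that split (a sketch only: the helper functions
below are hypothetical, but the two entry points and the even_cows flag are
those declared in include/linux/mm.h further down):

	/* Truncation wants full SIGBUS semantics, so private COWed copies
	 * of the truncated pages are discarded too: even_cows = 1.
	 * (A zero holelen is taken to cover everything up to EOF.) */
	static void example_truncate_hole(struct address_space *mapping,
					  loff_t newsize)
	{
		unmap_mapping_range(mapping, newsize, 0, 1);
	}

	/* A cluster filesystem invalidating its local cache only wants the
	 * shared file-backed mappings gone from all mms; private COWed
	 * modifications stay: even_cows = 0 via the wrapper. */
	static void example_invalidate_local_cache(struct address_space *mapping,
						   loff_t start, loff_t len)
	{
		unmap_shared_mapping_range(mapping, start, len);
	}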

Drew from Daniel's patch (LKML 2 Mar 04), but didn't always follow it;
fewer name changes, but still some - "unmap" rather than "invalidate".
zap_page_range is not exported, so it's safe to give it and all its
too-many layers an extra zap_details arg, just NULL in the normal cases.

Given details, zap_pte_range checks the page's mapping or index to skip anon
or untruncated pages.  I didn't realize before implementing it that, in the
nonlinear case, it should set a file pte when truncating - otherwise linear
pages might later appear in place of the expected SIGBUS.  I suspect this
implies that ->populate functions ought to set file ptes beyond EOF instead
of failing, but I haven't changed them yet.
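
The zap_pte_range changes themselves live in mm/memory.c, whose diff is
collapsed below; the following is only a hedged sketch of the filtering
described above, with illustrative field names rather than the patch's
verbatim code:

	/* Parameter block passed down to zap_pte_range (sketch only; the
	 * real definition is in the collapsed mm/memory.c diff). */
	struct zap_details {
		struct vm_area_struct *nonlinear_vma;	/* nonlinear vma being truncated */
		struct address_space *check_mapping;	/* only zap pages of this mapping */
		pgoff_t first_index;			/* lowest page->index to zap */
		pgoff_t last_index;			/* highest page->index to zap */
	};

	/* Roughly, in zap_pte_range's per-pte loop, when details != NULL: */
	if (details) {
		if (!page->mapping || (details->check_mapping &&
				       details->check_mapping != page->mapping))
			continue;	/* anon or foreign page: leave it alone */
		if (page->index < details->first_index ||
		    page->index > details->last_index)
			continue;	/* not in the truncated range: skip */
	}
	pte = ptep_get_and_clear(ptep);		/* zap the present pte */
	if (details && details->nonlinear_vma)
		/* Remember the file offset, so a later fault here gets SIGBUS
		 * rather than quietly instantiating the linear page. */
		set_pte(ptep, pgoff_to_pte(page->index));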

To avoid making yet another copy of that ugly linear pgidx test, added an
inline function linear_page_index (in pagemap.h, to get at PAGE_CACHE_SIZE,
though as usual things don't really work if it differs from PAGE_SIZE).
Ooh, I thought I'd removed ___add_to_page_cache last time; do so now.
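
For concreteness, a worked example of the new helper (hypothetical numbers,
assuming 4K pages, PAGE_SHIFT = 12, and PAGE_CACHE_SIZE == PAGE_SIZE so the
final shift is zero):

	/*
	 * vma->vm_start = 0x40000000, vma->vm_pgoff = 100
	 * address       = 0x4000c000  (12 pages into the vma)
	 *
	 * pgoff  = (0x4000c000 - 0x40000000) >> 12          =  12
	 * pgoff += vma->vm_pgoff                             = 112
	 * return pgoff >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)    = 112
	 *
	 * So a pte at that address is linear iff it maps page->index 112;
	 * the populate paths and try_to_unmap_one below use exactly this test.
	 */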

Made unmap_page_range static, shifting its hugepage check up into its sole
caller unmap_vmas.  Killed the "killme" debug from unmap_vmas; I have not
seen it trigger.  unmap_mapping_range is exported without restriction: I'm
one of those who believe it should be generally available.  But I'm wrongly
placed to decide that, and will probably just sob quietly to myself if _GPL
is added later.
parent 3df9aaf3
@@ -410,7 +410,7 @@ static inline size_t read_zero_pagealigned(char * buf, size_t size)
 		if (count > size)
 			count = size;
-		zap_page_range(vma, addr, count);
+		zap_page_range(vma, addr, count, NULL);
 		zeromap_page_range(vma, addr, count, PAGE_COPY);
 		size -= count;
@@ -439,22 +439,27 @@ struct file *shmem_file_setup(char * name, loff_t size, unsigned long flags);
 void shmem_lock(struct file * file, int lock);
 int shmem_zero_setup(struct vm_area_struct *);
 
+struct zap_details;
 void zap_page_range(struct vm_area_struct *vma, unsigned long address,
-		unsigned long size);
+		unsigned long size, struct zap_details *);
 int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
 		struct vm_area_struct *start_vma, unsigned long start_addr,
-		unsigned long end_addr, unsigned long *nr_accounted);
-void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
-		unsigned long address, unsigned long size);
+		unsigned long end_addr, unsigned long *nr_accounted,
+		struct zap_details *);
 void clear_page_tables(struct mmu_gather *tlb, unsigned long first, int nr);
 int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
 			struct vm_area_struct *vma);
 int zeromap_page_range(struct vm_area_struct *vma, unsigned long from,
 			unsigned long size, pgprot_t prot);
+void unmap_mapping_range(struct address_space *mapping,
+		loff_t const holebegin, loff_t const holelen, int even_cows);
+
+static inline void unmap_shared_mapping_range(struct address_space *mapping,
+		loff_t const holebegin, loff_t const holelen)
+{
+	unmap_mapping_range(mapping, holebegin, holelen, 0);
+}
+
-extern void invalidate_mmap_range(struct address_space *mapping,
-		loff_t const holebegin,
-		loff_t const holelen);
 extern int vmtruncate(struct inode * inode, loff_t offset);
 extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address));
 extern pte_t *FASTCALL(pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
@@ -139,14 +139,12 @@ static inline unsigned long get_page_cache_size(void)
 	return atomic_read(&nr_pagecache);
 }
 
-static inline void ___add_to_page_cache(struct page *page,
-		struct address_space *mapping, unsigned long index)
+static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
+					unsigned long address)
 {
-	page->mapping = mapping;
-	page->index = index;
-
-	mapping->nrpages++;
-	pagecache_acct(1);
+	pgoff_t pgoff = (address - vma->vm_start) >> PAGE_SHIFT;
+	pgoff += vma->vm_pgoff;
+	return pgoff >> (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 }
 
 extern void FASTCALL(__lock_page(struct page *page));
@@ -62,7 +62,7 @@
  *  ->mapping->tree_lock
  *
  *  ->i_sem
- *    ->i_shared_sem		(truncate->invalidate_mmap_range)
+ *    ->i_shared_sem		(truncate->unmap_mapping_range)
  *
  *  ->mmap_sem
  *    ->i_shared_sem		(various places)
@@ -1363,11 +1363,7 @@ static int filemap_populate(struct vm_area_struct *vma,
 			 * If a nonlinear mapping then store the file page offset
 			 * in the pte.
 			 */
-			unsigned long pgidx;
-			pgidx = (addr - vma->vm_start) >> PAGE_SHIFT;
-			pgidx += vma->vm_pgoff;
-			pgidx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
-			if (pgoff != pgidx) {
+			if (pgoff != linear_page_index(vma, addr)) {
 				err = install_file_pte(mm, vma, addr, pgoff, prot);
 				if (err)
 					return err;
@@ -95,7 +95,7 @@ static long madvise_dontneed(struct vm_area_struct * vma,
 	if (vma->vm_flags & VM_LOCKED)
 		return -EINVAL;
 
-	zap_page_range(vma, start, end - start);
+	zap_page_range(vma, start, end - start, NULL);
 	return 0;
 }
This diff is collapsed.
@@ -728,7 +728,7 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
 		fput(file);
 
 	/* Undo any partial mapping done by a device driver. */
-	zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start);
+	zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
 free_vma:
 	kmem_cache_free(vm_area_cachep, vma);
 unacct_error:
@@ -1160,7 +1160,7 @@ static void unmap_region(struct mm_struct *mm,
 	lru_add_drain();
 	tlb = tlb_gather_mmu(mm, 0);
-	unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted);
+	unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted, NULL);
 	vm_unacct_memory(nr_accounted);
 
 	if (is_hugepage_only_range(start, end - start))
@@ -1446,7 +1446,7 @@ void exit_mmap(struct mm_struct *mm)
 	flush_cache_mm(mm);
 	/* Use ~0UL here to ensure all VMAs in the mm are unmapped */
 	mm->map_count -= unmap_vmas(&tlb, mm, mm->mmap, 0,
-					~0UL, &nr_accounted);
+					~0UL, &nr_accounted, NULL);
 	vm_unacct_memory(nr_accounted);
 	BUG_ON(mm->map_count);	/* This is just debugging */
 	clear_page_tables(tlb, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD);
@@ -359,16 +359,12 @@ static int fastcall try_to_unmap_one(struct page * page, pte_addr_t paddr)
 		set_pte(ptep, swp_entry_to_pte(entry));
 		BUG_ON(pte_file(*ptep));
 	} else {
-		unsigned long pgidx;
 		/*
 		 * If a nonlinear mapping then store the file page offset
 		 * in the pte.
 		 */
 		BUG_ON(!page->mapping);
-		pgidx = (address - vma->vm_start) >> PAGE_SHIFT;
-		pgidx += vma->vm_pgoff;
-		pgidx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
-		if (page->index != pgidx) {
+		if (page->index != linear_page_index(vma, address)) {
 			set_pte(ptep, pgoff_to_pte(page->index));
 			BUG_ON(!pte_file(*ptep));
 		}
@@ -1055,11 +1055,7 @@ static int shmem_populate(struct vm_area_struct *vma,
 			 * If a nonlinear mapping then store the file page
 			 * offset in the pte.
 			 */
-			unsigned long pgidx;
-			pgidx = (addr - vma->vm_start) >> PAGE_SHIFT;
-			pgidx += vma->vm_pgoff;
-			pgidx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
-			if (pgoff != pgidx) {
+			if (pgoff != linear_page_index(vma, addr)) {
 				err = install_file_pte(mm, vma, addr, pgoff, prot);
 				if (err)
 					return err;