Commit 16ceff2d authored by Andrew Morton, committed by Linus Torvalds

[PATCH] rmap 22 flush_dcache_mmap_lock

From: Hugh Dickins <hugh@veritas.com>

arm and parisc __flush_dcache_page have been scanning the i_mmap(_shared) list
without locking or disabling preemption.  That may be even more unsafe now that
it is a prio tree instead of a list.

It looks like we cannot use i_shared_lock for this protection: most uses of
flush_dcache_page are okay, and only one would need its lock ordering fixed
(get_user_pages holds page_table_lock across flush_dcache_page); but there are
a few (e.g. in net and ntfs) which look as if they are calling it from I/O
completion, and it would be restrictive to disallow it there.

So, on arm and parisc only, define flush_dcache_mmap_lock(mapping) as
spin_lock_irq(&(mapping)->tree_lock); on i386 (and the other arches, left to
the next patch) define it away to nothing; and use it where needed.
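
To make the shape of the change concrete, here is a condensed sketch of the two
pieces: the arm/parisc header definition that piggybacks on the mapping's
tree_lock (IRQs disabled, since flush_dcache_page may be called from I/O
completion), and the reader side in arch cache-flush code bracketing its i_mmap
walk.  The helper name example_flush_aliases is made up for illustration; the
macros and prio tree calls are the ones in the hunks below, and this is not
meant to build standalone.

	/* arm/parisc <asm/cacheflush.h>: reuse the page cache tree_lock */
	#define flush_dcache_mmap_lock(mapping) \
		spin_lock_irq(&(mapping)->tree_lock)
	#define flush_dcache_mmap_unlock(mapping) \
		spin_unlock_irq(&(mapping)->tree_lock)

	/* Reader side: arch code walking the i_mmap prio tree for aliases.
	 * example_flush_aliases is a hypothetical name, not in the patch. */
	static void example_flush_aliases(struct address_space *mapping, pgoff_t pgoff)
	{
		struct vm_area_struct *mpnt = NULL;
		struct prio_tree_iter iter;

		flush_dcache_mmap_lock(mapping);
		while ((mpnt = vma_prio_tree_next(mpnt, &mapping->i_mmap,
						&iter, pgoff, pgoff)) != NULL) {
			unsigned long offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
			flush_cache_page(mpnt, mpnt->vm_start + offset);
		}
		flush_dcache_mmap_unlock(mapping);
	}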

While updating the locking hierarchy comment in filemap.c, remove two layers
of the fossil record from the add_to_page_cache comment: it is no longer used
for swap.

I believe all the #includes will work out, but I have only built i386.  I can
see several things about this patch which might cause revulsion: the name
flush_dcache_mmap_lock?  The reuse of the page radix_tree's tree_lock for this
different purpose?  Should it be spin_lock_irqsave instead?  Can't we somehow
get i_shared_lock to handle the problem?
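
For the writer side, the rule followed by the dup_mmap, sys_remap_file_pages,
__vma_link_file and vma_adjust hunks below is: any update of mapping->i_mmap or
the nonlinear list is bracketed by flush_dcache_mmap_lock/unlock inside the
existing i_mmap_lock, so the arch flushers never see the prio tree mid-update.
A sketch only, with a hypothetical helper name mirroring the
sys_remap_file_pages hunk:

	/* Writer side (hypothetical helper): lock order is
	 * i_mmap_lock -> flush_dcache_mmap_lock (i.e. mapping->tree_lock). */
	static void example_make_vma_nonlinear(struct vm_area_struct *vma,
					       struct address_space *mapping)
	{
		spin_lock(&mapping->i_mmap_lock);  /* existing i_mmap protection */
		flush_dcache_mmap_lock(mapping);   /* new: hold off D-cache flushers */
		vma->vm_flags |= VM_NONLINEAR;
		vma_prio_tree_remove(vma, &mapping->i_mmap);
		vma_prio_tree_init(vma);
		list_add_tail(&vma->shared.vm_set.list,
				&mapping->i_mmap_nonlinear);
		flush_dcache_mmap_unlock(mapping);
		spin_unlock(&mapping->i_mmap_lock);
	}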
parent b124bc14
@@ -94,6 +94,8 @@ void __flush_dcache_page(struct page *page)
 	 * and invalidate any user data.
 	 */
 	pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+	flush_dcache_mmap_lock(mapping);
 	while ((mpnt = vma_prio_tree_next(mpnt, &mapping->i_mmap,
 					&iter, pgoff, pgoff)) != NULL) {
 		/*
@@ -106,6 +108,7 @@ void __flush_dcache_page(struct page *page)
 		offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
 		flush_cache_page(mpnt, mpnt->vm_start + offset);
 	}
+	flush_dcache_mmap_unlock(mapping);
 }
 static void
@@ -129,6 +132,7 @@ make_coherent(struct vm_area_struct *vma, unsigned long addr, struct page *page,
 	 * space, then we need to handle them specially to maintain
 	 * cache coherency.
 	 */
+	flush_dcache_mmap_lock(mapping);
 	while ((mpnt = vma_prio_tree_next(mpnt, &mapping->i_mmap,
 					&iter, pgoff, pgoff)) != NULL) {
 		/*
@@ -143,6 +147,7 @@ make_coherent(struct vm_area_struct *vma, unsigned long addr, struct page *page,
 		offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
 		aliases += adjust_pte(mpnt, mpnt->vm_start + offset);
 	}
+	flush_dcache_mmap_unlock(mapping);
 	if (aliases)
 		adjust_pte(vma, addr);
 	else
...
@@ -249,6 +249,7 @@ void __flush_dcache_page(struct page *page)
 	 * declared as MAP_PRIVATE or MAP_SHARED), so we only need
 	 * to flush one address here for them all to become coherent */
+	flush_dcache_mmap_lock(mapping);
 	while ((mpnt = vma_prio_tree_next(mpnt, &mapping->i_mmap,
 					&iter, pgoff, pgoff)) != NULL) {
 		offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
@@ -266,8 +267,9 @@ void __flush_dcache_page(struct page *page)
 		__flush_cache_page(mpnt, addr);
-		return;
+		break;
 	}
+	flush_dcache_mmap_unlock(mapping);
 }
 EXPORT_SYMBOL(__flush_dcache_page);
...
@@ -303,6 +303,11 @@ static inline void flush_dcache_page(struct page *page)
 		__flush_dcache_page(page);
 }
+#define flush_dcache_mmap_lock(mapping) \
+	spin_lock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_unlock(mapping) \
+	spin_unlock_irq(&(mapping)->tree_lock)
 #define flush_icache_user_range(vma,page,addr,len) \
 	flush_dcache_page(page)
...
@@ -10,6 +10,8 @@
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr)		do { } while (0)
 #define flush_dcache_page(page)			do { } while (0)
+#define flush_dcache_mmap_lock(mapping)		do { } while (0)
+#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
 #define flush_icache_range(start, end)		do { } while (0)
 #define flush_icache_page(vma,pg)		do { } while (0)
 #define flush_icache_user_range(vma,pg,adr,len)	do { } while (0)
...
@@ -78,6 +78,11 @@ static inline void flush_dcache_page(struct page *page)
 	}
 }
+#define flush_dcache_mmap_lock(mapping) \
+	spin_lock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_unlock(mapping) \
+	spin_unlock_irq(&(mapping)->tree_lock)
 #define flush_icache_page(vma,page)	do { flush_kernel_dcache_page(page_address(page)); flush_kernel_icache_page(page_address(page)); } while (0)
 #define flush_icache_range(s,e)		do { flush_kernel_dcache_range_asm(s,e); flush_kernel_icache_range_asm(s,e); } while (0)
...
@@ -332,7 +332,9 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
 			/* insert tmp into the share list, just after mpnt */
 			spin_lock(&file->f_mapping->i_mmap_lock);
+			flush_dcache_mmap_lock(mapping);
 			vma_prio_tree_add(tmp, mpnt);
+			flush_dcache_mmap_unlock(mapping);
 			spin_unlock(&file->f_mapping->i_mmap_lock);
 		}
...
@@ -65,7 +65,9 @@
  *    ->i_mmap_lock		(truncate->unmap_mapping_range)
  *
  *  ->mmap_sem
- *    ->i_mmap_lock		(various places)
+ *    ->i_mmap_lock
+ *      ->page_table_lock	(various places, mainly in mmap.c)
+ *        ->mapping->tree_lock	(arch-dependent flush_dcache_mmap_lock)
  *
  *  ->mmap_sem
  *    ->lock_page		(access_process_vm)
...
@@ -202,11 +202,13 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
 			!(vma->vm_flags & VM_NONLINEAR)) {
 		mapping = vma->vm_file->f_mapping;
 		spin_lock(&mapping->i_mmap_lock);
+		flush_dcache_mmap_lock(mapping);
 		vma->vm_flags |= VM_NONLINEAR;
 		vma_prio_tree_remove(vma, &mapping->i_mmap);
 		vma_prio_tree_init(vma);
 		list_add_tail(&vma->shared.vm_set.list,
 					&mapping->i_mmap_nonlinear);
+		flush_dcache_mmap_unlock(mapping);
 		spin_unlock(&mapping->i_mmap_lock);
 	}
...
@@ -25,6 +25,7 @@
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
+#include <asm/cacheflush.h>
 #include <asm/tlb.h>
 /*
@@ -74,10 +75,12 @@ static inline void __remove_shared_vm_struct(struct vm_area_struct *vma,
 	if (vma->vm_flags & VM_SHARED)
 		mapping->i_mmap_writable--;
+	flush_dcache_mmap_lock(mapping);
 	if (unlikely(vma->vm_flags & VM_NONLINEAR))
 		list_del_init(&vma->shared.vm_set.list);
 	else
 		vma_prio_tree_remove(vma, &mapping->i_mmap);
+	flush_dcache_mmap_unlock(mapping);
 }
 /*
@@ -266,11 +269,13 @@ static inline void __vma_link_file(struct vm_area_struct *vma)
 		if (vma->vm_flags & VM_SHARED)
 			mapping->i_mmap_writable++;
+		flush_dcache_mmap_lock(mapping);
 		if (unlikely(vma->vm_flags & VM_NONLINEAR))
 			list_add_tail(&vma->shared.vm_set.list,
 					&mapping->i_mmap_nonlinear);
 		else
 			vma_prio_tree_insert(vma, &mapping->i_mmap);
+		flush_dcache_mmap_unlock(mapping);
 	}
 }
@@ -350,14 +355,17 @@ void vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	}
 	spin_lock(&mm->page_table_lock);
-	if (root)
+	if (root) {
+		flush_dcache_mmap_lock(mapping);
 		vma_prio_tree_remove(vma, root);
+	}
 	vma->vm_start = start;
 	vma->vm_end = end;
 	vma->vm_pgoff = pgoff;
 	if (root) {
 		vma_prio_tree_init(vma);
 		vma_prio_tree_insert(vma, root);
+		flush_dcache_mmap_unlock(mapping);
 	}
 	if (next) {
...