Commit 0b5d6831 authored by Hugh Dickins, committed by Linus Torvalds

[PATCH] vmtrunc: vm_truncate_count race caution

Fix some unlikely races in respect of vm_truncate_count.

Firstly, vm_truncate_count is supposed to be guarded by i_mmap_lock, but some
places copy a vma structure by *new_vma = *old_vma: if the compiler implements
that with a bytewise copy, new_vma->vm_truncate_count could be munged, and
new_vma could later appear up-to-date when it's not; so set it properly once
under the lock.
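
A minimal sketch of the pattern (simplified from the dup_mmap hunk below, not
complete code):

	*tmp = *mpnt;		/* unlocked structure copy: vm_truncate_count may be munged */
	...
	spin_lock(&file->f_mapping->i_mmap_lock);
	/* re-set the field under i_mmap_lock so its value is coherent */
	tmp->vm_truncate_count = mpnt->vm_truncate_count;
	...
	spin_unlock(&file->f_mapping->i_mmap_lock);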

Have vma_link set vm_truncate_count to mapping->truncate_count when adding an
empty vma: if new vmas are being added profusely while vmtruncate is in
progress, this lets them be skipped without scanning.
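
The benefit shows up on the truncation side: the scan over the mapping's vmas
can skip any vma whose count already matches.  An illustrative sketch only
(unmap_range_of_vma is a made-up stand-in, not the real mm/memory.c code):

	/* under mapping->i_mmap_lock */
	if (vma->vm_truncate_count == mapping->truncate_count)
		return;				/* already done for this truncation */
	unmap_range_of_vma(vma);		/* zap this vma's ptes for the range */
	vma->vm_truncate_count = mapping->truncate_count;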

vma_adjust has a vm_truncate_count problem much like the one it had with
anon_vma under mprotect merge: when merging, be careful not to leave a vma
marked as up-to-date when it might not be, in case unmap_mapping_range is in
progress - set vm_truncate_count to 0 when in doubt.  Similarly when mremap is
moving ptes from one vma to another.
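
The convention used in the vma_adjust and move_one_page hunks below is that a
count of 0 marks the vma as not known to be up to date, so a later
unmap_mapping_range pass will rescan it; roughly:

	/* under mapping->i_mmap_lock: counts disagree, so assume the worst */
	if (importer &&
	    vma->vm_truncate_count != next->vm_truncate_count)
		importer->vm_truncate_count = 0;	/* force a rescan */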

Cut a little code from __anon_vma_merge: now that vma_adjust sets "importer"
in the remove_next case (to get its vm_truncate_count right), its anon_vma is
already linked by the time __anon_vma_merge is called.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 3ee07371
@@ -219,6 +219,7 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
 			/* insert tmp into the share list, just after mpnt */
 			spin_lock(&file->f_mapping->i_mmap_lock);
+			tmp->vm_truncate_count = mpnt->vm_truncate_count;
 			flush_dcache_mmap_lock(file->f_mapping);
 			vma_prio_tree_add(tmp, mpnt);
 			flush_dcache_mmap_unlock(file->f_mapping);
...
@@ -308,8 +308,10 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (vma->vm_file)
 		mapping = vma->vm_file->f_mapping;
-	if (mapping)
+	if (mapping) {
 		spin_lock(&mapping->i_mmap_lock);
+		vma->vm_truncate_count = mapping->truncate_count;
+	}
 	anon_vma_lock(vma);
 	__vma_link(mm, vma, prev, rb_link, rb_parent);
@@ -380,6 +382,7 @@ void vma_adjust(struct vm_area_struct *vma, unsigned long start,
 again:			remove_next = 1 + (end > next->vm_end);
 			end = next->vm_end;
 			anon_vma = next->anon_vma;
+			importer = vma;
 		} else if (end > next->vm_start) {
 			/*
 			 * vma expands, overlapping part of the next:
@@ -405,7 +408,16 @@ again:			remove_next = 1 + (end > next->vm_end);
 		if (!(vma->vm_flags & VM_NONLINEAR))
 			root = &mapping->i_mmap;
 		spin_lock(&mapping->i_mmap_lock);
+		if (importer &&
+		    vma->vm_truncate_count != next->vm_truncate_count) {
+			/*
+			 * unmap_mapping_range might be in progress:
+			 * ensure that the expanding vma is rescanned.
+			 */
+			importer->vm_truncate_count = 0;
+		}
 		if (insert) {
+			insert->vm_truncate_count = vma->vm_truncate_count;
 			/*
 			 * Put into prio_tree now, so instantiated pages
 			 * are visible to arm/parisc __flush_dcache_page
...
@@ -100,7 +100,7 @@ static inline pte_t *alloc_one_pte_map(struct mm_struct *mm, unsigned long addr)
 static int
 move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
-		unsigned long new_addr)
+		struct vm_area_struct *new_vma, unsigned long new_addr)
 {
 	struct address_space *mapping = NULL;
 	struct mm_struct *mm = vma->vm_mm;
@@ -116,6 +116,9 @@ move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
 		 */
 		mapping = vma->vm_file->f_mapping;
 		spin_lock(&mapping->i_mmap_lock);
+		if (new_vma->vm_truncate_count &&
+		    new_vma->vm_truncate_count != vma->vm_truncate_count)
+			new_vma->vm_truncate_count = 0;
 	}
 	spin_lock(&mm->page_table_lock);
@@ -162,8 +165,8 @@ move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
 }
 static unsigned long move_page_tables(struct vm_area_struct *vma,
-		unsigned long new_addr, unsigned long old_addr,
-		unsigned long len)
+		unsigned long old_addr, struct vm_area_struct *new_vma,
+		unsigned long new_addr, unsigned long len)
 {
 	unsigned long offset;
@@ -175,7 +178,8 @@ static unsigned long move_page_tables(struct vm_area_struct *vma,
 	 * only a few pages.. This also makes error recovery easier.
 	 */
 	for (offset = 0; offset < len; offset += PAGE_SIZE) {
-		if (move_one_page(vma, old_addr+offset, new_addr+offset) < 0)
+		if (move_one_page(vma, old_addr + offset,
+				new_vma, new_addr + offset) < 0)
 			break;
 		cond_resched();
 	}
@@ -206,14 +210,14 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 	if (!new_vma)
 		return -ENOMEM;
-	moved_len = move_page_tables(vma, new_addr, old_addr, old_len);
+	moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len);
 	if (moved_len < old_len) {
 		/*
 		 * On error, move entries back from new area to old,
 		 * which will succeed since page tables still there,
 		 * and then proceed to unmap new area instead of old.
 		 */
-		move_page_tables(new_vma, old_addr, new_addr, moved_len);
+		move_page_tables(new_vma, new_addr, vma, old_addr, moved_len);
 		vma = new_vma;
 		old_len = new_len;
 		old_addr = new_addr;
...
@@ -121,14 +121,7 @@ int anon_vma_prepare(struct vm_area_struct *vma)
 void __anon_vma_merge(struct vm_area_struct *vma, struct vm_area_struct *next)
 {
-	if (!vma->anon_vma) {
-		BUG_ON(!next->anon_vma);
-		vma->anon_vma = next->anon_vma;
-		list_add(&vma->anon_vma_node, &next->anon_vma_node);
-	} else {
-		/* if they're both non-null they must be the same */
-		BUG_ON(vma->anon_vma != next->anon_vma);
-	}
+	BUG_ON(vma->anon_vma != next->anon_vma);
 	list_del(&next->anon_vma_node);
 }
...