Commit 0b5d6831 authored by Hugh Dickins, committed by Linus Torvalds

[PATCH] vmtrunc: vm_truncate_count race caution

Fix some unlikely races in respect of vm_truncate_count.

Firstly, it's supposed to be guarded by i_mmap_lock, but some places copy a
vma structure by *new_vma = *old_vma: if the compiler implements that with a
bytewise copy, new_vma->vm_truncate_count could be munged, and new_vma later
appear up-to-date when it's not; so set it properly once under lock.
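
As a minimal sketch of the fix (names taken from the dup_mmap() hunk below; the
surrounding loop, allocation and error handling are omitted), the hazard and the
ordering that avoids it look like this:

	*tmp = *mpnt;	/* structure copy: the compiler may do it bytewise,
			 * so tmp->vm_truncate_count cannot be trusted here */
	...
	spin_lock(&file->f_mapping->i_mmap_lock);
	/* serialized against unmap_mapping_range: set the count for real */
	tmp->vm_truncate_count = mpnt->vm_truncate_count;
	flush_dcache_mmap_lock(file->f_mapping);
	vma_prio_tree_add(tmp, mpnt);
	flush_dcache_mmap_unlock(file->f_mapping);
	spin_unlock(&file->f_mapping->i_mmap_lock);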

vma_link sets vm_truncate_count to mapping->truncate_count when adding an empty
vma: if new vmas are being added profusely while vmtruncate is in progress, this
lets them be skipped without scanning.
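
To see why, here is a rough paraphrase of the truncation-side scan (the prio_tree
walk done by unmap_mapping_range in mm/memory.c of this era; the restart handling
is simplified and the exact shape of the details fields is from memory, so treat
this as a sketch rather than the code itself):

	vma_prio_tree_foreach(vma, &iter, root,
			details->first_index, details->last_index) {
		/* skip quickly over those we have already dealt with */
		if (vma->vm_truncate_count == details->truncate_count)
			continue;
		vma->vm_truncate_count = details->truncate_count;
		/* ... unmap the affected range of this vma ... */
	}

A vma added by vma_link during the scan already carries mapping->truncate_count,
so the walk passes over it without doing any work.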

vma_adjust has a vm_truncate_count problem much like the one it had with anon_vma
under mprotect merge: when merging, be careful not to leave the vma marked as
up-to-date when it might not be, lest an unmap_mapping_range be in progress - set
vm_truncate_count to 0 when in doubt.  Similarly when mremap is moving ptes from
one vma to another.
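
The rule applied in both places: if the vma that ends up holding the ptes cannot
prove its vm_truncate_count matches the source's, it forfeits the mark so that a
concurrent unmap_mapping_range will rescan it.  As a hypothetical helper (not part
of the patch - the real checks are open-coded in vma_adjust and move_one_page,
under mapping->i_mmap_lock):

	static inline void vm_truncate_count_unsure(struct vm_area_struct *dst,
						    struct vm_area_struct *src)
	{
		/* 0 marks the vma as not up to date, forcing a rescan */
		if (dst->vm_truncate_count != src->vm_truncate_count)
			dst->vm_truncate_count = 0;
	}

move_one_page additionally checks that new_vma->vm_truncate_count is non-zero
first, to avoid a pointless store when the vma is already marked for rescan.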

Cut a little code from __anon_vma_merge: now that vma_adjust sets "importer" in
the remove_next case (to get its vm_truncate_count right), the importer's
anon_vma is already linked by the time __anon_vma_merge is called.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 3ee07371

kernel/fork.c: dup_mmap()
@@ -219,6 +219,7 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
 			/* insert tmp into the share list, just after mpnt */
 			spin_lock(&file->f_mapping->i_mmap_lock);
+			tmp->vm_truncate_count = mpnt->vm_truncate_count;
 			flush_dcache_mmap_lock(file->f_mapping);
 			vma_prio_tree_add(tmp, mpnt);
 			flush_dcache_mmap_unlock(file->f_mapping);
...
mm/mmap.c: vma_link(), vma_adjust()
@@ -308,8 +308,10 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (vma->vm_file)
 		mapping = vma->vm_file->f_mapping;
-	if (mapping)
+	if (mapping) {
 		spin_lock(&mapping->i_mmap_lock);
+		vma->vm_truncate_count = mapping->truncate_count;
+	}
 	anon_vma_lock(vma);
 	__vma_link(mm, vma, prev, rb_link, rb_parent);
@@ -380,6 +382,7 @@ void vma_adjust(struct vm_area_struct *vma, unsigned long start,
 again:			remove_next = 1 + (end > next->vm_end);
 			end = next->vm_end;
 			anon_vma = next->anon_vma;
+			importer = vma;
 		} else if (end > next->vm_start) {
 			/*
 			 * vma expands, overlapping part of the next:
@@ -405,7 +408,16 @@ again:			remove_next = 1 + (end > next->vm_end);
 		if (!(vma->vm_flags & VM_NONLINEAR))
 			root = &mapping->i_mmap;
 		spin_lock(&mapping->i_mmap_lock);
+		if (importer &&
+		    vma->vm_truncate_count != next->vm_truncate_count) {
+			/*
+			 * unmap_mapping_range might be in progress:
+			 * ensure that the expanding vma is rescanned.
+			 */
+			importer->vm_truncate_count = 0;
+		}
 		if (insert) {
+			insert->vm_truncate_count = vma->vm_truncate_count;
 			/*
 			 * Put into prio_tree now, so instantiated pages
 			 * are visible to arm/parisc __flush_dcache_page
...
mm/mremap.c: move_one_page(), move_page_tables(), move_vma()
@@ -100,7 +100,7 @@ static inline pte_t *alloc_one_pte_map(struct mm_struct *mm, unsigned long addr)
 static int
 move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
-		unsigned long new_addr)
+		struct vm_area_struct *new_vma, unsigned long new_addr)
 {
 	struct address_space *mapping = NULL;
 	struct mm_struct *mm = vma->vm_mm;
@@ -116,6 +116,9 @@ move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
 		 */
 		mapping = vma->vm_file->f_mapping;
 		spin_lock(&mapping->i_mmap_lock);
+		if (new_vma->vm_truncate_count &&
+		    new_vma->vm_truncate_count != vma->vm_truncate_count)
+			new_vma->vm_truncate_count = 0;
 	}
 	spin_lock(&mm->page_table_lock);
@@ -162,8 +165,8 @@ move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
 }
 static unsigned long move_page_tables(struct vm_area_struct *vma,
-		unsigned long new_addr, unsigned long old_addr,
-		unsigned long len)
+		unsigned long old_addr, struct vm_area_struct *new_vma,
+		unsigned long new_addr, unsigned long len)
 {
 	unsigned long offset;
@@ -175,7 +178,8 @@ static unsigned long move_page_tables(struct vm_area_struct *vma,
 	 * only a few pages.. This also makes error recovery easier.
 	 */
 	for (offset = 0; offset < len; offset += PAGE_SIZE) {
-		if (move_one_page(vma, old_addr+offset, new_addr+offset) < 0)
+		if (move_one_page(vma, old_addr + offset,
+				new_vma, new_addr + offset) < 0)
 			break;
 		cond_resched();
 	}
@@ -206,14 +210,14 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 	if (!new_vma)
 		return -ENOMEM;
-	moved_len = move_page_tables(vma, new_addr, old_addr, old_len);
+	moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len);
 	if (moved_len < old_len) {
 		/*
 		 * On error, move entries back from new area to old,
 		 * which will succeed since page tables still there,
 		 * and then proceed to unmap new area instead of old.
 		 */
-		move_page_tables(new_vma, old_addr, new_addr, moved_len);
+		move_page_tables(new_vma, new_addr, vma, old_addr, moved_len);
 		vma = new_vma;
 		old_len = new_len;
 		old_addr = new_addr;
...
mm/rmap.c: __anon_vma_merge()
@@ -121,14 +121,7 @@ int anon_vma_prepare(struct vm_area_struct *vma)
 void __anon_vma_merge(struct vm_area_struct *vma, struct vm_area_struct *next)
 {
-	if (!vma->anon_vma) {
-		BUG_ON(!next->anon_vma);
-		vma->anon_vma = next->anon_vma;
-		list_add(&vma->anon_vma_node, &next->anon_vma_node);
-	} else {
-		/* if they're both non-null they must be the same */
-		BUG_ON(vma->anon_vma != next->anon_vma);
-	}
+	BUG_ON(vma->anon_vma != next->anon_vma);
 	list_del(&next->anon_vma_node);
 }
...