Commit 8a1a48b7 authored by Hugh Dickins, committed by Linus Torvalds

[PATCH] vmtrunc: restart_addr in truncate_count

Despite its restart_pgoff pretensions, unmap_mapping_range_vma was fatally
unable to distinguish a vma to be restarted from the case where that vma
has been freed, and its vm_area_struct reused for the top part of a
!new_below split of an isomorphic vma yet to be scanned.

The obvious answer is to note restart_vma in the struct address_space, and
cancel it when that vma is freed; but I'm reluctant to enlarge every struct
inode just for this.  Another answer is to flag valid restart in the
vm_area_struct; but vm_flags is protected by down_write of mmap_sem, which
we cannot take within down_write of i_sem.  If we're going to need yet
another field, better to record the restart_addr itself: restart_vma only
recorded the last restart, but a busy tree could well use more.

Actually, we don't need another field: we can neatly (though naughtily)
keep restart_addr in vm_truncate_count, provided mapping->truncate_count
leaps over those values which look like a page-aligned address.  Zero
remains good for forcing a scan (though now interpreted as restart_addr 0),
and it turns out no change is needed to any of the vm_truncate_count
settings in dup_mmap, vma_link, vma_adjust, move_one_page.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent d5c772ed
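
To make the trick concrete (an illustration, not part of the patch): restart
addresses are page-aligned, so a vm_truncate_count whose bits below PAGE_SHIFT
are all zero is read as a saved restart_addr, while mapping->truncate_count is
stepped so that it never takes a page-aligned value.  A minimal userspace
sketch of the encoding, assuming 4K pages:

    #include <stdio.h>

    /* assumed 4K pages; in the kernel these come from <asm/page.h> */
    #define PAGE_SIZE 4096UL
    #define PAGE_MASK (~(PAGE_SIZE - 1))

    /* as in the patch: a page-aligned value is a restart address */
    #define is_restart_addr(truncate_count) (!((truncate_count) & ~PAGE_MASK))

    int main(void)
    {
            unsigned long count = 0;
            int i;

            /*
             * Step the counter the way unmap_mapping_range does, leaping
             * over page-aligned values (0, 4096, 8192, ...) so a count
             * can never be mistaken for a saved restart address.
             */
            for (i = 0; i < 10000; i++) {
                    count++;
                    if (is_restart_addr(count))
                            count++;
            }

            /* page-aligned: reads as a saved restart address */
            printf("%#lx -> %s\n", 0x12345000UL,
                   is_restart_addr(0x12345000UL) ? "restart_addr" : "count");
            /* the stepped counter is never page-aligned */
            printf("%#lx -> %s\n", count,
                   is_restart_addr(count) ? "restart_addr" : "count");
            return 0;
    }

Note that zero is page-aligned too, which is why a vm_truncate_count of 0
reads as restart_addr 0 and still forces a full rescan of the vma.
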
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -105,7 +105,7 @@ struct vm_area_struct {
 					units, *not* PAGE_CACHE_SIZE */
 	struct file * vm_file;		/* File we map to (can be NULL). */
 	void * vm_private_data;		/* was vm_pte (shared mem) */
-	unsigned int vm_truncate_count; /* compare mapping->truncate_count */
+	unsigned long vm_truncate_count;/* truncate_count or restart_addr */
 
 #ifndef CONFIG_MMU
 	atomic_t vm_usage;		/* refcount (VMAs shared if !MMU) */
@@ -579,11 +579,8 @@ struct zap_details {
 	pgoff_t first_index;			/* Lowest page->index to unmap */
 	pgoff_t last_index;			/* Highest page->index to unmap */
 	spinlock_t *i_mmap_lock;		/* For unmap_mapping_range: */
-	struct vm_area_struct *restart_vma;	/* Where lock was dropped */
-	pgoff_t restart_pgoff;			/* File offset for restart */
-	unsigned long restart_addr;		/* Where we should restart */
 	unsigned long break_addr;		/* Where unmap_vmas stopped */
-	unsigned int truncate_count;		/* Compare vm_truncate_count */
+	unsigned long truncate_count;		/* Compare vm_truncate_count */
 };
 
 void zap_page_range(struct vm_area_struct *vma, unsigned long address,
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1392,20 +1392,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
  *
  * In order to make forward progress despite repeatedly restarting some
  * large vma, note the break_addr set by unmap_vmas when it breaks out:
- * and restart from that address when we reach that vma again (so long
- * as another such was not inserted earlier while the lock was dropped).
- *
- * There is no guarantee that the restart_vma will remain intact when
- * the lock is regained: but if it has been freed to some other use,
- * it cannot then appear in the tree or list of vmas, so no harm done;
- * if it has been reused for a new vma of the same mapping, nopage
- * checks on i_size and truncate_count ensure it cannot be mapping any
- * of the truncated pages, so the area below restart_addr is still safe
- * to skip - but we must check pgoff to prevent spurious unmapping; or
- * restart_vma may have been split or merged, shrunk or extended - but
- * never shifted, so restart_addr and restart_pgoff remain in synch
- * (even in the case of mremap move, which makes a copy of the vma).
+ * and restart from that address when we reach that vma again.  It might
+ * have been split or merged, shrunk or extended, but never shifted: so
+ * restart_addr remains valid so long as it remains in the vma's range.
+ * unmap_mapping_range forces truncate_count to leap over page-aligned
+ * values so we can save vma's restart_addr in its truncate_count field.
  */
+#define is_restart_addr(truncate_count) (!((truncate_count) & ~PAGE_MASK))
+
 static void reset_vma_truncate_counts(struct address_space *mapping)
 {
@@ -1422,26 +1415,20 @@ static int unmap_mapping_range_vma(struct vm_area_struct *vma,
 		unsigned long start_addr, unsigned long end_addr,
 		struct zap_details *details)
 {
+	unsigned long restart_addr;
 	int need_break;
 
-	if (vma == details->restart_vma &&
-	    start_addr < details->restart_addr) {
-		/*
-		 * Be careful: this vm_area_struct may have been reused
-		 * meanwhile.  If pgoff matches up, it's probably the
-		 * same one (perhaps shrunk or extended), but no harm is
-		 * done if actually it's new.  If pgoff does not match,
-		 * it would likely be wrong to unmap from restart_addr,
-		 * but it must be new, so we can just mark it completed.
-		 */
-		start_addr = details->restart_addr;
-		if (linear_page_index(vma, start_addr) !=
-				details->restart_pgoff || start_addr >= end_addr) {
+again:
+	restart_addr = vma->vm_truncate_count;
+	if (is_restart_addr(restart_addr) && start_addr < restart_addr) {
+		start_addr = restart_addr;
+		if (start_addr >= end_addr) {
+			/* Top of vma has been split off since last time */
 			vma->vm_truncate_count = details->truncate_count;
 			return 0;
 		}
 	}
 
-again:
 	details->break_addr = end_addr;
 	zap_page_range(vma, start_addr, end_addr - start_addr, details);
@@ -1460,14 +1447,10 @@ static int unmap_mapping_range_vma(struct vm_area_struct *vma,
 		if (!need_break)
 			return 0;
 	} else {
-		if (!need_break) {
-			start_addr = details->break_addr;
-			goto again;
-		}
-		details->restart_vma = vma;
-		details->restart_pgoff =
-			linear_page_index(vma, details->break_addr);
-		details->restart_addr = details->break_addr;
+		/* Note restart_addr in vma's truncate_count field */
+		vma->vm_truncate_count = details->break_addr;
+		if (!need_break)
+			goto again;
 	}
 
 	spin_unlock(details->i_mmap_lock);
@@ -1569,15 +1552,15 @@ void unmap_mapping_range(struct address_space *mapping,
 	if (details.last_index < details.first_index)
 		details.last_index = ULONG_MAX;
 	details.i_mmap_lock = &mapping->i_mmap_lock;
-	details.restart_vma = NULL;
 
 	spin_lock(&mapping->i_mmap_lock);
 
 	/* Protect against page faults, and endless unmapping loops */
 	mapping->truncate_count++;
-	if (unlikely(mapping->truncate_count == 0)) {
-		mapping->truncate_count++;
-		reset_vma_truncate_counts(mapping);
+	if (unlikely(is_restart_addr(mapping->truncate_count))) {
+		if (mapping->truncate_count == 0)
+			reset_vma_truncate_counts(mapping);
+		mapping->truncate_count++;
 	}
 	details.truncate_count = mapping->truncate_count;
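
For context (not part of this commit): the main consumer of
unmap_mapping_range is the truncation path.  A simplified sketch of the
2.6-era shrink case, with the expand path and error handling omitted:

    /* sketch of the caller, simplified from 2.6-era vmtruncate() */
    void truncate_sketch(struct inode *inode, loff_t offset)
    {
            struct address_space *mapping = inode->i_mapping;

            inode->i_size = offset;
            /*
             * holebegin rounds up to the next page boundary, so the page
             * straddling the new i_size stays mapped (truncate zeroes its
             * tail instead); holelen 0 means "to the end of the file";
             * even_cows 1 also zaps private COW copies of file pages.
             */
            unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
            truncate_inode_pages(mapping, offset);
    }

Each such call steps mapping->truncate_count past any page-aligned value, so
after the scan a vma whose vm_truncate_count equals details.truncate_count is
known to be fully unmapped, while a page-aligned vm_truncate_count is a
restart_addr waiting for the next pass.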