Commit 068258f7 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] rmap 18: i_mmap_nonlinear

From: Hugh Dickins <hugh@veritas.com>

The prio_tree is of no use to nonlinear vmas: currently we're having to search
the tree in the most inefficient way to find all its nonlinears.  At the very
least we need an indication of the unlikely case when there are some
nonlinears; but really, we'd do best to take them out of the prio_tree
altogether, into a list of their own - i_mmap_nonlinear.
parent 2fe9c14c
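
As a rough illustration of the new scheme (not part of the patch): nonlinear vmas hang off the plain i_mmap_nonlinear list head via the existing shared.vm_set.list linkage, so callers walk a short list under i_mmap_lock instead of scanning the whole prio_tree. The helper below is a hypothetical sketch only, assuming the fields exactly as introduced by this patch:

	/*
	 * Hypothetical sketch, not in this patch: apply fn to every
	 * nonlinear vma of a mapping.  Assumes the i_mmap_nonlinear
	 * list head and shared.vm_set.list linkage added here;
	 * caller must hold mapping->i_mmap_lock.
	 */
	static void for_each_nonlinear_vma(struct address_space *mapping,
					   void (*fn)(struct vm_area_struct *vma))
	{
		struct vm_area_struct *vma;

		if (list_empty(&mapping->i_mmap_nonlinear))
			return;		/* common case: no nonlinear vmas */
		list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
				    shared.vm_set.list)
			fn(vma);
	}
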
@@ -202,6 +202,7 @@ void inode_init_once(struct inode *inode)
 	spin_lock_init(&inode->i_data.private_lock);
 	INIT_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
 	INIT_PRIO_TREE_ROOT(&inode->i_data.i_mmap_shared);
+	INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
 	spin_lock_init(&inode->i_lock);
 	i_size_ordered_init(inode);
 }

@@ -332,6 +332,7 @@ struct address_space {
 	struct address_space_operations *a_ops;	/* methods */
 	struct prio_tree_root	i_mmap;		/* tree of private mappings */
 	struct prio_tree_root	i_mmap_shared;	/* tree of shared mappings */
+	struct list_head	i_mmap_nonlinear;/*list of nonlinear mappings */
 	spinlock_t		i_mmap_lock;	/* protect trees & list above */
 	atomic_t		truncate_count;	/* Cover race condition with truncate */
 	unsigned long		flags;		/* error bits/gfp mask */

@@ -382,7 +383,8 @@ int mapping_tagged(struct address_space *mapping, int tag);
 static inline int mapping_mapped(struct address_space *mapping)
 {
 	return	!prio_tree_empty(&mapping->i_mmap) ||
-		!prio_tree_empty(&mapping->i_mmap_shared);
+		!prio_tree_empty(&mapping->i_mmap_shared) ||
+		!list_empty(&mapping->i_mmap_nonlinear);
 }
 /*

@@ -393,7 +395,8 @@ static inline int mapping_mapped(struct address_space *mapping)
  */
 static inline int mapping_writably_mapped(struct address_space *mapping)
 {
-	return !prio_tree_empty(&mapping->i_mmap_shared);
+	return	!prio_tree_empty(&mapping->i_mmap_shared) ||
+		!list_empty(&mapping->i_mmap_nonlinear);
 }
 /*

@@ -157,6 +157,7 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
 	unsigned long __prot, unsigned long pgoff, unsigned long flags)
 {
 	struct mm_struct *mm = current->mm;
+	struct address_space *mapping;
 	unsigned long end = start + size;
 	struct vm_area_struct *vma;
 	int err = -EINVAL;

@@ -197,8 +198,17 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
 			end <= vma->vm_end) {
 		/* Must set VM_NONLINEAR before any pages are populated. */
-		if (pgoff != linear_page_index(vma, start))
+		if (pgoff != linear_page_index(vma, start) &&
+		    !(vma->vm_flags & VM_NONLINEAR)) {
+			mapping = vma->vm_file->f_mapping;
+			spin_lock(&mapping->i_mmap_lock);
 			vma->vm_flags |= VM_NONLINEAR;
+			vma_prio_tree_remove(vma, &mapping->i_mmap_shared);
+			vma_prio_tree_init(vma);
+			list_add_tail(&vma->shared.vm_set.list,
+					&mapping->i_mmap_nonlinear);
+			spin_unlock(&mapping->i_mmap_lock);
+		}
 		/* ->populate can take a long time, so downgrade the lock. */
 		downgrade_write(&mm->mmap_sem);

@@ -1116,8 +1116,6 @@ static void unmap_mapping_range_list(struct prio_tree_root *root,
 	while ((vma = vma_prio_tree_next(vma, root, &iter,
 			details->first_index, details->last_index)) != NULL) {
-		if (unlikely(vma->vm_flags & VM_NONLINEAR))
-			continue;
 		vba = vma->vm_pgoff;
 		vea = vba + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) - 1;
 		/* Assume for now that PAGE_CACHE_SHIFT == PAGE_SHIFT */

@@ -1133,22 +1131,6 @@ static void unmap_mapping_range_list(struct prio_tree_root *root,
 	}
 }
-static void unmap_nonlinear_range_list(struct prio_tree_root *root,
-					struct zap_details *details)
-{
-	struct vm_area_struct *vma = NULL;
-	struct prio_tree_iter iter;
-	while ((vma = vma_prio_tree_next(vma, root, &iter,
-					0, ULONG_MAX)) != NULL) {
-		if (!(vma->vm_flags & VM_NONLINEAR))
-			continue;
-		details->nonlinear_vma = vma;
-		zap_page_range(vma, vma->vm_start,
-			vma->vm_end - vma->vm_start, details);
-	}
-}
 /**
  * unmap_mapping_range - unmap the portion of all mmaps
  * in the specified address_space corresponding to the specified

@@ -1198,11 +1180,18 @@ void unmap_mapping_range(struct address_space *mapping,
 	/* Don't waste time to check mapping on fully shared vmas */
 	details.check_mapping = NULL;
-	if (unlikely(!prio_tree_empty(&mapping->i_mmap_shared))) {
+	if (unlikely(!prio_tree_empty(&mapping->i_mmap_shared)))
 		unmap_mapping_range_list(&mapping->i_mmap_shared, &details);
-		unmap_nonlinear_range_list(&mapping->i_mmap_shared, &details);
-	}
+	if (unlikely(!list_empty(&mapping->i_mmap_nonlinear))) {
+		struct vm_area_struct *vma;
+		list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
+						shared.vm_set.list) {
+			details.nonlinear_vma = vma;
+			zap_page_range(vma, vma->vm_start,
+				vma->vm_end - vma->vm_start, &details);
+		}
+	}
 	spin_unlock(&mapping->i_mmap_lock);
 }
 EXPORT_SYMBOL(unmap_mapping_range);

@@ -72,7 +72,9 @@ static inline void __remove_shared_vm_struct(struct vm_area_struct *vma,
 		if (vma->vm_flags & VM_DENYWRITE)
 			atomic_inc(&file->f_dentry->d_inode->i_writecount);
-		if (vma->vm_flags & VM_SHARED)
+		if (unlikely(vma->vm_flags & VM_NONLINEAR))
+			list_del_init(&vma->shared.vm_set.list);
+		else if (vma->vm_flags & VM_SHARED)
 			vma_prio_tree_remove(vma, &mapping->i_mmap_shared);
 		else
 			vma_prio_tree_remove(vma, &mapping->i_mmap);

@@ -262,7 +264,10 @@ static inline void __vma_link_file(struct vm_area_struct *vma)
 		if (vma->vm_flags & VM_DENYWRITE)
 			atomic_dec(&file->f_dentry->d_inode->i_writecount);
-		if (vma->vm_flags & VM_SHARED)
+		if (unlikely(vma->vm_flags & VM_NONLINEAR))
+			list_add_tail(&vma->shared.vm_set.list,
+					&mapping->i_mmap_nonlinear);
+		else if (vma->vm_flags & VM_SHARED)
 			vma_prio_tree_insert(vma, &mapping->i_mmap_shared);
 		else
 			vma_prio_tree_insert(vma, &mapping->i_mmap);

@@ -339,10 +344,10 @@ void vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	if (file) {
 		mapping = file->f_mapping;
-		if (vma->vm_flags & VM_SHARED)
-			root = &mapping->i_mmap_shared;
-		else
+		if (!(vma->vm_flags & VM_SHARED))
 			root = &mapping->i_mmap;
+		else if (!(vma->vm_flags & VM_NONLINEAR))
+			root = &mapping->i_mmap_shared;
 		spin_lock(&mapping->i_mmap_lock);
 	}
 	spin_lock(&mm->page_table_lock);

@@ -530,6 +530,7 @@ static struct prio_tree_node *prio_tree_next(struct prio_tree_root *root,
 /*
  * Add a new vma known to map the same set of pages as the old vma:
  * useful for fork's dup_mmap as well as vma_prio_tree_insert below.
+ * Note that it just happens to work correctly on i_mmap_nonlinear too.
  */
 void vma_prio_tree_add(struct vm_area_struct *vma, struct vm_area_struct *old)
 {

@@ -335,10 +335,6 @@ static inline int page_referenced_file(struct page *page)
 	while ((vma = vma_prio_tree_next(vma, &mapping->i_mmap_shared,
 					&iter, pgoff, pgoff)) != NULL) {
-		if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
-			failed++;
-			continue;
-		}
 		if (vma->vm_flags & (VM_LOCKED|VM_RESERVED)) {
 			referenced++;
 			goto out;

@@ -352,8 +348,8 @@ static inline int page_referenced_file(struct page *page)
 		}
 	}
-	/* Hmm, but what of the nonlinears which pgoff,pgoff skipped? */
-	WARN_ON(!failed);
+	if (list_empty(&mapping->i_mmap_nonlinear))
+		WARN_ON(!failed);
 out:
 	spin_unlock(&mapping->i_mmap_lock);
 	return referenced;

@@ -757,8 +753,6 @@ static inline int try_to_unmap_file(struct page *page)
 	while ((vma = vma_prio_tree_next(vma, &mapping->i_mmap_shared,
 					&iter, pgoff, pgoff)) != NULL) {
-		if (unlikely(vma->vm_flags & VM_NONLINEAR))
-			continue;
 		if (vma->vm_mm->rss) {
 			address = vma_address(vma, pgoff);
 			ret = try_to_unmap_one(page,

@@ -768,10 +762,12 @@ static inline int try_to_unmap_file(struct page *page)
 		}
 	}
-	while ((vma = vma_prio_tree_next(vma, &mapping->i_mmap_shared,
-					&iter, 0, ULONG_MAX)) != NULL) {
-		if (VM_NONLINEAR != (vma->vm_flags &
-				(VM_NONLINEAR|VM_LOCKED|VM_RESERVED)))
+	if (list_empty(&mapping->i_mmap_nonlinear))
+		goto out;
+	list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
+						shared.vm_set.list) {
+		if (vma->vm_flags & (VM_LOCKED|VM_RESERVED))
 			continue;
 		cursor = (unsigned long) vma->vm_private_data;
 		if (cursor > max_nl_cursor)

@@ -799,10 +795,9 @@ static inline int try_to_unmap_file(struct page *page)
 	max_nl_cursor = CLUSTER_SIZE;
 	do {
-		while ((vma = vma_prio_tree_next(vma, &mapping->i_mmap_shared,
-					&iter, 0, ULONG_MAX)) != NULL) {
-			if (VM_NONLINEAR != (vma->vm_flags &
-					(VM_NONLINEAR|VM_LOCKED|VM_RESERVED)))
+		list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
+						shared.vm_set.list) {
+			if (vma->vm_flags & (VM_LOCKED|VM_RESERVED))
 				continue;
 			cursor = (unsigned long) vma->vm_private_data;
 			while (vma->vm_mm->rss &&

@@ -831,11 +826,9 @@ static inline int try_to_unmap_file(struct page *page)
 	 * in locked vmas). Reset cursor on all unreserved nonlinear
 	 * vmas, now forgetting on which ones it had fallen behind.
 	 */
-	vma = NULL;	/* it is already, but above loop might change */
-	while ((vma = vma_prio_tree_next(vma, &mapping->i_mmap_shared,
-					&iter, 0, ULONG_MAX)) != NULL) {
-		if ((vma->vm_flags & (VM_NONLINEAR|VM_RESERVED)) ==
-							VM_NONLINEAR)
+	list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
+						shared.vm_set.list) {
+		if (!(vma->vm_flags & VM_RESERVED))
 			vma->vm_private_data = 0;
 	}
 relock: