Commit d9be9136 authored by Andrew Morton, committed by Richard Henderson

[PATCH] turn i_shared_lock into a semaphore

i_shared_lock is held for a very long time during vmtruncate() and causes
high scheduling latencies when truncating a file which is mmapped.  I've seen
100 milliseconds.

So turn it into a semaphore.  It nests inside mmap_sem.

This change is also needed by the shared pagetable patch, which needs to
unshare ptes on the vmtruncate path: lots of pagetable pages need to be
allocated, and those allocations use __GFP_WAIT, so they may sleep.

The patch also makes unmap_vma() static.
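For readers skimming the diff, a minimal illustrative sketch (not part of the patch; the helper name and the mapping pointer are hypothetical) of why the lock type matters: the holder of a semaphore may sleep, so allocations that use __GFP_WAIT become legal inside the critical section, which a spinlock forbids.

#include <linux/fs.h>		/* struct address_space, i_shared_sem */
#include <linux/slab.h>		/* kmalloc()/kfree() */
#include <asm/semaphore.h>	/* down()/up() */

/* Hypothetical helper, for illustration only. */
static void touch_shared_list(struct address_space *mapping)
{
	void *buf;

	/* Before this patch: spin_lock(&mapping->i_shared_lock);
	 * sleeping was forbidden until the matching spin_unlock(). */
	down(&mapping->i_shared_sem);

	/* GFP_KERNEL includes __GFP_WAIT, so this call may sleep;
	 * that is legal under a semaphore but not under a spinlock. */
	buf = kmalloc(1024, GFP_KERNEL);
	kfree(buf);

	up(&mapping->i_shared_sem);
}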
parent b473e48b
@@ -66,10 +66,9 @@ in some cases it is not really needed. Eg, vm_start is modified by
 expand_stack(), it is hard to come up with a destructive scenario without
 having the vmlist protection in this case.
-The page_table_lock nests with the inode i_shared_lock and the kmem cache
-c_spinlock spinlocks. This is okay, since code that holds i_shared_lock
-never asks for memory, and the kmem code asks for pages after dropping
-c_spinlock. The page_table_lock also nests with pagecache_lock and
+The page_table_lock nests with the inode i_shared_sem and the kmem cache
+c_spinlock spinlocks. This is okay, since the kmem code asks for pages after
+dropping c_spinlock. The page_table_lock also nests with pagecache_lock and
 pagemap_lru_lock spinlocks, and no code asks for memory with these locks
 held.
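As a hedged sketch of the nesting described above (the function and its arguments are hypothetical, not code from this patch): the sleeping lock is taken first and the spinlock inside it, and nothing allocates memory while the spinlock is held.

#include <linux/fs.h>		/* struct address_space */
#include <linux/sched.h>	/* struct mm_struct, page_table_lock */
#include <linux/spinlock.h>
#include <asm/semaphore.h>

/* Hypothetical illustration of the documented nesting order. */
static void nesting_example(struct mm_struct *mm, struct address_space *mapping)
{
	down(&mapping->i_shared_sem);		/* may sleep; taken first */
	spin_lock(&mm->page_table_lock);	/* nests inside the semaphore */

	/* ... touch the vma lists and page tables here; no __GFP_WAIT
	 * allocation is attempted while the spinlock is held ... */

	spin_unlock(&mm->page_table_lock);
	up(&mapping->i_shared_sem);
}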
@@ -1219,7 +1219,7 @@ static int __init init_blkmtd(void)
 	INIT_LIST_HEAD(&mtd_rawdevice->as.dirty_pages);
 	INIT_LIST_HEAD(&mtd_rawdevice->as.locked_pages);
 	mtd_rawdevice->as.host = NULL;
-	spin_lock_init(&(mtd_rawdevice->as.i_shared_lock));
+	init_MUTEX(&(mtd_rawdevice->as.i_shared_sem));
 	mtd_rawdevice->as.a_ops = &blkmtd_aops;
 	INIT_LIST_HEAD(&mtd_rawdevice->as.i_mmap);
@@ -297,7 +297,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 		goto do_expand;
 	inode->i_size = offset;
-	spin_lock(&mapping->i_shared_lock);
+	down(&mapping->i_shared_sem);
 	if (list_empty(&mapping->i_mmap) && list_empty(&mapping->i_mmap_shared))
 		goto out_unlock;
 	if (!list_empty(&mapping->i_mmap))
@@ -306,7 +306,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 		hugetlb_vmtruncate_list(&mapping->i_mmap_shared, pgoff);
 out_unlock:
-	spin_unlock(&mapping->i_shared_lock);
+	up(&mapping->i_shared_sem);
 	truncate_hugepages(mapping, offset);
 	return 0;
@@ -171,7 +171,7 @@ void inode_init_once(struct inode *inode)
 	sema_init(&inode->i_sem, 1);
 	INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
 	rwlock_init(&inode->i_data.page_lock);
-	spin_lock_init(&inode->i_data.i_shared_lock);
+	init_MUTEX(&inode->i_data.i_shared_sem);
 	INIT_LIST_HEAD(&inode->i_data.private_list);
 	spin_lock_init(&inode->i_data.private_lock);
 	INIT_LIST_HEAD(&inode->i_data.i_mmap);
@@ -319,7 +319,7 @@ struct address_space {
 	struct address_space_operations *a_ops;	/* methods */
 	struct list_head i_mmap;		/* list of private mappings */
 	struct list_head i_mmap_shared;		/* list of private mappings */
-	spinlock_t i_shared_lock;		/* and spinlock protecting it */
+	struct semaphore i_shared_sem;		/* and sem protecting it */
 	unsigned long dirtied_when;		/* jiffies of first page dirtying */
 	int gfp_mask;				/* how to allocate the pages */
 	struct backing_dev_info *backing_dev_info; /* device readahead, etc */
@@ -529,7 +529,6 @@ extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned lon
 	struct vm_area_struct **pprev);
 extern int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 	unsigned long addr, int new_below);
-extern void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area);
 /* Look up the first VMA which intersects the interval start_addr..end_addr-1,
    NULL if none. Assume start_addr < end_addr. */
@@ -262,9 +262,9 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
 				atomic_dec(&inode->i_writecount);
 			/* insert tmp into the share list, just after mpnt */
-			spin_lock(&inode->i_mapping->i_shared_lock);
+			down(&inode->i_mapping->i_shared_sem);
 			list_add_tail(&tmp->shared, &mpnt->shared);
-			spin_unlock(&inode->i_mapping->i_shared_lock);
+			up(&inode->i_mapping->i_shared_sem);
 		}
 		/*
@@ -55,11 +55,14 @@
 /*
  * Lock ordering:
  *
- *  ->i_shared_lock		(vmtruncate)
+ *  ->i_shared_sem		(vmtruncate)
  *    ->private_lock		(__free_pte->__set_page_dirty_buffers)
  *      ->swap_list_lock
  *        ->swap_device_lock	(exclusive_swap_page, others)
  *          ->mapping->page_lock
+ *  ->mmap_sem
+ *    ->i_shared_sem		(various places)
+ *
  * ->inode_lock
  *   ->sb_lock			(fs/fs-writeback.c)
  *     ->mapping->page_lock	(__sync_single_inode)
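A small sketch of the ordering entry added above (hypothetical function; assumes a file-backed vma and the usual mm pointer): on the mmap side, i_shared_sem is only ever taken while mmap_sem is already held, never the other way around. The vmtruncate path takes i_shared_sem on its own, which is consistent with this ordering.

#include <linux/fs.h>
#include <linux/sched.h>	/* struct mm_struct, mmap_sem */
#include <asm/semaphore.h>

/* Hypothetical mmap-side path: i_shared_sem nests inside mmap_sem. */
static void mmap_side_example(struct mm_struct *mm, struct address_space *mapping)
{
	down_write(&mm->mmap_sem);	/* outer lock: the mm's rw_semaphore */
	down(&mapping->i_shared_sem);	/* inner lock: the mapping's semaphore */

	/* ... link or unlink a vma on mapping->i_mmap / i_mmap_shared ... */

	up(&mapping->i_shared_sem);
	up_write(&mm->mmap_sem);
}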
@@ -968,7 +968,7 @@ int vmtruncate(struct inode * inode, loff_t offset)
 	if (inode->i_size < offset)
 		goto do_expand;
 	inode->i_size = offset;
-	spin_lock(&mapping->i_shared_lock);
+	down(&mapping->i_shared_sem);
 	if (list_empty(&mapping->i_mmap) && list_empty(&mapping->i_mmap_shared))
 		goto out_unlock;
@@ -979,7 +979,7 @@ int vmtruncate(struct inode * inode, loff_t offset)
 		vmtruncate_list(&mapping->i_mmap_shared, pgoff);
 out_unlock:
-	spin_unlock(&mapping->i_shared_lock);
+	up(&mapping->i_shared_sem);
 	truncate_inode_pages(mapping, offset);
 	goto out_truncate;
@@ -132,7 +132,9 @@ int vm_enough_memory(long pages)
 	return 0;
 }
-/* Remove one vm structure from the inode's i_mapping address space. */
+/*
+ * Remove one vm structure from the inode's i_mapping address space.
+ */
 static void remove_shared_vm_struct(struct vm_area_struct *vma)
 {
 	struct file *file = vma->vm_file;
@@ -140,11 +142,11 @@ static void remove_shared_vm_struct(struct vm_area_struct *vma)
 	if (file) {
 		struct inode *inode = file->f_dentry->d_inode;
-		spin_lock(&inode->i_mapping->i_shared_lock);
+		down(&inode->i_mapping->i_shared_sem);
 		if (vma->vm_flags & VM_DENYWRITE)
 			atomic_inc(&inode->i_writecount);
 		list_del_init(&vma->shared);
-		spin_unlock(&inode->i_mapping->i_shared_lock);
+		up(&inode->i_mapping->i_shared_sem);
 	}
 }
@@ -346,12 +348,12 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
 		mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
 	if (mapping)
-		spin_lock(&mapping->i_shared_lock);
+		down(&mapping->i_shared_sem);
 	spin_lock(&mm->page_table_lock);
 	__vma_link(mm, vma, prev, rb_link, rb_parent);
 	spin_unlock(&mm->page_table_lock);
 	if (mapping)
-		spin_unlock(&mapping->i_shared_lock);
+		up(&mapping->i_shared_sem);
 	mm->map_count++;
 	validate_mm(mm);
@@ -955,7 +957,7 @@ static void free_pgtables(mmu_gather_t *tlb, struct vm_area_struct *prev,
  * By the time this function is called, the area struct has been
  * removed from the process mapping list.
  */
-void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area)
+static void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area)
 {
 	size_t len = area->vm_end - area->vm_start;
@@ -1339,7 +1341,7 @@ void exit_mmap(struct mm_struct * mm)
 /* Insert vm structure into process list sorted by address
  * and into the inode's i_mmap ring. If vm_file is non-NULL
- * then the i_shared_lock must be held here.
+ * then i_shared_sem is taken here.
  */
 void insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
 {
@@ -363,4 +363,5 @@ void __init swap_setup(void)
 	 * Right now other parts of the system means that we
 	 * _really_ don't want to cluster much more
 	 */
+	init_MUTEX(&swapper_space.i_shared_sem);
 }
@@ -42,7 +42,6 @@ struct address_space swapper_space = {
 	.host = &swapper_inode,
 	.a_ops = &swap_aops,
 	.backing_dev_info = &swap_backing_dev_info,
-	.i_shared_lock = SPIN_LOCK_UNLOCKED,
 	.i_mmap = LIST_HEAD_INIT(swapper_space.i_mmap),
 	.i_mmap_shared = LIST_HEAD_INIT(swapper_space.i_mmap_shared),
 	.private_lock = SPIN_LOCK_UNLOCKED,