Commit d9be9136 authored by Andrew Morton, committed by Richard Henderson

[PATCH] turn i_shared_lock into a semaphore

i_shared_lock is held for a very long time during vmtruncate() and causes
high scheduling latencies when truncating an mmapped file.  I've seen
latencies of 100 milliseconds.

So turn it into a semaphore.  It nests inside mmap_sem.

This change is also needed by the shared pagetable patch, which needs to
unshare pte's on the vmtruncate path - lots of pagetable pages need to
be allocated, and those allocations use __GFP_WAIT.
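
[Editorial sketch, not part of the patch: why a sleeping lock is needed on
this path.  mapping and pgoff are the vmtruncate() locals; the unshare step
is a hypothetical placeholder for what the shared pagetable patch would do.]

	down(&mapping->i_shared_sem);	/* sleeping lock; nests inside mmap_sem */
	if (!list_empty(&mapping->i_mmap))
		vmtruncate_list(&mapping->i_mmap, pgoff);
	/*
	 * Hypothetical: the shared pagetable patch would unshare ptes
	 * here, allocating pagetable pages with GFP_KERNEL, which
	 * includes __GFP_WAIT and may sleep.  Under the old spinlock
	 * that would have been illegal.
	 */
	up(&mapping->i_shared_sem);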

The patch also makes unmap_vma() static.
parent b473e48b
@@ -66,10 +66,9 @@
 in some cases it is not really needed. Eg, vm_start is modified by
 expand_stack(), it is hard to come up with a destructive scenario without
 having the vmlist protection in this case.
 
-The page_table_lock nests with the inode i_shared_lock and the kmem cache
-c_spinlock spinlocks.  This is okay, since code that holds i_shared_lock
-never asks for memory, and the kmem code asks for pages after dropping
-c_spinlock.  The page_table_lock also nests with pagecache_lock and
+The page_table_lock nests with the inode i_shared_sem and the kmem cache
+c_spinlock spinlocks.  This is okay, since the kmem code asks for pages after
+dropping c_spinlock.  The page_table_lock also nests with pagecache_lock and
 pagemap_lru_lock spinlocks, and no code asks for memory with these locks
 held.
...
@@ -1219,7 +1219,7 @@ static int __init init_blkmtd(void)
 	INIT_LIST_HEAD(&mtd_rawdevice->as.dirty_pages);
 	INIT_LIST_HEAD(&mtd_rawdevice->as.locked_pages);
 	mtd_rawdevice->as.host = NULL;
-	spin_lock_init(&(mtd_rawdevice->as.i_shared_lock));
+	init_MUTEX(&(mtd_rawdevice->as.i_shared_sem));
 	mtd_rawdevice->as.a_ops = &blkmtd_aops;
 	INIT_LIST_HEAD(&mtd_rawdevice->as.i_mmap);
...
@@ -297,7 +297,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 		goto do_expand;
 	inode->i_size = offset;
-	spin_lock(&mapping->i_shared_lock);
+	down(&mapping->i_shared_sem);
 	if (list_empty(&mapping->i_mmap) && list_empty(&mapping->i_mmap_shared))
 		goto out_unlock;
 	if (!list_empty(&mapping->i_mmap))
@@ -306,7 +306,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 		hugetlb_vmtruncate_list(&mapping->i_mmap_shared, pgoff);
 out_unlock:
-	spin_unlock(&mapping->i_shared_lock);
+	up(&mapping->i_shared_sem);
 	truncate_hugepages(mapping, offset);
 	return 0;
...
@@ -171,7 +171,7 @@ void inode_init_once(struct inode *inode)
 	sema_init(&inode->i_sem, 1);
 	INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
 	rwlock_init(&inode->i_data.page_lock);
-	spin_lock_init(&inode->i_data.i_shared_lock);
+	init_MUTEX(&inode->i_data.i_shared_sem);
 	INIT_LIST_HEAD(&inode->i_data.private_list);
 	spin_lock_init(&inode->i_data.private_lock);
 	INIT_LIST_HEAD(&inode->i_data.i_mmap);
...
@@ -319,7 +319,7 @@ struct address_space {
 	struct address_space_operations *a_ops;	/* methods */
 	struct list_head	i_mmap;		/* list of private mappings */
 	struct list_head	i_mmap_shared;	/* list of private mappings */
-	spinlock_t		i_shared_lock;	/* and spinlock protecting it */
+	struct semaphore	i_shared_sem;	/* and sem protecting it */
 	unsigned long		dirtied_when;	/* jiffies of first page dirtying */
 	int			gfp_mask;	/* how to allocate the pages */
 	struct backing_dev_info *backing_dev_info; /* device readahead, etc */
...
@@ -529,7 +529,6 @@ extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned lon
 			struct vm_area_struct **pprev);
 extern int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 	unsigned long addr, int new_below);
-extern void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area);
 
 /* Look up the first VMA which intersects the interval start_addr..end_addr-1,
    NULL if none.  Assume start_addr < end_addr. */
...
@@ -262,9 +262,9 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
 				atomic_dec(&inode->i_writecount);
 
 			/* insert tmp into the share list, just after mpnt */
-			spin_lock(&inode->i_mapping->i_shared_lock);
+			down(&inode->i_mapping->i_shared_sem);
 			list_add_tail(&tmp->shared, &mpnt->shared);
-			spin_unlock(&inode->i_mapping->i_shared_lock);
+			up(&inode->i_mapping->i_shared_sem);
 		}
 
 		/*
...
@@ -55,11 +55,14 @@
 /*
  * Lock ordering:
  *
- *  ->i_shared_lock		(vmtruncate)
+ *  ->i_shared_sem		(vmtruncate)
  *    ->private_lock		(__free_pte->__set_page_dirty_buffers)
  *      ->swap_list_lock
  *        ->swap_device_lock	(exclusive_swap_page, others)
  *          ->mapping->page_lock
+ *  ->mmap_sem
+ *    ->i_shared_sem		(various places)
+ *
  *  ->inode_lock
  *    ->sb_lock			(fs/fs-writeback.c)
  *    ->mapping->page_lock	(__sync_single_inode)
...
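
[Editorial sketch, not part of the patch: the ordering documented above
means mmap_sem is always taken before i_shared_sem.  Names follow the
patch; the surrounding code is illustrative, not from the source.]

	down_write(&mm->mmap_sem);	/* outer: the per-mm semaphore */
	down(&mapping->i_shared_sem);	/* inner: nests inside mmap_sem */
	list_add_tail(&vma->shared, &mapping->i_mmap_shared);
	up(&mapping->i_shared_sem);
	up_write(&mm->mmap_sem);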
@@ -968,7 +968,7 @@ int vmtruncate(struct inode * inode, loff_t offset)
 	if (inode->i_size < offset)
 		goto do_expand;
 	inode->i_size = offset;
-	spin_lock(&mapping->i_shared_lock);
+	down(&mapping->i_shared_sem);
 	if (list_empty(&mapping->i_mmap) && list_empty(&mapping->i_mmap_shared))
 		goto out_unlock;
@@ -979,7 +979,7 @@ int vmtruncate(struct inode * inode, loff_t offset)
 		vmtruncate_list(&mapping->i_mmap_shared, pgoff);
 out_unlock:
-	spin_unlock(&mapping->i_shared_lock);
+	up(&mapping->i_shared_sem);
 	truncate_inode_pages(mapping, offset);
 	goto out_truncate;
...
@@ -132,7 +132,9 @@ int vm_enough_memory(long pages)
 	return 0;
 }
 
-/* Remove one vm structure from the inode's i_mapping address space. */
+/*
+ * Remove one vm structure from the inode's i_mapping address space.
+ */
 static void remove_shared_vm_struct(struct vm_area_struct *vma)
 {
 	struct file *file = vma->vm_file;
@@ -140,11 +142,11 @@ static void remove_shared_vm_struct(struct vm_area_struct *vma)
 	if (file) {
 		struct inode *inode = file->f_dentry->d_inode;
 
-		spin_lock(&inode->i_mapping->i_shared_lock);
+		down(&inode->i_mapping->i_shared_sem);
 		if (vma->vm_flags & VM_DENYWRITE)
 			atomic_inc(&inode->i_writecount);
 		list_del_init(&vma->shared);
-		spin_unlock(&inode->i_mapping->i_shared_lock);
+		up(&inode->i_mapping->i_shared_sem);
 	}
 }
@@ -346,12 +348,12 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
 	mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
 
 	if (mapping)
-		spin_lock(&mapping->i_shared_lock);
+		down(&mapping->i_shared_sem);
 	spin_lock(&mm->page_table_lock);
 	__vma_link(mm, vma, prev, rb_link, rb_parent);
 	spin_unlock(&mm->page_table_lock);
 	if (mapping)
-		spin_unlock(&mapping->i_shared_lock);
+		up(&mapping->i_shared_sem);
 
 	mm->map_count++;
 	validate_mm(mm);
@@ -955,7 +957,7 @@ static void free_pgtables(mmu_gather_t *tlb, struct vm_area_struct *prev,
  * By the time this function is called, the area struct has been
  * removed from the process mapping list.
  */
-void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area)
+static void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area)
 {
 	size_t len = area->vm_end - area->vm_start;
@@ -1339,7 +1341,7 @@ void exit_mmap(struct mm_struct * mm)
 /* Insert vm structure into process list sorted by address
  * and into the inode's i_mmap ring.  If vm_file is non-NULL
- * then the i_shared_lock must be held here.
+ * then i_shared_sem is taken here.
  */
 void insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
 {
...
@@ -363,4 +363,5 @@ void __init swap_setup(void)
 	 * Right now other parts of the system means that we
 	 * _really_ don't want to cluster much more
 	 */
+	init_MUTEX(&swapper_space.i_shared_sem);
 }
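
[Editorial note with a minimal standalone sketch: init_MUTEX() initializes
a semaphore with a count of one, i.e. it is equivalent to sema_init(&sem, 1).
Presumably - an assumption about the motivation - swapper_space gets this
runtime initialization because the SPIN_LOCK_UNLOCKED static initializer
removed in the next hunk has no equally portable semaphore counterpart.]

	struct semaphore sem;

	init_MUTEX(&sem);	/* same as sema_init(&sem, 1): count starts at 1 */
	down(&sem);		/* first holder acquires immediately, 1 -> 0 */
	up(&sem);		/* release, 0 -> 1 */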
...
@@ -42,7 +42,6 @@ struct address_space swapper_space = {
 	.host		= &swapper_inode,
 	.a_ops		= &swap_aops,
 	.backing_dev_info = &swap_backing_dev_info,
-	.i_shared_lock	= SPIN_LOCK_UNLOCKED,
 	.i_mmap		= LIST_HEAD_INIT(swapper_space.i_mmap),
 	.i_mmap_shared	= LIST_HEAD_INIT(swapper_space.i_mmap_shared),
 	.private_lock	= SPIN_LOCK_UNLOCKED,
...