Commit 0c751b9b authored by Hugh Dickins's avatar Hugh Dickins Committed by Linus Torvalds

[PATCH] tmpfs truncate latency

High latency observed when deleting a large video file from tmpfs.

shmem_truncate held info->lock (easily dropped) and nested atomic kmaps
(more awkward to handle, since it's structured to keep them) while scanning
the many pages of its swap vector.  Now be prepared to cond_resched every
64 ops (but scan an empty page in one go).

shmem_free_pages to free a linked list of empty swap vector pages at the
end (cond_resched every 64): could still free them one by one in the main
loop, but this foreshadows scalability changes - which will want to use RCU
on them, only freeing the pages after a grace period.
Signed-off-by: default avatarHugh Dickins <hugh@veritas.com>
Signed-off-by: default avatarAndrew Morton <akpm@osdl.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent dd619f7b
...@@ -66,6 +66,9 @@ ...@@ -66,6 +66,9 @@
#define SHMEM_PAGEIN VM_READ #define SHMEM_PAGEIN VM_READ
#define SHMEM_TRUNCATE VM_WRITE #define SHMEM_TRUNCATE VM_WRITE
/* Definition to limit shmem_truncate's steps between cond_rescheds */
#define LATENCY_LIMIT 64
/* Pretend that each entry is of this size in directory's i_size */ /* Pretend that each entry is of this size in directory's i_size */
#define BOGO_DIRENT_SIZE 20 #define BOGO_DIRENT_SIZE 20
...@@ -188,13 +191,13 @@ static struct backing_dev_info shmem_backing_dev_info = { ...@@ -188,13 +191,13 @@ static struct backing_dev_info shmem_backing_dev_info = {
static LIST_HEAD(shmem_swaplist); static LIST_HEAD(shmem_swaplist);
static spinlock_t shmem_swaplist_lock = SPIN_LOCK_UNLOCKED; static spinlock_t shmem_swaplist_lock = SPIN_LOCK_UNLOCKED;
static void shmem_free_block(struct inode *inode) static void shmem_free_blocks(struct inode *inode, long pages)
{ {
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
if (sbinfo) { if (sbinfo) {
spin_lock(&sbinfo->stat_lock); spin_lock(&sbinfo->stat_lock);
sbinfo->free_blocks++; sbinfo->free_blocks += pages;
inode->i_blocks -= BLOCKS_PER_PAGE; inode->i_blocks -= pages*BLOCKS_PER_PAGE;
spin_unlock(&sbinfo->stat_lock); spin_unlock(&sbinfo->stat_lock);
} }
} }
...@@ -219,15 +222,9 @@ static void shmem_recalc_inode(struct inode *inode) ...@@ -219,15 +222,9 @@ static void shmem_recalc_inode(struct inode *inode)
freed = info->alloced - info->swapped - inode->i_mapping->nrpages; freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
if (freed > 0) { if (freed > 0) {
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
info->alloced -= freed; info->alloced -= freed;
shmem_unacct_blocks(info->flags, freed); shmem_unacct_blocks(info->flags, freed);
if (sbinfo) { shmem_free_blocks(inode, freed);
spin_lock(&sbinfo->stat_lock);
sbinfo->free_blocks += freed;
inode->i_blocks -= freed*BLOCKS_PER_PAGE;
spin_unlock(&sbinfo->stat_lock);
}
} }
} }
...@@ -380,7 +377,7 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long ...@@ -380,7 +377,7 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
spin_lock(&info->lock); spin_lock(&info->lock);
if (!page) { if (!page) {
shmem_free_block(inode); shmem_free_blocks(inode, 1);
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
} }
if (sgp != SGP_WRITE && if (sgp != SGP_WRITE &&
...@@ -393,7 +390,7 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long ...@@ -393,7 +390,7 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
} }
if (page) { if (page) {
/* another task gave its page, or truncated the file */ /* another task gave its page, or truncated the file */
shmem_free_block(inode); shmem_free_blocks(inode, 1);
shmem_dir_free(page); shmem_dir_free(page);
} }
if (info->next_index <= index && !IS_ERR(entry)) if (info->next_index <= index && !IS_ERR(entry))
...@@ -422,6 +419,49 @@ static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir) ...@@ -422,6 +419,49 @@ static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir)
return freed; return freed;
} }
static int shmem_map_and_free_swp(struct page *subdir,
int offset, int limit, struct page ***dir)
{
swp_entry_t *ptr;
int freed = 0;
ptr = shmem_swp_map(subdir);
for (; offset < limit; offset += LATENCY_LIMIT) {
int size = limit - offset;
if (size > LATENCY_LIMIT)
size = LATENCY_LIMIT;
freed += shmem_free_swp(ptr+offset, ptr+offset+size);
if (need_resched()) {
shmem_swp_unmap(ptr);
if (*dir) {
shmem_dir_unmap(*dir);
*dir = NULL;
}
cond_resched();
ptr = shmem_swp_map(subdir);
}
}
shmem_swp_unmap(ptr);
return freed;
}
static void shmem_free_pages(struct list_head *next)
{
struct page *page;
int freed = 0;
do {
page = container_of(next, struct page, lru);
next = next->next;
shmem_dir_free(page);
freed++;
if (freed >= LATENCY_LIMIT) {
cond_resched();
freed = 0;
}
} while (next);
}
static void shmem_truncate(struct inode *inode) static void shmem_truncate(struct inode *inode)
{ {
struct shmem_inode_info *info = SHMEM_I(inode); struct shmem_inode_info *info = SHMEM_I(inode);
...@@ -429,10 +469,15 @@ static void shmem_truncate(struct inode *inode) ...@@ -429,10 +469,15 @@ static void shmem_truncate(struct inode *inode)
unsigned long size; unsigned long size;
unsigned long limit; unsigned long limit;
unsigned long stage; unsigned long stage;
unsigned long diroff;
struct page **dir; struct page **dir;
struct page *topdir;
struct page *middir;
struct page *subdir; struct page *subdir;
struct page *empty;
swp_entry_t *ptr; swp_entry_t *ptr;
LIST_HEAD(pages_to_free);
long nr_pages_to_free = 0;
long nr_swaps_freed = 0;
int offset; int offset;
int freed; int freed;
...@@ -445,14 +490,22 @@ static void shmem_truncate(struct inode *inode) ...@@ -445,14 +490,22 @@ static void shmem_truncate(struct inode *inode)
info->flags |= SHMEM_TRUNCATE; info->flags |= SHMEM_TRUNCATE;
limit = info->next_index; limit = info->next_index;
info->next_index = idx; info->next_index = idx;
topdir = info->i_indirect;
if (topdir && idx <= SHMEM_NR_DIRECT) {
info->i_indirect = NULL;
nr_pages_to_free++;
list_add(&topdir->lru, &pages_to_free);
}
spin_unlock(&info->lock);
if (info->swapped && idx < SHMEM_NR_DIRECT) { if (info->swapped && idx < SHMEM_NR_DIRECT) {
ptr = info->i_direct; ptr = info->i_direct;
size = limit; size = limit;
if (size > SHMEM_NR_DIRECT) if (size > SHMEM_NR_DIRECT)
size = SHMEM_NR_DIRECT; size = SHMEM_NR_DIRECT;
info->swapped -= shmem_free_swp(ptr+idx, ptr+size); nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size);
} }
if (!info->i_indirect) if (!topdir)
goto done2; goto done2;
BUG_ON(limit <= SHMEM_NR_DIRECT); BUG_ON(limit <= SHMEM_NR_DIRECT);
...@@ -461,36 +514,38 @@ static void shmem_truncate(struct inode *inode) ...@@ -461,36 +514,38 @@ static void shmem_truncate(struct inode *inode)
offset = idx % ENTRIES_PER_PAGE; offset = idx % ENTRIES_PER_PAGE;
idx -= offset; idx -= offset;
empty = NULL; dir = shmem_dir_map(topdir);
dir = shmem_dir_map(info->i_indirect);
stage = ENTRIES_PER_PAGEPAGE/2; stage = ENTRIES_PER_PAGEPAGE/2;
if (idx < ENTRIES_PER_PAGEPAGE/2) if (idx < ENTRIES_PER_PAGEPAGE/2) {
dir += idx/ENTRIES_PER_PAGE; middir = topdir;
else { diroff = idx/ENTRIES_PER_PAGE;
} else {
dir += ENTRIES_PER_PAGE/2; dir += ENTRIES_PER_PAGE/2;
dir += (idx - ENTRIES_PER_PAGEPAGE/2)/ENTRIES_PER_PAGEPAGE; dir += (idx - ENTRIES_PER_PAGEPAGE/2)/ENTRIES_PER_PAGEPAGE;
while (stage <= idx) while (stage <= idx)
stage += ENTRIES_PER_PAGEPAGE; stage += ENTRIES_PER_PAGEPAGE;
middir = *dir;
if (*dir) { if (*dir) {
subdir = *dir; diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) %
size = ((idx - ENTRIES_PER_PAGEPAGE/2) %
ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE; ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
if (!size && !offset) { if (!diroff && !offset) {
empty = subdir;
*dir = NULL; *dir = NULL;
nr_pages_to_free++;
list_add(&middir->lru, &pages_to_free);
} }
shmem_dir_unmap(dir); shmem_dir_unmap(dir);
dir = shmem_dir_map(subdir) + size; dir = shmem_dir_map(middir);
} else { } else {
diroff = 0;
offset = 0; offset = 0;
idx = stage; idx = stage;
} }
} }
for (; idx < limit; idx += ENTRIES_PER_PAGE, dir++) { for (; idx < limit; idx += ENTRIES_PER_PAGE, diroff++) {
if (unlikely(idx == stage)) { if (unlikely(idx == stage)) {
shmem_dir_unmap(dir-1); shmem_dir_unmap(dir);
dir = shmem_dir_map(info->i_indirect) + dir = shmem_dir_map(topdir) +
ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE; ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
while (!*dir) { while (!*dir) {
dir++; dir++;
...@@ -499,50 +554,43 @@ static void shmem_truncate(struct inode *inode) ...@@ -499,50 +554,43 @@ static void shmem_truncate(struct inode *inode)
goto done1; goto done1;
} }
stage = idx + ENTRIES_PER_PAGEPAGE; stage = idx + ENTRIES_PER_PAGEPAGE;
subdir = *dir; middir = *dir;
*dir = NULL; *dir = NULL;
nr_pages_to_free++;
list_add(&middir->lru, &pages_to_free);
shmem_dir_unmap(dir); shmem_dir_unmap(dir);
if (empty) { cond_resched();
shmem_dir_free(empty); dir = shmem_dir_map(middir);
shmem_free_block(inode); diroff = 0;
}
empty = subdir;
cond_resched_lock(&info->lock);
dir = shmem_dir_map(subdir);
} }
subdir = *dir; subdir = dir[diroff];
if (subdir && subdir->nr_swapped) { if (subdir && subdir->nr_swapped) {
ptr = shmem_swp_map(subdir);
size = limit - idx; size = limit - idx;
if (size > ENTRIES_PER_PAGE) if (size > ENTRIES_PER_PAGE)
size = ENTRIES_PER_PAGE; size = ENTRIES_PER_PAGE;
freed = shmem_free_swp(ptr+offset, ptr+size); freed = shmem_map_and_free_swp(subdir,
shmem_swp_unmap(ptr); offset, size, &dir);
info->swapped -= freed; if (!dir)
dir = shmem_dir_map(middir);
nr_swaps_freed += freed;
if (offset)
spin_lock(&info->lock);
subdir->nr_swapped -= freed; subdir->nr_swapped -= freed;
if (offset)
spin_unlock(&info->lock);
BUG_ON(subdir->nr_swapped > offset); BUG_ON(subdir->nr_swapped > offset);
} }
if (offset) if (offset)
offset = 0; offset = 0;
else if (subdir) { else if (subdir) {
*dir = NULL; dir[diroff] = NULL;
shmem_dir_free(subdir); nr_pages_to_free++;
shmem_free_block(inode); list_add(&subdir->lru, &pages_to_free);
} }
} }
done1: done1:
shmem_dir_unmap(dir-1); shmem_dir_unmap(dir);
if (empty) {
shmem_dir_free(empty);
shmem_free_block(inode);
}
if (info->next_index <= SHMEM_NR_DIRECT) {
shmem_dir_free(info->i_indirect);
info->i_indirect = NULL;
shmem_free_block(inode);
}
done2: done2:
BUG_ON(info->swapped > info->next_index);
if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) { if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) {
/* /*
* Call truncate_inode_pages again: racing shmem_unuse_inode * Call truncate_inode_pages again: racing shmem_unuse_inode
...@@ -551,13 +599,24 @@ static void shmem_truncate(struct inode *inode) ...@@ -551,13 +599,24 @@ static void shmem_truncate(struct inode *inode)
* Also, though shmem_getpage checks i_size before adding to * Also, though shmem_getpage checks i_size before adding to
* cache, no recheck after: so fix the narrow window there too. * cache, no recheck after: so fix the narrow window there too.
*/ */
spin_unlock(&info->lock);
truncate_inode_pages(inode->i_mapping, inode->i_size); truncate_inode_pages(inode->i_mapping, inode->i_size);
spin_lock(&info->lock);
} }
spin_lock(&info->lock);
info->flags &= ~SHMEM_TRUNCATE; info->flags &= ~SHMEM_TRUNCATE;
info->swapped -= nr_swaps_freed;
if (nr_pages_to_free)
shmem_free_blocks(inode, nr_pages_to_free);
shmem_recalc_inode(inode); shmem_recalc_inode(inode);
spin_unlock(&info->lock); spin_unlock(&info->lock);
/*
* Empty swap vector directory pages to be freed?
*/
if (!list_empty(&pages_to_free)) {
pages_to_free.prev->next = NULL;
shmem_free_pages(pages_to_free.next);
}
} }
static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
...@@ -665,9 +724,6 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s ...@@ -665,9 +724,6 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
} }
if (!info->i_indirect) if (!info->i_indirect)
goto lost2; goto lost2;
/* we might be racing with shmem_truncate */
if (limit <= SHMEM_NR_DIRECT)
goto lost2;
dir = shmem_dir_map(info->i_indirect); dir = shmem_dir_map(info->i_indirect);
stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2; stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2;
...@@ -1050,7 +1106,7 @@ static int shmem_getpage(struct inode *inode, unsigned long idx, ...@@ -1050,7 +1106,7 @@ static int shmem_getpage(struct inode *inode, unsigned long idx,
idx); idx);
if (!filepage) { if (!filepage) {
shmem_unacct_blocks(info->flags, 1); shmem_unacct_blocks(info->flags, 1);
shmem_free_block(inode); shmem_free_blocks(inode, 1);
error = -ENOMEM; error = -ENOMEM;
goto failed; goto failed;
} }
...@@ -1068,7 +1124,7 @@ static int shmem_getpage(struct inode *inode, unsigned long idx, ...@@ -1068,7 +1124,7 @@ static int shmem_getpage(struct inode *inode, unsigned long idx,
spin_unlock(&info->lock); spin_unlock(&info->lock);
page_cache_release(filepage); page_cache_release(filepage);
shmem_unacct_blocks(info->flags, 1); shmem_unacct_blocks(info->flags, 1);
shmem_free_block(inode); shmem_free_blocks(inode, 1);
filepage = NULL; filepage = NULL;
if (error) if (error)
goto failed; goto failed;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment