Commit deda1b5e authored by Andrew Morton, committed by Linus Torvalds

[PATCH] try to remove buffer_heads from to-be-reaped inodes

Stephen Tweedie reports a 2.4.7 problem in which kswapd is chewing lots
of CPU trying to reclaim inodes which are pinned by buffer_heads at
i_dirty_buffers.

This can only happen when there's memory pressure on ZONE_HIGHMEM - the
2.4 kernel runs shrink_icache_memory in that case as well.  But there's
no reclaim pressure on ZONE_NORMAL so the VM is never running
try_to_free_buffers() against the ZONE_NORMAL buffers which are pinning
the inodes.

The 2.5 kernel also runs the slab shrinkers in response to ZONE_HIGHMEM
pressure.  This may be wrong - still thinking about that.

This patch arranges for prune_icache to try to remove the inode's buffers
when the inode is to be reclaimed.

It also changes inode_has_buffers() and the other inode-buffer-list
functions to look at inode->i_data, not inode->i_mapping.  The latter
was wrong.
parent db054df8
...@@ -678,7 +678,7 @@ static inline void __remove_assoc_queue(struct buffer_head *bh) ...@@ -678,7 +678,7 @@ static inline void __remove_assoc_queue(struct buffer_head *bh)
int inode_has_buffers(struct inode *inode) int inode_has_buffers(struct inode *inode)
{ {
return !list_empty(&inode->i_mapping->private_list); return !list_empty(&inode->i_data.private_list);
} }
/* /*
...@@ -858,7 +858,7 @@ int fsync_buffers_list(spinlock_t *lock, struct list_head *list) ...@@ -858,7 +858,7 @@ int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
void invalidate_inode_buffers(struct inode *inode) void invalidate_inode_buffers(struct inode *inode)
{ {
if (inode_has_buffers(inode)) { if (inode_has_buffers(inode)) {
struct address_space *mapping = inode->i_mapping; struct address_space *mapping = &inode->i_data;
struct list_head *list = &mapping->private_list; struct list_head *list = &mapping->private_list;
struct address_space *buffer_mapping = mapping->assoc_mapping; struct address_space *buffer_mapping = mapping->assoc_mapping;
...@@ -869,6 +869,35 @@ void invalidate_inode_buffers(struct inode *inode) ...@@ -869,6 +869,35 @@ void invalidate_inode_buffers(struct inode *inode)
} }
} }
/*
 * Strip clean buffers off the inode's buffer list (i_data.private_list).
 * Called when the inode itself is about to be freed, since buffers left
 * on that list can pin it.
 *
 * The list is walked from its head while holding the private_lock of the
 * associated mapping (mapping->assoc_mapping).  The walk stops at the first
 * dirty buffer; that buffer and everything behind it stay on the list.
 *
 * Returns true if all buffers were removed (including the case where the
 * inode had none to begin with), false if a dirty buffer was encountered.
 */
int remove_inode_buffers(struct inode *inode)
{
	struct address_space *mapping = &inode->i_data;
	struct list_head *head = &mapping->private_list;
	struct address_space *buffer_mapping;
	int all_removed = 1;

	/* No buffers attached: nothing to do, report success. */
	if (!inode_has_buffers(inode))
		return all_removed;

	buffer_mapping = mapping->assoc_mapping;

	spin_lock(&buffer_mapping->private_lock);
	while (all_removed && !list_empty(head)) {
		struct buffer_head *bh = BH_ENTRY(head->next);

		if (buffer_dirty(bh))
			all_removed = 0;	/* dirty buffer: give up here */
		else
			__remove_assoc_queue(bh);
	}
	spin_unlock(&buffer_mapping->private_lock);

	return all_removed;
}
/* /*
* Create the appropriate buffers when given a page for data area and * Create the appropriate buffers when given a page for data area and
* the size of each buffer.. Use the bh->b_this_page linked list to * the size of each buffer.. Use the bh->b_this_page linked list to
......
...@@ -371,6 +371,8 @@ static int can_unuse(struct inode *inode) ...@@ -371,6 +371,8 @@ static int can_unuse(struct inode *inode)
return 0; return 0;
if (atomic_read(&inode->i_count)) if (atomic_read(&inode->i_count))
return 0; return 0;
if (inode->i_data.nrpages)
return 0;
return 1; return 1;
} }
...@@ -383,6 +385,9 @@ static int can_unuse(struct inode *inode) ...@@ -383,6 +385,9 @@ static int can_unuse(struct inode *inode)
* the front of the inode_unused list. So look for it there and if the * the front of the inode_unused list. So look for it there and if the
* inode is still freeable, proceed. The right inode is found 99.9% of the * inode is still freeable, proceed. The right inode is found 99.9% of the
* time in testing on a 4-way. * time in testing on a 4-way.
*
* If the inode has metadata buffers attached to mapping->private_list then
* try to remove them.
*/ */
static void prune_icache(int nr_to_scan) static void prune_icache(int nr_to_scan)
{ {
...@@ -399,13 +404,14 @@ static void prune_icache(int nr_to_scan) ...@@ -399,13 +404,14 @@ static void prune_icache(int nr_to_scan)
inode = list_entry(inode_unused.prev, struct inode, i_list); inode = list_entry(inode_unused.prev, struct inode, i_list);
if (!can_unuse(inode)) { if (inode->i_state || atomic_read(&inode->i_count)) {
list_move(&inode->i_list, &inode_unused); list_move(&inode->i_list, &inode_unused);
continue; continue;
} }
if (inode->i_data.nrpages) { if (inode_has_buffers(inode) || inode->i_data.nrpages) {
__iget(inode); __iget(inode);
spin_unlock(&inode_lock); spin_unlock(&inode_lock);
if (remove_inode_buffers(inode))
invalidate_inode_pages(&inode->i_data); invalidate_inode_pages(&inode->i_data);
iput(inode); iput(inode);
spin_lock(&inode_lock); spin_lock(&inode_lock);
...@@ -415,8 +421,6 @@ static void prune_icache(int nr_to_scan) ...@@ -415,8 +421,6 @@ static void prune_icache(int nr_to_scan)
continue; /* wrong inode or list_empty */ continue; /* wrong inode or list_empty */
if (!can_unuse(inode)) if (!can_unuse(inode))
continue; continue;
if (inode->i_data.nrpages)
continue;
} }
list_del_init(&inode->i_hash); list_del_init(&inode->i_hash);
list_move(&inode->i_list, &freeable); list_move(&inode->i_list, &freeable);
......
...@@ -141,6 +141,7 @@ void buffer_insert_list(spinlock_t *lock, ...@@ -141,6 +141,7 @@ void buffer_insert_list(spinlock_t *lock,
void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode); void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode);
int inode_has_buffers(struct inode *); int inode_has_buffers(struct inode *);
void invalidate_inode_buffers(struct inode *); void invalidate_inode_buffers(struct inode *);
int remove_inode_buffers(struct inode *inode);
int fsync_buffers_list(spinlock_t *lock, struct list_head *); int fsync_buffers_list(spinlock_t *lock, struct list_head *);
int sync_mapping_buffers(struct address_space *mapping); int sync_mapping_buffers(struct address_space *mapping);
void unmap_underlying_metadata(struct block_device *bdev, sector_t block); void unmap_underlying_metadata(struct block_device *bdev, sector_t block);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment