Commit f9a316fa authored by Andrew Morton, committed by Linus Torvalds

[PATCH] strip pagecache from to-be-reaped inodes

With large highmem machines and many small cached files it is possible
to encounter ZONE_NORMAL allocation failures.  This can be demonstrated
with a large number of one-byte files on a 7G machine.

All lowmem is filled with icache and all those inodes have a small
amount of highmem pagecache which makes them unfreeable.

The patch strips the pagecache from inodes as they come off the tail of
the inode_unused list.

I play tricks in there, peeking at the head of the inode_unused list to
pick up the inode again after running iput().  The alternatives seemed
to involve more widespread changes, or running invalidate_inode_pages()
under inode_lock, which would be a bad thing from a scheduling latency
and lock contention point of view.
parent 1bbb1949
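
The trick described above (drop inode_lock, strip the pagecache, then re-check
the inode at the head of inode_unused) is easiest to see in isolation.  The
fragment below is condensed from the new prune_icache() in the hunk that
follows; it is not compilable on its own, and it folds the separate re-checks
into a single condition for brevity:

	if (inode->i_data.nrpages) {
		__iget(inode);			/* pin the inode before dropping the lock */
		spin_unlock(&inode_lock);
		invalidate_inode_pages(&inode->i_data);	/* strip its clean pagecache */
		iput(inode);			/* final iput() should return it to the list head */
		spin_lock(&inode_lock);

		/*
		 * Peek at the head of inode_unused: if it is not our inode,
		 * or the inode became busy or re-gained pages meanwhile,
		 * just skip it on this pass.
		 */
		if (inode != list_entry(inode_unused.next, struct inode, i_list))
			continue;
		if (!can_unuse(inode) || inode->i_data.nrpages)
			continue;
	}
	/* otherwise fall through: unhash it and move it to the freeable list */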
@@ -363,57 +363,69 @@ int invalidate_device(kdev_t dev, int do_sync)
 	return res;
 }
 
+static int can_unuse(struct inode *inode)
+{
+	if (inode->i_state)
+		return 0;
+	if (inode_has_buffers(inode))
+		return 0;
+	if (atomic_read(&inode->i_count))
+		return 0;
+	return 1;
+}
+
 /*
- * This is called with the inode lock held. It searches
- * the in-use for freeable inodes, which are moved to a
- * temporary list and then placed on the unused list by
- * dispose_list.
- *
- * We don't expect to have to call this very often.
+ * Scan `goal' inodes on the unused list for freeable ones. They are moved to
+ * a temporary list and then are freed outside inode_lock by dispose_list().
  *
- * N.B. The spinlock is released during the call to
- *      dispose_list.
+ * Any inodes which are pinned purely because of attached pagecache have their
+ * pagecache removed.  We expect the final iput() on that inode to add it to
+ * the front of the inode_unused list.  So look for it there and if the
+ * inode is still freeable, proceed.  The right inode is found 99.9% of the
+ * time in testing on a 4-way.
  */
-#define CAN_UNUSE(inode) \
-	((((inode)->i_state | (inode)->i_data.nrpages) == 0) && \
-	 !inode_has_buffers(inode))
-
-#define INODE(entry)	(list_entry(entry, struct inode, i_list))
-
-static inline void prune_icache(int goal)
+static void prune_icache(int nr_to_scan)
 {
-	LIST_HEAD(list);
-	struct list_head *entry, *freeable = &list;
-	int count;
-	struct inode * inode;
+	LIST_HEAD(freeable);
+	int nr_pruned = 0;
+	int nr_scanned;
 
 	spin_lock(&inode_lock);
+	for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
+		struct inode *inode;
 
-	count = 0;
-	entry = inode_unused.prev;
-	for(; goal; goal--) {
-		struct list_head *tmp = entry;
-
-		if (entry == &inode_unused)
+		if (list_empty(&inode_unused))
 			break;
-		entry = entry->prev;
 
-		inode = INODE(tmp);
-		if (inode->i_state & (I_FREEING|I_CLEAR|I_LOCK))
-			continue;
-		if (!CAN_UNUSE(inode))
-			continue;
-		if (atomic_read(&inode->i_count))
+		inode = list_entry(inode_unused.prev, struct inode, i_list);
+
+		if (!can_unuse(inode)) {
+			list_move(&inode->i_list, &inode_unused);
 			continue;
-		list_del(tmp);
+		}
+		if (inode->i_data.nrpages) {
+			__iget(inode);
+			spin_unlock(&inode_lock);
+			invalidate_inode_pages(&inode->i_data);
+			iput(inode);
+			spin_lock(&inode_lock);
+
+			if (inode != list_entry(inode_unused.next,
+						struct inode, i_list))
+				continue;	/* wrong inode or list_empty */
+			if (!can_unuse(inode))
+				continue;
+			if (inode->i_data.nrpages)
+				continue;
+		}
 		list_del_init(&inode->i_hash);
-		list_add(tmp, freeable);
+		list_move(&inode->i_list, &freeable);
 		inode->i_state |= I_FREEING;
-		count++;
+		nr_pruned++;
 	}
-	inodes_stat.nr_unused -= count;
+	inodes_stat.nr_unused -= nr_pruned;
 	spin_unlock(&inode_lock);
 
-	dispose_list(freeable);
+	dispose_list(&freeable);
 }
 
 /*
......