Commit 3b0db538 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] Move reclaimable pages to the tail of the inactive list on I/O completion

The patch addresses some search complexity failures which occur when
there is a large amount of dirty data on the inactive list.

Normally we attempt to write out those pages and then move them to the
head of the inactive list.  But this goes against page aging, and means
that the page has to traverse the entire list again before it can be
reclaimed.

But the VM really wants to reclaim that page - it has reached the tail
of the LRU.

So what we do in this patch is mark the page as needing reclamation and
then start the I/O.  In the I/O completion handler we check whether the
page still looks reclaimable and, if so, move it to the tail of the
inactive list, where it can be reclaimed immediately.

Under really heavy swap-intensive loads this increases the page reclaim
efficiency (pages reclaimed/pages scanned) from 10% to 25%.  Which is
OK for that sort of load.  Not great, but OK.

This code path takes the LRU lock once per page.  I didn't bother
playing games with batching up the locking work - it's a rare code
path, and the machine has plenty of CPU to spare when this is
happening.
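
For illustration only, here is a small standalone userspace C model of that
mechanism.  It is just a sketch: every name in it (the toy struct page,
start_writeout, end_writeout, and so on) is invented for the example and is
not the kernel's; the real changes are in the diff below.

/*
 * Toy userspace model of the rotation described above -- NOT kernel code.
 * All names are made up for the sketch.
 */
#include <stdbool.h>
#include <stdio.h>

struct page {
    int id;
    bool reclaim;               /* models PG_reclaim   */
    bool writeback;             /* models PG_writeback */
    struct page *prev, *next;
};

/*
 * Doubly linked list standing in for zone->inactive_list.  As in the
 * kernel, reclaim scans from the tail of this list.
 */
static struct page inactive = { .prev = &inactive, .next = &inactive };

static void del_from_inactive(struct page *p)
{
    p->prev->next = p->next;
    p->next->prev = p->prev;
}

static void add_to_inactive_tail(struct page *p)
{
    p->prev = inactive.prev;
    p->next = &inactive;
    inactive.prev->next = p;
    inactive.prev = p;
}

/* Shrink path: tag the page as wanted-back-soon and "start" the writeout. */
static void start_writeout(struct page *p)
{
    p->reclaim = true;
    p->writeback = true;
    /* ...the real code hands the page to ->writepage() here... */
}

/* Completion path: if the page is still wanted, rotate it to the tail. */
static void end_writeout(struct page *p)
{
    if (p->reclaim) {
        p->reclaim = false;
        del_from_inactive(p);
        add_to_inactive_tail(p);    /* next reclaim scan takes it first */
    }
    p->writeback = false;
}

int main(void)
{
    struct page pages[3] = { { .id = 0 }, { .id = 1 }, { .id = 2 } };

    /* Build the list head->tail as 0, 1, 2; the tail (2) is scanned first. */
    for (int i = 0; i < 3; i++)
        add_to_inactive_tail(&pages[i]);

    start_writeout(&pages[0]);  /* page 0 is far from the reclaim end */
    end_writeout(&pages[0]);    /* I/O "completes": rotate it to the tail */

    for (struct page *p = inactive.next; p != &inactive; p = p->next)
        printf("page %d\n", p->id);     /* prints: page 1, page 2, page 0 */
    return 0;
}

Compiled and run, this prints the pages in the order 1, 2, 0: the page whose
writeout completed has been rotated to the reclaim end of the simulated
inactive list.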
parent 3139a3ec
@@ -72,6 +72,7 @@
 #define PG_direct 16 /* ->pte_chain points directly at pte */
 #define PG_mappedtodisk 17 /* Has blocks allocated on-disk */
+#define PG_reclaim 18 /* To be reclaimed asap */

 /*
  * Global page accounting.  One instance per CPU.  Only unsigned longs are
@@ -239,6 +240,11 @@ extern void get_full_page_state(struct page_state *ret);
 #define SetPageMappedToDisk(page) set_bit(PG_mappedtodisk, &(page)->flags)
 #define ClearPageMappedToDisk(page) clear_bit(PG_mappedtodisk, &(page)->flags)

+#define PageReclaim(page) test_bit(PG_reclaim, &(page)->flags)
+#define SetPageReclaim(page) set_bit(PG_reclaim, &(page)->flags)
+#define ClearPageReclaim(page) clear_bit(PG_reclaim, &(page)->flags)
+#define TestClearPageReclaim(page) test_and_clear_bit(PG_reclaim, &(page)->flags)
+
 /*
  * The PageSwapCache predicate doesn't use a PG_flag at this time,
  * but it may again do so one day.
@@ -150,6 +150,7 @@ extern void FASTCALL(lru_cache_add(struct page *));
 extern void FASTCALL(lru_cache_add_active(struct page *));
 extern void FASTCALL(activate_page(struct page *));
 extern void lru_add_drain(void);
+extern int rotate_reclaimable_page(struct page *page);
 extern void swap_setup(void);

 /* linux/mm/vmscan.c */
@@ -323,10 +323,13 @@ void unlock_page(struct page *page)
 void end_page_writeback(struct page *page)
 {
     wait_queue_head_t *waitqueue = page_waitqueue(page);
-    smp_mb__before_clear_bit();
-    if (!TestClearPageWriteback(page))
-        BUG();
-    smp_mb__after_clear_bit();
+
+    if (!TestClearPageReclaim(page) || rotate_reclaimable_page(page)) {
+        smp_mb__before_clear_bit();
+        if (!TestClearPageWriteback(page))
+            BUG();
+        smp_mb__after_clear_bit();
+    }
     if (waitqueue_active(waitqueue))
         wake_up_all(waitqueue);
 }
@@ -157,6 +157,7 @@ static inline void free_pages_check(const char *function, struct page *page)
             1 << PG_private |
             1 << PG_locked |
             1 << PG_active |
+            1 << PG_reclaim |
             1 << PG_writeback )))
         bad_page(function, page);
     if (PageDirty(page))
@@ -260,6 +261,7 @@ static void prep_new_page(struct page *page, int order)
             1 << PG_lru |
             1 << PG_active |
             1 << PG_dirty |
+            1 << PG_reclaim |
             1 << PG_writeback )))
         bad_page(__FUNCTION__, page);
@@ -27,6 +27,47 @@
 /* How many pages do we try to swap or page in/out together? */
 int page_cluster;

+/*
+ * Writeback is about to end against a page which has been marked for immediate
+ * reclaim.  If it still appears to be reclaimable, move it to the tail of the
+ * inactive list.  The page still has PageWriteback set, which will pin it.
+ *
+ * We don't expect many pages to come through here, so don't bother batching
+ * things up.
+ *
+ * To avoid placing the page at the tail of the LRU while PG_writeback is still
+ * set, this function will clear PG_writeback before performing the page
+ * motion.  Do that inside the lru lock because once PG_writeback is cleared
+ * we may not touch the page.
+ *
+ * Returns zero if it cleared PG_writeback.
+ */
+int rotate_reclaimable_page(struct page *page)
+{
+    struct zone *zone;
+    unsigned long flags;
+
+    if (PageLocked(page))
+        return 1;
+    if (PageDirty(page))
+        return 1;
+    if (PageActive(page))
+        return 1;
+    if (!PageLRU(page))
+        return 1;
+
+    zone = page_zone(page);
+    spin_lock_irqsave(&zone->lru_lock, flags);
+    if (PageLRU(page) && !PageActive(page)) {
+        list_del(&page->lru);
+        list_add_tail(&page->lru, &zone->inactive_list);
+    }
+    if (!TestClearPageWriteback(page))
+        BUG();
+    spin_unlock_irqrestore(&zone->lru_lock, flags);
+    return 0;
+}
+
 /*
  * FIXME: speed this up?
  */
@@ -316,12 +316,25 @@ shrink_list(struct list_head *page_list, unsigned int gfp_mask,
                     bdi_write_congested(bdi))
                 goto keep_locked;
             if (test_clear_page_dirty(page)) {
+                int res;
+
                 write_lock(&mapping->page_lock);
                 list_move(&page->list, &mapping->locked_pages);
                 write_unlock(&mapping->page_lock);
-                if (mapping->a_ops->writepage(page) == -EAGAIN)
+
+                SetPageReclaim(page);
+                res = mapping->a_ops->writepage(page);
+                if (res == -EAGAIN) {
+                    ClearPageReclaim(page);
                     __set_page_dirty_nobuffers(page);
+                } else if (!PageWriteback(page)) {
+                    /*
+                     * synchronous writeout or broken
+                     * a_ops?
+                     */
+                    ClearPageReclaim(page);
+                }
                 goto keep;
             }
         }