Commit d4cf1012, authored by Nick Piggin, committed by Linus Torvalds

[PATCH] mm: teach kswapd about higher order areas

Teach kswapd to free memory on behalf of higher order allocators.  This
could be important for higher order atomic allocations because they
otherwise have no means to free the memory themselves.
Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 206ca74e
......@@ -264,8 +264,9 @@ typedef struct pglist_data {
range, including holes */
int node_id;
struct pglist_data *pgdat_next;
wait_queue_head_t kswapd_wait;
wait_queue_head_t kswapd_wait;
struct task_struct *kswapd;
int kswapd_max_order;
} pg_data_t;
#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)
......@@ -279,7 +280,7 @@ void __get_zone_counts(unsigned long *active, unsigned long *inactive,
void get_zone_counts(unsigned long *active, unsigned long *inactive,
unsigned long *free);
void build_all_zonelists(void);
void wakeup_kswapd(struct zone *zone);
void wakeup_kswapd(struct zone *zone, int order);
int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
int alloc_type, int can_try_harder, int gfp_high);
......
......@@ -677,7 +677,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
}
for (i = 0; (z = zones[i]) != NULL; i++)
wakeup_kswapd(z);
wakeup_kswapd(z, order);
/*
* Go through the zonelist again. Let __GFP_HIGH and allocations
......@@ -1516,6 +1516,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
pgdat->nr_zones = 0;
init_waitqueue_head(&pgdat->kswapd_wait);
pgdat->kswapd_max_order = 0;
for (j = 0; j < MAX_NR_ZONES; j++) {
struct zone *zone = pgdat->node_zones + j;
......
......@@ -968,7 +968,7 @@ int try_to_free_pages(struct zone **zones,
* the page allocator fallback scheme to ensure that aging of pages is balanced
* across the zones.
*/
static int balance_pgdat(pg_data_t *pgdat, int nr_pages)
static int balance_pgdat(pg_data_t *pgdat, int nr_pages, int order)
{
int to_free = nr_pages;
int all_zones_ok;
......@@ -1014,7 +1014,8 @@ static int balance_pgdat(pg_data_t *pgdat, int nr_pages)
priority != DEF_PRIORITY)
continue;
if (zone->free_pages <= zone->pages_high) {
if (!zone_watermark_ok(zone, order,
zone->pages_high, 0, 0, 0)) {
end_zone = i;
goto scan;
}
......@@ -1049,7 +1050,8 @@ static int balance_pgdat(pg_data_t *pgdat, int nr_pages)
continue;
if (nr_pages == 0) { /* Not software suspend */
if (zone->free_pages <= zone->pages_high)
if (!zone_watermark_ok(zone, order,
zone->pages_high, end_zone, 0, 0))
all_zones_ok = 0;
}
zone->temp_priority = priority;
......@@ -1127,6 +1129,7 @@ static int balance_pgdat(pg_data_t *pgdat, int nr_pages)
*/
static int kswapd(void *p)
{
unsigned long order;
pg_data_t *pgdat = (pg_data_t*)p;
struct task_struct *tsk = current;
DEFINE_WAIT(wait);
......@@ -1155,14 +1158,28 @@ static int kswapd(void *p)
*/
tsk->flags |= PF_MEMALLOC|PF_KSWAPD;
order = 0;
for ( ; ; ) {
unsigned long new_order;
if (current->flags & PF_FREEZE)
refrigerator(PF_FREEZE);
prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
schedule();
new_order = pgdat->kswapd_max_order;
pgdat->kswapd_max_order = 0;
if (order < new_order) {
/*
* Don't sleep if someone wants a larger 'order'
* allocation
*/
order = new_order;
} else {
schedule();
order = pgdat->kswapd_max_order;
}
finish_wait(&pgdat->kswapd_wait, &wait);
balance_pgdat(pgdat, 0);
balance_pgdat(pgdat, 0, order);
}
return 0;
}
......@@ -1170,12 +1187,18 @@ static int kswapd(void *p)
/*
* A zone is low on free memory, so wake its kswapd task to service it.
*/
void wakeup_kswapd(struct zone *zone)
void wakeup_kswapd(struct zone *zone, int order)
{
pg_data_t *pgdat;
if (zone->present_pages == 0)
return;
if (zone->free_pages > zone->pages_low)
pgdat = zone->zone_pgdat;
if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0, 0))
return;
if (pgdat->kswapd_max_order < order)
pgdat->kswapd_max_order = order;
if (!waitqueue_active(&zone->zone_pgdat->kswapd_wait))
return;
wake_up_interruptible(&zone->zone_pgdat->kswapd_wait);
......@@ -1198,7 +1221,7 @@ int shrink_all_memory(int nr_pages)
current->reclaim_state = &reclaim_state;
for_each_pgdat(pgdat) {
int freed;
freed = balance_pgdat(pgdat, nr_to_free);
freed = balance_pgdat(pgdat, nr_to_free, 0);
ret += freed;
nr_to_free -= freed;
if (nr_to_free <= 0)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment