Commit db7b0c9f, authored by Andrew Morton and committed by Linus Torvalds

[PATCH] per-node kswapd instances

Patch from David Hansen.

Start one kswapd instance for each NUMA node.  That kswapd instance
only works against the pages which are local to that node.

We need to bind that kswapd to that node's CPU set, but the
infrastructure for this is not yet in place.
parent 1f769291
......@@ -168,6 +168,7 @@ typedef struct pglist_data {
unsigned long node_size;
int node_id;
struct pglist_data *pgdat_next;
wait_queue_head_t kswapd_wait;
} pg_data_t;
extern int numnodes;
......
......@@ -162,7 +162,6 @@ extern void FASTCALL(activate_page(struct page *));
extern void swap_setup(void);
/* linux/mm/vmscan.c */
extern wait_queue_head_t kswapd_wait;
extern int try_to_free_pages(struct zone *, unsigned int, unsigned int);
/* linux/mm/page_io.c */
......
......@@ -348,8 +348,12 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
classzone->need_balance = 1;
mb();
/* we're somewhat low on memory, failed to find what we needed */
if (waitqueue_active(&kswapd_wait))
wake_up_interruptible(&kswapd_wait);
for (i = 0; zones[i] != NULL; i++) {
struct zone *z = zones[i];
if (z->free_pages <= z->pages_low &&
waitqueue_active(&z->zone_pgdat->kswapd_wait))
wake_up_interruptible(&z->zone_pgdat->kswapd_wait);
}
/* Go through the zonelist again, taking __GFP_HIGH into account */
min = 1UL << order;
......@@ -836,6 +840,8 @@ void __init free_area_init_core(pg_data_t *pgdat,
unsigned long zone_start_pfn = pgdat->node_start_pfn;
pgdat->nr_zones = 0;
init_waitqueue_head(&pgdat->kswapd_wait);
local_offset = 0; /* offset within lmem_map */
for (j = 0; j < MAX_NR_ZONES; j++) {
struct zone *zone = pgdat->node_zones + j;
......
......@@ -667,8 +667,6 @@ try_to_free_pages(struct zone *classzone,
return 0;
}
DECLARE_WAIT_QUEUE_HEAD(kswapd_wait);
static int check_classzone_need_balance(struct zone *classzone)
{
struct zone *first_classzone;
......@@ -707,20 +705,6 @@ static int kswapd_balance_pgdat(pg_data_t * pgdat)
return need_more_balance;
}
/*
 * Balance every node on pgdat_list, repeating the whole pass until no
 * node reports that more balancing is needed.
 *
 * Each pass calls kswapd_balance_pgdat() once per node and ORs the
 * return values together; a non-zero result triggers another pass.
 * pgdat_list is dereferenced without a NULL check.
 *
 * NOTE(review): this listing is a diff with the +/- markers stripped;
 * the hunk header suggests this function is removed by the commit in
 * favour of per-node kswapd threads - confirm against the original patch.
 */
static void kswapd_balance(void)
{
int need_more_balance;
pg_data_t * pgdat;
do {
/* Restart the accumulated result and the walk at the head of the list. */
need_more_balance = 0;
pgdat = pgdat_list;
do
need_more_balance |= kswapd_balance_pgdat(pgdat);
while ((pgdat = pgdat->pgdat_next)); /* advance; stop at end of list */
} while (need_more_balance);
}
static int kswapd_can_sleep_pgdat(pg_data_t * pgdat)
{
struct zone *zone;
......@@ -728,28 +712,13 @@ static int kswapd_can_sleep_pgdat(pg_data_t * pgdat)
for (i = pgdat->nr_zones-1; i >= 0; i--) {
zone = pgdat->node_zones + i;
if (!zone->need_balance)
continue;
if (zone->need_balance)
return 0;
}
return 1;
}
/*
 * Report whether kswapd may sleep, considering every node on pgdat_list.
 *
 * Returns 1 only if kswapd_can_sleep_pgdat() is non-zero for every node;
 * returns 0 as soon as one node reports that it cannot sleep.
 * pgdat_list is dereferenced without a NULL check.
 *
 * NOTE(review): this listing is a diff with the +/- markers stripped;
 * the hunk header suggests this function is removed by the commit, with
 * callers switching to kswapd_can_sleep_pgdat() - confirm against the
 * original patch.
 */
static int kswapd_can_sleep(void)
{
pg_data_t * pgdat;
pgdat = pgdat_list;
do {
/* 'continue' in a do-while jumps to the loop condition, i.e. next node. */
if (kswapd_can_sleep_pgdat(pgdat))
continue;
return 0;
} while ((pgdat = pgdat->pgdat_next));
return 1;
}
/*
* The background pageout daemon, started as a kernel thread
* from the init process.
......@@ -763,13 +732,14 @@ static int kswapd_can_sleep(void)
* If there are applications that are active memory-allocators
* (most normal use), this basically shouldn't matter.
*/
int kswapd(void *unused)
int kswapd(void *p)
{
pg_data_t *pgdat = (pg_data_t*)p;
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
daemonize();
strcpy(tsk->comm, "kswapd");
sprintf(tsk->comm, "kswapd%d", pgdat->node_id);
sigfillset(&tsk->blocked);
/*
......@@ -793,30 +763,32 @@ int kswapd(void *unused)
if (current->flags & PF_FREEZE)
refrigerator(PF_IOTHREAD);
__set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(&kswapd_wait, &wait);
add_wait_queue(&pgdat->kswapd_wait, &wait);
mb();
if (kswapd_can_sleep())
if (kswapd_can_sleep_pgdat(pgdat))
schedule();
__set_current_state(TASK_RUNNING);
remove_wait_queue(&kswapd_wait, &wait);
remove_wait_queue(&pgdat->kswapd_wait, &wait);
/*
* If we actually get into a low-memory situation,
* the processes needing more memory will wake us
* up on a more timely basis.
*/
kswapd_balance();
kswapd_balance_pgdat(pgdat);
blk_run_queues();
}
}
/*
 * Boot-time initialisation: print a start-up message, call swap_setup(),
 * and start the kswapd kernel thread(s). Always returns 0; the return
 * values of kernel_thread() are not checked.
 */
static int __init kswapd_init(void)
{
pg_data_t *pgdat;
printk("Starting kswapd\n");
swap_setup();
/*
 * NOTE(review): this listing is a diff with the +/- markers stripped.
 * The NULL-argument call below looks like the pre-patch line that the
 * for_each_pgdat() loop replaces - confirm against the original patch.
 * Taken literally it would pass NULL to kswapd(), which as shown in this
 * listing reads pgdat->node_id from its argument.
 */
kernel_thread(kswapd, NULL, CLONE_KERNEL);
/* One kswapd thread per node, each handed its own pg_data_t. */
for_each_pgdat(pgdat)
kernel_thread(kswapd, pgdat, CLONE_KERNEL);
return 0;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment