Commit 3dfa5721 authored by Christoph Lameter, committed by Linus Torvalds

Page allocator: get rid of the list of cold pages

We have repeatedly discussed whether the cold pages still have a point. There is
one way to join the two lists: use a single list, putting the cold pages at the
end and the hot pages at the beginning. That way a single list can serve
both types of allocations.
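
For illustration, here is a minimal userspace sketch of that single-list scheme
(a hand-rolled stand-in for the kernel's struct list_head; fake_page and its
hot flag are invented for the example and are not kernel API). Frees push hot
pages at the head and append cold pages at the tail; a hot allocation then
scans from the head so it sees cache-warm pages first, while a cold allocation
would walk the same list in reverse.

	#include <stdio.h>
	#include <stddef.h>

	/* simplified version of the kernel's circular doubly-linked list */
	struct list_head { struct list_head *next, *prev; };

	static void list_init(struct list_head *h) { h->next = h->prev = h; }

	static void __list_add(struct list_head *new,
			       struct list_head *prev, struct list_head *next)
	{
		next->prev = new;
		new->next = next;
		new->prev = prev;
		prev->next = new;
	}

	/* hot pages go to the head of the list... */
	static void list_add(struct list_head *new, struct list_head *head)
	{
		__list_add(new, head, head->next);
	}

	/* ...cold pages to the tail */
	static void list_add_tail(struct list_head *new, struct list_head *head)
	{
		__list_add(new, head->prev, head);
	}

	struct fake_page {		/* illustrative only */
		struct list_head lru;
		int hot;		/* 1 = likely cache-warm */
	};

	int main(void)
	{
		struct list_head pcp_list;
		struct fake_page pages[4] = {
			{ .hot = 1 }, { .hot = 0 }, { .hot = 1 }, { .hot = 0 },
		};
		struct list_head *pos;
		int i;

		list_init(&pcp_list);

		/* "free" all pages onto the single per-cpu list */
		for (i = 0; i < 4; i++) {
			if (pages[i].hot)
				list_add(&pages[i].lru, &pcp_list);
			else
				list_add_tail(&pages[i].lru, &pcp_list);
		}

		/* a hot allocation scans forward from the head; a cold one
		 * would iterate in reverse (list_for_each_entry_reverse) */
		for (pos = pcp_list.next; pos != &pcp_list; pos = pos->next) {
			struct fake_page *p = (struct fake_page *)
				((char *)pos - offsetof(struct fake_page, lru));
			printf("page %ld: %s\n", (long)(p - pages),
			       p->hot ? "hot" : "cold");
		}
		return 0;
	}

Compiled and run, this prints the two hot pages before the two cold ones,
which is exactly the property the merged list relies on.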

The discussion of the RFC for this change and Mel's measurements indicate that
there is not much point left in having separate lists for hot and cold
pages (see http://marc.info/?t=119492914200001&r=1&w=2).
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Martin Bligh <mbligh@mbligh.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 5dc33185
@@ -113,7 +113,7 @@ struct per_cpu_pages {
 };
 
 struct per_cpu_pageset {
-	struct per_cpu_pages pcp[2];	/* 0: hot.  1: cold */
+	struct per_cpu_pages pcp;
 #ifdef CONFIG_NUMA
 	s8 expire;
 #endif
...
@@ -901,24 +901,21 @@ static void drain_pages(unsigned int cpu)
 {
 	unsigned long flags;
 	struct zone *zone;
-	int i;
 
 	for_each_zone(zone) {
 		struct per_cpu_pageset *pset;
+		struct per_cpu_pages *pcp;
 
 		if (!populated_zone(zone))
 			continue;
 
 		pset = zone_pcp(zone, cpu);
-		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
-			struct per_cpu_pages *pcp;
 
-			pcp = &pset->pcp[i];
-			local_irq_save(flags);
-			free_pages_bulk(zone, pcp->count, &pcp->list, 0);
-			pcp->count = 0;
-			local_irq_restore(flags);
-		}
+		pcp = &pset->pcp;
+		local_irq_save(flags);
+		free_pages_bulk(zone, pcp->count, &pcp->list, 0);
+		pcp->count = 0;
+		local_irq_restore(flags);
 	}
 }
@@ -993,10 +990,13 @@ static void fastcall free_hot_cold_page(struct page *page, int cold)
 	arch_free_page(page, 0);
 	kernel_map_pages(page, 1, 0);
 
-	pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
+	pcp = &zone_pcp(zone, get_cpu())->pcp;
 	local_irq_save(flags);
 	__count_vm_event(PGFREE);
-	list_add(&page->lru, &pcp->list);
+	if (cold)
+		list_add_tail(&page->lru, &pcp->list);
+	else
+		list_add(&page->lru, &pcp->list);
 	set_page_private(page, get_pageblock_migratetype(page));
 	pcp->count++;
 	if (pcp->count >= pcp->high) {
@@ -1054,7 +1054,7 @@ static struct page *buffered_rmqueue(struct zonelist *zonelist,
 	if (likely(order == 0)) {
 		struct per_cpu_pages *pcp;
 
-		pcp = &zone_pcp(zone, cpu)->pcp[cold];
+		pcp = &zone_pcp(zone, cpu)->pcp;
 		local_irq_save(flags);
 		if (!pcp->count) {
 			pcp->count = rmqueue_bulk(zone, 0,
@@ -1064,9 +1064,15 @@ static struct page *buffered_rmqueue(struct zonelist *zonelist,
 		}
 
 		/* Find a page of the appropriate migrate type */
-		list_for_each_entry(page, &pcp->list, lru)
-			if (page_private(page) == migratetype)
-				break;
+		if (cold) {
+			list_for_each_entry_reverse(page, &pcp->list, lru)
+				if (page_private(page) == migratetype)
+					break;
+		} else {
+			list_for_each_entry(page, &pcp->list, lru)
+				if (page_private(page) == migratetype)
+					break;
+		}
 
 		/* Allocate more to the pcp list if necessary */
 		if (unlikely(&page->lru == &pcp->list)) {
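
The scan direction here is the counterpart of the free path above: frees queue
cold pages at the tail, so a cold allocation walking the list with
list_for_each_entry_reverse() picks up the least cache-warm page that matches
the requested migratetype, while hot allocations keep taking pages from the head.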
@@ -1793,12 +1799,9 @@ void show_free_areas(void)
 			pageset = zone_pcp(zone, cpu);
 
-			printk("CPU %4d: Hot: hi:%5d, btch:%4d usd:%4d   "
-			       "Cold: hi:%5d, btch:%4d usd:%4d\n",
-			       cpu, pageset->pcp[0].high,
-			       pageset->pcp[0].batch, pageset->pcp[0].count,
-			       pageset->pcp[1].high, pageset->pcp[1].batch,
-			       pageset->pcp[1].count);
+			printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n",
+			       cpu, pageset->pcp.high,
+			       pageset->pcp.batch, pageset->pcp.count);
 		}
 	}
@@ -2596,17 +2599,11 @@ inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 	memset(p, 0, sizeof(*p));
 
-	pcp = &p->pcp[0];		/* hot */
+	pcp = &p->pcp;
 	pcp->count = 0;
 	pcp->high = 6 * batch;
 	pcp->batch = max(1UL, 1 * batch);
 	INIT_LIST_HEAD(&pcp->list);
-
-	pcp = &p->pcp[1];		/* cold*/
-	pcp->count = 0;
-	pcp->high = 2 * batch;
-	pcp->batch = max(1UL, batch/2);
-	INIT_LIST_HEAD(&pcp->list);
 }
 
 /*
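
Worth noting in the hunk above: the old split setup allowed up to 6 * batch hot
plus 2 * batch cold pages per pageset before draining (for example, 128 pages
total for batch = 16), while the merged list keeps only the hot list's
6 * batch cap (96 pages for the same batch), so the per-cpu reserve shrinks
slightly.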
@@ -2619,7 +2616,7 @@ static void setup_pagelist_highmark(struct per_cpu_pageset *p,
 {
 	struct per_cpu_pages *pcp;
 
-	pcp = &p->pcp[0]; /* hot list */
+	pcp = &p->pcp;
 	pcp->high = high;
 	pcp->batch = max(1UL, high/4);
 	if ((high/4) > (PAGE_SHIFT * 8))
...
@@ -337,7 +337,7 @@ void refresh_cpu_vm_stats(int cpu)
 		 * Check if there are pages remaining in this pageset
 		 * if not then there is nothing to expire.
 		 */
-		if (!p->expire || (!p->pcp[0].count && !p->pcp[1].count))
+		if (!p->expire || !p->pcp.count)
 			continue;
 
 		/*
@@ -352,11 +352,8 @@ void refresh_cpu_vm_stats(int cpu)
 		if (p->expire)
 			continue;
 
-		if (p->pcp[0].count)
-			drain_zone_pages(zone, p->pcp + 0);
-
-		if (p->pcp[1].count)
-			drain_zone_pages(zone, p->pcp + 1);
+		if (p->pcp.count)
+			drain_zone_pages(zone, &p->pcp);
 #endif
 	}
@@ -693,20 +690,17 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 		   "\n  pagesets");
 	for_each_online_cpu(i) {
 		struct per_cpu_pageset *pageset;
-		int j;
 
 		pageset = zone_pcp(zone, i);
-		for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
-			seq_printf(m,
-				   "\n    cpu: %i pcp: %i"
-				   "\n              count: %i"
-				   "\n              high:  %i"
-				   "\n              batch: %i",
-				   i, j,
-				   pageset->pcp[j].count,
-				   pageset->pcp[j].high,
-				   pageset->pcp[j].batch);
-		}
+		seq_printf(m,
+			   "\n    cpu: %i"
+			   "\n              count: %i"
+			   "\n              high:  %i"
+			   "\n              batch: %i",
+			   i,
+			   pageset->pcp.count,
+			   pageset->pcp.high,
+			   pageset->pcp.batch);
 #ifdef CONFIG_SMP
 		seq_printf(m, "\n  vm stats threshold: %d",
 			   pageset->stat_threshold);
...