Commit 54a6eb5c authored by Mel Gorman, committed by Linus Torvalds

mm: use two zonelist that are filtered by GFP mask

Currently a node has two sets of zonelists, one for each zone type in the
system and a second set for GFP_THISNODE allocations.  Based on the zones
allowed by a gfp mask, one of these zonelists is selected.  All of these
zonelists consume memory and occupy cache lines.

This patch replaces the multiple zonelists per-node with two zonelists.  The
first contains all populated zones in the system, ordered by distance, for
fallback allocations when the target/preferred node has no free pages.  The
second contains all populated zones in the node suitable for GFP_THISNODE
allocations.

An iterator macro is introduced called for_each_zone_zonelist() that iterates
through each zone allowed by the GFP flags in the selected zonelist.
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 18ea7e71
...@@ -603,15 +603,18 @@ void show_mem(void) ...@@ -603,15 +603,18 @@ void show_mem(void)
#ifdef CONFIG_DISCONTIGMEM #ifdef CONFIG_DISCONTIGMEM
{ {
struct zonelist *zl; struct zonelist *zl;
int i, j, k; int i, j;
for (i = 0; i < npmem_ranges; i++) { for (i = 0; i < npmem_ranges; i++) {
zl = node_zonelist(i);
for (j = 0; j < MAX_NR_ZONES; j++) { for (j = 0; j < MAX_NR_ZONES; j++) {
zl = NODE_DATA(i)->node_zonelists + j; struct zone **z;
struct zone *zone;
printk("Zone list for zone %d on node %d: ", j, i); printk("Zone list for zone %d on node %d: ", j, i);
for (k = 0; zl->zones[k] != NULL; k++) for_each_zone_zonelist(zone, z, zl, j)
printk("[%d/%s] ", zone_to_nid(zl->zones[k]), zl->zones[k]->name); printk("[%d/%s] ", zone_to_nid(zone),
zone->name);
printk("\n"); printk("\n");
} }
} }
......
...@@ -360,16 +360,18 @@ void invalidate_bdev(struct block_device *bdev) ...@@ -360,16 +360,18 @@ void invalidate_bdev(struct block_device *bdev)
*/ */
static void free_more_memory(void) static void free_more_memory(void)
{ {
struct zonelist *zonelist; struct zone **zones;
int nid; int nid;
wakeup_pdflush(1024); wakeup_pdflush(1024);
yield(); yield();
for_each_online_node(nid) { for_each_online_node(nid) {
zonelist = node_zonelist(nid, GFP_NOFS); zones = first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
if (zonelist->zones[0]) gfp_zone(GFP_NOFS));
try_to_free_pages(zonelist, 0, GFP_NOFS); if (*zones)
try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
GFP_NOFS);
} }
} }
......
...@@ -151,17 +151,26 @@ static inline enum zone_type gfp_zone(gfp_t flags) ...@@ -151,17 +151,26 @@ static inline enum zone_type gfp_zone(gfp_t flags)
* virtual kernel addresses to the allocated page(s). * virtual kernel addresses to the allocated page(s).
*/ */
/*
 * Pick which per-node zonelist slot an allocation with @flags should use.
 *
 * Returns 1 when this is a NUMA build and @flags carries __GFP_THISNODE,
 * and 0 in every other case.
 */
static inline int gfp_zonelist(gfp_t flags)
{
	int thisnode = NUMA_BUILD && unlikely(flags & __GFP_THISNODE);

	return thisnode ? 1 : 0;
}
/* /*
* We get the zone list from the current node and the gfp_mask. * We get the zone list from the current node and the gfp_mask.
* This zone list contains a maximum of MAXNODES*MAX_NR_ZONES zones. * This zone list contains a maximum of MAXNODES*MAX_NR_ZONES zones.
* There are many zonelists per node, two for each active zone. * There are two zonelists per node, one for all zones with memory and
* one containing just zones from the node the zonelist belongs to.
* *
* For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets * For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets
* optimized to &contig_page_data at compile-time. * optimized to &contig_page_data at compile-time.
*/ */
static inline struct zonelist *node_zonelist(int nid, gfp_t flags) static inline struct zonelist *node_zonelist(int nid, gfp_t flags)
{ {
return NODE_DATA(nid)->node_zonelists + gfp_zone(flags); return NODE_DATA(nid)->node_zonelists + gfp_zonelist(flags);
} }
#ifndef HAVE_ARCH_FREE_PAGE #ifndef HAVE_ARCH_FREE_PAGE
......
...@@ -393,10 +393,10 @@ static inline int zone_is_oom_locked(const struct zone *zone) ...@@ -393,10 +393,10 @@ static inline int zone_is_oom_locked(const struct zone *zone)
* The NUMA zonelists are doubled because we need zonelists that restrict the * The NUMA zonelists are doubled because we need zonelists that restrict the
* allocations to a single node for GFP_THISNODE. * allocations to a single node for GFP_THISNODE.
* *
* [0 .. MAX_NR_ZONES -1] : Zonelists with fallback * [0] : Zonelist with fallback
* [MAZ_NR_ZONES ... MAZ_ZONELISTS -1] : No fallback (GFP_THISNODE) * [1] : No fallback (GFP_THISNODE)
*/ */
#define MAX_ZONELISTS (2 * MAX_NR_ZONES) #define MAX_ZONELISTS 2
/* /*
...@@ -464,7 +464,7 @@ struct zonelist_cache { ...@@ -464,7 +464,7 @@ struct zonelist_cache {
unsigned long last_full_zap; /* when last zap'd (jiffies) */ unsigned long last_full_zap; /* when last zap'd (jiffies) */
}; };
#else #else
#define MAX_ZONELISTS MAX_NR_ZONES #define MAX_ZONELISTS 1
struct zonelist_cache; struct zonelist_cache;
#endif #endif
...@@ -486,24 +486,6 @@ struct zonelist { ...@@ -486,24 +486,6 @@ struct zonelist {
#endif #endif
}; };
#ifdef CONFIG_NUMA
/*
 * Custom zonelists such as the ones built for MPOL_BIND are the only ones
 * that have to be filtered as part of a policy. The comment on struct
 * zonelist_cache explains that those lists carry no zlcache, so an unset
 * zlcache_ptr is used here as the "needs filtering" marker.
 *
 * Returns 1 if @zonelist must be filtered, 0 otherwise.
 */
static inline int alloc_should_filter_zonelist(struct zonelist *zonelist)
{
	if (zonelist->zlcache_ptr)
		return 0;

	return 1;
}
#else
/* Non-NUMA build: zonelists are never filtered. */
static inline int alloc_should_filter_zonelist(struct zonelist *zonelist)
{
	return 0;
}
#endif /* CONFIG_NUMA */
#ifdef CONFIG_ARCH_POPULATES_NODE_MAP #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
struct node_active_region { struct node_active_region {
unsigned long start_pfn; unsigned long start_pfn;
...@@ -731,6 +713,45 @@ extern struct zone *next_zone(struct zone *zone); ...@@ -731,6 +713,45 @@ extern struct zone *next_zone(struct zone *zone);
zone; \ zone; \
zone = next_zone(zone)) zone = next_zone(zone))
/*
 * Locate the first entry of @zonelist whose zone index does not exceed
 * @highest_zoneidx.
 *
 * Returns a pointer into zonelist->zones. If no entry qualifies, the
 * returned pointer addresses the NULL terminator of the array.
 */
static inline struct zone **first_zones_zonelist(struct zonelist *zonelist,
					enum zone_type highest_zoneidx)
{
	struct zone **cursor;

	/* Walk from the head and stop at the first usable zone */
	for (cursor = zonelist->zones; *cursor; cursor++) {
		if (zone_idx(*cursor) <= highest_zoneidx)
			break;
	}

	return cursor;
}
/*
 * Starting at @z (inclusive), find the next entry whose zone index does not
 * exceed @highest_zoneidx.
 *
 * Returns the matching slot, or the slot holding the NULL terminator when
 * the remainder of the list has no suitable zone.
 */
static inline struct zone **next_zones_zonelist(struct zone **z,
					enum zone_type highest_zoneidx)
{
	for (;;) {
		struct zone *candidate = *z;

		/* Stop on the terminator or on the first zone low enough */
		if (!candidate || zone_idx(candidate) <= highest_zoneidx)
			return z;
		z++;
	}
}
/**
 * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index
 * @zone: The current zone in the iterator
 * @z: The current pointer within zonelist->zones being iterated
 * @zlist: The zonelist being iterated
 * @highidx: The zone index of the highest zone to return
 *
 * This iterator iterates through all zones at or below a given zone index.
 *
 * Inside the loop body @z addresses the slot that holds @zone, so
 * (@z - @zlist->zones) is the position of @zone within the list. The cursor
 * is stepped past the current slot only when advancing to the next zone,
 * and that only happens while the current slot is non-NULL, so the cursor
 * never moves beyond the NULL terminator of the array.
 */
#define for_each_zone_zonelist(zone, z, zlist, highidx) \
	for ((z) = first_zones_zonelist((zlist), (highidx)), (zone) = *(z); \
		(zone); \
		(z) = next_zones_zonelist((z) + 1, (highidx)), (zone) = *(z))
#ifdef CONFIG_SPARSEMEM #ifdef CONFIG_SPARSEMEM
#include <asm/sparsemem.h> #include <asm/sparsemem.h>
#endif #endif
......
...@@ -97,11 +97,11 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma, ...@@ -97,11 +97,11 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
struct mempolicy *mpol; struct mempolicy *mpol;
struct zonelist *zonelist = huge_zonelist(vma, address, struct zonelist *zonelist = huge_zonelist(vma, address,
htlb_alloc_mask, &mpol); htlb_alloc_mask, &mpol);
struct zone **z; struct zone *zone, **z;
for (z = zonelist->zones; *z; z++) { for_each_zone_zonelist(zone, z, zonelist, MAX_NR_ZONES - 1) {
nid = zone_to_nid(*z); nid = zone_to_nid(zone);
if (cpuset_zone_allowed_softwall(*z, htlb_alloc_mask) && if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) &&
!list_empty(&hugepage_freelists[nid])) { !list_empty(&hugepage_freelists[nid])) {
page = list_entry(hugepage_freelists[nid].next, page = list_entry(hugepage_freelists[nid].next,
struct page, lru); struct page, lru);
......
...@@ -175,12 +175,14 @@ static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist, ...@@ -175,12 +175,14 @@ static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist,
gfp_t gfp_mask) gfp_t gfp_mask)
{ {
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
struct zone *zone;
struct zone **z; struct zone **z;
enum zone_type high_zoneidx = gfp_zone(gfp_mask);
nodemask_t nodes = node_states[N_HIGH_MEMORY]; nodemask_t nodes = node_states[N_HIGH_MEMORY];
for (z = zonelist->zones; *z; z++) for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
if (cpuset_zone_allowed_softwall(*z, gfp_mask)) if (cpuset_zone_allowed_softwall(zone, gfp_mask))
node_clear(zone_to_nid(*z), nodes); node_clear(zone_to_nid(zone), nodes);
else else
return CONSTRAINT_CPUSET; return CONSTRAINT_CPUSET;
......
...@@ -1378,42 +1378,29 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z) ...@@ -1378,42 +1378,29 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z)
*/ */
static struct page * static struct page *
get_page_from_freelist(gfp_t gfp_mask, unsigned int order, get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist, int alloc_flags) struct zonelist *zonelist, int high_zoneidx, int alloc_flags)
{ {
struct zone **z; struct zone **z;
struct page *page = NULL; struct page *page = NULL;
int classzone_idx = zone_idx(zonelist->zones[0]); int classzone_idx;
struct zone *zone, *preferred_zone; struct zone *zone, *preferred_zone;
nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */ nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */
int zlc_active = 0; /* set if using zonelist_cache */ int zlc_active = 0; /* set if using zonelist_cache */
int did_zlc_setup = 0; /* just call zlc_setup() one time */ int did_zlc_setup = 0; /* just call zlc_setup() one time */
enum zone_type highest_zoneidx = -1; /* Gets set for policy zonelists */
z = first_zones_zonelist(zonelist, high_zoneidx);
classzone_idx = zone_idx(*z);
preferred_zone = *z;
zonelist_scan: zonelist_scan:
/* /*
* Scan zonelist, looking for a zone with enough free. * Scan zonelist, looking for a zone with enough free.
* See also cpuset_zone_allowed() comment in kernel/cpuset.c. * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
*/ */
z = zonelist->zones; for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
preferred_zone = *z;
do {
/*
* In NUMA, this could be a policy zonelist which contains
* zones that may not be allowed by the current gfp_mask.
* Check the zone is allowed by the current flags
*/
if (unlikely(alloc_should_filter_zonelist(zonelist))) {
if (highest_zoneidx == -1)
highest_zoneidx = gfp_zone(gfp_mask);
if (zone_idx(*z) > highest_zoneidx)
continue;
}
if (NUMA_BUILD && zlc_active && if (NUMA_BUILD && zlc_active &&
!zlc_zone_worth_trying(zonelist, z, allowednodes)) !zlc_zone_worth_trying(zonelist, z, allowednodes))
continue; continue;
zone = *z;
if ((alloc_flags & ALLOC_CPUSET) && if ((alloc_flags & ALLOC_CPUSET) &&
!cpuset_zone_allowed_softwall(zone, gfp_mask)) !cpuset_zone_allowed_softwall(zone, gfp_mask))
goto try_next_zone; goto try_next_zone;
...@@ -1447,7 +1434,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, ...@@ -1447,7 +1434,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
zlc_active = 1; zlc_active = 1;
did_zlc_setup = 1; did_zlc_setup = 1;
} }
} while (*(++z) != NULL); }
if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) { if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) {
/* Disable zlc cache for second zonelist scan */ /* Disable zlc cache for second zonelist scan */
...@@ -1465,6 +1452,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order, ...@@ -1465,6 +1452,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist) struct zonelist *zonelist)
{ {
const gfp_t wait = gfp_mask & __GFP_WAIT; const gfp_t wait = gfp_mask & __GFP_WAIT;
enum zone_type high_zoneidx = gfp_zone(gfp_mask);
struct zone **z; struct zone **z;
struct page *page; struct page *page;
struct reclaim_state reclaim_state; struct reclaim_state reclaim_state;
...@@ -1490,7 +1478,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order, ...@@ -1490,7 +1478,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
} }
page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order, page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET); zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET);
if (page) if (page)
goto got_pg; goto got_pg;
...@@ -1534,7 +1522,8 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order, ...@@ -1534,7 +1522,8 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
* Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc. * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
* See also cpuset_zone_allowed() comment in kernel/cpuset.c. * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
*/ */
page = get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags); page = get_page_from_freelist(gfp_mask, order, zonelist,
high_zoneidx, alloc_flags);
if (page) if (page)
goto got_pg; goto got_pg;
...@@ -1547,7 +1536,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order, ...@@ -1547,7 +1536,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
nofail_alloc: nofail_alloc:
/* go through the zonelist yet again, ignoring mins */ /* go through the zonelist yet again, ignoring mins */
page = get_page_from_freelist(gfp_mask, order, page = get_page_from_freelist(gfp_mask, order,
zonelist, ALLOC_NO_WATERMARKS); zonelist, high_zoneidx, ALLOC_NO_WATERMARKS);
if (page) if (page)
goto got_pg; goto got_pg;
if (gfp_mask & __GFP_NOFAIL) { if (gfp_mask & __GFP_NOFAIL) {
...@@ -1582,7 +1571,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order, ...@@ -1582,7 +1571,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
if (likely(did_some_progress)) { if (likely(did_some_progress)) {
page = get_page_from_freelist(gfp_mask, order, page = get_page_from_freelist(gfp_mask, order,
zonelist, alloc_flags); zonelist, high_zoneidx, alloc_flags);
if (page) if (page)
goto got_pg; goto got_pg;
} else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) { } else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
...@@ -1598,7 +1587,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order, ...@@ -1598,7 +1587,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
* under heavy pressure. * under heavy pressure.
*/ */
page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order, page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
zonelist, ALLOC_WMARK_HIGH|ALLOC_CPUSET); zonelist, high_zoneidx, ALLOC_WMARK_HIGH|ALLOC_CPUSET);
if (page) { if (page) {
clear_zonelist_oom(zonelist); clear_zonelist_oom(zonelist);
goto got_pg; goto got_pg;
...@@ -1713,14 +1702,15 @@ EXPORT_SYMBOL(free_pages); ...@@ -1713,14 +1702,15 @@ EXPORT_SYMBOL(free_pages);
static unsigned int nr_free_zone_pages(int offset) static unsigned int nr_free_zone_pages(int offset)
{ {
struct zone **z;
struct zone *zone;
/* Just pick one node, since fallback list is circular */ /* Just pick one node, since fallback list is circular */
unsigned int sum = 0; unsigned int sum = 0;
struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL); struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL);
struct zone **zonep = zonelist->zones;
struct zone *zone;
for (zone = *zonep++; zone; zone = *zonep++) { for_each_zone_zonelist(zone, z, zonelist, offset) {
unsigned long size = zone->present_pages; unsigned long size = zone->present_pages;
unsigned long high = zone->pages_high; unsigned long high = zone->pages_high;
if (size > high) if (size > high)
...@@ -2078,17 +2068,15 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask) ...@@ -2078,17 +2068,15 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
*/ */
static void build_zonelists_in_node_order(pg_data_t *pgdat, int node) static void build_zonelists_in_node_order(pg_data_t *pgdat, int node)
{ {
enum zone_type i;
int j; int j;
struct zonelist *zonelist; struct zonelist *zonelist;
for (i = 0; i < MAX_NR_ZONES; i++) { zonelist = &pgdat->node_zonelists[0];
zonelist = pgdat->node_zonelists + i; for (j = 0; zonelist->zones[j] != NULL; j++)
for (j = 0; zonelist->zones[j] != NULL; j++) ;
; j = build_zonelists_node(NODE_DATA(node), zonelist, j,
j = build_zonelists_node(NODE_DATA(node), zonelist, j, i); MAX_NR_ZONES - 1);
zonelist->zones[j] = NULL; zonelist->zones[j] = NULL;
}
} }
/* /*
...@@ -2096,15 +2084,12 @@ static void build_zonelists_in_node_order(pg_data_t *pgdat, int node) ...@@ -2096,15 +2084,12 @@ static void build_zonelists_in_node_order(pg_data_t *pgdat, int node)
*/ */
static void build_thisnode_zonelists(pg_data_t *pgdat) static void build_thisnode_zonelists(pg_data_t *pgdat)
{ {
enum zone_type i;
int j; int j;
struct zonelist *zonelist; struct zonelist *zonelist;
for (i = 0; i < MAX_NR_ZONES; i++) { zonelist = &pgdat->node_zonelists[1];
zonelist = pgdat->node_zonelists + MAX_NR_ZONES + i; j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1);
j = build_zonelists_node(pgdat, zonelist, 0, i); zonelist->zones[j] = NULL;
zonelist->zones[j] = NULL;
}
} }
/* /*
...@@ -2117,27 +2102,24 @@ static int node_order[MAX_NUMNODES]; ...@@ -2117,27 +2102,24 @@ static int node_order[MAX_NUMNODES];
static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes) static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes)
{ {
enum zone_type i;
int pos, j, node; int pos, j, node;
int zone_type; /* needs to be signed */ int zone_type; /* needs to be signed */
struct zone *z; struct zone *z;
struct zonelist *zonelist; struct zonelist *zonelist;
for (i = 0; i < MAX_NR_ZONES; i++) { zonelist = &pgdat->node_zonelists[0];
zonelist = pgdat->node_zonelists + i; pos = 0;
pos = 0; for (zone_type = MAX_NR_ZONES - 1; zone_type >= 0; zone_type--) {
for (zone_type = i; zone_type >= 0; zone_type--) { for (j = 0; j < nr_nodes; j++) {
for (j = 0; j < nr_nodes; j++) { node = node_order[j];
node = node_order[j]; z = &NODE_DATA(node)->node_zones[zone_type];
z = &NODE_DATA(node)->node_zones[zone_type]; if (populated_zone(z)) {
if (populated_zone(z)) { zonelist->zones[pos++] = z;
zonelist->zones[pos++] = z; check_highest_zone(zone_type);
check_highest_zone(zone_type);
}
} }
} }
zonelist->zones[pos] = NULL;
} }
zonelist->zones[pos] = NULL;
} }
static int default_zonelist_order(void) static int default_zonelist_order(void)
...@@ -2264,19 +2246,15 @@ static void build_zonelists(pg_data_t *pgdat) ...@@ -2264,19 +2246,15 @@ static void build_zonelists(pg_data_t *pgdat)
/* Construct the zonelist performance cache - see further mmzone.h */ /* Construct the zonelist performance cache - see further mmzone.h */
static void build_zonelist_cache(pg_data_t *pgdat) static void build_zonelist_cache(pg_data_t *pgdat)
{ {
int i; struct zonelist *zonelist;
struct zonelist_cache *zlc;
for (i = 0; i < MAX_NR_ZONES; i++) { struct zone **z;
struct zonelist *zonelist;
struct zonelist_cache *zlc;
struct zone **z;
zonelist = pgdat->node_zonelists + i; zonelist = &pgdat->node_zonelists[0];
zonelist->zlcache_ptr = zlc = &zonelist->zlcache; zonelist->zlcache_ptr = zlc = &zonelist->zlcache;
bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST); bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
for (z = zonelist->zones; *z; z++) for (z = zonelist->zones; *z; z++)
zlc->z_to_n[z - zonelist->zones] = zone_to_nid(*z); zlc->z_to_n[z - zonelist->zones] = zone_to_nid(*z);
}
} }
...@@ -2290,45 +2268,43 @@ static void set_zonelist_order(void) ...@@ -2290,45 +2268,43 @@ static void set_zonelist_order(void)
static void build_zonelists(pg_data_t *pgdat) static void build_zonelists(pg_data_t *pgdat)
{ {
int node, local_node; int node, local_node;
enum zone_type i,j; enum zone_type j;
struct zonelist *zonelist;
local_node = pgdat->node_id; local_node = pgdat->node_id;
for (i = 0; i < MAX_NR_ZONES; i++) {
struct zonelist *zonelist;
zonelist = pgdat->node_zonelists + i; zonelist = &pgdat->node_zonelists[0];
j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1);
j = build_zonelists_node(pgdat, zonelist, 0, i); /*
/* * Now we build the zonelist so that it contains the zones
* Now we build the zonelist so that it contains the zones * of all the other nodes.
* of all the other nodes. * We don't want to pressure a particular node, so when
* We don't want to pressure a particular node, so when * building the zones for node N, we make sure that the
* building the zones for node N, we make sure that the * zones coming right after the local ones are those from
* zones coming right after the local ones are those from * node N+1 (modulo N)
* node N+1 (modulo N) */
*/ for (node = local_node + 1; node < MAX_NUMNODES; node++) {
for (node = local_node + 1; node < MAX_NUMNODES; node++) { if (!node_online(node))
if (!node_online(node)) continue;
continue; j = build_zonelists_node(NODE_DATA(node), zonelist, j,
j = build_zonelists_node(NODE_DATA(node), zonelist, j, i); MAX_NR_ZONES - 1);
}
for (node = 0; node < local_node; node++) {
if (!node_online(node))
continue;
j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
}
zonelist->zones[j] = NULL;
} }
for (node = 0; node < local_node; node++) {
if (!node_online(node))
continue;
j = build_zonelists_node(NODE_DATA(node), zonelist, j,
MAX_NR_ZONES - 1);
}
zonelist->zones[j] = NULL;
} }
/* non-NUMA variant of zonelist performance cache - just NULL zlcache_ptr */ /* non-NUMA variant of zonelist performance cache - just NULL zlcache_ptr */
static void build_zonelist_cache(pg_data_t *pgdat) static void build_zonelist_cache(pg_data_t *pgdat)
{ {
int i; pgdat->node_zonelists[0].zlcache_ptr = NULL;
pgdat->node_zonelists[1].zlcache_ptr = NULL;
for (i = 0; i < MAX_NR_ZONES; i++)
pgdat->node_zonelists[i].zlcache_ptr = NULL;
} }
#endif /* CONFIG_NUMA */ #endif /* CONFIG_NUMA */
......
...@@ -3243,6 +3243,8 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) ...@@ -3243,6 +3243,8 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
struct zonelist *zonelist; struct zonelist *zonelist;
gfp_t local_flags; gfp_t local_flags;
struct zone **z; struct zone **z;
struct zone *zone;
enum zone_type high_zoneidx = gfp_zone(flags);
void *obj = NULL; void *obj = NULL;
int nid; int nid;
...@@ -3257,10 +3259,10 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) ...@@ -3257,10 +3259,10 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
* Look through allowed nodes for objects available * Look through allowed nodes for objects available
* from existing per node queues. * from existing per node queues.
*/ */
for (z = zonelist->zones; *z && !obj; z++) { for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
nid = zone_to_nid(*z); nid = zone_to_nid(zone);
if (cpuset_zone_allowed_hardwall(*z, flags) && if (cpuset_zone_allowed_hardwall(zone, flags) &&
cache->nodelists[nid] && cache->nodelists[nid] &&
cache->nodelists[nid]->free_objects) cache->nodelists[nid]->free_objects)
obj = ____cache_alloc_node(cache, obj = ____cache_alloc_node(cache,
......
...@@ -1285,6 +1285,8 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) ...@@ -1285,6 +1285,8 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
struct zonelist *zonelist; struct zonelist *zonelist;
struct zone **z; struct zone **z;
struct zone *zone;
enum zone_type high_zoneidx = gfp_zone(flags);
struct page *page; struct page *page;
/* /*
...@@ -1310,12 +1312,12 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) ...@@ -1310,12 +1312,12 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
return NULL; return NULL;
zonelist = node_zonelist(slab_node(current->mempolicy), flags); zonelist = node_zonelist(slab_node(current->mempolicy), flags);
for (z = zonelist->zones; *z; z++) { for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
struct kmem_cache_node *n; struct kmem_cache_node *n;
n = get_node(s, zone_to_nid(*z)); n = get_node(s, zone_to_nid(zone));
if (n && cpuset_zone_allowed_hardwall(*z, flags) && if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
n->nr_partial > MIN_PARTIAL) { n->nr_partial > MIN_PARTIAL) {
page = get_partial_node(n); page = get_partial_node(n);
if (page) if (page)
......
...@@ -1249,15 +1249,13 @@ static unsigned long shrink_zone(int priority, struct zone *zone, ...@@ -1249,15 +1249,13 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
static unsigned long shrink_zones(int priority, struct zonelist *zonelist, static unsigned long shrink_zones(int priority, struct zonelist *zonelist,
struct scan_control *sc) struct scan_control *sc)
{ {
enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
unsigned long nr_reclaimed = 0; unsigned long nr_reclaimed = 0;
struct zone **zones = zonelist->zones; struct zone **z;
int i; struct zone *zone;
sc->all_unreclaimable = 1; sc->all_unreclaimable = 1;
for (i = 0; zones[i] != NULL; i++) { for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
struct zone *zone = zones[i];
if (!populated_zone(zone)) if (!populated_zone(zone))
continue; continue;
/* /*
...@@ -1311,8 +1309,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, ...@@ -1311,8 +1309,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
unsigned long nr_reclaimed = 0; unsigned long nr_reclaimed = 0;
struct reclaim_state *reclaim_state = current->reclaim_state; struct reclaim_state *reclaim_state = current->reclaim_state;
unsigned long lru_pages = 0; unsigned long lru_pages = 0;
struct zone **zones = zonelist->zones; struct zone **z;
int i; struct zone *zone;
enum zone_type high_zoneidx = gfp_zone(gfp_mask);
if (scan_global_lru(sc)) if (scan_global_lru(sc))
count_vm_event(ALLOCSTALL); count_vm_event(ALLOCSTALL);
...@@ -1320,8 +1319,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, ...@@ -1320,8 +1319,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
* mem_cgroup will not do shrink_slab. * mem_cgroup will not do shrink_slab.
*/ */
if (scan_global_lru(sc)) { if (scan_global_lru(sc)) {
for (i = 0; zones[i] != NULL; i++) { for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
struct zone *zone = zones[i];
if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
continue; continue;
...@@ -1385,8 +1383,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, ...@@ -1385,8 +1383,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
priority = 0; priority = 0;
if (scan_global_lru(sc)) { if (scan_global_lru(sc)) {
for (i = 0; zones[i] != NULL; i++) { for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
struct zone *zone = zones[i];
if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
continue; continue;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment