Commit a5f5f91d authored by Mel Gorman, committed by Linus Torvalds

mm: convert zone_reclaim to node_reclaim

As reclaim is now per-node based, convert zone_reclaim to be
node_reclaim.  It is possible that a node will be reclaimed multiple
times if it has multiple zones but this is unavoidable without caching
all nodes traversed so far.  The documentation and interface to
userspace is the same from a configuration perspective and will be
similar in behaviour unless the node-local allocation requests were also
limited to lower zones.

Link: http://lkml.kernel.org/r/1467970510-21195-24-git-send-email-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@surriel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 52e9f87a
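
For readers checking the userspace-facing claim in the changelog: the sysctl keeps its historical name, so the knob is still read and written as /proc/sys/vm/zone_reclaim_mode (the vm_table hunk below only repoints .data at node_reclaim_mode). The following is an editor-added minimal sketch, not part of the patch, assuming a standard procfs mount; the bit meanings quoted in the comment follow the RECLAIM_ZONE/RECLAIM_WRITE/RECLAIM_UNMAP definitions in mm/vmscan.c.

	/*
	 * Illustrative sketch only (not part of this patch): read the unchanged
	 * sysctl from userspace. Bit 0 enables node-local reclaim, bit 1 allows
	 * writing dirty pages, bit 2 allows unmapping/swapping pages, matching
	 * RECLAIM_ZONE, RECLAIM_WRITE and RECLAIM_UNMAP in mm/vmscan.c.
	 */
	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/sys/vm/zone_reclaim_mode", "r");
		int mode = 0;

		if (!f || fscanf(f, "%d", &mode) != 1) {
			perror("/proc/sys/vm/zone_reclaim_mode");
			if (f)
				fclose(f);
			return 1;
		}
		fclose(f);
		printf("reclaim mode %d: zone=%d write=%d unmap=%d\n",
		       mode, mode & 1, !!(mode & 2), !!(mode & 4));
		return 0;
	}
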
@@ -372,14 +372,6 @@ struct zone {
 	unsigned long		*pageblock_flags;
 #endif /* CONFIG_SPARSEMEM */
 
-#ifdef CONFIG_NUMA
-	/*
-	 * zone reclaim becomes active if more unmapped pages exist.
-	 */
-	unsigned long		min_unmapped_pages;
-	unsigned long		min_slab_pages;
-#endif /* CONFIG_NUMA */
-
 	/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
 	unsigned long		zone_start_pfn;
 
@@ -525,7 +517,6 @@ struct zone {
 } ____cacheline_internodealigned_in_smp;
 
 enum zone_flags {
-	ZONE_RECLAIM_LOCKED,		/* prevents concurrent reclaim */
 	ZONE_FAIR_DEPLETED,		/* fair zone policy batch depleted */
 };
 
@@ -540,6 +531,7 @@ enum pgdat_flags {
 	PGDAT_WRITEBACK,		/* reclaim scanning has recently found
 					 * many pages under writeback
 					 */
+	PGDAT_RECLAIM_LOCKED,		/* prevents concurrent reclaim */
 };
 
 static inline unsigned long zone_end_pfn(const struct zone *zone)
@@ -688,6 +680,14 @@ typedef struct pglist_data {
 	 */
 	unsigned long		totalreserve_pages;
 
+#ifdef CONFIG_NUMA
+	/*
+	 * zone reclaim becomes active if more unmapped pages exist.
+	 */
+	unsigned long		min_unmapped_pages;
+	unsigned long		min_slab_pages;
+#endif /* CONFIG_NUMA */
+
 	/* Write-intensive fields used by page reclaim */
 	ZONE_PADDING(_pad1_)
 	spinlock_t		lru_lock;
...
@@ -326,13 +326,14 @@ extern int remove_mapping(struct address_space *mapping, struct page *page);
 extern unsigned long vm_total_pages;
 
 #ifdef CONFIG_NUMA
-extern int zone_reclaim_mode;
+extern int node_reclaim_mode;
 extern int sysctl_min_unmapped_ratio;
 extern int sysctl_min_slab_ratio;
-extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
+extern int node_reclaim(struct pglist_data *, gfp_t, unsigned int);
 #else
-#define zone_reclaim_mode 0
-static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
+#define node_reclaim_mode 0
+static inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask,
+				unsigned int order)
 {
 	return 0;
 }
...
@@ -54,7 +54,7 @@ int arch_update_cpu_topology(void);
 /*
  * If the distance between nodes in a system is larger than RECLAIM_DISTANCE
  * (in whatever arch specific measurement units returned by node_distance())
- * and zone_reclaim_mode is enabled then the VM will only call zone_reclaim()
+ * and node_reclaim_mode is enabled then the VM will only call node_reclaim()
  * on nodes within this distance.
  */
 #define RECLAIM_DISTANCE 30
...
@@ -1508,8 +1508,8 @@ static struct ctl_table vm_table[] = {
 #ifdef CONFIG_NUMA
 	{
 		.procname	= "zone_reclaim_mode",
-		.data		= &zone_reclaim_mode,
-		.maxlen		= sizeof(zone_reclaim_mode),
+		.data		= &node_reclaim_mode,
+		.maxlen		= sizeof(node_reclaim_mode),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 		.extra1		= &zero,
...
@@ -433,10 +433,10 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn,
 }
 #endif /* CONFIG_SPARSEMEM */
 
-#define ZONE_RECLAIM_NOSCAN	-2
-#define ZONE_RECLAIM_FULL	-1
-#define ZONE_RECLAIM_SOME	0
-#define ZONE_RECLAIM_SUCCESS	1
+#define NODE_RECLAIM_NOSCAN	-2
+#define NODE_RECLAIM_FULL	-1
+#define NODE_RECLAIM_SOME	0
+#define NODE_RECLAIM_SUCCESS	1
 
 extern int hwpoison_filter(struct page *p);
...
@@ -672,10 +672,10 @@ static bool khugepaged_scan_abort(int nid)
 	int i;
 
 	/*
-	 * If zone_reclaim_mode is disabled, then no extra effort is made to
+	 * If node_reclaim_mode is disabled, then no extra effort is made to
 	 * allocate memory locally.
 	 */
-	if (!zone_reclaim_mode)
+	if (!node_reclaim_mode)
 		return false;
 
 	/* If there is a count for this node already, it must be acceptable */
...
@@ -2942,16 +2942,16 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 			if (alloc_flags & ALLOC_NO_WATERMARKS)
 				goto try_this_zone;
 
-			if (zone_reclaim_mode == 0 ||
+			if (node_reclaim_mode == 0 ||
 			    !zone_allows_reclaim(ac->preferred_zoneref->zone, zone))
 				continue;
 
-			ret = zone_reclaim(zone, gfp_mask, order);
+			ret = node_reclaim(zone->zone_pgdat, gfp_mask, order);
 			switch (ret) {
-			case ZONE_RECLAIM_NOSCAN:
+			case NODE_RECLAIM_NOSCAN:
 				/* did not scan */
 				continue;
-			case ZONE_RECLAIM_FULL:
+			case NODE_RECLAIM_FULL:
 				/* scanned but unreclaimable */
 				continue;
 			default:
@@ -5948,9 +5948,9 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
 		zone->managed_pages = is_highmem_idx(j) ? realsize : freesize;
 #ifdef CONFIG_NUMA
 		zone->node = nid;
-		zone->min_unmapped_pages = (freesize*sysctl_min_unmapped_ratio)
-						/ 100;
-		zone->min_slab_pages = (freesize * sysctl_min_slab_ratio) / 100;
+		pgdat->min_unmapped_pages += (freesize*sysctl_min_unmapped_ratio)
+						/ 100;
+		pgdat->min_slab_pages += (freesize * sysctl_min_slab_ratio) / 100;
 #endif
 		zone->name = zone_names[j];
 		zone->zone_pgdat = pgdat;
@@ -6922,6 +6922,7 @@ int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write,
 int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
 	void __user *buffer, size_t *length, loff_t *ppos)
 {
+	struct pglist_data *pgdat;
 	struct zone *zone;
 	int rc;
 
@@ -6929,8 +6930,11 @@ int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
 	if (rc)
 		return rc;
 
+	for_each_online_pgdat(pgdat)
+		pgdat->min_unmapped_pages = 0;
+
 	for_each_zone(zone)
-		zone->min_unmapped_pages = (zone->managed_pages *
+		zone->zone_pgdat->min_unmapped_pages += (zone->managed_pages *
 				sysctl_min_unmapped_ratio) / 100;
 	return 0;
 }
@@ -6938,6 +6942,7 @@ int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
 int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
 	void __user *buffer, size_t *length, loff_t *ppos)
 {
+	struct pglist_data *pgdat;
 	struct zone *zone;
 	int rc;
 
@@ -6945,8 +6950,11 @@ int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
 	if (rc)
 		return rc;
 
+	for_each_online_pgdat(pgdat)
+		pgdat->min_slab_pages = 0;
+
 	for_each_zone(zone)
-		zone->min_slab_pages = (zone->managed_pages *
+		zone->zone_pgdat->min_slab_pages += (zone->managed_pages *
 				sysctl_min_slab_ratio) / 100;
 	return 0;
 }
...
@@ -3565,12 +3565,12 @@ module_init(kswapd_init)
 
 #ifdef CONFIG_NUMA
 /*
- * Zone reclaim mode
+ * Node reclaim mode
  *
- * If non-zero call zone_reclaim when the number of free pages falls below
+ * If non-zero call node_reclaim when the number of free pages falls below
  * the watermarks.
  */
-int zone_reclaim_mode __read_mostly;
+int node_reclaim_mode __read_mostly;
 
 #define RECLAIM_OFF 0
 #define RECLAIM_ZONE (1<<0)	/* Run shrink_inactive_list on the zone */
@@ -3578,14 +3578,14 @@ int zone_reclaim_mode __read_mostly;
 #define RECLAIM_UNMAP (1<<2)	/* Unmap pages during reclaim */
 
 /*
- * Priority for ZONE_RECLAIM. This determines the fraction of pages
+ * Priority for NODE_RECLAIM. This determines the fraction of pages
  * of a node considered for each zone_reclaim. 4 scans 1/16th of
  * a zone.
  */
-#define ZONE_RECLAIM_PRIORITY 4
+#define NODE_RECLAIM_PRIORITY 4
 
 /*
- * Percentage of pages in a zone that must be unmapped for zone_reclaim to
+ * Percentage of pages in a zone that must be unmapped for node_reclaim to
  * occur.
  */
 int sysctl_min_unmapped_ratio = 1;
@@ -3611,7 +3611,7 @@ static inline unsigned long node_unmapped_file_pages(struct pglist_data *pgdat)
 }
 
 /* Work out how many page cache pages we can reclaim in this reclaim_mode */
-static unsigned long zone_pagecache_reclaimable(struct zone *zone)
+static unsigned long node_pagecache_reclaimable(struct pglist_data *pgdat)
 {
 	unsigned long nr_pagecache_reclaimable;
 	unsigned long delta = 0;
@@ -3622,14 +3622,14 @@ static unsigned long zone_pagecache_reclaimable(struct zone *zone)
 	 * pages like swapcache and node_unmapped_file_pages() provides
 	 * a better estimate
 	 */
-	if (zone_reclaim_mode & RECLAIM_UNMAP)
-		nr_pagecache_reclaimable = node_page_state(zone->zone_pgdat, NR_FILE_PAGES);
+	if (node_reclaim_mode & RECLAIM_UNMAP)
+		nr_pagecache_reclaimable = node_page_state(pgdat, NR_FILE_PAGES);
 	else
-		nr_pagecache_reclaimable = node_unmapped_file_pages(zone->zone_pgdat);
+		nr_pagecache_reclaimable = node_unmapped_file_pages(pgdat);
 
 	/* If we can't clean pages, remove dirty pages from consideration */
-	if (!(zone_reclaim_mode & RECLAIM_WRITE))
-		delta += node_page_state(zone->zone_pgdat, NR_FILE_DIRTY);
+	if (!(node_reclaim_mode & RECLAIM_WRITE))
+		delta += node_page_state(pgdat, NR_FILE_DIRTY);
 
 	/* Watch for any possible underflows due to delta */
 	if (unlikely(delta > nr_pagecache_reclaimable))
@@ -3639,23 +3639,24 @@ static unsigned long zone_pagecache_reclaimable(struct zone *zone)
 }
 
 /*
- * Try to free up some pages from this zone through reclaim.
+ * Try to free up some pages from this node through reclaim.
  */
-static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
+static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
 {
 	/* Minimum pages needed in order to stay on node */
 	const unsigned long nr_pages = 1 << order;
 	struct task_struct *p = current;
 	struct reclaim_state reclaim_state;
+	int classzone_idx = gfp_zone(gfp_mask);
 	struct scan_control sc = {
 		.nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
 		.gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
 		.order = order,
-		.priority = ZONE_RECLAIM_PRIORITY,
-		.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
-		.may_unmap = !!(zone_reclaim_mode & RECLAIM_UNMAP),
+		.priority = NODE_RECLAIM_PRIORITY,
+		.may_writepage = !!(node_reclaim_mode & RECLAIM_WRITE),
+		.may_unmap = !!(node_reclaim_mode & RECLAIM_UNMAP),
 		.may_swap = 1,
-		.reclaim_idx = zone_idx(zone),
+		.reclaim_idx = classzone_idx,
 	};
 
 	cond_resched();
@@ -3669,13 +3670,13 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
-	if (zone_pagecache_reclaimable(zone) > zone->min_unmapped_pages) {
+	if (node_pagecache_reclaimable(pgdat) > pgdat->min_unmapped_pages) {
 		/*
 		 * Free memory by calling shrink zone with increasing
 		 * priorities until we have enough memory freed.
 		 */
 		do {
-			shrink_node(zone->zone_pgdat, &sc, zone_idx(zone));
+			shrink_node(pgdat, &sc, classzone_idx);
 		} while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0);
 	}
 
@@ -3685,49 +3686,47 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	return sc.nr_reclaimed >= nr_pages;
 }
 
-int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
+int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
 {
-	int node_id;
 	int ret;
 
 	/*
-	 * Zone reclaim reclaims unmapped file backed pages and
+	 * Node reclaim reclaims unmapped file backed pages and
 	 * slab pages if we are over the defined limits.
 	 *
 	 * A small portion of unmapped file backed pages is needed for
 	 * file I/O otherwise pages read by file I/O will be immediately
-	 * thrown out if the zone is overallocated. So we do not reclaim
-	 * if less than a specified percentage of the zone is used by
+	 * thrown out if the node is overallocated. So we do not reclaim
+	 * if less than a specified percentage of the node is used by
 	 * unmapped file backed pages.
 	 */
-	if (zone_pagecache_reclaimable(zone) <= zone->min_unmapped_pages &&
-	    zone_page_state(zone, NR_SLAB_RECLAIMABLE) <= zone->min_slab_pages)
-		return ZONE_RECLAIM_FULL;
+	if (node_pagecache_reclaimable(pgdat) <= pgdat->min_unmapped_pages &&
+	    sum_zone_node_page_state(pgdat->node_id, NR_SLAB_RECLAIMABLE) <= pgdat->min_slab_pages)
+		return NODE_RECLAIM_FULL;
 
-	if (!pgdat_reclaimable(zone->zone_pgdat))
-		return ZONE_RECLAIM_FULL;
+	if (!pgdat_reclaimable(pgdat))
+		return NODE_RECLAIM_FULL;
 
 	/*
 	 * Do not scan if the allocation should not be delayed.
 	 */
 	if (!gfpflags_allow_blocking(gfp_mask) || (current->flags & PF_MEMALLOC))
-		return ZONE_RECLAIM_NOSCAN;
+		return NODE_RECLAIM_NOSCAN;
 
 	/*
-	 * Only run zone reclaim on the local zone or on zones that do not
+	 * Only run node reclaim on the local node or on nodes that do not
	 * have associated processors. This will favor the local processor
 	 * over remote processors and spread off node memory allocations
 	 * as wide as possible.
 	 */
-	node_id = zone_to_nid(zone);
-	if (node_state(node_id, N_CPU) && node_id != numa_node_id())
-		return ZONE_RECLAIM_NOSCAN;
+	if (node_state(pgdat->node_id, N_CPU) && pgdat->node_id != numa_node_id())
+		return NODE_RECLAIM_NOSCAN;
 
-	if (test_and_set_bit(ZONE_RECLAIM_LOCKED, &zone->flags))
-		return ZONE_RECLAIM_NOSCAN;
+	if (test_and_set_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags))
+		return NODE_RECLAIM_NOSCAN;
 
-	ret = __zone_reclaim(zone, gfp_mask, order);
-	clear_bit(ZONE_RECLAIM_LOCKED, &zone->flags);
+	ret = __node_reclaim(pgdat, gfp_mask, order);
+	clear_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags);
 
 	if (!ret)
 		count_vm_event(PGSCAN_ZONE_RECLAIM_FAILED);
...