Commit 27329369 authored by Johannes Weiner, committed by Linus Torvalds

mm: page_alloc: exempt GFP_THISNODE allocations from zone fairness

Jan Stancek reports that manual page migration runs into allocation
failures after migrating a number of pages, even though plenty of memory
is still free; he bisected the problem down to commit 81c0a2bb ("mm:
page_alloc: fair zone allocator policy").

The problem is that GFP_THISNODE obeys the zone fairness allocation
batches on one hand, but doesn't reset them and wake kswapd on the other
hand.  After a few of those allocations, the batches are exhausted and
the allocations fail.
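For illustration only, here is a minimal standalone sketch of that failure
mode. It is not kernel code: the names (struct zone_model, thisnode_alloc,
alloc_batch) are invented for the example, with alloc_batch standing in for
the per-zone NR_ALLOC_BATCH counter. Every allocation consumes the batch,
the fairness check refuses allocations once the batch reaches zero, and
nothing ever replenishes it because kswapd is never woken, so allocations
fail while free pages remain.

/*
 * Illustration only -- a userspace model, not kernel code.  The names
 * (struct zone_model, thisnode_alloc, alloc_batch) are invented for
 * this sketch; alloc_batch stands in for NR_ALLOC_BATCH.
 */
#include <stdbool.h>
#include <stdio.h>

struct zone_model {
	long free_pages;
	long alloc_batch;	/* consumed by allocations, never reset here */
};

/* A GFP_THISNODE-style allocation: obeys the batch but never resets it. */
static bool thisnode_alloc(struct zone_model *z)
{
	if (z->alloc_batch <= 0)
		return false;	/* batch exhausted -> spurious failure */
	z->alloc_batch--;
	z->free_pages--;
	return true;
}

int main(void)
{
	struct zone_model z = { .free_pages = 1000, .alloc_batch = 4 };

	for (int i = 0; i < 8; i++)
		printf("alloc %d: %s (free pages: %ld)\n", i,
		       thisnode_alloc(&z) ? "ok" : "FAILED", z.free_pages);
	return 0;
}

Exempting these allocations from the batch check, as the patch below does,
removes this artificial failure.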

Fixing this means either having GFP_THISNODE wake up kswapd, or
GFP_THISNODE not participating in zone fairness at all.  The latter
seems safer as an acute bugfix; we can clean up later.
Reported-by: Jan Stancek <jstancek@redhat.com>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: <stable@kernel.org>		[3.12+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 1ae71d03
mm/page_alloc.c

@@ -1238,6 +1238,15 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 	}
 	local_irq_restore(flags);
 }
+static bool gfp_thisnode_allocation(gfp_t gfp_mask)
+{
+	return (gfp_mask & GFP_THISNODE) == GFP_THISNODE;
+}
+#else
+static bool gfp_thisnode_allocation(gfp_t gfp_mask)
+{
+	return false;
+}
 #endif
 
 /*
@@ -1574,7 +1583,13 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
 					  get_pageblock_migratetype(page));
 	}
 
-	__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
+	/*
+	 * NOTE: GFP_THISNODE allocations do not partake in the kswapd
+	 * aging protocol, so they can't be fair.
+	 */
+	if (!gfp_thisnode_allocation(gfp_flags))
+		__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
+
 	__count_zone_vm_events(PGALLOC, zone, 1 << order);
 	zone_statistics(preferred_zone, zone, gfp_flags);
 	local_irq_restore(flags);
@@ -1946,8 +1961,12 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
 		 * ultimately fall back to remote zones that do not
 		 * partake in the fairness round-robin cycle of this
 		 * zonelist.
+		 *
+		 * NOTE: GFP_THISNODE allocations do not partake in
+		 * the kswapd aging protocol, so they can't be fair.
 		 */
-		if (alloc_flags & ALLOC_WMARK_LOW) {
+		if ((alloc_flags & ALLOC_WMARK_LOW) &&
+		    !gfp_thisnode_allocation(gfp_mask)) {
 			if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
 				continue;
 			if (!zone_local(preferred_zone, zone))
@@ -2503,8 +2522,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	 * allowed per node queues are empty and that nodes are
 	 * over allocated.
 	 */
-	if (IS_ENABLED(CONFIG_NUMA) &&
-	    (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
+	if (gfp_thisnode_allocation(gfp_mask))
 		goto nopage;
 
 restart: