Commit 123cb69c authored by Andrea Arcangeli

mm: zone_reclaim: compaction: add compaction to zone_reclaim_mode

This adds compaction to zone_reclaim so that enabling THP does not
decrease NUMA locality when /proc/sys/vm/zone_reclaim_mode > 0.

It is important to boot with numa_zonelist_order=n (n means nodes) to
get more accurate NUMA locality if there are multiple zones per node.
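
For illustration only (not part of this commit), a minimal userspace
sketch of flipping the tunable mentioned above; the mode bits mirror the
RECLAIM_ZONE/RECLAIM_WRITE/RECLAIM_SWAP definitions in mm/vmscan.c:

	#include <stdio.h>

	int main(void)
	{
		/*
		 * Hypothetical helper: enable zone reclaim (bit 0). With
		 * this patch, also setting RECLAIM_WRITE (bit 1) or
		 * RECLAIM_SWAP (bit 2) permits the synchronous compaction
		 * retry. numa_zonelist_order=n still has to be passed on
		 * the kernel command line as noted above.
		 */
		FILE *f = fopen("/proc/sys/vm/zone_reclaim_mode", "w");

		if (!f) {
			perror("zone_reclaim_mode");
			return 1;
		}
		fprintf(f, "%d\n", 1);	/* 1|2|4 to also allow write/swap */
		fclose(f);
		return 0;
	}
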
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
parent ea160218
@@ -336,10 +336,14 @@ extern unsigned long vm_total_pages;
 extern int zone_reclaim_mode;
 extern int sysctl_min_unmapped_ratio;
 extern int sysctl_min_slab_ratio;
-extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
+extern int zone_reclaim(struct zone *, struct zone *, gfp_t, unsigned int,
+			unsigned long, int, int);
 #else
 #define zone_reclaim_mode 0
-static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
+static inline int zone_reclaim(struct zone *preferred_zone, struct zone *zone,
+			       gfp_t mask, unsigned int order,
+			       unsigned long mark, int alloc_flags,
+			       int classzone_idx)
 {
 	return 0;
 }
...
@@ -2212,7 +2212,10 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 		    !zlc_zone_worth_trying(zonelist, z, allowednodes))
 			continue;
-		ret = zone_reclaim(zone, gfp_mask, order);
+		ret = zone_reclaim(ac->preferred_zone, zone, gfp_mask,
+				   order,
+				   mark, alloc_flags,
+				   ac->classzone_idx);
 		switch (ret) {
 		case ZONE_RECLAIM_NOSCAN:
 			/* did not scan */
...
@@ -3671,6 +3671,24 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	};
 
 	cond_resched();
+
+	/*
+	 * Zone reclaim reclaims unmapped file backed pages and
+	 * slab pages if we are over the defined limits.
+	 *
+	 * A small portion of unmapped file backed pages is needed for
+	 * file I/O otherwise pages read by file I/O will be immediately
+	 * thrown out if the zone is overallocated. So we do not reclaim
+	 * if less than a specified percentage of the zone is used by
+	 * unmapped file backed pages.
+	 */
+	if (zone_pagecache_reclaimable(zone) <= zone->min_unmapped_pages &&
+	    zone_page_state(zone, NR_SLAB_RECLAIMABLE) <= zone->min_slab_pages)
+		return ZONE_RECLAIM_FULL;
+
+	if (!zone_reclaimable(zone))
+		return ZONE_RECLAIM_FULL;
+
 	/*
 	 * We need to be able to allocate from the reserves for RECLAIM_SWAP
 	 * and we also need to be able to write out pages for RECLAIM_WRITE
@@ -3697,28 +3715,39 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	return sc.nr_reclaimed >= nr_pages;
 }
 
-int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
+static int zone_reclaim_compact(struct zone *preferred_zone,
+				struct zone *zone, gfp_t gfp_mask,
+				unsigned int order,
+				bool sync_compaction,
+				bool *need_compaction,
+				int alloc_flags, int classzone_idx)
 {
-	int node_id;
-	int ret;
+	int contended;
 
-	/*
-	 * Zone reclaim reclaims unmapped file backed pages and
-	 * slab pages if we are over the defined limits.
-	 *
-	 * A small portion of unmapped file backed pages is needed for
-	 * file I/O otherwise pages read by file I/O will be immediately
-	 * thrown out if the zone is overallocated. So we do not reclaim
-	 * if less than a specified percentage of the zone is used by
-	 * unmapped file backed pages.
-	 */
-	if (zone_pagecache_reclaimable(zone) <= zone->min_unmapped_pages &&
-	    zone_page_state(zone, NR_SLAB_RECLAIMABLE) <= zone->min_slab_pages)
-		return ZONE_RECLAIM_FULL;
+	if (compaction_deferred(preferred_zone, order) ||
+	    !order ||
+	    (gfp_mask & (__GFP_FS|__GFP_IO)) != (__GFP_FS|__GFP_IO)) {
+		*need_compaction = false;
+		return COMPACT_SKIPPED;
+	}
 
-	if (!zone_reclaimable(zone))
-		return ZONE_RECLAIM_FULL;
+	*need_compaction = true;
+	return compact_zone_order(zone, order, gfp_mask,
+				  sync_compaction, &contended,
+				  alloc_flags, classzone_idx);
+}
+
+int zone_reclaim(struct zone *preferred_zone, struct zone *zone,
+		 gfp_t gfp_mask, unsigned int order,
+		 unsigned long mark, int alloc_flags, int classzone_idx)
+{
+	int node_id;
+	int ret, c_ret;
+	bool sync_compaction = false, need_compaction = false;
 
 	/*
 	 * Do not scan if the allocation should not be delayed.
	 */
@@ -3735,7 +3764,55 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	if (node_state(node_id, N_CPU) && node_id != numa_node_id())
 		return ZONE_RECLAIM_NOSCAN;
 
+repeat_compaction:
+	/*
+	 * If this allocation may be satisfied by memory compaction,
+	 * run compaction before reclaim.
+	 */
+	c_ret = zone_reclaim_compact(preferred_zone, zone, gfp_mask, order,
+				     sync_compaction, &need_compaction,
+				     alloc_flags, classzone_idx);
+	if (need_compaction &&
+	    c_ret != COMPACT_SKIPPED &&
+	    zone_watermark_ok(zone, order, mark,
+			      classzone_idx,
+			      alloc_flags)) {
+#ifdef CONFIG_COMPACTION
+		zone->compact_considered = 0;
+		zone->compact_defer_shift = 0;
+#endif
+		return ZONE_RECLAIM_SUCCESS;
+	}
+
+	/*
+	 * reclaim if compaction failed because not enough memory was
+	 * available or if compaction didn't run (order 0) or didn't
+	 * succeed.
+	 */
 	ret = __zone_reclaim(zone, gfp_mask, order);
+	if (ret == ZONE_RECLAIM_SUCCESS) {
+		if (zone_watermark_ok(zone, order, mark,
+				      classzone_idx,
+				      alloc_flags))
+			return ZONE_RECLAIM_SUCCESS;
+
+		/*
+		 * If compaction run but it was skipped and reclaim was
+		 * successful keep going.
+		 */
+		if (need_compaction && c_ret == COMPACT_SKIPPED) {
+			/*
+			 * If it's ok to wait for I/O we can as well run sync
+			 * compaction
+			 */
+			sync_compaction = !!(zone_reclaim_mode &
+					     (RECLAIM_WRITE|RECLAIM_SWAP));
+			cond_resched();
+			goto repeat_compaction;
+		}
+	}
+	if (need_compaction)
+		defer_compaction(preferred_zone, order);
+
 	if (!ret)
 		count_vm_event(PGSCAN_ZONE_RECLAIM_FAILED);
...