Commit 5660048c authored by Johannes Weiner, committed by Linus Torvalds

mm: move memcg hierarchy reclaim to generic reclaim code

Memory cgroup limit reclaim and traditional global pressure reclaim will
soon share the same code to reclaim from a hierarchical tree of memory
cgroups.

In preparation for this, move the two right next to each other in
shrink_zone().

The mem_cgroup_hierarchical_reclaim() polymath is split into a soft
limit reclaim function, which still does hierarchy walking on its own,
and a limit (shrinking) reclaim function, which relies on generic
reclaim code to walk the hierarchy.

Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Ying Han <yinghan@google.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 527a5ec9
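
For orientation, the two functions that mem_cgroup_hierarchical_reclaim() is split into both live in mm/memcontrol.c; their signatures, condensed from the diff below (bodies elided):

	/* limit (shrinking) reclaim: the hierarchy walk is left to generic reclaim code */
	static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
						gfp_t gfp_mask,
						unsigned long flags);

	/* soft limit reclaim: still walks the hierarchy itself via mem_cgroup_iter() */
	static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
					   struct zone *zone,
					   gfp_t gfp_mask,
					   unsigned long *total_scanned);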
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -40,6 +40,12 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					struct mem_cgroup *mem_cont,
 					int active, int file);
 
+struct mem_cgroup_reclaim_cookie {
+	struct zone *zone;
+	int priority;
+	unsigned int generation;
+};
+
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 /*
  * All "charge" functions with gfp_mask should use GFP_KERNEL or
@@ -106,6 +112,11 @@ mem_cgroup_prepare_migration(struct page *page,
 extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 	struct page *oldpage, struct page *newpage, bool migration_ok);
 
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
+				   struct mem_cgroup *,
+				   struct mem_cgroup_reclaim_cookie *);
+void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
+
 /*
  * For memory reclaim.
  */
@@ -281,6 +292,19 @@ static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 {
 }
 
+static inline struct mem_cgroup *
+mem_cgroup_iter(struct mem_cgroup *root,
+		struct mem_cgroup *prev,
+		struct mem_cgroup_reclaim_cookie *reclaim)
+{
+	return NULL;
+}
+
+static inline void mem_cgroup_iter_break(struct mem_cgroup *root,
+					 struct mem_cgroup *prev)
+{
+}
+
 static inline int mem_cgroup_get_reclaim_priority(struct mem_cgroup *memcg)
 {
 	return 0;
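A minimal sketch of the caller contract behind these declarations (illustration only, not part of the patch): a full hierarchy walk passes a NULL cookie, feeds each return value back in as @prev, and either completes the round-trip or bails out through mem_cgroup_iter_break() so the iterator can drop its reference; stop_early is a hypothetical condition:

	struct mem_cgroup *iter = NULL;

	while ((iter = mem_cgroup_iter(root, iter, NULL))) {
		/* @root itself is visited as well as its children */
		if (stop_early) {	/* hypothetical exit condition */
			mem_cgroup_iter_break(root, iter);
			break;
		}
	}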
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -370,8 +370,6 @@ enum charge_type {
 #define MEM_CGROUP_RECLAIM_NOSWAP	(1 << MEM_CGROUP_RECLAIM_NOSWAP_BIT)
 #define MEM_CGROUP_RECLAIM_SHRINK_BIT	0x1
 #define MEM_CGROUP_RECLAIM_SHRINK	(1 << MEM_CGROUP_RECLAIM_SHRINK_BIT)
-#define MEM_CGROUP_RECLAIM_SOFT_BIT	0x2
-#define MEM_CGROUP_RECLAIM_SOFT	(1 << MEM_CGROUP_RECLAIM_SOFT_BIT)
 
 static void mem_cgroup_get(struct mem_cgroup *memcg);
 static void mem_cgroup_put(struct mem_cgroup *memcg);
@@ -857,20 +855,33 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
 	return memcg;
 }
 
-struct mem_cgroup_reclaim_cookie {
-	struct zone *zone;
-	int priority;
-	unsigned int generation;
-};
-
-static struct mem_cgroup *
-mem_cgroup_iter(struct mem_cgroup *root,
-		struct mem_cgroup *prev,
-		struct mem_cgroup_reclaim_cookie *reclaim)
+/**
+ * mem_cgroup_iter - iterate over memory cgroup hierarchy
+ * @root: hierarchy root
+ * @prev: previously returned memcg, NULL on first invocation
+ * @reclaim: cookie for shared reclaim walks, NULL for full walks
+ *
+ * Returns references to children of the hierarchy below @root, or
+ * @root itself, or %NULL after a full round-trip.
+ *
+ * Caller must pass the return value in @prev on subsequent
+ * invocations for reference counting, or use mem_cgroup_iter_break()
+ * to cancel a hierarchy walk before the round-trip is complete.
+ *
+ * Reclaimers can specify a zone and a priority level in @reclaim to
+ * divide up the memcgs in the hierarchy among all concurrent
+ * reclaimers operating on the same zone and priority.
+ */
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
+				   struct mem_cgroup *prev,
+				   struct mem_cgroup_reclaim_cookie *reclaim)
 {
 	struct mem_cgroup *memcg = NULL;
 	int id = 0;
 
+	if (mem_cgroup_disabled())
+		return NULL;
+
 	if (!root)
 		root = root_mem_cgroup;
@@ -926,8 +937,13 @@ mem_cgroup_iter(struct mem_cgroup *root,
 	return memcg;
 }
 
-static void mem_cgroup_iter_break(struct mem_cgroup *root,
-				  struct mem_cgroup *prev)
+/**
+ * mem_cgroup_iter_break - abort a hierarchy walk prematurely
+ * @root: hierarchy root
+ * @prev: last visited hierarchy member as returned by mem_cgroup_iter()
+ */
+void mem_cgroup_iter_break(struct mem_cgroup *root,
+			   struct mem_cgroup *prev)
 {
 	if (!root)
 		root = root_mem_cgroup;
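The shared-walk mode described in the kernel-doc above is used with a cookie; a sketch of the expected shape (it mirrors the new shrink_zone() in the mm/vmscan.c hunk further down): concurrent reclaimers that pass cookies with the same zone and priority continue from a shared position instead of each starting over at @root:

	struct mem_cgroup_reclaim_cookie reclaim = {
		.zone = zone,
		.priority = priority,
	};
	struct mem_cgroup *memcg;

	memcg = mem_cgroup_iter(root, NULL, &reclaim);
	do {
		/* reclaim from this memcg's LRU lists in @zone */
		memcg = mem_cgroup_iter(root, memcg, &reclaim);
	} while (memcg);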
@@ -1555,6 +1571,42 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
 	return min(limit, memsw);
 }
 
+static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
+					gfp_t gfp_mask,
+					unsigned long flags)
+{
+	unsigned long total = 0;
+	bool noswap = false;
+	int loop;
+
+	if (flags & MEM_CGROUP_RECLAIM_NOSWAP)
+		noswap = true;
+	if (!(flags & MEM_CGROUP_RECLAIM_SHRINK) && memcg->memsw_is_minimum)
+		noswap = true;
+
+	for (loop = 0; loop < MEM_CGROUP_MAX_RECLAIM_LOOPS; loop++) {
+		if (loop)
+			drain_all_stock_async(memcg);
+		total += try_to_free_mem_cgroup_pages(memcg, gfp_mask, noswap);
+		/*
+		 * Allow limit shrinkers, which are triggered directly
+		 * by userspace, to catch signals and stop reclaim
+		 * after minimal progress, regardless of the margin.
+		 */
+		if (total && (flags & MEM_CGROUP_RECLAIM_SHRINK))
+			break;
+		if (mem_cgroup_margin(memcg))
+			break;
+		/*
+		 * If nothing was reclaimed after two attempts, there
+		 * may be no reclaimable pages in this hierarchy.
+		 */
+		if (loop && !total)
+			break;
+	}
+	return total;
+}
+
 /**
  * test_mem_cgroup_node_reclaimable
  * @mem: the target memcg
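The behavior of mem_cgroup_reclaim() is selected entirely through its flags argument; the call sites converted later in this patch use it roughly like this:

	/* charge path: flags assembled by the caller, may include NOSWAP */
	ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);

	/* shrinking memory.limit_in_bytes: stop after minimal progress */
	mem_cgroup_reclaim(memcg, GFP_KERNEL, MEM_CGROUP_RECLAIM_SHRINK);

	/* shrinking the memsw limit: additionally avoid swap */
	mem_cgroup_reclaim(memcg, GFP_KERNEL,
			   MEM_CGROUP_RECLAIM_NOSWAP | MEM_CGROUP_RECLAIM_SHRINK);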
@@ -1692,30 +1744,14 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
 }
 #endif
 
-/*
- * Scan the hierarchy if needed to reclaim memory. We remember the last child
- * we reclaimed from, so that we don't end up penalizing one child extensively
- * based on its position in the children list.
- *
- * root_memcg is the original ancestor that we've been reclaim from.
- *
- * We give up and return to the caller when we visit root_memcg twice.
- * (other groups can be removed while we're walking....)
- *
- * If shrink==true, for avoiding to free too much, this returns immedieately.
- */
-static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
-					   struct zone *zone,
-					   gfp_t gfp_mask,
-					   unsigned long reclaim_options,
-					   unsigned long *total_scanned)
+static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
+				   struct zone *zone,
+				   gfp_t gfp_mask,
+				   unsigned long *total_scanned)
 {
 	struct mem_cgroup *victim = NULL;
-	int ret, total = 0;
+	int total = 0;
 	int loop = 0;
-	bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP;
-	bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
-	bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
 	unsigned long excess;
 	unsigned long nr_scanned;
 	struct mem_cgroup_reclaim_cookie reclaim = {
@@ -1725,29 +1761,17 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
 
 	excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
 
-	/* If memsw_is_minimum==1, swap-out is of-no-use. */
-	if (!check_soft && !shrink && root_memcg->memsw_is_minimum)
-		noswap = true;
-
 	while (1) {
 		victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
 		if (!victim) {
 			loop++;
-			/*
-			 * We are not draining per cpu cached charges during
-			 * soft limit reclaim because global reclaim doesn't
-			 * care about charges. It tries to free some memory and
-			 * charges will not give any.
-			 */
-			if (!check_soft && loop >= 1)
-				drain_all_stock_async(root_memcg);
 			if (loop >= 2) {
 				/*
 				 * If we have not been able to reclaim
 				 * anything, it might because there are
 				 * no reclaimable pages under this hierarchy
 				 */
-				if (!check_soft || !total)
+				if (!total)
 					break;
 				/*
 				 * We want to do more targeted reclaim.
@@ -1761,30 +1785,12 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
 			}
 			continue;
 		}
-		if (!mem_cgroup_reclaimable(victim, noswap)) {
-			/* this cgroup's local usage == 0 */
+		if (!mem_cgroup_reclaimable(victim, false))
 			continue;
-		}
-		/* we use swappiness of local cgroup */
-		if (check_soft) {
-			ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
-				noswap, zone, &nr_scanned);
-			*total_scanned += nr_scanned;
-		} else
-			ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
-						noswap);
-		total += ret;
-		/*
-		 * At shrinking usage, we can't check we should stop here or
-		 * reclaim more. It's depends on callers. last_scanned_child
-		 * will work enough for keeping fairness under tree.
-		 */
-		if (shrink)
-			break;
-		if (check_soft) {
-			if (!res_counter_soft_limit_excess(&root_memcg->res))
-				break;
-		} else if (mem_cgroup_margin(root_memcg))
+		total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
+						     zone, &nr_scanned);
+		*total_scanned += nr_scanned;
+		if (!res_counter_soft_limit_excess(&root_memcg->res))
 			break;
 	}
 	mem_cgroup_iter_break(root_memcg, victim);
@@ -2281,8 +2287,7 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	if (!(gfp_mask & __GFP_WAIT))
 		return CHARGE_WOULDBLOCK;
 
-	ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
-					      gfp_mask, flags, NULL);
+	ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
 	if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
 		return CHARGE_RETRY;
 	/*
@@ -3559,9 +3564,8 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
 		if (!ret)
 			break;
 
-		mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
-						MEM_CGROUP_RECLAIM_SHRINK,
-						NULL);
+		mem_cgroup_reclaim(memcg, GFP_KERNEL,
+				   MEM_CGROUP_RECLAIM_SHRINK);
 		curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
 		/* Usage is reduced ? */
 		if (curusage >= oldusage)
@@ -3619,10 +3623,9 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
 		if (!ret)
 			break;
 
-		mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
-						MEM_CGROUP_RECLAIM_NOSWAP |
-						MEM_CGROUP_RECLAIM_SHRINK,
-						NULL);
+		mem_cgroup_reclaim(memcg, GFP_KERNEL,
+				   MEM_CGROUP_RECLAIM_NOSWAP |
+				   MEM_CGROUP_RECLAIM_SHRINK);
 		curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
 		/* Usage is reduced ? */
 		if (curusage >= oldusage)
@@ -3665,10 +3668,8 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 			break;
 
 		nr_scanned = 0;
-		reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone,
-						gfp_mask,
-						MEM_CGROUP_RECLAIM_SOFT,
-						&nr_scanned);
+		reclaimed = mem_cgroup_soft_reclaim(mz->mem, zone,
+						    gfp_mask, &nr_scanned);
 		nr_reclaimed += reclaimed;
 		*total_scanned += nr_scanned;
 		spin_lock(&mctz->lock);
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2104,12 +2104,43 @@ static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz,
 static void shrink_zone(int priority, struct zone *zone,
 			struct scan_control *sc)
 {
-	struct mem_cgroup_zone mz = {
-		.mem_cgroup = sc->target_mem_cgroup,
+	struct mem_cgroup *root = sc->target_mem_cgroup;
+	struct mem_cgroup_reclaim_cookie reclaim = {
 		.zone = zone,
+		.priority = priority,
 	};
+	struct mem_cgroup *memcg;
+
+	if (global_reclaim(sc)) {
+		struct mem_cgroup_zone mz = {
+			.mem_cgroup = NULL,
+			.zone = zone,
+		};
+
+		shrink_mem_cgroup_zone(priority, &mz, sc);
+		return;
+	}
+
+	memcg = mem_cgroup_iter(root, NULL, &reclaim);
+	do {
+		struct mem_cgroup_zone mz = {
+			.mem_cgroup = memcg,
+			.zone = zone,
+		};
 
-	shrink_mem_cgroup_zone(priority, &mz, sc);
+		shrink_mem_cgroup_zone(priority, &mz, sc);
+		/*
+		 * Limit reclaim has historically picked one memcg and
+		 * scanned it with decreasing priority levels until
+		 * nr_to_reclaim had been reclaimed.  This priority
+		 * cycle is thus over after a single memcg.
+		 */
+		if (!global_reclaim(sc)) {
+			mem_cgroup_iter_break(root, memcg);
+			break;
+		}
+		memcg = mem_cgroup_iter(root, memcg, &reclaim);
+	} while (memcg);
 }
 
 /*
@@ -2374,6 +2405,10 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 		.order = 0,
 		.target_mem_cgroup = mem,
 	};
+	struct mem_cgroup_zone mz = {
+		.mem_cgroup = mem,
+		.zone = zone,
+	};
 
 	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
 			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -2389,7 +2424,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 	 * will pick up pages from other mem cgroup's as well. We hack
 	 * the priority and make it zero.
 	 */
-	shrink_zone(0, zone, &sc);
+	shrink_mem_cgroup_zone(0, &mz, &sc);
 
 	trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
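For context (not part of this patch), the global_reclaim() test that gates the two paths in the new shrink_zone() simply checks whether a target memcg was set on the scan_control; with CONFIG_CGROUP_MEM_RES_CTLR enabled it is roughly:

	static bool global_reclaim(struct scan_control *sc)
	{
		return !sc->target_mem_cgroup;
	}

so limit reclaim, which sets sc->target_mem_cgroup, takes the mem_cgroup_iter() loop, while kswapd and global direct reclaim keep the single shrink_mem_cgroup_zone() call for now.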