Commit 05b84301, authored by Johannes Weiner, committed by Linus Torvalds

mm: memcontrol: use root_mem_cgroup res_counter

Due to an old optimization to keep expensive res_counter changes at a
minimum, the root_mem_cgroup res_counter is never charged; there is no
limit at that level anyway, and any statistics can be generated on
demand by summing up the counters of all other cgroups.

However, with per-cpu charge caches, res_counter operations do not even
show up in profiles anymore, so this optimization is no longer
necessary.

Remove it to simplify the code.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 692e7c45
...@@ -2570,9 +2570,8 @@ static int mem_cgroup_try_charge(struct mem_cgroup *memcg, ...@@ -2570,9 +2570,8 @@ static int mem_cgroup_try_charge(struct mem_cgroup *memcg,
unsigned long nr_reclaimed; unsigned long nr_reclaimed;
unsigned long flags = 0; unsigned long flags = 0;
unsigned long long size; unsigned long long size;
int ret = 0;
if (mem_cgroup_is_root(memcg))
goto done;
retry: retry:
if (consume_stock(memcg, nr_pages)) if (consume_stock(memcg, nr_pages))
goto done; goto done;
...@@ -2650,13 +2649,15 @@ static int mem_cgroup_try_charge(struct mem_cgroup *memcg, ...@@ -2650,13 +2649,15 @@ static int mem_cgroup_try_charge(struct mem_cgroup *memcg,
if (!(gfp_mask & __GFP_NOFAIL)) if (!(gfp_mask & __GFP_NOFAIL))
return -ENOMEM; return -ENOMEM;
bypass: bypass:
return -EINTR; memcg = root_mem_cgroup;
ret = -EINTR;
goto retry;
done_restock: done_restock:
if (batch > nr_pages) if (batch > nr_pages)
refill_stock(memcg, batch - nr_pages); refill_stock(memcg, batch - nr_pages);
done: done:
return 0; return ret;
} }
/** /**
...@@ -2695,13 +2696,11 @@ static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm, ...@@ -2695,13 +2696,11 @@ static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm,
static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg, static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
unsigned int nr_pages) unsigned int nr_pages)
{ {
if (!mem_cgroup_is_root(memcg)) {
unsigned long bytes = nr_pages * PAGE_SIZE; unsigned long bytes = nr_pages * PAGE_SIZE;
res_counter_uncharge(&memcg->res, bytes); res_counter_uncharge(&memcg->res, bytes);
if (do_swap_account) if (do_swap_account)
res_counter_uncharge(&memcg->memsw, bytes); res_counter_uncharge(&memcg->memsw, bytes);
}
} }
/* /*
...@@ -2713,9 +2712,6 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg, ...@@ -2713,9 +2712,6 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
{ {
unsigned long bytes = nr_pages * PAGE_SIZE; unsigned long bytes = nr_pages * PAGE_SIZE;
if (mem_cgroup_is_root(memcg))
return;
res_counter_uncharge_until(&memcg->res, memcg->res.parent, bytes); res_counter_uncharge_until(&memcg->res, memcg->res.parent, bytes);
if (do_swap_account) if (do_swap_account)
res_counter_uncharge_until(&memcg->memsw, res_counter_uncharge_until(&memcg->memsw,
...@@ -3943,7 +3939,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype, ...@@ -3943,7 +3939,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
* replacement page, so leave it alone when phasing out the * replacement page, so leave it alone when phasing out the
* page that is unused after the migration. * page that is unused after the migration.
*/ */
if (!end_migration && !mem_cgroup_is_root(memcg)) if (!end_migration)
mem_cgroup_do_uncharge(memcg, nr_pages, ctype); mem_cgroup_do_uncharge(memcg, nr_pages, ctype);
return memcg; return memcg;
...@@ -4076,7 +4072,6 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent) ...@@ -4076,7 +4072,6 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
* We uncharge this because swap is freed. This memcg can * We uncharge this because swap is freed. This memcg can
* be obsolete one. We avoid calling css_tryget_online(). * be obsolete one. We avoid calling css_tryget_online().
*/ */
if (!mem_cgroup_is_root(memcg))
res_counter_uncharge(&memcg->memsw, PAGE_SIZE); res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
mem_cgroup_swap_statistics(memcg, false); mem_cgroup_swap_statistics(memcg, false);
css_put(&memcg->css); css_put(&memcg->css);
...@@ -4767,78 +4762,24 @@ static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css, ...@@ -4767,78 +4762,24 @@ static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css,
return retval; return retval;
} }
static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg,
enum mem_cgroup_stat_index idx)
{
struct mem_cgroup *iter;
long val = 0;
/* Per-cpu values can be negative, use a signed accumulator */
for_each_mem_cgroup_tree(iter, memcg)
val += mem_cgroup_read_stat(iter, idx);
if (val < 0) /* race ? */
val = 0;
return val;
}
static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
{
u64 val;
if (!mem_cgroup_is_root(memcg)) {
if (!swap)
return res_counter_read_u64(&memcg->res, RES_USAGE);
else
return res_counter_read_u64(&memcg->memsw, RES_USAGE);
}
/*
* Transparent hugepages are still accounted for in MEM_CGROUP_STAT_RSS
* as well as in MEM_CGROUP_STAT_RSS_HUGE.
*/
val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS);
if (swap)
val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAP);
return val << PAGE_SHIFT;
}
static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css, static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
struct cftype *cft) struct cftype *cft)
{ {
struct mem_cgroup *memcg = mem_cgroup_from_css(css); struct mem_cgroup *memcg = mem_cgroup_from_css(css);
u64 val; enum res_type type = MEMFILE_TYPE(cft->private);
int name; int name = MEMFILE_ATTR(cft->private);
enum res_type type;
type = MEMFILE_TYPE(cft->private);
name = MEMFILE_ATTR(cft->private);
switch (type) { switch (type) {
case _MEM: case _MEM:
if (name == RES_USAGE) return res_counter_read_u64(&memcg->res, name);
val = mem_cgroup_usage(memcg, false);
else
val = res_counter_read_u64(&memcg->res, name);
break;
case _MEMSWAP: case _MEMSWAP:
if (name == RES_USAGE) return res_counter_read_u64(&memcg->memsw, name);
val = mem_cgroup_usage(memcg, true);
else
val = res_counter_read_u64(&memcg->memsw, name);
break;
case _KMEM: case _KMEM:
val = res_counter_read_u64(&memcg->kmem, name); return res_counter_read_u64(&memcg->kmem, name);
break; break;
default: default:
BUG(); BUG();
} }
return val;
} }
#ifdef CONFIG_MEMCG_KMEM #ifdef CONFIG_MEMCG_KMEM
...@@ -5300,7 +5241,10 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap) ...@@ -5300,7 +5241,10 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
if (!t) if (!t)
goto unlock; goto unlock;
usage = mem_cgroup_usage(memcg, swap); if (!swap)
usage = res_counter_read_u64(&memcg->res, RES_USAGE);
else
usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
/* /*
* current_threshold points to threshold just below or equal to usage. * current_threshold points to threshold just below or equal to usage.
...@@ -5396,15 +5340,15 @@ static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg, ...@@ -5396,15 +5340,15 @@ static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg,
mutex_lock(&memcg->thresholds_lock); mutex_lock(&memcg->thresholds_lock);
if (type == _MEM) if (type == _MEM) {
thresholds = &memcg->thresholds; thresholds = &memcg->thresholds;
else if (type == _MEMSWAP) usage = res_counter_read_u64(&memcg->res, RES_USAGE);
} else if (type == _MEMSWAP) {
thresholds = &memcg->memsw_thresholds; thresholds = &memcg->memsw_thresholds;
else usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
} else
BUG(); BUG();
usage = mem_cgroup_usage(memcg, type == _MEMSWAP);
/* Check if a threshold crossed before adding a new one */ /* Check if a threshold crossed before adding a new one */
if (thresholds->primary) if (thresholds->primary)
__mem_cgroup_threshold(memcg, type == _MEMSWAP); __mem_cgroup_threshold(memcg, type == _MEMSWAP);
...@@ -5484,18 +5428,19 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg, ...@@ -5484,18 +5428,19 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
int i, j, size; int i, j, size;
mutex_lock(&memcg->thresholds_lock); mutex_lock(&memcg->thresholds_lock);
if (type == _MEM)
if (type == _MEM) {
thresholds = &memcg->thresholds; thresholds = &memcg->thresholds;
else if (type == _MEMSWAP) usage = res_counter_read_u64(&memcg->res, RES_USAGE);
} else if (type == _MEMSWAP) {
thresholds = &memcg->memsw_thresholds; thresholds = &memcg->memsw_thresholds;
else usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
} else
BUG(); BUG();
if (!thresholds->primary) if (!thresholds->primary)
goto unlock; goto unlock;
usage = mem_cgroup_usage(memcg, type == _MEMSWAP);
/* Check if a threshold crossed before removing */ /* Check if a threshold crossed before removing */
__mem_cgroup_threshold(memcg, type == _MEMSWAP); __mem_cgroup_threshold(memcg, type == _MEMSWAP);
...@@ -6249,9 +6194,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css) ...@@ -6249,9 +6194,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
* core guarantees its existence. * core guarantees its existence.
*/ */
} else { } else {
res_counter_init(&memcg->res, NULL); res_counter_init(&memcg->res, &root_mem_cgroup->res);
res_counter_init(&memcg->memsw, NULL); res_counter_init(&memcg->memsw, &root_mem_cgroup->memsw);
res_counter_init(&memcg->kmem, NULL); res_counter_init(&memcg->kmem, &root_mem_cgroup->kmem);
/* /*
* Deeper hierachy with use_hierarchy == false doesn't make * Deeper hierachy with use_hierarchy == false doesn't make
* much sense so let cgroup subsystem know about this * much sense so let cgroup subsystem know about this
...@@ -6387,13 +6332,7 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css) ...@@ -6387,13 +6332,7 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
/* Handlers for move charge at task migration. */ /* Handlers for move charge at task migration. */
static int mem_cgroup_do_precharge(unsigned long count) static int mem_cgroup_do_precharge(unsigned long count)
{ {
int ret = 0; int ret;
if (mem_cgroup_is_root(mc.to)) {
mc.precharge += count;
/* we don't need css_get for root */
return ret;
}
/* Try a single bulk charge without reclaim first */ /* Try a single bulk charge without reclaim first */
ret = mem_cgroup_try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count); ret = mem_cgroup_try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count);
...@@ -6700,21 +6639,18 @@ static void __mem_cgroup_clear_mc(void) ...@@ -6700,21 +6639,18 @@ static void __mem_cgroup_clear_mc(void)
/* we must fixup refcnts and charges */ /* we must fixup refcnts and charges */
if (mc.moved_swap) { if (mc.moved_swap) {
/* uncharge swap account from the old cgroup */ /* uncharge swap account from the old cgroup */
if (!mem_cgroup_is_root(mc.from))
res_counter_uncharge(&mc.from->memsw, res_counter_uncharge(&mc.from->memsw,
PAGE_SIZE * mc.moved_swap); PAGE_SIZE * mc.moved_swap);
for (i = 0; i < mc.moved_swap; i++) for (i = 0; i < mc.moved_swap; i++)
css_put(&mc.from->css); css_put(&mc.from->css);
if (!mem_cgroup_is_root(mc.to)) {
/* /*
* we charged both to->res and to->memsw, so we should * we charged both to->res and to->memsw, so we should
* uncharge to->res. * uncharge to->res.
*/ */
res_counter_uncharge(&mc.to->res, res_counter_uncharge(&mc.to->res,
PAGE_SIZE * mc.moved_swap); PAGE_SIZE * mc.moved_swap);
}
/* we've already done css_get(mc.to) */ /* we've already done css_get(mc.to) */
mc.moved_swap = 0; mc.moved_swap = 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment