Commit a8c49af3, authored by Yosry Ahmed, committed by Linus Torvalds

memcg: add per-memcg total kernel memory stat

Currently memcg stats show several types of kernel memory: kernel stack,
page tables, sock, vmalloc, and slab.  However, there are other
allocations with __GFP_ACCOUNT (or supersets such as GFP_KERNEL_ACCOUNT)
that are not accounted in any of those stats, a few examples are:

 - various kvm allocations (e.g. allocated pages to create vcpus)
 - io_uring
 - tmp_page in pipes during pipe_write()
 - bpf ringbuffers
 - unix sockets

Keeping track of the total kernel memory is essential for the ease of
migration from cgroup v1 to v2 as there are large discrepancies between
v1's kmem.usage_in_bytes and the sum of the available kernel memory
stats in v2.  Adding separate memcg stats for all __GFP_ACCOUNT kernel
allocations is an impractical maintenance burden as there are a lot of those
all over the kernel code, with more use cases likely to show up in the
future.

Therefore, add a "kernel" memcg stat that is analogous to the kmem page
counter, with added benefits such as using rstat infrastructure which
aggregates stats more efficiently.  Additionally, this provides a
lighter alternative in case the legacy kmem is deprecated in the future.

[yosryahmed@google.com: v2]
  Link: https://lkml.kernel.org/r/20220203193856.972500-1-yosryahmed@google.com

Link: https://lkml.kernel.org/r/20220201200823.3283171-1-yosryahmed@google.com
Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
Acked-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 086f694a
...@@ -1301,6 +1301,11 @@ PAGE_SIZE multiple when read back. ...@@ -1301,6 +1301,11 @@ PAGE_SIZE multiple when read back.
Amount of memory used to cache filesystem data, Amount of memory used to cache filesystem data,
including tmpfs and shared memory. including tmpfs and shared memory.
kernel (npn)
Amount of total kernel memory, including
(kernel_stack, pagetables, percpu, vmalloc, slab) in
addition to other kernel memory use cases.
kernel_stack kernel_stack
Amount of memory allocated to kernel stacks. Amount of memory allocated to kernel stacks.
......
...@@ -34,6 +34,7 @@ enum memcg_stat_item { ...@@ -34,6 +34,7 @@ enum memcg_stat_item {
MEMCG_SOCK, MEMCG_SOCK,
MEMCG_PERCPU_B, MEMCG_PERCPU_B,
MEMCG_VMALLOC, MEMCG_VMALLOC,
MEMCG_KMEM,
MEMCG_NR_STAT, MEMCG_NR_STAT,
}; };
......
...@@ -1371,6 +1371,7 @@ struct memory_stat { ...@@ -1371,6 +1371,7 @@ struct memory_stat {
static const struct memory_stat memory_stats[] = { static const struct memory_stat memory_stats[] = {
{ "anon", NR_ANON_MAPPED }, { "anon", NR_ANON_MAPPED },
{ "file", NR_FILE_PAGES }, { "file", NR_FILE_PAGES },
{ "kernel", MEMCG_KMEM },
{ "kernel_stack", NR_KERNEL_STACK_KB }, { "kernel_stack", NR_KERNEL_STACK_KB },
{ "pagetables", NR_PAGETABLE }, { "pagetables", NR_PAGETABLE },
{ "percpu", MEMCG_PERCPU_B }, { "percpu", MEMCG_PERCPU_B },
...@@ -2114,6 +2115,7 @@ static DEFINE_MUTEX(percpu_charge_mutex); ...@@ -2114,6 +2115,7 @@ static DEFINE_MUTEX(percpu_charge_mutex);
static void drain_obj_stock(struct obj_stock *stock); static void drain_obj_stock(struct obj_stock *stock);
static bool obj_stock_flush_required(struct memcg_stock_pcp *stock, static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
struct mem_cgroup *root_memcg); struct mem_cgroup *root_memcg);
static void memcg_account_kmem(struct mem_cgroup *memcg, int nr_pages);
#else #else
static inline void drain_obj_stock(struct obj_stock *stock) static inline void drain_obj_stock(struct obj_stock *stock)
...@@ -2124,6 +2126,9 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock, ...@@ -2124,6 +2126,9 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
{ {
return false; return false;
} }
/*
 * No-op variant of memcg_account_kmem() for the #else side of the
 * surrounding conditional: it ignores both arguments and does nothing.
 * NOTE(review): presumably this is the build where kernel memory
 * accounting is compiled out -- confirm against the matching #if.
 */
static void memcg_account_kmem(struct mem_cgroup *memcg, int nr_pages)
{
}
#endif #endif
/** /**
...@@ -2979,6 +2984,18 @@ static void memcg_free_cache_id(int id) ...@@ -2979,6 +2984,18 @@ static void memcg_free_cache_id(int id)
ida_simple_remove(&memcg_cache_ida, id); ida_simple_remove(&memcg_cache_ida, id);
} }
/*
 * memcg_account_kmem: apply a kernel memory delta to a memcg
 * @memcg: memory cgroup being accounted
 * @nr_pages: signed number of pages; positive charges, otherwise uncharges
 *
 * The MEMCG_KMEM stat is always adjusted by @nr_pages. The memcg->kmem
 * page counter is adjusted as well, but only when the memory controller
 * is not on the default hierarchy.
 */
static void memcg_account_kmem(struct mem_cgroup *memcg, int nr_pages)
{
	mod_memcg_state(memcg, MEMCG_KMEM, nr_pages);

	/* On the default hierarchy only the stat above is maintained. */
	if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
		return;

	if (nr_pages <= 0) {
		/* The page counter takes a magnitude, so flip the sign. */
		page_counter_uncharge(&memcg->kmem, -nr_pages);
		return;
	}

	page_counter_charge(&memcg->kmem, nr_pages);
}
/* /*
* obj_cgroup_uncharge_pages: uncharge a number of kernel pages from a objcg * obj_cgroup_uncharge_pages: uncharge a number of kernel pages from a objcg
* @objcg: object cgroup to uncharge * @objcg: object cgroup to uncharge
...@@ -2991,8 +3008,7 @@ static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg, ...@@ -2991,8 +3008,7 @@ static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg,
memcg = get_mem_cgroup_from_objcg(objcg); memcg = get_mem_cgroup_from_objcg(objcg);
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) memcg_account_kmem(memcg, -nr_pages);
page_counter_uncharge(&memcg->kmem, nr_pages);
refill_stock(memcg, nr_pages); refill_stock(memcg, nr_pages);
css_put(&memcg->css); css_put(&memcg->css);
...@@ -3018,8 +3034,7 @@ static int obj_cgroup_charge_pages(struct obj_cgroup *objcg, gfp_t gfp, ...@@ -3018,8 +3034,7 @@ static int obj_cgroup_charge_pages(struct obj_cgroup *objcg, gfp_t gfp,
if (ret) if (ret)
goto out; goto out;
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) memcg_account_kmem(memcg, nr_pages);
page_counter_charge(&memcg->kmem, nr_pages);
out: out:
css_put(&memcg->css); css_put(&memcg->css);
...@@ -6801,8 +6816,8 @@ static void uncharge_batch(const struct uncharge_gather *ug) ...@@ -6801,8 +6816,8 @@ static void uncharge_batch(const struct uncharge_gather *ug)
page_counter_uncharge(&ug->memcg->memory, ug->nr_memory); page_counter_uncharge(&ug->memcg->memory, ug->nr_memory);
if (do_memsw_account()) if (do_memsw_account())
page_counter_uncharge(&ug->memcg->memsw, ug->nr_memory); page_counter_uncharge(&ug->memcg->memsw, ug->nr_memory);
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && ug->nr_kmem) if (ug->nr_kmem)
page_counter_uncharge(&ug->memcg->kmem, ug->nr_kmem); memcg_account_kmem(ug->memcg, -ug->nr_kmem);
memcg_oom_recover(ug->memcg); memcg_oom_recover(ug->memcg);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment