Commit 5d65c8d7 authored by Barry Song, committed by Andrew Morton

mm: count the number of anonymous THPs per size

Patch series "mm: count the number of anonymous THPs per size", v4.

Knowing the number of transparent anon THPs in the system is crucial
for performance analysis. It helps in understanding the ratio and
distribution of THPs versus small folios throughout the system.

Additionally, partial unmapping by userspace can lead to significant waste
of THPs over time and increase memory reclamation pressure. We need this
information for comprehensive system tuning.


This patch (of 2):

Let's track for each anonymous THP size, how many of them are currently
allocated.  We'll track the complete lifespan of an anon THP, starting
when it becomes an anon THP ("large anon folio") (->mapping gets set),
until it gets freed (->mapping gets cleared).

Introduce a new "nr_anon" counter per THP size and adjust the
corresponding counter in the following cases:
* We allocate a new THP and call folio_add_new_anon_rmap() to map
   it the first time and turn it into an anon THP.
* We split an anon THP into multiple smaller ones.
* We migrate an anon THP, when we prepare the destination.
* We free an anon THP back to the buddy.

Note that AnonPages in /proc/meminfo currently tracks the total number of
*mapped* anonymous *pages*, and therefore has slightly different
semantics.  In the future, we might also want to track "nr_anon_mapped"
for each THP size, which might be helpful when comparing it to the number
of allocated anon THPs (long-term pinning, stuck in swapcache, memory
leaks, ...).

Further note that for now, we only track anon THPs after they got their
->mapping set, for example via folio_add_new_anon_rmap().  If we would
allocate some in the swapcache, they will only show up in the statistics
for now after they have been mapped to user space the first time, where we
call folio_add_new_anon_rmap().

[akpm@linux-foundation.org: documentation fixups, per David]
  Link: https://lkml.kernel.org/r/3e8add35-e26b-443b-8a04-1078f4bc78f6@redhat.com
Link: https://lkml.kernel.org/r/20240824010441.21308-1-21cnbao@gmail.com
Link: https://lkml.kernel.org/r/20240824010441.21308-2-21cnbao@gmail.com
Signed-off-by: Barry Song <v-songbaohua@oppo.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Chris Li <chrisl@kernel.org>
Cc: Chuanhua Han <hanchuanhua@oppo.com>
Cc: Kairui Song <kasong@tencent.com>
Cc: Kalesh Singh <kaleshsingh@google.com>
Cc: Lance Yang <ioworker0@gmail.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Shuai Yuan <yuanshuai@oppo.com>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 70e59a75
...@@ -551,6 +551,11 @@ split_deferred ...@@ -551,6 +551,11 @@ split_deferred
it would free up some memory. Pages on split queue are going to it would free up some memory. Pages on split queue are going to
be split under memory pressure, if splitting is possible. be split under memory pressure, if splitting is possible.
nr_anon
the number of anonymous THP we have in the whole system. These THPs
might be currently entirely mapped or have partially unmapped/unused
subpages.
As the system ages, allocating huge pages may be expensive as the As the system ages, allocating huge pages may be expensive as the
system uses memory compaction to copy data around memory to free a system uses memory compaction to copy data around memory to free a
huge page for use. There are some counters in ``/proc/vmstat`` to help huge page for use. There are some counters in ``/proc/vmstat`` to help
......
...@@ -126,6 +126,7 @@ enum mthp_stat_item { ...@@ -126,6 +126,7 @@ enum mthp_stat_item {
MTHP_STAT_SPLIT, MTHP_STAT_SPLIT,
MTHP_STAT_SPLIT_FAILED, MTHP_STAT_SPLIT_FAILED,
MTHP_STAT_SPLIT_DEFERRED, MTHP_STAT_SPLIT_DEFERRED,
MTHP_STAT_NR_ANON,
__MTHP_STAT_COUNT __MTHP_STAT_COUNT
}; };
...@@ -136,14 +137,24 @@ struct mthp_stat { ...@@ -136,14 +137,24 @@ struct mthp_stat {
DECLARE_PER_CPU(struct mthp_stat, mthp_stats); DECLARE_PER_CPU(struct mthp_stat, mthp_stats);
static inline void count_mthp_stat(int order, enum mthp_stat_item item) static inline void mod_mthp_stat(int order, enum mthp_stat_item item, int delta)
{ {
if (order <= 0 || order > PMD_ORDER) if (order <= 0 || order > PMD_ORDER)
return; return;
this_cpu_inc(mthp_stats.stats[order][item]); this_cpu_add(mthp_stats.stats[order][item], delta);
}
static inline void count_mthp_stat(int order, enum mthp_stat_item item)
{
mod_mthp_stat(order, item, 1);
} }
#else #else
static inline void mod_mthp_stat(int order, enum mthp_stat_item item, int delta)
{
}
static inline void count_mthp_stat(int order, enum mthp_stat_item item) static inline void count_mthp_stat(int order, enum mthp_stat_item item)
{ {
} }
......
...@@ -597,6 +597,7 @@ DEFINE_MTHP_STAT_ATTR(shmem_fallback_charge, MTHP_STAT_SHMEM_FALLBACK_CHARGE); ...@@ -597,6 +597,7 @@ DEFINE_MTHP_STAT_ATTR(shmem_fallback_charge, MTHP_STAT_SHMEM_FALLBACK_CHARGE);
DEFINE_MTHP_STAT_ATTR(split, MTHP_STAT_SPLIT); DEFINE_MTHP_STAT_ATTR(split, MTHP_STAT_SPLIT);
DEFINE_MTHP_STAT_ATTR(split_failed, MTHP_STAT_SPLIT_FAILED); DEFINE_MTHP_STAT_ATTR(split_failed, MTHP_STAT_SPLIT_FAILED);
DEFINE_MTHP_STAT_ATTR(split_deferred, MTHP_STAT_SPLIT_DEFERRED); DEFINE_MTHP_STAT_ATTR(split_deferred, MTHP_STAT_SPLIT_DEFERRED);
DEFINE_MTHP_STAT_ATTR(nr_anon, MTHP_STAT_NR_ANON);
static struct attribute *anon_stats_attrs[] = { static struct attribute *anon_stats_attrs[] = {
&anon_fault_alloc_attr.attr, &anon_fault_alloc_attr.attr,
...@@ -607,6 +608,7 @@ static struct attribute *anon_stats_attrs[] = { ...@@ -607,6 +608,7 @@ static struct attribute *anon_stats_attrs[] = {
&swpout_fallback_attr.attr, &swpout_fallback_attr.attr,
#endif #endif
&split_deferred_attr.attr, &split_deferred_attr.attr,
&nr_anon_attr.attr,
NULL, NULL,
}; };
...@@ -3314,8 +3316,9 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, ...@@ -3314,8 +3316,9 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
struct deferred_split *ds_queue = get_deferred_split_queue(folio); struct deferred_split *ds_queue = get_deferred_split_queue(folio);
/* reset xarray order to new order after split */ /* reset xarray order to new order after split */
XA_STATE_ORDER(xas, &folio->mapping->i_pages, folio->index, new_order); XA_STATE_ORDER(xas, &folio->mapping->i_pages, folio->index, new_order);
struct anon_vma *anon_vma = NULL; bool is_anon = folio_test_anon(folio);
struct address_space *mapping = NULL; struct address_space *mapping = NULL;
struct anon_vma *anon_vma = NULL;
int order = folio_order(folio); int order = folio_order(folio);
int extra_pins, ret; int extra_pins, ret;
pgoff_t end; pgoff_t end;
...@@ -3327,7 +3330,7 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, ...@@ -3327,7 +3330,7 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
if (new_order >= folio_order(folio)) if (new_order >= folio_order(folio))
return -EINVAL; return -EINVAL;
if (folio_test_anon(folio)) { if (is_anon) {
/* order-1 is not supported for anonymous THP. */ /* order-1 is not supported for anonymous THP. */
if (new_order == 1) { if (new_order == 1) {
VM_WARN_ONCE(1, "Cannot split to order-1 folio"); VM_WARN_ONCE(1, "Cannot split to order-1 folio");
...@@ -3367,7 +3370,7 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, ...@@ -3367,7 +3370,7 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
if (folio_test_writeback(folio)) if (folio_test_writeback(folio))
return -EBUSY; return -EBUSY;
if (folio_test_anon(folio)) { if (is_anon) {
/* /*
* The caller does not necessarily hold an mmap_lock that would * The caller does not necessarily hold an mmap_lock that would
* prevent the anon_vma disappearing so we first we take a * prevent the anon_vma disappearing so we first we take a
...@@ -3480,6 +3483,10 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, ...@@ -3480,6 +3483,10 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
} }
} }
if (is_anon) {
mod_mthp_stat(order, MTHP_STAT_NR_ANON, -1);
mod_mthp_stat(new_order, MTHP_STAT_NR_ANON, 1 << (order - new_order));
}
__split_huge_page(page, list, end, new_order); __split_huge_page(page, list, end, new_order);
ret = 0; ret = 0;
} else { } else {
......
...@@ -449,6 +449,8 @@ static int __folio_migrate_mapping(struct address_space *mapping, ...@@ -449,6 +449,8 @@ static int __folio_migrate_mapping(struct address_space *mapping,
/* No turning back from here */ /* No turning back from here */
newfolio->index = folio->index; newfolio->index = folio->index;
newfolio->mapping = folio->mapping; newfolio->mapping = folio->mapping;
if (folio_test_anon(folio) && folio_test_large(folio))
mod_mthp_stat(folio_order(folio), MTHP_STAT_NR_ANON, 1);
if (folio_test_swapbacked(folio)) if (folio_test_swapbacked(folio))
__folio_set_swapbacked(newfolio); __folio_set_swapbacked(newfolio);
...@@ -473,6 +475,8 @@ static int __folio_migrate_mapping(struct address_space *mapping, ...@@ -473,6 +475,8 @@ static int __folio_migrate_mapping(struct address_space *mapping,
*/ */
newfolio->index = folio->index; newfolio->index = folio->index;
newfolio->mapping = folio->mapping; newfolio->mapping = folio->mapping;
if (folio_test_anon(folio) && folio_test_large(folio))
mod_mthp_stat(folio_order(folio), MTHP_STAT_NR_ANON, 1);
folio_ref_add(newfolio, nr); /* add cache reference */ folio_ref_add(newfolio, nr); /* add cache reference */
if (folio_test_swapbacked(folio)) { if (folio_test_swapbacked(folio)) {
__folio_set_swapbacked(newfolio); __folio_set_swapbacked(newfolio);
......
...@@ -1084,8 +1084,11 @@ __always_inline bool free_pages_prepare(struct page *page, ...@@ -1084,8 +1084,11 @@ __always_inline bool free_pages_prepare(struct page *page,
(page + i)->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; (page + i)->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
} }
} }
if (PageMappingFlags(page)) if (PageMappingFlags(page)) {
if (PageAnon(page))
mod_mthp_stat(order, MTHP_STAT_NR_ANON, -1);
page->mapping = NULL; page->mapping = NULL;
}
if (is_check_pages_enabled()) { if (is_check_pages_enabled()) {
if (free_page_is_bad(page)) if (free_page_is_bad(page))
bad++; bad++;
......
...@@ -1467,6 +1467,7 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma, ...@@ -1467,6 +1467,7 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
} }
__folio_mod_stat(folio, nr, nr_pmdmapped); __folio_mod_stat(folio, nr, nr_pmdmapped);
mod_mthp_stat(folio_order(folio), MTHP_STAT_NR_ANON, 1);
} }
static __always_inline void __folio_add_file_rmap(struct folio *folio, static __always_inline void __folio_add_file_rmap(struct folio *folio,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment