Commit c3f2d783 authored by Linus Torvalds

Merge tag 'mm-hotfixes-stable-2024-08-17-19-34' of...

Merge tag 'mm-hotfixes-stable-2024-08-17-19-34' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull misc fixes from Andrew Morton:
 "16 hotfixes. All except one are for MM. 10 of these are cc:stable and
  the others pertain to post-6.10 issues.

  As usual with these merges, singletons and doubletons all over the
  place, no identifiable-by-me theme. Please see the lovingly curated
  changelogs to get the skinny"

* tag 'mm-hotfixes-stable-2024-08-17-19-34' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
  mm/migrate: fix deadlock in migrate_pages_batch() on large folios
  alloc_tag: mark pages reserved during CMA activation as not tagged
  alloc_tag: introduce clear_page_tag_ref() helper function
  crash: fix riscv64 crash memory reserve dead loop
  selftests: memfd_secret: don't build memfd_secret test on unsupported arches
  mm: fix endless reclaim on machines with unaccepted memory
  selftests/mm: compaction_test: fix off by one in check_compaction()
  mm/numa: no task_numa_fault() call if PMD is changed
  mm/numa: no task_numa_fault() call if PTE is changed
  mm/vmalloc: fix page mapping if vm_area_alloc_pages() with high order fallback to order 0
  mm/memory-failure: use raw_spinlock_t in struct memory_failure_cpu
  mm: don't account memmap per-node
  mm: add system wide stats items category
  mm: don't account memmap on failure
  mm/hugetlb: fix hugetlb vs. core-mm PT locking
  mseal: fix is_madv_discard()
parents 810996a3 2e6506e1
......@@ -944,10 +944,37 @@ static inline bool htlb_allow_alloc_fallback(int reason)
/*
 * huge_pte_lockptr - select the page-table lock that guards a hugetlb entry.
 * @h:   hstate whose huge page size decides which lock applies
 * @mm:  address space owning the page table
 * @pte: pointer to the page-table entry mapping the huge page
 *
 * Returns the PUD lock for sizes >= PUD_SIZE, the PMD lock for sizes
 * >= PMD_SIZE (or whenever CONFIG_HIGHPTE is enabled), and otherwise the
 * lock of the PTE table containing @pte.
 */
static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
					   struct mm_struct *mm, pte_t *pte)
{
	const unsigned long size = huge_page_size(h);

	VM_WARN_ON(size == PAGE_SIZE);

	/*
	 * hugetlb must use the exact same PT locks as core-mm page table
	 * walkers would. When modifying a PTE table, hugetlb must take the
	 * PTE PT lock, when modifying a PMD table, hugetlb must take the PMD
	 * PT lock etc.
	 *
	 * The expectation is that any hugetlb folio smaller than a PMD is
	 * always mapped into a single PTE table and that any hugetlb folio
	 * smaller than a PUD (but at least as big as a PMD) is always mapped
	 * into a single PMD table.
	 *
	 * If that does not hold for an architecture, then that architecture
	 * must disable split PT locks such that all *_lockptr() functions
	 * will give us the same result: the per-MM PT lock.
	 *
	 * Note that with e.g., CONFIG_PGTABLE_LEVELS=2 where
	 * PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE, we'd use pud_lockptr()
	 * and core-mm would use pmd_lockptr(). However, in such configurations
	 * split PMD locks are disabled -- they don't make sense on a single
	 * PGDIR page table -- and the end result is the same.
	 */
	if (size >= PUD_SIZE)
		return pud_lockptr(mm, (pud_t *) pte);
	else if (size >= PMD_SIZE || IS_ENABLED(CONFIG_HIGHPTE))
		return pmd_lockptr(mm, (pmd_t *) pte);
	/* pte_alloc_huge() only applies with !CONFIG_HIGHPTE */
	return ptep_lockptr(mm, pte);
}
#ifndef hugepages_supported
......
......@@ -2920,6 +2920,13 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
return ptlock_ptr(page_ptdesc(pmd_page(*pmd)));
}
/*
 * Return the lock of the page table that holds @pte, looked up from the
 * entry's own virtual address. @mm is not used by this variant.
 */
static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte)
{
	struct ptdesc *ptdesc;

	/* Refuse to build with CONFIG_HIGHPTE or a PTE table larger than a page. */
	BUILD_BUG_ON(IS_ENABLED(CONFIG_HIGHPTE));
	BUILD_BUG_ON(MAX_PTRS_PER_PTE * sizeof(pte_t) > PAGE_SIZE);

	ptdesc = virt_to_ptdesc(pte);
	return ptlock_ptr(ptdesc);
}
static inline bool ptlock_init(struct ptdesc *ptdesc)
{
/*
......@@ -2944,6 +2951,10 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
{
return &mm->page_table_lock;
}
/*
 * Variant that ignores @pte and hands back the single per-mm
 * page_table_lock.
 */
static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte)
{
	spinlock_t *ptl = &mm->page_table_lock;

	return ptl;
}
/* Nothing to set up in this variant. */
static inline void ptlock_cache_init(void)
{
}

/* Always reports success; @ptdesc is left untouched. */
static inline bool ptlock_init(struct ptdesc *ptdesc)
{
	return true;
}

/* Nothing to release in this variant. */
static inline void ptlock_free(struct ptdesc *ptdesc)
{
}
......
......@@ -220,8 +220,6 @@ enum node_stat_item {
PGDEMOTE_KSWAPD,
PGDEMOTE_DIRECT,
PGDEMOTE_KHUGEPAGED,
NR_MEMMAP, /* page metadata allocated through buddy allocator */
NR_MEMMAP_BOOT, /* page metadata allocated through boot allocator */
NR_VM_NODE_STAT_ITEMS
};
......
......@@ -43,6 +43,18 @@ static inline void put_page_tag_ref(union codetag_ref *ref)
page_ext_put(page_ext_from_codetag_ref(ref));
}
/*
 * Mark the allocation-tag reference of @page as empty.
 *
 * Does nothing when memory allocation profiling is disabled or when no tag
 * reference can be obtained for the page. The reference taken by
 * get_page_tag_ref() is dropped again before returning.
 */
static inline void clear_page_tag_ref(struct page *page)
{
	union codetag_ref *ref;

	if (!mem_alloc_profiling_enabled())
		return;

	ref = get_page_tag_ref(page);
	if (!ref)
		return;

	set_codetag_empty(ref);
	put_page_tag_ref(ref);
}
static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
unsigned int nr)
{
......@@ -126,6 +138,7 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr)
/* Stub: never yields a tag reference. */
static inline union codetag_ref *get_page_tag_ref(struct page *page)
{
	return NULL;
}

/* Stub: nothing to release. */
static inline void put_page_tag_ref(union codetag_ref *ref)
{
}

/* Stub: nothing to clear. */
static inline void clear_page_tag_ref(struct page *page)
{
}

/* Stub: no accounting on allocation. */
static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
				   unsigned int nr)
{
}

/* Stub: no accounting on free. */
static inline void pgalloc_tag_sub(struct page *page, unsigned int nr)
{
}
......
......@@ -34,10 +34,13 @@ struct reclaim_stat {
unsigned nr_lazyfree_fail;
};
/*
 * Stat data for system wide items.
 *
 * NR_VM_STAT_ITEMS must stay last: it is the number of entries and is used
 * as an offset into vmstat_text.
 */
enum vm_stat_item {
	NR_DIRTY_THRESHOLD,
	NR_DIRTY_BG_THRESHOLD,
	NR_MEMMAP_PAGES,	/* page metadata allocated through buddy allocator */
	NR_MEMMAP_BOOT_PAGES,	/* page metadata allocated through boot allocator */
	NR_VM_STAT_ITEMS,
};
#ifdef CONFIG_VM_EVENT_COUNTERS
......@@ -514,21 +517,13 @@ static inline const char *lru_list_name(enum lru_list lru)
return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
}
/*
 * Look up the vmstat_text name of a writeback stat item. These names are
 * stored after the zone, NUMA-event and node stat names.
 *
 * NOTE(review): this listing also shows the enum under the name
 * vm_stat_item -- confirm whether this helper is still wanted.
 */
static inline const char *writeback_stat_name(enum writeback_stat_item item)
{
	const int base = NR_VM_ZONE_STAT_ITEMS +
			 NR_VM_NUMA_EVENT_ITEMS +
			 NR_VM_NODE_STAT_ITEMS;

	return vmstat_text[base + item];
}
#if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
/*
 * Look up the vmstat_text name of a VM event item. Event names follow the
 * zone, NUMA-event, node and system-wide stat names, so the index is the
 * sum of those section sizes plus @item. Each section is counted exactly
 * once.
 */
static inline const char *vm_event_name(enum vm_event_item item)
{
	return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
			   NR_VM_NUMA_EVENT_ITEMS +
			   NR_VM_NODE_STAT_ITEMS +
			   NR_VM_STAT_ITEMS +
			   item];
}
#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
......@@ -625,7 +620,6 @@ static inline void lruvec_stat_sub_folio(struct folio *folio,
lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio));
}
void __meminit mod_node_early_perpage_metadata(int nid, long delta);
void __meminit store_early_perpage_metadata(void);
void memmap_boot_pages_add(long delta);
void memmap_pages_add(long delta);
#endif /* _LINUX_VMSTAT_H */
......@@ -423,6 +423,7 @@ void __init reserve_crashkernel_generic(char *cmdline,
if (high && search_end == CRASH_ADDR_HIGH_MAX) {
search_end = CRASH_ADDR_LOW_MAX;
search_base = 0;
if (search_end != CRASH_ADDR_HIGH_MAX)
goto retry;
}
pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
......
......@@ -1685,7 +1685,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
spin_unlock(vmf->ptl);
goto out;
return 0;
}
pmd = pmd_modify(oldpmd, vma->vm_page_prot);
......@@ -1728,22 +1728,16 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
if (!migrate_misplaced_folio(folio, vma, target_nid)) {
flags |= TNF_MIGRATED;
nid = target_nid;
} else {
task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags);
return 0;
}
flags |= TNF_MIGRATE_FAIL;
vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
spin_unlock(vmf->ptl);
goto out;
}
goto out_map;
}
out:
if (nid != NUMA_NO_NODE)
task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags);
return 0;
}
out_map:
/* Restore the PMD */
pmd = pmd_modify(oldpmd, vma->vm_page_prot);
......@@ -1753,7 +1747,10 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd);
update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
spin_unlock(vmf->ptl);
goto out;
if (nid != NUMA_NO_NODE)
task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags);
return 0;
}
/*
......
......@@ -185,11 +185,11 @@ static int vmemmap_remap_range(unsigned long start, unsigned long end,
/*
 * Release one vmemmap page and decrement the matching memmap counter.
 *
 * Reserved pages are returned through free_bootmem_page() and charged to
 * the boot counter; all others go through __free_page() and the regular
 * counter. The counter is adjusted before the page is freed so that @page
 * is never touched after being handed back, and each free is accounted
 * exactly once.
 */
static inline void free_vmemmap_page(struct page *page)
{
	if (PageReserved(page)) {
		memmap_boot_pages_add(-1);
		free_bootmem_page(page);
	} else {
		memmap_pages_add(-1);
		__free_page(page);
	}
}
......@@ -341,7 +341,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end,
copy_page(page_to_virt(walk.reuse_page),
(void *)walk.reuse_addr);
list_add(&walk.reuse_page->lru, vmemmap_pages);
mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, 1);
memmap_pages_add(1);
}
/*
......@@ -392,14 +392,11 @@ static int alloc_vmemmap_page_list(unsigned long start, unsigned long end,
for (i = 0; i < nr_pages; i++) {
page = alloc_pages_node(nid, gfp_mask, 0);
if (!page) {
mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, i);
if (!page)
goto out;
}
list_add(&page->lru, list);
}
mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, nr_pages);
memmap_pages_add(nr_pages);
return 0;
out:
......
......@@ -2417,7 +2417,7 @@ struct memory_failure_entry {
/*
 * Per-CPU queue of pending memory-failure events.
 * @fifo: ring of up to MEMORY_FAILURE_FIFO_SIZE queued entries
 * @lock: raw spinlock guarding @fifo; taken with the raw_spin_lock_*()
 *        helpers by the users of this structure
 * @work: work item that drains @fifo
 */
struct memory_failure_cpu {
	DECLARE_KFIFO(fifo, struct memory_failure_entry,
		      MEMORY_FAILURE_FIFO_SIZE);
	raw_spinlock_t lock;
	struct work_struct work;
};
......@@ -2443,20 +2443,22 @@ void memory_failure_queue(unsigned long pfn, int flags)
{
struct memory_failure_cpu *mf_cpu;
unsigned long proc_flags;
bool buffer_overflow;
struct memory_failure_entry entry = {
.pfn = pfn,
.flags = flags,
};
mf_cpu = &get_cpu_var(memory_failure_cpu);
spin_lock_irqsave(&mf_cpu->lock, proc_flags);
if (kfifo_put(&mf_cpu->fifo, entry))
raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags);
buffer_overflow = !kfifo_put(&mf_cpu->fifo, entry);
if (!buffer_overflow)
schedule_work_on(smp_processor_id(), &mf_cpu->work);
else
raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
put_cpu_var(memory_failure_cpu);
if (buffer_overflow)
pr_err("buffer overflow when queuing memory failure at %#lx\n",
pfn);
spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
put_cpu_var(memory_failure_cpu);
}
EXPORT_SYMBOL_GPL(memory_failure_queue);
......@@ -2469,9 +2471,9 @@ static void memory_failure_work_func(struct work_struct *work)
mf_cpu = container_of(work, struct memory_failure_cpu, work);
for (;;) {
spin_lock_irqsave(&mf_cpu->lock, proc_flags);
raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags);
gotten = kfifo_get(&mf_cpu->fifo, &entry);
spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
if (!gotten)
break;
if (entry.flags & MF_SOFT_OFFLINE)
......@@ -2501,7 +2503,7 @@ static int __init memory_failure_init(void)
for_each_possible_cpu(cpu) {
mf_cpu = &per_cpu(memory_failure_cpu, cpu);
spin_lock_init(&mf_cpu->lock);
raw_spin_lock_init(&mf_cpu->lock);
INIT_KFIFO(mf_cpu->fifo);
INIT_WORK(&mf_cpu->work, memory_failure_work_func);
}
......
......@@ -5295,7 +5295,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
if (unlikely(!pte_same(old_pte, vmf->orig_pte))) {
pte_unmap_unlock(vmf->pte, vmf->ptl);
goto out;
return 0;
}
pte = pte_modify(old_pte, vma->vm_page_prot);
......@@ -5358,23 +5358,19 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
if (!migrate_misplaced_folio(folio, vma, target_nid)) {
nid = target_nid;
flags |= TNF_MIGRATED;
} else {
task_numa_fault(last_cpupid, nid, nr_pages, flags);
return 0;
}
flags |= TNF_MIGRATE_FAIL;
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
vmf->address, &vmf->ptl);
if (unlikely(!vmf->pte))
goto out;
return 0;
if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
pte_unmap_unlock(vmf->pte, vmf->ptl);
goto out;
}
goto out_map;
}
out:
if (nid != NUMA_NO_NODE)
task_numa_fault(last_cpupid, nid, nr_pages, flags);
return 0;
}
out_map:
/*
* Make it present again, depending on how arch implements
......@@ -5387,7 +5383,10 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte,
writable);
pte_unmap_unlock(vmf->pte, vmf->ptl);
goto out;
if (nid != NUMA_NO_NODE)
task_numa_fault(last_cpupid, nid, nr_pages, flags);
return 0;
}
static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf)
......
......@@ -1479,11 +1479,17 @@ static int unmap_and_move_huge_page(new_folio_t get_new_folio,
return rc;
}
static inline int try_split_folio(struct folio *folio, struct list_head *split_folios)
static inline int try_split_folio(struct folio *folio, struct list_head *split_folios,
enum migrate_mode mode)
{
int rc;
if (mode == MIGRATE_ASYNC) {
if (!folio_trylock(folio))
return -EAGAIN;
} else {
folio_lock(folio);
}
rc = split_folio_to_list(folio, split_folios);
folio_unlock(folio);
if (!rc)
......@@ -1677,7 +1683,7 @@ static int migrate_pages_batch(struct list_head *from,
*/
if (nr_pages > 2 &&
!list_empty(&folio->_deferred_list)) {
if (try_split_folio(folio, split_folios) == 0) {
if (!try_split_folio(folio, split_folios, mode)) {
nr_failed++;
stats->nr_thp_failed += is_thp;
stats->nr_thp_split += is_thp;
......@@ -1699,7 +1705,7 @@ static int migrate_pages_batch(struct list_head *from,
if (!thp_migration_supported() && is_thp) {
nr_failed++;
stats->nr_thp_failed++;
if (!try_split_folio(folio, split_folios)) {
if (!try_split_folio(folio, split_folios, mode)) {
stats->nr_thp_split++;
stats->nr_split++;
continue;
......@@ -1731,7 +1737,7 @@ static int migrate_pages_batch(struct list_head *from,
stats->nr_thp_failed += is_thp;
/* Large folio NUMA faulting doesn't split to retry. */
if (is_large && !nosplit) {
int ret = try_split_folio(folio, split_folios);
int ret = try_split_folio(folio, split_folios, mode);
if (!ret) {
stats->nr_thp_split += is_thp;
......
......@@ -1623,8 +1623,7 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat)
panic("Failed to allocate %ld bytes for node %d memory map\n",
size, pgdat->node_id);
pgdat->node_mem_map = map + offset;
mod_node_early_perpage_metadata(pgdat->node_id,
DIV_ROUND_UP(size, PAGE_SIZE));
memmap_boot_pages_add(DIV_ROUND_UP(size, PAGE_SIZE));
pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n",
__func__, pgdat->node_id, (unsigned long)pgdat,
(unsigned long)pgdat->node_mem_map);
......@@ -2245,6 +2244,8 @@ void __init init_cma_reserved_pageblock(struct page *page)
set_pageblock_migratetype(page, MIGRATE_CMA);
set_page_refcounted(page);
/* pages were reserved and not allocated */
clear_page_tag_ref(page);
__free_pages(page, pageblock_order);
adjust_managed_page_count(page, pageblock_nr_pages);
......@@ -2460,15 +2461,7 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn,
}
/* pages were reserved and not allocated */
if (mem_alloc_profiling_enabled()) {
union codetag_ref *ref = get_page_tag_ref(page);
if (ref) {
set_codetag_empty(ref);
put_page_tag_ref(ref);
}
}
clear_page_tag_ref(page);
__free_pages_core(page, order, MEMINIT_EARLY);
}
......
......@@ -40,9 +40,17 @@ static bool can_modify_vma(struct vm_area_struct *vma)
/*
 * Report whether @behavior is one of the madvise() advice values this file
 * treats as discarding.
 *
 * The MADV_* advice values are plain enumerators, not bit flags, so they
 * must be compared for equality: a bitwise AND against an OR of the values
 * would also match unrelated advice such as MADV_RANDOM.
 */
static bool is_madv_discard(int behavior)
{
	switch (behavior) {
	case MADV_FREE:
	case MADV_DONTNEED:
	case MADV_DONTNEED_LOCKED:
	case MADV_REMOVE:
	case MADV_DONTFORK:
	case MADV_WIPEONFORK:
		return true;
	default:
		return false;
	}
}
static bool is_ro_anon(struct vm_area_struct *vma)
......
......@@ -287,7 +287,7 @@ EXPORT_SYMBOL(nr_online_nodes);
static bool page_contains_unaccepted(struct page *page, unsigned int order);
static void accept_page(struct page *page, unsigned int order);
static bool try_to_accept_memory(struct zone *zone, unsigned int order);
static bool cond_accept_memory(struct zone *zone, unsigned int order);
static inline bool has_unaccepted_memory(void);
static bool __free_unaccepted(struct page *page);
......@@ -3072,9 +3072,6 @@ static inline long __zone_watermark_unusable_free(struct zone *z,
if (!(alloc_flags & ALLOC_CMA))
unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES);
#endif
#ifdef CONFIG_UNACCEPTED_MEMORY
unusable_free += zone_page_state(z, NR_UNACCEPTED);
#endif
return unusable_free;
}
......@@ -3368,6 +3365,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
}
}
cond_accept_memory(zone, order);
/*
* Detect whether the number of free pages is below high
* watermark. If so, we will decrease pcp->high and free
......@@ -3393,10 +3392,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
gfp_mask)) {
int ret;
if (has_unaccepted_memory()) {
if (try_to_accept_memory(zone, order))
if (cond_accept_memory(zone, order))
goto try_this_zone;
}
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/*
......@@ -3450,10 +3447,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
return page;
} else {
if (has_unaccepted_memory()) {
if (try_to_accept_memory(zone, order))
if (cond_accept_memory(zone, order))
goto try_this_zone;
}
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/* Try again if zone has deferred pages */
......@@ -5755,7 +5750,6 @@ void __init setup_per_cpu_pageset(void)
for_each_online_pgdat(pgdat)
pgdat->per_cpu_nodestats =
alloc_percpu(struct per_cpu_nodestat);
store_early_perpage_metadata();
}
__meminit void zone_pcp_init(struct zone *zone)
......@@ -5821,14 +5815,7 @@ unsigned long free_reserved_area(void *start, void *end, int poison, const char
void free_reserved_page(struct page *page)
{
if (mem_alloc_profiling_enabled()) {
union codetag_ref *ref = get_page_tag_ref(page);
if (ref) {
set_codetag_empty(ref);
put_page_tag_ref(ref);
}
}
clear_page_tag_ref(page);
ClearPageReserved(page);
init_page_count(page);
__free_page(page);
......@@ -6951,9 +6938,6 @@ static bool try_to_accept_memory_one(struct zone *zone)
struct page *page;
bool last;
if (list_empty(&zone->unaccepted_pages))
return false;
spin_lock_irqsave(&zone->lock, flags);
page = list_first_entry_or_null(&zone->unaccepted_pages,
struct page, lru);
......@@ -6979,23 +6963,29 @@ static bool try_to_accept_memory_one(struct zone *zone)
return true;
}
static bool try_to_accept_memory(struct zone *zone, unsigned int order)
static bool cond_accept_memory(struct zone *zone, unsigned int order)
{
long to_accept;
int ret = false;
bool ret = false;
if (!has_unaccepted_memory())
return false;
if (list_empty(&zone->unaccepted_pages))
return false;
/* How much to accept to get to high watermark? */
to_accept = high_wmark_pages(zone) -
(zone_page_state(zone, NR_FREE_PAGES) -
__zone_watermark_unusable_free(zone, order, 0));
__zone_watermark_unusable_free(zone, order, 0) -
zone_page_state(zone, NR_UNACCEPTED));
/* Accept at least one page */
do {
while (to_accept > 0) {
if (!try_to_accept_memory_one(zone))
break;
ret = true;
to_accept -= MAX_ORDER_NR_PAGES;
} while (to_accept > 0);
}
return ret;
}
......@@ -7038,7 +7028,7 @@ static void accept_page(struct page *page, unsigned int order)
{
}
/* Stub variant: there is never anything to accept, so always report false. */
static bool cond_accept_memory(struct zone *zone, unsigned int order)
{
	return false;
}
......
......@@ -214,8 +214,7 @@ static int __init alloc_node_page_ext(int nid)
return -ENOMEM;
NODE_DATA(nid)->node_page_ext = base;
total_usage += table_size;
mod_node_page_state(NODE_DATA(nid), NR_MEMMAP_BOOT,
DIV_ROUND_UP(table_size, PAGE_SIZE));
memmap_boot_pages_add(DIV_ROUND_UP(table_size, PAGE_SIZE));
return 0;
}
......@@ -275,10 +274,8 @@ static void *__meminit alloc_page_ext(size_t size, int nid)
else
addr = vzalloc_node(size, nid);
if (addr) {
mod_node_page_state(NODE_DATA(nid), NR_MEMMAP,
DIV_ROUND_UP(size, PAGE_SIZE));
}
if (addr)
memmap_pages_add(DIV_ROUND_UP(size, PAGE_SIZE));
return addr;
}
......@@ -323,25 +320,18 @@ static void free_page_ext(void *addr)
{
size_t table_size;
struct page *page;
struct pglist_data *pgdat;
table_size = page_ext_size * PAGES_PER_SECTION;
memmap_pages_add(-1L * (DIV_ROUND_UP(table_size, PAGE_SIZE)));
if (is_vmalloc_addr(addr)) {
page = vmalloc_to_page(addr);
pgdat = page_pgdat(page);
vfree(addr);
} else {
page = virt_to_page(addr);
pgdat = page_pgdat(page);
BUG_ON(PageReserved(page));
kmemleak_free(addr);
free_pages_exact(addr, table_size);
}
mod_node_page_state(pgdat, NR_MEMMAP,
-1L * (DIV_ROUND_UP(table_size, PAGE_SIZE)));
}
static void __free_page_ext(unsigned long pfn)
......
......@@ -469,13 +469,10 @@ struct page * __meminit __populate_section_memmap(unsigned long pfn,
if (r < 0)
return NULL;
if (system_state == SYSTEM_BOOTING) {
mod_node_early_perpage_metadata(nid, DIV_ROUND_UP(end - start,
PAGE_SIZE));
} else {
mod_node_page_state(NODE_DATA(nid), NR_MEMMAP,
DIV_ROUND_UP(end - start, PAGE_SIZE));
}
if (system_state == SYSTEM_BOOTING)
memmap_boot_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE));
else
memmap_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE));
return pfn_to_page(pfn);
}
......@@ -463,7 +463,7 @@ static void __init sparse_buffer_init(unsigned long size, int nid)
sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid, true);
sparsemap_buf_end = sparsemap_buf + size;
#ifndef CONFIG_SPARSEMEM_VMEMMAP
mod_node_early_perpage_metadata(nid, DIV_ROUND_UP(size, PAGE_SIZE));
memmap_boot_pages_add(DIV_ROUND_UP(size, PAGE_SIZE));
#endif
}
......@@ -643,8 +643,7 @@ static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
unsigned long start = (unsigned long) pfn_to_page(pfn);
unsigned long end = start + nr_pages * sizeof(struct page);
mod_node_page_state(page_pgdat(pfn_to_page(pfn)), NR_MEMMAP,
-1L * (DIV_ROUND_UP(end - start, PAGE_SIZE)));
memmap_pages_add(-1L * (DIV_ROUND_UP(end - start, PAGE_SIZE)));
vmemmap_free(start, end, altmap);
}
static void free_map_bootmem(struct page *memmap)
......
......@@ -3584,16 +3584,9 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
page = alloc_pages_noprof(alloc_gfp, order);
else
page = alloc_pages_node_noprof(nid, alloc_gfp, order);
if (unlikely(!page)) {
if (!nofail)
if (unlikely(!page))
break;
/* fall back to the zero order allocations */
alloc_gfp |= __GFP_NOFAIL;
order = 0;
continue;
}
/*
* Higher order allocations must be able to be treated as
* independent small pages by callers (as they can with
......
......@@ -1033,6 +1033,24 @@ unsigned long node_page_state(struct pglist_data *pgdat,
}
#endif
/*
 * Count number of pages "struct page" and "struct page_ext" consume.
 * nr_memmap_boot_pages: # of pages allocated by boot allocator
 * nr_memmap_pages: # of pages that were allocated by buddy allocator
 *
 * Both are single system-wide atomic counters. They are only modified
 * through the two helpers below and are read back when the vmstat values
 * are assembled in vmstat_start().
 */
static atomic_long_t nr_memmap_boot_pages = ATOMIC_LONG_INIT(0);
static atomic_long_t nr_memmap_pages = ATOMIC_LONG_INIT(0);
/* Add @delta pages (negative to subtract) to the boot-allocator count. */
void memmap_boot_pages_add(long delta)
{
atomic_long_add(delta, &nr_memmap_boot_pages);
}
/* Add @delta pages (negative to subtract) to the buddy-allocator count. */
void memmap_pages_add(long delta)
{
atomic_long_add(delta, &nr_memmap_pages);
}
#ifdef CONFIG_COMPACTION
struct contig_page_info {
......@@ -1255,11 +1273,11 @@ const char * const vmstat_text[] = {
"pgdemote_kswapd",
"pgdemote_direct",
"pgdemote_khugepaged",
"nr_memmap",
"nr_memmap_boot",
/* enum writeback_stat_item counters */
/* system-wide enum vm_stat_item counters */
"nr_dirty_threshold",
"nr_dirty_background_threshold",
"nr_memmap_pages",
"nr_memmap_boot_pages",
#if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
/* enum vm_event_item counters */
......@@ -1790,7 +1808,7 @@ static const struct seq_operations zoneinfo_op = {
/*
 * Total number of vmstat values: zone, NUMA-event, node and system-wide
 * stat items, plus the VM event items when CONFIG_VM_EVENT_COUNTERS is
 * enabled. Each section is counted exactly once.
 */
#define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \
			 NR_VM_NUMA_EVENT_ITEMS + \
			 NR_VM_NODE_STAT_ITEMS + \
			 NR_VM_STAT_ITEMS + \
			 (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \
			  NR_VM_EVENT_ITEMS : 0))
......@@ -1827,7 +1845,9 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
v + NR_DIRTY_THRESHOLD);
v += NR_VM_WRITEBACK_STAT_ITEMS;
v[NR_MEMMAP_PAGES] = atomic_long_read(&nr_memmap_pages);
v[NR_MEMMAP_BOOT_PAGES] = atomic_long_read(&nr_memmap_boot_pages);
v += NR_VM_STAT_ITEMS;
#ifdef CONFIG_VM_EVENT_COUNTERS
all_vm_events(v);
......@@ -2285,25 +2305,3 @@ static int __init extfrag_debug_init(void)
module_init(extfrag_debug_init);
#endif
/*
 * Per-node page metadata size (struct page and page_ext), in pages,
 * accumulated by mod_node_early_perpage_metadata() and later copied into
 * the node counters by store_early_perpage_metadata().
 */
static unsigned long early_perpage_metadata[MAX_NUMNODES] __meminitdata;

/* Adjust the early metadata page count of node @nid by @delta. */
void __meminit mod_node_early_perpage_metadata(int nid, long delta)
{
	early_perpage_metadata[nid] += delta;
}

/* Add each online node's accumulated count to its NR_MEMMAP_BOOT counter. */
void __meminit store_early_perpage_metadata(void)
{
	struct pglist_data *pgdat;
	int nid;

	for_each_online_pgdat(pgdat) {
		nid = pgdat->node_id;
		mod_node_page_state(NODE_DATA(nid), NR_MEMMAP_BOOT,
				    early_perpage_metadata[nid]);
	}
}
......@@ -53,7 +53,9 @@ TEST_GEN_FILES += madv_populate
TEST_GEN_FILES += map_fixed_noreplace
TEST_GEN_FILES += map_hugetlb
TEST_GEN_FILES += map_populate
ifneq (,$(filter $(ARCH),arm64 riscv riscv64 x86 x86_64))
TEST_GEN_FILES += memfd_secret
endif
TEST_GEN_FILES += migration
TEST_GEN_FILES += mkdirty
TEST_GEN_FILES += mlock-random-test
......
......@@ -89,9 +89,10 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size,
int fd, ret = -1;
int compaction_index = 0;
char nr_hugepages[20] = {0};
char init_nr_hugepages[20] = {0};
char init_nr_hugepages[24] = {0};
sprintf(init_nr_hugepages, "%lu", initial_nr_hugepages);
snprintf(init_nr_hugepages, sizeof(init_nr_hugepages),
"%lu", initial_nr_hugepages);
/* We want to test with 80% of available memory. Else, OOM killer comes
in to play */
......
......@@ -374,8 +374,11 @@ CATEGORY="hmm" run_test bash ./test_hmm.sh smoke
# MADV_POPULATE_READ and MADV_POPULATE_WRITE tests
CATEGORY="madv_populate" run_test ./madv_populate
if [ -x ./memfd_secret ]
then
(echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix
CATEGORY="memfd_secret" run_test ./memfd_secret
fi
# KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100
CATEGORY="ksm" run_test ./ksm_tests -H -s 100
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment