Commit 26d21b18 authored by Lance Yang's avatar Lance Yang Committed by Andrew Morton

mm/rmap: remove duplicated exit code in pagewalk loop

Patch series "Reclaim lazyfree THP without splitting", v8.

This series adds support for reclaiming PMD-mapped THP marked as lazyfree
without needing to first split the large folio via
split_huge_pmd_address().

When the user no longer requires the pages, they would use
madvise(MADV_FREE) to mark the pages as lazy free.  Subsequently, they
typically would not re-write to that memory again.

During memory reclaim, if we detect that the large folio and its PMD are
both still marked as clean and there are no unexpected references(such as
GUP), so we can just discard the memory lazily, improving the efficiency
of memory reclamation in this case.

Performance Testing
===================

On an Intel i5 CPU, reclaiming 1GiB of lazyfree THPs using
mem_cgroup_force_empty() results in the following runtimes in seconds
(shorter is better):

--------------------------------------------
|     Old       |      New       |  Change  |
--------------------------------------------
|   0.683426    |    0.049197    |  -92.80% |
--------------------------------------------


This patch (of 8):

Introduce the labels walk_done and walk_abort as exit points to eliminate
duplicated exit code in the pagewalk loop.

Link: https://lkml.kernel.org/r/20240614015138.31461-1-ioworker0@gmail.com
Link: https://lkml.kernel.org/r/20240614015138.31461-2-ioworker0@gmail.comSigned-off-by: default avatarLance Yang <ioworker0@gmail.com>
Reviewed-by: default avatarZi Yan <ziy@nvidia.com>
Reviewed-by: default avatarBaolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: default avatarDavid Hildenbrand <david@redhat.com>
Reviewed-by: default avatarBarry Song <baohua@kernel.org>
Cc: Bang Li <libang.li@antgroup.com>
Cc: Fangrui Song <maskray@google.com>
Cc: Jeff Xie <xiehuan09@gmail.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: SeongJae Park <sj@kernel.org>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yin Fengwei <fengwei.yin@intel.com>
Cc: Zach O'Keefe <zokeefe@google.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent 9ba85f55
......@@ -1681,9 +1681,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
/* Restore the mlock which got missed */
if (!folio_test_large(folio))
mlock_vma_folio(folio, vma);
page_vma_mapped_walk_done(&pvmw);
ret = false;
break;
goto walk_abort;
}
pfn = pte_pfn(ptep_get(pvmw.pte));
......@@ -1721,11 +1719,8 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
*/
if (!anon) {
VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
if (!hugetlb_vma_trylock_write(vma)) {
page_vma_mapped_walk_done(&pvmw);
ret = false;
break;
}
if (!hugetlb_vma_trylock_write(vma))
goto walk_abort;
if (huge_pmd_unshare(mm, vma, address, pvmw.pte)) {
hugetlb_vma_unlock_write(vma);
flush_tlb_range(vma,
......@@ -1740,8 +1735,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
* actual page and drop map count
* to zero.
*/
page_vma_mapped_walk_done(&pvmw);
break;
goto walk_done;
}
hugetlb_vma_unlock_write(vma);
}
......@@ -1813,9 +1807,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
if (unlikely(folio_test_swapbacked(folio) !=
folio_test_swapcache(folio))) {
WARN_ON_ONCE(1);
ret = false;
page_vma_mapped_walk_done(&pvmw);
break;
goto walk_abort;
}
/* MADV_FREE page check */
......@@ -1854,23 +1846,17 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
*/
set_pte_at(mm, address, pvmw.pte, pteval);
folio_set_swapbacked(folio);
ret = false;
page_vma_mapped_walk_done(&pvmw);
break;
goto walk_abort;
}
if (swap_duplicate(entry) < 0) {
set_pte_at(mm, address, pvmw.pte, pteval);
ret = false;
page_vma_mapped_walk_done(&pvmw);
break;
goto walk_abort;
}
if (arch_unmap_one(mm, vma, address, pteval) < 0) {
swap_free(entry);
set_pte_at(mm, address, pvmw.pte, pteval);
ret = false;
page_vma_mapped_walk_done(&pvmw);
break;
goto walk_abort;
}
/* See folio_try_share_anon_rmap(): clear PTE first. */
......@@ -1878,9 +1864,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
folio_try_share_anon_rmap_pte(folio, subpage)) {
swap_free(entry);
set_pte_at(mm, address, pvmw.pte, pteval);
ret = false;
page_vma_mapped_walk_done(&pvmw);
break;
goto walk_abort;
}
if (list_empty(&mm->mmlist)) {
spin_lock(&mmlist_lock);
......@@ -1920,6 +1904,12 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
if (vma->vm_flags & VM_LOCKED)
mlock_drain_local();
folio_put(folio);
continue;
walk_abort:
ret = false;
walk_done:
page_vma_mapped_walk_done(&pvmw);
break;
}
mmu_notifier_invalidate_range_end(&range);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment