Commit 50875896 authored by Chuanhua Han, committed by Andrew Morton

mm: swap: entirely map large folios found in swapcache

When a large folio is found in the swapcache, the current implementation
requires calling do_swap_page() nr_pages times, resulting in nr_pages page
faults.  This patch opts to map the entire large folio at once to minimize
page faults.  Additionally, redundant checks and early exits for ARM64 MTE
restoring are removed.
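
In outline, the batched path works as in the following simplified sketch (locking, refcounting details and error handling omitted; the names match the diff below), so that one fault frees all of the folio's swap entries and installs all of its PTEs at once:

	nr_pages = folio_nr_pages(folio);	/* e.g. 16 for a 64KiB folio */
	swap_free_nr(entry, nr_pages);		/* free all swap entries in one call */
	folio_ref_add(folio, nr_pages - 1);	/* one ref per additionally mapped page */
	set_ptes(vma->vm_mm, address, ptep, pte, nr_pages);	/* map every subpage */

A process touching a swapped-in 16-page mTHP therefore takes one fault instead of sixteen.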

Link: https://lkml.kernel.org/r/20240529082824.150954-7-21cnbao@gmail.com
Signed-off-by: Chuanhua Han <hanchuanhua@oppo.com>
Co-developed-by: Barry Song <v-songbaohua@oppo.com>
Signed-off-by: Barry Song <v-songbaohua@oppo.com>
Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Chris Li <chrisl@kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Gao Xiang <xiang@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kairui Song <kasong@tencent.com>
Cc: Khalid Aziz <khalid.aziz@oracle.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 4c3f9664
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4010,6 +4010,10 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	pte_t pte;
 	vm_fault_t ret = 0;
 	void *shadow = NULL;
+	int nr_pages;
+	unsigned long page_idx;
+	unsigned long address;
+	pte_t *ptep;
 
 	if (!pte_unmap_same(vmf))
 		goto out;
@@ -4208,6 +4212,38 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 		goto out_nomap;
 	}
 
+	nr_pages = 1;
+	page_idx = 0;
+	address = vmf->address;
+	ptep = vmf->pte;
+	if (folio_test_large(folio) && folio_test_swapcache(folio)) {
+		int nr = folio_nr_pages(folio);
+		unsigned long idx = folio_page_idx(folio, page);
+		unsigned long folio_start = address - idx * PAGE_SIZE;
+		unsigned long folio_end = folio_start + nr * PAGE_SIZE;
+		pte_t *folio_ptep;
+		pte_t folio_pte;
+
+		if (unlikely(folio_start < max(address & PMD_MASK, vma->vm_start)))
+			goto check_folio;
+		if (unlikely(folio_end > pmd_addr_end(address, vma->vm_end)))
+			goto check_folio;
+
+		folio_ptep = vmf->pte - idx;
+		folio_pte = ptep_get(folio_ptep);
+		if (!pte_same(folio_pte, pte_move_swp_offset(vmf->orig_pte, -idx)) ||
+		    swap_pte_batch(folio_ptep, nr, folio_pte) != nr)
+			goto check_folio;
+
+		page_idx = idx;
+		address = folio_start;
+		ptep = folio_ptep;
+		nr_pages = nr;
+		entry = folio->swap;
+		page = &folio->page;
+	}
+
+check_folio:
 	/*
 	 * PG_anon_exclusive reuses PG_mappedtodisk for anon pages.  A swap pte
 	 * must never point at an anonymous page in the swapcache that is
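
To illustrate the two range checks above with a hypothetical example (the addresses are made up): for a 64KiB folio (nr = 16, PAGE_SIZE = 4KiB) faulting at address 0x7f0000005000 on subpage idx = 5:

	folio_start = 0x7f0000005000 - 5 * 0x1000  = 0x7f0000000000
	folio_end   = 0x7f0000000000 + 16 * 0x1000 = 0x7f0000010000

Batching is attempted only when [folio_start, folio_end) lies entirely inside the VMA and within the page table spanned by one PMD, and swap_pte_batch() then confirms that all nr PTEs still hold consecutive swap entries belonging to this folio. If any check fails, the code falls through to check_folio and maps only the single faulting page, as before.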
@@ -4267,12 +4303,12 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	 * We're already holding a reference on the page but haven't mapped it
 	 * yet.
 	 */
-	swap_free(entry);
+	swap_free_nr(entry, nr_pages);
 	if (should_try_to_free_swap(folio, vma, vmf->flags))
 		folio_free_swap(folio);
 
-	inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
-	dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
+	add_mm_counter(vma->vm_mm, MM_ANONPAGES, nr_pages);
+	add_mm_counter(vma->vm_mm, MM_SWAPENTS, -nr_pages);
 	pte = mk_pte(page, vma->vm_page_prot);
 
 	/*
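
For reference, the counter update above is a direct generalization of the single-page helpers it replaces, since inc_mm_counter() and dec_mm_counter() are just add_mm_counter() with a delta of +1 or -1:

	inc_mm_counter(mm, MM_ANONPAGES);	/* == add_mm_counter(mm, MM_ANONPAGES, 1) */
	dec_mm_counter(mm, MM_SWAPENTS);	/* == add_mm_counter(mm, MM_SWAPENTS, -1) */

so RSS and swap-entry accounting stay balanced when nr_pages > 1.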
@@ -4289,27 +4325,28 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 		}
 		rmap_flags |= RMAP_EXCLUSIVE;
 	}
-	flush_icache_page(vma, page);
+	folio_ref_add(folio, nr_pages - 1);
+	flush_icache_pages(vma, page, nr_pages);
 	if (pte_swp_soft_dirty(vmf->orig_pte))
 		pte = pte_mksoft_dirty(pte);
 	if (pte_swp_uffd_wp(vmf->orig_pte))
 		pte = pte_mkuffd_wp(pte);
-	vmf->orig_pte = pte;
+	vmf->orig_pte = pte_advance_pfn(pte, page_idx);
 
 	/* ksm created a completely new copy */
 	if (unlikely(folio != swapcache && swapcache)) {
-		folio_add_new_anon_rmap(folio, vma, vmf->address);
+		folio_add_new_anon_rmap(folio, vma, address);
 		folio_add_lru_vma(folio, vma);
 	} else {
-		folio_add_anon_rmap_pte(folio, page, vma, vmf->address,
-					rmap_flags);
+		folio_add_anon_rmap_ptes(folio, page, nr_pages, vma, address,
+					 rmap_flags);
 	}
 
 	VM_BUG_ON(!folio_test_anon(folio) ||
 			(pte_write(pte) && !PageAnonExclusive(page)));
-	set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
-	arch_do_swap_page_nr(vma->vm_mm, vma, vmf->address,
-			pte, vmf->orig_pte, 1);
+	set_ptes(vma->vm_mm, address, ptep, pte, nr_pages);
+	arch_do_swap_page_nr(vma->vm_mm, vma, address,
+			pte, pte, nr_pages);
 
 	folio_unlock(folio);
 	if (folio != swapcache && swapcache) {
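
set_ptes() behaves roughly like the following sketch of the generic fallback (architectures may implement it more efficiently):

	/* simplified: install nr_pages contiguous entries, advancing the pfn each time */
	for (i = 0; i < nr_pages; i++)
		set_pte_at(vma->vm_mm, address + i * PAGE_SIZE, ptep + i,
			   pte_advance_pfn(pte, i));

This is also why vmf->orig_pte is advanced by page_idx above: it must describe the entry actually installed at the faulting address, not the one at the folio head.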
@@ -4333,7 +4370,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	}
 
 	/* No need to invalidate - it was non-present before */
-	update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1);
+	update_mmu_cache_range(vmf, vma, address, ptep, nr_pages);
 unlock:
 	if (vmf->pte)
 		pte_unmap_unlock(vmf->pte, vmf->ptl);