Commit 8d1e24c0 authored by Matthew Wilcox (Oracle), committed by Andrew Morton

khugepaged: use a folio throughout collapse_file()

Pull folios from the page cache instead of pages.  Half of this work had
been done already, but we were still operating on pages for a large chunk
of this function.  There is no attempt in this patch to handle large
folios that are smaller than a THP; that will have to wait for a future
patch.

[willy@infradead.org: the unlikely() is embedded in IS_ERR()]
  Link: https://lkml.kernel.org/r/ZhIWX8K0E2tSyMSr@casper.infradead.org
Link: https://lkml.kernel.org/r/20240403171838.1445826-7-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 610ff817
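
For readers less familiar with the folio API, the pattern applied throughout the diff below is a one-for-one swap of page-cache helpers for their folio equivalents (find_lock_page() becomes filemap_lock_folio(), PageUptodate() becomes folio_test_uptodate(), and so on). The following is an illustrative sketch only, not part of the patch: example_probe_index() is a made-up helper used purely to show the old and new idioms next to each other. As the fixup note above says, IS_ERR() already embeds unlikely(), so the explicit unlikely() from the old code is dropped.

	/*
	 * Illustrative sketch only -- not from the patch.  It shows the
	 * page -> folio idiom this commit applies; example_probe_index()
	 * is a hypothetical helper name used purely for demonstration.
	 */
	#include <linux/pagemap.h>

	static int example_probe_index(struct address_space *mapping, pgoff_t index)
	{
		struct folio *folio;

		/* old: struct page *page = find_lock_page(mapping, index); */
		folio = filemap_lock_folio(mapping, index);
		/* old: if (unlikely(page == NULL)); IS_ERR() already embeds unlikely() */
		if (IS_ERR(folio))
			return -ENOENT;

		/* old: PageUptodate(page), PageDirty(page), PageWriteback(page) */
		if (!folio_test_uptodate(folio) || folio_test_dirty(folio) ||
		    folio_test_writeback(folio)) {
			folio_unlock(folio);	/* old: unlock_page(page) */
			folio_put(folio);	/* old: put_page(page) */
			return -EBUSY;
		}

		folio_unlock(folio);
		folio_put(folio);
		return 0;
	}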
@@ -1780,9 +1780,8 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
 			 struct collapse_control *cc)
 {
 	struct address_space *mapping = file->f_mapping;
-	struct page *page;
-	struct page *tmp, *dst;
-	struct folio *folio, *new_folio;
+	struct page *dst;
+	struct folio *folio, *tmp, *new_folio;
 	pgoff_t index = 0, end = start + HPAGE_PMD_NR;
 	LIST_HEAD(pagelist);
 	XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
@@ -1820,11 +1819,11 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
 	for (index = start; index < end; index++) {
 		xas_set(&xas, index);
-		page = xas_load(&xas);
+		folio = xas_load(&xas);
 		VM_BUG_ON(index != xas.xa_index);
 		if (is_shmem) {
-			if (!page) {
+			if (!folio) {
 				/*
 				 * Stop if extent has been truncated or
 				 * hole-punched, and is now completely
@@ -1840,7 +1839,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
 				continue;
 			}
-			if (xa_is_value(page) || !PageUptodate(page)) {
+			if (xa_is_value(folio) || !folio_test_uptodate(folio)) {
 				xas_unlock_irq(&xas);
 				/* swap in or instantiate fallocated page */
 				if (shmem_get_folio(mapping->host, index,
@@ -1850,28 +1849,27 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
 				}
 				/* drain lru cache to help isolate_lru_page() */
 				lru_add_drain();
-				page = folio_file_page(folio, index);
-			} else if (trylock_page(page)) {
-				get_page(page);
+			} else if (folio_trylock(folio)) {
+				folio_get(folio);
 				xas_unlock_irq(&xas);
 			} else {
 				result = SCAN_PAGE_LOCK;
 				goto xa_locked;
 			}
 		} else {	/* !is_shmem */
-			if (!page || xa_is_value(page)) {
+			if (!folio || xa_is_value(folio)) {
 				xas_unlock_irq(&xas);
 				page_cache_sync_readahead(mapping, &file->f_ra,
 							  file, index,
 							  end - index);
 				/* drain lru cache to help isolate_lru_page() */
 				lru_add_drain();
-				page = find_lock_page(mapping, index);
-				if (unlikely(page == NULL)) {
+				folio = filemap_lock_folio(mapping, index);
+				if (IS_ERR(folio)) {
 					result = SCAN_FAIL;
 					goto xa_unlocked;
 				}
-			} else if (PageDirty(page)) {
+			} else if (folio_test_dirty(folio)) {
 				/*
 				 * khugepaged only works on read-only fd,
 				 * so this page is dirty because it hasn't
@@ -1889,12 +1887,12 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
 				filemap_flush(mapping);
 				result = SCAN_FAIL;
 				goto xa_unlocked;
-			} else if (PageWriteback(page)) {
+			} else if (folio_test_writeback(folio)) {
 				xas_unlock_irq(&xas);
 				result = SCAN_FAIL;
 				goto xa_unlocked;
-			} else if (trylock_page(page)) {
-				get_page(page);
+			} else if (folio_trylock(folio)) {
+				folio_get(folio);
 				xas_unlock_irq(&xas);
 			} else {
 				result = SCAN_PAGE_LOCK;
@@ -1903,35 +1901,31 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
 		}
 		/*
-		 * The page must be locked, so we can drop the i_pages lock
+		 * The folio must be locked, so we can drop the i_pages lock
 		 * without racing with truncate.
 		 */
-		VM_BUG_ON_PAGE(!PageLocked(page), page);
+		VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
-		/* make sure the page is up to date */
-		if (unlikely(!PageUptodate(page))) {
+		/* make sure the folio is up to date */
+		if (unlikely(!folio_test_uptodate(folio))) {
 			result = SCAN_FAIL;
 			goto out_unlock;
 		}
 		/*
 		 * If file was truncated then extended, or hole-punched, before
-		 * we locked the first page, then a THP might be there already.
+		 * we locked the first folio, then a THP might be there already.
 		 * This will be discovered on the first iteration.
 		 */
-		if (PageTransCompound(page)) {
-			struct page *head = compound_head(page);
-
-			result = compound_order(head) == HPAGE_PMD_ORDER &&
-					head->index == start
+		if (folio_test_large(folio)) {
+			result = folio_order(folio) == HPAGE_PMD_ORDER &&
+					folio->index == start
 					/* Maybe PMD-mapped */
 					? SCAN_PTE_MAPPED_HUGEPAGE
 					: SCAN_PAGE_COMPOUND;
 			goto out_unlock;
 		}
-		folio = page_folio(page);
 		if (folio_mapping(folio) != mapping) {
 			result = SCAN_TRUNCATED;
 			goto out_unlock;
@@ -1941,7 +1935,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
 				  folio_test_writeback(folio))) {
 			/*
 			 * khugepaged only works on read-only fd, so this
-			 * page is dirty because it hasn't been flushed
+			 * folio is dirty because it hasn't been flushed
 			 * since first write.
 			 */
 			result = SCAN_FAIL;
@@ -1965,33 +1959,34 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
 		xas_lock_irq(&xas);
-		VM_BUG_ON_PAGE(page != xa_load(xas.xa, index), page);
+		VM_BUG_ON_FOLIO(folio != xa_load(xas.xa, index), folio);
 		/*
-		 * We control three references to the page:
+		 * We control three references to the folio:
 		 *  - we hold a pin on it;
 		 *  - one reference from page cache;
-		 *  - one from isolate_lru_page;
-		 * If those are the only references, then any new usage of the
-		 * page will have to fetch it from the page cache. That requires
-		 * locking the page to handle truncate, so any new usage will be
-		 * blocked until we unlock page after collapse/during rollback.
+		 *  - one from lru_isolate_folio;
+		 * If those are the only references, then any new usage
+		 * of the folio will have to fetch it from the page
+		 * cache. That requires locking the folio to handle
+		 * truncate, so any new usage will be blocked until we
+		 * unlock folio after collapse/during rollback.
 		 */
-		if (page_count(page) != 3) {
+		if (folio_ref_count(folio) != 3) {
 			result = SCAN_PAGE_COUNT;
 			xas_unlock_irq(&xas);
-			putback_lru_page(page);
+			folio_putback_lru(folio);
 			goto out_unlock;
 		}
 		/*
-		 * Accumulate the pages that are being collapsed.
+		 * Accumulate the folios that are being collapsed.
 		 */
-		list_add_tail(&page->lru, &pagelist);
+		list_add_tail(&folio->lru, &pagelist);
 		continue;
 out_unlock:
-		unlock_page(page);
-		put_page(page);
+		folio_unlock(folio);
+		folio_put(folio);
 		goto xa_unlocked;
 	}
@@ -2030,17 +2025,17 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
 	}
 	/*
-	 * The old pages are locked, so they won't change anymore.
+	 * The old folios are locked, so they won't change anymore.
 	 */
 	index = start;
 	dst = folio_page(new_folio, 0);
-	list_for_each_entry(page, &pagelist, lru) {
-		while (index < page->index) {
+	list_for_each_entry(folio, &pagelist, lru) {
+		while (index < folio->index) {
 			clear_highpage(dst);
 			index++;
 			dst++;
 		}
-		if (copy_mc_highpage(dst, page) > 0) {
+		if (copy_mc_highpage(dst, folio_page(folio, 0)) > 0) {
 			result = SCAN_COPY_MC;
 			goto rollback;
 		}
@@ -2152,15 +2147,15 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
 	folio_unlock(new_folio);
 	/*
-	 * The collapse has succeeded, so free the old pages.
+	 * The collapse has succeeded, so free the old folios.
 	 */
-	list_for_each_entry_safe(page, tmp, &pagelist, lru) {
-		list_del(&page->lru);
-		page->mapping = NULL;
-		ClearPageActive(page);
-		ClearPageUnevictable(page);
-		unlock_page(page);
-		folio_put_refs(page_folio(page), 3);
+	list_for_each_entry_safe(folio, tmp, &pagelist, lru) {
+		list_del(&folio->lru);
+		folio->mapping = NULL;
+		folio_clear_active(folio);
+		folio_clear_unevictable(folio);
+		folio_unlock(folio);
+		folio_put_refs(folio, 3);
 	}
 	goto out;
@@ -2174,11 +2169,11 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
 		shmem_uncharge(mapping->host, nr_none);
 	}
-	list_for_each_entry_safe(page, tmp, &pagelist, lru) {
-		list_del(&page->lru);
-		unlock_page(page);
-		putback_lru_page(page);
-		put_page(page);
+	list_for_each_entry_safe(folio, tmp, &pagelist, lru) {
+		list_del(&folio->lru);
+		folio_unlock(folio);
+		folio_putback_lru(folio);
+		folio_put(folio);
 	}
 	/*
 	 * Undo the updates of filemap_nr_thps_inc for non-SHMEM
...
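
One detail worth calling out from the diff: before an old folio may be copied and torn out of the mapping, the code requires that it holds exactly three references (the pin taken here, the page cache's own reference, and the one held by LRU isolation), which is what the folio_ref_count(folio) != 3 test enforces. A rough sketch of that invariant, using a hypothetical wrapper name for illustration only:

	/*
	 * Illustrative sketch only; example_refs_are_ours() is a
	 * hypothetical wrapper, not a kernel function.
	 */
	#include <linux/mm.h>

	static bool example_refs_are_ours(struct folio *folio)
	{
		/*
		 * Expected references at this point in collapse_file():
		 *   1. the pin this code took (folio_get() after folio_trylock());
		 *   2. the page cache entry itself;
		 *   3. the reference held while the folio is isolated from the LRU.
		 * Any additional reference means another user may still touch the
		 * folio, so the collapse backs off with SCAN_PAGE_COUNT.
		 */
		return folio_ref_count(folio) == 3;
	}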