Commit 809bc865 authored by Baolin Wang, committed by Andrew Morton

mm: shmem: support large folio swap out

Shmem will support large folio allocation [1] [2] to get better
performance. However, memory reclaim still splits the precious large
folios when trying to swap out shmem, which may lead to memory
fragmentation and means shmem cannot take advantage of large folios.

Moreover, the swap code already supports swapping out large folios
without splitting them, hence this patch set supports large folio swap
out for shmem.

Note that the i915_gem_shmem driver still needs its large folios to be
split when swapping, thus add a new flag 'split_large_folio' to
writeback_control to indicate that the large folio must be split.

[1] https://lore.kernel.org/all/cover.1717495894.git.baolin.wang@linux.alibaba.com/
[2] https://lore.kernel.org/all/20240515055719.32577-1-da.gomez@samsung.com/

[hughd@google.com: shmem_writepage() split folio at EOF before swapout]
  Link: https://lkml.kernel.org/r/aef55f8d-6040-692d-65e3-16150cce4440@google.com
[baolin.wang@linux.alibaba.com: remove the wbc->split_large_folio per Hugh]
  Link: https://lkml.kernel.org/r/1236a002daa301b3b9ba73d6c0fab348427cf295.1724833399.git.baolin.wang@linux.alibaba.com
Link: https://lkml.kernel.org/r/d80c21abd20e1b0f5ca66b330f074060fb2f082d.1723434324.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Chris Li <chrisl@kernel.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Lance Yang <ioworker0@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Pankaj Raghav <p.raghav@samsung.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 12885cbe
......@@ -79,6 +79,9 @@ struct writeback_control {
*/
struct swap_iocb **swap_plug;
/* Target list for splitting a large folio */
struct list_head *list;
/* internal fields used by the ->writepages implementation: */
struct folio_batch fbatch;
pgoff_t index;
......
......@@ -795,7 +795,6 @@ static int shmem_add_to_page_cache(struct folio *folio,
VM_BUG_ON_FOLIO(index != round_down(index, nr), folio);
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
VM_BUG_ON_FOLIO(!folio_test_swapbacked(folio), folio);
VM_BUG_ON(expected && folio_test_large(folio));
folio_ref_add(folio, nr);
folio->mapping = mapping;
......@@ -1460,6 +1459,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
swp_entry_t swap;
pgoff_t index;
int nr_pages;
bool split = false;
/*
* Our capabilities prevent regular writeback or sync from ever calling
......@@ -1478,14 +1478,26 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
goto redirty;
/*
* If /sys/kernel/mm/transparent_hugepage/shmem_enabled is "always" or
* "force", drivers/gpu/drm/i915/gem/i915_gem_shmem.c gets huge pages,
* and its shmem_writeback() needs them to be split when swapping.
* If CONFIG_THP_SWAP is not enabled, the large folio should be
* split when swapping.
*
* And shrinkage of pages beyond i_size does not split swap, so
* swapout of a large folio crossing i_size needs to split too
* (unless fallocate has been used to preallocate beyond EOF).
*/
if (folio_test_large(folio)) {
index = shmem_fallocend(inode,
DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE));
if ((index > folio->index && index < folio_next_index(folio)) ||
!IS_ENABLED(CONFIG_THP_SWAP))
split = true;
}
if (split) {
try_split:
/* Ensure the subpages are still dirty */
folio_test_set_dirty(folio);
if (split_huge_page(page) < 0)
if (split_huge_page_to_list_to_order(page, wbc->list, 0))
goto redirty;
folio = page_folio(page);
folio_clear_dirty(folio);
......@@ -1527,8 +1539,12 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
}
swap = folio_alloc_swap(folio);
if (!swap.val)
if (!swap.val) {
if (nr_pages > 1)
goto try_split;
goto redirty;
}
/*
* Add inode to shmem_unuse()'s list of swapped-out inodes,
......
......@@ -628,7 +628,7 @@ typedef enum {
* Calls ->writepage().
*/
static pageout_t pageout(struct folio *folio, struct address_space *mapping,
struct swap_iocb **plug)
struct swap_iocb **plug, struct list_head *folio_list)
{
/*
* If the folio is dirty, only perform writeback if that write
......@@ -676,6 +676,14 @@ static pageout_t pageout(struct folio *folio, struct address_space *mapping,
.swap_plug = plug,
};
/*
* The large shmem folio can be split if CONFIG_THP_SWAP is
* not enabled or contiguous swap entries are failed to
* allocate.
*/
if (shmem_mapping(mapping) && folio_test_large(folio))
wbc.list = folio_list;
folio_set_reclaim(folio);
res = mapping->a_ops->writepage(&folio->page, &wbc);
if (res < 0)
......@@ -1257,11 +1265,6 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
goto activate_locked_split;
}
}
} else if (folio_test_swapbacked(folio) &&
folio_test_large(folio)) {
/* Split shmem folio */
if (split_folio_to_list(folio, folio_list))
goto keep_locked;
}
/*
......@@ -1362,12 +1365,25 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
* starts and then write it out here.
*/
try_to_unmap_flush_dirty();
switch (pageout(folio, mapping, &plug)) {
switch (pageout(folio, mapping, &plug, folio_list)) {
case PAGE_KEEP:
goto keep_locked;
case PAGE_ACTIVATE:
/*
* If shmem folio is split when writeback to swap,
* the tail pages will make their own pass through
* this function and be accounted then.
*/
if (nr_pages > 1 && !folio_test_large(folio)) {
sc->nr_scanned -= (nr_pages - 1);
nr_pages = 1;
}
goto activate_locked;
case PAGE_SUCCESS:
if (nr_pages > 1 && !folio_test_large(folio)) {
sc->nr_scanned -= (nr_pages - 1);
nr_pages = 1;
}
stat->nr_pageout += nr_pages;
if (folio_test_writeback(folio))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment