Commit 12885cbe authored by Baolin Wang, committed by Andrew Morton

mm: shmem: split large entry if the swapin folio is not large

Currently, the swap device can only swap in order-0 folios, even though a
large folio may have been swapped out.  This requires us to split the large
entry previously saved in the shmem pagecache to support swapping in small
folios.
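
As an illustration of the invariant the split relies on: an order-2 entry
covers four consecutive indices, and its swap offsets are continuous, so
each index can be given its own order-0 entry at a consecutive offset.  A
userspace toy model (hypothetical numbers; a plain array stands in for the
xarray, this is not kernel code):

	#include <stdio.h>

	#define ORDER	2		/* order of the large entry being split */
	#define NR	(1 << ORDER)	/* number of order-0 entries it becomes */

	int main(void)
	{
		unsigned long aligned_index = 8;	/* start of the large entry */
		unsigned long base_offset = 512;	/* its swap offset (hypothetical) */
		unsigned long entry[NR];
		int i;

		/*
		 * Entry i of the split maps index (aligned_index + i) to swap
		 * offset (base_offset + i), mirroring the __xa_store() loop in
		 * shmem_split_large_entry() below.
		 */
		for (i = 0; i < NR; i++) {
			entry[i] = base_offset + i;
			printf("index %lu -> swap offset %lu\n",
			       aligned_index + i, entry[i]);
		}
		return 0;
	}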

[hughd@google.com: fix warnings from kmalloc_fix_flags()]
  Link: https://lkml.kernel.org/r/e2a2ba5d-864c-50aa-7579-97cba1c7dd0c@google.com
[baolin.wang@linux.alibaba.com: drop the 'new_order' parameter]
  Link: https://lkml.kernel.org/r/39c71ccf-669b-4d9f-923c-f6b9c4ceb8df@linux.alibaba.com
Link: https://lkml.kernel.org/r/4a0f12f27c54a62eb4d9ca1265fed3a62531a63e.1723434324.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Chris Li <chrisl@kernel.org>
Cc: Daniel Gomez <da.gomez@samsung.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Lance Yang <ioworker0@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Pankaj Raghav <p.raghav@samsung.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 872339c3
@@ -1999,6 +1999,84 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
	swap_free_nr(swap, nr_pages);
}

static int shmem_split_large_entry(struct inode *inode, pgoff_t index,
				   swp_entry_t swap, gfp_t gfp)
{
	struct address_space *mapping = inode->i_mapping;
	XA_STATE_ORDER(xas, &mapping->i_pages, index, 0);
	void *alloced_shadow = NULL;
	int alloced_order = 0, i;

	/* Convert user data gfp flags to xarray node gfp flags */
	gfp &= GFP_RECLAIM_MASK;

	for (;;) {
		int order = -1, split_order = 0;
		void *old = NULL;

		xas_lock_irq(&xas);
		old = xas_load(&xas);
		if (!xa_is_value(old) || swp_to_radix_entry(swap) != old) {
			xas_set_err(&xas, -EEXIST);
			goto unlock;
		}

		order = xas_get_order(&xas);

		/* Swap entry may have changed before we re-acquire the lock */
		if (alloced_order &&
		    (old != alloced_shadow || order != alloced_order)) {
			xas_destroy(&xas);
			alloced_order = 0;
		}

		/* Try to split large swap entry in pagecache */
		if (order > 0) {
			if (!alloced_order) {
				split_order = order;
				goto unlock;
			}
			xas_split(&xas, old, order);

			/*
			 * Re-set the swap entry after splitting, and the swap
			 * offset of the original large entry must be continuous.
			 */
			for (i = 0; i < 1 << order; i++) {
				pgoff_t aligned_index = round_down(index, 1 << order);
				swp_entry_t tmp;

				tmp = swp_entry(swp_type(swap), swp_offset(swap) + i);
				__xa_store(&mapping->i_pages, aligned_index + i,
					   swp_to_radix_entry(tmp), 0);
			}
		}

unlock:
		xas_unlock_irq(&xas);

		/* split needed, alloc here and retry. */
		if (split_order) {
			xas_split_alloc(&xas, old, split_order, gfp);
			if (xas_error(&xas))
				goto error;
			alloced_shadow = old;
			alloced_order = split_order;
			xas_reset(&xas);
			continue;
		}

		if (!xas_nomem(&xas, gfp))
			break;
	}

error:
	if (xas_error(&xas))
		return xas_error(&xas);

	return alloced_order;
}
/*
 * Swap in the folio pointed to by *foliop.
 * Caller has to make sure that *foliop contains a valid swapped folio.
@@ -2036,12 +2114,37 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
	/* Look it up and read it in.. */
	folio = swap_cache_get_folio(swap, NULL, 0);
	if (!folio) {
		int split_order;

		/* Or update major stats only when swapin succeeds?? */
		if (fault_type) {
			*fault_type |= VM_FAULT_MAJOR;
			count_vm_event(PGMAJFAULT);
			count_memcg_event_mm(fault_mm, PGMAJFAULT);
		}

		/*
		 * Now swap device can only swap in order 0 folio, then we
		 * should split the large swap entry stored in the pagecache
		 * if necessary.
		 */
		split_order = shmem_split_large_entry(inode, index, swap, gfp);
		if (split_order < 0) {
			error = split_order;
			goto failed;
		}

		/*
		 * If the large swap entry has already been split, it is
		 * necessary to recalculate the new swap entry based on
		 * the old order alignment.
		 */
		if (split_order > 0) {
			pgoff_t offset = index - round_down(index, 1 << split_order);

			swap = swp_entry(swp_type(swap), swp_offset(swap) + offset);
		}

		/* Here we actually start the io */
		folio = shmem_swapin_cluster(swap, gfp, info, index);
		if (!folio) {
...
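
The rebasing in the hunk above can be checked in isolation: the faulting
index's distance from the start of the old large entry is added to the
original swap offset.  A standalone sketch (hypothetical values; round_down
behaves as the kernel's power-of-two helper):

	#include <stdio.h>

	#define round_down(x, y) ((x) & ~((unsigned long)(y) - 1))

	int main(void)
	{
		unsigned long index = 10;	/* faulting page index */
		int split_order = 2;		/* order returned by shmem_split_large_entry() */
		unsigned long old_offset = 512;	/* swap offset of the old large entry */
		unsigned long offset;

		/* index 10 sits inside the old order-2 entry spanning 8..11 */
		offset = index - round_down(index, 1UL << split_order);

		/* entry 2 of the split, so swapin reads from offset 514 */
		printf("new swap offset = %lu\n", old_offset + offset);
		return 0;
	}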