Commit 15520a3f, authored by Peter Xu, committed by Andrew Morton

mm: use pte markers for swap errors

PTE markers are an ideal mechanism for things like SWP_SWAPIN_ERROR.  Using a
whole swap entry type for this purpose can be overkill, especially if
we already have PTE markers.  Define a new PTE marker bit for swapin errors
and use it in place of the dedicated swap entry type.  Then we can safely
drop SWP_SWAPIN_ERROR and give one device slot back to swap.

We used to have SWP_SWAPIN_ERROR taking the page pfn as part of the swap
entry, but it's never used.  Neither do I see how it can be useful because
normally the swapin failure should not be caused by a bad page but bad
swap device.  Drop it alongside.

Link: https://lkml.kernel.org/r/20221030214151.402274-3-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Huang Ying <ying.huang@intel.com>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent ca92ea3d
...@@ -55,10 +55,6 @@ static inline int current_is_kswapd(void) ...@@ -55,10 +55,6 @@ static inline int current_is_kswapd(void)
* actions on faults. * actions on faults.
*/ */
#define SWP_SWAPIN_ERROR_NUM 1
#define SWP_SWAPIN_ERROR (MAX_SWAPFILES + SWP_HWPOISON_NUM + \
SWP_MIGRATION_NUM + SWP_DEVICE_NUM + \
SWP_PTE_MARKER_NUM)
/* /*
* PTE markers are used to persist information onto PTEs that otherwise * PTE markers are used to persist information onto PTEs that otherwise
* should be a none pte. As its name "PTE" hints, it should only be * should be a none pte. As its name "PTE" hints, it should only be
...@@ -121,7 +117,7 @@ static inline int current_is_kswapd(void) ...@@ -121,7 +117,7 @@ static inline int current_is_kswapd(void)
#define MAX_SWAPFILES \ #define MAX_SWAPFILES \
((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \ ((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \
SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - \ SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - \
SWP_PTE_MARKER_NUM - SWP_SWAPIN_ERROR_NUM) SWP_PTE_MARKER_NUM)
/* /*
* Magic header for a swap area. The first part of the union is * Magic header for a swap area. The first part of the union is
......
...@@ -162,16 +162,6 @@ static inline void *swp_to_radix_entry(swp_entry_t entry) ...@@ -162,16 +162,6 @@ static inline void *swp_to_radix_entry(swp_entry_t entry)
return xa_mk_value(entry.val); return xa_mk_value(entry.val);
} }
/*
 * Build a swap entry of the dedicated SWP_SWAPIN_ERROR type, passing the
 * pfn of @page to swp_entry() so that it is encoded in the entry.
 */
static inline swp_entry_t make_swapin_error_entry(struct page *page)
{
return swp_entry(SWP_SWAPIN_ERROR, page_to_pfn(page));
}
/* Return non-zero when the swap type of @entry is SWP_SWAPIN_ERROR. */
static inline int is_swapin_error_entry(swp_entry_t entry)
{
return swp_type(entry) == SWP_SWAPIN_ERROR;
}
#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) #if IS_ENABLED(CONFIG_DEVICE_PRIVATE)
static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset) static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset)
{ {
...@@ -409,8 +399,9 @@ static inline bool is_migration_entry_dirty(swp_entry_t entry) ...@@ -409,8 +399,9 @@ static inline bool is_migration_entry_dirty(swp_entry_t entry)
typedef unsigned long pte_marker; typedef unsigned long pte_marker;
#define PTE_MARKER_UFFD_WP BIT(0) #define PTE_MARKER_UFFD_WP BIT(0)
#define PTE_MARKER_MASK (PTE_MARKER_UFFD_WP) #define PTE_MARKER_SWAPIN_ERROR BIT(1)
#define PTE_MARKER_MASK (BIT(2) - 1)
static inline swp_entry_t make_pte_marker_entry(pte_marker marker) static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
{ {
...@@ -437,6 +428,17 @@ static inline pte_t make_pte_marker(pte_marker marker) ...@@ -437,6 +428,17 @@ static inline pte_t make_pte_marker(pte_marker marker)
return swp_entry_to_pte(make_pte_marker_entry(marker)); return swp_entry_to_pte(make_pte_marker_entry(marker));
} }
/*
 * Build the swap entry that records a swapin failure: a PTE marker entry
 * created with the PTE_MARKER_SWAPIN_ERROR bit.  Takes no arguments; only
 * the marker bit is handed to make_pte_marker_entry().
 */
static inline swp_entry_t make_swapin_error_entry(void)
{
return make_pte_marker_entry(PTE_MARKER_SWAPIN_ERROR);
}
/*
 * Return non-zero when @entry is a PTE marker entry whose marker bits
 * include PTE_MARKER_SWAPIN_ERROR.  The bit is tested with a mask rather
 * than by equality, so the check also matches markers that carry other
 * bits in addition to the swapin-error one.
 */
static inline int is_swapin_error_entry(swp_entry_t entry)
{
return is_pte_marker_entry(entry) &&
(pte_marker_get(entry) & PTE_MARKER_SWAPIN_ERROR);
}
/* /*
* This is a special version to check pte_none() just to cover the case when * This is a special version to check pte_none() just to cover the case when
* the pte is a pte marker. It existed because in many cases the pte marker * the pte is a pte marker. It existed because in many cases the pte marker
......
...@@ -3668,6 +3668,10 @@ static vm_fault_t handle_pte_marker(struct vm_fault *vmf) ...@@ -3668,6 +3668,10 @@ static vm_fault_t handle_pte_marker(struct vm_fault *vmf)
if (WARN_ON_ONCE(!marker)) if (WARN_ON_ONCE(!marker))
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
/* Higher priority than uffd-wp when data corrupted */
if (marker & PTE_MARKER_SWAPIN_ERROR)
return VM_FAULT_SIGBUS;
if (pte_marker_entry_uffd_wp(entry)) if (pte_marker_entry_uffd_wp(entry))
return pte_marker_handle_uffd_wp(vmf); return pte_marker_handle_uffd_wp(vmf);
...@@ -3727,8 +3731,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) ...@@ -3727,8 +3731,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
put_page(vmf->page); put_page(vmf->page);
} else if (is_hwpoison_entry(entry)) { } else if (is_hwpoison_entry(entry)) {
ret = VM_FAULT_HWPOISON; ret = VM_FAULT_HWPOISON;
} else if (is_swapin_error_entry(entry)) {
ret = VM_FAULT_SIGBUS;
} else if (is_pte_marker_entry(entry)) { } else if (is_pte_marker_entry(entry)) {
ret = handle_pte_marker(vmf); ret = handle_pte_marker(vmf);
} else { } else {
......
...@@ -1682,7 +1682,7 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index, ...@@ -1682,7 +1682,7 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
swp_entry_t swapin_error; swp_entry_t swapin_error;
void *old; void *old;
swapin_error = make_swapin_error_entry(&folio->page); swapin_error = make_swapin_error_entry();
old = xa_cmpxchg_irq(&mapping->i_pages, index, old = xa_cmpxchg_irq(&mapping->i_pages, index,
swp_to_radix_entry(swap), swp_to_radix_entry(swap),
swp_to_radix_entry(swapin_error), 0); swp_to_radix_entry(swapin_error), 0);
......
...@@ -1781,7 +1781,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, ...@@ -1781,7 +1781,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
pte_t pteval; pte_t pteval;
dec_mm_counter(vma->vm_mm, MM_SWAPENTS); dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
pteval = swp_entry_to_pte(make_swapin_error_entry(page)); pteval = swp_entry_to_pte(make_swapin_error_entry());
set_pte_at(vma->vm_mm, addr, pte, pteval); set_pte_at(vma->vm_mm, addr, pte, pteval);
swap_free(entry); swap_free(entry);
ret = 0; ret = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment