Commit bb3aadf7 authored by Rick Edgecombe

x86/mm: Start actually marking _PAGE_SAVED_DIRTY

The recently introduced _PAGE_SAVED_DIRTY should be used instead of the
HW Dirty bit whenever a PTE is Write=0, in order to not inadvertently
create shadow stack PTEs. Update the pte_mk*() helpers to do this, and
apply the same changes to the pmd and pud variants. Since there is no
x86 version of pte_mkwrite() to hold this arch specific logic, create
one. Add it to x86/mm/pgtable.c instead of x86/include/asm/pgtable.h, as
future patches will require it to live in pgtable.c and this will make
the diff easier for reviewers.

Since CPUs without shadow stack support could create Write=0,Dirty=1
PTEs, only return true from pte_shstk() if the CPU also supports shadow
stack. This prevents such HW-created PTEs from showing as writable via
pte_write().
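To see why the feature check matters, consider a Write=0,Dirty=1 entry
created by hardware on a CPU without shadow stack support. A hedged
userspace sketch, with cpu_has_shstk standing in for
cpu_feature_enabled(X86_FEATURE_SHSTK):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define RW    (1ULL << 1)
    #define DIRTY (1ULL << 6)

    static bool cpu_has_shstk;	/* stand-in for the CPUID feature bit */

    /* Write=0,Dirty=1 is the shadow stack encoding, but only when the
     * CPU actually supports shadow stacks. */
    static bool is_shstk(uint64_t flags)
    {
            return cpu_has_shstk && (flags & (RW | DIRTY)) == DIRTY;
    }

    /* Shadow stack pages are logically writable despite Write=0. */
    static bool is_write(uint64_t flags)
    {
            return (flags & RW) || is_shstk(flags);
    }

    int main(void)
    {
            uint64_t stray = DIRTY;	/* HW-created Write=0,Dirty=1 */

            cpu_has_shstk = false;
            assert(!is_write(stray));	/* must not report writable */

            cpu_has_shstk = true;
            assert(is_write(stray));	/* a real shadow stack page */
            return 0;
    }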

For pte_modify() this is a bit trickier. It takes a "raw" pgprot_t which
was not necessarily created with any of the existing PTE bit helpers.
That means it can return a Write=0,Dirty=1 pte_t, i.e. a shadow stack
PTE, when the caller did not intend to create one.

Modify it to also move _PAGE_DIRTY to _PAGE_SAVED_DIRTY. To avoid
creating Write=0,Dirty=1 PTEs, pte_modify() needs to avoid:
1. Marking Write=0 PTEs Dirty=1
2. Marking Dirty=1 PTEs Write=0

The first case cannot happen as the existing behavior of pte_modify() is to
filter out any Dirty bit passed in newprot. Handle the second case by
shifting _PAGE_DIRTY=1 to _PAGE_SAVED_DIRTY=1 if the PTE was write
protected by the pte_modify() call. Apply the same changes to pmd_modify().
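As a worked example of case 2, write-protecting an ordinary dirty page
must not leave the shadow stack encoding behind. A minimal sketch, again
with made-up bit masks rather than the kernel's definitions:

    #include <assert.h>
    #include <stdint.h>

    #define RW          (1ULL << 1)
    #define DIRTY       (1ULL << 6)
    #define SAVED_DIRTY (1ULL << 11)	/* illustrative software bit */

    int main(void)
    {
            uint64_t pte = RW | DIRTY;	/* ordinary dirty, writable page */

            /* pte_modify() with a write-protecting newprot drops RW... */
            pte &= ~RW;

            /*
             * ...which would leave Write=0,Dirty=1, the shadow stack
             * encoding.  Because the old value was Write=1, shift the
             * hardware Dirty bit into SavedDirty instead.
             */
            if (pte & DIRTY) {
                    pte &= ~DIRTY;
                    pte |= SAVED_DIRTY;
            }

            assert(!(pte & DIRTY) && (pte & SAVED_DIRTY));
            return 0;
    }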
Co-developed-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Mike Rapoport (IBM) <rppt@kernel.org>
Tested-by: Pengfei Xu <pengfei.xu@intel.com>
Tested-by: John Allen <john.allen@amd.com>
Tested-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/all/20230613001108.3040476-13-rick.p.edgecombe%40intel.com
parent 1f6f66f6
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -125,9 +125,15 @@ extern pmdval_t early_pmd_flags;
  * The following only work if pte_present() is true.
  * Undefined behaviour if not..
  */
-static inline int pte_dirty(pte_t pte)
+static inline bool pte_dirty(pte_t pte)
 {
-	return pte_flags(pte) & _PAGE_DIRTY;
+	return pte_flags(pte) & _PAGE_DIRTY_BITS;
+}
+
+static inline bool pte_shstk(pte_t pte)
+{
+	return cpu_feature_enabled(X86_FEATURE_SHSTK) &&
+	       (pte_flags(pte) & (_PAGE_RW | _PAGE_DIRTY)) == _PAGE_DIRTY;
 }
 
 static inline int pte_young(pte_t pte)
@@ -135,9 +141,16 @@ static inline int pte_young(pte_t pte)
 	return pte_flags(pte) & _PAGE_ACCESSED;
 }
 
-static inline int pmd_dirty(pmd_t pmd)
+static inline bool pmd_dirty(pmd_t pmd)
+{
+	return pmd_flags(pmd) & _PAGE_DIRTY_BITS;
+}
+
+static inline bool pmd_shstk(pmd_t pmd)
 {
-	return pmd_flags(pmd) & _PAGE_DIRTY;
+	return cpu_feature_enabled(X86_FEATURE_SHSTK) &&
+	       (pmd_flags(pmd) & (_PAGE_RW | _PAGE_DIRTY | _PAGE_PSE)) ==
+	       (_PAGE_DIRTY | _PAGE_PSE);
 }
 
 #define pmd_young pmd_young
@@ -146,9 +159,9 @@ static inline int pmd_young(pmd_t pmd)
 	return pmd_flags(pmd) & _PAGE_ACCESSED;
 }
 
-static inline int pud_dirty(pud_t pud)
+static inline bool pud_dirty(pud_t pud)
 {
-	return pud_flags(pud) & _PAGE_DIRTY;
+	return pud_flags(pud) & _PAGE_DIRTY_BITS;
 }
 
 static inline int pud_young(pud_t pud)
@@ -158,13 +171,21 @@ static inline int pud_young(pud_t pud)
 static inline int pte_write(pte_t pte)
 {
-	return pte_flags(pte) & _PAGE_RW;
+	/*
+	 * Shadow stack pages are logically writable, but do not have
+	 * _PAGE_RW.  Check for them separately from _PAGE_RW itself.
+	 */
+	return (pte_flags(pte) & _PAGE_RW) || pte_shstk(pte);
 }
 
 #define pmd_write pmd_write
 static inline int pmd_write(pmd_t pmd)
 {
-	return pmd_flags(pmd) & _PAGE_RW;
+	/*
+	 * Shadow stack pages are logically writable, but do not have
+	 * _PAGE_RW.  Check for them separately from _PAGE_RW itself.
+	 */
+	return (pmd_flags(pmd) & _PAGE_RW) || pmd_shstk(pmd);
 }
 
 #define pud_write pud_write
@@ -351,7 +372,14 @@ static inline pte_t pte_clear_saveddirty(pte_t pte)
 static inline pte_t pte_wrprotect(pte_t pte)
 {
-	return pte_clear_flags(pte, _PAGE_RW);
+	pte = pte_clear_flags(pte, _PAGE_RW);
+
+	/*
+	 * Blindly clearing _PAGE_RW might accidentally create
+	 * a shadow stack PTE (Write=0,Dirty=1).  Move the hardware
+	 * dirty value to the software bit, if present.
+	 */
+	return pte_mksaveddirty(pte);
 }
 
 #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
@@ -389,7 +417,7 @@ static inline pte_t pte_clear_uffd_wp(pte_t pte)
 static inline pte_t pte_mkclean(pte_t pte)
 {
-	return pte_clear_flags(pte, _PAGE_DIRTY);
+	return pte_clear_flags(pte, _PAGE_DIRTY_BITS);
 }
 
 static inline pte_t pte_mkold(pte_t pte)
@@ -404,7 +432,16 @@ static inline pte_t pte_mkexec(pte_t pte)
 static inline pte_t pte_mkdirty(pte_t pte)
 {
-	return pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+	pte = pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+
+	return pte_mksaveddirty(pte);
+}
+
+static inline pte_t pte_mkwrite_shstk(pte_t pte)
+{
+	pte = pte_clear_flags(pte, _PAGE_RW);
+
+	return pte_set_flags(pte, _PAGE_DIRTY);
 }
 
 static inline pte_t pte_mkyoung(pte_t pte)
@@ -417,6 +454,10 @@ static inline pte_t pte_mkwrite_novma(pte_t pte)
 	return pte_set_flags(pte, _PAGE_RW);
 }
 
+struct vm_area_struct;
+pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma);
+#define pte_mkwrite pte_mkwrite
+
 static inline pte_t pte_mkhuge(pte_t pte)
 {
 	return pte_set_flags(pte, _PAGE_PSE);
@@ -481,7 +522,14 @@ static inline pmd_t pmd_clear_saveddirty(pmd_t pmd)
 static inline pmd_t pmd_wrprotect(pmd_t pmd)
 {
-	return pmd_clear_flags(pmd, _PAGE_RW);
+	pmd = pmd_clear_flags(pmd, _PAGE_RW);
+
+	/*
+	 * Blindly clearing _PAGE_RW might accidentally create
+	 * a shadow stack PMD (RW=0, Dirty=1).  Move the hardware
+	 * dirty value to the software bit.
+	 */
+	return pmd_mksaveddirty(pmd);
 }
 
 #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
@@ -508,12 +556,21 @@ static inline pmd_t pmd_mkold(pmd_t pmd)
 static inline pmd_t pmd_mkclean(pmd_t pmd)
 {
-	return pmd_clear_flags(pmd, _PAGE_DIRTY);
+	return pmd_clear_flags(pmd, _PAGE_DIRTY_BITS);
 }
 
 static inline pmd_t pmd_mkdirty(pmd_t pmd)
 {
-	return pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+	pmd = pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+
+	return pmd_mksaveddirty(pmd);
+}
+
+static inline pmd_t pmd_mkwrite_shstk(pmd_t pmd)
+{
+	pmd = pmd_clear_flags(pmd, _PAGE_RW);
+
+	return pmd_set_flags(pmd, _PAGE_DIRTY);
 }
 
 static inline pmd_t pmd_mkdevmap(pmd_t pmd)
@@ -536,6 +593,9 @@ static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
 	return pmd_set_flags(pmd, _PAGE_RW);
 }
 
+pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
+#define pmd_mkwrite pmd_mkwrite
+
 static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
 {
 	pudval_t v = native_pud_val(pud);
@@ -575,17 +635,26 @@ static inline pud_t pud_mkold(pud_t pud)
 static inline pud_t pud_mkclean(pud_t pud)
 {
-	return pud_clear_flags(pud, _PAGE_DIRTY);
+	return pud_clear_flags(pud, _PAGE_DIRTY_BITS);
 }
 
 static inline pud_t pud_wrprotect(pud_t pud)
 {
-	return pud_clear_flags(pud, _PAGE_RW);
+	pud = pud_clear_flags(pud, _PAGE_RW);
+
+	/*
+	 * Blindly clearing _PAGE_RW might accidentally create
+	 * a shadow stack PUD (RW=0, Dirty=1).  Move the hardware
+	 * dirty value to the software bit.
+	 */
+	return pud_mksaveddirty(pud);
 }
 
 static inline pud_t pud_mkdirty(pud_t pud)
 {
-	return pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+	pud = pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+
+	return pud_mksaveddirty(pud);
 }
 
 static inline pud_t pud_mkdevmap(pud_t pud)
@@ -605,7 +674,9 @@ static inline pud_t pud_mkyoung(pud_t pud)
 static inline pud_t pud_mkwrite(pud_t pud)
 {
-	return pud_set_flags(pud, _PAGE_RW);
+	pud = pud_set_flags(pud, _PAGE_RW);
+
+	return pud_clear_saveddirty(pud);
 }
 
 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
@@ -722,6 +793,7 @@ static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask)
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
 	pteval_t val = pte_val(pte), oldval = val;
+	pte_t pte_result;
 
 	/*
 	 * Chop off the NX bit (if present), and add the NX portion of
@@ -730,17 +802,54 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 	val &= _PAGE_CHG_MASK;
 	val |= check_pgprot(newprot) & ~_PAGE_CHG_MASK;
 	val = flip_protnone_guard(oldval, val, PTE_PFN_MASK);
-	return __pte(val);
+
+	pte_result = __pte(val);
+
+	/*
+	 * To avoid creating Write=0,Dirty=1 PTEs, pte_modify() needs to avoid:
+	 * 1. Marking Write=0 PTEs Dirty=1
+	 * 2. Marking Dirty=1 PTEs Write=0
+	 *
+	 * The first case cannot happen because the _PAGE_CHG_MASK will filter
+	 * out any Dirty bit passed in newprot.  Handle the second case by
+	 * going through the mksaveddirty exercise.  Only do this if the old
+	 * value was Write=1 to avoid doing this on Shadow Stack PTEs.
+	 */
+	if (oldval & _PAGE_RW)
+		pte_result = pte_mksaveddirty(pte_result);
+	else
+		pte_result = pte_clear_saveddirty(pte_result);
+
+	return pte_result;
 }
 
 static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
 {
 	pmdval_t val = pmd_val(pmd), oldval = val;
+	pmd_t pmd_result;
 
-	val &= _HPAGE_CHG_MASK;
+	val &= (_HPAGE_CHG_MASK & ~_PAGE_DIRTY);
 	val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK;
 	val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK);
-	return __pmd(val);
+
+	pmd_result = __pmd(val);
+
+	/*
+	 * To avoid creating Write=0,Dirty=1 PMDs, pmd_modify() needs to avoid:
+	 * 1. Marking Write=0 PMDs Dirty=1
+	 * 2. Marking Dirty=1 PMDs Write=0
+	 *
+	 * The first case cannot happen because the _PAGE_CHG_MASK will filter
+	 * out any Dirty bit passed in newprot.  Handle the second case by
+	 * going through the mksaveddirty exercise.  Only do this if the old
+	 * value was Write=1 to avoid doing this on Shadow Stack PMDs.
+	 */
+	if (oldval & _PAGE_RW)
+		pmd_result = pmd_mksaveddirty(pmd_result);
+	else
+		pmd_result = pmd_clear_saveddirty(pmd_result);
+
+	return pmd_result;
 }
 
 /*
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -872,3 +872,17 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
 #endif /* CONFIG_X86_64 */
 
 #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
+
+pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma)
+{
+	pte = pte_mkwrite_novma(pte);
+
+	return pte_clear_saveddirty(pte);
+}
+
+pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
+{
+	pmd = pmd_mkwrite_novma(pmd);
+
+	return pmd_clear_saveddirty(pmd);
+}
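For context, the vma argument exists so that pte_mkwrite() can later
distinguish shadow stack mappings. A hedged sketch of where a follow-up
patch in this series is expected to take this; the VM_SHADOW_STACK
branch is not part of this commit:

    pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma)
    {
            /* Hypothetical follow-up: shadow stack vmas get the
             * Write=0,Dirty=1 encoding instead of _PAGE_RW. */
            if (vma->vm_flags & VM_SHADOW_STACK)
                    return pte_mkwrite_shstk(pte);

            pte = pte_mkwrite_novma(pte);

            return pte_clear_saveddirty(pte);
    }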