Commit 2f4b829c authored by Catalin Marinas, committed by Will Deacon

arm64: Add support for hardware updates of the access and dirty pte bits

The ARMv8.1 architecture extensions introduce support for hardware
updates of the access and dirty information in page table entries. With
TCR_EL1.HA enabled, when the CPU accesses an address with the PTE_AF bit
cleared in the page table, instead of raising an access flag fault the
CPU sets the actual page table entry bit. To ensure that kernel
modifications to the page tables do not inadvertently revert a change
introduced by hardware updates, the exclusive monitor (ldxr/stxr) is
adopted in the pte accessors.
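To make the lost-update hazard concrete, here is a minimal stand-alone C sketch (not the kernel code; pteval_t and the helper names are simplified stand-ins): a plain read-modify-write can erase an Access flag that the MMU set between the load and the store, while a compare-and-swap loop, the C analogue of an ldxr/stxr sequence, detects the intervening change and retries.

#include <stdint.h>

typedef uint64_t pteval_t;
#define PTE_AF ((pteval_t)1 << 10)	/* Access flag, may be set by hardware */

/* Racy: if the MMU sets PTE_AF between the load and the store, the
 * store silently reverts it. */
static void pte_clear_flags_racy(pteval_t *ptep, pteval_t mask)
{
	pteval_t old = *ptep;
	*ptep = old & ~mask;
}

/* Analogue of the kernel's ldxr/stxr loop: the store only succeeds if
 * nothing (hardware included) modified the pte since our load. */
static void pte_clear_flags_atomic(pteval_t *ptep, pteval_t mask)
{
	pteval_t old = __atomic_load_n(ptep, __ATOMIC_RELAXED);

	while (!__atomic_compare_exchange_n(ptep, &old, old & ~mask, 1,
					    __ATOMIC_RELAXED, __ATOMIC_RELAXED))
		;	/* old was refreshed; recompute and retry */
}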

When TCR_EL1.HD is enabled, a write access to a memory location with the
DBM (Dirty Bit Management) bit set in the corresponding pte
automatically clears the read-only bit (AP[2]). This DBM bit maps onto
the Linux PTE_WRITE bit; to check whether a writable (DBM set) page
is dirty, the kernel tests the PTE_RDONLY bit. In order to allow
read-only and dirty pages, the kernel needs to preserve the software
dirty bit. The hardware dirty status is transferred to the software
dirty bit in ptep_set_wrprotect() (using load/store exclusive loop) and
pte_modify().
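Expressed as code, the resulting dirty test is a two-bit check. A minimal sketch of the predicate this patch introduces (simplified plain-C stand-ins for the macros in the diff below, reusing pteval_t from the sketch above):

#define PTE_RDONLY ((pteval_t)1 << 7)	/* AP[2]: cleared by hardware on write */
#define PTE_DIRTY  ((pteval_t)1 << 55)	/* software dirty bit */

/* Hardware dirty: a DBM/PTE_WRITE page whose read-only bit was cleared. */
static int pte_hw_dirty(pteval_t pte) { return !(pte & PTE_RDONLY); }
static int pte_sw_dirty(pteval_t pte) { return !!(pte & PTE_DIRTY); }
static int pte_dirty(pteval_t pte)    { return pte_sw_dirty(pte) || pte_hw_dirty(pte); }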
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
parent b08d4640
@@ -469,6 +469,23 @@ config ARM64_VA_BITS
default 42 if ARM64_VA_BITS_42
default 48 if ARM64_VA_BITS_48
config ARM64_HW_AFDBM
bool "Support for hardware updates of the Access and Dirty page flags"
default y
help
The ARMv8.1 architecture extensions introduce support for
hardware updates of the access and dirty information in page
table entries. When enabled in TCR_EL1 (HA and HD bits) on
capable processors, accesses to pages with PTE_AF cleared will
set this bit instead of raising an access flag fault.
Similarly, writes to read-only pages with the DBM bit set will
clear the read-only bit (AP[2]) instead of raising a
permission fault.
Kernels built with this configuration option enabled continue
to work on pre-ARMv8.1 hardware and the performance impact is
minimal. If unsure, say Y.
config CPU_BIG_ENDIAN
bool "Build big-endian kernel"
help
...
@@ -104,6 +104,7 @@
#define PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */
#define PTE_AF (_AT(pteval_t, 1) << 10) /* Access Flag */
#define PTE_NG (_AT(pteval_t, 1) << 11) /* nG */
#define PTE_DBM (_AT(pteval_t, 1) << 51) /* Dirty Bit Management */
#define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */
#define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */
@@ -168,5 +169,7 @@
#define TCR_TG1_64K (UL(3) << 30)
#define TCR_ASID16 (UL(1) << 36)
#define TCR_TBI0 (UL(1) << 37)
#define TCR_HA (UL(1) << 39)
#define TCR_HD (UL(1) << 40)
#endif
@@ -16,6 +16,7 @@
#ifndef __ASM_PGTABLE_H
#define __ASM_PGTABLE_H
#include <asm/bug.h>
#include <asm/proc-fns.h>
#include <asm/memory.h>
@@ -27,7 +28,11 @@
#define PTE_VALID (_AT(pteval_t, 1) << 0)
#define PTE_DIRTY (_AT(pteval_t, 1) << 55)
#define PTE_SPECIAL (_AT(pteval_t, 1) << 56)
#ifdef CONFIG_ARM64_HW_AFDBM
#define PTE_WRITE (PTE_DBM) /* same as DBM */
#else
#define PTE_WRITE (_AT(pteval_t, 1) << 57)
#endif
#define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */
/*
@@ -48,6 +53,9 @@
#define FIRST_USER_ADDRESS 0UL
#ifndef __ASSEMBLY__
#include <linux/mmdebug.h>
extern void __pte_error(const char *file, int line, unsigned long val);
extern void __pmd_error(const char *file, int line, unsigned long val);
extern void __pud_error(const char *file, int line, unsigned long val);
@@ -137,12 +145,20 @@ extern struct page *empty_zero_page;
* The following only work if pte_present(). Undefined behaviour otherwise.
*/
#define pte_present(pte) (!!(pte_val(pte) & (PTE_VALID | PTE_PROT_NONE)))
#define pte_young(pte) (!!(pte_val(pte) & PTE_AF))
#define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL))
#define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE))
#define pte_exec(pte) (!(pte_val(pte) & PTE_UXN))
#ifdef CONFIG_ARM64_HW_AFDBM
#define pte_hw_dirty(pte) (!(pte_val(pte) & PTE_RDONLY))
#else
#define pte_hw_dirty(pte) (0)
#endif
#define pte_sw_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY))
#define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte))
#define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID))
#define pte_valid_user(pte) \
((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
#define pte_valid_not_user(pte) \
@@ -209,20 +225,49 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
}
}
struct mm_struct;
struct vm_area_struct;
extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
/*
* PTE bits configuration in the presence of hardware Dirty Bit Management
* (PTE_WRITE == PTE_DBM):
*
* Dirty Writable | PTE_RDONLY PTE_WRITE PTE_DIRTY (sw)
* 0 0 | 1 0 0
* 0 1 | 1 1 0
* 1 0 | 1 0 1
* 1 1 | 0 1 x
*
* When hardware DBM is not present, the software PTE_DIRTY bit is updated via
* the page fault mechanism. Checking the dirty status of a pte becomes:
*
* PTE_DIRTY || !PTE_RDONLY
*/
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
if (pte_valid_user(pte)) {
if (!pte_special(pte) && pte_exec(pte))
__sync_icache_dcache(pte, addr);
if (pte_sw_dirty(pte) && pte_write(pte))
pte_val(pte) &= ~PTE_RDONLY;
else
pte_val(pte) |= PTE_RDONLY;
}
/*
* If the existing pte is valid, check for potential race with
* hardware updates of the pte (ptep_set_access_flags safely changes
* valid ptes without going through an invalid entry).
*/
if (IS_ENABLED(CONFIG_DEBUG_VM) && IS_ENABLED(CONFIG_ARM64_HW_AFDBM) &&
pte_valid(*ptep)) {
BUG_ON(!pte_young(pte));
BUG_ON(pte_write(*ptep) && !pte_dirty(pte));
}
set_pte(ptep, pte);
}
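The BUG_ON()s above encode the only transitions hardware DBM can make: it may set the Access flag and it may clear PTE_RDONLY, never the reverse. A hedged restatement of that invariant as a plain C predicate (hypothetical helper, reusing the simplified pteval_t definitions from the sketches above):

#define PTE_VALID ((pteval_t)1 << 0)
#define PTE_DBM   ((pteval_t)1 << 51)	/* == PTE_WRITE when HW_AFDBM is on */

/* True if replacing 'old' with 'new' cannot discard a concurrent
 * hardware update of the access or dirty state. */
static int pte_update_is_safe(pteval_t old, pteval_t new)
{
	if (!(old & PTE_VALID))
		return 1;			/* hw never touches invalid entries */
	if (!(new & PTE_AF))
		return 0;			/* may revert a hw Access-flag set */
	if ((old & PTE_DBM) && !pte_dirty(new))
		return 0;			/* may revert a hw dirty (RDONLY clear) */
	return 1;
}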
@@ -461,6 +506,9 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY |
PTE_PROT_NONE | PTE_WRITE | PTE_TYPE_MASK;
/* preserve the hardware dirty information */
if (pte_hw_dirty(pte))
newprot |= PTE_DIRTY;
pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
return pte;
}
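The point of the two added lines: pte_modify() rewrites the permission bits from newprot, so the hardware's cleared AP[2] would be overwritten and the dirty information lost. Folding the hardware state into the software PTE_DIRTY bit first preserves it. A hedged sketch of just that transfer step (illustration only, reusing the simplified definitions above):

/* Latch hw dirty (PTE_RDONLY cleared by the MMU) into the sw dirty bit
 * so it survives a protection change that rewrites PTE_RDONLY. */
static pteval_t pte_transfer_hw_dirty(pteval_t pte)
{
	if (pte_hw_dirty(pte))
		pte |= PTE_DIRTY;
	return pte;
}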
@@ -470,6 +518,101 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
}
#ifdef CONFIG_ARM64_HW_AFDBM
/*
* Atomic pte/pmd modifications.
*/
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
unsigned long address,
pte_t *ptep)
{
pteval_t pteval;
unsigned int tmp, res;
asm volatile("// ptep_test_and_clear_young\n"
" prfm pstl1strm, %2\n"
"1: ldxr %0, %2\n"
" ubfx %w3, %w0, %5, #1 // extract PTE_AF (young)\n"
" and %0, %0, %4 // clear PTE_AF\n"
" stxr %w1, %0, %2\n"
" cbnz %w1, 1b\n"
: "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)), "=&r" (res)
: "L" (~PTE_AF), "I" (ilog2(PTE_AF)));
return res;
}
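For readers less fluent in inline assembly, the loop above behaves like this hedged C sketch using GCC atomics (illustration, not the kernel implementation; definitions reused from the earlier sketches): record whether PTE_AF was set, write the value back with PTE_AF cleared, and retry if the entry changed in between.

static int ptep_test_and_clear_young_sketch(pteval_t *ptep)
{
	pteval_t old = __atomic_load_n(ptep, __ATOMIC_RELAXED);

	while (!__atomic_compare_exchange_n(ptep, &old, old & ~PTE_AF, 1,
					    __ATOMIC_RELAXED, __ATOMIC_RELAXED))
		;	/* pte changed under us: old is refreshed, retry */
	return !!(old & PTE_AF);
}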
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long address,
pmd_t *pmdp)
{
return ptep_test_and_clear_young(vma, address, (pte_t *)pmdp);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
unsigned long address, pte_t *ptep)
{
pteval_t old_pteval;
unsigned int tmp;
asm volatile("// ptep_get_and_clear\n"
" prfm pstl1strm, %2\n"
"1: ldxr %0, %2\n"
" stxr %w1, xzr, %2\n"
" cbnz %w1, 1b\n"
: "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)));
return __pte(old_pteval);
}
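Because the new value here is simply zero, the ldxr/stxr loop amounts to an atomic exchange; a hedged C equivalent (same simplified types as above):

static pteval_t ptep_get_and_clear_sketch(pteval_t *ptep)
{
	/* Swap in an invalid (zero) entry and return the old pte,
	 * including any AF/dirty updates the hardware made. */
	return __atomic_exchange_n(ptep, (pteval_t)0, __ATOMIC_RELAXED);
}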
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
unsigned long address, pmd_t *pmdp)
{
return pte_pmd(ptep_get_and_clear(mm, address, (pte_t *)pmdp));
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
* ptep_set_wrprotect - mark read-only while transferring potential hardware
* dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
*/
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
{
pteval_t pteval;
unsigned long tmp;
asm volatile("// ptep_set_wrprotect\n"
" prfm pstl1strm, %2\n"
"1: ldxr %0, %2\n"
" tst %0, %4 // check for hw dirty (!PTE_RDONLY)\n"
" csel %1, %3, xzr, eq // set PTE_DIRTY|PTE_RDONLY if dirty\n"
" orr %0, %0, %1 // if !dirty, PTE_RDONLY is already set\n"
" and %0, %0, %5 // clear PTE_WRITE/PTE_DBM\n"
" stxr %w1, %0, %2\n"
" cbnz %w1, 1b\n"
: "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))
: "r" (PTE_DIRTY|PTE_RDONLY), "L" (PTE_RDONLY), "L" (~PTE_WRITE)
: "cc");
}
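Step by step, the asm loop tests PTE_RDONLY, conditionally latches the hardware dirty state into PTE_DIRTY, sets PTE_RDONLY, and clears PTE_WRITE/PTE_DBM, all within one exclusive section. The same logic as a hedged C sketch (GCC atomics, simplified definitions from the sketches above):

static void ptep_set_wrprotect_sketch(pteval_t *ptep)
{
	pteval_t old = __atomic_load_n(ptep, __ATOMIC_RELAXED);
	pteval_t new;

	do {
		new = old;
		if (!(new & PTE_RDONLY))	/* hw dirty */
			new |= PTE_DIRTY;	/* transfer to sw bit */
		new |= PTE_RDONLY;		/* mark read-only */
		new &= ~PTE_DBM;		/* clear PTE_WRITE/PTE_DBM */
	} while (!__atomic_compare_exchange_n(ptep, &old, new, 1,
					      __ATOMIC_RELAXED, __ATOMIC_RELAXED));
}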
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
unsigned long address, pmd_t *pmdp)
{
ptep_set_wrprotect(mm, address, (pte_t *)pmdp);
}
#endif
#endif /* CONFIG_ARM64_HW_AFDBM */
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
...
@@ -196,6 +196,19 @@ ENTRY(__cpu_setup)
*/
mrs x9, ID_AA64MMFR0_EL1
bfi x10, x9, #32, #3
#ifdef CONFIG_ARM64_HW_AFDBM
/*
* Hardware update of the Access and Dirty bits.
*/
mrs x9, ID_AA64MMFR1_EL1
and x9, x9, #0xf
cbz x9, 2f
cmp x9, #2
b.lt 1f
orr x10, x10, #TCR_HD // hardware Dirty flag update
1: orr x10, x10, #TCR_HA // hardware Access flag update
2:
#endif /* CONFIG_ARM64_HW_AFDBM */
msr tcr_el1, x10
ret // return to head.S
ENDPROC(__cpu_setup)
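The field read above is HAFDBS, bits [3:0] of ID_AA64MMFR1_EL1: 0 means no hardware support, 1 means Access flag updates only, 2 means Access flag plus dirty state management. A hedged C rendering of the same feature gating (stand-alone illustration):

#include <stdint.h>

#define TCR_HA ((uint64_t)1 << 39)	/* hardware Access flag update */
#define TCR_HD ((uint64_t)1 << 40)	/* hardware Dirty flag update */

static uint64_t tcr_hw_afdbm_bits(uint64_t id_aa64mmfr1)
{
	uint64_t hafdbs = id_aa64mmfr1 & 0xf;	/* HAFDBS field */
	uint64_t tcr = 0;

	if (hafdbs >= 1)
		tcr |= TCR_HA;
	if (hafdbs >= 2)
		tcr |= TCR_HD;
	return tcr;
}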
...