Commit baeedc71 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'prep-for-5level'

Merge 5-level page table prep from Kirill Shutemov:
 "Here's relatively low-risk part of 5-level paging patchset. Merging it
  now will make enabling x86 5-level paging in v4.12 easier.

  The first patch is actually x86-specific: detect 5-level paging
  support. It boils down to a single define.

  The rest of the patchset converts the Linux MMU abstraction from 4- to
  5-level paging.

  Enabling the new abstraction in most cases requires adding a single line
  of arch-specific code. The rest is taken care of by asm-generic/.

  Changes to mm/ code are mostly mechanical: add support for the new page
  table level -- p4d_t -- where we deal with pud_t now.

  v2:
   - fix build on microblaze (Michal);
   - comment for __ARCH_HAS_5LEVEL_HACK in kasan_populate_zero_shadow();
   - acks from Michal"

* emailed patches from Kirill A Shutemov <kirill.shutemov@linux.intel.com>:
  mm: introduce __p4d_alloc()
  mm: convert generic code to 5-level paging
  asm-generic: introduce <asm-generic/pgtable-nop4d.h>
  arch, mm: convert all architectures to use 5level-fixup.h
  asm-generic: introduce __ARCH_USE_5LEVEL_HACK
  asm-generic: introduce 5level-fixup.h
  x86/cpufeature: Add 5-level paging detection
parents 8fe3ccae 90eceff1
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#define _ASM_ARC_HUGEPAGE_H #define _ASM_ARC_HUGEPAGE_H
#include <linux/types.h> #include <linux/types.h>
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h> #include <asm-generic/pgtable-nopmd.h>
static inline pte_t pmd_pte(pmd_t pmd) static inline pte_t pmd_pte(pmd_t pmd)
......
...@@ -37,6 +37,7 @@ ...@@ -37,6 +37,7 @@
#include <asm/page.h> #include <asm/page.h>
#include <asm/mmu.h> #include <asm/mmu.h>
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h> #include <asm-generic/pgtable-nopmd.h>
#include <linux/const.h> #include <linux/const.h>
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#else #else
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopud.h> #include <asm-generic/pgtable-nopud.h>
#include <asm/memory.h> #include <asm/memory.h>
#include <asm/pgtable-hwdef.h> #include <asm/pgtable-hwdef.h>
......
...@@ -55,9 +55,13 @@ typedef struct { pteval_t pgprot; } pgprot_t; ...@@ -55,9 +55,13 @@ typedef struct { pteval_t pgprot; } pgprot_t;
#define __pgprot(x) ((pgprot_t) { (x) } ) #define __pgprot(x) ((pgprot_t) { (x) } )
#if CONFIG_PGTABLE_LEVELS == 2 #if CONFIG_PGTABLE_LEVELS == 2
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h> #include <asm-generic/pgtable-nopmd.h>
#elif CONFIG_PGTABLE_LEVELS == 3 #elif CONFIG_PGTABLE_LEVELS == 3
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopud.h> #include <asm-generic/pgtable-nopud.h>
#elif CONFIG_PGTABLE_LEVELS == 4
#include <asm-generic/5level-fixup.h>
#endif #endif
#endif /* __ASM_PGTABLE_TYPES_H */ #endif /* __ASM_PGTABLE_TYPES_H */
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#ifndef __ASM_AVR32_PGTABLE_2LEVEL_H #ifndef __ASM_AVR32_PGTABLE_2LEVEL_H
#define __ASM_AVR32_PGTABLE_2LEVEL_H #define __ASM_AVR32_PGTABLE_2LEVEL_H
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h> #include <asm-generic/pgtable-nopmd.h>
/* /*
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#define _CRIS_PGTABLE_H #define _CRIS_PGTABLE_H
#include <asm/page.h> #include <asm/page.h>
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h> #include <asm-generic/pgtable-nopmd.h>
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#ifndef _ASM_PGTABLE_H #ifndef _ASM_PGTABLE_H
#define _ASM_PGTABLE_H #define _ASM_PGTABLE_H
#include <asm-generic/5level-fixup.h>
#include <asm/mem-layout.h> #include <asm/mem-layout.h>
#include <asm/setup.h> #include <asm/setup.h>
#include <asm/processor.h> #include <asm/processor.h>
......
#ifndef _H8300_PGTABLE_H #ifndef _H8300_PGTABLE_H
#define _H8300_PGTABLE_H #define _H8300_PGTABLE_H
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopud.h> #include <asm-generic/pgtable-nopud.h>
#include <asm-generic/pgtable.h> #include <asm-generic/pgtable.h>
#define pgtable_cache_init() do { } while (0) #define pgtable_cache_init() do { } while (0)
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
*/ */
#include <linux/swap.h> #include <linux/swap.h>
#include <asm/page.h> #include <asm/page.h>
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h> #include <asm-generic/pgtable-nopmd.h>
/* A handy thing to have if one has the RAM. Declared in head.S */ /* A handy thing to have if one has the RAM. Declared in head.S */
......
...@@ -587,8 +587,10 @@ extern struct page *zero_page_memmap_ptr; ...@@ -587,8 +587,10 @@ extern struct page *zero_page_memmap_ptr;
#if CONFIG_PGTABLE_LEVELS == 3 #if CONFIG_PGTABLE_LEVELS == 3
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopud.h> #include <asm-generic/pgtable-nopud.h>
#endif #endif
#include <asm-generic/5level-fixup.h>
#include <asm-generic/pgtable.h> #include <asm-generic/pgtable.h>
#endif /* _ASM_IA64_PGTABLE_H */ #endif /* _ASM_IA64_PGTABLE_H */
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#define _METAG_PGTABLE_H #define _METAG_PGTABLE_H
#include <asm/pgtable-bits.h> #include <asm/pgtable-bits.h>
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h> #include <asm-generic/pgtable-nopmd.h>
/* Invalid regions on Meta: 0x00000000-0x001FFFFF and 0xFFFF0000-0xFFFFFFFF */ /* Invalid regions on Meta: 0x00000000-0x001FFFFF and 0xFFFF0000-0xFFFFFFFF */
......
...@@ -95,7 +95,8 @@ typedef struct { unsigned long pgd; } pgd_t; ...@@ -95,7 +95,8 @@ typedef struct { unsigned long pgd; } pgd_t;
# else /* CONFIG_MMU */ # else /* CONFIG_MMU */
typedef struct { unsigned long ste[64]; } pmd_t; typedef struct { unsigned long ste[64]; } pmd_t;
typedef struct { pmd_t pue[1]; } pud_t; typedef struct { pmd_t pue[1]; } pud_t;
typedef struct { pud_t pge[1]; } pgd_t; typedef struct { pud_t p4e[1]; } p4d_t;
typedef struct { p4d_t pge[1]; } pgd_t;
# endif /* CONFIG_MMU */ # endif /* CONFIG_MMU */
# define pte_val(x) ((x).pte) # define pte_val(x) ((x).pte)
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <asm/cachectl.h> #include <asm/cachectl.h>
#include <asm/fixmap.h> #include <asm/fixmap.h>
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h> #include <asm-generic/pgtable-nopmd.h>
extern int temp_tlb_entry; extern int temp_tlb_entry;
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <asm/cachectl.h> #include <asm/cachectl.h>
#include <asm/fixmap.h> #include <asm/fixmap.h>
#define __ARCH_USE_5LEVEL_HACK
#if defined(CONFIG_PAGE_SIZE_64KB) && !defined(CONFIG_MIPS_VA_BITS_48) #if defined(CONFIG_PAGE_SIZE_64KB) && !defined(CONFIG_MIPS_VA_BITS_48)
#include <asm-generic/pgtable-nopmd.h> #include <asm-generic/pgtable-nopmd.h>
#else #else
......
...@@ -57,6 +57,7 @@ typedef struct page *pgtable_t; ...@@ -57,6 +57,7 @@ typedef struct page *pgtable_t;
#define __pgd(x) ((pgd_t) { (x) }) #define __pgd(x) ((pgd_t) { (x) })
#define __pgprot(x) ((pgprot_t) { (x) }) #define __pgprot(x) ((pgprot_t) { (x) })
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h> #include <asm-generic/pgtable-nopmd.h>
#endif /* !__ASSEMBLY__ */ #endif /* !__ASSEMBLY__ */
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#include <asm/pgtable-bits.h> #include <asm/pgtable-bits.h>
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h> #include <asm-generic/pgtable-nopmd.h>
#define FIRST_USER_ADDRESS 0UL #define FIRST_USER_ADDRESS 0UL
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#ifndef __ASM_OPENRISC_PGTABLE_H #ifndef __ASM_OPENRISC_PGTABLE_H
#define __ASM_OPENRISC_PGTABLE_H #define __ASM_OPENRISC_PGTABLE_H
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h> #include <asm-generic/pgtable-nopmd.h>
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
......
#ifndef _ASM_POWERPC_BOOK3S_32_PGTABLE_H #ifndef _ASM_POWERPC_BOOK3S_32_PGTABLE_H
#define _ASM_POWERPC_BOOK3S_32_PGTABLE_H #define _ASM_POWERPC_BOOK3S_32_PGTABLE_H
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h> #include <asm-generic/pgtable-nopmd.h>
#include <asm/book3s/32/hash.h> #include <asm/book3s/32/hash.h>
......
#ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ #ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
#define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ #define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
#include <asm-generic/5level-fixup.h>
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
#include <linux/mmdebug.h> #include <linux/mmdebug.h>
#endif #endif
/* /*
* Common bits between hash and Radix page table * Common bits between hash and Radix page table
*/ */
......
#ifndef _ASM_POWERPC_NOHASH_32_PGTABLE_H #ifndef _ASM_POWERPC_NOHASH_32_PGTABLE_H
#define _ASM_POWERPC_NOHASH_32_PGTABLE_H #define _ASM_POWERPC_NOHASH_32_PGTABLE_H
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h> #include <asm-generic/pgtable-nopmd.h>
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
......
#ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H
#define _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H #define _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H
#include <asm-generic/5level-fixup.h>
/* /*
* Entries per page directory level. The PTE level must use a 64b record * Entries per page directory level. The PTE level must use a 64b record
* for each page table entry. The PMD and PGD level use a 32b record for * for each page table entry. The PMD and PGD level use a 32b record for
......
#ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H
#define _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H #define _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopud.h> #include <asm-generic/pgtable-nopud.h>
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
* the S390 page table tree. * the S390 page table tree.
*/ */
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
#include <asm-generic/5level-fixup.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/mm_types.h> #include <linux/mm_types.h>
#include <linux/page-flags.h> #include <linux/page-flags.h>
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
#define _ASM_SCORE_PGTABLE_H #define _ASM_SCORE_PGTABLE_H
#include <linux/const.h> #include <linux/const.h>
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h> #include <asm-generic/pgtable-nopmd.h>
#include <asm/fixmap.h> #include <asm/fixmap.h>
......
#ifndef __ASM_SH_PGTABLE_2LEVEL_H #ifndef __ASM_SH_PGTABLE_2LEVEL_H
#define __ASM_SH_PGTABLE_2LEVEL_H #define __ASM_SH_PGTABLE_2LEVEL_H
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h> #include <asm-generic/pgtable-nopmd.h>
/* /*
......
#ifndef __ASM_SH_PGTABLE_3LEVEL_H #ifndef __ASM_SH_PGTABLE_3LEVEL_H
#define __ASM_SH_PGTABLE_3LEVEL_H #define __ASM_SH_PGTABLE_3LEVEL_H
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopud.h> #include <asm-generic/pgtable-nopud.h>
/* /*
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
* the SpitFire page tables. * the SpitFire page tables.
*/ */
#include <asm-generic/5level-fixup.h>
#include <linux/compiler.h> #include <linux/compiler.h>
#include <linux/const.h> #include <linux/const.h>
#include <asm/types.h> #include <asm/types.h>
......
...@@ -74,6 +74,7 @@ extern unsigned long VMALLOC_RESERVE /* = CONFIG_VMALLOC_RESERVE */; ...@@ -74,6 +74,7 @@ extern unsigned long VMALLOC_RESERVE /* = CONFIG_VMALLOC_RESERVE */;
#define MAXMEM (_VMALLOC_START - PAGE_OFFSET) #define MAXMEM (_VMALLOC_START - PAGE_OFFSET)
/* We have no pmd or pud since we are strictly a two-level page table */ /* We have no pmd or pud since we are strictly a two-level page table */
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h> #include <asm-generic/pgtable-nopmd.h>
static inline int pud_huge_page(pud_t pud) { return 0; } static inline int pud_huge_page(pud_t pud) { return 0; }
......
...@@ -59,6 +59,7 @@ ...@@ -59,6 +59,7 @@
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
/* We have no pud since we are a three-level page table. */ /* We have no pud since we are a three-level page table. */
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopud.h> #include <asm-generic/pgtable-nopud.h>
/* /*
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#ifndef __UM_PGTABLE_2LEVEL_H #ifndef __UM_PGTABLE_2LEVEL_H
#define __UM_PGTABLE_2LEVEL_H #define __UM_PGTABLE_2LEVEL_H
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h> #include <asm-generic/pgtable-nopmd.h>
/* PGDIR_SHIFT determines what a third-level page table entry can map */ /* PGDIR_SHIFT determines what a third-level page table entry can map */
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#ifndef __UM_PGTABLE_3LEVEL_H #ifndef __UM_PGTABLE_3LEVEL_H
#define __UM_PGTABLE_3LEVEL_H #define __UM_PGTABLE_3LEVEL_H
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopud.h> #include <asm-generic/pgtable-nopud.h>
/* PGDIR_SHIFT determines what a third-level page table entry can map */ /* PGDIR_SHIFT determines what a third-level page table entry can map */
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#ifndef __UNICORE_PGTABLE_H__ #ifndef __UNICORE_PGTABLE_H__
#define __UNICORE_PGTABLE_H__ #define __UNICORE_PGTABLE_H__
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h> #include <asm-generic/pgtable-nopmd.h>
#include <asm/cpu-single.h> #include <asm/cpu-single.h>
......
...@@ -289,7 +289,8 @@ ...@@ -289,7 +289,8 @@
#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
#define X86_FEATURE_RDPID (16*32+ 22) /* RDPID instruction */ #define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
......
...@@ -273,6 +273,8 @@ static inline pgdval_t pgd_flags(pgd_t pgd) ...@@ -273,6 +273,8 @@ static inline pgdval_t pgd_flags(pgd_t pgd)
} }
#if CONFIG_PGTABLE_LEVELS > 3 #if CONFIG_PGTABLE_LEVELS > 3
#include <asm-generic/5level-fixup.h>
typedef struct { pudval_t pud; } pud_t; typedef struct { pudval_t pud; } pud_t;
static inline pud_t native_make_pud(pmdval_t val) static inline pud_t native_make_pud(pmdval_t val)
...@@ -285,6 +287,7 @@ static inline pudval_t native_pud_val(pud_t pud) ...@@ -285,6 +287,7 @@ static inline pudval_t native_pud_val(pud_t pud)
return pud.pud; return pud.pud;
} }
#else #else
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopud.h> #include <asm-generic/pgtable-nopud.h>
static inline pudval_t native_pud_val(pud_t pud) static inline pudval_t native_pud_val(pud_t pud)
...@@ -306,6 +309,7 @@ static inline pmdval_t native_pmd_val(pmd_t pmd) ...@@ -306,6 +309,7 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
return pmd.pmd; return pmd.pmd;
} }
#else #else
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h> #include <asm-generic/pgtable-nopmd.h>
static inline pmdval_t native_pmd_val(pmd_t pmd) static inline pmdval_t native_pmd_val(pmd_t pmd)
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#ifndef _XTENSA_PGTABLE_H #ifndef _XTENSA_PGTABLE_H
#define _XTENSA_PGTABLE_H #define _XTENSA_PGTABLE_H
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h> #include <asm-generic/pgtable-nopmd.h>
#include <asm/page.h> #include <asm/page.h>
#include <asm/kmem_layout.h> #include <asm/kmem_layout.h>
......
...@@ -219,15 +219,20 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr, ...@@ -219,15 +219,20 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
int write, unsigned long *paddr, int *pageshift) int write, unsigned long *paddr, int *pageshift)
{ {
pgd_t *pgdp; pgd_t *pgdp;
pmd_t *pmdp; p4d_t *p4dp;
pud_t *pudp; pud_t *pudp;
pmd_t *pmdp;
pte_t pte; pte_t pte;
pgdp = pgd_offset(vma->vm_mm, vaddr); pgdp = pgd_offset(vma->vm_mm, vaddr);
if (unlikely(pgd_none(*pgdp))) if (unlikely(pgd_none(*pgdp)))
goto err; goto err;
pudp = pud_offset(pgdp, vaddr); p4dp = p4d_offset(pgdp, vaddr);
if (unlikely(p4d_none(*p4dp)))
goto err;
pudp = pud_offset(p4dp, vaddr);
if (unlikely(pud_none(*pudp))) if (unlikely(pud_none(*pudp)))
goto err; goto err;
......
...@@ -265,6 +265,7 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx, ...@@ -265,6 +265,7 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
{ {
struct mm_struct *mm = ctx->mm; struct mm_struct *mm = ctx->mm;
pgd_t *pgd; pgd_t *pgd;
p4d_t *p4d;
pud_t *pud; pud_t *pud;
pmd_t *pmd, _pmd; pmd_t *pmd, _pmd;
pte_t *pte; pte_t *pte;
...@@ -275,7 +276,10 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx, ...@@ -275,7 +276,10 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
pgd = pgd_offset(mm, address); pgd = pgd_offset(mm, address);
if (!pgd_present(*pgd)) if (!pgd_present(*pgd))
goto out; goto out;
pud = pud_offset(pgd, address); p4d = p4d_offset(pgd, address);
if (!p4d_present(*p4d))
goto out;
pud = pud_offset(p4d, address);
if (!pud_present(*pud)) if (!pud_present(*pud))
goto out; goto out;
pmd = pmd_offset(pud, address); pmd = pmd_offset(pud, address);
......
...@@ -15,7 +15,6 @@ ...@@ -15,7 +15,6 @@
((unlikely(pgd_none(*(pud))) && __pmd_alloc(mm, pud, address))? \ ((unlikely(pgd_none(*(pud))) && __pmd_alloc(mm, pud, address))? \
NULL: pmd_offset(pud, address)) NULL: pmd_offset(pud, address))
#define pud_alloc(mm, pgd, address) (pgd)
#define pud_offset(pgd, start) (pgd) #define pud_offset(pgd, start) (pgd)
#define pud_none(pud) 0 #define pud_none(pud) 0
#define pud_bad(pud) 0 #define pud_bad(pud) 0
...@@ -35,4 +34,6 @@ ...@@ -35,4 +34,6 @@
#undef pud_addr_end #undef pud_addr_end
#define pud_addr_end(addr, end) (end) #define pud_addr_end(addr, end) (end)
#include <asm-generic/5level-fixup.h>
#endif #endif
#ifndef _5LEVEL_FIXUP_H
#define _5LEVEL_FIXUP_H
#define __ARCH_HAS_5LEVEL_HACK
#define __PAGETABLE_P4D_FOLDED
#define P4D_SHIFT PGDIR_SHIFT
#define P4D_SIZE PGDIR_SIZE
#define P4D_MASK PGDIR_MASK
#define PTRS_PER_P4D 1
#define p4d_t pgd_t
#define pud_alloc(mm, p4d, address) \
((unlikely(pgd_none(*(p4d))) && __pud_alloc(mm, p4d, address)) ? \
NULL : pud_offset(p4d, address))
#define p4d_alloc(mm, pgd, address) (pgd)
#define p4d_offset(pgd, start) (pgd)
#define p4d_none(p4d) 0
#define p4d_bad(p4d) 0
#define p4d_present(p4d) 1
#define p4d_ERROR(p4d) do { } while (0)
#define p4d_clear(p4d) pgd_clear(p4d)
#define p4d_val(p4d) pgd_val(p4d)
#define p4d_populate(mm, p4d, pud) pgd_populate(mm, p4d, pud)
#define p4d_page(p4d) pgd_page(p4d)
#define p4d_page_vaddr(p4d) pgd_page_vaddr(p4d)
#define __p4d(x) __pgd(x)
#define set_p4d(p4dp, p4d) set_pgd(p4dp, p4d)
#undef p4d_free_tlb
#define p4d_free_tlb(tlb, x, addr) do { } while (0)
#define p4d_free(mm, x) do { } while (0)
#define __p4d_free_tlb(tlb, x, addr) do { } while (0)
#undef p4d_addr_end
#define p4d_addr_end(addr, end) (end)
#endif
#ifndef _PGTABLE_NOP4D_HACK_H
#define _PGTABLE_NOP4D_HACK_H
/*
 * pud folding for use together with <asm-generic/5level-fixup.h>:
 * the pud level is folded directly into the pgd (pud_t wraps a pgd_t),
 * while the included fixup header supplies the p4d names as macros.
 */
#ifndef __ASSEMBLY__
#include <asm-generic/5level-fixup.h>
/* Marks the pud level as folded for code that tests this macro. */
#define __PAGETABLE_PUD_FOLDED
/*
 * Having the pud type consist of a pgd gets the size right, and allows
 * us to conceptually access the pgd entry that this pud is folded into
 * without casting.
 */
typedef struct { pgd_t pgd; } pud_t;
/* The folded pud covers the same range as a pgd entry and has one slot. */
#define PUD_SHIFT PGDIR_SHIFT
#define PTRS_PER_PUD 1
#define PUD_SIZE (1UL << PUD_SHIFT)
#define PUD_MASK (~(PUD_SIZE-1))
/*
 * The "pgd_xxx()" functions here are trivial when the pud is folded
 * into the pgd: the pud is never bad, and a pud always exists (as it's
 * folded into the pgd entry), so the predicates are constants and
 * pgd_clear() does nothing.
 */
static inline int pgd_none(pgd_t pgd) { return 0; }
static inline int pgd_bad(pgd_t pgd) { return 0; }
static inline int pgd_present(pgd_t pgd) { return 1; }
static inline void pgd_clear(pgd_t *pgd) { }
/* Report a bad pud through the pgd it wraps. */
#define pud_ERROR(pud) (pgd_ERROR((pud).pgd))
#define pgd_populate(mm, pgd, pud) do { } while (0)
/*
 * (puds are folded into pgds so this doesn't get actually called,
 * but the define is needed for a generic inline function.)
 */
#define set_pgd(pgdptr, pgdval) set_pud((pud_t *)(pgdptr), (pud_t) { pgdval })
/* The pud "table" is the pgd entry itself; 'address' is not used. */
static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
{
return (pud_t *)pgd;
}
/* Value accessors and constructors go through the wrapped pgd. */
#define pud_val(x) (pgd_val((x).pgd))
#define __pud(x) ((pud_t) { __pgd(x) })
/* pgd-level page lookups are answered by the pud-level helpers. */
#define pgd_page(pgd) (pud_page((pud_t){ pgd }))
#define pgd_page_vaddr(pgd) (pud_page_vaddr((pud_t){ pgd }))
/*
 * allocating and freeing a pud is trivial: the 1-entry pud is
 * inside the pgd, so has no extra memory associated with it.
 */
#define pud_alloc_one(mm, address) NULL
#define pud_free(mm, x) do { } while (0)
#define __pud_free_tlb(tlb, x, a) do { } while (0)
/* Replace any earlier definition: with one pud entry the result is 'end'. */
#undef pud_addr_end
#define pud_addr_end(addr, end) (end)
#endif /* __ASSEMBLY__ */
#endif /* _PGTABLE_NOP4D_HACK_H */
#ifndef _PGTABLE_NOP4D_H
#define _PGTABLE_NOP4D_H
/*
 * Folds the p4d page-table level into the pgd using a real wrapper type:
 * p4d_t contains a pgd_t, there is exactly one p4d entry, and the
 * pgd-level predicates become constants.
 */
#ifndef __ASSEMBLY__
/* Marks the p4d level as folded for code that tests this macro. */
#define __PAGETABLE_P4D_FOLDED
/* Wrapping a pgd_t keeps the size identical and avoids casts on access. */
typedef struct { pgd_t pgd; } p4d_t;
/* The folded p4d covers the same range as a pgd entry and has one slot. */
#define P4D_SHIFT PGDIR_SHIFT
#define PTRS_PER_P4D 1
#define P4D_SIZE (1UL << P4D_SHIFT)
#define P4D_MASK (~(P4D_SIZE-1))
/*
 * The "pgd_xxx()" functions here are trivial when the p4d is folded
 * into the pgd: the p4d is never bad, and a p4d always exists (as it's
 * folded into the pgd entry), so the predicates are constants and
 * pgd_clear() does nothing.
 */
static inline int pgd_none(pgd_t pgd) { return 0; }
static inline int pgd_bad(pgd_t pgd) { return 0; }
static inline int pgd_present(pgd_t pgd) { return 1; }
static inline void pgd_clear(pgd_t *pgd) { }
/* Report a bad p4d through the pgd it wraps. */
#define p4d_ERROR(p4d) (pgd_ERROR((p4d).pgd))
#define pgd_populate(mm, pgd, p4d) do { } while (0)
/*
 * (p4ds are folded into pgds so this doesn't get actually called,
 * but the define is needed for a generic inline function.)
 */
#define set_pgd(pgdptr, pgdval) set_p4d((p4d_t *)(pgdptr), (p4d_t) { pgdval })
/* The p4d "table" is the pgd entry itself; 'address' is not used. */
static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
{
return (p4d_t *)pgd;
}
/* Value accessors and constructors go through the wrapped pgd. */
#define p4d_val(x) (pgd_val((x).pgd))
#define __p4d(x) ((p4d_t) { __pgd(x) })
/* pgd-level page lookups are answered by the p4d-level helpers. */
#define pgd_page(pgd) (p4d_page((p4d_t){ pgd }))
#define pgd_page_vaddr(pgd) (p4d_page_vaddr((p4d_t){ pgd }))
/*
 * allocating and freeing a p4d is trivial: the 1-entry p4d is
 * inside the pgd, so has no extra memory associated with it.
 */
#define p4d_alloc_one(mm, address) NULL
#define p4d_free(mm, x) do { } while (0)
#define __p4d_free_tlb(tlb, x, a) do { } while (0)
/* Replace any earlier definition: with one p4d entry the result is 'end'. */
#undef p4d_addr_end
#define p4d_addr_end(addr, end) (end)
#endif /* __ASSEMBLY__ */
#endif /* _PGTABLE_NOP4D_H */
...@@ -3,52 +3,57 @@ ...@@ -3,52 +3,57 @@
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
#ifdef __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nop4d-hack.h>
#else
#include <asm-generic/pgtable-nop4d.h>
#define __PAGETABLE_PUD_FOLDED #define __PAGETABLE_PUD_FOLDED
/* /*
* Having the pud type consist of a pgd gets the size right, and allows * Having the pud type consist of a p4d gets the size right, and allows
* us to conceptually access the pgd entry that this pud is folded into * us to conceptually access the p4d entry that this pud is folded into
* without casting. * without casting.
*/ */
typedef struct { pgd_t pgd; } pud_t; typedef struct { p4d_t p4d; } pud_t;
#define PUD_SHIFT PGDIR_SHIFT #define PUD_SHIFT P4D_SHIFT
#define PTRS_PER_PUD 1 #define PTRS_PER_PUD 1
#define PUD_SIZE (1UL << PUD_SHIFT) #define PUD_SIZE (1UL << PUD_SHIFT)
#define PUD_MASK (~(PUD_SIZE-1)) #define PUD_MASK (~(PUD_SIZE-1))
/* /*
* The "pgd_xxx()" functions here are trivial for a folded two-level * The "p4d_xxx()" functions here are trivial for a folded two-level
* setup: the pud is never bad, and a pud always exists (as it's folded * setup: the pud is never bad, and a pud always exists (as it's folded
* into the pgd entry) * into the p4d entry)
*/ */
static inline int pgd_none(pgd_t pgd) { return 0; } static inline int p4d_none(p4d_t p4d) { return 0; }
static inline int pgd_bad(pgd_t pgd) { return 0; } static inline int p4d_bad(p4d_t p4d) { return 0; }
static inline int pgd_present(pgd_t pgd) { return 1; } static inline int p4d_present(p4d_t p4d) { return 1; }
static inline void pgd_clear(pgd_t *pgd) { } static inline void p4d_clear(p4d_t *p4d) { }
#define pud_ERROR(pud) (pgd_ERROR((pud).pgd)) #define pud_ERROR(pud) (p4d_ERROR((pud).p4d))
#define pgd_populate(mm, pgd, pud) do { } while (0) #define p4d_populate(mm, p4d, pud) do { } while (0)
/* /*
* (puds are folded into pgds so this doesn't get actually called, * (puds are folded into p4ds so this doesn't get actually called,
* but the define is needed for a generic inline function.) * but the define is needed for a generic inline function.)
*/ */
#define set_pgd(pgdptr, pgdval) set_pud((pud_t *)(pgdptr), (pud_t) { pgdval }) #define set_p4d(p4dptr, p4dval) set_pud((pud_t *)(p4dptr), (pud_t) { p4dval })
static inline pud_t * pud_offset(pgd_t * pgd, unsigned long address) static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
{ {
return (pud_t *)pgd; return (pud_t *)p4d;
} }
#define pud_val(x) (pgd_val((x).pgd)) #define pud_val(x) (p4d_val((x).p4d))
#define __pud(x) ((pud_t) { __pgd(x) } ) #define __pud(x) ((pud_t) { __p4d(x) })
#define pgd_page(pgd) (pud_page((pud_t){ pgd })) #define p4d_page(p4d) (pud_page((pud_t){ p4d }))
#define pgd_page_vaddr(pgd) (pud_page_vaddr((pud_t){ pgd })) #define p4d_page_vaddr(p4d) (pud_page_vaddr((pud_t){ p4d }))
/* /*
* allocating and freeing a pud is trivial: the 1-entry pud is * allocating and freeing a pud is trivial: the 1-entry pud is
* inside the pgd, so has no extra memory associated with it. * inside the p4d, so has no extra memory associated with it.
*/ */
#define pud_alloc_one(mm, address) NULL #define pud_alloc_one(mm, address) NULL
#define pud_free(mm, x) do { } while (0) #define pud_free(mm, x) do { } while (0)
...@@ -58,4 +63,5 @@ static inline pud_t * pud_offset(pgd_t * pgd, unsigned long address) ...@@ -58,4 +63,5 @@ static inline pud_t * pud_offset(pgd_t * pgd, unsigned long address)
#define pud_addr_end(addr, end) (end) #define pud_addr_end(addr, end) (end)
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
#endif /* !__ARCH_USE_5LEVEL_HACK */
#endif /* _PGTABLE_NOPUD_H */ #endif /* _PGTABLE_NOPUD_H */
...@@ -10,9 +10,9 @@ ...@@ -10,9 +10,9 @@
#include <linux/bug.h> #include <linux/bug.h>
#include <linux/errno.h> #include <linux/errno.h>
#if 4 - defined(__PAGETABLE_PUD_FOLDED) - defined(__PAGETABLE_PMD_FOLDED) != \ #if 5 - defined(__PAGETABLE_P4D_FOLDED) - defined(__PAGETABLE_PUD_FOLDED) - \
CONFIG_PGTABLE_LEVELS defined(__PAGETABLE_PMD_FOLDED) != CONFIG_PGTABLE_LEVELS
#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{PUD,PMD}_FOLDED #error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{P4D,PUD,PMD}_FOLDED
#endif #endif
/* /*
...@@ -424,6 +424,13 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) ...@@ -424,6 +424,13 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
(__boundary - 1 < (end) - 1)? __boundary: (end); \ (__boundary - 1 < (end) - 1)? __boundary: (end); \
}) })
/*
 * p4d_addr_end(addr, end): where a walk starting at 'addr' has to stop at
 * the p4d level -- the next P4D_SIZE-aligned boundary above 'addr', or
 * 'end' if that comes first.  Skipped when p4d_addr_end is already defined
 * (the p4d-folding headers define it to plain 'end').
 *
 * Both sides of the comparison are decremented, presumably so that a
 * boundary which wraps around to 0 compares as lying beyond 'end'
 * (assumes 'end' is an unsigned long -- confirm at call sites).
 * 'end' appears twice in the expansion, so it may be evaluated twice.
 */
#ifndef p4d_addr_end
#define p4d_addr_end(addr, end) \
({ unsigned long __boundary = ((addr) + P4D_SIZE) & P4D_MASK; \
(__boundary - 1 < (end) - 1)? __boundary: (end); \
})
#endif
#ifndef pud_addr_end #ifndef pud_addr_end
#define pud_addr_end(addr, end) \ #define pud_addr_end(addr, end) \
({ unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK; \ ({ unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK; \
...@@ -444,6 +451,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) ...@@ -444,6 +451,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
* Do the tests inline, but report and clear the bad entry in mm/memory.c. * Do the tests inline, but report and clear the bad entry in mm/memory.c.
*/ */
void pgd_clear_bad(pgd_t *); void pgd_clear_bad(pgd_t *);
void p4d_clear_bad(p4d_t *);
void pud_clear_bad(pud_t *); void pud_clear_bad(pud_t *);
void pmd_clear_bad(pmd_t *); void pmd_clear_bad(pmd_t *);
...@@ -458,6 +466,17 @@ static inline int pgd_none_or_clear_bad(pgd_t *pgd) ...@@ -458,6 +466,17 @@ static inline int pgd_none_or_clear_bad(pgd_t *pgd)
return 0; return 0;
} }
/*
 * Decide whether a page-table walk should skip this p4d entry.
 *
 * Returns 1 if the entry is none, or if it is bad (in which case it is
 * first handed to p4d_clear_bad()); returns 0 if the entry is usable.
 */
static inline int p4d_none_or_clear_bad(p4d_t *p4d)
{
	int skip = p4d_none(*p4d) ? 1 : 0;

	/* Only a non-empty entry needs the (rare) bad-entry check. */
	if (!skip && unlikely(p4d_bad(*p4d))) {
		p4d_clear_bad(p4d);
		skip = 1;
	}

	return skip;
}
static inline int pud_none_or_clear_bad(pud_t *pud) static inline int pud_none_or_clear_bad(pud_t *pud)
{ {
if (pud_none(*pud)) if (pud_none(*pud))
...@@ -844,11 +863,30 @@ static inline int pmd_protnone(pmd_t pmd) ...@@ -844,11 +863,30 @@ static inline int pmd_protnone(pmd_t pmd)
#endif /* CONFIG_MMU */ #endif /* CONFIG_MMU */
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
/*
 * Huge mappings at the p4d level.  With a folded p4d level both
 * operations are stubs that always return 0; otherwise they are only
 * declared here and must be defined elsewhere.
 */
#ifdef __PAGETABLE_P4D_FOLDED
static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot) { return 0; }
static inline int p4d_clear_huge(p4d_t *p4d) { return 0; }
#else
int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot);
int p4d_clear_huge(p4d_t *p4d);
#endif /* __PAGETABLE_P4D_FOLDED */
int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot); int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot); int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
int pud_clear_huge(pud_t *pud); int pud_clear_huge(pud_t *pud);
int pmd_clear_huge(pmd_t *pmd); int pmd_clear_huge(pmd_t *pmd);
#else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
/* Stub: ignores its arguments and always returns 0. */
static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot) { return 0; }
static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
{ {
return 0; return 0;
...@@ -857,6 +895,10 @@ static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) ...@@ -857,6 +895,10 @@ static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
{ {
return 0; return 0;
} }
/* Stub: ignores its argument and always returns 0. */
static inline int p4d_clear_huge(p4d_t *p4d) { return 0; }
static inline int pud_clear_huge(pud_t *pud) static inline int pud_clear_huge(pud_t *pud)
{ {
return 0; return 0;
......
...@@ -270,6 +270,12 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, ...@@ -270,6 +270,12 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
__pte_free_tlb(tlb, ptep, address); \ __pte_free_tlb(tlb, ptep, address); \
} while (0) } while (0)
#define pmd_free_tlb(tlb, pmdp, address) \
do { \
__tlb_adjust_range(tlb, address, PAGE_SIZE); \
__pmd_free_tlb(tlb, pmdp, address); \
} while (0)
#ifndef __ARCH_HAS_4LEVEL_HACK #ifndef __ARCH_HAS_4LEVEL_HACK
#define pud_free_tlb(tlb, pudp, address) \ #define pud_free_tlb(tlb, pudp, address) \
do { \ do { \
...@@ -278,11 +284,13 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, ...@@ -278,11 +284,13 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
} while (0) } while (0)
#endif #endif
#define pmd_free_tlb(tlb, pmdp, address) \ #ifndef __ARCH_HAS_5LEVEL_HACK
#define p4d_free_tlb(tlb, pudp, address) \
do { \ do { \
__tlb_adjust_range(tlb, address, PAGE_SIZE); \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \
__pmd_free_tlb(tlb, pmdp, address); \ __p4d_free_tlb(tlb, pudp, address); \
} while (0) } while (0)
#endif
#define tlb_migrate_finish(mm) do {} while (0) #define tlb_migrate_finish(mm) do {} while (0)
......
...@@ -122,7 +122,7 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, ...@@ -122,7 +122,7 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
pud_t *pud, int flags); pud_t *pud, int flags);
int pmd_huge(pmd_t pmd); int pmd_huge(pmd_t pmd);
int pud_huge(pud_t pmd); int pud_huge(pud_t pud);
unsigned long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
unsigned long address, unsigned long end, pgprot_t newprot); unsigned long address, unsigned long end, pgprot_t newprot);
...@@ -197,6 +197,9 @@ static inline void __unmap_hugepage_range(struct mmu_gather *tlb, ...@@ -197,6 +197,9 @@ static inline void __unmap_hugepage_range(struct mmu_gather *tlb,
#ifndef pgd_huge #ifndef pgd_huge
#define pgd_huge(x) 0 #define pgd_huge(x) 0
#endif #endif
#ifndef p4d_huge
#define p4d_huge(x) 0
#endif
#ifndef pgd_write #ifndef pgd_write
static inline int pgd_write(pgd_t pgd) static inline int pgd_write(pgd_t pgd)
......
...@@ -18,6 +18,7 @@ extern unsigned char kasan_zero_page[PAGE_SIZE]; ...@@ -18,6 +18,7 @@ extern unsigned char kasan_zero_page[PAGE_SIZE];
extern pte_t kasan_zero_pte[PTRS_PER_PTE]; extern pte_t kasan_zero_pte[PTRS_PER_PTE];
extern pmd_t kasan_zero_pmd[PTRS_PER_PMD]; extern pmd_t kasan_zero_pmd[PTRS_PER_PMD];
extern pud_t kasan_zero_pud[PTRS_PER_PUD]; extern pud_t kasan_zero_pud[PTRS_PER_PUD];
extern p4d_t kasan_zero_p4d[PTRS_PER_P4D];
void kasan_populate_zero_shadow(const void *shadow_start, void kasan_populate_zero_shadow(const void *shadow_start,
const void *shadow_end); const void *shadow_end);
......
...@@ -1560,14 +1560,24 @@ static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, ...@@ -1560,14 +1560,24 @@ static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
return ptep; return ptep;
} }
#ifdef __PAGETABLE_P4D_FOLDED
static inline int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd,
unsigned long address)
{
return 0;
}
#else
int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
#endif
#ifdef __PAGETABLE_PUD_FOLDED #ifdef __PAGETABLE_PUD_FOLDED
static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, static inline int __pud_alloc(struct mm_struct *mm, p4d_t *p4d,
unsigned long address) unsigned long address)
{ {
return 0; return 0;
} }
#else #else
int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address);
#endif #endif
#if defined(__PAGETABLE_PMD_FOLDED) || !defined(CONFIG_MMU) #if defined(__PAGETABLE_PMD_FOLDED) || !defined(CONFIG_MMU)
...@@ -1619,11 +1629,22 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address); ...@@ -1619,11 +1629,22 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
* Remove it when 4level-fixup.h has been removed. * Remove it when 4level-fixup.h has been removed.
*/ */
#if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK) #if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK)
static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
#ifndef __ARCH_HAS_5LEVEL_HACK
/*
 * Look up the p4d entry for @address beneath @pgd.
 *
 * When *@pgd is none, __p4d_alloc() is called first; a non-zero result
 * from it makes this helper return NULL.  Otherwise the result of
 * p4d_offset() is returned.
 */
static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd,
		unsigned long address)
{
	if (unlikely(pgd_none(*pgd)) && __p4d_alloc(mm, pgd, address))
		return NULL;

	return p4d_offset(pgd, address);
}
static inline pud_t *pud_alloc(struct mm_struct *mm, p4d_t *p4d,
unsigned long address)
{ {
return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))? return (unlikely(p4d_none(*p4d)) && __pud_alloc(mm, p4d, address)) ?
NULL: pud_offset(pgd, address); NULL : pud_offset(p4d, address);
} }
#endif /* !__ARCH_HAS_5LEVEL_HACK */
static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
{ {
...@@ -2385,7 +2406,8 @@ void sparse_mem_maps_populate_node(struct page **map_map, ...@@ -2385,7 +2406,8 @@ void sparse_mem_maps_populate_node(struct page **map_map,
struct page *sparse_mem_map_populate(unsigned long pnum, int nid); struct page *sparse_mem_map_populate(unsigned long pnum, int nid);
pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node); p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node); pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);
void *vmemmap_alloc_block(unsigned long size, int node); void *vmemmap_alloc_block(unsigned long size, int node);
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <asm/pgtable.h> #include <asm/pgtable.h>
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
static int __read_mostly ioremap_p4d_capable;
static int __read_mostly ioremap_pud_capable; static int __read_mostly ioremap_pud_capable;
static int __read_mostly ioremap_pmd_capable; static int __read_mostly ioremap_pmd_capable;
static int __read_mostly ioremap_huge_disabled; static int __read_mostly ioremap_huge_disabled;
...@@ -35,6 +36,11 @@ void __init ioremap_huge_init(void) ...@@ -35,6 +36,11 @@ void __init ioremap_huge_init(void)
} }
} }
static inline int ioremap_p4d_enabled(void)
{
return ioremap_p4d_capable;
}
static inline int ioremap_pud_enabled(void) static inline int ioremap_pud_enabled(void)
{ {
return ioremap_pud_capable; return ioremap_pud_capable;
...@@ -46,6 +52,7 @@ static inline int ioremap_pmd_enabled(void) ...@@ -46,6 +52,7 @@ static inline int ioremap_pmd_enabled(void)
} }
#else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
static inline int ioremap_p4d_enabled(void) { return 0; }
static inline int ioremap_pud_enabled(void) { return 0; } static inline int ioremap_pud_enabled(void) { return 0; }
static inline int ioremap_pmd_enabled(void) { return 0; } static inline int ioremap_pmd_enabled(void) { return 0; }
#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
...@@ -94,14 +101,14 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, ...@@ -94,14 +101,14 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
return 0; return 0;
} }
static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr,
unsigned long end, phys_addr_t phys_addr, pgprot_t prot) unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
{ {
pud_t *pud; pud_t *pud;
unsigned long next; unsigned long next;
phys_addr -= addr; phys_addr -= addr;
pud = pud_alloc(&init_mm, pgd, addr); pud = pud_alloc(&init_mm, p4d, addr);
if (!pud) if (!pud)
return -ENOMEM; return -ENOMEM;
do { do {
...@@ -120,6 +127,32 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, ...@@ -120,6 +127,32 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
return 0; return 0;
} }
/*
 * Walk the p4d entries under @pgd covering [addr, end) and map them.
 *
 * @phys_addr is biased by -@addr on entry so that "phys_addr + addr" is
 * the physical address matching the current virtual address in the loop.
 *
 * For each step, a huge p4d mapping is attempted via p4d_set_huge() only
 * when ioremap_p4d_enabled() is true, the step spans exactly P4D_SIZE and
 * the physical address is P4D_SIZE aligned.  If that attempt is not made
 * or returns zero, the range is passed down to ioremap_pud_range().
 *
 * Returns 0 on success, -ENOMEM if p4d_alloc() returns NULL or
 * ioremap_pud_range() returns non-zero.
 */
static inline int ioremap_p4d_range(pgd_t *pgd, unsigned long addr,
		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
{
	unsigned long next;
	p4d_t *p4d;

	phys_addr -= addr;

	p4d = p4d_alloc(&init_mm, pgd, addr);
	if (!p4d)
		return -ENOMEM;

	do {
		next = p4d_addr_end(addr, end);

		/* Short-circuit order matters: only try the huge mapping last. */
		if (ioremap_p4d_enabled() &&
		    ((next - addr) == P4D_SIZE) &&
		    IS_ALIGNED(phys_addr + addr, P4D_SIZE) &&
		    p4d_set_huge(p4d, phys_addr + addr, prot))
			continue;

		if (ioremap_pud_range(p4d, addr, next, phys_addr + addr, prot))
			return -ENOMEM;
	} while (p4d++, addr = next, addr != end);

	return 0;
}
int ioremap_page_range(unsigned long addr, int ioremap_page_range(unsigned long addr,
unsigned long end, phys_addr_t phys_addr, pgprot_t prot) unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
{ {
...@@ -135,7 +168,7 @@ int ioremap_page_range(unsigned long addr, ...@@ -135,7 +168,7 @@ int ioremap_page_range(unsigned long addr,
pgd = pgd_offset_k(addr); pgd = pgd_offset_k(addr);
do { do {
next = pgd_addr_end(addr, end); next = pgd_addr_end(addr, end);
err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, prot); err = ioremap_p4d_range(pgd, addr, next, phys_addr+addr, prot);
if (err) if (err)
break; break;
} while (pgd++, addr = next, addr != end); } while (pgd++, addr = next, addr != end);
......
...@@ -226,6 +226,7 @@ struct page *follow_page_mask(struct vm_area_struct *vma, ...@@ -226,6 +226,7 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
unsigned int *page_mask) unsigned int *page_mask)
{ {
pgd_t *pgd; pgd_t *pgd;
p4d_t *p4d;
pud_t *pud; pud_t *pud;
pmd_t *pmd; pmd_t *pmd;
spinlock_t *ptl; spinlock_t *ptl;
...@@ -243,8 +244,13 @@ struct page *follow_page_mask(struct vm_area_struct *vma, ...@@ -243,8 +244,13 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
pgd = pgd_offset(mm, address); pgd = pgd_offset(mm, address);
if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
return no_page_table(vma, flags); return no_page_table(vma, flags);
p4d = p4d_offset(pgd, address);
pud = pud_offset(pgd, address); if (p4d_none(*p4d))
return no_page_table(vma, flags);
BUILD_BUG_ON(p4d_huge(*p4d));
if (unlikely(p4d_bad(*p4d)))
return no_page_table(vma, flags);
pud = pud_offset(p4d, address);
if (pud_none(*pud)) if (pud_none(*pud))
return no_page_table(vma, flags); return no_page_table(vma, flags);
if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) { if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) {
...@@ -325,6 +331,7 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address, ...@@ -325,6 +331,7 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
struct page **page) struct page **page)
{ {
pgd_t *pgd; pgd_t *pgd;
p4d_t *p4d;
pud_t *pud; pud_t *pud;
pmd_t *pmd; pmd_t *pmd;
pte_t *pte; pte_t *pte;
...@@ -338,7 +345,9 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address, ...@@ -338,7 +345,9 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
else else
pgd = pgd_offset_gate(mm, address); pgd = pgd_offset_gate(mm, address);
BUG_ON(pgd_none(*pgd)); BUG_ON(pgd_none(*pgd));
pud = pud_offset(pgd, address); p4d = p4d_offset(pgd, address);
BUG_ON(p4d_none(*p4d));
pud = pud_offset(p4d, address);
BUG_ON(pud_none(*pud)); BUG_ON(pud_none(*pud));
pmd = pmd_offset(pud, address); pmd = pmd_offset(pud, address);
if (pmd_none(*pmd)) if (pmd_none(*pmd))
...@@ -1400,13 +1409,13 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, ...@@ -1400,13 +1409,13 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
return 1; return 1;
} }
static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr) int write, struct page **pages, int *nr)
{ {
unsigned long next; unsigned long next;
pud_t *pudp; pud_t *pudp;
pudp = pud_offset(&pgd, addr); pudp = pud_offset(&p4d, addr);
do { do {
pud_t pud = READ_ONCE(*pudp); pud_t pud = READ_ONCE(*pudp);
...@@ -1428,6 +1437,31 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, ...@@ -1428,6 +1437,31 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
return 1; return 1;
} }
/*
 * Walk the p4d entries under @pgd that cover [addr, end).
 *
 * Each entry is read once with READ_ONCE() into a local copy.  An entry
 * that is_hugepd() is handed to gup_huge_pd() with P4D_SHIFT; any other
 * populated entry is descended into with gup_pud_range().  @write, @pages
 * and @nr are passed through unchanged to those callees.
 *
 * Returns 1 when the whole range was walked, 0 as soon as an entry is
 * none or a callee returns zero.
 */
static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
			 int write, struct page **pages, int *nr)
{
	unsigned long next;
	p4d_t *p4dp;

	p4dp = p4d_offset(&pgd, addr);
	do {
		p4d_t p4d = READ_ONCE(*p4dp);

		next = p4d_addr_end(addr, end);
		if (p4d_none(p4d))
			return 0;
		BUILD_BUG_ON(p4d_huge(p4d));
		if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) {
			if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr,
					 P4D_SHIFT, next, write, pages, nr))
				return 0;
		} else if (!gup_pud_range(p4d, addr, next, write, pages, nr)) {
			/*
			 * Must step down to the pud level here: calling
			 * gup_p4d_range() again with the same range would
			 * recurse without ever making progress.
			 */
			return 0;
		}
	} while (p4dp++, addr = next, addr != end);

	return 1;
}
/* /*
* Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
* the regular GUP. It will only return non-negative values. * the regular GUP. It will only return non-negative values.
...@@ -1478,7 +1512,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, ...@@ -1478,7 +1512,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr, if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
PGDIR_SHIFT, next, write, pages, &nr)) PGDIR_SHIFT, next, write, pages, &nr))
break; break;
} else if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) } else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
break; break;
} while (pgdp++, addr = next, addr != end); } while (pgdp++, addr = next, addr != end);
local_irq_restore(flags); local_irq_restore(flags);
......
...@@ -2048,6 +2048,7 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, ...@@ -2048,6 +2048,7 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
bool freeze, struct page *page) bool freeze, struct page *page)
{ {
pgd_t *pgd; pgd_t *pgd;
p4d_t *p4d;
pud_t *pud; pud_t *pud;
pmd_t *pmd; pmd_t *pmd;
...@@ -2055,7 +2056,11 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, ...@@ -2055,7 +2056,11 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
if (!pgd_present(*pgd)) if (!pgd_present(*pgd))
return; return;
pud = pud_offset(pgd, address); p4d = p4d_offset(pgd, address);
if (!p4d_present(*p4d))
return;
pud = pud_offset(p4d, address);
if (!pud_present(*pud)) if (!pud_present(*pud))
return; return;
......
...@@ -4555,7 +4555,8 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) ...@@ -4555,7 +4555,8 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{ {
pgd_t *pgd = pgd_offset(mm, *addr); pgd_t *pgd = pgd_offset(mm, *addr);
pud_t *pud = pud_offset(pgd, *addr); p4d_t *p4d = p4d_offset(pgd, *addr);
pud_t *pud = pud_offset(p4d, *addr);
BUG_ON(page_count(virt_to_page(ptep)) == 0); BUG_ON(page_count(virt_to_page(ptep)) == 0);
if (page_count(virt_to_page(ptep)) == 1) if (page_count(virt_to_page(ptep)) == 1)
...@@ -4586,11 +4587,13 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, ...@@ -4586,11 +4587,13 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz) unsigned long addr, unsigned long sz)
{ {
pgd_t *pgd; pgd_t *pgd;
p4d_t *p4d;
pud_t *pud; pud_t *pud;
pte_t *pte = NULL; pte_t *pte = NULL;
pgd = pgd_offset(mm, addr); pgd = pgd_offset(mm, addr);
pud = pud_alloc(mm, pgd, addr); p4d = p4d_offset(pgd, addr);
pud = pud_alloc(mm, p4d, addr);
if (pud) { if (pud) {
if (sz == PUD_SIZE) { if (sz == PUD_SIZE) {
pte = (pte_t *)pud; pte = (pte_t *)pud;
...@@ -4610,18 +4613,22 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, ...@@ -4610,18 +4613,22 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{ {
pgd_t *pgd; pgd_t *pgd;
p4d_t *p4d;
pud_t *pud; pud_t *pud;
pmd_t *pmd = NULL; pmd_t *pmd;
pgd = pgd_offset(mm, addr); pgd = pgd_offset(mm, addr);
if (pgd_present(*pgd)) { if (!pgd_present(*pgd))
pud = pud_offset(pgd, addr); return NULL;
if (pud_present(*pud)) { p4d = p4d_offset(pgd, addr);
if (pud_huge(*pud)) if (!p4d_present(*p4d))
return (pte_t *)pud; return NULL;
pmd = pmd_offset(pud, addr); pud = pud_offset(p4d, addr);
} if (!pud_present(*pud))
} return NULL;
if (pud_huge(*pud))
return (pte_t *)pud;
pmd = pmd_offset(pud, addr);
return (pte_t *) pmd; return (pte_t *) pmd;
} }
......
...@@ -30,6 +30,9 @@ ...@@ -30,6 +30,9 @@
*/ */
unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss; unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss;
#if CONFIG_PGTABLE_LEVELS > 4
p4d_t kasan_zero_p4d[PTRS_PER_P4D] __page_aligned_bss;
#endif
#if CONFIG_PGTABLE_LEVELS > 3 #if CONFIG_PGTABLE_LEVELS > 3
pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss; pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss;
#endif #endif
...@@ -82,10 +85,10 @@ static void __init zero_pmd_populate(pud_t *pud, unsigned long addr, ...@@ -82,10 +85,10 @@ static void __init zero_pmd_populate(pud_t *pud, unsigned long addr,
} while (pmd++, addr = next, addr != end); } while (pmd++, addr = next, addr != end);
} }
static void __init zero_pud_populate(pgd_t *pgd, unsigned long addr, static void __init zero_pud_populate(p4d_t *p4d, unsigned long addr,
unsigned long end) unsigned long end)
{ {
pud_t *pud = pud_offset(pgd, addr); pud_t *pud = pud_offset(p4d, addr);
unsigned long next; unsigned long next;
do { do {
...@@ -107,6 +110,23 @@ static void __init zero_pud_populate(pgd_t *pgd, unsigned long addr, ...@@ -107,6 +110,23 @@ static void __init zero_pud_populate(pgd_t *pgd, unsigned long addr,
} while (pud++, addr = next, addr != end); } while (pud++, addr = next, addr != end);
} }
/*
 * Walk the p4d entries under @pgd covering [addr, end).
 *
 * Any entry that is none is populated with a PAGE_SIZE block obtained
 * from early_alloc(); every entry's sub-range is then handed to
 * zero_pud_populate().
 */
static void __init zero_p4d_populate(pgd_t *pgd, unsigned long addr,
				unsigned long end)
{
	unsigned long next;
	p4d_t *p4d;

	p4d = p4d_offset(pgd, addr);
	do {
		next = p4d_addr_end(addr, end);

		if (p4d_none(*p4d)) {
			void *table = early_alloc(PAGE_SIZE, NUMA_NO_NODE);

			p4d_populate(&init_mm, p4d, table);
		}

		zero_pud_populate(p4d, addr, next);
	} while (p4d++, addr = next, addr != end);
}
/** /**
* kasan_populate_zero_shadow - populate shadow memory region with * kasan_populate_zero_shadow - populate shadow memory region with
* kasan_zero_page * kasan_zero_page
...@@ -125,6 +145,7 @@ void __init kasan_populate_zero_shadow(const void *shadow_start, ...@@ -125,6 +145,7 @@ void __init kasan_populate_zero_shadow(const void *shadow_start,
next = pgd_addr_end(addr, end); next = pgd_addr_end(addr, end);
if (IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) { if (IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) {
p4d_t *p4d;
pud_t *pud; pud_t *pud;
pmd_t *pmd; pmd_t *pmd;
...@@ -135,9 +156,22 @@ void __init kasan_populate_zero_shadow(const void *shadow_start, ...@@ -135,9 +156,22 @@ void __init kasan_populate_zero_shadow(const void *shadow_start,
* 3,2 - level page tables where we don't have * 3,2 - level page tables where we don't have
* puds,pmds, so pgd_populate(), pud_populate() * puds,pmds, so pgd_populate(), pud_populate()
* is noops. * is noops.
*
* The ifndef is required to avoid build breakage.
*
* With 5level-fixup.h, pgd_populate() is not nop and
* we reference kasan_zero_p4d. It's not defined
* unless 5-level paging enabled.
*
* The ifndef can be dropped once all KASAN-enabled
* architectures will switch to pgtable-nop4d.h.
*/ */
pgd_populate(&init_mm, pgd, lm_alias(kasan_zero_pud)); #ifndef __ARCH_HAS_5LEVEL_HACK
pud = pud_offset(pgd, addr); pgd_populate(&init_mm, pgd, lm_alias(kasan_zero_p4d));
#endif
p4d = p4d_offset(pgd, addr);
p4d_populate(&init_mm, p4d, lm_alias(kasan_zero_pud));
pud = pud_offset(p4d, addr);
pud_populate(&init_mm, pud, lm_alias(kasan_zero_pmd)); pud_populate(&init_mm, pud, lm_alias(kasan_zero_pmd));
pmd = pmd_offset(pud, addr); pmd = pmd_offset(pud, addr);
pmd_populate_kernel(&init_mm, pmd, lm_alias(kasan_zero_pte)); pmd_populate_kernel(&init_mm, pmd, lm_alias(kasan_zero_pte));
...@@ -148,6 +182,6 @@ void __init kasan_populate_zero_shadow(const void *shadow_start, ...@@ -148,6 +182,6 @@ void __init kasan_populate_zero_shadow(const void *shadow_start,
pgd_populate(&init_mm, pgd, pgd_populate(&init_mm, pgd,
early_alloc(PAGE_SIZE, NUMA_NO_NODE)); early_alloc(PAGE_SIZE, NUMA_NO_NODE));
} }
zero_pud_populate(pgd, addr, next); zero_p4d_populate(pgd, addr, next);
} while (pgd++, addr = next, addr != end); } while (pgd++, addr = next, addr != end);
} }
...@@ -445,7 +445,7 @@ static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud, ...@@ -445,7 +445,7 @@ static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
mm_dec_nr_pmds(tlb->mm); mm_dec_nr_pmds(tlb->mm);
} }
static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, static inline void free_pud_range(struct mmu_gather *tlb, p4d_t *p4d,
unsigned long addr, unsigned long end, unsigned long addr, unsigned long end,
unsigned long floor, unsigned long ceiling) unsigned long floor, unsigned long ceiling)
{ {
...@@ -454,7 +454,7 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, ...@@ -454,7 +454,7 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
unsigned long start; unsigned long start;
start = addr; start = addr;
pud = pud_offset(pgd, addr); pud = pud_offset(p4d, addr);
do { do {
next = pud_addr_end(addr, end); next = pud_addr_end(addr, end);
if (pud_none_or_clear_bad(pud)) if (pud_none_or_clear_bad(pud))
...@@ -462,6 +462,39 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, ...@@ -462,6 +462,39 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
free_pmd_range(tlb, pud, addr, next, floor, ceiling); free_pmd_range(tlb, pud, addr, next, floor, ceiling);
} while (pud++, addr = next, addr != end); } while (pud++, addr = next, addr != end);
start &= P4D_MASK;
if (start < floor)
return;
if (ceiling) {
ceiling &= P4D_MASK;
if (!ceiling)
return;
}
if (end - 1 > ceiling - 1)
return;
pud = pud_offset(p4d, start);
p4d_clear(p4d);
pud_free_tlb(tlb, pud, start);
}
static inline void free_p4d_range(struct mmu_gather *tlb, pgd_t *pgd,
unsigned long addr, unsigned long end,
unsigned long floor, unsigned long ceiling)
{
p4d_t *p4d;
unsigned long next;
unsigned long start;
start = addr;
p4d = p4d_offset(pgd, addr);
do {
next = p4d_addr_end(addr, end);
if (p4d_none_or_clear_bad(p4d))
continue;
free_pud_range(tlb, p4d, addr, next, floor, ceiling);
} while (p4d++, addr = next, addr != end);
start &= PGDIR_MASK; start &= PGDIR_MASK;
if (start < floor) if (start < floor)
return; return;
...@@ -473,9 +506,9 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, ...@@ -473,9 +506,9 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
if (end - 1 > ceiling - 1) if (end - 1 > ceiling - 1)
return; return;
pud = pud_offset(pgd, start); p4d = p4d_offset(pgd, start);
pgd_clear(pgd); pgd_clear(pgd);
pud_free_tlb(tlb, pud, start); p4d_free_tlb(tlb, p4d, start);
} }
/* /*
...@@ -539,7 +572,7 @@ void free_pgd_range(struct mmu_gather *tlb, ...@@ -539,7 +572,7 @@ void free_pgd_range(struct mmu_gather *tlb,
next = pgd_addr_end(addr, end); next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd)) if (pgd_none_or_clear_bad(pgd))
continue; continue;
free_pud_range(tlb, pgd, addr, next, floor, ceiling); free_p4d_range(tlb, pgd, addr, next, floor, ceiling);
} while (pgd++, addr = next, addr != end); } while (pgd++, addr = next, addr != end);
} }
...@@ -658,7 +691,8 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, ...@@ -658,7 +691,8 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
pte_t pte, struct page *page) pte_t pte, struct page *page)
{ {
pgd_t *pgd = pgd_offset(vma->vm_mm, addr); pgd_t *pgd = pgd_offset(vma->vm_mm, addr);
pud_t *pud = pud_offset(pgd, addr); p4d_t *p4d = p4d_offset(pgd, addr);
pud_t *pud = pud_offset(p4d, addr);
pmd_t *pmd = pmd_offset(pud, addr); pmd_t *pmd = pmd_offset(pud, addr);
struct address_space *mapping; struct address_space *mapping;
pgoff_t index; pgoff_t index;
...@@ -1023,16 +1057,16 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src ...@@ -1023,16 +1057,16 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src
} }
static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma, p4d_t *dst_p4d, p4d_t *src_p4d, struct vm_area_struct *vma,
unsigned long addr, unsigned long end) unsigned long addr, unsigned long end)
{ {
pud_t *src_pud, *dst_pud; pud_t *src_pud, *dst_pud;
unsigned long next; unsigned long next;
dst_pud = pud_alloc(dst_mm, dst_pgd, addr); dst_pud = pud_alloc(dst_mm, dst_p4d, addr);
if (!dst_pud) if (!dst_pud)
return -ENOMEM; return -ENOMEM;
src_pud = pud_offset(src_pgd, addr); src_pud = pud_offset(src_p4d, addr);
do { do {
next = pud_addr_end(addr, end); next = pud_addr_end(addr, end);
if (pud_trans_huge(*src_pud) || pud_devmap(*src_pud)) { if (pud_trans_huge(*src_pud) || pud_devmap(*src_pud)) {
...@@ -1056,6 +1090,28 @@ static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src ...@@ -1056,6 +1090,28 @@ static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src
return 0; return 0;
} }
/*
 * Copy the p4d level for [addr, end) from @src_pgd to @dst_pgd.
 *
 * The destination p4d is obtained with p4d_alloc(); the source is looked
 * up with p4d_offset().  Source entries for which
 * p4d_none_or_clear_bad() is true are skipped; the rest are passed to
 * copy_pud_range().
 *
 * Returns 0 on success, -ENOMEM if p4d_alloc() returns NULL or
 * copy_pud_range() returns non-zero.
 */
static inline int copy_p4d_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
		pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
		unsigned long addr, unsigned long end)
{
	unsigned long next;
	p4d_t *dst_p4d;
	p4d_t *src_p4d;

	dst_p4d = p4d_alloc(dst_mm, dst_pgd, addr);
	if (!dst_p4d)
		return -ENOMEM;

	src_p4d = p4d_offset(src_pgd, addr);
	do {
		next = p4d_addr_end(addr, end);

		/* Only descend when the source entry is usable. */
		if (!p4d_none_or_clear_bad(src_p4d) &&
		    copy_pud_range(dst_mm, src_mm, dst_p4d, src_p4d,
				   vma, addr, next))
			return -ENOMEM;
	} while (dst_p4d++, src_p4d++, addr = next, addr != end);

	return 0;
}
int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
struct vm_area_struct *vma) struct vm_area_struct *vma)
{ {
...@@ -1111,7 +1167,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, ...@@ -1111,7 +1167,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
next = pgd_addr_end(addr, end); next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(src_pgd)) if (pgd_none_or_clear_bad(src_pgd))
continue; continue;
if (unlikely(copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd, if (unlikely(copy_p4d_range(dst_mm, src_mm, dst_pgd, src_pgd,
vma, addr, next))) { vma, addr, next))) {
ret = -ENOMEM; ret = -ENOMEM;
break; break;
...@@ -1267,14 +1323,14 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, ...@@ -1267,14 +1323,14 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
} }
static inline unsigned long zap_pud_range(struct mmu_gather *tlb, static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
struct vm_area_struct *vma, pgd_t *pgd, struct vm_area_struct *vma, p4d_t *p4d,
unsigned long addr, unsigned long end, unsigned long addr, unsigned long end,
struct zap_details *details) struct zap_details *details)
{ {
pud_t *pud; pud_t *pud;
unsigned long next; unsigned long next;
pud = pud_offset(pgd, addr); pud = pud_offset(p4d, addr);
do { do {
next = pud_addr_end(addr, end); next = pud_addr_end(addr, end);
if (pud_trans_huge(*pud) || pud_devmap(*pud)) { if (pud_trans_huge(*pud) || pud_devmap(*pud)) {
...@@ -1295,6 +1351,25 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb, ...@@ -1295,6 +1351,25 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
return addr; return addr;
} }
/*
 * Walk the p4d entries under @pgd covering [addr, end), calling
 * zap_pud_range() on every entry for which p4d_none_or_clear_bad() is
 * false.
 *
 * The value returned by zap_pud_range() becomes the next address of the
 * walk.  Returns the address at which the walk stopped.
 */
static inline unsigned long zap_p4d_range(struct mmu_gather *tlb,
				struct vm_area_struct *vma, pgd_t *pgd,
				unsigned long addr, unsigned long end,
				struct zap_details *details)
{
	unsigned long next;
	p4d_t *p4d;

	p4d = p4d_offset(pgd, addr);
	do {
		next = p4d_addr_end(addr, end);

		if (!p4d_none_or_clear_bad(p4d))
			next = zap_pud_range(tlb, vma, p4d, addr, next,
					     details);
	} while (p4d++, addr = next, addr != end);

	return addr;
}
void unmap_page_range(struct mmu_gather *tlb, void unmap_page_range(struct mmu_gather *tlb,
struct vm_area_struct *vma, struct vm_area_struct *vma,
unsigned long addr, unsigned long end, unsigned long addr, unsigned long end,
...@@ -1310,7 +1385,7 @@ void unmap_page_range(struct mmu_gather *tlb, ...@@ -1310,7 +1385,7 @@ void unmap_page_range(struct mmu_gather *tlb,
next = pgd_addr_end(addr, end); next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd)) if (pgd_none_or_clear_bad(pgd))
continue; continue;
next = zap_pud_range(tlb, vma, pgd, addr, next, details); next = zap_p4d_range(tlb, vma, pgd, addr, next, details);
} while (pgd++, addr = next, addr != end); } while (pgd++, addr = next, addr != end);
tlb_end_vma(tlb, vma); tlb_end_vma(tlb, vma);
} }
...@@ -1465,16 +1540,24 @@ EXPORT_SYMBOL_GPL(zap_vma_ptes); ...@@ -1465,16 +1540,24 @@ EXPORT_SYMBOL_GPL(zap_vma_ptes);
pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
spinlock_t **ptl) spinlock_t **ptl)
{ {
pgd_t *pgd = pgd_offset(mm, addr); pgd_t *pgd;
pud_t *pud = pud_alloc(mm, pgd, addr); p4d_t *p4d;
if (pud) { pud_t *pud;
pmd_t *pmd = pmd_alloc(mm, pud, addr); pmd_t *pmd;
if (pmd) {
VM_BUG_ON(pmd_trans_huge(*pmd)); pgd = pgd_offset(mm, addr);
return pte_alloc_map_lock(mm, pmd, addr, ptl); p4d = p4d_alloc(mm, pgd, addr);
} if (!p4d)
} return NULL;
return NULL; pud = pud_alloc(mm, p4d, addr);
if (!pud)
return NULL;
pmd = pmd_alloc(mm, pud, addr);
if (!pmd)
return NULL;
VM_BUG_ON(pmd_trans_huge(*pmd));
return pte_alloc_map_lock(mm, pmd, addr, ptl);
} }
/* /*
...@@ -1740,7 +1823,7 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud, ...@@ -1740,7 +1823,7 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
return 0; return 0;
} }
static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd, static inline int remap_pud_range(struct mm_struct *mm, p4d_t *p4d,
unsigned long addr, unsigned long end, unsigned long addr, unsigned long end,
unsigned long pfn, pgprot_t prot) unsigned long pfn, pgprot_t prot)
{ {
...@@ -1748,7 +1831,7 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd, ...@@ -1748,7 +1831,7 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
unsigned long next; unsigned long next;
pfn -= addr >> PAGE_SHIFT; pfn -= addr >> PAGE_SHIFT;
pud = pud_alloc(mm, pgd, addr); pud = pud_alloc(mm, p4d, addr);
if (!pud) if (!pud)
return -ENOMEM; return -ENOMEM;
do { do {
...@@ -1760,6 +1843,26 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd, ...@@ -1760,6 +1843,26 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
return 0; return 0;
} }
/*
 * Walk the p4d entries under @pgd covering [addr, end), handing each
 * sub-range to remap_pud_range().
 *
 * @pfn is biased by -(addr >> PAGE_SHIFT) on entry so that
 * "pfn + (addr >> PAGE_SHIFT)" is the pfn matching the current address.
 *
 * Returns 0 on success, -ENOMEM if p4d_alloc() returns NULL or
 * remap_pud_range() returns non-zero.
 */
static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd,
			unsigned long addr, unsigned long end,
			unsigned long pfn, pgprot_t prot)
{
	unsigned long next;
	p4d_t *p4d;

	pfn -= addr >> PAGE_SHIFT;

	p4d = p4d_alloc(mm, pgd, addr);
	if (!p4d)
		return -ENOMEM;

	do {
		unsigned long cur_pfn = pfn + (addr >> PAGE_SHIFT);
		int failed;

		next = p4d_addr_end(addr, end);
		failed = remap_pud_range(mm, p4d, addr, next, cur_pfn, prot);
		if (failed)
			return -ENOMEM;
	} while (p4d++, addr = next, addr != end);

	return 0;
}
/** /**
* remap_pfn_range - remap kernel memory to userspace * remap_pfn_range - remap kernel memory to userspace
* @vma: user vma to map to * @vma: user vma to map to
...@@ -1816,7 +1919,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, ...@@ -1816,7 +1919,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
flush_cache_range(vma, addr, end); flush_cache_range(vma, addr, end);
do { do {
next = pgd_addr_end(addr, end); next = pgd_addr_end(addr, end);
err = remap_pud_range(mm, pgd, addr, next, err = remap_p4d_range(mm, pgd, addr, next,
pfn + (addr >> PAGE_SHIFT), prot); pfn + (addr >> PAGE_SHIFT), prot);
if (err) if (err)
break; break;
...@@ -1932,7 +2035,7 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, ...@@ -1932,7 +2035,7 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
return err; return err;
} }
static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd, static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d,
unsigned long addr, unsigned long end, unsigned long addr, unsigned long end,
pte_fn_t fn, void *data) pte_fn_t fn, void *data)
{ {
...@@ -1940,7 +2043,7 @@ static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd, ...@@ -1940,7 +2043,7 @@ static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd,
unsigned long next; unsigned long next;
int err; int err;
pud = pud_alloc(mm, pgd, addr); pud = pud_alloc(mm, p4d, addr);
if (!pud) if (!pud)
return -ENOMEM; return -ENOMEM;
do { do {
...@@ -1952,6 +2055,26 @@ static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd, ...@@ -1952,6 +2055,26 @@ static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd,
return err; return err;
} }
/*
 * p4d-level step of apply_to_page_range(): obtain the p4d table under @pgd
 * via p4d_alloc() and run apply_to_pud_range() on each p4d-sized piece of
 * [addr, end).
 *
 * Returns -ENOMEM if p4d_alloc() fails, the first non-zero value returned by
 * apply_to_pud_range(), or 0 once the whole range has been processed.
 */
static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd,
				     unsigned long addr, unsigned long end,
				     pte_fn_t fn, void *data)
{
	p4d_t *slot = p4d_alloc(mm, pgd, addr);
	int rc;

	if (!slot)
		return -ENOMEM;

	for (;;) {
		const unsigned long boundary = p4d_addr_end(addr, end);

		rc = apply_to_pud_range(mm, slot, addr, boundary, fn, data);
		if (rc)
			break;

		slot++;
		addr = boundary;
		if (addr == end)
			break;
	}

	return rc;
}
/* /*
* Scan a region of virtual memory, filling in page tables as necessary * Scan a region of virtual memory, filling in page tables as necessary
* and calling a provided function on each leaf page table. * and calling a provided function on each leaf page table.
...@@ -1970,7 +2093,7 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, ...@@ -1970,7 +2093,7 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
pgd = pgd_offset(mm, addr); pgd = pgd_offset(mm, addr);
do { do {
next = pgd_addr_end(addr, end); next = pgd_addr_end(addr, end);
err = apply_to_pud_range(mm, pgd, addr, next, fn, data); err = apply_to_p4d_range(mm, pgd, addr, next, fn, data);
if (err) if (err)
break; break;
} while (pgd++, addr = next, addr != end); } while (pgd++, addr = next, addr != end);
...@@ -3653,11 +3776,15 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, ...@@ -3653,11 +3776,15 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
}; };
struct mm_struct *mm = vma->vm_mm; struct mm_struct *mm = vma->vm_mm;
pgd_t *pgd; pgd_t *pgd;
p4d_t *p4d;
int ret; int ret;
pgd = pgd_offset(mm, address); pgd = pgd_offset(mm, address);
p4d = p4d_alloc(mm, pgd, address);
if (!p4d)
return VM_FAULT_OOM;
vmf.pud = pud_alloc(mm, pgd, address); vmf.pud = pud_alloc(mm, p4d, address);
if (!vmf.pud) if (!vmf.pud)
return VM_FAULT_OOM; return VM_FAULT_OOM;
if (pud_none(*vmf.pud) && transparent_hugepage_enabled(vma)) { if (pud_none(*vmf.pud) && transparent_hugepage_enabled(vma)) {
...@@ -3779,12 +3906,35 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address, ...@@ -3779,12 +3906,35 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
} }
EXPORT_SYMBOL_GPL(handle_mm_fault); EXPORT_SYMBOL_GPL(handle_mm_fault);
#ifndef __PAGETABLE_P4D_FOLDED
/*
 * Allocate p4d page table (slow path).
 * The fast path has already been handled in-line by the caller.
 *
 * A new table is obtained from p4d_alloc_one() and installed into @pgd while
 * holding mm->page_table_lock. If @pgd is found populated once the lock is
 * held, the new table is released with p4d_free() instead.
 *
 * Returns 0 on success (including the already-populated case) and -ENOMEM
 * when p4d_alloc_one() fails.
 */
int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
{
	p4d_t *table = p4d_alloc_one(mm, address);

	if (!table)
		return -ENOMEM;

	smp_wmb(); /* See comment in __pte_alloc */

	spin_lock(&mm->page_table_lock);
	if (!pgd_present(*pgd)) {
		pgd_populate(mm, pgd, table);
	} else {
		/* Another has populated it */
		p4d_free(mm, table);
	}
	spin_unlock(&mm->page_table_lock);

	return 0;
}
#endif /* __PAGETABLE_P4D_FOLDED */
#ifndef __PAGETABLE_PUD_FOLDED #ifndef __PAGETABLE_PUD_FOLDED
/* /*
* Allocate page upper directory. * Allocate page upper directory.
* We've already handled the fast-path in-line. * We've already handled the fast-path in-line.
*/ */
int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address)
{ {
pud_t *new = pud_alloc_one(mm, address); pud_t *new = pud_alloc_one(mm, address);
if (!new) if (!new)
...@@ -3793,10 +3943,17 @@ int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) ...@@ -3793,10 +3943,17 @@ int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
smp_wmb(); /* See comment in __pte_alloc */ smp_wmb(); /* See comment in __pte_alloc */
spin_lock(&mm->page_table_lock); spin_lock(&mm->page_table_lock);
if (pgd_present(*pgd)) /* Another has populated it */ #ifndef __ARCH_HAS_5LEVEL_HACK
if (p4d_present(*p4d)) /* Another has populated it */
pud_free(mm, new); pud_free(mm, new);
else else
pgd_populate(mm, pgd, new); p4d_populate(mm, p4d, new);
#else
if (pgd_present(*p4d)) /* Another has populated it */
pud_free(mm, new);
else
pgd_populate(mm, p4d, new);
#endif /* __ARCH_HAS_5LEVEL_HACK */
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
return 0; return 0;
} }
...@@ -3839,6 +3996,7 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, ...@@ -3839,6 +3996,7 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp) pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
{ {
pgd_t *pgd; pgd_t *pgd;
p4d_t *p4d;
pud_t *pud; pud_t *pud;
pmd_t *pmd; pmd_t *pmd;
pte_t *ptep; pte_t *ptep;
...@@ -3847,7 +4005,11 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, ...@@ -3847,7 +4005,11 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
goto out; goto out;
pud = pud_offset(pgd, address); p4d = p4d_offset(pgd, address);
if (p4d_none(*p4d) || unlikely(p4d_bad(*p4d)))
goto out;
pud = pud_offset(p4d, address);
if (pud_none(*pud) || unlikely(pud_bad(*pud))) if (pud_none(*pud) || unlikely(pud_bad(*pud)))
goto out; goto out;
......
...@@ -380,6 +380,7 @@ static unsigned long __munlock_pagevec_fill(struct pagevec *pvec, ...@@ -380,6 +380,7 @@ static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,
pte = get_locked_pte(vma->vm_mm, start, &ptl); pte = get_locked_pte(vma->vm_mm, start, &ptl);
/* Make sure we do not cross the page table boundary */ /* Make sure we do not cross the page table boundary */
end = pgd_addr_end(start, end); end = pgd_addr_end(start, end);
end = p4d_addr_end(start, end);
end = pud_addr_end(start, end); end = pud_addr_end(start, end);
end = pmd_addr_end(start, end); end = pmd_addr_end(start, end);
......
...@@ -193,14 +193,14 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, ...@@ -193,14 +193,14 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
} }
static inline unsigned long change_pud_range(struct vm_area_struct *vma, static inline unsigned long change_pud_range(struct vm_area_struct *vma,
pgd_t *pgd, unsigned long addr, unsigned long end, p4d_t *p4d, unsigned long addr, unsigned long end,
pgprot_t newprot, int dirty_accountable, int prot_numa) pgprot_t newprot, int dirty_accountable, int prot_numa)
{ {
pud_t *pud; pud_t *pud;
unsigned long next; unsigned long next;
unsigned long pages = 0; unsigned long pages = 0;
pud = pud_offset(pgd, addr); pud = pud_offset(p4d, addr);
do { do {
next = pud_addr_end(addr, end); next = pud_addr_end(addr, end);
if (pud_none_or_clear_bad(pud)) if (pud_none_or_clear_bad(pud))
...@@ -212,6 +212,26 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma, ...@@ -212,6 +212,26 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma,
return pages; return pages;
} }
/*
 * p4d-level step of change_protection_range(): visit every p4d entry under
 * @pgd that covers [addr, end) and let change_pud_range() process the ones
 * for which p4d_none_or_clear_bad() returns false.
 *
 * Returns the sum of the values returned by change_pud_range().
 */
static inline unsigned long change_p4d_range(struct vm_area_struct *vma,
		pgd_t *pgd, unsigned long addr, unsigned long end,
		pgprot_t newprot, int dirty_accountable, int prot_numa)
{
	p4d_t *cur = p4d_offset(pgd, addr);
	unsigned long total = 0;

	for (;;) {
		const unsigned long limit = p4d_addr_end(addr, end);

		if (!p4d_none_or_clear_bad(cur))
			total += change_pud_range(vma, cur, addr, limit,
						  newprot, dirty_accountable,
						  prot_numa);

		cur++;
		addr = limit;
		if (addr == end)
			break;
	}

	return total;
}
static unsigned long change_protection_range(struct vm_area_struct *vma, static unsigned long change_protection_range(struct vm_area_struct *vma,
unsigned long addr, unsigned long end, pgprot_t newprot, unsigned long addr, unsigned long end, pgprot_t newprot,
int dirty_accountable, int prot_numa) int dirty_accountable, int prot_numa)
...@@ -230,7 +250,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma, ...@@ -230,7 +250,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
next = pgd_addr_end(addr, end); next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd)) if (pgd_none_or_clear_bad(pgd))
continue; continue;
pages += change_pud_range(vma, pgd, addr, next, newprot, pages += change_p4d_range(vma, pgd, addr, next, newprot,
dirty_accountable, prot_numa); dirty_accountable, prot_numa);
} while (pgd++, addr = next, addr != end); } while (pgd++, addr = next, addr != end);
......
...@@ -32,6 +32,7 @@ ...@@ -32,6 +32,7 @@
static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr) static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
{ {
pgd_t *pgd; pgd_t *pgd;
p4d_t *p4d;
pud_t *pud; pud_t *pud;
pmd_t *pmd; pmd_t *pmd;
...@@ -39,7 +40,11 @@ static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr) ...@@ -39,7 +40,11 @@ static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
if (pgd_none_or_clear_bad(pgd)) if (pgd_none_or_clear_bad(pgd))
return NULL; return NULL;
pud = pud_offset(pgd, addr); p4d = p4d_offset(pgd, addr);
if (p4d_none_or_clear_bad(p4d))
return NULL;
pud = pud_offset(p4d, addr);
if (pud_none_or_clear_bad(pud)) if (pud_none_or_clear_bad(pud))
return NULL; return NULL;
...@@ -54,11 +59,15 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -54,11 +59,15 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr) unsigned long addr)
{ {
pgd_t *pgd; pgd_t *pgd;
p4d_t *p4d;
pud_t *pud; pud_t *pud;
pmd_t *pmd; pmd_t *pmd;
pgd = pgd_offset(mm, addr); pgd = pgd_offset(mm, addr);
pud = pud_alloc(mm, pgd, addr); p4d = p4d_alloc(mm, pgd, addr);
if (!p4d)
return NULL;
pud = pud_alloc(mm, p4d, addr);
if (!pud) if (!pud)
return NULL; return NULL;
......
...@@ -104,6 +104,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) ...@@ -104,6 +104,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
struct mm_struct *mm = pvmw->vma->vm_mm; struct mm_struct *mm = pvmw->vma->vm_mm;
struct page *page = pvmw->page; struct page *page = pvmw->page;
pgd_t *pgd; pgd_t *pgd;
p4d_t *p4d;
pud_t *pud; pud_t *pud;
/* The only possible pmd mapping has been handled on last iteration */ /* The only possible pmd mapping has been handled on last iteration */
...@@ -133,7 +134,10 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) ...@@ -133,7 +134,10 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
pgd = pgd_offset(mm, pvmw->address); pgd = pgd_offset(mm, pvmw->address);
if (!pgd_present(*pgd)) if (!pgd_present(*pgd))
return false; return false;
pud = pud_offset(pgd, pvmw->address); p4d = p4d_offset(pgd, pvmw->address);
if (!p4d_present(*p4d))
return false;
pud = pud_offset(p4d, pvmw->address);
if (!pud_present(*pud)) if (!pud_present(*pud))
return false; return false;
pvmw->pmd = pmd_offset(pud, pvmw->address); pvmw->pmd = pmd_offset(pud, pvmw->address);
......
...@@ -69,14 +69,14 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, ...@@ -69,14 +69,14 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
return err; return err;
} }
static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
struct mm_walk *walk) struct mm_walk *walk)
{ {
pud_t *pud; pud_t *pud;
unsigned long next; unsigned long next;
int err = 0; int err = 0;
pud = pud_offset(pgd, addr); pud = pud_offset(p4d, addr);
do { do {
again: again:
next = pud_addr_end(addr, end); next = pud_addr_end(addr, end);
...@@ -113,6 +113,32 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, ...@@ -113,6 +113,32 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
return err; return err;
} }
/*
 * p4d-level step of walk_pgd_range(): iterate over the p4d entries under
 * @pgd that cover [addr, end).
 *
 * For an entry rejected by p4d_none_or_clear_bad(), walk->pte_hole() is
 * called when it is set. Otherwise the walk descends through
 * walk_pud_range(), but only if a pmd_entry or pte_entry callback is set.
 *
 * Returns 0 when the whole range was walked, or the first non-zero value
 * produced by a callback or by walk_pud_range().
 */
static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	p4d_t *cur = p4d_offset(pgd, addr);
	int rc = 0;

	for (;;) {
		const unsigned long limit = p4d_addr_end(addr, end);

		if (p4d_none_or_clear_bad(cur)) {
			if (walk->pte_hole)
				rc = walk->pte_hole(addr, limit, walk);
		} else if (walk->pmd_entry || walk->pte_entry) {
			rc = walk_pud_range(cur, addr, limit, walk);
		}

		if (rc)
			break;

		cur++;
		addr = limit;
		if (addr == end)
			break;
	}

	return rc;
}
static int walk_pgd_range(unsigned long addr, unsigned long end, static int walk_pgd_range(unsigned long addr, unsigned long end,
struct mm_walk *walk) struct mm_walk *walk)
{ {
...@@ -131,7 +157,7 @@ static int walk_pgd_range(unsigned long addr, unsigned long end, ...@@ -131,7 +157,7 @@ static int walk_pgd_range(unsigned long addr, unsigned long end,
continue; continue;
} }
if (walk->pmd_entry || walk->pte_entry) if (walk->pmd_entry || walk->pte_entry)
err = walk_pud_range(pgd, addr, next, walk); err = walk_p4d_range(pgd, addr, next, walk);
if (err) if (err)
break; break;
} while (pgd++, addr = next, addr != end); } while (pgd++, addr = next, addr != end);
......
...@@ -22,6 +22,12 @@ void pgd_clear_bad(pgd_t *pgd) ...@@ -22,6 +22,12 @@ void pgd_clear_bad(pgd_t *pgd)
pgd_clear(pgd); pgd_clear(pgd);
} }
/*
 * p4d-level counterpart of pgd_clear_bad() and pud_clear_bad(): pass the
 * value of the entry to p4d_ERROR() and then reset the entry with
 * p4d_clear().
 * NOTE(review): p4d_ERROR() presumably reports the bad entry -- confirm
 * against the arch definition.
 */
void p4d_clear_bad(p4d_t *p4d)
{
/* Read the entry before it is cleared. */
p4d_ERROR(*p4d);
p4d_clear(p4d);
}
void pud_clear_bad(pud_t *pud) void pud_clear_bad(pud_t *pud)
{ {
pud_ERROR(*pud); pud_ERROR(*pud);
......
...@@ -684,6 +684,7 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) ...@@ -684,6 +684,7 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
{ {
pgd_t *pgd; pgd_t *pgd;
p4d_t *p4d;
pud_t *pud; pud_t *pud;
pmd_t *pmd = NULL; pmd_t *pmd = NULL;
pmd_t pmde; pmd_t pmde;
...@@ -692,7 +693,11 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) ...@@ -692,7 +693,11 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
if (!pgd_present(*pgd)) if (!pgd_present(*pgd))
goto out; goto out;
pud = pud_offset(pgd, address); p4d = p4d_offset(pgd, address);
if (!p4d_present(*p4d))
goto out;
pud = pud_offset(p4d, address);
if (!pud_present(*pud)) if (!pud_present(*pud))
goto out; goto out;
......
...@@ -196,9 +196,9 @@ pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node) ...@@ -196,9 +196,9 @@ pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
return pmd; return pmd;
} }
pud_t * __meminit vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node) pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
{ {
pud_t *pud = pud_offset(pgd, addr); pud_t *pud = pud_offset(p4d, addr);
if (pud_none(*pud)) { if (pud_none(*pud)) {
void *p = vmemmap_alloc_block(PAGE_SIZE, node); void *p = vmemmap_alloc_block(PAGE_SIZE, node);
if (!p) if (!p)
...@@ -208,6 +208,18 @@ pud_t * __meminit vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node) ...@@ -208,6 +208,18 @@ pud_t * __meminit vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node)
return pud; return pud;
} }
/*
 * Return the p4d entry for @addr under @pgd, populating it first when
 * p4d_none() reports it empty. The backing block is PAGE_SIZE bytes obtained
 * from vmemmap_alloc_block() for @node and is installed in init_mm.
 *
 * Returns the p4d entry, or NULL if the block allocation fails.
 */
p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
{
	p4d_t *slot = p4d_offset(pgd, addr);
	void *block;

	/* Already populated: nothing to allocate. */
	if (!p4d_none(*slot))
		return slot;

	block = vmemmap_alloc_block(PAGE_SIZE, node);
	if (!block)
		return NULL;

	p4d_populate(&init_mm, slot, block);
	return slot;
}
pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node) pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
{ {
pgd_t *pgd = pgd_offset_k(addr); pgd_t *pgd = pgd_offset_k(addr);
...@@ -225,6 +237,7 @@ int __meminit vmemmap_populate_basepages(unsigned long start, ...@@ -225,6 +237,7 @@ int __meminit vmemmap_populate_basepages(unsigned long start,
{ {
unsigned long addr = start; unsigned long addr = start;
pgd_t *pgd; pgd_t *pgd;
p4d_t *p4d;
pud_t *pud; pud_t *pud;
pmd_t *pmd; pmd_t *pmd;
pte_t *pte; pte_t *pte;
...@@ -233,7 +246,10 @@ int __meminit vmemmap_populate_basepages(unsigned long start, ...@@ -233,7 +246,10 @@ int __meminit vmemmap_populate_basepages(unsigned long start,
pgd = vmemmap_pgd_populate(addr, node); pgd = vmemmap_pgd_populate(addr, node);
if (!pgd) if (!pgd)
return -ENOMEM; return -ENOMEM;
pud = vmemmap_pud_populate(pgd, addr, node); p4d = vmemmap_p4d_populate(pgd, addr, node);
if (!p4d)
return -ENOMEM;
pud = vmemmap_pud_populate(p4d, addr, node);
if (!pud) if (!pud)
return -ENOMEM; return -ENOMEM;
pmd = vmemmap_pmd_populate(pud, addr, node); pmd = vmemmap_pmd_populate(pud, addr, node);
......
...@@ -1517,7 +1517,7 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud, ...@@ -1517,7 +1517,7 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
return 0; return 0;
} }
static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd, static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d,
unsigned long addr, unsigned long end, unsigned long addr, unsigned long end,
swp_entry_t entry, struct page *page) swp_entry_t entry, struct page *page)
{ {
...@@ -1525,7 +1525,7 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd, ...@@ -1525,7 +1525,7 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
unsigned long next; unsigned long next;
int ret; int ret;
pud = pud_offset(pgd, addr); pud = pud_offset(p4d, addr);
do { do {
next = pud_addr_end(addr, end); next = pud_addr_end(addr, end);
if (pud_none_or_clear_bad(pud)) if (pud_none_or_clear_bad(pud))
...@@ -1537,6 +1537,26 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd, ...@@ -1537,6 +1537,26 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
return 0; return 0;
} }
/*
 * p4d-level step of unuse_vma(): visit every p4d entry under @pgd that
 * covers [addr, end) and call unuse_pud_range() on the ones for which
 * p4d_none_or_clear_bad() returns false.
 *
 * Returns the first non-zero value from unuse_pud_range(), or 0 when the
 * whole range was processed.
 */
static inline int unuse_p4d_range(struct vm_area_struct *vma, pgd_t *pgd,
				unsigned long addr, unsigned long end,
				swp_entry_t entry, struct page *page)
{
	p4d_t *cur = p4d_offset(pgd, addr);

	for (;;) {
		const unsigned long limit = p4d_addr_end(addr, end);

		if (!p4d_none_or_clear_bad(cur)) {
			int rc = unuse_pud_range(vma, cur, addr, limit,
						 entry, page);

			if (rc)
				return rc;
		}

		cur++;
		addr = limit;
		if (addr == end)
			break;
	}

	return 0;
}
static int unuse_vma(struct vm_area_struct *vma, static int unuse_vma(struct vm_area_struct *vma,
swp_entry_t entry, struct page *page) swp_entry_t entry, struct page *page)
{ {
...@@ -1560,7 +1580,7 @@ static int unuse_vma(struct vm_area_struct *vma, ...@@ -1560,7 +1580,7 @@ static int unuse_vma(struct vm_area_struct *vma,
next = pgd_addr_end(addr, end); next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd)) if (pgd_none_or_clear_bad(pgd))
continue; continue;
ret = unuse_pud_range(vma, pgd, addr, next, entry, page); ret = unuse_p4d_range(vma, pgd, addr, next, entry, page);
if (ret) if (ret)
return ret; return ret;
} while (pgd++, addr = next, addr != end); } while (pgd++, addr = next, addr != end);
......
...@@ -128,19 +128,22 @@ static int mfill_zeropage_pte(struct mm_struct *dst_mm, ...@@ -128,19 +128,22 @@ static int mfill_zeropage_pte(struct mm_struct *dst_mm,
static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address) static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
{ {
pgd_t *pgd; pgd_t *pgd;
p4d_t *p4d;
pud_t *pud; pud_t *pud;
pmd_t *pmd = NULL;
pgd = pgd_offset(mm, address); pgd = pgd_offset(mm, address);
pud = pud_alloc(mm, pgd, address); p4d = p4d_alloc(mm, pgd, address);
if (pud) if (!p4d)
/* return NULL;
* Note that we didn't run this because the pmd was pud = pud_alloc(mm, p4d, address);
* missing, the *pmd may be already established and in if (!pud)
* turn it may also be a trans_huge_pmd. return NULL;
*/ /*
pmd = pmd_alloc(mm, pud, address); * Note that we didn't run this because the pmd was
return pmd; * missing, the *pmd may be already established and in
* turn it may also be a trans_huge_pmd.
*/
return pmd_alloc(mm, pud, address);
} }
#ifdef CONFIG_HUGETLB_PAGE #ifdef CONFIG_HUGETLB_PAGE
......
...@@ -86,12 +86,12 @@ static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end) ...@@ -86,12 +86,12 @@ static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
} while (pmd++, addr = next, addr != end); } while (pmd++, addr = next, addr != end);
} }
static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end) static void vunmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end)
{ {
pud_t *pud; pud_t *pud;
unsigned long next; unsigned long next;
pud = pud_offset(pgd, addr); pud = pud_offset(p4d, addr);
do { do {
next = pud_addr_end(addr, end); next = pud_addr_end(addr, end);
if (pud_clear_huge(pud)) if (pud_clear_huge(pud))
...@@ -102,6 +102,22 @@ static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end) ...@@ -102,6 +102,22 @@ static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
} while (pud++, addr = next, addr != end); } while (pud++, addr = next, addr != end);
} }
/*
 * p4d-level step of vunmap_page_range(): visit every p4d entry under @pgd
 * that covers [addr, end).
 *
 * An entry is passed to vunmap_pud_range() only when p4d_clear_huge()
 * returns false and, after that, p4d_none_or_clear_bad() also returns false.
 */
static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end)
{
	p4d_t *cur = p4d_offset(pgd, addr);

	for (;;) {
		const unsigned long limit = p4d_addr_end(addr, end);

		/* Short-circuit keeps the original order of the two checks. */
		if (!p4d_clear_huge(cur) && !p4d_none_or_clear_bad(cur))
			vunmap_pud_range(cur, addr, limit);

		cur++;
		addr = limit;
		if (addr == end)
			break;
	}
}
static void vunmap_page_range(unsigned long addr, unsigned long end) static void vunmap_page_range(unsigned long addr, unsigned long end)
{ {
pgd_t *pgd; pgd_t *pgd;
...@@ -113,7 +129,7 @@ static void vunmap_page_range(unsigned long addr, unsigned long end) ...@@ -113,7 +129,7 @@ static void vunmap_page_range(unsigned long addr, unsigned long end)
next = pgd_addr_end(addr, end); next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd)) if (pgd_none_or_clear_bad(pgd))
continue; continue;
vunmap_pud_range(pgd, addr, next); vunmap_p4d_range(pgd, addr, next);
} while (pgd++, addr = next, addr != end); } while (pgd++, addr = next, addr != end);
} }
...@@ -160,13 +176,13 @@ static int vmap_pmd_range(pud_t *pud, unsigned long addr, ...@@ -160,13 +176,13 @@ static int vmap_pmd_range(pud_t *pud, unsigned long addr,
return 0; return 0;
} }
static int vmap_pud_range(pgd_t *pgd, unsigned long addr, static int vmap_pud_range(p4d_t *p4d, unsigned long addr,
unsigned long end, pgprot_t prot, struct page **pages, int *nr) unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{ {
pud_t *pud; pud_t *pud;
unsigned long next; unsigned long next;
pud = pud_alloc(&init_mm, pgd, addr); pud = pud_alloc(&init_mm, p4d, addr);
if (!pud) if (!pud)
return -ENOMEM; return -ENOMEM;
do { do {
...@@ -177,6 +193,23 @@ static int vmap_pud_range(pgd_t *pgd, unsigned long addr, ...@@ -177,6 +193,23 @@ static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
return 0; return 0;
} }
/*
 * p4d-level step of vmap_page_range_noflush(): obtain the p4d table under
 * @pgd in init_mm via p4d_alloc() and pass each p4d-sized piece of
 * [addr, end) on to vmap_pud_range().
 *
 * Returns 0 on success, or -ENOMEM when p4d_alloc() fails or
 * vmap_pud_range() reports any error.
 */
static int vmap_p4d_range(pgd_t *pgd, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
	p4d_t *cursor = p4d_alloc(&init_mm, pgd, addr);

	if (!cursor)
		return -ENOMEM;

	for (;;) {
		const unsigned long stop = p4d_addr_end(addr, end);

		if (vmap_pud_range(cursor, addr, stop, prot, pages, nr))
			return -ENOMEM;

		cursor++;
		addr = stop;
		if (addr == end)
			break;
	}

	return 0;
}
/* /*
* Set up page tables in kva (addr, end). The ptes shall have prot "prot", and * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and
* will have pfns corresponding to the "pages" array. * will have pfns corresponding to the "pages" array.
...@@ -196,7 +229,7 @@ static int vmap_page_range_noflush(unsigned long start, unsigned long end, ...@@ -196,7 +229,7 @@ static int vmap_page_range_noflush(unsigned long start, unsigned long end,
pgd = pgd_offset_k(addr); pgd = pgd_offset_k(addr);
do { do {
next = pgd_addr_end(addr, end); next = pgd_addr_end(addr, end);
err = vmap_pud_range(pgd, addr, next, prot, pages, &nr); err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr);
if (err) if (err)
return err; return err;
} while (pgd++, addr = next, addr != end); } while (pgd++, addr = next, addr != end);
...@@ -237,6 +270,10 @@ struct page *vmalloc_to_page(const void *vmalloc_addr) ...@@ -237,6 +270,10 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
unsigned long addr = (unsigned long) vmalloc_addr; unsigned long addr = (unsigned long) vmalloc_addr;
struct page *page = NULL; struct page *page = NULL;
pgd_t *pgd = pgd_offset_k(addr); pgd_t *pgd = pgd_offset_k(addr);
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *ptep, pte;
/* /*
* XXX we might need to change this if we add VIRTUAL_BUG_ON for * XXX we might need to change this if we add VIRTUAL_BUG_ON for
...@@ -244,21 +281,23 @@ struct page *vmalloc_to_page(const void *vmalloc_addr) ...@@ -244,21 +281,23 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
*/ */
VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr)); VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));
if (!pgd_none(*pgd)) { if (pgd_none(*pgd))
pud_t *pud = pud_offset(pgd, addr); return NULL;
if (!pud_none(*pud)) { p4d = p4d_offset(pgd, addr);
pmd_t *pmd = pmd_offset(pud, addr); if (p4d_none(*p4d))
if (!pmd_none(*pmd)) { return NULL;
pte_t *ptep, pte; pud = pud_offset(p4d, addr);
if (pud_none(*pud))
ptep = pte_offset_map(pmd, addr); return NULL;
pte = *ptep; pmd = pmd_offset(pud, addr);
if (pte_present(pte)) if (pmd_none(*pmd))
page = pte_page(pte); return NULL;
pte_unmap(ptep);
} ptep = pte_offset_map(pmd, addr);
} pte = *ptep;
} if (pte_present(pte))
page = pte_page(pte);
pte_unmap(ptep);
return page; return page;
} }
EXPORT_SYMBOL(vmalloc_to_page); EXPORT_SYMBOL(vmalloc_to_page);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment