Commit 368ced78 authored by Aneesh Kumar K.V, committed by Michael Ellerman

powerpc/mm: Switch book3s 64 with 64K page size to 4 level page table

This is needed so that we can support both hash and radix page tables in a
single kernel image. The radix MMU uses a 4-level table.

We now use physical addresses in the upper levels of the page table tree. Even
though the tables are aligned to their size, for the masked bits we use the
bit positions specified by PowerISA 3.0.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
parent ae9a71af
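
For context before the diff: with the PUD level now real on 64K book3s, a
software page-table walk descends four levels. Below is a minimal sketch, not
part of the commit, using the accessors this patch introduces or relocates
(pgd_none/pgd_bad, pud_offset, pud_index); in kernels of this era pud_offset()
takes a pgd pointer, and the hypothetical helper name and the reduced error
handling are for illustration only:

static pte_t *walk_example(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	pgd = pgd_offset(mm, addr);		/* level 1: PGD */
	if (pgd_none(*pgd) || pgd_bad(*pgd))
		return NULL;
	pud = pud_offset(pgd, addr);		/* level 2: PUD, now real on 64K book3s */
	if (pud_none(*pud) || pud_bad(*pud))
		return NULL;
	pmd = pmd_offset(pud, addr);		/* level 3: PMD */
	if (pmd_none(*pmd) || pmd_bad(*pmd))
		return NULL;
	return pte_offset_kernel(pmd, addr);	/* level 4: PTE */
}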
@@ -303,7 +303,7 @@ config ZONE_DMA32
 config PGTABLE_LEVELS
 	int
 	default 2 if !PPC64
-	default 3 if PPC_64K_PAGES
+	default 3 if PPC_64K_PAGES && !PPC_BOOK3S_64
 	default 4
 
 source "init/Kconfig"
...
@@ -58,39 +58,8 @@
 #define _PAGE_4K_PFN		0
 #ifndef __ASSEMBLY__
 /*
- * 4-level page tables related bits
+ * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range()
  */
-#define pgd_none(pgd)		(!pgd_val(pgd))
-#define pgd_bad(pgd)		(pgd_val(pgd) == 0)
-#define pgd_present(pgd)	(pgd_val(pgd) != 0)
-#define pgd_page_vaddr(pgd)	__va(pgd_val(pgd) & ~PGD_MASKED_BITS)
-
-static inline void pgd_clear(pgd_t *pgdp)
-{
-	*pgdp = __pgd(0);
-}
-
-static inline pte_t pgd_pte(pgd_t pgd)
-{
-	return __pte(pgd_val(pgd));
-}
-
-static inline pgd_t pte_pgd(pte_t pte)
-{
-	return __pgd(pte_val(pte));
-}
-extern struct page *pgd_page(pgd_t pgd);
-
-#define pud_offset(pgdp, addr)	\
-	(((pud_t *) pgd_page_vaddr(*(pgdp))) + \
-	 (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
-
-#define pud_ERROR(e) \
-	pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
-
-/*
- * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() */
 #define remap_4k_pfn(vma, addr, pfn, prot)	\
 	remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, (prot))
...
 #ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H
 #define _ASM_POWERPC_BOOK3S_64_HASH_64K_H
 
-#include <asm-generic/pgtable-nopud.h>
-
 #define PTE_INDEX_SIZE  8
-#define PMD_INDEX_SIZE  10
-#define PUD_INDEX_SIZE	0
+#define PMD_INDEX_SIZE  5
+#define PUD_INDEX_SIZE	5
 #define PGD_INDEX_SIZE  12
 
 #define PTRS_PER_PTE	(1 << PTE_INDEX_SIZE)
 #define PTRS_PER_PMD	(1 << PMD_INDEX_SIZE)
+#define PTRS_PER_PUD	(1 << PUD_INDEX_SIZE)
 #define PTRS_PER_PGD	(1 << PGD_INDEX_SIZE)
 
 /* With 4k base page size, hugepage PTEs go at the PMD level */
@@ -20,8 +19,13 @@
 #define PMD_SIZE	(1UL << PMD_SHIFT)
 #define PMD_MASK	(~(PMD_SIZE-1))
 
-/* PGDIR_SHIFT determines what a third-level page table entry can map */
-#define PGDIR_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
+/* PUD_SHIFT determines what a third-level page table entry can map */
+#define PUD_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
+#define PUD_SIZE	(1UL << PUD_SHIFT)
+#define PUD_MASK	(~(PUD_SIZE-1))
+
+/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
+#define PGDIR_SHIFT	(PUD_SHIFT + PUD_INDEX_SIZE)
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
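
For orientation, a sketch (not from the patch) restating the arithmetic the
defines above imply: with 64K base pages PAGE_SHIFT is 16, so the new index
sizes (PTE=8, PMD=5, PUD=5, PGD=12) give the geometry below; the EX_ names are
invented for illustration:

/* Editorial sketch of the 64K-page geometry implied by the new index sizes. */
#define EX_PAGE_SHIFT	16
#define EX_PMD_SHIFT	(EX_PAGE_SHIFT + 8)	/* 24: a PMD entry maps 16MB  */
#define EX_PUD_SHIFT	(EX_PMD_SHIFT + 5)	/* 29: a PUD entry maps 512MB */
#define EX_PGDIR_SHIFT	(EX_PUD_SHIFT + 5)	/* 34: a PGD entry maps 16GB  */
/* Four levels together cover EX_PGDIR_SHIFT + 12 = 46 bits, i.e. 64TB. */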
@@ -56,13 +60,12 @@
 #define PTE_FRAG_SIZE_SHIFT  12
 #define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT)
 
-/*
- * Bits to mask out from a PMD to get to the PTE page
- * PMDs point to PTE table fragments which are PTE_FRAG_SIZE aligned.
- */
-#define PMD_MASKED_BITS		(PTE_FRAG_SIZE - 1)
-/* Bits to mask out from a PGD/PUD to get to the PMD page */
-#define PUD_MASKED_BITS		0x1ff
+/* Bits to mask out from a PMD to get to the PTE page */
+#define PMD_MASKED_BITS		0xc0000000000000ffUL
+/* Bits to mask out from a PUD to get to the PMD page */
+#define PUD_MASKED_BITS		0xc0000000000000ffUL
+/* Bits to mask out from a PGD to get to the PUD page */
+#define PGD_MASKED_BITS		0xc0000000000000ffUL
 
 #ifndef __ASSEMBLY__
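
A note on 0xc0000000000000ffUL: per the commit message the masked positions
follow PowerISA 3.0, which keeps control fields in the top two and bottom
eight bits of a radix page-directory entry; clearing them recovers the
physical address of the next-level table. A hypothetical helper, with the
name and example value invented for illustration:

static inline unsigned long example_next_table_pa(unsigned long pde)
{
	/*
	 * Clear the top two and bottom eight bits (flag/size fields),
	 * leaving the physical base of the next-level table.
	 */
	return pde & ~0xc0000000000000ffUL;
}
/* e.g. example_next_table_pa(0xc000000001fab100UL) == 0x0000000001fab100UL */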
@@ -132,11 +135,9 @@ extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index);
 #else
 #define PMD_TABLE_SIZE	(sizeof(pmd_t) << PMD_INDEX_SIZE)
 #endif
+#define PUD_TABLE_SIZE	(sizeof(pud_t) << PUD_INDEX_SIZE)
 #define PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)
 
-#define pgd_pte(pgd)	(pud_pte(((pud_t){ pgd })))
-#define pte_pgd(pte)	((pgd_t)pte_pud(pte))
-
 #ifdef CONFIG_HUGETLB_PAGE
 /*
  * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have
...
@@ -235,6 +235,7 @@
 #define __pgtable_ptr_val(ptr)	__pa(ptr)
 
 #define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1))
+#define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1))
 #define pmd_index(address) (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1))
 #define pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1))
@@ -363,8 +364,18 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
 	:"cc");
 }
 
+static inline int pgd_bad(pgd_t pgd)
+{
+	return (pgd_val(pgd) == 0);
+}
+
 #define __HAVE_ARCH_PTE_SAME
 #define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
 
+static inline unsigned long pgd_page_vaddr(pgd_t pgd)
+{
+	return (unsigned long)__va(pgd_val(pgd) & ~PGD_MASKED_BITS);
+}
+
 /* Generic accessors to PTE bits */
 static inline int pte_write(pte_t pte)	{ return !!(pte_val(pte) & _PAGE_RW);}
...
@@ -106,6 +106,26 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
 	*pgdp = __pgd(val);
 }
 
+static inline void pgd_clear(pgd_t *pgdp)
+{
+	*pgdp = __pgd(0);
+}
+
+#define pgd_none(pgd)		(!pgd_val(pgd))
+#define pgd_present(pgd)	(!pgd_none(pgd))
+
+static inline pte_t pgd_pte(pgd_t pgd)
+{
+	return __pte(pgd_val(pgd));
+}
+
+static inline pgd_t pte_pgd(pte_t pte)
+{
+	return __pgd(pte_val(pte));
+}
+
+extern struct page *pgd_page(pgd_t pgd);
+
 /*
  * Find an entry in a page-table-directory. We combine the address region
  * (the high order N bits) and the pgd portion of the address.
@@ -113,9 +133,10 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
 
 #define pgd_offset(mm, address)	 ((mm)->pgd + pgd_index(address))
 
+#define pud_offset(pgdp, addr)	\
+	(((pud_t *) pgd_page_vaddr(*(pgdp))) + pud_index(addr))
 #define pmd_offset(pudp,addr) \
 	(((pmd_t *) pud_page_vaddr(*(pudp))) + pmd_index(addr))
-
 #define pte_offset_kernel(dir,addr) \
 	(((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr))
@@ -130,6 +151,8 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
 	pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
 #define pmd_ERROR(e) \
 	pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pud_ERROR(e) \
+	pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
 #define pgd_ERROR(e) \
 	pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
...
@@ -171,7 +171,29 @@ extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
 extern void __tlb_remove_table(void *_table);
 #endif
 
-#define pud_populate(mm, pud, pmd)	pud_set(pud, __pgtable_ptr_val(pmd))
+#ifndef __PAGETABLE_PUD_FOLDED
+/* book3s 64 is 4 level page table */
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+{
+	pgd_set(pgd, __pgtable_ptr_val(pud));
+}
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
+				GFP_KERNEL|__GFP_REPEAT);
+}
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+	kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
+}
+#endif
+
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+	pud_set(pud, __pgtable_ptr_val(pmd));
+}
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
 				       pte_t *pte)
@@ -233,11 +255,11 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 #define __pmd_free_tlb(tlb, pmd, addr)		      \
 	pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX)
-#ifndef CONFIG_PPC_64K_PAGES
+#ifndef __PAGETABLE_PUD_FOLDED
 #define __pud_free_tlb(tlb, pud, addr)		      \
 	pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE)
-#endif /* CONFIG_PPC_64K_PAGES */
+#endif /* __PAGETABLE_PUD_FOLDED */
 
 #define check_pgt_cache()	do { } while (0)
...
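
For context: the allocator hooks added above are consumed by the generic mm
code when a missing PUD level must be instantiated. A simplified illustration
patterned on the generic __pud_alloc() of this era; the helper name and the
exact locking shown are illustrative, not code from this patch:

static int example_alloc_pud(struct mm_struct *mm, pgd_t *pgd,
			     unsigned long address)
{
	pud_t *new = pud_alloc_one(mm, address);

	if (!new)
		return -ENOMEM;

	spin_lock(&mm->page_table_lock);
	if (pgd_present(*pgd))		/* lost a race: already populated */
		pud_free(mm, new);
	else
		pgd_populate(mm, pgd, new);
	spin_unlock(&mm->page_table_lock);
	return 0;
}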
@@ -21,15 +21,18 @@ static inline unsigned long pmd_val(pmd_t x)
 	return x.pmd;
 }
 
-/* PUD level exusts only on 4k pages */
-#ifndef CONFIG_PPC_64K_PAGES
+/*
+ * 64 bit hash always use 4 level table. Everybody else use 4 level
+ * only for 4K page size.
+ */
+#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES)
 typedef struct { unsigned long pud; } pud_t;
 #define __pud(x)	((pud_t) { (x) })
 static inline unsigned long pud_val(pud_t x)
 {
 	return x.pud;
 }
-#endif /* !CONFIG_PPC_64K_PAGES */
+#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */
 #endif /* CONFIG_PPC64 */
 
 /* PGD level */
@@ -66,14 +69,14 @@ static inline unsigned long pmd_val(pmd_t pmd)
 	return pmd;
 }
 
-#ifndef CONFIG_PPC_64K_PAGES
+#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES)
 typedef unsigned long pud_t;
 #define __pud(x)	(x)
 static inline unsigned long pud_val(pud_t pud)
 {
 	return pud;
 }
-#endif /* !CONFIG_PPC_64K_PAGES */
+#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */
 #endif /* CONFIG_PPC64 */
 
 typedef unsigned long pgd_t;
...
@@ -85,6 +85,11 @@ static void pgd_ctor(void *addr)
 	memset(addr, 0, PGD_TABLE_SIZE);
 }
 
+static void pud_ctor(void *addr)
+{
+	memset(addr, 0, PUD_TABLE_SIZE);
+}
+
 static void pmd_ctor(void *addr)
 {
 	memset(addr, 0, PMD_TABLE_SIZE);
@@ -138,14 +143,18 @@ void pgtable_cache_init(void)
 {
 	pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
 	pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor);
+	/*
+	 * In all current configs, when the PUD index exists it's the
+	 * same size as either the pgd or pmd index except with THP enabled
+	 * on book3s 64
+	 */
+	if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
+		pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor);
+
 	if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_CACHE_INDEX))
 		panic("Couldn't allocate pgtable caches");
-	/* In all current configs, when the PUD index exists it's the
-	 * same size as either the pgd or pmd index.  Verify that the
-	 * initialization above has also created a PUD cache.  This
-	 * will need re-examiniation if we add new possibilities for
-	 * the pagetable layout. */
-	BUG_ON(PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE));
+	if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
+		panic("Couldn't allocate pud pgtable caches");
 }
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
...
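
A closing aside on pgtable_cache_init(): the guard
if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) only adds a dedicated PUD
cache when no existing cache (e.g. the PGD's or PMD's) already serves that
index size. A minimal sketch of the shift-indexed lookup this relies on, with
names mirroring mm/init_64.c but simplified for illustration:

/* Simplified sketch of the shift-indexed cache table behind PGT_CACHE(). */
#define MAX_PGTABLE_INDEX_SIZE	0xf
static struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
#define PGT_CACHE(shift)	(pgtable_cache[(shift) - 1])

static void pgtable_cache_add(unsigned int shift, void (*ctor)(void *))
{
	unsigned long table_size = sizeof(void *) << shift;

	if (PGT_CACHE(shift))
		return;		/* a cache of this size already exists */
	PGT_CACHE(shift) = kmem_cache_create("pgtable-cache", table_size,
					     table_size, 0, ctor);
}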