Commit 41151e77 authored by Becky Bruce, committed by Benjamin Herrenschmidt

powerpc: Hugetlb for BookE

Enable hugepages on Freescale BookE processors.  This allows the kernel to
use huge TLB entries to map pages, which can greatly reduce the number of
TLB misses and the amount of TLB thrashing experienced by applications with
large memory footprints.  Care should be taken when using this on FSL
processors, as the number of large TLB entries supported by the core is low
(16-64) on current processors.

The supported set of hugepage sizes includes 4m, 16m, 64m, 256m, and 1g.
Page sizes larger than the max zone size are called "gigantic" pages and
must be allocated on the command line (and cannot be deallocated).

This is currently only fully implemented for Freescale 32-bit BookE
processors, but there is some infrastructure in the code for
64-bit BookE.
Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
parent 7df5659e
...@@ -429,8 +429,7 @@ config ARCH_POPULATES_NODE_MAP ...@@ -429,8 +429,7 @@ config ARCH_POPULATES_NODE_MAP
def_bool y def_bool y
config SYS_SUPPORTS_HUGETLBFS config SYS_SUPPORTS_HUGETLBFS
def_bool y bool
depends on PPC_BOOK3S_64
source "mm/Kconfig" source "mm/Kconfig"
......
#ifndef _ASM_POWERPC_HUGETLB_H #ifndef _ASM_POWERPC_HUGETLB_H
#define _ASM_POWERPC_HUGETLB_H #define _ASM_POWERPC_HUGETLB_H
#ifdef CONFIG_HUGETLB_PAGE
#include <asm/page.h> #include <asm/page.h>
extern struct kmem_cache *hugepte_cache;
extern void __init reserve_hugetlb_gpages(void);
/*
 * Convert a hugepd entry into a pointer to the hugepte table it refers to.
 *
 * The low HUGEPD_SHIFT_MASK bits of the entry are cleared and PD_HUGE is
 * OR-ed in to form the pointer.  BUG()s if the entry fails hugepd_ok().
 */
static inline pte_t *hugepd_page(hugepd_t hpd)
{
	unsigned long table;

	BUG_ON(!hugepd_ok(hpd));

	table = hpd.pd & ~HUGEPD_SHIFT_MASK;
	table |= PD_HUGE;

	return (pte_t *)table;
}
/* Return the value stored in the HUGEPD_SHIFT_MASK bits of a hugepd entry. */
static inline unsigned int hugepd_shift(hugepd_t hpd)
{
	unsigned int shift = hpd.pd & HUGEPD_SHIFT_MASK;

	return shift;
}
/*
 * Locate the hugepte for @addr inside the table that @hpdp points to.
 *
 * @hpdp:    higher-level entry holding the hugepd
 * @addr:    address being looked up
 * @pdshift: shift of the page-table level that @hpdp belongs to
 *
 * Returns a pointer into the hugepte table returned by hugepd_page().
 */
static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
				    unsigned pdshift)
{
	pte_t *table = hugepd_page(*hpdp);

#ifdef CONFIG_PPC64
	/* Offset of addr within the range covered by this directory entry */
	unsigned long within_pd = addr & ((1UL << pdshift) - 1);

	return table + (within_pd >> hugepd_shift(*hpdp));
#else
	/*
	 * On 32-bit, several higher-level entries point to the same hugepte
	 * and they are all identical, so the first slot is always the answer.
	 */
	return table;
#endif
}
pte_t *huge_pte_offset_and_shift(struct mm_struct *mm, pte_t *huge_pte_offset_and_shift(struct mm_struct *mm,
unsigned long addr, unsigned *shift); unsigned long addr, unsigned *shift);
void flush_dcache_icache_hugepage(struct page *page); void flush_dcache_icache_hugepage(struct page *page);
#if defined(CONFIG_PPC_MM_SLICES) || defined(CONFIG_PPC_SUBPAGE_PROT)
int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
unsigned long len); unsigned long len);
#else
/*
 * Fallback used when neither CONFIG_PPC_MM_SLICES nor
 * CONFIG_PPC_SUBPAGE_PROT is enabled: every range is reported as not
 * hugepage-only, so the arguments are ignored and 0 is returned.
 */
static inline int is_hugepage_only_range(struct mm_struct *mm,
					 unsigned long addr, unsigned long len)
{
	return 0;
}
#endif
void book3e_hugetlb_preload(struct mm_struct *mm, unsigned long ea, pte_t pte);
void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
unsigned long end, unsigned long floor, unsigned long end, unsigned long floor,
...@@ -50,8 +95,11 @@ static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, ...@@ -50,8 +95,11 @@ static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep) unsigned long addr, pte_t *ptep)
{ {
unsigned long old = pte_update(mm, addr, ptep, ~0UL, 1); #ifdef CONFIG_PPC64
return __pte(old); return __pte(pte_update(mm, addr, ptep, ~0UL, 1));
#else
return __pte(pte_update(ptep, ~0UL, 0));
#endif
} }
static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
...@@ -93,4 +141,15 @@ static inline void arch_release_hugepage(struct page *page) ...@@ -93,4 +141,15 @@ static inline void arch_release_hugepage(struct page *page)
{ {
} }
#else /* ! CONFIG_HUGETLB_PAGE */
/*
 * Stub used when CONFIG_HUGETLB_PAGE is disabled: no gigantic pages can be
 * reserved, so the only effect is an error message in the log.
 */
static inline void reserve_hugetlb_gpages(void)
{
pr_err("Cannot reserve gpages without hugetlb enabled\n");
}
/* Stub used when CONFIG_HUGETLB_PAGE is disabled: intentionally does nothing. */
static inline void flush_hugetlb_page(struct vm_area_struct *vma,
unsigned long vmaddr)
{
}
#endif
#endif /* _ASM_POWERPC_HUGETLB_H */ #endif /* _ASM_POWERPC_HUGETLB_H */
...@@ -66,6 +66,7 @@ ...@@ -66,6 +66,7 @@
#define MAS2_M 0x00000004 #define MAS2_M 0x00000004
#define MAS2_G 0x00000002 #define MAS2_G 0x00000002
#define MAS2_E 0x00000001 #define MAS2_E 0x00000001
#define MAS2_WIMGE_MASK 0x0000001f
#define MAS2_EPN_MASK(size) (~0 << (size + 10)) #define MAS2_EPN_MASK(size) (~0 << (size + 10))
#define MAS2_VAL(addr, size, flags) ((addr) & MAS2_EPN_MASK(size) | (flags)) #define MAS2_VAL(addr, size, flags) ((addr) & MAS2_EPN_MASK(size) | (flags))
...@@ -80,6 +81,7 @@ ...@@ -80,6 +81,7 @@
#define MAS3_SW 0x00000004 #define MAS3_SW 0x00000004
#define MAS3_UR 0x00000002 #define MAS3_UR 0x00000002
#define MAS3_SR 0x00000001 #define MAS3_SR 0x00000001
#define MAS3_BAP_MASK 0x0000003f
#define MAS3_SPSIZE 0x0000003e #define MAS3_SPSIZE 0x0000003e
#define MAS3_SPSIZE_SHIFT 1 #define MAS3_SPSIZE_SHIFT 1
...@@ -212,6 +214,11 @@ typedef struct { ...@@ -212,6 +214,11 @@ typedef struct {
unsigned int id; unsigned int id;
unsigned int active; unsigned int active;
unsigned long vdso_base; unsigned long vdso_base;
#ifdef CONFIG_PPC_MM_SLICES
u64 low_slices_psize; /* SLB page size encodings */
u64 high_slices_psize; /* 4 bits per slice for now */
u16 user_psize; /* page size index */
#endif
} mm_context_t; } mm_context_t;
/* Page size definitions, common between 32 and 64-bit /* Page size definitions, common between 32 and 64-bit
......
...@@ -262,8 +262,7 @@ extern void hash_failure_debug(unsigned long ea, unsigned long access, ...@@ -262,8 +262,7 @@ extern void hash_failure_debug(unsigned long ea, unsigned long access,
extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend, extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
unsigned long pstart, unsigned long prot, unsigned long pstart, unsigned long prot,
int psize, int ssize); int psize, int ssize);
extern void add_gpage(unsigned long addr, unsigned long page_size, extern void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages);
unsigned long number_of_pages);
extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr); extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr);
extern void hpte_init_native(void); extern void hpte_init_native(void);
......
...@@ -175,14 +175,16 @@ extern u64 ppc64_rma_size; ...@@ -175,14 +175,16 @@ extern u64 ppc64_rma_size;
#define MMU_PAGE_64K_AP 3 /* "Admixed pages" (hash64 only) */ #define MMU_PAGE_64K_AP 3 /* "Admixed pages" (hash64 only) */
#define MMU_PAGE_256K 4 #define MMU_PAGE_256K 4
#define MMU_PAGE_1M 5 #define MMU_PAGE_1M 5
#define MMU_PAGE_8M 6 #define MMU_PAGE_4M 6
#define MMU_PAGE_16M 7 #define MMU_PAGE_8M 7
#define MMU_PAGE_256M 8 #define MMU_PAGE_16M 8
#define MMU_PAGE_1G 9 #define MMU_PAGE_64M 9
#define MMU_PAGE_16G 10 #define MMU_PAGE_256M 10
#define MMU_PAGE_64G 11 #define MMU_PAGE_1G 11
#define MMU_PAGE_COUNT 12 #define MMU_PAGE_16G 12
#define MMU_PAGE_64G 13
#define MMU_PAGE_COUNT 14
#if defined(CONFIG_PPC_STD_MMU_64) #if defined(CONFIG_PPC_STD_MMU_64)
/* 64-bit classic hash table MMU */ /* 64-bit classic hash table MMU */
......
...@@ -36,6 +36,18 @@ ...@@ -36,6 +36,18 @@
#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) #define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT)
#ifndef __ASSEMBLY__
#ifdef CONFIG_HUGETLB_PAGE
extern unsigned int HPAGE_SHIFT;
#else
#define HPAGE_SHIFT PAGE_SHIFT
#endif
#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
#define HUGE_MAX_HSTATE (MMU_PAGE_COUNT-1)
#endif
/* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */ /* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */
#define __HAVE_ARCH_GATE_AREA 1 #define __HAVE_ARCH_GATE_AREA 1
...@@ -158,6 +170,24 @@ extern phys_addr_t kernstart_addr; ...@@ -158,6 +170,24 @@ extern phys_addr_t kernstart_addr;
#define is_kernel_addr(x) ((x) >= PAGE_OFFSET) #define is_kernel_addr(x) ((x) >= PAGE_OFFSET)
#endif #endif
/*
* Use the top bit of the higher-level page table entries to indicate whether
* the entries we point to contain hugepages. This works because we know that
* the page tables live in kernel space. If we ever decide to support having
* page tables at arbitrary addresses, this breaks and will have to change.
*/
#ifdef CONFIG_PPC64
#define PD_HUGE 0x8000000000000000
#else
#define PD_HUGE 0x80000000
#endif
/*
* Some number of bits at the level of the page table that points to
* a hugepte are used to encode the size. This masks those bits.
*/
#define HUGEPD_SHIFT_MASK 0x3f
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
#undef STRICT_MM_TYPECHECKS #undef STRICT_MM_TYPECHECKS
...@@ -243,7 +273,6 @@ typedef unsigned long pgprot_t; ...@@ -243,7 +273,6 @@ typedef unsigned long pgprot_t;
#endif #endif
typedef struct { signed long pd; } hugepd_t; typedef struct { signed long pd; } hugepd_t;
#define HUGEPD_SHIFT_MASK 0x3f
#ifdef CONFIG_HUGETLB_PAGE #ifdef CONFIG_HUGETLB_PAGE
static inline int hugepd_ok(hugepd_t hpd) static inline int hugepd_ok(hugepd_t hpd)
......
...@@ -64,17 +64,6 @@ extern void copy_page(void *to, void *from); ...@@ -64,17 +64,6 @@ extern void copy_page(void *to, void *from);
/* Log 2 of page table size */ /* Log 2 of page table size */
extern u64 ppc64_pft_size; extern u64 ppc64_pft_size;
/* Large pages size */
#ifdef CONFIG_HUGETLB_PAGE
extern unsigned int HPAGE_SHIFT;
#else
#define HPAGE_SHIFT PAGE_SHIFT
#endif
#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
#define HUGE_MAX_HSTATE (MMU_PAGE_COUNT-1)
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
#ifdef CONFIG_PPC_MM_SLICES #ifdef CONFIG_PPC_MM_SLICES
......
...@@ -72,6 +72,9 @@ ...@@ -72,6 +72,9 @@
#define PTE_RPN_SHIFT (24) #define PTE_RPN_SHIFT (24)
#endif #endif
#define PTE_WIMGE_SHIFT (19)
#define PTE_BAP_SHIFT (2)
/* On 32-bit, we never clear the top part of the PTE */ /* On 32-bit, we never clear the top part of the PTE */
#ifdef CONFIG_PPC32 #ifdef CONFIG_PPC32
#define _PTE_NONE_MASK 0xffffffff00000000ULL #define _PTE_NONE_MASK 0xffffffff00000000ULL
......
...@@ -236,8 +236,24 @@ _ENTRY(__early_start) ...@@ -236,8 +236,24 @@ _ENTRY(__early_start)
* if we find the pte (fall through): * if we find the pte (fall through):
* r11 is low pte word * r11 is low pte word
* r12 is pointer to the pte * r12 is pointer to the pte
* r10 is the pshift from the PGD, if we're a hugepage
*/ */
#ifdef CONFIG_PTE_64BIT #ifdef CONFIG_PTE_64BIT
#ifdef CONFIG_HUGETLB_PAGE
#define FIND_PTE \
rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \
lwzx r11, r12, r11; /* Get pgd/pmd entry */ \
rlwinm. r12, r11, 0, 0, 20; /* Extract pt base address */ \
blt 1000f; /* Normal non-huge page */ \
beq 2f; /* Bail if no table */ \
oris r11, r11, PD_HUGE@h; /* Put back address bit */ \
andi. r10, r11, HUGEPD_SHIFT_MASK@l; /* extract size field */ \
xor r12, r10, r11; /* drop size bits from pointer */ \
b 1001f; \
1000: rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \
li r10, 0; /* clear r10 */ \
1001: lwz r11, 4(r12); /* Get pte entry */
#else
#define FIND_PTE \ #define FIND_PTE \
rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \ rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \
lwzx r11, r12, r11; /* Get pgd/pmd entry */ \ lwzx r11, r12, r11; /* Get pgd/pmd entry */ \
...@@ -245,7 +261,8 @@ _ENTRY(__early_start) ...@@ -245,7 +261,8 @@ _ENTRY(__early_start)
beq 2f; /* Bail if no table */ \ beq 2f; /* Bail if no table */ \
rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \ rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \
lwz r11, 4(r12); /* Get pte entry */ lwz r11, 4(r12); /* Get pte entry */
#else #endif /* HUGEPAGE */
#else /* !PTE_64BIT */
#define FIND_PTE \ #define FIND_PTE \
rlwimi r11, r10, 12, 20, 29; /* Create L1 (pgdir/pmd) address */ \ rlwimi r11, r10, 12, 20, 29; /* Create L1 (pgdir/pmd) address */ \
lwz r11, 0(r11); /* Get L1 entry */ \ lwz r11, 0(r11); /* Get L1 entry */ \
...@@ -402,8 +419,8 @@ interrupt_base: ...@@ -402,8 +419,8 @@ interrupt_base:
#ifdef CONFIG_PTE_64BIT #ifdef CONFIG_PTE_64BIT
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
subf r10,r11,r12 /* create false data dep */ subf r13,r11,r12 /* create false data dep */
lwzx r13,r11,r10 /* Get upper pte bits */ lwzx r13,r11,r13 /* Get upper pte bits */
#else #else
lwz r13,0(r12) /* Get upper pte bits */ lwz r13,0(r12) /* Get upper pte bits */
#endif #endif
...@@ -483,8 +500,8 @@ interrupt_base: ...@@ -483,8 +500,8 @@ interrupt_base:
#ifdef CONFIG_PTE_64BIT #ifdef CONFIG_PTE_64BIT
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
subf r10,r11,r12 /* create false data dep */ subf r13,r11,r12 /* create false data dep */
lwzx r13,r11,r10 /* Get upper pte bits */ lwzx r13,r11,r13 /* Get upper pte bits */
#else #else
lwz r13,0(r12) /* Get upper pte bits */ lwz r13,0(r12) /* Get upper pte bits */
#endif #endif
...@@ -548,7 +565,7 @@ interrupt_base: ...@@ -548,7 +565,7 @@ interrupt_base:
/* /*
* Both the instruction and data TLB miss get to this * Both the instruction and data TLB miss get to this
* point to load the TLB. * point to load the TLB.
* r10 - available to use * r10 - tsize encoding (if HUGETLB_PAGE) or available to use
* r11 - TLB (info from Linux PTE) * r11 - TLB (info from Linux PTE)
* r12 - available to use * r12 - available to use
* r13 - upper bits of PTE (if PTE_64BIT) or available to use * r13 - upper bits of PTE (if PTE_64BIT) or available to use
...@@ -558,21 +575,73 @@ interrupt_base: ...@@ -558,21 +575,73 @@ interrupt_base:
* Upon exit, we reload everything and RFI. * Upon exit, we reload everything and RFI.
*/ */
finish_tlb_load: finish_tlb_load:
#ifdef CONFIG_HUGETLB_PAGE
cmpwi 6, r10, 0 /* check for huge page */
beq 6, finish_tlb_load_cont /* !huge */
/* Alas, we need more scratch registers for hugepages */
mfspr r12, SPRN_SPRG_THREAD
stw r14, THREAD_NORMSAVE(4)(r12)
stw r15, THREAD_NORMSAVE(5)(r12)
stw r16, THREAD_NORMSAVE(6)(r12)
stw r17, THREAD_NORMSAVE(7)(r12)
/* Get the next_tlbcam_idx percpu var */
#ifdef CONFIG_SMP
lwz r12, THREAD_INFO-THREAD(r12)
lwz r15, TI_CPU(r12)
lis r14, __per_cpu_offset@h
ori r14, r14, __per_cpu_offset@l
rlwinm r15, r15, 2, 0, 29
lwzx r16, r14, r15
#else
li r16, 0
#endif
lis r17, next_tlbcam_idx@h
ori r17, r17, next_tlbcam_idx@l
add r17, r17, r16 /* r17 = *next_tlbcam_idx */
lwz r15, 0(r17) /* r15 = next_tlbcam_idx */
lis r14, MAS0_TLBSEL(1)@h /* select TLB1 (TLBCAM) */
rlwimi r14, r15, 16, 4, 15 /* next_tlbcam_idx entry */
mtspr SPRN_MAS0, r14
/* Extract TLB1CFG(NENTRY) */
mfspr r16, SPRN_TLB1CFG
andi. r16, r16, 0xfff
/* Update next_tlbcam_idx, wrapping when necessary */
addi r15, r15, 1
cmpw r15, r16
blt 100f
lis r14, tlbcam_index@h
ori r14, r14, tlbcam_index@l
lwz r15, 0(r14)
100: stw r15, 0(r17)
/*
* Calc MAS1_TSIZE from r10 (which has pshift encoded)
* tlb_enc = (pshift - 10).
*/
subi r15, r10, 10
mfspr r16, SPRN_MAS1
rlwimi r16, r15, 7, 20, 24
mtspr SPRN_MAS1, r16
/* copy the pshift for use later */
mr r14, r10
/* fall through */
#endif /* CONFIG_HUGETLB_PAGE */
/* /*
* We set execute, because we don't have the granularity to * We set execute, because we don't have the granularity to
* properly set this at the page level (Linux problem). * properly set this at the page level (Linux problem).
* Many of these bits are software only. Bits we don't set * Many of these bits are software only. Bits we don't set
* here we (properly should) assume have the appropriate value. * here we (properly should) assume have the appropriate value.
*/ */
finish_tlb_load_cont:
mfspr r12, SPRN_MAS2
#ifdef CONFIG_PTE_64BIT
rlwimi r12, r11, 32-19, 27, 31 /* extract WIMGE from pte */
#else
rlwimi r12, r11, 26, 27, 31 /* extract WIMGE from pte */
#endif
mtspr SPRN_MAS2, r12
#ifdef CONFIG_PTE_64BIT #ifdef CONFIG_PTE_64BIT
rlwinm r12, r11, 32-2, 26, 31 /* Move in perm bits */ rlwinm r12, r11, 32-2, 26, 31 /* Move in perm bits */
andi. r10, r11, _PAGE_DIRTY andi. r10, r11, _PAGE_DIRTY
...@@ -581,22 +650,40 @@ finish_tlb_load: ...@@ -581,22 +650,40 @@ finish_tlb_load:
andc r12, r12, r10 andc r12, r12, r10
1: rlwimi r12, r13, 20, 0, 11 /* grab RPN[32:43] */ 1: rlwimi r12, r13, 20, 0, 11 /* grab RPN[32:43] */
rlwimi r12, r11, 20, 12, 19 /* grab RPN[44:51] */ rlwimi r12, r11, 20, 12, 19 /* grab RPN[44:51] */
mtspr SPRN_MAS3, r12 2: mtspr SPRN_MAS3, r12
BEGIN_MMU_FTR_SECTION BEGIN_MMU_FTR_SECTION
srwi r10, r13, 12 /* grab RPN[12:31] */ srwi r10, r13, 12 /* grab RPN[12:31] */
mtspr SPRN_MAS7, r10 mtspr SPRN_MAS7, r10
END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS)
#else #else
li r10, (_PAGE_EXEC | _PAGE_PRESENT) li r10, (_PAGE_EXEC | _PAGE_PRESENT)
mr r13, r11
rlwimi r10, r11, 31, 29, 29 /* extract _PAGE_DIRTY into SW */ rlwimi r10, r11, 31, 29, 29 /* extract _PAGE_DIRTY into SW */
and r12, r11, r10 and r12, r11, r10
andi. r10, r11, _PAGE_USER /* Test for _PAGE_USER */ andi. r10, r11, _PAGE_USER /* Test for _PAGE_USER */
slwi r10, r12, 1 slwi r10, r12, 1
or r10, r10, r12 or r10, r10, r12
iseleq r12, r12, r10 iseleq r12, r12, r10
rlwimi r11, r12, 0, 20, 31 /* Extract RPN from PTE and merge with perms */ rlwimi r13, r12, 0, 20, 31 /* Get RPN from PTE, merge w/ perms */
mtspr SPRN_MAS3, r11 mtspr SPRN_MAS3, r13
#endif #endif
mfspr r12, SPRN_MAS2
#ifdef CONFIG_PTE_64BIT
rlwimi r12, r11, 32-19, 27, 31 /* extract WIMGE from pte */
#else
rlwimi r12, r11, 26, 27, 31 /* extract WIMGE from pte */
#endif
#ifdef CONFIG_HUGETLB_PAGE
beq 6, 3f /* don't mask if page isn't huge */
li r13, 1
slw r13, r13, r14
subi r13, r13, 1
rlwinm r13, r13, 0, 0, 19 /* bottom bits used for WIMGE/etc */
andc r12, r12, r13 /* mask off ea bits within the page */
#endif
3: mtspr SPRN_MAS2, r12
#ifdef CONFIG_E200 #ifdef CONFIG_E200
/* Round robin TLB1 entries assignment */ /* Round robin TLB1 entries assignment */
mfspr r12, SPRN_MAS0 mfspr r12, SPRN_MAS0
...@@ -622,11 +709,19 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) ...@@ -622,11 +709,19 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS)
mtspr SPRN_MAS0,r12 mtspr SPRN_MAS0,r12
#endif /* CONFIG_E200 */ #endif /* CONFIG_E200 */
tlb_write_entry:
tlbwe tlbwe
/* Done...restore registers and get out of here. */ /* Done...restore registers and get out of here. */
mfspr r10, SPRN_SPRG_THREAD mfspr r10, SPRN_SPRG_THREAD
lwz r11, THREAD_NORMSAVE(3)(r10) #ifdef CONFIG_HUGETLB_PAGE
beq 6, 8f /* skip restore for 4k page faults */
lwz r14, THREAD_NORMSAVE(4)(r10)
lwz r15, THREAD_NORMSAVE(5)(r10)
lwz r16, THREAD_NORMSAVE(6)(r10)
lwz r17, THREAD_NORMSAVE(7)(r10)
#endif
8: lwz r11, THREAD_NORMSAVE(3)(r10)
mtcr r11 mtcr r11
lwz r13, THREAD_NORMSAVE(2)(r10) lwz r13, THREAD_NORMSAVE(2)(r10)
lwz r12, THREAD_NORMSAVE(1)(r10) lwz r12, THREAD_NORMSAVE(1)(r10)
......
...@@ -29,6 +29,7 @@ obj-$(CONFIG_PPC_MM_SLICES) += slice.o ...@@ -29,6 +29,7 @@ obj-$(CONFIG_PPC_MM_SLICES) += slice.o
ifeq ($(CONFIG_HUGETLB_PAGE),y) ifeq ($(CONFIG_HUGETLB_PAGE),y)
obj-y += hugetlbpage.o obj-y += hugetlbpage.o
obj-$(CONFIG_PPC_STD_MMU_64) += hugetlbpage-hash64.o obj-$(CONFIG_PPC_STD_MMU_64) += hugetlbpage-hash64.o
obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o
endif endif
obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o
obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
......
...@@ -105,9 +105,6 @@ int mmu_kernel_ssize = MMU_SEGSIZE_256M; ...@@ -105,9 +105,6 @@ int mmu_kernel_ssize = MMU_SEGSIZE_256M;
int mmu_highuser_ssize = MMU_SEGSIZE_256M; int mmu_highuser_ssize = MMU_SEGSIZE_256M;
u16 mmu_slb_size = 64; u16 mmu_slb_size = 64;
EXPORT_SYMBOL_GPL(mmu_slb_size); EXPORT_SYMBOL_GPL(mmu_slb_size);
#ifdef CONFIG_HUGETLB_PAGE
unsigned int HPAGE_SHIFT;
#endif
#ifdef CONFIG_PPC_64K_PAGES #ifdef CONFIG_PPC_64K_PAGES
int mmu_ci_restrictions; int mmu_ci_restrictions;
#endif #endif
......
/*
* PPC Huge TLB Page Support for Book3E MMU
*
* Copyright (C) 2009 David Gibson, IBM Corporation.
* Copyright (C) 2011 Becky Bruce, Freescale Semiconductor
*
*/
#include <linux/mm.h>
#include <linux/hugetlb.h>
/* Return the .enc field of mmu_psize_defs[] for MMU page-size index @psize. */
static inline int mmu_get_tsize(int psize)
{
	int enc = mmu_psize_defs[psize].enc;

	return enc;
}
/*
 * Search the TLB for a translation of @ea belonging to @pid.
 *
 * MAS6 is loaded with @pid shifted left by 16 before the search
 * (presumably the search-PID field - confirm against the core manual).
 *
 * Returns non-zero if tlbsx found an entry, 0 otherwise.
 */
static inline int book3e_tlb_exists(unsigned long ea, unsigned long pid)
{
	int found = 0;

	mtspr(SPRN_MAS6, pid << 16);
	if (mmu_has_feature(MMU_FTR_USE_TLBRSRV)) {
		/*
		 * Record-form tlbsx. reports the result in CR0, which bne
		 * then tests.  CR0 is therefore modified and must be listed
		 * as a clobber so the compiler does not keep a live
		 * comparison result in it across this asm.
		 */
		asm volatile(
			"li %0,0\n"
			"tlbsx. 0,%1\n"
			"bne 1f\n"
			"li %0,1\n"
			"1:\n"
			: "=&r"(found) : "r"(ea) : "cr0");
	} else {
		/*
		 * Plain tlbsx: read SPR 0x271 afterwards and keep only its
		 * top bit.  NOTE(review): 0x271 is presumably MAS1, whose top
		 * bit is the valid flag - confirm.
		 */
		asm volatile(
			"tlbsx 0,%1\n"
			"mfspr %0,0x271\n"
			"srwi %0,%0,31\n"
			: "=&r"(found) : "r"(ea));
	}

	return found;
}
/*
 * Write a TLB entry for the huge page that maps @ea in @mm.
 *
 * @mm:  address space the mapping belongs to
 * @ea:  effective address inside the huge page
 * @pte: huge PTE describing the mapping
 *
 * Nothing is done for kernel addresses or when the TLB already holds a
 * translation for @ea.  Interrupts are disabled from the existence check
 * until the tlbwe has been issued.
 */
void book3e_hugetlb_preload(struct mm_struct *mm, unsigned long ea, pte_t pte)
{
	unsigned long mas1, mas2;
	u64 mas7_3;
	unsigned long psize, tsize, shift;
	unsigned long flags;
#ifndef CONFIG_PPC_MM_SLICES
	/* Only the vma-based size lookup below needs these */
	int lz;
	struct vm_area_struct *vma;
#endif
#ifdef CONFIG_PPC_FSL_BOOK3E
	int index, ncams;
#endif

	if (unlikely(is_kernel_addr(ea)))
		return;

#ifdef CONFIG_PPC_MM_SLICES
	/* psize is the MMU page-size index; tsize is its hardware encoding */
	psize = get_slice_psize(mm, ea);
	tsize = mmu_get_tsize(psize);
	shift = mmu_psize_defs[psize].shift;
#else
	vma = find_vma(mm, ea);
	psize = vma_mmu_pagesize(vma);	/* returns actual size in bytes */
	asm (PPC_CNTLZL "%0,%1" : "=r" (lz) : "r" (psize));
	/*
	 * NOTE(review): the constants 31 and 21 assume a 32-bit psize
	 * (shift = log2(psize), tsize = shift - 10).
	 */
	shift = 31 - lz;
	tsize = 21 - lz;
#endif

	/*
	 * We can't be interrupted while we're setting up the MAS
	 * registers or after we've confirmed that no tlb exists.
	 */
	local_irq_save(flags);
	if (unlikely(book3e_tlb_exists(ea, mm->context.id))) {
		local_irq_restore(flags);
		return;
	}

#ifdef CONFIG_PPC_FSL_BOOK3E
	ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;

	/* We have to use the CAM(TLB1) on FSL parts for hugepages */
	index = __get_cpu_var(next_tlbcam_idx);
	mtspr(SPRN_MAS0, MAS0_ESEL(index) | MAS0_TLBSEL(1));

	/* Just round-robin the entries and wrap when we hit the end */
	if (unlikely(index == ncams - 1))
		__get_cpu_var(next_tlbcam_idx) = tlbcam_index;
	else
		__get_cpu_var(next_tlbcam_idx)++;
#endif

	mas1 = MAS1_VALID | MAS1_TID(mm->context.id) | MAS1_TSIZE(tsize);

	/* Page-aligned effective address plus the WIMGE bits from the PTE */
	mas2 = ea & ~((1UL << shift) - 1);
	mas2 |= (pte_val(pte) >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK;

	/* Physical address plus the permission bits from the PTE */
	mas7_3 = (u64)pte_pfn(pte) << PAGE_SHIFT;
	mas7_3 |= (pte_val(pte) >> PTE_BAP_SHIFT) & MAS3_BAP_MASK;

	/* A clean page is mapped without write permission */
	if (!pte_dirty(pte))
		mas7_3 &= ~(MAS3_SW|MAS3_UW);

	mtspr(SPRN_MAS1, mas1);
	mtspr(SPRN_MAS2, mas2);

	if (mmu_has_feature(MMU_FTR_USE_PAIRED_MAS)) {
		mtspr(SPRN_MAS7_MAS3, mas7_3);
	} else {
		mtspr(SPRN_MAS7, upper_32_bits(mas7_3));
		mtspr(SPRN_MAS3, lower_32_bits(mas7_3));
	}

	asm volatile ("tlbwe");

	local_irq_restore(flags);
}
/*
 * Flush the TLB entry of the huge page that maps @vmaddr in @vma.
 *
 * @vma must be non-NULL: it is dereferenced immediately to find the hstate
 * of the backing file.  The size passed to __flush_tlb_page() is the huge
 * page shift minus 10.
 */
void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
	struct hstate *hstate = hstate_file(vma->vm_file);
	unsigned long tsize = huge_page_shift(hstate) - 10;

	/*
	 * vma was already dereferenced above, so a NULL test here would be
	 * dead code that wrongly suggests NULL is an accepted argument.
	 */
	__flush_tlb_page(vma->vm_mm, vmaddr, tsize, 0);
}
/* /*
* PPC64 (POWER4) Huge TLB Page Support for Kernel. * PPC Huge TLB Page Support for Kernel.
* *
* Copyright (C) 2003 David Gibson, IBM Corporation. * Copyright (C) 2003 David Gibson, IBM Corporation.
* Copyright (C) 2011 Becky Bruce, Freescale Semiconductor
* *
* Based on the IA-32 version: * Based on the IA-32 version:
* Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com> * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
...@@ -11,24 +12,39 @@ ...@@ -11,24 +12,39 @@
#include <linux/io.h> #include <linux/io.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/hugetlb.h> #include <linux/hugetlb.h>
#include <linux/of_fdt.h>
#include <linux/memblock.h>
#include <linux/bootmem.h>
#include <asm/pgtable.h> #include <asm/pgtable.h>
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
#include <asm/tlb.h> #include <asm/tlb.h>
#include <asm/setup.h>
#define PAGE_SHIFT_64K 16 #define PAGE_SHIFT_64K 16
#define PAGE_SHIFT_16M 24 #define PAGE_SHIFT_16M 24
#define PAGE_SHIFT_16G 34 #define PAGE_SHIFT_16G 34
#define MAX_NUMBER_GPAGES 1024 unsigned int HPAGE_SHIFT;
/* Tracks the 16G pages after the device tree is scanned and before the /*
* huge_boot_pages list is ready. */ * Tracks gpages after the device tree is scanned and before the
static unsigned long gpage_freearray[MAX_NUMBER_GPAGES]; * huge_boot_pages list is ready. On 64-bit implementations, this is
* just used to track 16G pages and so is a single array. 32-bit
* implementations may have more than one gpage size due to limitations
* of the memory allocators, so we need multiple arrays
*/
#ifdef CONFIG_PPC64
#define MAX_NUMBER_GPAGES 1024
static u64 gpage_freearray[MAX_NUMBER_GPAGES];
static unsigned nr_gpages; static unsigned nr_gpages;
#else
/* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad() #define MAX_NUMBER_GPAGES 128
* will choke on pointers to hugepte tables, which is handy for struct psize_gpages {
* catching screwups early. */ u64 gpage_list[MAX_NUMBER_GPAGES];
unsigned int nr_gpages;
};
static struct psize_gpages gpage_freearray[MMU_PAGE_COUNT];
#endif
static inline int shift_to_mmu_psize(unsigned int shift) static inline int shift_to_mmu_psize(unsigned int shift)
{ {
...@@ -49,25 +65,6 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) ...@@ -49,25 +65,6 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
#define hugepd_none(hpd) ((hpd).pd == 0) #define hugepd_none(hpd) ((hpd).pd == 0)
static inline pte_t *hugepd_page(hugepd_t hpd)
{
BUG_ON(!hugepd_ok(hpd));
return (pte_t *)((hpd.pd & ~HUGEPD_SHIFT_MASK) | 0xc000000000000000);
}
static inline unsigned int hugepd_shift(hugepd_t hpd)
{
return hpd.pd & HUGEPD_SHIFT_MASK;
}
static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, unsigned pdshift)
{
unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp);
pte_t *dir = hugepd_page(*hpdp);
return dir + idx;
}
pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift) pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
{ {
pgd_t *pg; pgd_t *pg;
...@@ -93,7 +90,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift ...@@ -93,7 +90,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
if (is_hugepd(pm)) if (is_hugepd(pm))
hpdp = (hugepd_t *)pm; hpdp = (hugepd_t *)pm;
else if (!pmd_none(*pm)) { else if (!pmd_none(*pm)) {
return pte_offset_map(pm, ea); return pte_offset_kernel(pm, ea);
} }
} }
} }
...@@ -114,8 +111,18 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) ...@@ -114,8 +111,18 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
unsigned long address, unsigned pdshift, unsigned pshift) unsigned long address, unsigned pdshift, unsigned pshift)
{ {
pte_t *new = kmem_cache_zalloc(PGT_CACHE(pdshift - pshift), struct kmem_cache *cachep;
GFP_KERNEL|__GFP_REPEAT); pte_t *new;
#ifdef CONFIG_PPC64
cachep = PGT_CACHE(pdshift - pshift);
#else
int i;
int num_hugepd = 1 << (pshift - pdshift);
cachep = hugepte_cache;
#endif
new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT);
BUG_ON(pshift > HUGEPD_SHIFT_MASK); BUG_ON(pshift > HUGEPD_SHIFT_MASK);
BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
...@@ -124,10 +131,31 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, ...@@ -124,10 +131,31 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
return -ENOMEM; return -ENOMEM;
spin_lock(&mm->page_table_lock); spin_lock(&mm->page_table_lock);
#ifdef CONFIG_PPC64
if (!hugepd_none(*hpdp)) if (!hugepd_none(*hpdp))
kmem_cache_free(PGT_CACHE(pdshift - pshift), new); kmem_cache_free(cachep, new);
else else
hpdp->pd = ((unsigned long)new & ~0x8000000000000000) | pshift; hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
#else
/*
* We have multiple higher-level entries that point to the same
* actual pte location. Fill in each as we go and backtrack on error.
* We need all of these so the DTLB pgtable walk code can find the
* right higher-level entry without knowing if it's a hugepage or not.
*/
for (i = 0; i < num_hugepd; i++, hpdp++) {
if (unlikely(!hugepd_none(*hpdp)))
break;
else
hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
}
/* If we bailed from the for loop early, an error occurred, clean up */
if (i < num_hugepd) {
for (i = i - 1 ; i >= 0; i--, hpdp--)
hpdp->pd = 0;
kmem_cache_free(cachep, new);
}
#endif
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
return 0; return 0;
} }
...@@ -169,11 +197,132 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz ...@@ -169,11 +197,132 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
return hugepte_offset(hpdp, addr, pdshift); return hugepte_offset(hpdp, addr, pdshift);
} }
#ifdef CONFIG_PPC32
/* Build list of addresses of gigantic pages. This function is used in early /* Build list of addresses of gigantic pages. This function is used in early
* boot before the buddy or bootmem allocator is setup. * boot before the buddy or bootmem allocator is setup.
*/ */
/*
 * Record @number_of_pages consecutive gigantic pages of @page_size bytes,
 * starting at physical address @addr, in the per-size free array.
 *
 * Nothing is recorded when @addr is 0 or when @page_size does not map to a
 * valid MMU page-size index.  At most MAX_NUMBER_GPAGES pages are recorded
 * because that is the capacity of each gpage_list[].
 */
void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
{
	unsigned int idx = shift_to_mmu_psize(__ffs(page_size));
	unsigned long i;

	if (addr == 0)
		return;

	/*
	 * shift_to_mmu_psize() returns an int; if it reports an unsupported
	 * shift with a negative value, idx becomes a huge unsigned number.
	 * Reject anything outside gpage_freearray[].
	 */
	if (idx >= MMU_PAGE_COUNT)
		return;

	/* Never write past the end of gpage_list[] */
	if (number_of_pages > MAX_NUMBER_GPAGES)
		number_of_pages = MAX_NUMBER_GPAGES;

	gpage_freearray[idx].nr_gpages = number_of_pages;

	for (i = 0; i < number_of_pages; i++) {
		gpage_freearray[idx].gpage_list[i] = addr;
		addr += page_size;
	}
}
/*
* Moves the gigantic page addresses from the temporary list to the
* huge_boot_pages list.
*/
int alloc_bootmem_huge_page(struct hstate *hstate)
{
struct huge_bootmem_page *m;
int idx = shift_to_mmu_psize(hstate->order + PAGE_SHIFT);
int nr_gpages = gpage_freearray[idx].nr_gpages;
if (nr_gpages == 0)
return 0;
#ifdef CONFIG_HIGHMEM
/*
* If gpages can be in highmem we can't use the trick of storing the
* data structure in the page; allocate space for this
*/
m = alloc_bootmem(sizeof(struct huge_bootmem_page));
m->phys = gpage_freearray[idx].gpage_list[--nr_gpages];
#else
m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]);
#endif
list_add(&m->list, &huge_boot_pages);
gpage_freearray[idx].nr_gpages = nr_gpages;
gpage_freearray[idx].gpage_list[nr_gpages] = 0;
m->hstate = hstate;
return 1;
}
/*
* Scan the command line hugepagesz= options for gigantic pages; store those in
* a list that we use to allocate the memory once all options are parsed.
*/
unsigned long gpage_npages[MMU_PAGE_COUNT];
/*
 * parse_args() callback: remember how many pages were requested for each
 * huge page size given on the command line.
 *
 * @param: option name
 * @val:   option value
 *
 * The count from "hugepages=" is stored in gpage_npages[] under the size
 * given by the preceding "hugepagesz=" or "default_hugepagesz=".  Counts
 * above MAX_NUMBER_GPAGES are clamped, because that is the capacity of the
 * per-size gpage_list[] that is filled later.  Always returns 0.
 */
static int __init do_gpage_early_setup(char *param, char *val)
{
	static phys_addr_t size;
	unsigned long npages;
	int psize;

	/*
	 * NOTE(review): an option given without "=value" presumably arrives
	 * with val == NULL - confirm against parse_args().  Neither
	 * memparse() nor sscanf() may be handed a NULL string.
	 */
	if (!val)
		return 0;

	/*
	 * The hugepagesz and hugepages cmdline options are interleaved.  We
	 * use the size variable to keep track of whether or not this was done
	 * properly and skip over instances where it is incorrect.  Other
	 * command-line parsing code will issue warnings, so we don't need to.
	 */
	if ((strcmp(param, "default_hugepagesz") == 0) ||
	    (strcmp(param, "hugepagesz") == 0)) {
		size = memparse(val, NULL);
	} else if (strcmp(param, "hugepages") == 0) {
		if (size != 0) {
			if (sscanf(val, "%lu", &npages) <= 0)
				npages = 0;

			if (npages > MAX_NUMBER_GPAGES) {
				printk(KERN_WARNING
				       "hugetlb gpages: %lu pages requested, early reservation limited to %d\n",
				       npages, MAX_NUMBER_GPAGES);
				npages = MAX_NUMBER_GPAGES;
			}

			/* Only store the count for a size we can index */
			psize = shift_to_mmu_psize(__ffs(size));
			if (psize >= 0 && psize < MMU_PAGE_COUNT)
				gpage_npages[psize] = npages;

			size = 0;
		}
	}

	return 0;
}
/*
* This function allocates physical space for pages that are larger than the
* buddy allocator can handle. We want to allocate these in highmem because
* the amount of lowmem is limited. This means that this function MUST be
* called before lowmem_end_addr is set up in MMU_init() in order for the lmb
* allocate to grab highmem.
*/
void __init reserve_hugetlb_gpages(void)
{
	static __initdata char cmdline[COMMAND_LINE_SIZE];
	int psize = MMU_PAGE_COUNT;

	/* Parse a private copy so the boot command line itself is untouched. */
	strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE);
	parse_args("hugetlb gpages", cmdline, NULL, 0, &do_gpage_early_setup);

	/*
	 * Visit page sizes from the highest index down so that the larger
	 * sizes are reserved first.  Sizes that are unsupported (shift of 0)
	 * or for which no pages were requested are skipped; the first
	 * requested size that is below the gigantic threshold ends the walk.
	 */
	while (--psize >= 0) {
		unsigned long count = gpage_npages[psize];
		phys_addr_t page_bytes, region;

		if (!mmu_psize_defs[psize].shift || !count)
			continue;
		if (mmu_psize_to_shift(psize) < (MAX_ORDER + PAGE_SHIFT))
			break;

		/* One naturally aligned region holding all pages of this size. */
		page_bytes = (phys_addr_t)(1ULL << mmu_psize_to_shift(psize));
		region = memblock_alloc_base(page_bytes * count, page_bytes,
					     MEMBLOCK_ALLOC_ANYWHERE);
		add_gpage(region, page_bytes, count);
	}
}
#else /* PPC64 */
/* Build list of addresses of gigantic pages. This function is used in early
* boot before the buddy or bootmem allocator is setup.
*/
void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
{ {
if (!addr) if (!addr)
return; return;
...@@ -199,19 +348,79 @@ int alloc_bootmem_huge_page(struct hstate *hstate) ...@@ -199,19 +348,79 @@ int alloc_bootmem_huge_page(struct hstate *hstate)
m->hstate = hstate; m->hstate = hstate;
return 1; return 1;
} }
#endif
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{ {
return 0; return 0;
} }
#ifdef CONFIG_PPC32
/*
 * Capacity of one hugepd_freelist batch.  A batch is a single page (it is
 * obtained with __get_free_page() and released with free_page()): the header
 * comes first and the remainder of the page is the ptes[] array.
 * NOTE(review): the divisor is sizeof(pte_t) although the slots are void *;
 * if pte_t is wider than a pointer the batch simply holds fewer entries than
 * would fit - confirm this is intended.
 */
#define HUGEPD_FREELIST_SIZE \
((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))
/*
 * A page-sized batch of hugepte pointers waiting to be returned to
 * hugepte_cache from hugepd_free_rcu_callback().
 */
struct hugepd_freelist {
/* Handed to call_rcu_sched(); the callback recovers the batch from it. */
struct rcu_head rcu;
/* Number of ptes[] slots currently filled. */
unsigned int index;
/* Queued hugepte pointers; storage is the rest of the page. */
void *ptes[0];
};
/* Per-CPU batch currently being filled; NULL when no batch is open. */
static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur);
/*
 * Callback queued by call_rcu_sched(): return every hugepte collected in the
 * batch to hugepte_cache, then release the page that held the batch.
 *
 * @head: the rcu member embedded in the struct hugepd_freelist being freed
 */
static void hugepd_free_rcu_callback(struct rcu_head *head)
{
	struct hugepd_freelist *batch =
		container_of(head, struct hugepd_freelist, rcu);
	void **slot = batch->ptes;
	void **end = slot + batch->index;

	/* Only the first batch->index slots were ever filled. */
	while (slot < end)
		kmem_cache_free(hugepte_cache, *slot++);

	free_page((unsigned long)batch);
}
/*
 * Free a hugepte that has just been unhooked from the page tables.
 *
 * @tlb:     gather structure for the address space being torn down
 * @hugepte: object to return to hugepte_cache
 *
 * When the mm has fewer than two users, or its cpumask contains only the
 * current CPU, the hugepte is freed immediately.  Otherwise it is queued in
 * this CPU's batch, and a full batch is handed to call_rcu_sched() so the
 * actual free is deferred.
 */
static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
{
	struct hugepd_freelist **batchp;

	/*
	 * get_cpu_var() disables preemption, so the per-CPU batch pointer and
	 * the smp_processor_id() test below refer to the same CPU for the
	 * whole function.  Every exit path must go through put_cpu_var().
	 */
	batchp = &get_cpu_var(hugepd_freelist_cur);

	if (atomic_read(&tlb->mm->mm_users) < 2 ||
	    cpumask_equal(mm_cpumask(tlb->mm),
			  cpumask_of(smp_processor_id()))) {
		kmem_cache_free(hugepte_cache, hugepte);
		goto out;
	}

	if (*batchp == NULL) {
		/*
		 * NOTE(review): the GFP_ATOMIC allocation is not checked; a
		 * failure here dereferences NULL on the next line.  A safe
		 * fallback needs a way to wait for concurrent walkers first -
		 * confirm the intended policy before changing this.
		 */
		*batchp = (struct hugepd_freelist *)__get_free_page(GFP_ATOMIC);
		(*batchp)->index = 0;
	}

	(*batchp)->ptes[(*batchp)->index++] = hugepte;

	/* Batch is full: queue it for deferred freeing and start afresh. */
	if ((*batchp)->index == HUGEPD_FREELIST_SIZE) {
		call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback);
		*batchp = NULL;
	}
out:
	put_cpu_var(hugepd_freelist_cur);
}
#endif
static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift, static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
unsigned long start, unsigned long end, unsigned long start, unsigned long end,
unsigned long floor, unsigned long ceiling) unsigned long floor, unsigned long ceiling)
{ {
pte_t *hugepte = hugepd_page(*hpdp); pte_t *hugepte = hugepd_page(*hpdp);
unsigned shift = hugepd_shift(*hpdp); int i;
unsigned long pdmask = ~((1UL << pdshift) - 1); unsigned long pdmask = ~((1UL << pdshift) - 1);
unsigned int num_hugepd = 1;
#ifdef CONFIG_PPC64
unsigned int shift = hugepd_shift(*hpdp);
#else
/* Note: On 32-bit the hpdp may be the first of several */
num_hugepd = (1 << (hugepd_shift(*hpdp) - pdshift));
#endif
start &= pdmask; start &= pdmask;
if (start < floor) if (start < floor)
...@@ -224,9 +433,15 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif ...@@ -224,9 +433,15 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
if (end - 1 > ceiling - 1) if (end - 1 > ceiling - 1)
return; return;
for (i = 0; i < num_hugepd; i++, hpdp++)
hpdp->pd = 0; hpdp->pd = 0;
tlb->need_flush = 1; tlb->need_flush = 1;
#ifdef CONFIG_PPC64
pgtable_free_tlb(tlb, hugepte, pdshift - shift); pgtable_free_tlb(tlb, hugepte, pdshift - shift);
#else
hugepd_free(tlb, hugepte);
#endif
} }
static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
...@@ -331,18 +546,27 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, ...@@ -331,18 +546,27 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
* too. * too.
*/ */
pgd = pgd_offset(tlb->mm, addr);
do { do {
next = pgd_addr_end(addr, end); next = pgd_addr_end(addr, end);
pgd = pgd_offset(tlb->mm, addr);
if (!is_hugepd(pgd)) { if (!is_hugepd(pgd)) {
if (pgd_none_or_clear_bad(pgd)) if (pgd_none_or_clear_bad(pgd))
continue; continue;
hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
} else { } else {
#ifdef CONFIG_PPC32
/*
* Increment next by the size of the huge mapping since
* on 32-bit there may be more than one entry at the pgd
* level for a single hugepage, but all of them point to
* the same kmem cache that holds the hugepte.
*/
next = addr + (1 << hugepd_shift(*(hugepd_t *)pgd));
#endif
free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT, free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT,
addr, next, floor, ceiling); addr, next, floor, ceiling);
} }
} while (pgd++, addr = next, addr != end); } while (addr = next, addr != end);
} }
struct page * struct page *
...@@ -466,17 +690,35 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, ...@@ -466,17 +690,35 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long len, unsigned long pgoff,
unsigned long flags) unsigned long flags)
{ {
#ifdef CONFIG_MM_SLICES
struct hstate *hstate = hstate_file(file); struct hstate *hstate = hstate_file(file);
int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0); return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
#else
return get_unmapped_area(file, addr, len, pgoff, flags);
#endif
} }
unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
{ {
#ifdef CONFIG_MM_SLICES
unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start); unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
return 1UL << mmu_psize_to_shift(psize); return 1UL << mmu_psize_to_shift(psize);
#else
if (!is_vm_hugetlb_page(vma))
return PAGE_SIZE;
return huge_page_size(hstate_vma(vma));
#endif
}
/*
 * Return true when x is a power of four, i.e. a power of two whose base-2
 * logarithm is even; false otherwise (including for x == 0 when
 * is_power_of_2() rejects it).
 */
static inline bool is_power_of_4(unsigned long x)
{
	if (!is_power_of_2(x))
		return false;

	/* 4^k == 2^(2k): only even exponents qualify. */
	return (__ilog2(x) % 2) == 0;
}
static int __init add_huge_page_size(unsigned long long size) static int __init add_huge_page_size(unsigned long long size)
...@@ -486,9 +728,14 @@ static int __init add_huge_page_size(unsigned long long size) ...@@ -486,9 +728,14 @@ static int __init add_huge_page_size(unsigned long long size)
/* Check that it is a page size supported by the hardware and /* Check that it is a page size supported by the hardware and
* that it fits within pagetable and slice limits. */ * that it fits within pagetable and slice limits. */
#ifdef CONFIG_PPC_FSL_BOOK3E
if ((size < PAGE_SIZE) || !is_power_of_4(size))
return -EINVAL;
#else
if (!is_power_of_2(size) if (!is_power_of_2(size)
|| (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT)) || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT))
return -EINVAL; return -EINVAL;
#endif
if ((mmu_psize = shift_to_mmu_psize(shift)) < 0) if ((mmu_psize = shift_to_mmu_psize(shift)) < 0)
return -EINVAL; return -EINVAL;
...@@ -525,6 +772,46 @@ static int __init hugepage_setup_sz(char *str) ...@@ -525,6 +772,46 @@ static int __init hugepage_setup_sz(char *str)
} }
__setup("hugepagesz=", hugepage_setup_sz); __setup("hugepagesz=", hugepage_setup_sz);
#ifdef CONFIG_FSL_BOOKE
struct kmem_cache *hugepte_cache;
/*
 * Init-time setup of huge page support for this configuration: register the
 * huge page sizes, create the hugepte cache and select the default size.
 * Panics if the cache cannot be created or if the 4M size is not available.
 * Returns 0.
 */
static int __init hugetlbpage_init(void)
{
	int idx;

	for (idx = 0; idx < MMU_PAGE_COUNT; idx++) {
		unsigned int page_shift;

		/* A shift of 0 marks a size that is not supported. */
		if (mmu_psize_defs[idx].shift == 0)
			continue;

		/* The normal page size is not a huge page size. */
		page_shift = mmu_psize_to_shift(idx);
		if (page_shift == PAGE_SHIFT)
			continue;

		/* A size that cannot be registered is simply left out. */
		add_huge_page_size(1ULL << page_shift);
	}

	/*
	 * The low bits of a hugepd entry carry size information, so hugeptes
	 * are aligned to HUGEPD_SHIFT_MASK + 1 to keep those bits free.
	 */
	hugepte_cache = kmem_cache_create("hugepte-cache", sizeof(pte_t),
					  HUGEPD_SHIFT_MASK + 1, 0, NULL);
	if (!hugepte_cache)
		panic("%s: Unable to create kmem cache for hugeptes\n",
		      __func__);

	/* The default huge page size is 4M; it must be available. */
	if (!mmu_psize_defs[MMU_PAGE_4M].shift)
		panic("%s: Unable to set default huge page size\n", __func__);
	HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift;

	return 0;
}
#else
static int __init hugetlbpage_init(void) static int __init hugetlbpage_init(void)
{ {
int psize; int psize;
...@@ -567,15 +854,23 @@ static int __init hugetlbpage_init(void) ...@@ -567,15 +854,23 @@ static int __init hugetlbpage_init(void)
return 0; return 0;
} }
#endif
module_init(hugetlbpage_init); module_init(hugetlbpage_init);
void flush_dcache_icache_hugepage(struct page *page) void flush_dcache_icache_hugepage(struct page *page)
{ {
int i; int i;
void *start;
BUG_ON(!PageCompound(page)); BUG_ON(!PageCompound(page));
for (i = 0; i < (1UL << compound_order(page)); i++) for (i = 0; i < (1UL << compound_order(page)); i++) {
if (!PageHighMem(page)) {
__flush_dcache_icache(page_address(page+i)); __flush_dcache_icache(page_address(page+i));
} else {
start = kmap_atomic(page+i, KM_PPC_SYNC_ICACHE);
__flush_dcache_icache(start);
kunmap_atomic(start, KM_PPC_SYNC_ICACHE);
}
}
} }
...@@ -32,6 +32,8 @@ ...@@ -32,6 +32,8 @@
#include <linux/pagemap.h> #include <linux/pagemap.h>
#include <linux/memblock.h> #include <linux/memblock.h>
#include <linux/gfp.h> #include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
#include <asm/prom.h> #include <asm/prom.h>
...@@ -44,6 +46,7 @@ ...@@ -44,6 +46,7 @@
#include <asm/tlb.h> #include <asm/tlb.h>
#include <asm/sections.h> #include <asm/sections.h>
#include <asm/system.h> #include <asm/system.h>
#include <asm/hugetlb.h>
#include "mmu_decl.h" #include "mmu_decl.h"
...@@ -123,6 +126,12 @@ void __init MMU_init(void) ...@@ -123,6 +126,12 @@ void __init MMU_init(void)
/* parse args from command line */ /* parse args from command line */
MMU_setup(); MMU_setup();
/*
* Reserve gigantic pages for hugetlb. This MUST occur before
* lowmem_end_addr is initialized below.
*/
reserve_hugetlb_gpages();
if (memblock.memory.cnt > 1) { if (memblock.memory.cnt > 1) {
#ifndef CONFIG_WII #ifndef CONFIG_WII
memblock.memory.cnt = 1; memblock.memory.cnt = 1;
......
...@@ -548,4 +548,9 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, ...@@ -548,4 +548,9 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
return; return;
hash_preload(vma->vm_mm, address, access, trap); hash_preload(vma->vm_mm, address, access, trap);
#endif /* CONFIG_PPC_STD_MMU */ #endif /* CONFIG_PPC_STD_MMU */
#if (defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_FSL_BOOK3E)) \
&& defined(CONFIG_HUGETLB_PAGE)
if (is_vm_hugetlb_page(vma))
book3e_hugetlb_preload(vma->vm_mm, address, *ptep);
#endif
} }
...@@ -292,6 +292,11 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm) ...@@ -292,6 +292,11 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm)
mm->context.id = MMU_NO_CONTEXT; mm->context.id = MMU_NO_CONTEXT;
mm->context.active = 0; mm->context.active = 0;
#ifdef CONFIG_PPC_MM_SLICES
if (slice_mm_new_context(mm))
slice_set_user_psize(mm, mmu_virtual_psize);
#endif
return 0; return 0;
} }
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include <linux/init.h> #include <linux/init.h>
#include <linux/percpu.h> #include <linux/percpu.h>
#include <linux/hardirq.h> #include <linux/hardirq.h>
#include <linux/hugetlb.h>
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#include <asm/tlb.h> #include <asm/tlb.h>
...@@ -212,7 +213,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, ...@@ -212,7 +213,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
entry = set_access_flags_filter(entry, vma, dirty); entry = set_access_flags_filter(entry, vma, dirty);
changed = !pte_same(*(ptep), entry); changed = !pte_same(*(ptep), entry);
if (changed) { if (changed) {
if (!(vma->vm_flags & VM_HUGETLB)) if (!is_vm_hugetlb_page(vma))
assert_pte_locked(vma->vm_mm, address); assert_pte_locked(vma->vm_mm, address);
__ptep_set_access_flags(ptep, entry); __ptep_set_access_flags(ptep, entry);
flush_tlb_page_nohash(vma, address); flush_tlb_page_nohash(vma, address);
......
...@@ -553,24 +553,24 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV) ...@@ -553,24 +553,24 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV)
rldicl r11,r16,64-VPTE_PGD_SHIFT,64-PGD_INDEX_SIZE-3 rldicl r11,r16,64-VPTE_PGD_SHIFT,64-PGD_INDEX_SIZE-3
clrrdi r10,r11,3 clrrdi r10,r11,3
ldx r15,r10,r15 ldx r15,r10,r15
cmpldi cr0,r15,0 cmpdi cr0,r15,0
beq virt_page_table_tlb_miss_fault bge virt_page_table_tlb_miss_fault
#ifndef CONFIG_PPC_64K_PAGES #ifndef CONFIG_PPC_64K_PAGES
/* Get to PUD entry */ /* Get to PUD entry */
rldicl r11,r16,64-VPTE_PUD_SHIFT,64-PUD_INDEX_SIZE-3 rldicl r11,r16,64-VPTE_PUD_SHIFT,64-PUD_INDEX_SIZE-3
clrrdi r10,r11,3 clrrdi r10,r11,3
ldx r15,r10,r15 ldx r15,r10,r15
cmpldi cr0,r15,0 cmpdi cr0,r15,0
beq virt_page_table_tlb_miss_fault bge virt_page_table_tlb_miss_fault
#endif /* CONFIG_PPC_64K_PAGES */ #endif /* CONFIG_PPC_64K_PAGES */
/* Get to PMD entry */ /* Get to PMD entry */
rldicl r11,r16,64-VPTE_PMD_SHIFT,64-PMD_INDEX_SIZE-3 rldicl r11,r16,64-VPTE_PMD_SHIFT,64-PMD_INDEX_SIZE-3
clrrdi r10,r11,3 clrrdi r10,r11,3
ldx r15,r10,r15 ldx r15,r10,r15
cmpldi cr0,r15,0 cmpdi cr0,r15,0
beq virt_page_table_tlb_miss_fault bge virt_page_table_tlb_miss_fault
/* Ok, we're all right, we can now create a kernel translation for /* Ok, we're all right, we can now create a kernel translation for
* a 4K or 64K page from r16 -> r15. * a 4K or 64K page from r16 -> r15.
...@@ -802,24 +802,24 @@ htw_tlb_miss: ...@@ -802,24 +802,24 @@ htw_tlb_miss:
rldicl r11,r16,64-(PGDIR_SHIFT-3),64-PGD_INDEX_SIZE-3 rldicl r11,r16,64-(PGDIR_SHIFT-3),64-PGD_INDEX_SIZE-3
clrrdi r10,r11,3 clrrdi r10,r11,3
ldx r15,r10,r15 ldx r15,r10,r15
cmpldi cr0,r15,0 cmpdi cr0,r15,0
beq htw_tlb_miss_fault bge htw_tlb_miss_fault
#ifndef CONFIG_PPC_64K_PAGES #ifndef CONFIG_PPC_64K_PAGES
/* Get to PUD entry */ /* Get to PUD entry */
rldicl r11,r16,64-(PUD_SHIFT-3),64-PUD_INDEX_SIZE-3 rldicl r11,r16,64-(PUD_SHIFT-3),64-PUD_INDEX_SIZE-3
clrrdi r10,r11,3 clrrdi r10,r11,3
ldx r15,r10,r15 ldx r15,r10,r15
cmpldi cr0,r15,0 cmpdi cr0,r15,0
beq htw_tlb_miss_fault bge htw_tlb_miss_fault
#endif /* CONFIG_PPC_64K_PAGES */ #endif /* CONFIG_PPC_64K_PAGES */
/* Get to PMD entry */ /* Get to PMD entry */
rldicl r11,r16,64-(PMD_SHIFT-3),64-PMD_INDEX_SIZE-3 rldicl r11,r16,64-(PMD_SHIFT-3),64-PMD_INDEX_SIZE-3
clrrdi r10,r11,3 clrrdi r10,r11,3
ldx r15,r10,r15 ldx r15,r10,r15
cmpldi cr0,r15,0 cmpdi cr0,r15,0
beq htw_tlb_miss_fault bge htw_tlb_miss_fault
/* Ok, we're all right, we can now create an indirect entry for /* Ok, we're all right, we can now create an indirect entry for
* a 1M or 256M page. * a 1M or 256M page.
......
...@@ -36,14 +36,49 @@ ...@@ -36,14 +36,49 @@
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/memblock.h> #include <linux/memblock.h>
#include <linux/of_fdt.h> #include <linux/of_fdt.h>
#include <linux/hugetlb.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#include <asm/tlb.h> #include <asm/tlb.h>
#include <asm/code-patching.h> #include <asm/code-patching.h>
#include <asm/hugetlb.h>
#include "mmu_decl.h" #include "mmu_decl.h"
#ifdef CONFIG_PPC_BOOK3E /*
* This struct lists the sw-supported page sizes. The hardware MMU may support
* other sizes not listed here. The .ind field is only used on MMUs that have
* indirect page table entries.
*/
#ifdef CONFIG_PPC_BOOK3E_MMU
#ifdef CONFIG_FSL_BOOKE
/*
 * Page sizes used by software in this configuration: 4K plus the huge page
 * sizes 4M, 16M, 64M, 256M and 1G.  Each entry gives the page shift and the
 * matching BOOK3E_PAGESZ_* encoding.  Sizes not listed stay zero-initialized,
 * so their .shift is 0.
 */
struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
	[MMU_PAGE_4K]	= { .shift = 12, .enc = BOOK3E_PAGESZ_4K },
	[MMU_PAGE_4M]	= { .shift = 22, .enc = BOOK3E_PAGESZ_4M },
	[MMU_PAGE_16M]	= { .shift = 24, .enc = BOOK3E_PAGESZ_16M },
	[MMU_PAGE_64M]	= { .shift = 26, .enc = BOOK3E_PAGESZ_64M },
	[MMU_PAGE_256M]	= { .shift = 28, .enc = BOOK3E_PAGESZ_256M },
	[MMU_PAGE_1G]	= { .shift = 30, .enc = BOOK3E_PAGESZ_1GB },
};
#else
struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
[MMU_PAGE_4K] = { [MMU_PAGE_4K] = {
.shift = 12, .shift = 12,
...@@ -77,6 +112,8 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { ...@@ -77,6 +112,8 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
.enc = BOOK3E_PAGESZ_1GB, .enc = BOOK3E_PAGESZ_1GB,
}, },
}; };
#endif /* CONFIG_FSL_BOOKE */
static inline int mmu_get_tsize(int psize) static inline int mmu_get_tsize(int psize)
{ {
return mmu_psize_defs[psize].enc; return mmu_psize_defs[psize].enc;
...@@ -87,7 +124,7 @@ static inline int mmu_get_tsize(int psize) ...@@ -87,7 +124,7 @@ static inline int mmu_get_tsize(int psize)
/* This isn't used on !Book3E for now */ /* This isn't used on !Book3E for now */
return 0; return 0;
} }
#endif #endif /* CONFIG_PPC_BOOK3E_MMU */
/* The variables below are currently only used on 64-bit Book3E /* The variables below are currently only used on 64-bit Book3E
* though this will probably be made common with other nohash * though this will probably be made common with other nohash
...@@ -266,6 +303,11 @@ void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, ...@@ -266,6 +303,11 @@ void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{ {
#ifdef CONFIG_HUGETLB_PAGE
if (is_vm_hugetlb_page(vma))
flush_hugetlb_page(vma, vmaddr);
#endif
__flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, __flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
mmu_get_tsize(mmu_virtual_psize), 0); mmu_get_tsize(mmu_virtual_psize), 0);
} }
......
...@@ -69,6 +69,7 @@ config PPC_BOOK3S_64 ...@@ -69,6 +69,7 @@ config PPC_BOOK3S_64
bool "Server processors" bool "Server processors"
select PPC_FPU select PPC_FPU
select PPC_HAVE_PMU_SUPPORT select PPC_HAVE_PMU_SUPPORT
select SYS_SUPPORTS_HUGETLBFS
config PPC_BOOK3E_64 config PPC_BOOK3E_64
bool "Embedded processors" bool "Embedded processors"
...@@ -173,6 +174,7 @@ config BOOKE ...@@ -173,6 +174,7 @@ config BOOKE
config FSL_BOOKE config FSL_BOOKE
bool bool
depends on (E200 || E500) && PPC32 depends on (E200 || E500) && PPC32
select SYS_SUPPORTS_HUGETLBFS if PHYS_64BIT
default y default y
# this is for common code between PPC32 & PPC64 FSL BOOKE # this is for common code between PPC32 & PPC64 FSL BOOKE
...@@ -296,7 +298,7 @@ config PPC_BOOK3E_MMU ...@@ -296,7 +298,7 @@ config PPC_BOOK3E_MMU
config PPC_MM_SLICES config PPC_MM_SLICES
bool bool
default y if HUGETLB_PAGE || (PPC_STD_MMU_64 && PPC_64K_PAGES) default y if (PPC64 && HUGETLB_PAGE) || (PPC_STD_MMU_64 && PPC_64K_PAGES)
default n default n
config VIRT_CPU_ACCOUNTING config VIRT_CPU_ACCOUNTING
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment