Commit 0dd5b7b0 authored by David S. Miller

sparc64: Fix physical memory management regressions with large max_phys_bits.

If max_phys_bits needs to be > 43 (e.g. for T4 chips), things like
DEBUG_PAGEALLOC stop working because the 3-level page tables can only
cover up to 43 bits.

Another problem is that when we increased MAX_PHYS_ADDRESS_BITS up to
47, several statically allocated tables became enormous.

Compounding this is that we will need to support up to 49 bits of
physical addressing for M7 chips.

The two tables in question are sparc64_valid_addr_bitmap and
kpte_linear_bitmap.

The first holds a bitmap, with 1 bit for each 4MB chunk of physical
memory, indicating whether that chunk actually exists in the machine
and is valid.
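
For reference, the check this table backs is the __kern_addr_valid()
helper being removed below, which boils down to:

	if ((paddr >> MAX_PHYS_ADDRESS_BITS) != 0UL)
		return false;
	return test_bit(paddr >> ILOG2_4MB, sparc64_valid_addr_bitmap);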

The second table is a set of 2-bit values which record how large a
mapping (4MB, 256MB, 2GB, or 16GB, respectively) we can use at each
256MB chunk of RAM in the system.
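
In C terms, the lookup this table supports works out to roughly the
following sketch (the removed TLB-miss assembly in ktlb.S below does
this inline; the helper name here is illustrative only):

	static unsigned long kpte_xor_for(unsigned long paddr)
	{
		/* 32 two-bit fields per 64-bit word. */
		unsigned long idx = paddr >> ILOG2_256MB;
		unsigned long word = kpte_linear_bitmap[idx / 32];
		unsigned long sel = (word >> ((idx % 32) * 2)) & 3;

		/* sel: 0=4MB, 1=256MB, 2=2GB, 3=16GB PTE bits. */
		return kern_linear_pte_xor[sel];
	}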

These tables are huge and take up an enormous amount of the BSS
section of the sparc64 kernel image.  Specifically,
sparc64_valid_addr_bitmap is 4MB and kpte_linear_bitmap is 128K: with
MAX_PHYS_ADDRESS_BITS at 47, the first needs 2^47 / 2^22 = 2^25 bits
(4MB) and the second needs (2^47 / 2^28) * 2 = 2^20 bits (128K).

So let's solve the space wastage and the DEBUG_PAGEALLOC problem
at the same time, by using the kernel page tables (as designed) to
manage this information.

We have to keep using large mappings when DEBUG_PAGEALLOC is disabled,
and we do this by encoding huge PMDs and PUDs.
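
Stripped down from kernel_map_hugepmd() in this patch, encoding a 4MB
linear mapping directly in a PMD entry amounts to:

	pmd_val(*pmd) = (vstart ^ kern_linear_pte_xor[0]) | _PAGE_PMD_HUGE;

with kern_linear_pte_xor[1..3] used the same way for the 256MB, 2GB,
and 16GB page sizes at the PMD and PUD levels.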

On a T4-2 with 256GB of RAM the kernel page table takes up 16K with
DEBUG_PAGEALLOC disabled and 256MB with it enabled.  Furthermore, this
memory is dynamically allocated at run time rather than coded
statically into the kernel image.
Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Bob Picco <bob.picco@oracle.com>
parent 8c82dc0e
@@ -128,9 +128,6 @@ extern unsigned long PAGE_OFFSET;
  */
 #define MAX_PHYS_ADDRESS_BITS	47
 
-/* These two shift counts are used when indexing sparc64_valid_addr_bitmap
- * and kpte_linear_bitmap.
- */
 #define ILOG2_4MB		22
 #define ILOG2_256MB		28
...
@@ -79,22 +79,7 @@
 #include <linux/sched.h>
 
-extern unsigned long sparc64_valid_addr_bitmap[];
-
-/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
-static inline bool __kern_addr_valid(unsigned long paddr)
-{
-	if ((paddr >> MAX_PHYS_ADDRESS_BITS) != 0UL)
-		return false;
-	return test_bit(paddr >> ILOG2_4MB, sparc64_valid_addr_bitmap);
-}
-
-static inline bool kern_addr_valid(unsigned long addr)
-{
-	unsigned long paddr = __pa(addr);
-
-	return __kern_addr_valid(paddr);
-}
+bool kern_addr_valid(unsigned long addr);
 
 /* Entries per page directory level. */
 #define PTRS_PER_PTE	(1UL << (PAGE_SHIFT-3))
@@ -122,6 +107,7 @@ static inline bool kern_addr_valid(unsigned long addr)
 #define _PAGE_R		_AC(0x8000000000000000,UL) /* Keep ref bit uptodate*/
 #define _PAGE_SPECIAL	_AC(0x0200000000000000,UL) /* Special page         */
 #define _PAGE_PMD_HUGE	_AC(0x0100000000000000,UL) /* Huge page            */
+#define _PAGE_PUD_HUGE	_PAGE_PMD_HUGE
 
 /* Advertise support for _PAGE_SPECIAL */
 #define __HAVE_ARCH_PTE_SPECIAL
@@ -668,26 +654,26 @@ static inline unsigned long pmd_large(pmd_t pmd)
 	return pte_val(pte) & _PAGE_PMD_HUGE;
 }
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static inline unsigned long pmd_young(pmd_t pmd)
+static inline unsigned long pmd_pfn(pmd_t pmd)
 {
 	pte_t pte = __pte(pmd_val(pmd));
 
-	return pte_young(pte);
+	return pte_pfn(pte);
 }
 
-static inline unsigned long pmd_write(pmd_t pmd)
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline unsigned long pmd_young(pmd_t pmd)
 {
 	pte_t pte = __pte(pmd_val(pmd));
 
-	return pte_write(pte);
+	return pte_young(pte);
 }
 
-static inline unsigned long pmd_pfn(pmd_t pmd)
+static inline unsigned long pmd_write(pmd_t pmd)
 {
 	pte_t pte = __pte(pmd_val(pmd));
 
-	return pte_pfn(pte);
+	return pte_write(pte);
 }
 
 static inline unsigned long pmd_trans_huge(pmd_t pmd)
@@ -781,18 +767,15 @@ static inline int pmd_present(pmd_t pmd)
  * the top bits outside of the range of any physical address size we
  * support are clear as well.  We also validate the physical itself.
  */
-#define pmd_bad(pmd)			((pmd_val(pmd) & ~PAGE_MASK) || \
-					 !__kern_addr_valid(pmd_val(pmd)))
+#define pmd_bad(pmd)			(pmd_val(pmd) & ~PAGE_MASK)
 
 #define pud_none(pud)			(!pud_val(pud))
 
-#define pud_bad(pud)			((pud_val(pud) & ~PAGE_MASK) || \
-					 !__kern_addr_valid(pud_val(pud)))
+#define pud_bad(pud)			(pud_val(pud) & ~PAGE_MASK)
 
 #define pgd_none(pgd)			(!pgd_val(pgd))
 
-#define pgd_bad(pgd)			((pgd_val(pgd) & ~PAGE_MASK) || \
-					 !__kern_addr_valid(pgd_val(pgd)))
+#define pgd_bad(pgd)			(pgd_val(pgd) & ~PAGE_MASK)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 void set_pmd_at(struct mm_struct *mm, unsigned long addr,
@@ -835,6 +818,20 @@ static inline unsigned long __pmd_page(pmd_t pmd)
 #define pgd_present(pgd)		(pgd_val(pgd) != 0U)
 #define pgd_clear(pgdp)			(pgd_val(*(pgd)) = 0UL)
 
+static inline unsigned long pud_large(pud_t pud)
+{
+	pte_t pte = __pte(pud_val(pud));
+
+	return pte_val(pte) & _PAGE_PMD_HUGE;
+}
+
+static inline unsigned long pud_pfn(pud_t pud)
+{
+	pte_t pte = __pte(pud_val(pud));
+
+	return pte_pfn(pte);
+}
+
 /* Same in both SUN4V and SUN4U.  */
 #define pte_none(pte)			(!pte_val(pte))
...
@@ -133,9 +133,24 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
 	sub	TSB, 0x8, TSB; \
 	TSB_STORE(TSB, TAG);
 
-	/* Do a kernel page table walk.  Leaves physical PTE pointer in
-	 * REG1.  Jumps to FAIL_LABEL on early page table walk termination.
-	 * VADDR will not be clobbered, but REG2 will.
+	/* Do a kernel page table walk.  Leaves valid PTE value in
+	 * REG1.  Jumps to FAIL_LABEL on early page table walk
+	 * termination.  VADDR will not be clobbered, but REG2 will.
+	 *
+	 * There are two masks we must apply to propagate bits from
+	 * the virtual address into the PTE physical address field
+	 * when dealing with huge pages.  This is because the page
+	 * table boundaries do not match the huge page size(s) the
+	 * hardware supports.
+	 *
+	 * In these cases we propagate the bits that are below the
+	 * page table level where we saw the huge page mapping, but
+	 * are still within the relevant physical bits for the huge
+	 * page size in question.  So for PMD mappings (which fall on
+	 * bit 23, for 8MB per PMD) we must propagate bit 22 for a
+	 * 4MB huge page.  For huge PUDs (which fall on bit 33, for
+	 * 8GB per PUD), we have to accommodate 256MB and 2GB huge
+	 * pages.  So for those we propagate bits 32 to 28.
 	 */
 #define KERN_PGTABLE_WALK(VADDR, REG1, REG2, FAIL_LABEL)	\
 	sethi		%hi(swapper_pg_dir), REG1; \
@@ -150,15 +165,35 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
 	andn		REG2, 0x7, REG2; \
 	ldxa		[REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
 	brz,pn		REG1, FAIL_LABEL; \
+	 sethi		%uhi(_PAGE_PUD_HUGE), REG2; \
+	brz,pn		REG1, FAIL_LABEL; \
+	 sllx		REG2, 32, REG2; \
+	andcc		REG1, REG2, %g0; \
+	sethi		%hi(0xf8000000), REG2; \
+	bne,pt		%xcc, 697f; \
+	 sllx		REG2, 1, REG2; \
 	 sllx		VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
 	srlx		REG2, 64 - PAGE_SHIFT, REG2; \
 	andn		REG2, 0x7, REG2; \
 	ldxa		[REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+	sethi		%uhi(_PAGE_PMD_HUGE), REG2; \
 	brz,pn		REG1, FAIL_LABEL; \
-	 sllx		VADDR, 64 - PMD_SHIFT, REG2; \
+	 sllx		REG2, 32, REG2; \
+	andcc		REG1, REG2, %g0; \
+	be,pn		%xcc, 698f; \
+	 sethi		%hi(0x400000), REG2; \
+697:	brgez,pn	REG1, FAIL_LABEL; \
+	 andn		REG1, REG2, REG1; \
+	and		VADDR, REG2, REG2; \
+	ba,pt		%xcc, 699f; \
+	 or		REG1, REG2, REG1; \
+698:	sllx		VADDR, 64 - PMD_SHIFT, REG2; \
 	srlx		REG2, 64 - PAGE_SHIFT, REG2; \
 	andn		REG2, 0x7, REG2; \
-	add		REG1, REG2, REG1;
+	ldxa		[REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+	brgez,pn	REG1, FAIL_LABEL; \
+	 nop; \
+699:
 
 	/* PMD has been loaded into REG1, interpret the value, seeing
 	 * if it is a HUGE PMD or a normal one.  If it is not valid
...
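Rendered as C, the bit propagation the KERN_PGTABLE_WALK comment above
describes is roughly the following (an illustrative sketch only; the
macro does this inline in assembly, and the function name is made up):

	static unsigned long propagate_va_bits(unsigned long pte,
					       unsigned long vaddr,
					       bool pud_level)
	{
		/* Bit 22 for a 4MB page under an 8MB PMD; bits 32..28
		 * (0xf8000000 << 1) for 256MB/2GB pages under an 8GB PUD.
		 */
		unsigned long mask = pud_level ? 0x1f0000000UL : 0x400000UL;

		return (pte & ~mask) | (vaddr & mask);
	}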
@@ -47,14 +47,6 @@ kvmap_itlb_vmalloc_addr:
 	KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath)
 
 	TSB_LOCK_TAG(%g1, %g2, %g7)
-
-	/* Load and check PTE.  */
-	ldxa		[%g5] ASI_PHYS_USE_EC, %g5
-	mov		1, %g7
-	sllx		%g7, TSB_TAG_INVALID_BIT, %g7
-	brgez,a,pn	%g5, kvmap_itlb_longpath
-	 TSB_STORE(%g1, %g7)
-
 	TSB_WRITE(%g1, %g5, %g6)
 
 	/* fallthrough to TLB load */
@@ -118,6 +110,12 @@ kvmap_dtlb_obp:
 	ba,pt		%xcc, kvmap_dtlb_load
 	 nop
 
+kvmap_linear_early:
+	sethi		%hi(kern_linear_pte_xor), %g7
+	ldx		[%g7 + %lo(kern_linear_pte_xor)], %g2
+	ba,pt		%xcc, kvmap_dtlb_tsb4m_load
+	 xor		%g2, %g4, %g5
+
 	.align		32
 kvmap_dtlb_tsb4m_load:
 	TSB_LOCK_TAG(%g1, %g2, %g7)
@@ -146,105 +144,17 @@ kvmap_dtlb_4v:
 	/* Correct TAG_TARGET is already in %g6, check 4mb TSB.  */
 	KERN_TSB4M_LOOKUP_TL1(%g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load)
 #endif
 
-	/* TSB entry address left in %g1, lookup linear PTE.
-	 * Must preserve %g1 and %g6 (TAG).
-	 */
-kvmap_dtlb_tsb4m_miss:
-	/* Clear the PAGE_OFFSET top virtual bits, shift
-	 * down to get PFN, and make sure PFN is in range.
-	 */
-661:	sllx		%g4, 0, %g5
-	.section	.page_offset_shift_patch, "ax"
-	.word		661b
-	.previous
-
-	/* Check to see if we know about valid memory at the 4MB
-	 * chunk this physical address will reside within.
-	 */
-661:	srlx		%g5, MAX_PHYS_ADDRESS_BITS, %g2
-	.section	.page_offset_shift_patch, "ax"
-	.word		661b
-	.previous
-
-	brnz,pn		%g2, kvmap_dtlb_longpath
-	 nop
-
-	/* This unconditional branch and delay-slot nop gets patched
-	 * by the sethi sequence once the bitmap is properly setup.
-	 */
-	.globl		valid_addr_bitmap_insn
-valid_addr_bitmap_insn:
-	ba,pt		%xcc, 2f
-	 nop
-	.subsection	2
-	.globl		valid_addr_bitmap_patch
-valid_addr_bitmap_patch:
-	sethi		%hi(sparc64_valid_addr_bitmap), %g7
-	or		%g7, %lo(sparc64_valid_addr_bitmap), %g7
-	.previous
-
-661:	srlx		%g5, ILOG2_4MB, %g2
-	.section	.page_offset_shift_patch, "ax"
-	.word		661b
-	.previous
-
-	srlx		%g2, 6, %g5
-	and		%g2, 63, %g2
-	sllx		%g5, 3, %g5
-	ldx		[%g7 + %g5], %g5
-	mov		1, %g7
-	sllx		%g7, %g2, %g7
-	andcc		%g5, %g7, %g0
-	be,pn		%xcc, kvmap_dtlb_longpath
-2:	 sethi		%hi(kpte_linear_bitmap), %g2
-
-	/* Get the 256MB physical address index. */
-661:	sllx		%g4, 0, %g5
-	.section	.page_offset_shift_patch, "ax"
-	.word		661b
-	.previous
-
-	or		%g2, %lo(kpte_linear_bitmap), %g2
-
-661:	srlx		%g5, ILOG2_256MB, %g5
-	.section	.page_offset_shift_patch, "ax"
-	.word		661b
-	.previous
-
-	and		%g5, (32 - 1), %g7
-
-	/* Divide by 32 to get the offset into the bitmask.  */
-	srlx		%g5, 5, %g5
-	add		%g7, %g7, %g7
-	sllx		%g5, 3, %g5
-
-	/* kern_linear_pte_xor[(mask >> shift) & 3)] */
-	ldx		[%g2 + %g5], %g2
-	srlx		%g2, %g7, %g7
-	sethi		%hi(kern_linear_pte_xor), %g5
-	and		%g7, 3, %g7
-	or		%g5, %lo(kern_linear_pte_xor), %g5
-	sllx		%g7, 3, %g7
-	ldx		[%g5 + %g7], %g2
+	/* Linear mapping TSB lookup failed.  Fallthrough to kernel
+	 * page table based lookup.
+	 */
 
 	.globl		kvmap_linear_patch
 kvmap_linear_patch:
-	ba,pt		%xcc, kvmap_dtlb_tsb4m_load
-	 xor		%g2, %g4, %g5
+	ba,a,pt		%xcc, kvmap_linear_early
 
 kvmap_dtlb_vmalloc_addr:
 	KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath)
 
 	TSB_LOCK_TAG(%g1, %g2, %g7)
-
-	/* Load and check PTE.  */
-	ldxa		[%g5] ASI_PHYS_USE_EC, %g5
-	mov		1, %g7
-	sllx		%g7, TSB_TAG_INVALID_BIT, %g7
-	brgez,a,pn	%g5, kvmap_dtlb_longpath
-	 TSB_STORE(%g1, %g7)
-
 	TSB_WRITE(%g1, %g5, %g6)
 
 	/* fallthrough to TLB load */
...
@@ -122,11 +122,6 @@ SECTIONS
 		*(.swapper_4m_tsb_phys_patch)
 		__swapper_4m_tsb_phys_patch_end = .;
 	}
-	.page_offset_shift_patch : {
-		__page_offset_shift_patch = .;
-		*(.page_offset_shift_patch)
-		__page_offset_shift_patch_end = .;
-	}
 	.popc_3insn_patch : {
 		__popc_3insn_patch = .;
 		*(.popc_3insn_patch)
...
@@ -75,7 +75,6 @@ unsigned long kern_linear_pte_xor[4] __read_mostly;
  * 'cpu' properties, but we need to have this table setup before the
  * MDESC is initialized.
  */
-unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
 
 #ifndef CONFIG_DEBUG_PAGEALLOC
 /* A special kernel TSB for 4MB, 256MB, 2GB and 16GB linear mappings.
@@ -84,6 +83,7 @@ unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
  */
 extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES];
 #endif
+extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
 
 static unsigned long cpu_pgsz_mask;
@@ -165,10 +165,6 @@ static void __init read_obp_memory(const char *property,
 			cmp_p64, NULL);
 }
 
-unsigned long sparc64_valid_addr_bitmap[VALID_ADDR_BITMAP_BYTES /
-					sizeof(unsigned long)];
-EXPORT_SYMBOL(sparc64_valid_addr_bitmap);
-
 /* Kernel physical address base and size in bytes.  */
 unsigned long kern_base __read_mostly;
 unsigned long kern_size __read_mostly;
@@ -1369,9 +1365,145 @@ static unsigned long __init bootmem_init(unsigned long phys_base)
 static struct linux_prom64_registers pall[MAX_BANKS] __initdata;
 static int pall_ents __initdata;
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
+static unsigned long max_phys_bits = 40;
+
+bool kern_addr_valid(unsigned long addr)
+{
+	unsigned long above = ((long)addr) >> max_phys_bits;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	if (above != 0 && above != -1UL)
+		return false;
+
+	if (addr >= (unsigned long) KERNBASE &&
+	    addr < (unsigned long)&_end)
+		return true;
+
+	if (addr >= PAGE_OFFSET) {
+		unsigned long pa = __pa(addr);
+
+		return pfn_valid(pa >> PAGE_SHIFT);
+	}
+
+	pgd = pgd_offset_k(addr);
+	if (pgd_none(*pgd))
+		return 0;
+
+	pud = pud_offset(pgd, addr);
+	if (pud_none(*pud))
+		return 0;
+
+	if (pud_large(*pud))
+		return pfn_valid(pud_pfn(*pud));
+
+	pmd = pmd_offset(pud, addr);
+	if (pmd_none(*pmd))
+		return 0;
+
+	if (pmd_large(*pmd))
+		return pfn_valid(pmd_pfn(*pmd));
+
+	pte = pte_offset_kernel(pmd, addr);
+	if (pte_none(*pte))
+		return 0;
+
+	return pfn_valid(pte_pfn(*pte));
+}
+EXPORT_SYMBOL(kern_addr_valid);
+
+static unsigned long __ref kernel_map_hugepud(unsigned long vstart,
+					      unsigned long vend,
+					      pud_t *pud)
+{
+	const unsigned long mask16gb = (1UL << 34) - 1UL;
+	u64 pte_val = vstart;
+
+	/* Each PUD is 8GB */
+	if ((vstart & mask16gb) ||
+	    (vend - vstart <= mask16gb)) {
+		pte_val ^= kern_linear_pte_xor[2];
+		pud_val(*pud) = pte_val | _PAGE_PUD_HUGE;
+
+		return vstart + PUD_SIZE;
+	}
+
+	pte_val ^= kern_linear_pte_xor[3];
+	pte_val |= _PAGE_PUD_HUGE;
+
+	vend = vstart + mask16gb + 1UL;
+
+	while (vstart < vend) {
+		pud_val(*pud) = pte_val;
+
+		pte_val += PUD_SIZE;
+		vstart += PUD_SIZE;
+		pud++;
+	}
+	return vstart;
+}
+
+static bool kernel_can_map_hugepud(unsigned long vstart, unsigned long vend,
+				   bool guard)
+{
+	if (guard && !(vstart & ~PUD_MASK) && (vend - vstart) >= PUD_SIZE)
+		return true;
+
+	return false;
+}
+
+static unsigned long __ref kernel_map_hugepmd(unsigned long vstart,
+					      unsigned long vend,
+					      pmd_t *pmd)
+{
+	const unsigned long mask256mb = (1UL << 28) - 1UL;
+	const unsigned long mask2gb = (1UL << 31) - 1UL;
+	u64 pte_val = vstart;
+
+	/* Each PMD is 8MB */
+	if ((vstart & mask256mb) ||
+	    (vend - vstart <= mask256mb)) {
+		pte_val ^= kern_linear_pte_xor[0];
+		pmd_val(*pmd) = pte_val | _PAGE_PMD_HUGE;
+
+		return vstart + PMD_SIZE;
+	}
+
+	if ((vstart & mask2gb) ||
+	    (vend - vstart <= mask2gb)) {
+		pte_val ^= kern_linear_pte_xor[1];
+		pte_val |= _PAGE_PMD_HUGE;
+		vend = vstart + mask256mb + 1UL;
+	} else {
+		pte_val ^= kern_linear_pte_xor[2];
+		pte_val |= _PAGE_PMD_HUGE;
+		vend = vstart + mask2gb + 1UL;
+	}
+
+	while (vstart < vend) {
+		pmd_val(*pmd) = pte_val;
+
+		pte_val += PMD_SIZE;
+		vstart += PMD_SIZE;
+		pmd++;
+	}
+	return vstart;
+}
+
+static bool kernel_can_map_hugepmd(unsigned long vstart, unsigned long vend,
+				   bool guard)
+{
+	if (guard && !(vstart & ~PMD_MASK) && (vend - vstart) >= PMD_SIZE)
+		return true;
+
+	return false;
+}
+
 static unsigned long __ref kernel_map_range(unsigned long pstart,
-					    unsigned long pend, pgprot_t prot)
+					    unsigned long pend, pgprot_t prot,
+					    bool use_huge)
 {
 	unsigned long vstart = PAGE_OFFSET + pstart;
 	unsigned long vend = PAGE_OFFSET + pend;
@@ -1401,15 +1533,23 @@ static unsigned long __ref kernel_map_range(unsigned long pstart,
 		if (pud_none(*pud)) {
 			pmd_t *new;
 
+			if (kernel_can_map_hugepud(vstart, vend, use_huge)) {
+				vstart = kernel_map_hugepud(vstart, vend, pud);
+				continue;
+			}
 			new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
 			alloc_bytes += PAGE_SIZE;
 			pud_populate(&init_mm, pud, new);
 		}
 
 		pmd = pmd_offset(pud, vstart);
-		if (!pmd_present(*pmd)) {
+		if (pmd_none(*pmd)) {
 			pte_t *new;
 
+			if (kernel_can_map_hugepmd(vstart, vend, use_huge)) {
+				vstart = kernel_map_hugepmd(vstart, vend, pmd);
+				continue;
+			}
 			new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
 			alloc_bytes += PAGE_SIZE;
 			pmd_populate_kernel(&init_mm, pmd, new);
@@ -1432,100 +1572,34 @@ static unsigned long __ref kernel_map_range(unsigned long pstart,
 	return alloc_bytes;
 }
 
-extern unsigned int kvmap_linear_patch[1];
-#endif /* CONFIG_DEBUG_PAGEALLOC */
-
-static void __init kpte_set_val(unsigned long index, unsigned long val)
+static void __init flush_all_kernel_tsbs(void)
 {
-	unsigned long *ptr = kpte_linear_bitmap;
-
-	val <<= ((index % (BITS_PER_LONG / 2)) * 2);
-	ptr += (index / (BITS_PER_LONG / 2));
-
-	*ptr |= val;
-}
-
-static const unsigned long kpte_shift_min = 28; /* 256MB */
-static const unsigned long kpte_shift_max = 34; /* 16GB */
-static const unsigned long kpte_shift_incr = 3;
-
-static unsigned long kpte_mark_using_shift(unsigned long start, unsigned long end,
-					   unsigned long shift)
-{
-	unsigned long size = (1UL << shift);
-	unsigned long mask = (size - 1UL);
-	unsigned long remains = end - start;
-	unsigned long val;
-
-	if (remains < size || (start & mask))
-		return start;
-
-	/* VAL maps:
-	 *
-	 *	shift 28 --> kern_linear_pte_xor index 1
-	 *	shift 31 --> kern_linear_pte_xor index 2
-	 *	shift 34 --> kern_linear_pte_xor index 3
-	 */
-	val = ((shift - kpte_shift_min) / kpte_shift_incr) + 1;
-
-	remains &= ~mask;
-	if (shift != kpte_shift_max)
-		remains = size;
-
-	while (remains) {
-		unsigned long index = start >> kpte_shift_min;
+	int i;
 
-		kpte_set_val(index, val);
+	for (i = 0; i < KERNEL_TSB_NENTRIES; i++) {
+		struct tsb *ent = &swapper_tsb[i];
 
-		start += 1UL << kpte_shift_min;
-		remains -= 1UL << kpte_shift_min;
+		ent->tag = (1UL << TSB_TAG_INVALID_BIT);
 	}
-
-	return start;
-}
-
-static void __init mark_kpte_bitmap(unsigned long start, unsigned long end)
-{
-	unsigned long smallest_size, smallest_mask;
-	unsigned long s;
-
-	smallest_size = (1UL << kpte_shift_min);
-	smallest_mask = (smallest_size - 1UL);
-
-	while (start < end) {
-		unsigned long orig_start = start;
-
-		for (s = kpte_shift_max; s >= kpte_shift_min; s -= kpte_shift_incr) {
-			start = kpte_mark_using_shift(start, end, s);
-			if (start != orig_start)
-				break;
-		}
+#ifndef CONFIG_DEBUG_PAGEALLOC
+	for (i = 0; i < KERNEL_TSB4M_NENTRIES; i++) {
+		struct tsb *ent = &swapper_4m_tsb[i];
 
-		if (start == orig_start)
-			start = (start + smallest_size) & ~smallest_mask;
+		ent->tag = (1UL << TSB_TAG_INVALID_BIT);
 	}
-}
-
-static void __init init_kpte_bitmap(void)
-{
-	unsigned long i;
-
-	for (i = 0; i < pall_ents; i++) {
-		unsigned long phys_start, phys_end;
-
-		phys_start = pall[i].phys_addr;
-		phys_end = phys_start + pall[i].reg_size;
-
-		mark_kpte_bitmap(phys_start, phys_end);
-	}
+#endif
 }
 
+extern unsigned int kvmap_linear_patch[1];
+
 static void __init kernel_physical_mapping_init(void)
 {
-#ifdef CONFIG_DEBUG_PAGEALLOC
 	unsigned long i, mem_alloced = 0UL;
+	bool use_huge = true;
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	use_huge = false;
+#endif
 	for (i = 0; i < pall_ents; i++) {
 		unsigned long phys_start, phys_end;
@@ -1533,7 +1607,7 @@ static void __init kernel_physical_mapping_init(void)
 		phys_end = phys_start + pall[i].reg_size;
 
 		mem_alloced += kernel_map_range(phys_start, phys_end,
-						PAGE_KERNEL);
+						PAGE_KERNEL, use_huge);
 	}
 
 	printk("Allocated %ld bytes for kernel page tables.\n",
@@ -1542,8 +1616,9 @@ static void __init kernel_physical_mapping_init(void)
 	kvmap_linear_patch[0] = 0x01000000; /* nop */
 	flushi(&kvmap_linear_patch[0]);
 
+	flush_all_kernel_tsbs();
+
 	__flush_tlb_all();
-#endif
 }
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
@@ -1553,7 +1628,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
 	unsigned long phys_end = phys_start + (numpages * PAGE_SIZE);
 
 	kernel_map_range(phys_start, phys_end,
-			 (enable ? PAGE_KERNEL : __pgprot(0)));
+			 (enable ? PAGE_KERNEL : __pgprot(0)), false);
 
 	flush_tsb_kernel_range(PAGE_OFFSET + phys_start,
 			       PAGE_OFFSET + phys_end);
@@ -1581,62 +1656,11 @@ unsigned long __init find_ecache_flush_span(unsigned long size)
 unsigned long PAGE_OFFSET;
 EXPORT_SYMBOL(PAGE_OFFSET);
 
-static void __init page_offset_shift_patch_one(unsigned int *insn, unsigned long phys_bits)
-{
-	unsigned long final_shift;
-	unsigned int val = *insn;
-	unsigned int cnt;
-
-	/* We are patching in ilog2(max_supported_phys_address), and
-	 * we are doing so in a manner similar to a relocation addend.
-	 * That is, we are adding the shift value to whatever value
-	 * is in the shift instruction count field already.
-	 */
-	cnt = (val & 0x3f);
-	val &= ~0x3f;
-
-	/* If we are trying to shift >= 64 bits, clear the destination
-	 * register.  This can happen when phys_bits ends up being equal
-	 * to MAX_PHYS_ADDRESS_BITS.
-	 */
-	final_shift = (cnt + (64 - phys_bits));
-	if (final_shift >= 64) {
-		unsigned int rd = (val >> 25) & 0x1f;
-
-		val = 0x80100000 | (rd << 25);
-	} else {
-		val |= final_shift;
-	}
-	*insn = val;
-
-	__asm__ __volatile__("flush	%0"
-			     : /* no outputs */
-			     : "r" (insn));
-}
-
-static void __init page_offset_shift_patch(unsigned long phys_bits)
-{
-	extern unsigned int __page_offset_shift_patch;
-	extern unsigned int __page_offset_shift_patch_end;
-	unsigned int *p;
-
-	p = &__page_offset_shift_patch;
-	while (p < &__page_offset_shift_patch_end) {
-		unsigned int *insn = (unsigned int *)(unsigned long)*p;
-		page_offset_shift_patch_one(insn, phys_bits);
-		p++;
-	}
-}
-
 unsigned long sparc64_va_hole_top =    0xfffff80000000000UL;
 unsigned long sparc64_va_hole_bottom = 0x0000080000000000UL;
 
 static void __init setup_page_offset(void)
 {
-	unsigned long max_phys_bits = 40;
-
 	if (tlb_type == cheetah || tlb_type == cheetah_plus) {
 		/* Cheetah/Panther support a full 64-bit virtual
 		 * address, so we can use all that our page tables
@@ -1685,8 +1709,6 @@ static void __init setup_page_offset(void)
 	pr_info("PAGE_OFFSET is 0x%016lx (max_phys_bits == %lu)\n",
 		PAGE_OFFSET, max_phys_bits);
-
-	page_offset_shift_patch(max_phys_bits);
 }
 
 static void __init tsb_phys_patch(void)
static void __init tsb_phys_patch(void) static void __init tsb_phys_patch(void)
...@@ -1731,7 +1753,6 @@ static void __init tsb_phys_patch(void) ...@@ -1731,7 +1753,6 @@ static void __init tsb_phys_patch(void)
#define NUM_KTSB_DESCR 1 #define NUM_KTSB_DESCR 1
#endif #endif
static struct hv_tsb_descr ktsb_descr[NUM_KTSB_DESCR]; static struct hv_tsb_descr ktsb_descr[NUM_KTSB_DESCR];
extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
/* The swapper TSBs are loaded with a base sequence of: /* The swapper TSBs are loaded with a base sequence of:
* *
@@ -2080,8 +2101,6 @@ void __init paging_init(void)
 	inherit_prom_mappings();
 
-	init_kpte_bitmap();
-
 	/* Ok, we can use our TLB miss and window trap handlers safely.  */
 	setup_tba();
@@ -2188,70 +2207,6 @@ int page_in_phys_avail(unsigned long paddr)
 	return 0;
 }
 
-static struct linux_prom64_registers pavail_rescan[MAX_BANKS] __initdata;
-static int pavail_rescan_ents __initdata;
-
-/* Certain OBP calls, such as fetching "available" properties, can
- * claim physical memory.  So, along with initializing the valid
- * address bitmap, what we do here is refetch the physical available
- * memory list again, and make sure it provides at least as much
- * memory as 'pavail' does.
- */
-static void __init setup_valid_addr_bitmap_from_pavail(unsigned long *bitmap)
-{
-	int i;
-
-	read_obp_memory("available", &pavail_rescan[0], &pavail_rescan_ents);
-
-	for (i = 0; i < pavail_ents; i++) {
-		unsigned long old_start, old_end;
-
-		old_start = pavail[i].phys_addr;
-		old_end = old_start + pavail[i].reg_size;
-		while (old_start < old_end) {
-			int n;
-
-			for (n = 0; n < pavail_rescan_ents; n++) {
-				unsigned long new_start, new_end;
-
-				new_start = pavail_rescan[n].phys_addr;
-				new_end = new_start +
-					pavail_rescan[n].reg_size;
-
-				if (new_start <= old_start &&
-				    new_end >= (old_start + PAGE_SIZE)) {
-					set_bit(old_start >> ILOG2_4MB, bitmap);
-					goto do_next_page;
-				}
-			}
-
-			prom_printf("mem_init: Lost memory in pavail\n");
-			prom_printf("mem_init: OLD start[%lx] size[%lx]\n",
-				    pavail[i].phys_addr,
-				    pavail[i].reg_size);
-			prom_printf("mem_init: NEW start[%lx] size[%lx]\n",
-				    pavail_rescan[i].phys_addr,
-				    pavail_rescan[i].reg_size);
-			prom_printf("mem_init: Cannot continue, aborting.\n");
-			prom_halt();
-
-		do_next_page:
-			old_start += PAGE_SIZE;
-		}
-	}
-}
-
-static void __init patch_tlb_miss_handler_bitmap(void)
-{
-	extern unsigned int valid_addr_bitmap_insn[];
-	extern unsigned int valid_addr_bitmap_patch[];
-
-	valid_addr_bitmap_insn[1] = valid_addr_bitmap_patch[1];
-	mb();
-	valid_addr_bitmap_insn[0] = valid_addr_bitmap_patch[0];
-	flushi(&valid_addr_bitmap_insn[0]);
-}
-
 static void __init register_page_bootmem_info(void)
 {
 #ifdef CONFIG_NEED_MULTIPLE_NODES
@@ -2264,18 +2219,6 @@ static void __init register_page_bootmem_info(void)
 }
 
 void __init mem_init(void)
 {
-	unsigned long addr, last;
-
-	addr = PAGE_OFFSET + kern_base;
-	last = PAGE_ALIGN(kern_size) + addr;
-	while (addr < last) {
-		set_bit(__pa(addr) >> ILOG2_4MB, sparc64_valid_addr_bitmap);
-		addr += PAGE_SIZE;
-	}
-
-	setup_valid_addr_bitmap_from_pavail(sparc64_valid_addr_bitmap);
-	patch_tlb_miss_handler_bitmap();
-
 	high_memory = __va(last_valid_pfn << PAGE_SHIFT);
 
 	register_page_bootmem_info();
...
@@ -8,15 +8,8 @@
  */
 #define MAX_PHYS_ADDRESS	(1UL << MAX_PHYS_ADDRESS_BITS)
 
-#define KPTE_BITMAP_CHUNK_SZ		(256UL * 1024UL * 1024UL)
-#define KPTE_BITMAP_BYTES	\
-	((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 4)
-#define VALID_ADDR_BITMAP_CHUNK_SZ	(4UL * 1024UL * 1024UL)
-#define VALID_ADDR_BITMAP_BYTES	\
-	((MAX_PHYS_ADDRESS / VALID_ADDR_BITMAP_CHUNK_SZ) / 8)
-
 extern unsigned long kern_linear_pte_xor[4];
-extern unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
 extern unsigned int sparc64_highest_unlocked_tlb_ent;
 extern unsigned long sparc64_kern_pri_context;
 extern unsigned long sparc64_kern_pri_nuc_bits;
...