Commit f384796c authored by Aneesh Kumar K.V, committed by Michael Ellerman

powerpc/mm: Add support for handling > 512TB address in SLB miss

For addresses above 512TB we allocate additional mmu contexts. To make
it all easy, addresses above 512TB are handled with IR/DR=1 and with
stack frame setup.

The mmu_context_t is also updated to track the new extended_ids. To
support up to 4PB we need a total of 8 contexts.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
[mpe: Minor formatting tweaks and comment wording, switch BUG to WARN
      in get_ea_context().]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
parent 0dea04b2
...@@ -11,6 +11,12 @@ ...@@ -11,6 +11,12 @@
#define H_PUD_INDEX_SIZE 9 #define H_PUD_INDEX_SIZE 9
#define H_PGD_INDEX_SIZE 9 #define H_PGD_INDEX_SIZE 9
/*
* Each context is 512TB, but with 4K pages we restrict the max TASK size
* to 64TB. Hence also limit the max EA bits per context to 46 (64TB).
*/
#define MAX_EA_BITS_PER_CONTEXT 46
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
#define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE) #define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE)
#define H_PMD_TABLE_SIZE (sizeof(pmd_t) << H_PMD_INDEX_SIZE) #define H_PMD_TABLE_SIZE (sizeof(pmd_t) << H_PMD_INDEX_SIZE)
......
...@@ -7,6 +7,12 @@ ...@@ -7,6 +7,12 @@
#define H_PUD_INDEX_SIZE 7 #define H_PUD_INDEX_SIZE 7
#define H_PGD_INDEX_SIZE 8 #define H_PGD_INDEX_SIZE 8
/*
* Each context is 512TB size. SLB miss for first context/default context
* is handled in the hotpath.
*/
#define MAX_EA_BITS_PER_CONTEXT 49
/* /*
* 64k aligned address free up few of the lower bits of RPN for us * 64k aligned address free up few of the lower bits of RPN for us
* We steal that here. For more deatils look at pte_pfn/pfn_pte() * We steal that here. For more deatils look at pte_pfn/pfn_pte()
......
...@@ -91,7 +91,18 @@ struct slice_mask { ...@@ -91,7 +91,18 @@ struct slice_mask {
}; };
typedef struct { typedef struct {
mm_context_id_t id; union {
/*
* We use id as the PIDR content for radix. On hash we can use
* more than one id. The extended ids are used when we start
* having address above 512TB. We allocate one extended id
* for each 512TB. The new id is then used with the 49 bit
* EA to build a new VA. We always use ESID_BITS_1T_MASK bits
* from EA and new context ids to build the new VAs.
*/
mm_context_id_t id;
mm_context_id_t extended_id[TASK_SIZE_USER64/TASK_CONTEXT_SIZE];
};
u16 user_psize; /* page size index */ u16 user_psize; /* page size index */
/* Number of bits in the mm_cpumask */ /* Number of bits in the mm_cpumask */
...@@ -196,5 +207,25 @@ extern void radix_init_pseries(void); ...@@ -196,5 +207,25 @@ extern void radix_init_pseries(void);
static inline void radix_init_pseries(void) { }; static inline void radix_init_pseries(void) { };
#endif #endif
/*
 * Look up the context id that covers effective address @ea.
 *
 * The slot in ctx->extended_id[] is selected by the EA bits above
 * MAX_EA_BITS_PER_CONTEXT. Returns the id stored in that slot, or 0
 * (after a WARN) when the slot number falls outside the array.
 */
static inline int get_ea_context(mm_context_t *ctx, unsigned long ea)
{
	int slot = ea >> MAX_EA_BITS_PER_CONTEXT;

	if (unlikely(slot >= ARRAY_SIZE(ctx->extended_id))) {
		/* should never happen */
		WARN_ON(1);
		return 0;
	}

	return ctx->extended_id[slot];
}
/*
 * Compute the VSID for user address @ea with segment size @ssize, using
 * the context id that get_ea_context() reports for that address.
 */
static inline unsigned long get_user_vsid(mm_context_t *ctx,
					  unsigned long ea, int ssize)
{
	unsigned long ctx_id;

	ctx_id = get_ea_context(ctx, ea);

	return get_vsid(ctx_id, ea, ssize);
}
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */ #endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */
...@@ -60,12 +60,51 @@ extern int hash__alloc_context_id(void); ...@@ -60,12 +60,51 @@ extern int hash__alloc_context_id(void);
extern void hash__reserve_context_id(int id); extern void hash__reserve_context_id(int id);
extern void __destroy_context(int context_id); extern void __destroy_context(int context_id);
static inline void mmu_context_init(void) { } static inline void mmu_context_init(void) { }
static inline int alloc_extended_context(struct mm_struct *mm,
unsigned long ea)
{
int context_id;
int index = ea >> MAX_EA_BITS_PER_CONTEXT;
context_id = hash__alloc_context_id();
if (context_id < 0)
return context_id;
VM_WARN_ON(mm->context.extended_id[index]);
mm->context.extended_id[index] = context_id;
return context_id;
}
/*
 * Tell whether @ea still lacks a context id, i.e. get_ea_context()
 * reports 0 for it and a new extended context has to be allocated.
 */
static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
{
	return get_ea_context(&mm->context, ea) == 0;
}
#else #else
extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk); struct task_struct *tsk);
extern unsigned long __init_new_context(void); extern unsigned long __init_new_context(void);
extern void __destroy_context(unsigned long context_id); extern void __destroy_context(unsigned long context_id);
extern void mmu_context_init(void); extern void mmu_context_init(void);
/*
 * Stub used when the book3s_64 implementation above is not compiled in.
 * It never allocates anything: it warns and reports failure with -ENOMEM.
 */
static inline int alloc_extended_context(struct mm_struct *mm,
unsigned long ea)
{
/* non book3s_64 should never find this called */
WARN_ON(1);
return -ENOMEM;
}
/*
 * Stub counterpart of the book3s_64 helper: extended contexts are never
 * required in this configuration, so always answer "no".
 */
static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
{
return false;
}
#endif #endif
#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU) #if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
......
...@@ -119,9 +119,15 @@ void release_thread(struct task_struct *); ...@@ -119,9 +119,15 @@ void release_thread(struct task_struct *);
*/ */
#define TASK_SIZE_USER64 TASK_SIZE_512TB #define TASK_SIZE_USER64 TASK_SIZE_512TB
#define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_128TB #define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_128TB
#define TASK_CONTEXT_SIZE TASK_SIZE_512TB
#else #else
#define TASK_SIZE_USER64 TASK_SIZE_64TB #define TASK_SIZE_USER64 TASK_SIZE_64TB
#define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_64TB #define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_64TB
/*
* We don't need to allocate extended context ids for 4K page size, because
* we limit the max effective address on this config to 64TB.
*/
#define TASK_CONTEXT_SIZE TASK_SIZE_64TB
#endif #endif
/* /*
......
...@@ -621,7 +621,10 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) ...@@ -621,7 +621,10 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
mtlr r10 mtlr r10
beq- 8f /* if bad address, make full stack frame */ /*
* Large address, check whether we have to allocate new contexts.
*/
beq- 8f
bne- cr5,2f /* if unrecoverable exception, oops */ bne- cr5,2f /* if unrecoverable exception, oops */
...@@ -685,7 +688,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) ...@@ -685,7 +688,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
mr r3,r12 mr r3,r12
mfspr r11,SPRN_SRR0 mfspr r11,SPRN_SRR0
mfspr r12,SPRN_SRR1 mfspr r12,SPRN_SRR1
LOAD_HANDLER(r10,bad_addr_slb) LOAD_HANDLER(r10, large_addr_slb)
mtspr SPRN_SRR0,r10 mtspr SPRN_SRR0,r10
ld r10,PACAKMSR(r13) ld r10,PACAKMSR(r13)
mtspr SPRN_SRR1,r10 mtspr SPRN_SRR1,r10
...@@ -700,7 +703,7 @@ EXC_COMMON_BEGIN(unrecov_slb) ...@@ -700,7 +703,7 @@ EXC_COMMON_BEGIN(unrecov_slb)
bl unrecoverable_exception bl unrecoverable_exception
b 1b b 1b
EXC_COMMON_BEGIN(bad_addr_slb) EXC_COMMON_BEGIN(large_addr_slb)
EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB) EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB)
RECONCILE_IRQ_STATE(r10, r11) RECONCILE_IRQ_STATE(r10, r11)
ld r3, PACA_EXSLB+EX_DAR(r13) ld r3, PACA_EXSLB+EX_DAR(r13)
...@@ -710,7 +713,7 @@ EXC_COMMON_BEGIN(bad_addr_slb) ...@@ -710,7 +713,7 @@ EXC_COMMON_BEGIN(bad_addr_slb)
std r10, _TRAP(r1) std r10, _TRAP(r1)
2: bl save_nvgprs 2: bl save_nvgprs
addi r3, r1, STACK_FRAME_OVERHEAD addi r3, r1, STACK_FRAME_OVERHEAD
bl slb_miss_bad_addr bl slb_miss_large_addr
b ret_from_except b ret_from_except
EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100) EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
......
...@@ -1495,18 +1495,6 @@ void alignment_exception(struct pt_regs *regs) ...@@ -1495,18 +1495,6 @@ void alignment_exception(struct pt_regs *regs)
exception_exit(prev_state); exception_exit(prev_state);
} }
/*
 * Handle an SLB miss on a bad address: deliver SIGSEGV (SEGV_BNDERR) for
 * the faulting address in regs->dar when the miss came from user mode,
 * otherwise hand the fault to bad_page_fault().
 */
void slb_miss_bad_addr(struct pt_regs *regs)
{
	enum ctx_state prev_state = exception_enter();

	if (!user_mode(regs))
		bad_page_fault(regs, regs->dar, SIGSEGV);
	else
		_exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar);

	exception_exit(prev_state);
}
void StackOverflow(struct pt_regs *regs) void StackOverflow(struct pt_regs *regs)
{ {
printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n", printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n",
......
...@@ -112,7 +112,7 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) ...@@ -112,7 +112,7 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
return 1; return 1;
psize = get_slice_psize(mm, ea); psize = get_slice_psize(mm, ea);
ssize = user_segment_size(ea); ssize = user_segment_size(ea);
vsid = get_vsid(mm->context.id, ea, ssize); vsid = get_user_vsid(&mm->context, ea, ssize);
vsidkey = SLB_VSID_USER; vsidkey = SLB_VSID_USER;
break; break;
case VMALLOC_REGION_ID: case VMALLOC_REGION_ID:
......
...@@ -1267,7 +1267,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, ...@@ -1267,7 +1267,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
} }
psize = get_slice_psize(mm, ea); psize = get_slice_psize(mm, ea);
ssize = user_segment_size(ea); ssize = user_segment_size(ea);
vsid = get_vsid(mm->context.id, ea, ssize); vsid = get_user_vsid(&mm->context, ea, ssize);
break; break;
case VMALLOC_REGION_ID: case VMALLOC_REGION_ID:
vsid = get_kernel_vsid(ea, mmu_kernel_ssize); vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
...@@ -1532,7 +1532,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, ...@@ -1532,7 +1532,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
/* Get VSID */ /* Get VSID */
ssize = user_segment_size(ea); ssize = user_segment_size(ea);
vsid = get_vsid(mm->context.id, ea, ssize); vsid = get_user_vsid(&mm->context, ea, ssize);
if (!vsid) if (!vsid)
return; return;
/* /*
......
...@@ -179,6 +179,19 @@ void __destroy_context(int context_id) ...@@ -179,6 +179,19 @@ void __destroy_context(int context_id)
} }
EXPORT_SYMBOL_GPL(__destroy_context); EXPORT_SYMBOL_GPL(__destroy_context);
/*
 * Release every context id recorded in ctx->extended_id[]. Empty (zero)
 * slots are skipped. The whole walk runs under mmu_context_lock.
 */
static void destroy_contexts(mm_context_t *ctx)
{
	int i;

	spin_lock(&mmu_context_lock);
	for (i = 0; i < ARRAY_SIZE(ctx->extended_id); i++) {
		int id = ctx->extended_id[i];

		if (!id)
			continue;
		ida_remove(&mmu_context_ida, id);
	}
	spin_unlock(&mmu_context_lock);
}
#ifdef CONFIG_PPC_64K_PAGES #ifdef CONFIG_PPC_64K_PAGES
static void destroy_pagetable_page(struct mm_struct *mm) static void destroy_pagetable_page(struct mm_struct *mm)
{ {
...@@ -217,7 +230,7 @@ void destroy_context(struct mm_struct *mm) ...@@ -217,7 +230,7 @@ void destroy_context(struct mm_struct *mm)
else else
subpage_prot_free(mm); subpage_prot_free(mm);
destroy_pagetable_page(mm); destroy_pagetable_page(mm);
__destroy_context(mm->context.id); destroy_contexts(&mm->context);
mm->context.id = MMU_NO_CONTEXT; mm->context.id = MMU_NO_CONTEXT;
} }
......
...@@ -320,7 +320,7 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, ...@@ -320,7 +320,7 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
if (!is_kernel_addr(addr)) { if (!is_kernel_addr(addr)) {
ssize = user_segment_size(addr); ssize = user_segment_size(addr);
vsid = get_vsid(mm->context.id, addr, ssize); vsid = get_user_vsid(&mm->context, addr, ssize);
WARN_ON(vsid == 0); WARN_ON(vsid == 0);
} else { } else {
vsid = get_kernel_vsid(addr, mmu_kernel_ssize); vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <asm/cacheflush.h> #include <asm/cacheflush.h>
#include <asm/smp.h> #include <asm/smp.h>
#include <linux/compiler.h> #include <linux/compiler.h>
#include <linux/context_tracking.h>
#include <linux/mm_types.h> #include <linux/mm_types.h>
#include <asm/udbg.h> #include <asm/udbg.h>
...@@ -340,3 +341,110 @@ void slb_initialize(void) ...@@ -340,3 +341,110 @@ void slb_initialize(void)
asm volatile("isync":::"memory"); asm volatile("isync":::"memory");
} }
/*
 * Install an SLB entry for @ea in the next round-robin slot and record it
 * in the paca's slb_cache.
 *
 * @vsid:   VSID to place in the entry.
 * @ea:     effective address the entry is built for.
 * @bpsize: index into mmu_psize_defs[]; its sllp bits go into the entry.
 * @ssize:  segment size, encoded into both halves of the entry.
 */
static void insert_slb_entry(unsigned long vsid, unsigned long ea,
int bpsize, int ssize)
{
unsigned long flags, vsid_data, esid_data;
enum slb_index index;
int slb_cache_index;
/*
 * We are irq disabled, hence should be safe to access PACA.
 */
index = get_paca()->stab_rr;
/*
 * Simple round-robin replacement of SLB entries: advance to the next
 * slot, wrapping back to SLB_NUM_BOLTED once the last slot
 * (mmu_slb_size - 1) has been used.
 */
if (index < (mmu_slb_size - 1))
index++;
else
index = SLB_NUM_BOLTED;
/* Remember the slot we picked for the next replacement. */
get_paca()->stab_rr = index;
/* Assemble the two operands of slbmte. */
flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;
vsid_data = (vsid << slb_vsid_shift(ssize)) | flags |
((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT);
esid_data = mk_esid_data(ea, ssize, index);
asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data)
: "memory");
/*
 * Now update slb cache entries
 */
slb_cache_index = get_paca()->slb_cache_ptr;
if (slb_cache_index < SLB_CACHE_ENTRIES) {
/*
 * We have space in slb cache for optimized switch_slb().
 * Top 36 bits from esid_data as per ISA
 *
 * NOTE(review): the post-increment of the local slb_cache_index
 * has no effect, the local is not read again; the paca pointer
 * is advanced by the statement that follows.
 */
get_paca()->slb_cache[slb_cache_index++] = esid_data >> 28;
get_paca()->slb_cache_ptr++;
} else {
/*
 * Our cache is full and the current cache content strictly
 * doesn't indicate the active SLB contents. Bump the ptr
 * so that switch_slb() will ignore the cache.
 */
get_paca()->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
}
}
/*
 * Service an SLB miss for an address that lives in an extended context:
 * derive the VSID from @context_id and @ea, look up the slice page size
 * for the current mm, and insert the resulting SLB entry.
 */
static void handle_multi_context_slb_miss(int context_id, unsigned long ea)
{
	struct mm_struct *cur_mm = current->mm;
	unsigned long seg_vsid;
	int page_size;

	/*
	 * Addresses handled here are always above 1TB, so the high user
	 * segment size applies.
	 */
	seg_vsid = get_vsid(context_id, ea, mmu_highuser_ssize);
	page_size = get_slice_psize(cur_mm, ea);

	insert_slb_entry(seg_vsid, ea, page_size, mmu_highuser_ssize);
}
void slb_miss_large_addr(struct pt_regs *regs)
{
enum ctx_state prev_state = exception_enter();
unsigned long ea = regs->dar;
int context;
if (REGION_ID(ea) != USER_REGION_ID)
goto slb_bad_addr;
/*
* Are we beyound what the page table layout supports ?
*/
if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE)
goto slb_bad_addr;
/* Lower address should have been handled by asm code */
if (ea < (1UL << MAX_EA_BITS_PER_CONTEXT))
goto slb_bad_addr;
/*
* consider this as bad access if we take a SLB miss
* on an address above addr limit.
*/
if (ea >= current->mm->context.slb_addr_limit)
goto slb_bad_addr;
context = get_ea_context(&current->mm->context, ea);
if (!context)
goto slb_bad_addr;
handle_multi_context_slb_miss(context, ea);
exception_exit(prev_state);
return;
slb_bad_addr:
if (user_mode(regs))
_exception(SIGSEGV, regs, SEGV_BNDERR, ea);
else
bad_page_fault(regs, ea, SIGSEGV);
exception_exit(prev_state);
}
...@@ -75,10 +75,15 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA) ...@@ -75,10 +75,15 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA)
*/ */
_GLOBAL(slb_allocate) _GLOBAL(slb_allocate)
/* /*
* check for bad kernel/user address * Check if the address falls within the range of the first context, or
* (ea & ~REGION_MASK) >= PGTABLE_RANGE * if we may need to handle multi context. For the first context we
* allocate the slb entry via the fast path below. For large address we
* branch out to C-code and see if additional contexts have been
* allocated.
* The test here is:
* (ea & ~REGION_MASK) >= (1ull << MAX_EA_BITS_PER_CONTEXT)
*/ */
rldicr. r9,r3,4,(63 - H_PGTABLE_EADDR_SIZE - 4) rldicr. r9,r3,4,(63 - MAX_EA_BITS_PER_CONTEXT - 4)
bne- 8f bne- 8f
srdi r9,r3,60 /* get region */ srdi r9,r3,60 /* get region */
......
...@@ -648,6 +648,15 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, ...@@ -648,6 +648,15 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
slice_print_mask(" mask", &potential_mask); slice_print_mask(" mask", &potential_mask);
convert: convert:
/*
* Try to allocate the context before we do slice convert
* so that we handle the context allocation failure gracefully.
*/
if (need_extra_context(mm, newaddr)) {
if (alloc_extended_context(mm, newaddr) < 0)
return -ENOMEM;
}
slice_andnot_mask(&potential_mask, &potential_mask, &good_mask); slice_andnot_mask(&potential_mask, &potential_mask, &good_mask);
if (compat_maskp && !fixed) if (compat_maskp && !fixed)
slice_andnot_mask(&potential_mask, &potential_mask, compat_maskp); slice_andnot_mask(&potential_mask, &potential_mask, compat_maskp);
...@@ -658,10 +667,14 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, ...@@ -658,10 +667,14 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
if (psize > MMU_PAGE_BASE) if (psize > MMU_PAGE_BASE)
on_each_cpu(slice_flush_segments, mm, 1); on_each_cpu(slice_flush_segments, mm, 1);
} }
return newaddr;
return_addr: return_addr:
if (need_extra_context(mm, newaddr)) {
if (alloc_extended_context(mm, newaddr) < 0)
return -ENOMEM;
}
return newaddr; return newaddr;
} }
EXPORT_SYMBOL_GPL(slice_get_unmapped_area); EXPORT_SYMBOL_GPL(slice_get_unmapped_area);
......
...@@ -89,7 +89,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, ...@@ -89,7 +89,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
/* Build full vaddr */ /* Build full vaddr */
if (!is_kernel_addr(addr)) { if (!is_kernel_addr(addr)) {
ssize = user_segment_size(addr); ssize = user_segment_size(addr);
vsid = get_vsid(mm->context.id, addr, ssize); vsid = get_user_vsid(&mm->context, addr, ssize);
} else { } else {
vsid = get_kernel_vsid(addr, mmu_kernel_ssize); vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
ssize = mmu_kernel_ssize; ssize = mmu_kernel_ssize;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment