Commit 18c8a844 authored by Anton Blanchard

ppc64: hashtable management rework for SMP scalability

Get rid of the global lock on HPTE insert and remove; use a software lock
bit on pSeries and the relevant hypervisor interfaces on pSeries LPAR to
avoid races.
Preload HPTE entries in update_mmu_cache.
Keep cpu_vm_mask up to date and use tlbiel when the mm has only run locally.
Batch TLB flushes where possible.
Add large page support in preparation for generic large page support.
Remove _PAGE_HPTENOIX; we now always put slot information into the Linux PTEs.
Note: pSeries and pSeries LPAR are done so far; iSeries is coming next.
parent 41d64318
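
The heart of the scalability change is the combination of per-CPU TLB flush batching and the cpu_vm_mask / tlbiel "local" optimisation described above. The sketch below is only an illustration of how those pieces fit together, not code from this commit: the identifiers tlb_batch_array, MAX_BATCH_FLUSH, flush_hash_range(), find_linux_pte() and pte_update() all appear in the diffs that follow, while the helper function itself is hypothetical.

/* Illustrative sketch (not part of this commit): batching hash-PTE
 * invalidations per CPU and choosing a CPU-local flush when the mm has
 * only ever run on the current processor.
 */
static void batch_flush_user_range(struct mm_struct *mm, unsigned long start,
				   unsigned long end)
{
	struct tlb_batch_data *batch = &tlb_batch_array[smp_processor_id()][0];
	unsigned long context = mm->context;
	unsigned long addr;
	int i = 0;
	/* mm has only run here, so the cheaper tlbiel can be used instead
	 * of broadcasting tlbie to every CPU. */
	int local = (mm->cpu_vm_mask == (1UL << smp_processor_id()));

	for (addr = start; addr < end; addr += PAGE_SIZE) {
		pte_t *ptep = find_linux_pte(mm->pgd, addr);
		pte_t pte;

		if (!ptep || !(pte_val(*ptep) & _PAGE_HASHPTE))
			continue;

		/* Clear the HPTE tracking bits and keep the old value so the
		 * hash-table entry can still be located and invalidated. */
		pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0));
		batch[i].pte = pte;
		batch[i].addr = addr;

		if (++i == MAX_BATCH_FLUSH) {
			flush_hash_range(context, MAX_BATCH_FLUSH, local);
			i = 0;
		}
	}

	if (i)
		flush_hash_range(context, i, local);
}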
@@ -27,7 +27,7 @@ obj-y := ppc_ksyms.o setup.o entry.o traps.o irq.o idle.o \
	pmc.o mf_proc.o proc_pmc.o iSeries_setup.o \
	ItLpQueue.o hvCall.o mf.o HvLpEvent.o ras.o \
	iSeries_proc.o HvCall.o HvLpConfig.o \
-	rtc.o init_task.o
+	rtc.o init_task.o pSeries_htab.o
obj-$(CONFIG_PCI) += pci.o pci_dn.o pci_dma.o
obj-$(CONFIG_PPC_EEH) += eeh.o
......
@@ -253,7 +253,7 @@ static void map_io_page(unsigned long ea, unsigned long pa, int flags)
		 * entry in the hardware page table.
		 */
		vsid = get_kernel_vsid(ea);
-		make_pte(htab_data.htab,
+		ppc_md.make_pte(htab_data.htab,
			(vsid << 28) | (ea & 0xFFFFFFF), // va (NOT the ea)
			pa,
			_PAGE_NO_CACHE | _PAGE_GUARDED | PP_RWXX,
@@ -261,30 +261,24 @@ static void map_io_page(unsigned long ea, unsigned long pa, int flags)
	}
}

void
local_flush_tlb_all(void)
{
	/* Implemented to just flush the vmalloc area.
	 * vmalloc is the only user of flush_tlb_all.
	 */
	local_flush_tlb_range( NULL, VMALLOC_START, VMALLOC_END );
}

void
local_flush_tlb_mm(struct mm_struct *mm)
{
-	if ( mm->map_count ) {
+	if (mm->map_count) {
		struct vm_area_struct *mp;
-		for ( mp = mm->mmap; mp != NULL; mp = mp->vm_next )
-			local_flush_tlb_range( mm, mp->vm_start, mp->vm_end );
-	}
-	else /* MIKEC: It is not clear why this is needed */
+		for (mp = mm->mmap; mp != NULL; mp = mp->vm_next)
+			local_flush_tlb_range(mm, mp->vm_start, mp->vm_end);
+	} else {
+		/* MIKEC: It is not clear why this is needed */
		/* paulus: it is needed to clear out stale HPTEs
		 * when an address space (represented by an mm_struct)
		 * is being destroyed. */
-		local_flush_tlb_range( mm, USER_START, USER_END );
-}
+		local_flush_tlb_range(mm, USER_START, USER_END);
+	}
+
+	/* XXX are there races with checking cpu_vm_mask? - Anton */
+	mm->cpu_vm_mask = 0;
+}
/*
* Callers should hold the mm->page_table_lock
@@ -297,7 +291,9 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
	pmd_t *pmd;
	pte_t *ptep;
	pte_t pte;
+	unsigned long flags;
+	int local = 0;

	switch( REGION_ID(vmaddr) ) {
	case VMALLOC_REGION_ID:
		pgd = pgd_offset_k( vmaddr );
@@ -308,13 +304,17 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
	case USER_REGION_ID:
		pgd = pgd_offset( vma->vm_mm, vmaddr );
		context = vma->vm_mm->context;
+
+		/* XXX are there races with checking cpu_vm_mask? - Anton */
+		if (vma->vm_mm->cpu_vm_mask == (1 << smp_processor_id()))
+			local = 1;
+
		break;
	default:
		panic("local_flush_tlb_page: invalid region 0x%016lx", vmaddr);
	}

	if (!pgd_none(*pgd)) {
		pmd = pmd_offset(pgd, vmaddr);
		if (!pmd_none(*pmd)) {
@@ -322,12 +322,14 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
			/* Check if HPTE might exist and flush it if so */
			pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0));
			if ( pte_val(pte) & _PAGE_HASHPTE ) {
-				flush_hash_page(context, vmaddr, pte);
+				flush_hash_page(context, vmaddr, pte, local);
			}
		}
	}
}

+struct tlb_batch_data tlb_batch_array[NR_CPUS][MAX_BATCH_FLUSH];
+
void
local_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end)
{
@@ -337,6 +339,10 @@ local_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long e
	pte_t pte;
	unsigned long pgd_end, pmd_end;
	unsigned long context;
+	unsigned long flags;
+	int i = 0;
+	struct tlb_batch_data *ptes = &tlb_batch_array[smp_processor_id()][0];
+	int local = 0;

	if ( start >= end )
		panic("flush_tlb_range: start (%016lx) greater than end (%016lx)\n", start, end );
@@ -356,6 +362,12 @@ local_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long e
	case USER_REGION_ID:
		pgd = pgd_offset( mm, start );
		context = mm->context;
+
+		/* XXX are there races with checking cpu_vm_mask? - Anton */
+		if (mm->cpu_vm_mask == (1 << smp_processor_id())) {
+			local = 1;
+		}
+
		break;
	default:
		panic("flush_tlb_range: invalid region for start (%016lx) and end (%016lx)\n", start, end);
@@ -377,8 +389,17 @@ local_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long e
		do {
			if ( pte_val(*ptep) & _PAGE_HASHPTE ) {
				pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0));
-				if ( pte_val(pte) & _PAGE_HASHPTE )
-					flush_hash_page( context, start, pte );
+				if ( pte_val(pte) & _PAGE_HASHPTE ) {
+					ptes->pte = pte;
+					ptes->addr = start;
+					ptes++;
+					i++;
+					if (i == MAX_BATCH_FLUSH) {
+						flush_hash_range(context, MAX_BATCH_FLUSH, local);
+						i = 0;
+						ptes = &tlb_batch_array[smp_processor_id()][0];
+					}
+				}
			}
			start += PAGE_SIZE;
			++ptep;
@@ -393,6 +414,9 @@ local_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long e
		start = pgd_end;
		++pgd;
	} while ( start < end );
+
+	if (i)
+		flush_hash_range(context, i, local);
}
@@ -643,3 +667,30 @@ void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
	maddr = (unsigned long)page_address(page) + (addr & ~PAGE_MASK);
	flush_icache_range(maddr, maddr + len);
}
+
+/*
+ * This is called at the end of handling a user page fault, when the
+ * fault has been handled by updating a PTE in the linux page tables.
+ * We use it to preload an HPTE into the hash table corresponding to
+ * the updated linux PTE.
+ */
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long ea,
+		      pte_t pte)
+{
+	unsigned long vsid;
+	void *pgdir;
+	pte_t *ptep;
+
+	/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
+	if (!pte_young(pte))
+		return;
+
+	pgdir = vma->vm_mm->pgd;
+	if (pgdir == NULL)
+		return;
+
+	ptep = find_linux_pte(pgdir, ea);
+	vsid = get_vsid(vma->vm_mm->context, ea);
+	__hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep);
+}
@@ -24,26 +24,33 @@ struct machdep_calls {
	/* High use functions in the first cachelines, low use functions
	 * follow. DRENG collect profile data.
	 */
-	void (*hpte_invalidate)(unsigned long slot);
+	void (*hpte_invalidate)(unsigned long slot,
+				unsigned long va,
+				int large,
+				int local);
-	void (*hpte_updatepp)(long slot,
+	long (*hpte_updatepp)(unsigned long slot,
			      unsigned long newpp,
-			      unsigned long va);
+			      unsigned long va,
+			      int large);
	void (*hpte_updateboltedpp)(unsigned long newpp,
				    unsigned long ea);
	unsigned long (*hpte_getword0)(unsigned long slot);
-	long (*hpte_find)( unsigned long vpn );
-	long (*hpte_selectslot)(unsigned long vpn);
-	void (*hpte_create_valid)(unsigned long slot,
-				  unsigned long vpn,
-				  unsigned long prpn,
-				  unsigned hash,
-				  void * ptep,
-				  unsigned hpteflags,
-				  unsigned bolted);
+	long (*insert_hpte)(unsigned long hpte_group,
+			    unsigned long vpn,
+			    unsigned long prpn,
+			    int secondary,
+			    unsigned long hpteflags,
+			    int bolted,
+			    int large);
+	long (*remove_hpte)(unsigned long hpte_group);
+	void (*flush_hash_range)(unsigned long context,
+				 unsigned long number,
+				 int local);
+	void (*make_pte)(void *htab, unsigned long va,
+			 unsigned long pa,
+			 int mode,
+			 unsigned long hash_mask,
+			 int large);
	void (*tce_build)(struct TceTable * tbl,
			  long tcenum,
			  unsigned long uaddr,
......
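
The machdep_calls changes above replace the old hpte_selectslot()/hpte_create_valid() pair with insert_hpte()/remove_hpte() hooks, so each platform backend (pSeries bare metal with the software lock bit, pSeries LPAR via hypervisor calls) can serialise within a single HPTE group instead of taking a global lock. The callers live in the collapsed hash-table diffs above; the following is a hypothetical sketch of the usual primary/secondary insertion pattern such a caller would follow, with the group arithmetic (HPTES_PER_GROUP, htab_hash_mask) assumed rather than taken from this commit.

/* Hypothetical caller sketch (not from this commit): inserting an HPTE via
 * the new ppc_md hooks.  A hash bucket maps to a primary group and, via the
 * complemented hash, a secondary group; when both are full an entry is
 * evicted and the insert retried.  A negative return is assumed here to
 * mean "group full".
 */
static long insert_into_hpt(unsigned long vpn, unsigned long prpn,
			    unsigned long hpteflags, int bolted, int large)
{
	unsigned long hash = hpt_hash(vpn, large);
	unsigned long primary, secondary;
	long slot;

	primary   = (hash  & htab_hash_mask) * HPTES_PER_GROUP;
	secondary = (~hash & htab_hash_mask) * HPTES_PER_GROUP;

	slot = ppc_md.insert_hpte(primary, vpn, prpn, 0 /* primary */,
				  hpteflags, bolted, large);
	if (slot >= 0)
		return slot;

	slot = ppc_md.insert_hpte(secondary, vpn, prpn, 1 /* secondary */,
				  hpteflags, bolted, large);
	if (slot >= 0)
		return slot;

	/* Both groups full: evict one entry from the primary group and retry.
	 * Per-group locking in the backend keeps this safe without a global
	 * hash-table lock. */
	ppc_md.remove_hpte(primary);
	return ppc_md.insert_hpte(primary, vpn, prpn, 0, hpteflags, bolted, large);
}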
@@ -77,7 +77,7 @@ typedef struct {
	unsigned long resv0: 7; /* Padding to a 64b boundary */
} slb_dword1;

-typedef struct _SLBE {
+typedef struct {
	union {
		unsigned long dword0;
		slb_dword0 dw0;
@@ -107,25 +107,12 @@ typedef struct {
	unsigned long avpn:57; /* vsid | api == avpn */
	unsigned long : 2; /* Software use */
	unsigned long bolted: 1; /* HPTE is "bolted" */
-	unsigned long : 1; /* Software use */
+	unsigned long lock: 1; /* lock on pSeries SMP */
	unsigned long l: 1; /* Virtual page is large (L=1) or 4 KB (L=0) */
	unsigned long h: 1; /* Hash function identifier */
	unsigned long v: 1; /* Valid (v=1) or invalid (v=0) */
} Hpte_dword0;

-typedef struct {
-	unsigned long : 6; /* unused - padding */
-	unsigned long ac: 1; /* Address compare */
-	unsigned long r: 1; /* Referenced */
-	unsigned long c: 1; /* Changed */
-	unsigned long w: 1; /* Write-thru cache mode */
-	unsigned long i: 1; /* Cache inhibited */
-	unsigned long m: 1; /* Memory coherence required */
-	unsigned long g: 1; /* Guarded */
-	unsigned long n: 1; /* No-execute */
-	unsigned long pp: 2; /* Page protection bits 1:2 */
-} Hpte_flags;

typedef struct {
	unsigned long pp0: 1; /* Page protection bit 0 */
	unsigned long : 1; /* Reserved */
@@ -134,12 +121,12 @@ typedef struct {
	unsigned long ac: 1; /* Address compare */
	unsigned long r: 1; /* Referenced */
	unsigned long c: 1; /* Changed */
-	unsigned long w: 1; /* Write-thru cache mode */
-	unsigned long i: 1; /* Cache inhibited */
-	unsigned long m: 1; /* Memory coherence required */
-	unsigned long g: 1; /* Guarded */
-	unsigned long n: 1; /* No-execute */
-	unsigned long pp: 2; /* Page protection bits 1:2 */
+	unsigned long w: 1; /* Write-thru cache mode */
+	unsigned long i: 1; /* Cache inhibited */
+	unsigned long m: 1; /* Memory coherence required */
+	unsigned long g: 1; /* Guarded */
+	unsigned long n: 1; /* No-execute */
+	unsigned long pp: 2; /* Page protection bits 1:2 */
} Hpte_dword1;
typedef struct {
@@ -148,7 +135,7 @@ typedef struct {
	unsigned long flags: 10; /* HPTE flags */
} Hpte_dword1_flags;

-typedef struct _HPTE {
+typedef struct {
	union {
		unsigned long dword0;
		Hpte_dword0 dw0;
@@ -156,21 +143,8 @@ typedef struct _HPTE {
	union {
		unsigned long dword1;
-		struct {
-			unsigned long pp0: 1; /* Page protection bit 0 */
-			unsigned long ts: 1; /* Tag set bit */
-			unsigned long rpn: 50; /* Real page number */
-			unsigned long : 2; /* Unused */
-			unsigned long ac: 1; /* Address compare bit */
-			unsigned long r: 1; /* Referenced */
-			unsigned long c: 1; /* Changed */
-			unsigned long w: 1; /* Write-thru cache mode */
-			unsigned long i: 1; /* Cache inhibited */
-			unsigned long m: 1; /* Memory coherence */
-			unsigned long g: 1; /* Guarded */
-			unsigned long n: 1; /* No-execute page if N=1 */
-			unsigned long pp: 2; /* Page protection bit 1:2 */
-		} dw1;
+		Hpte_dword1 dw1;
+		Hpte_dword1_flags flags;
	} dw1;
} HPTE;
@@ -204,6 +178,8 @@ void create_valid_hpte( unsigned long slot, unsigned long vpn,
#define PT_SHIFT (12) /* Page Table */
#define PT_MASK 0x02FF

+#define LARGE_PAGE_SHIFT 24
+
static inline unsigned long hpt_hash(unsigned long vpn, int large)
{
	unsigned long vsid;
@@ -220,20 +196,36 @@ static inline unsigned long hpt_hash(unsigned long vpn, int large)
	return (vsid & 0x7fffffffff) ^ page;
}

#define PG_SHIFT (12) /* Page Entry */

-extern __inline__ void _tlbie( unsigned long va )
-{
-	__asm__ __volatile__ ( " \n\
-		clrldi  %0,%0,16 \n\
-		ptesync \n\
-		tlbie   %0 \n\
-		eieio \n\
-		tlbsync \n\
-		ptesync"
-		: : "r" (va) : "memory" );
-}
+static inline void _tlbie(unsigned long va, int large)
+{
+	asm volatile("ptesync": : :"memory");
+
+	if (large) {
+		asm volatile("clrldi %0,%0,16\n\
+			tlbie %0,1" : : "r"(va) : "memory");
+	} else {
+		asm volatile("clrldi %0,%0,16\n\
+			tlbie %0,0" : : "r"(va) : "memory");
+	}
+
+	asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+
+static inline void _tlbiel(unsigned long va, int large)
+{
+	asm volatile("ptesync": : :"memory");
+
+	if (large) {
+		asm volatile("clrldi %0,%0,16\n\
+			tlbiel %0,1" : : "r"(va) : "memory");
+	} else {
+		asm volatile("clrldi %0,%0,16\n\
+			tlbiel %0,0" : : "r"(va) : "memory");
+	}
+
+	asm volatile("ptesync": : :"memory");
+}
#endif /* __ASSEMBLY__ */
/* Block size masks */
......
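
Since this commit prepares for large pages, it may help to see how the large flag changes the virtual-page-number and hash computation. The sketch below is only an assumption of the intended layout, following the shape of hpt_hash() shown above and the new LARGE_PAGE_SHIFT define; the exact VPN handling lives in the collapsed hash-table diffs and may differ.

/* Illustrative only: forming a VPN from a (vsid, ea) pair for a 4 KB page
 * versus a 16 MB large page (LARGE_PAGE_SHIFT == 24), then hashing it with
 * hpt_hash().  Assumed layout, not taken from this commit.
 */
static unsigned long va_to_hash(unsigned long vsid, unsigned long ea, int large)
{
	unsigned long shift = large ? LARGE_PAGE_SHIFT : PAGE_SHIFT;
	/* 28 bits of effective address lie within one segment; the VPN is
	 * the vsid concatenated with the page index inside that segment. */
	unsigned long vpn = (vsid << (28 - shift)) | ((ea & 0x0fffffff) >> shift);

	return hpt_hash(vpn, large);
}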
@@ -7,6 +7,7 @@
 */

#ifndef __ASSEMBLY__
#include <linux/threads.h>
+#include <asm/processor.h> /* For TASK_SIZE */
#include <asm/mmu.h>
#include <asm/page.h>
@@ -93,13 +94,15 @@
#define _PAGE_WRITETHRU 0x040UL /* W: cache write-through */
#define _PAGE_DIRTY 0x080UL /* C: page changed */
#define _PAGE_ACCESSED 0x100UL /* R: page referenced */
+#if 0
#define _PAGE_HPTENOIX 0x200UL /* software: pte HPTE slot unknown */
+#endif
#define _PAGE_HASHPTE 0x400UL /* software: pte has an associated HPTE */
#define _PAGE_EXEC 0x800UL /* software: i-cache coherence required */
#define _PAGE_SECONDARY 0x8000UL /* software: HPTE is in secondary group */
#define _PAGE_GROUP_IX 0x7000UL /* software: HPTE index within group */
/* Bits 0x7000 identify the index within an HPT Group */
-#define _PAGE_HPTEFLAGS (_PAGE_HASHPTE | _PAGE_HPTENOIX | _PAGE_SECONDARY | _PAGE_GROUP_IX)
+#define _PAGE_HPTEFLAGS (_PAGE_HASHPTE | _PAGE_SECONDARY | _PAGE_GROUP_IX)
/* PAGE_MASK gives the right answer below, but only by accident */
/* It should be preserving the high 48 bits and then specifically */
/* preserving _PAGE_SECONDARY | _PAGE_GROUP_IX */
@@ -397,6 +400,7 @@ extern void paging_init(void);
 * as entries are faulted into the hash table by the low-level
 * data/instruction access exception handlers.
 */
+#if 0
/*
 * We won't be able to use update_mmu_cache to update the
 * hardware page table because we need to update the pte
@@ -404,9 +408,29 @@ extern void paging_init(void);
 * its value.
 */
#define update_mmu_cache(vma, addr, pte) do { } while (0)
+#else
+/*
+ * This gets called at the end of handling a page fault, when
+ * the kernel has put a new PTE into the page table for the process.
+ * We use it to put a corresponding HPTE into the hash table
+ * ahead of time, instead of waiting for the inevitable extra
+ * hash-table miss exception.
+ */
+extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
+#endif

extern void flush_hash_segments(unsigned low_vsid, unsigned high_vsid);
-extern void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte);
+extern void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte,
+			    int local);
+void flush_hash_range(unsigned long context, unsigned long number, int local);
+
+/* TLB flush batching */
+#define MAX_BATCH_FLUSH 128
+
+struct tlb_batch_data {
+	pte_t pte;
+	unsigned long addr;
+};
+
+extern struct tlb_batch_data tlb_batch_array[NR_CPUS][MAX_BATCH_FLUSH];

/* Encode and de-code a swap entry */
#define SWP_TYPE(entry) (((entry).val >> 1) & 0x3f)
......