Commit 18c8a844 authored by Anton Blanchard's avatar Anton Blanchard

ppc64: hashtable management rework for SMP scalability

Get rid of the global lock on hpte insert and remove; use a software bit
 for pSeries, and the relevant interfaces for pSeries LPAR, to avoid
 races.
Preload hpte entries in update_mmu_cache.
Keep cpu_vm_mask and use tlbiel when the mm has only run locally.
Batch tlb flushes where possible.
Add large page support in preparation for generic large page support.
Remove HPTENOIX; we always put slot information into linux ptes now.
Note: pSeries and pSeries LPAR so far; iSeries is coming next.
parent 41d64318
......@@ -27,7 +27,7 @@ obj-y := ppc_ksyms.o setup.o entry.o traps.o irq.o idle.o \
pmc.o mf_proc.o proc_pmc.o iSeries_setup.o \
ItLpQueue.o hvCall.o mf.o HvLpEvent.o ras.o \
iSeries_proc.o HvCall.o HvLpConfig.o \
rtc.o init_task.o
rtc.o init_task.o pSeries_htab.o
obj-$(CONFIG_PCI) += pci.o pci_dn.o pci_dma.o
obj-$(CONFIG_PPC_EEH) += eeh.o
......
/*
*
*
* PowerPC64 port by Mike Corrigan and Dave Engebretsen
* {mikejc|engebret}@us.ibm.com
*
* Copyright (c) 2000 Mike Corrigan <mikejc@us.ibm.com>
*
* SMP scalability work:
* Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
*
* Module name: htab.c
*
* Description:
......@@ -18,17 +19,7 @@
*/
#include <linux/config.h>
#include <asm/processor.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/page.h>
#include <asm/types.h>
#include <asm/init.h>
#include <asm/system.h>
#include <asm/iSeries/LparData.h>
#include <linux/spinlock.h>
#include <asm/ppcdebug.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/proc_fs.h>
......@@ -36,52 +27,42 @@
#include <linux/sysctl.h>
#include <linux/ctype.h>
#include <linux/cache.h>
#include <asm/ppcdebug.h>
#include <asm/processor.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/page.h>
#include <asm/types.h>
#include <asm/init.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/Naca.h>
#include <asm/system.h>
#include <asm/pmc.h>
#include <asm/machdep.h>
#include <asm/lmb.h>
#include <asm/abs_addr.h>
#ifdef CONFIG_PPC_EEH
#include <asm/eeh.h>
#endif
/* For iSeries */
#include <asm/iSeries/HvCallHpt.h>
/* Note: pte --> Linux PTE
/*
* Note: pte --> Linux PTE
* HPTE --> PowerPC Hashed Page Table Entry
*/
HTAB htab_data = {NULL, 0, 0, 0, 0};
int proc_dol2crvec(ctl_table *table, int write, struct file *filp,
void *buffer, size_t *lenp);
void htab_initialize(void);
void make_pte_LPAR(HPTE *htab,
unsigned long va, unsigned long pa, int mode,
unsigned long hash_mask, int large);
extern unsigned long reloc_offset(void);
extern unsigned long get_kernel_vsid( unsigned long ea );
extern void cacheable_memzero( void *, unsigned int );
extern unsigned long _SDR1;
extern unsigned long klimit;
extern struct Naca *naca;
extern char _stext[], _etext[], __start_naca[], __end_stab[];
static spinlock_t hash_table_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
extern unsigned long reloc_offset(void);
#define PTRRELOC(x) ((typeof(x))((unsigned long)(x) - offset))
#define PTRUNRELOC(x) ((typeof(x))((unsigned long)(x) + offset))
#define RELOC(x) (*PTRRELOC(&(x)))
extern unsigned long htab_size( unsigned long );
unsigned long hpte_getword0_iSeries( unsigned long slot );
#define KB (1024)
#define MB (1024*KB)
static inline void
......@@ -101,8 +82,12 @@ create_pte_mapping(unsigned long start, unsigned long end,
for (addr = start; addr < end; addr += step) {
unsigned long vsid = get_kernel_vsid(addr);
unsigned long va = (vsid << 28) | (addr & 0xfffffff);
make_pte(htab, va, (unsigned long)__v2a(addr), mode, mask,
large);
if (_machine == _MACH_pSeriesLP)
pSeries_lpar_make_pte(htab, va,
(unsigned long)__v2a(addr), mode, mask, large);
else
pSeries_make_pte(htab, va,
(unsigned long)__v2a(addr), mode, mask, large);
}
}
......@@ -111,7 +96,7 @@ htab_initialize(void)
{
unsigned long table, htab_size_bytes;
unsigned long pteg_count;
unsigned long mode_ro, mode_rw, mask;
unsigned long mode_rw, mask;
unsigned long offset = reloc_offset();
struct Naca *_naca = RELOC(naca);
HTAB *_htab_data = PTRRELOC(&htab_data);
......@@ -132,7 +117,7 @@ htab_initialize(void)
_htab_data->htab_num_ptegs = pteg_count;
_htab_data->htab_hash_mask = pteg_count - 1;
if(_machine == _MACH_pSeries) {
if (_machine == _MACH_pSeries) {
/* Find storage for the HPT. Must be contiguous in
* the absolute address space.
*/
......@@ -151,642 +136,196 @@ htab_initialize(void)
RELOC(_SDR1) = 0;
}
mode_ro = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RXRX;
mode_rw = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RWXX;
mask = pteg_count-1;
/* Create PTE's for the kernel text and data sections plus
* the HPT and HPTX arrays. Make the assumption that
* (addr & KERNELBASE) == 0 (ie they are disjoint).
* We also assume that the va is <= 64 bits.
*/
#if 0
create_pte_mapping((unsigned long)_stext, (unsigned long)__start_naca, mode_ro, mask);
create_pte_mapping((unsigned long)__start_naca, (unsigned long)__end_stab, mode_rw, mask);
create_pte_mapping((unsigned long)__end_stab, (unsigned long)_etext, mode_ro, mask);
create_pte_mapping((unsigned long)_etext, RELOC(klimit), mode_rw, mask);
create_pte_mapping((unsigned long)__a2v(table), (unsigned long)__a2v(table+htab_size_bytes), mode_rw, mask);
#else
#ifndef CONFIG_PPC_ISERIES
/* XXX we currently map kernel text rw, should fix this */
if (__is_processor(PV_POWER4) && _naca->physicalMemorySize > 256*MB) {
create_pte_mapping((unsigned long)KERNELBASE,
KERNELBASE + 256*MB, mode_rw, mask, 0);
create_pte_mapping((unsigned long)KERNELBASE + 256*MB,
KERNELBASE + (_naca->physicalMemorySize),
mode_rw, mask, 1);
return;
}
#endif
} else {
create_pte_mapping((unsigned long)KERNELBASE,
KERNELBASE+(_naca->physicalMemorySize),
mode_rw, mask, 0);
#endif
}
}
#undef KB
#undef MB
/*
 * Create a bolted HPTE. Used during initialization only.
 *
 * Only the primary PTEG selected by (hash & hash_mask) is searched for an
 * invalid entry; we assume the PTE will fit there. If no free entry is
 * found the function spins forever, since it is too early to call xmon.
 *
 * htab      - base of the hashed page table
 * va        - virtual address being mapped
 * pa        - value OR'ed with mode to form the second doubleword
 * mode      - bits OR'ed into the second doubleword alongside pa
 * hash_mask - mask applied to the hash to pick the PTEG
 * large     - non-zero selects the large page vpn (va >> 24) and is
 *             passed on to hpt_hash()
 */
void make_pte(HPTE *htab,
	      unsigned long va, unsigned long pa, int mode,
	      unsigned long hash_mask, int large)
{
	volatile unsigned long spin = 1;
	unsigned long vpn, hash, n;
	HPTE *entry;

#ifdef CONFIG_PPC_PSERIES
	/* LPAR machines are handled by their own routine */
	if (_machine == _MACH_pSeriesLP) {
		make_pte_LPAR(htab, va, pa, mode, hash_mask, large);
		return;
	}
#endif

	vpn = large ? (va >> 24) : (va >> 12);
	hash = hpt_hash(vpn, large);
	entry = htab + ((hash & hash_mask) * HPTES_PER_GROUP);

	for (n = 0; n < 8; n++, entry++) {
		if (entry->dw0.dw0.v != 0)
			continue;	/* entry in use, try the next one */

		entry->dw1.dword1 = pa | mode;
		entry->dw0.dword0 = 0;
		entry->dw0.dw0.avpn = va >> 23;
		entry->dw0.dw0.bolted = 1;
		/* The valid bit is set last */
		entry->dw0.dw0.v = 1;
		return;
	}

	/* We should _never_ get here and too early to call xmon. */
	while (spin)
		spin |= 1;
}
/* Functions to invalidate a HPTE */
/*
 * iSeries: invalidate the HPTE in the given slot by calling
 * HvCallHpt_invalidateSetSwBitsGet() with both remaining arguments zero.
 * The value returned by that call is ignored.
 */
static void hpte_invalidate_iSeries(unsigned long slot)
{
	HvCallHpt_invalidateSetSwBitsGet(slot, 0, 0);
}
/*
 * pSeries: invalidate the HPTE in the given slot and flush the TLB.
 *
 * The first doubleword is saved and then zeroed in the table. The address
 * handed to _tlbie() is rebuilt from the saved AVPN, the hash-function bit
 * and the slot's group number (slot >> 3).
 */
static void hpte_invalidate_pSeries(unsigned long slot)
{
	/* Snapshot of the first doubleword, viewable as bitfields */
	union {
		unsigned long d;
		Hpte_dword0 h;
	} saved;
	HPTE *entry = htab_data.htab + slot;
	unsigned long vsid, group, index_low, index_high;

	saved.d = entry->dw0.dword0;

	/* Invalidate the hpte */
	entry->dw0.dword0 = 0;

	/* Invalidate the tlb */
	vsid = saved.h.avpn >> 5;
	group = slot >> 3;
	if (saved.h.h)
		group = ~group;	/* entry was placed via the secondary hash */

	index_low = (vsid ^ group) & 0x7ff;
	index_high = (saved.h.avpn & 0x1f) << 11;

	_tlbie((index_low | index_high) << 12);
}
/*
 * Select an available HPT slot for a new HPTE (iSeries).
 *
 * HvCallHpt_findValid() is asked for a slot for vpn. A valid HPTE coming
 * back means the vpn is already mapped, which is treated as fatal. A
 * result of -1 means neither group has a free entry; in that case exactly
 * one non-bolted entry of the primary group is cast out, starting at a
 * round-robin position, and its slot is reused. If every entry in the
 * primary group is bolted we panic.
 *
 * return slot index (if in primary group)
 * return -slot index (if in secondary group)
 */
static long hpte_selectslot_iSeries(unsigned long vpn)
{
	HPTE hpte;
	long ret_slot, orig_slot;
	unsigned long primary_hash;
	unsigned long hpteg_slot;
	unsigned long slot;
	unsigned i, k;
	union {
		unsigned long d;
		Hpte_dword0 h;
	} hpte_dw0;

	ret_slot = orig_slot = HvCallHpt_findValid(&hpte, vpn);
	if (hpte.dw0.dw0.v) {		/* If valid ...what do we do now? */
		udbg_printf( "hpte_selectslot_iSeries: vpn 0x%016lx already valid at slot 0x%016lx\n", vpn, ret_slot );
		udbg_printf( "hpte_selectslot_iSeries: returned hpte 0x%016lx 0x%016lx\n", hpte.dw0.dword0, hpte.dw1.dword1 );
		panic("select_hpte_slot found entry already valid\n");
	}

	if (ret_slot == -1) {		/* -1 indicates no available slots */
		int evicted = 0;

		/* No available entry found in secondary group */
		PMC_SW_SYSTEM(htab_capacity_castouts);

		primary_hash = hpt_hash(vpn, 0);
		hpteg_slot = (primary_hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
		k = htab_data.next_round_robin++ & 0x7;

		for (i = 0; i < HPTES_PER_GROUP; ++i) {
			if (k == HPTES_PER_GROUP)
				k = 0;
			slot = hpteg_slot + k;
			hpte_dw0.d = hpte_getword0_iSeries(slot);
			if (!hpte_dw0.h.bolted) {
				/*
				 * Cast out this one entry only; without the
				 * break every non-bolted entry in the group
				 * would be invalidated.
				 */
				hpte_invalidate_iSeries(slot);
				ret_slot = slot;
				evicted = 1;
				break;
			}
			++k;
		}

		/*
		 * The failure check lives in this branch so that a legitimate
		 * secondary-group result of -1 (slot 1) below is not mistaken
		 * for "nothing found", and so hpteg_slot is always initialised
		 * when it is printed.
		 */
		if (!evicted) {
			/* No non-bolted entry found in primary group - time to panic */
			udbg_printf("hpte_selectslot_iSeries - No non-bolted HPTE in group 0x%lx! \n", hpteg_slot/HPTES_PER_GROUP);
			panic("No non-bolted HPTE in group %lx", (unsigned long)hpteg_slot/HPTES_PER_GROUP);
		}
	} else if (ret_slot < 0) {
		/* Slot is in the secondary group: strip the top bit, negate */
		PMC_SW_SYSTEM(htab_primary_overflows);
		ret_slot &= 0x7fffffffffffffff;
		ret_slot = -ret_slot;
	}

	PPCDBG(PPCDBG_MM, "hpte_selectslot_iSeries: vpn=0x%016lx, orig_slot=0x%016lx, ret_slot=0x%016lx \n",
	       vpn, orig_slot, ret_slot );

	return ret_slot;
}
/*
 * Select an HPT slot for a new HPTE (pSeries).
 *
 * The primary group is scanned for an invalid entry, then the secondary
 * group. If both are full, the first non-bolted entry of the primary
 * group, starting from a round-robin position, is invalidated through
 * ppc_md.hpte_invalidate and reused. Panics if every primary entry is
 * bolted.
 *
 * Returns the slot index for a primary-group slot and the negated slot
 * index for a secondary-group slot.
 */
static long hpte_selectslot_pSeries(unsigned long vpn)
{
	unsigned long hash = hpt_hash(vpn, 0);
	unsigned long group;
	HPTE *entry;
	unsigned n, victim;

	/* Look for a free entry in the primary group */
	group = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
	entry = htab_data.htab + group;
	for (n = 0; n < HPTES_PER_GROUP; n++, entry++) {
		if (entry->dw0.dw0.v == 0)
			return group + n;
	}

	/* Primary group is full */
	PMC_SW_SYSTEM(htab_primary_overflows);

	/* Look for a free entry in the secondary group */
	group = (~hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
	entry = htab_data.htab + group;
	for (n = 0; n < HPTES_PER_GROUP; n++, entry++) {
		if (entry->dw0.dw0.v == 0)
			return -(group + n);
	}

	/* Secondary group is full as well */
	PMC_SW_SYSTEM(htab_capacity_castouts);

	/* Pick a primary-group entry to replace */
	group = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
	entry = htab_data.htab + group;
	victim = htab_data.next_round_robin++ & 0x7;
	for (n = 0; n < HPTES_PER_GROUP; n++, victim++) {
		if (victim == HPTES_PER_GROUP)
			victim = 0;
		if (entry[victim].dw0.dw0.bolted)
			continue;	/* bolted entries are never replaced */

		group += victim;
		/* Invalidate the current entry */
		ppc_md.hpte_invalidate(group);
		return group;
	}

	/* Every entry in the primary group is bolted - time to panic */
	udbg_printf("hpte_selectslot_pSeries - No non-bolted HPTE in group 0x%lx! \n", group/HPTES_PER_GROUP);
	/* xmon(0); */
	panic("No non-bolted HPTE in group %lx", (unsigned long)group/HPTES_PER_GROUP);

	/* keep the compiler happy */
	return 0;
}
/*
 * iSeries: fetch the HPTE for the given slot with HvCallHpt_get() and
 * return its first doubleword.
 */
unsigned long hpte_getword0_iSeries(unsigned long slot)
{
	HPTE copy;

	HvCallHpt_get(&copy, slot);
	return copy.dw0.dword0;
}
/*
 * pSeries: return the first doubleword of the HPTE at the given slot,
 * read directly from the table at htab_data.htab.
 */
unsigned long hpte_getword0_pSeries(unsigned long slot)
{
	HPTE *entry = htab_data.htab + slot;

	return entry->dw0.dword0;
}
/*
 * iSeries: look up the HPTE for vpn through HvCallHpt_findValid().
 *
 * Returns -1 if the HPTE handed back is not valid. Otherwise returns the
 * slot as reported, except that a negative result has its top bit cleared
 * and is then negated.
 */
static long hpte_find_iSeries(unsigned long vpn)
{
	HPTE found;
	long slot = HvCallHpt_findValid(&found, vpn);

	if (!found.dw0.dw0.v)
		return -1;

	if (slot < 0) {
		slot &= 0x7fffffffffffffff;
		slot = -slot;
	}
	return slot;
}
/*
 * pSeries: search the hash table for a valid HPTE mapping vpn.
 *
 * Pass 0 scans the group selected by the hash, pass 1 the group selected
 * by its complement. An entry matches when its AVPN equals vpn >> 11, it
 * is valid, and its h bit equals the pass number.
 *
 * Returns the slot index for a pass-0 match, the negated slot index for a
 * pass-1 match, or -1 if nothing matches.
 */
static long hpte_find_pSeries(unsigned long vpn)
{
	union {
		unsigned long d;
		Hpte_dword0 h;
	} word0;
	unsigned long hash = hpt_hash(vpn, 0);
	unsigned long pass, n;
	long slot;

	for (pass = 0; pass < 2; pass++, hash = ~hash) {
		slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;

		for (n = 0; n < HPTES_PER_GROUP; n++, slot++) {
			word0.d = hpte_getword0_pSeries(slot);

			if (word0.h.avpn != (vpn >> 11) ||
			    !word0.h.v ||
			    word0.h.h != pass)
				continue;

			/* HPTE matches */
			return pass ? -slot : slot;
		}
	}

	return -1;
}
/*
 * This function is called by iSeries setup when initializing the hpt.
 *
 * With hash_table_lock held and interrupts disabled, a slot is chosen
 * through ppc_md.hpte_selectslot and a valid HPTE is created in it through
 * ppc_md.hpte_create_valid. When ptep is non-NULL the linux pte is
 * rewritten to record _PAGE_HASHPTE, the secondary-hash indicator
 * (bit 15) and the slot bits covered by _PAGE_GROUP_IX.
 */
void build_valid_hpte(unsigned long vsid, unsigned long ea, unsigned long pa,
		      pte_t *ptep, unsigned hpteflags, unsigned bolted)
{
	unsigned long vpn = ((vsid << 28) | (ea & 0xffff000)) >> 12;
	unsigned long flags;
	unsigned secondary = 0;
	long slot;

	spin_lock_irqsave(&hash_table_lock, flags);

	/* A negative slot means the secondary group was selected */
	slot = ppc_md.hpte_selectslot(vpn);
	if (slot < 0) {
		secondary = 1;
		slot = -slot;
	}

	ppc_md.hpte_create_valid(slot, vpn, pa >> 12, secondary, ptep,
				 hpteflags, bolted);

	if (ptep != NULL) {
		pte_t updated = *ptep;

		/*
		 * Drop the old HPTE bookkeeping bits, then record that an
		 * hpte exists, which hash was used and the slot bits.
		 */
		pte_val(updated) = (pte_val(updated) & ~_PAGE_HPTEFLAGS) |
				   _PAGE_HASHPTE |
				   (secondary << 15) |
				   ((slot << 12) & _PAGE_GROUP_IX);

		/* Save the new pte. */
		*ptep = updated;
	}

	spin_unlock_irqrestore(&hash_table_lock, flags);
}
/*
 * Create an HPTE and validate it (iSeries).
 *
 * It is assumed that the HPT slot currently is invalid. A local HPTE image
 * is built from the avpn (vpn >> 11), the rpn converted to absolute with
 * physRpn_to_absRpn(), the flags, the hash indicator and the bolted bit,
 * and is then handed to HvCallHpt_addValidate(). ptep is not used here.
 */
static void hpte_create_valid_iSeries(unsigned long slot, unsigned long vpn,
				      unsigned long prpn, unsigned hash,
				      void *ptep, unsigned hpteflags,
				      unsigned bolted)
{
	/* Scratch HPTE image, each doubleword viewable raw or as fields */
	struct {
		union {
			unsigned long d;
			Hpte_dword0 h;
		} dw0;
		union {
			unsigned long d;
			Hpte_dword1 h;
			Hpte_dword1_flags f;
		} dw1;
	} image;
	unsigned long avpn = vpn >> 11;
	unsigned long arpn = physRpn_to_absRpn(prpn);

	/* First doubleword: avpn, hash indicator, bolted and valid */
	image.dw0.d = 0;
	image.dw0.h.avpn = avpn;
	image.dw0.h.h = hash;
	image.dw0.h.bolted = bolted;
	image.dw0.h.v = 1;

	/* Second doubleword: absolute rpn and flags */
	image.dw1.d = 0;
	image.dw1.h.rpn = arpn;
	image.dw1.f.flags = hpteflags;

	/* Now fill in the actual HPTE */
	HvCallHpt_addValidate(slot, hash, (HPTE *)&image);
}
/*
 * Create an HPTE in the given slot and validate it (pSeries).
 *
 * A local HPTE image is built from the avpn (vpn >> 11), the rpn converted
 * with physRpn_to_absRpn(), the flags, the hash indicator and the bolted
 * bit, and is then copied into htab_data.htab[slot]. The second doubleword
 * is stored first and the first doubleword (which carries the valid bit)
 * last, with an eieio between the two stores. ptep is not used here.
 */
static void hpte_create_valid_pSeries(unsigned long slot, unsigned long vpn,
unsigned long prpn, unsigned hash,
void * ptep, unsigned hpteflags,
unsigned bolted)
{
/* Local copy of HPTE */
struct {
/* Local copy of first doubleword of HPTE */
union {
unsigned long d;
Hpte_dword0 h;
} dw0;
/* Local copy of second doubleword of HPTE */
union {
unsigned long d;
Hpte_dword1 h;
Hpte_dword1_flags f;
} dw1;
} lhpte;
unsigned long avpn = vpn >> 11;
unsigned long arpn = physRpn_to_absRpn( prpn );
HPTE *hptep;
/* Fill in the local HPTE with absolute rpn, avpn and flags */
lhpte.dw1.d = 0;
lhpte.dw1.h.rpn = arpn;
lhpte.dw1.f.flags = hpteflags;
lhpte.dw0.d = 0;
lhpte.dw0.h.avpn = avpn;
lhpte.dw0.h.h = hash;
lhpte.dw0.h.bolted = bolted;
lhpte.dw0.h.v = 1;
/* Now fill in the actual HPTE */
hptep = htab_data.htab + slot;
/* Set the second dword first so that the valid bit
 * is the last thing set
 */
hptep->dw1.dword1 = lhpte.dw1.d;
/* Guarantee the second dword is visible before
 * the valid bit
 */
__asm__ __volatile__ ("eieio" : : : "memory");
/* Now set the first dword including the valid bit */
hptep->dw0.dword0 = lhpte.dw0.d;
/* NOTE(review): presumably ptesync completes the table update before
 * we return - confirm against the architecture manual
 */
__asm__ __volatile__ ("ptesync" : : : "memory");
}
/* find_linux_pte returns the address of a linux pte for a given
* find_linux_pte returns the address of a linux pte for a given
* effective address and directory. If not found, it returns zero.
*/
pte_t * find_linux_pte( pgd_t * pgdir, unsigned long ea )
pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea)
{
pgd_t *pg;
pmd_t *pm;
pte_t *pt = NULL;
pte_t pte;
pg = pgdir + pgd_index( ea );
if ( ! pgd_none( *pg ) ) {
pm = pmd_offset( pg, ea );
if ( ! pmd_none( *pm ) ) {
pt = pte_offset_kernel( pm, ea );
pg = pgdir + pgd_index(ea);
if (!pgd_none(*pg)) {
pm = pmd_offset(pg, ea);
if (!pmd_none(*pm)) {
pt = pte_offset_kernel(pm, ea);
pte = *pt;
if ( ! pte_present( pte ) )
if (!pte_present(pte))
pt = NULL;
}
}
return pt;
}
/*
 * Compute the pp value for an HPTE from a linux pte value.
 *
 * The result is the pte's _PAGE_USER bit, OR'ed with a low-order bit that
 * is taken from (_PAGE_USER bit >> 1) and is only kept when the pte bits
 * at positions 2 (_PAGE_RW) and 7 (_PAGE_DIRTY) are not both set.
 */
static inline unsigned long computeHptePP(unsigned long pte)
{
	unsigned long user = pte & _PAGE_USER;
	unsigned long rw_and_dirty = (pte >> 2) &	/* _PAGE_RW */
				     (pte >> 7);	/* _PAGE_DIRTY */
	unsigned long not_writable = ~rw_and_dirty & 1;

	return user | ((user >> 1) & not_writable);
}
static void hpte_updatepp_iSeries(long slot, unsigned long newpp, unsigned long va)
static inline unsigned long computeHptePP(unsigned long pte)
{
HvCallHpt_setPp( slot, newpp );
return (pte & _PAGE_USER) |
(((pte & _PAGE_USER) >> 1) &
((~((pte >> 2) & /* _PAGE_RW */
(pte >> 7))) & /* _PAGE_DIRTY */
1));
}
static void hpte_updatepp_pSeries(long slot, unsigned long newpp, unsigned long va)
/*
* Handle a fault by adding an HPTE. If the address can't be determined
* to be valid via Linux page tables, return 1. If handled return 0
*/
int __hash_page(unsigned long ea, unsigned long access, unsigned long vsid,
pte_t *ptep)
{
/* Local copy of first doubleword of HPTE */
union {
unsigned long d;
Hpte_dword0 h;
} hpte_dw0;
/* Local copy of second doubleword of HPTE */
union {
unsigned long d;
Hpte_dword1 h;
Hpte_dword1_flags f;
} hpte_dw1;
HPTE * hptep = htab_data.htab + slot;
unsigned long va, vpn;
unsigned long newpp, prpn;
unsigned long hpteflags;
long slot;
pte_t old_pte, new_pte;
/* Turn off valid bit in HPTE */
hpte_dw0.d = hptep->dw0.dword0;
hpte_dw0.h.v = 0;
hptep->dw0.dword0 = hpte_dw0.d;
/* Search the Linux page table for a match with va */
va = (vsid << 28) | (ea & 0x0fffffff);
vpn = va >> PAGE_SHIFT;
/* Ensure it is out of the tlb too */
_tlbie( va );
/*
* If no pte found or not present, send the problem up to
* do_page_fault
*/
if (!ptep || !pte_present(*ptep))
return 1;
/* Insert the new pp bits into the HPTE */
hpte_dw1.d = hptep->dw1.dword1;
hpte_dw1.h.pp = newpp;
hptep->dw1.dword1 = hpte_dw1.d;
/*
* Check the user's access rights to the page. If access should be
* prevented then send the problem up to do_page_fault.
*/
access |= _PAGE_PRESENT;
if (access & ~(pte_val(*ptep)))
return 1;
/* Ensure it is visible before validating */
__asm__ __volatile__ ("eieio" : : : "memory");
/*
* At this point, we have a pte (old_pte) which can be used to build
* or update an HPTE. There are 2 cases:
*
* 1. There is a valid (present) pte with no associated HPTE (this is
* the most common case)
* 2. There is a valid (present) pte with an associated HPTE. The
* current values of the pp bits in the HPTE prevent access
* because we are doing software DIRTY bit management and the
* page is currently not DIRTY.
*/
/* Turn the valid bit back on in HPTE */
hpte_dw0.h.v = 1;
hptep->dw0.dword0 = hpte_dw0.d;
old_pte = *ptep;
new_pte = old_pte;
/* If the attempted access was a store */
if (access & _PAGE_RW)
pte_val(new_pte) |= _PAGE_ACCESSED | _PAGE_DIRTY;
else
pte_val(new_pte) |= _PAGE_ACCESSED;
__asm__ __volatile__ ("ptesync" : : : "memory");
}
newpp = computeHptePP(pte_val(new_pte));
/*
* Update the page protection bits. Intended to be used to create
* guard pages for kernel data structures on pages which are bolted
* in the HPT. Assumes pages being operated on will not be stolen.
*/
void hpte_updateboltedpp_iSeries(unsigned long newpp, unsigned long ea )
{
unsigned long vsid,va,vpn;
long slot;
/* Check if pte already has an hpte (case 2) */
if (pte_val(old_pte) & _PAGE_HASHPTE) {
/* There MIGHT be an HPTE for this pte */
unsigned long hash, slot, secondary;
vsid = get_kernel_vsid( ea );
va = ( vsid << 28 ) | ( ea & 0x0fffffff );
vpn = va >> PAGE_SHIFT;
/* XXX fix large pte flag */
hash = hpt_hash(vpn, 0);
secondary = (pte_val(old_pte) & _PAGE_SECONDARY) >> 15;
if (secondary)
hash = ~hash;
slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12;
slot = ppc_md.hpte_find( vpn );
HvCallHpt_setPp( slot, newpp );
}
udbg_printf("updatepp cpu %d ea %lx vsid should be %lx\n", smp_processor_id(), ea, vsid);
/* XXX fix large pte flag */
if (ppc_md.hpte_updatepp(slot, newpp, va, 0) == -1)
pte_val(old_pte) &= ~_PAGE_HPTEFLAGS;
else
if (!pte_same(old_pte, new_pte))
*ptep = new_pte;
}
static __inline__ void set_pp_bit(unsigned long pp, HPTE *addr)
{
unsigned long old;
unsigned long *p = (unsigned long *)(&(addr->dw1));
if (!(pte_val(old_pte) & _PAGE_HASHPTE)) {
/* XXX fix large pte flag */
unsigned long hash = hpt_hash(vpn, 0);
unsigned long hpte_group;
prpn = pte_val(old_pte) >> PTE_SHIFT;
__asm__ __volatile__(
"1: ldarx %0,0,%3\n\
rldimi %0,%2,0,62\n\
stdcx. %0,0,%3\n\
bne 1b"
: "=&r" (old), "=m" (*p)
: "r" (pp), "r" (p), "m" (*p)
: "cc");
}
repeat:
hpte_group = ((hash & htab_data.htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
/*
* Update the page protection bits. Intended to be used to create
* guard pages for kernel data structures on pages which are bolted
* in the HPT. Assumes pages being operated on will not be stolen.
*/
void hpte_updateboltedpp_pSeries(unsigned long newpp, unsigned long ea)
{
unsigned long vsid,va,vpn,flags;
long slot;
HPTE *hptep;
/* Update the linux pte with the HPTE slot */
pte_val(new_pte) &= ~_PAGE_HPTEFLAGS;
pte_val(new_pte) |= _PAGE_HASHPTE;
vsid = get_kernel_vsid( ea );
va = ( vsid << 28 ) | ( ea & 0x0fffffff );
vpn = va >> PAGE_SHIFT;
/* copy appropriate flags from linux pte */
hpteflags = (pte_val(new_pte) & 0x1f8) | newpp;
slot = ppc_md.hpte_find( vpn );
hptep = htab_data.htab + slot;
/* XXX fix large pte flag */
slot = ppc_md.insert_hpte(hpte_group, vpn, prpn, 0,
hpteflags, 0, 0);
set_pp_bit(newpp , hptep);
/* Primary is full, try the secondary */
if (slot == -1) {
pte_val(new_pte) |= 1 << 15;
hpte_group = ((~hash & htab_data.htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
/* XXX fix large pte flag */
slot = ppc_md.insert_hpte(hpte_group, vpn, prpn,
1, hpteflags, 0, 0);
if (slot == -1) {
if (mftb() & 0x1)
hpte_group = ((hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
/* Ensure it is out of the tlb too */
spin_lock_irqsave( &hash_table_lock, flags );
_tlbie( va );
spin_unlock_irqrestore( &hash_table_lock, flags );
}
ppc_md.remove_hpte(hpte_group);
goto repeat;
}
}
pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX;
/*
* No need to use ldarx/stdcx here because all who
* might be updating the pte will hold the
* page_table_lock or the hash_table_lock
* (we hold both)
*/
*ptep = new_pte;
}
/* This is called very early. */
void hpte_init_iSeries(void)
{
ppc_md.hpte_invalidate = hpte_invalidate_iSeries;
ppc_md.hpte_updatepp = hpte_updatepp_iSeries;
ppc_md.hpte_updateboltedpp = hpte_updateboltedpp_iSeries;
ppc_md.hpte_getword0 = hpte_getword0_iSeries;
ppc_md.hpte_selectslot = hpte_selectslot_iSeries;
ppc_md.hpte_create_valid = hpte_create_valid_iSeries;
ppc_md.hpte_find = hpte_find_iSeries;
}
void hpte_init_pSeries(void)
{
ppc_md.hpte_invalidate = hpte_invalidate_pSeries;
ppc_md.hpte_updatepp = hpte_updatepp_pSeries;
ppc_md.hpte_updateboltedpp = hpte_updateboltedpp_pSeries;
ppc_md.hpte_getword0 = hpte_getword0_pSeries;
ppc_md.hpte_selectslot = hpte_selectslot_pSeries;
ppc_md.hpte_create_valid = hpte_create_valid_pSeries;
ppc_md.hpte_find = hpte_find_pSeries;
return 0;
}
/*
* Handle a fault by adding an HPTE. If the address can't be determined
* to be valid via Linux page tables, return 1. If handled return 0
*/
int hash_page(unsigned long ea, unsigned long access)
{
void *pgdir;
unsigned long va, vsid, vpn;
unsigned long newpp, hash_ind, prpn;
unsigned long hpteflags;
long slot;
unsigned long vsid;
struct mm_struct *mm;
pte_t old_pte, new_pte, *ptep;
pte_t *ptep;
int ret;
/* Check for invalid addresses. */
if (!IS_VALID_EA(ea))
......@@ -830,9 +369,6 @@ int hash_page(unsigned long ea, unsigned long access)
break;
}
/* Search the Linux page table for a match with va */
va = (vsid << 28) | (ea & 0x0fffffff);
vpn = va >> PAGE_SHIFT;
pgdir = mm->pgd;
if (pgdir == NULL)
......@@ -843,149 +379,20 @@ int hash_page(unsigned long ea, unsigned long access)
* from modifying entries while we search and update
*/
spin_lock(&mm->page_table_lock);
ptep = find_linux_pte(pgdir, ea);
/*
* If no pte found or not present, send the problem up to
* do_page_fault
*/
if (!ptep || !pte_present(*ptep)) {
ret = __hash_page(ea, access, vsid, ptep);
spin_unlock(&mm->page_table_lock);
return 1;
}
/*
* Check the user's access rights to the page. If access should be
* prevented then send the problem up to do_page_fault.
*/
access |= _PAGE_PRESENT;
if (access & ~(pte_val(*ptep))) {
spin_unlock(&mm->page_table_lock);
return 1;
}
/*
* Acquire the hash table lock to guarantee that the linux
* pte we fetch will not change
*/
spin_lock(&hash_table_lock);
/*
* At this point we have found a pte (which was present).
* The spinlocks prevent this status from changing
* The hash_table_lock prevents the _PAGE_HASHPTE status
* from changing (RPN, DIRTY and ACCESSED too)
* The page_table_lock prevents the pte from being
* invalidated or modified
*/
/*
* At this point, we have a pte (old_pte) which can be used to build
* or update an HPTE. There are 2 cases:
*
* 1. There is a valid (present) pte with no associated HPTE (this is
* the most common case)
* 2. There is a valid (present) pte with an associated HPTE. The
* current values of the pp bits in the HPTE prevent access
* because we are doing software DIRTY bit management and the
* page is currently not DIRTY.
*/
old_pte = *ptep;
new_pte = old_pte;
/* If the attempted access was a store */
if (access & _PAGE_RW)
pte_val(new_pte) |= _PAGE_ACCESSED | _PAGE_DIRTY;
else
pte_val(new_pte) |= _PAGE_ACCESSED;
newpp = computeHptePP(pte_val(new_pte));
/* Check if pte already has an hpte (case 2) */
if (pte_val(old_pte) & _PAGE_HASHPTE) {
/* There MIGHT be an HPTE for this pte */
unsigned long hash, slot, secondary;
/* Local copy of first doubleword of HPTE */
union {
unsigned long d;
Hpte_dword0 h;
} hpte_dw0;
hash = hpt_hash(vpn, 0);
secondary = (pte_val(old_pte) & _PAGE_SECONDARY) >> 15;
if (secondary)
hash = ~hash;
slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12;
/* If there is an HPTE for this page it is indexed by slot */
hpte_dw0.d = ppc_md.hpte_getword0(slot);
if ((hpte_dw0.h.avpn == (vpn >> 11)) &&
(hpte_dw0.h.v) &&
(hpte_dw0.h.h == secondary)){
/* HPTE matches */
ppc_md.hpte_updatepp(slot, newpp, va);
if (!pte_same(old_pte, new_pte))
*ptep = new_pte;
} else {
/* HPTE is not for this pte */
pte_val(old_pte) &= ~_PAGE_HPTEFLAGS;
}
}
if (!(pte_val(old_pte) & _PAGE_HASHPTE)) {
/*
* Case 1
* For these cases we need to create a new
* HPTE and update the linux pte
*/
/* Find an available HPTE slot */
slot = ppc_md.hpte_selectslot(vpn);
hash_ind = 0;
if (slot < 0) {
slot = -slot;
hash_ind = 1;
}
/* Set the physical address */
prpn = pte_val(old_pte) >> PTE_SHIFT;
/* Update the linux pte with the HPTE slot */
pte_val(new_pte) &= ~_PAGE_HPTEFLAGS;
pte_val(new_pte) |= hash_ind << 15;
pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX;
pte_val(new_pte) |= _PAGE_HASHPTE;
/*
* No need to use ldarx/stdcx here because all who
* might be updating the pte will hold the
* page_table_lock or the hash_table_lock
* (we hold both)
*/
*ptep = new_pte;
/* copy appropriate flags from linux pte */
hpteflags = (pte_val(new_pte) & 0x1f8) | newpp;
/* Create the HPTE */
ppc_md.hpte_create_valid(slot, vpn, prpn, hash_ind, ptep,
hpteflags, 0);
}
spin_unlock(&hash_table_lock);
spin_unlock(&mm->page_table_lock);
return 0;
return ret;
}
void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte)
void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte,
int local)
{
unsigned long vsid, vpn, va, hash, secondary, slot, flags;
union {
unsigned long d;
Hpte_dword0 h;
} hpte_dw0;
unsigned long vsid, vpn, va, hash, secondary, slot;
/* XXX fix for large ptes */
unsigned long large = 0;
if ((ea >= USER_START) && (ea <= USER_END))
vsid = get_vsid(context, ea);
......@@ -993,156 +400,32 @@ void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte)
vsid = get_kernel_vsid(ea);
va = (vsid << 28) | (ea & 0x0fffffff);
if (large)
vpn = va >> LARGE_PAGE_SHIFT;
else
vpn = va >> PAGE_SHIFT;
hash = hpt_hash(vpn, 0);
hash = hpt_hash(vpn, large);
secondary = (pte_val(pte) & _PAGE_SECONDARY) >> 15;
if (secondary)
hash = ~hash;
slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12;
spin_lock_irqsave(&hash_table_lock, flags);
/*
* Id prefer to flush even if our hpte was stolen, but the new
* entry could be bolted - Anton
*/
hpte_dw0.d = ppc_md.hpte_getword0(slot);
if ((hpte_dw0.h.avpn == (vpn >> 11)) &&
(hpte_dw0.h.v) &&
(hpte_dw0.h.h == secondary)){
/* HPTE matches */
ppc_md.hpte_invalidate(slot);
}
spin_unlock_irqrestore(&hash_table_lock, flags);
ppc_md.hpte_invalidate(slot, va, large, local);
}
int proc_dol2crvec(ctl_table *table, int write, struct file *filp,
void *buffer, size_t *lenp)
void flush_hash_range(unsigned long context, unsigned long number, int local)
{
int vleft, first=1, len, left, val;
#define TMPBUFLEN 256
char buf[TMPBUFLEN], *p;
static const char *sizestrings[4] = {
"2MB", "256KB", "512KB", "1MB"
};
static const char *clockstrings[8] = {
"clock disabled", "+1 clock", "+1.5 clock", "reserved(3)",
"+2 clock", "+2.5 clock", "+3 clock", "reserved(7)"
};
static const char *typestrings[4] = {
"flow-through burst SRAM", "reserved SRAM",
"pipelined burst SRAM", "pipelined late-write SRAM"
};
static const char *holdstrings[4] = {
"0.5", "1.0", "(reserved2)", "(reserved3)"
};
if ( ((_get_PVR() >> 16) != 8) && ((_get_PVR() >> 16) != 12))
return -EFAULT;
if ( /*!table->maxlen ||*/ (filp->f_pos && !write)) {
*lenp = 0;
return 0;
}
vleft = table->maxlen / sizeof(int);
left = *lenp;
for (; left /*&& vleft--*/; first=0) {
if (write) {
while (left) {
char c;
if(get_user(c,(char *) buffer))
return -EFAULT;
if (!isspace(c))
break;
left--;
((char *) buffer)++;
}
if (!left)
break;
len = left;
if (len > TMPBUFLEN-1)
len = TMPBUFLEN-1;
if(copy_from_user(buf, buffer, len))
return -EFAULT;
buf[len] = 0;
p = buf;
if (*p < '0' || *p > '9')
break;
val = simple_strtoul(p, &p, 0);
len = p-buf;
if ((len < left) && *p && !isspace(*p))
break;
buffer += len;
left -= len;
#if 0
/* DRENG need a def */
_set_L2CR(0);
_set_L2CR(val);
while ( _get_L2CR() & 0x1 )
/* wait for invalidate to finish */;
#endif
if (ppc_md.flush_hash_range) {
ppc_md.flush_hash_range(context, number, local);
} else {
p = buf;
if (!first)
*p++ = '\t';
#if 0
/* DRENG need a def */
val = _get_L2CR();
#endif
p += sprintf(p, "0x%08x: ", val);
p += sprintf(p, " %s", (val >> 31) & 1 ? "enabled" :
"disabled");
p += sprintf(p, ", %sparity", (val>>30)&1 ? "" : "no ");
p += sprintf(p, ", %s", sizestrings[(val >> 28) & 3]);
p += sprintf(p, ", %s", clockstrings[(val >> 25) & 7]);
p += sprintf(p, ", %s", typestrings[(val >> 23) & 2]);
p += sprintf(p, "%s", (val>>22)&1 ? ", data only" : "");
p += sprintf(p, "%s", (val>>20)&1 ? ", ZZ enabled": "");
p += sprintf(p, ", %s", (val>>19)&1 ? "write-through" :
"copy-back");
p += sprintf(p, "%s", (val>>18)&1 ? ", testing" : "");
p += sprintf(p, ", %sns hold",holdstrings[(val>>16)&3]);
p += sprintf(p, "%s", (val>>15)&1 ? ", DLL slow" : "");
p += sprintf(p, "%s", (val>>14)&1 ? ", diff clock" :"");
p += sprintf(p, "%s", (val>>13)&1 ? ", DLL bypass" :"");
p += sprintf(p,"\n");
len = strlen(buf);
if (len > left)
len = left;
if(copy_to_user(buffer, buf, len))
return -EFAULT;
left -= len;
buffer += len;
break;
}
}
int i;
struct tlb_batch_data *ptes =
&tlb_batch_array[smp_processor_id()][0];
if (!write && !first && left) {
if(put_user('\n', (char *) buffer))
return -EFAULT;
left--, buffer++;
}
if (write) {
p = (char *) buffer;
while (left) {
char c;
if(get_user(c, p++))
return -EFAULT;
if (!isspace(c))
break;
left--;
for (i = 0; i < number; i++) {
flush_hash_page(context, ptes->addr, ptes->pte, local);
ptes++;
}
}
if (write && first)
return -EINVAL;
*lenp -= left;
filp->f_pos += *lenp;
return 0;
}
/*
* pSeries hashtable management.
*
* SMP scalability work:
* Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/threads.h>
#include <linux/smp.h>
#include <asm/abs_addr.h>
#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
/*
 * Create a bolted HPTE mapping va -> pa.  Used during initialization only.
 * We assume the PTE will fit in the primary PTEG.
 *
 * htab:      base of the hash table
 * va:        virtual address to map
 * pa, mode:  OR-ed together to form the second doubleword
 * hash_mask: mask applied to the hash to select the group
 * large:     non-zero for a large page (sets L and clears the low AVPN bit)
 *
 * Never returns if the primary group has no free slot.
 */
void pSeries_make_pte(HPTE *htab, unsigned long va, unsigned long pa,
		      int mode, unsigned long hash_mask, int large)
{
	HPTE *hptep;
	unsigned long hash, i;
	unsigned long vpn;

	if (large)
		vpn = va >> LARGE_PAGE_SHIFT;
	else
		vpn = va >> PAGE_SHIFT;

	hash = hpt_hash(vpn, large);

	hptep = htab + ((hash & hash_mask)*HPTES_PER_GROUP);

	/* Scan the whole group; same constant as the group addressing above */
	for (i = 0; i < HPTES_PER_GROUP; ++i, ++hptep) {
		if (hptep->dw0.dw0.v == 0) {		/* !valid */
			/* Fill in everything else before setting the valid bit */
			hptep->dw1.dword1 = pa | mode;
			hptep->dw0.dword0 = 0;
			hptep->dw0.dw0.avpn = va >> 23;
			hptep->dw0.dw0.bolted = 1;	/* bolted */
			if (large) {
				hptep->dw0.dw0.l = 1;
				hptep->dw0.dw0.avpn &= ~0x1UL;
			}
			hptep->dw0.dw0.v = 1;		/* make valid */
			return;
		}
	}

	/* We should _never_ get here and too early to call xmon. */
	while(1)
		;
}
#define HPTE_LOCK_BIT 3

/*
 * Spin until this CPU owns the software lock bit kept in the first
 * doubleword of the HPTE.
 */
static inline void pSeries_lock_hpte(HPTE *hptep)
{
	unsigned long *dword0 = &hptep->dw0.dword0;

	while (test_and_set_bit(HPTE_LOCK_BIT, dword0)) {
		/* Contended: poll with plain reads until the bit looks clear */
		while (test_bit(HPTE_LOCK_BIT, dword0))
			barrier();
	}
}
/*
 * Release the software lock bit taken by pSeries_lock_hpte().
 */
static inline void pSeries_unlock_hpte(HPTE *hptep)
{
	unsigned long *word = &hptep->dw0.dword0;

	/*
	 * lwsync (plus compiler barrier) is issued before the bit is cleared;
	 * presumably so updates made under the lock are ordered before the
	 * release.
	 */
	asm volatile("lwsync":::"memory");
	clear_bit(HPTE_LOCK_BIT, word);
}
/* Taken with interrupts disabled around the tlbie sequences in this file */
static spinlock_t pSeries_tlbie_lock = SPIN_LOCK_UNLOCKED;
/*
 * Insert an HPTE into the first free slot of a group.
 *
 * hpte_group: index of the group's first HPTE, relative to htab_data.htab
 * vpn:        virtual page number; vpn >> 11 is stored as the AVPN
 * prpn:       physical page number, converted with physRpn_to_absRpn()
 * secondary:  stored in the H bit
 * hpteflags:  stored in the flags field of the second doubleword
 * bolted:     stored in the bolted bit
 * large:      non-zero sets the L bit
 *
 * Returns the index of the slot used within the group, or -1 if no
 * invalid slot was found.
 */
static long pSeries_insert_hpte(unsigned long hpte_group, unsigned long vpn,
				unsigned long prpn, int secondary,
				unsigned long hpteflags, int bolted, int large)
{
	unsigned long avpn = vpn >> 11;
	unsigned long arpn = physRpn_to_absRpn(prpn);
	HPTE *hptep = htab_data.htab + hpte_group;
	Hpte_dword0 dw0;
	HPTE lhpte;
	int i;

	for (i = 0; i < HPTES_PER_GROUP; i++) {
		/* Cheap unlocked peek first; only lock slots that look free */
		dw0 = hptep->dw0.dw0;

		if (!dw0.v) {
			/* retry with lock held */
			pSeries_lock_hpte(hptep);
			dw0 = hptep->dw0.dw0;
			if (!dw0.v)
				break;	/* leave the loop with the slot locked */
			pSeries_unlock_hpte(hptep);
		}

		hptep++;
	}

	if (i == HPTES_PER_GROUP)
		return -1;

	/* Build the new entry in a local copy */
	lhpte.dw1.dword1      = 0;
	lhpte.dw1.dw1.rpn     = arpn;
	lhpte.dw1.flags.flags = hpteflags;

	lhpte.dw0.dword0      = 0;
	lhpte.dw0.dw0.avpn    = avpn;
	lhpte.dw0.dw0.h       = secondary;
	lhpte.dw0.dw0.bolted  = bolted;
	lhpte.dw0.dw0.v       = 1;

	if (large)
		lhpte.dw0.dw0.l = 1;

	hptep->dw1.dword1 = lhpte.dw1.dword1;

	/* Guarantee the second dword is visible before the valid bit */
	__asm__ __volatile__ ("eieio" : : : "memory");

	/*
	 * Now set the first dword including the valid bit
	 * NOTE: this also unlocks the hpte
	 * (lhpte.dw0 was built from zero, so nothing sets the lock bit in it)
	 */
	hptep->dw0.dword0 = lhpte.dw0.dword0;

	__asm__ __volatile__ ("ptesync" : : : "memory");

	return i;
}
/*
 * Evict one valid, non-bolted HPTE from a group and flush its translation.
 *
 * hpte_group: index of the group's first HPTE, relative to htab_data.htab
 *
 * Returns -1 if every slot in the group is invalid or bolted.
 * NOTE(review): on success the value returned is the loop counter i (how
 * many slots were examined before the victim), not the victim's index in
 * the group, because the scan starts at a random offset.  Confirm callers
 * only test for -1.
 */
static long pSeries_remove_hpte(unsigned long hpte_group)
{
	HPTE *hptep;
	Hpte_dword0 dw0;
	int i;
	int slot_offset;
	unsigned long vsid, group, pi, pi_high;
	unsigned long slot;
	unsigned long flags;
	int large;
	unsigned long va;

	/* pick a random slot to start at */
	slot_offset = mftb() & 0x7;

	udbg_printf("remove_hpte in %d\n", slot_offset);

	for (i = 0; i < HPTES_PER_GROUP; i++) {
		hptep = htab_data.htab + hpte_group + slot_offset;
		dw0 = hptep->dw0.dw0;

		if (dw0.v && !dw0.bolted) {
			/* retry with lock held */
			pSeries_lock_hpte(hptep);
			dw0 = hptep->dw0.dw0;
			if (dw0.v && !dw0.bolted)
				break;	/* victim found, still locked */
			pSeries_unlock_hpte(hptep);
		}

		/* wrap around within the group */
		slot_offset++;
		slot_offset &= 0x7;
	}

	if (i == HPTES_PER_GROUP)
		return -1;

	large = dw0.l;

	/* Invalidate the hpte. NOTE: this also unlocks it */
	hptep->dw0.dword0 = 0;

	/* Invalidate the tlb */
	/*
	 * Rebuild a virtual address from the saved AVPN and the group number;
	 * presumably this inverts hpt_hash() - verify against its definition.
	 */
	vsid = dw0.avpn >> 5;
	slot = hptep - htab_data.htab;
	group = slot >> 3;
	if (dw0.h)
		group = ~group;
	pi = (vsid ^ group) & 0x7ff;
	pi_high = (dw0.avpn & 0x1f) << 11;
	pi |= pi_high;

	if (large)
		va = pi << LARGE_PAGE_SHIFT;
	else
		va = pi << PAGE_SHIFT;

	spin_lock_irqsave(&pSeries_tlbie_lock, flags);
	_tlbie(va, large);
	spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);

	return i;
}
/*
 * Replace the two low-order bits of the HPTE's second doubleword with the
 * two low-order bits of pp, using an ldarx/stdcx. retry loop so the rest
 * of the doubleword is preserved.
 */
static inline void set_pp_bit(unsigned long pp, HPTE *addr)
{
	unsigned long old;
	unsigned long *p = &addr->dw1.dword1;

	/* rldimi with shift 0 and mask start 62 inserts only bits 62:63 */
	__asm__ __volatile__(
	"1:	ldarx	%0,0,%3\n\
rldimi %0,%2,0,62\n\
stdcx. %0,0,%3\n\
bne 1b"
	: "=&r" (old), "=m" (*p)
	: "r" (pp), "r" (p), "m" (*p)
	: "cc");
}
/*
 * Look up the HPTE for a small-page vpn by scanning the primary group and
 * then the secondary group.  Checking every slot is ugly, but this is only
 * used during bootup.
 *
 * Returns the slot index for a primary-group match, the negated slot index
 * for a secondary-group match, or -1 if nothing matches.
 */
static long pSeries_hpte_find(unsigned long vpn)
{
	const unsigned long want_avpn = vpn >> 11;
	unsigned long hash = hpt_hash(vpn, 0);
	unsigned long pass, n;

	/* pass 0 probes the primary hash, pass 1 its complement */
	for (pass = 0; pass < 2; pass++, hash = ~hash) {
		long slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;

		for (n = 0; n < HPTES_PER_GROUP; n++, slot++) {
			Hpte_dword0 dw0 = htab_data.htab[slot].dw0.dw0;

			if (dw0.avpn != want_avpn || !dw0.v || dw0.h != pass)
				continue;

			/* HPTE matches */
			return pass ? -slot : slot;
		}
	}

	return -1;
}
/*
 * Change the pp bits of the HPTE at 'slot' and flush its translation.
 *
 * The entry is only modified if it is still valid and its AVPN equals the
 * one derived from va/large.  Returns 0 on success, -1 if it did not match.
 */
static long pSeries_hpte_updatepp(unsigned long slot, unsigned long newpp,
				  unsigned long va, int large)
{
	HPTE *hptep = htab_data.htab + slot;
	Hpte_dword0 dw0;
	unsigned long vpn, avpn;
	unsigned long flags;

	udbg_printf("updatepp\n");

	if (large)
		vpn = va >> LARGE_PAGE_SHIFT;
	else
		vpn = va >> PAGE_SHIFT;

	avpn = vpn >> 11;

	pSeries_lock_hpte(hptep);

	dw0 = hptep->dw0.dw0;

	/* The slot may have been reused for another page since it was recorded */
	if ((dw0.avpn != avpn) || !dw0.v) {
		pSeries_unlock_hpte(hptep);
		udbg_printf("updatepp missed\n");
		return -1;
	}

	set_pp_bit(newpp, hptep);

	pSeries_unlock_hpte(hptep);

	/* Ensure it is out of the tlb too */
	/* XXX use tlbiel where possible */
	spin_lock_irqsave(&pSeries_tlbie_lock, flags);
	_tlbie(va, large);
	spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);

	return 0;
}
/*
 * Update the page protection bits. Intended to be used to create
 * guard pages for kernel data structures on pages which are bolted
 * in the HPT. Assumes pages being operated on will not be stolen.
 * Does not work on large pages.
 *
 * No need to lock here because we should be the only user.
 *
 * newpp: new pp bits (only the two low bits are applied by set_pp_bit)
 * ea:    kernel effective address of the page
 */
static void pSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
{
	unsigned long vsid, va, vpn, flags;
	long slot;
	HPTE *hptep;

	vsid = get_kernel_vsid(ea);
	va = (vsid << 28) | (ea & 0x0fffffff);
	vpn = va >> PAGE_SHIFT;

	slot = pSeries_hpte_find(vpn);
	if (slot == -1)
		panic("could not find page to bolt\n");
	/*
	 * NOTE(review): pSeries_hpte_find() returns a negated slot for a
	 * secondary-group match; that case is not handled here and would
	 * index before the table.  Presumably bolted entries always sit in
	 * the primary group - confirm.
	 */
	hptep = htab_data.htab + slot;

	set_pp_bit(newpp, hptep);

	/* Ensure it is out of the tlb too */
	/* XXX use tlbiel where possible */
	spin_lock_irqsave(&pSeries_tlbie_lock, flags);
	_tlbie(va, 0);
	spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);
}
/*
 * Invalidate the HPTE at 'slot' if it still maps va, then flush the
 * translation.
 *
 * large: non-zero if va is a large page address
 * local: non-zero requests the tlbiel path, which is only taken for small
 *        pages when __is_processor(PV_POWER4) is true
 */
static void pSeries_hpte_invalidate(unsigned long slot, unsigned long va,
				    int large, int local)
{
	HPTE *hptep = htab_data.htab + slot;
	Hpte_dword0 dw0;
	unsigned long vpn, avpn;
	unsigned long flags;

	if (large)
		vpn = va >> LARGE_PAGE_SHIFT;
	else
		vpn = va >> PAGE_SHIFT;

	avpn = vpn >> 11;

	pSeries_lock_hpte(hptep);

	dw0 = hptep->dw0.dw0;

	/* Slot no longer holds this page: nothing to invalidate */
	if ((dw0.avpn != avpn) || !dw0.v) {
		pSeries_unlock_hpte(hptep);
		udbg_printf("invalidate missed\n");
		return;
	}

	/* Invalidate the hpte. NOTE: this also unlocks it */
	hptep->dw0.dword0 = 0;

	/* Invalidate the tlb */
	if (!large && local && __is_processor(PV_POWER4)) {
		_tlbiel(va, large);
	} else {
		spin_lock_irqsave(&pSeries_tlbie_lock, flags);
		_tlbie(va, large);
		spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);
	}
}
/*
 * Batched flush: invalidate the HPTEs for the first 'number' entries of
 * this CPU's tlb_batch_array row, then issue all TLB invalidates together.
 *
 * context: passed to get_vsid() for user addresses
 * number:  count of queued entries; va_array has MAX_BATCH_FLUSH elements,
 *          so callers must not pass more than that
 * local:   non-zero requests the tlbiel path (taken only when
 *          __is_processor(PV_POWER4) is true)
 */
static void pSeries_flush_hash_range(unsigned long context,
				     unsigned long number, int local)
{
	unsigned long vsid, vpn, va, hash, secondary, slot, flags, avpn;
	int i, j;
	unsigned long va_array[MAX_BATCH_FLUSH];
	HPTE *hptep;
	Hpte_dword0 dw0;
	struct tlb_batch_data *ptes = &tlb_batch_array[smp_processor_id()][0];

	/* XXX fix for large ptes */
	unsigned long large = 0;

	/* j counts the entries that were really invalidated */
	j = 0;
	for (i = 0; i < number; i++) {
		if ((ptes->addr >= USER_START) && (ptes->addr <= USER_END))
			vsid = get_vsid(context, ptes->addr);
		else
			vsid = get_kernel_vsid(ptes->addr);

		va = (vsid << 28) | (ptes->addr & 0x0fffffff);
		/* Tentatively recorded; only kept if j is advanced below */
		va_array[j] = va;
		if (large)
			vpn = va >> LARGE_PAGE_SHIFT;
		else
			vpn = va >> PAGE_SHIFT;
		hash = hpt_hash(vpn, large);
		/* The linux pte records which group and slot hold the HPTE */
		secondary = (pte_val(ptes->pte) & _PAGE_SECONDARY) >> 15;
		if (secondary)
			hash = ~hash;
		slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
		slot += (pte_val(ptes->pte) & _PAGE_GROUP_IX) >> 12;

		hptep = htab_data.htab + slot;
		avpn = vpn >> 11;

		pSeries_lock_hpte(hptep);

		dw0 = hptep->dw0.dw0;

		ptes++;

		if ((dw0.avpn != avpn) || !dw0.v) {
			pSeries_unlock_hpte(hptep);
			udbg_printf("invalidate missed\n");
			continue;
		}

		j++;

		/* Invalidate the hpte. NOTE: this also unlocks it */
		hptep->dw0.dword0 = 0;
	}

	/*
	 * NOTE(review): both asm statements below overwrite %0 with clrldi
	 * although it is declared as an input-only operand.
	 */
	if (!large && local && __is_processor(PV_POWER4)) {
		asm volatile("ptesync":::"memory");

		for (i = 0; i < j; i++) {
			asm volatile("\n\
clrldi %0,%0,16\n\
tlbiel %0"
			: : "r" (va_array[i]) : "memory" );
		}

		asm volatile("ptesync":::"memory");
	} else {
		/* XXX double check that it is safe to take this late */
		spin_lock_irqsave(&pSeries_tlbie_lock, flags);

		asm volatile("ptesync":::"memory");

		for (i = 0; i < j; i++) {
			asm volatile("\n\
clrldi %0,%0,16\n\
tlbie %0"
			: : "r" (va_array[i]) : "memory" );
		}

		asm volatile("eieio; tlbsync; ptesync":::"memory");

		spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);
	}
}
/*
 * Install the pSeries hash table operations into ppc_md.
 *
 * flush_hash_range (TLB batching) is only installed when the device tree
 * root exists and its "model" is not the nighthawk model string.
 */
void hpte_init_pSeries(void)
{
	struct device_node *root;
	const char *model;

	ppc_md.hpte_invalidate     = pSeries_hpte_invalidate;
	ppc_md.hpte_updatepp       = pSeries_hpte_updatepp;
	ppc_md.hpte_updateboltedpp = pSeries_hpte_updateboltedpp;
	ppc_md.insert_hpte         = pSeries_insert_hpte;
	ppc_md.remove_hpte         = pSeries_remove_hpte;
	ppc_md.make_pte            = pSeries_make_pte;

	/* Disable TLB batching on nighthawk */
	root = find_path_device("/");
	if (root) {
		model = get_property(root, "model", NULL);
		/*
		 * Never hand a NULL model to strcmp(); a root node without
		 * a "model" property cannot be nighthawk, so batch there too.
		 */
		if (!model || strcmp(model, "CHRP IBM,9076-N81"))
			ppc_md.flush_hash_range = pSeries_flush_hash_range;
	}
}
......@@ -214,358 +214,6 @@ long plpar_xirr(unsigned long *xirr_ret)
xirr_ret, &dummy, &dummy);
}
/*
 * The following section contains code that ultimately should
 * be put in the relevant file (htab.c, xics.c, etc). It has
 * been put here for the time being in order to ease maintenance
 * of the pSeries LPAR code until it can all be put into CVS.
 */
/*
 * Remove the HPTE at 'slot' through the hypervisor; any failure is a BUG().
 */
static void hpte_invalidate_pSeriesLP(unsigned long slot)
{
	HPTE removed;			/* receives the old entry; not examined */
	unsigned long no_flags = 0;
	unsigned long rc;

	rc = plpar_pte_remove(no_flags, slot, 0,
			      &removed.dw0.dword0, &removed.dw1.dword1);
	if (rc != H_Success)
		BUG();
}
/*
 * Change the protection bits of the HPTE at 'slot' through the hypervisor.
 *
 * The linux "newpp" bits and the low bits of the hcall flags happen to line
 * up, so no transform is needed.  The high bits of newpp are masked off
 * (only newpp & 3 is passed) out of paranoia; that could probably be
 * dropped.  'va' is not used.  Hangs on any hypervisor error.
 */
static void hpte_updatepp_pSeriesLP(long slot, unsigned long newpp,
				    unsigned long va)
{
	unsigned long rc = plpar_pte_protect(newpp & 3, slot, 0);

	if (rc == H_Success)
		return;

	udbg_printf( " bad return code from pte protect rc = %lx \n", rc);
	for (;;)
		;
}
/*
 * Change the protection bits of the bolted HPTE that maps kernel
 * effective address 'ea'.  Only newpp & 3 is passed to the hypervisor.
 *
 * Panics if no HPTE is found for the page; hangs on a hypervisor error.
 */
static void hpte_updateboltedpp_pSeriesLP(unsigned long newpp, unsigned long ea)
{
	unsigned long lpar_rc;
	unsigned long vsid,va,vpn,flags;
	long slot;

	vsid = get_kernel_vsid( ea );
	va = ( vsid << 28 ) | ( ea & 0x0fffffff );
	vpn = va >> PAGE_SHIFT;

	slot = ppc_md.hpte_find( vpn );
	/* Do not hand the "not found" marker to the hypervisor as a slot */
	if (slot == -1)
		panic("could not find page to bolt\n");

	flags = newpp & 3;
	lpar_rc = plpar_pte_protect( flags,
				     slot,
				     0);

	if (lpar_rc != H_Success) {
		udbg_printf( " bad return code from pte bolted protect rc = %lx \n", lpar_rc);
		for (;;);
	}
}
static unsigned long hpte_getword0_pSeriesLP(unsigned long slot)
{
unsigned long dword0;
unsigned long lpar_rc;
unsigned long dummy_word1;
unsigned long flags;
/* Read 1 pte at a time */
/* Do not need RPN to logical page translation */
/* No cross CEC PFT access */
flags = 0;
lpar_rc = plpar_pte_read(flags,
slot,
&dword0, &dummy_word1);
if (lpar_rc != H_Success) {
udbg_printf(" error on pte read in get_hpte0 rc = %lx \n", lpar_rc);
for (;;);
}
return(dword0);
}
/*
 * Choose a slot for a new HPTE for small-page 'vpn'.
 *
 * Order of preference: first invalid slot in the primary group, first
 * invalid slot in the secondary group, otherwise a non-bolted slot in the
 * primary group (round robin start), which is invalidated before being
 * returned.  Every HPTE is read through plpar_pte_read(); a failed read or
 * a primary group with only bolted entries hangs.
 */
static long hpte_selectslot_pSeriesLP(unsigned long vpn)
{
	unsigned long primary_hash;
	unsigned long hpteg_slot;
	unsigned i, k;
	unsigned long flags;
	HPTE pte_read;
	unsigned long lpar_rc;

	/* Search the primary group for an available slot */
	primary_hash = hpt_hash(vpn, 0);

	hpteg_slot = ( primary_hash & htab_data.htab_hash_mask ) * HPTES_PER_GROUP;

	/* Read 1 pte at a time */
	/* Do not need RPN to logical page translation */
	/* No cross CEC PFT access */
	flags = 0;
	for (i=0; i<HPTES_PER_GROUP; ++i) {
		/* read the hpte entry from the slot */
		lpar_rc = plpar_pte_read(flags,
					 hpteg_slot + i,
					 &pte_read.dw0.dword0, &pte_read.dw1.dword1);
		if (lpar_rc != H_Success) {
			udbg_printf(" read of hardware page table failed rc = %lx \n", lpar_rc);
			for (;;);
		}
		if ( pte_read.dw0.dw0.v == 0 ) {
			/* If an available slot found, return it */
			return hpteg_slot + i;
		}
	}

	/* Search the secondary group for an available slot */
	hpteg_slot = ( ~primary_hash & htab_data.htab_hash_mask ) * HPTES_PER_GROUP;

	for (i=0; i<HPTES_PER_GROUP; ++i) {
		/* read the hpte entry from the slot */
		lpar_rc = plpar_pte_read(flags,
					 hpteg_slot + i,
					 &pte_read.dw0.dword0, &pte_read.dw1.dword1);
		if (lpar_rc != H_Success) {
			udbg_printf(" read of hardware page table failed2 rc = %lx \n", lpar_rc);
			for (;;);
		}
		if ( pte_read.dw0.dw0.v == 0 ) {
			/* If an available slot found, return it */
			return hpteg_slot + i;
		}
	}

	/* No available entry found in secondary group */

	/* Select an entry in the primary group to replace */
	hpteg_slot = ( primary_hash & htab_data.htab_hash_mask ) * HPTES_PER_GROUP;

	/* NOTE(review): next_round_robin is bumped without any visible locking */
	k = htab_data.next_round_robin++ & 0x7;

	for (i=0; i<HPTES_PER_GROUP; ++i) {
		if (k == HPTES_PER_GROUP)
			k = 0;

		lpar_rc = plpar_pte_read(flags,
					 hpteg_slot + k,
					 &pte_read.dw0.dword0, &pte_read.dw1.dword1);
		if (lpar_rc != H_Success) {
			udbg_printf( " pte read failed - rc = %lx", lpar_rc);
			for (;;);
		}
		if ( ! pte_read.dw0.dw0.bolted)
		{
			hpteg_slot += k;
			/* Invalidate the current entry */
			ppc_md.hpte_invalidate(hpteg_slot);
			return hpteg_slot;
		}
		++k;
	}

	/* No non-bolted entry found in primary group - time to panic */
	/* NOTE(review): the two messages below report the same condition */
	udbg_printf("select_hpte_slot - No non-bolted HPTE in group 0x%lx! \n", hpteg_slot/HPTES_PER_GROUP);
	udbg_printf("No non-bolted HPTE in group %lx", (unsigned long)hpteg_slot/HPTES_PER_GROUP);
	for (;;);

	/* never executes - avoid compiler errors */
	return 0;
}
/*
 * Enter a valid HPTE at exactly 'slot' through the hypervisor (H_EXACT).
 *
 * vpn:       virtual page number; vpn >> 11 becomes the AVPN
 * prpn:      physical page number, converted with physRpn_to_absRpn()
 * hash:      stored in the H bit
 * ptep:      not used by this implementation
 * hpteflags: stored in the flags field of the second doubleword
 * bolted:    stored in the bolted bit
 *
 * Hangs if the hypervisor call does not return H_Success.
 */
static void hpte_create_valid_pSeriesLP(unsigned long slot, unsigned long vpn,
					unsigned long prpn, unsigned hash,
					void *ptep, unsigned hpteflags,
					unsigned bolted)
{
	/* Local copy of HPTE */
	struct {
		/* Local copy of first doubleword of HPTE */
		union {
			unsigned long d;
			Hpte_dword0 h;
		} dw0;
		/* Local copy of second doubleword of HPTE */
		union {
			unsigned long d;
			Hpte_dword1 h;
			Hpte_dword1_flags f;
		} dw1;
	} lhpte;

	unsigned long avpn = vpn >> 11;
	unsigned long arpn = physRpn_to_absRpn( prpn );

	unsigned long lpar_rc;
	unsigned long flags;
	HPTE ret_hpte;	/* only referenced by the disabled #else branch */

	/* Fill in the local HPTE with absolute rpn, avpn and flags */
	lhpte.dw1.d = 0;
	lhpte.dw1.h.rpn = arpn;
	lhpte.dw1.f.flags = hpteflags;

	lhpte.dw0.d = 0;
	lhpte.dw0.h.avpn = avpn;
	lhpte.dw0.h.h = hash;
	lhpte.dw0.h.bolted = bolted;
	lhpte.dw0.h.v = 1;

	/* Now fill in the actual HPTE */
	/* Set CEC cookie to 0 */
	/* Large page = 0 */
	/* Zero page = 0 */
	/* I-cache Invalidate = 0 */
	/* I-cache synchronize = 0 */
	/* Exact = 1 - only modify exact entry */
	flags = H_EXACT;

	/* Guarded or cache-inhibited mappings get the coherent flag cleared */
	if (hpteflags & (_PAGE_GUARDED|_PAGE_NO_CACHE))
		lhpte.dw1.f.flags &= ~_PAGE_COHERENT;
#if 1
	/* Arguments are moved into r4-r7; the status comes back in r3 */
	__asm__ __volatile__ (
		H_ENTER_r3
		"mr 4, %1\n"
		"mr 5, %2\n"
		"mr 6, %3\n"
		"mr 7, %4\n"
		HSC
		"mr %0, 3\n"
		: "=r" (lpar_rc)
		: "r" (flags), "r" (slot), "r" (lhpte.dw0.d), "r" (lhpte.dw1.d)
		: "r3", "r4", "r5", "r6", "r7", "cc");
#else
	lpar_rc = plpar_pte_enter(flags,
				  slot,
				  lhpte.dw0.d,
				  lhpte.dw1.d,
				  &ret_hpte.dw0.dword0,
				  &ret_hpte.dw1.dword1);
#endif
	if (lpar_rc != H_Success) {
		udbg_printf("error on pte enter lapar rc = %ld\n",lpar_rc);
		udbg_printf("ent: s=%lx, dw0=%lx, dw1=%lx\n", slot, lhpte.dw0.d, lhpte.dw1.d);
		/* xmon_backtrace("backtrace"); */
		for (;;);
	}
}
/*
 * Look up the HPTE for small-page 'vpn', reading each candidate's first
 * doubleword through hpte_getword0_pSeriesLP().
 *
 * Returns the slot index for a primary-group match, the negated slot index
 * for a secondary-group match, or -1 if nothing matches.
 */
static long hpte_find_pSeriesLP(unsigned long vpn)
{
	union {
		unsigned long d;
		Hpte_dword0 h;
	} word0;
	const unsigned long want_avpn = vpn >> 11;
	unsigned long hash = hpt_hash(vpn, 0);
	unsigned long pass, n;

	/* pass 0 probes the primary hash, pass 1 its complement */
	for (pass = 0; pass < 2; pass++, hash = ~hash) {
		long slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;

		for (n = 0; n < HPTES_PER_GROUP; n++, slot++) {
			word0.d = hpte_getword0_pSeriesLP( slot );

			if (word0.h.avpn != want_avpn || !word0.h.v ||
			    word0.h.h != pass)
				continue;

			/* HPTE matches */
			return pass ? -slot : slot;
		}
	}

	return -1;
}
/*
 * Create a pte - LPAR .  Used during initialization only.
 * We assume the PTE will fit in the primary PTEG.
 *
 * htab is not used here; the entry is entered through the hypervisor with
 * flags of zero, so any entry in the group may be chosen.  The hypervisor
 * return code is deliberately not checked (see the note at the end).
 */
void make_pte_LPAR(HPTE *htab,
		   unsigned long va, unsigned long pa, int mode,
		   unsigned long hash_mask, int large)
{
	HPTE local_hpte, ret_hpte;
	unsigned long hash, slot, flags,lpar_rc, vpn;

	/* Hard-coded shifts: 24 for large pages, 12 for small pages */
	if (large)
		vpn = va >> 24;
	else
		vpn = va >> 12;

	hash = hpt_hash(vpn, large);

	slot = ((hash & hash_mask)*HPTES_PER_GROUP);

	local_hpte.dw1.dword1 = pa | mode;
	local_hpte.dw0.dword0 = 0;
	local_hpte.dw0.dw0.avpn = va >> 23;
	local_hpte.dw0.dw0.bolted = 1;		/* bolted */
	if (large)
		local_hpte.dw0.dw0.l = 1;	/* large page */
	local_hpte.dw0.dw0.v = 1;

	/* Set CEC cookie to 0 */
	/* Zero page = 0 */
	/* I-cache Invalidate = 0 */
	/* I-cache synchronize = 0 */
	/* Exact = 0 - modify any entry in group */
	flags = 0;
#if 1
	/* Arguments are moved into r4-r7; the status comes back in r3 */
	__asm__ __volatile__ (
		H_ENTER_r3
		"mr 4, %1\n"
		"mr 5, %2\n"
		"mr 6, %3\n"
		"mr 7, %4\n"
		HSC
		"mr %0, 3\n"
		: "=r" (lpar_rc)
		: "r" (flags), "r" (slot), "r" (local_hpte.dw0.dword0), "r" (local_hpte.dw1.dword1)
		: "r3", "r4", "r5", "r6", "r7", "cc");
#else
	lpar_rc = plpar_pte_enter(flags,
				  slot,
				  local_hpte.dw0.dword0,
				  local_hpte.dw1.dword1,
				  &ret_hpte.dw0.dword0,
				  &ret_hpte.dw1.dword1);
#endif
#if 0 /* NOTE: we explicitly do not check return status here because it is
       * "normal" for early boot code to map io regions for which a partition
       * has no access. However, we will die if we actually fault on these
       * "permission denied" pages.
       */
	if (lpar_rc != H_Success) {
		/* pSeriesLP_init_early(); */
		udbg_printf("flags=%lx, slot=%lx, dword0=%lx, dword1=%lx, rc=%d\n", flags, slot, local_hpte.dw0.dword0,local_hpte.dw1.dword1, lpar_rc);
		BUG();
	}
#endif
}
static void tce_build_pSeriesLP(struct TceTable *tbl, long tcenum,
unsigned long uaddr, int direction )
{
......@@ -786,19 +434,14 @@ static unsigned char udbg_getcLP(void)
}
}
void pSeries_lpar_mm_init(void);
/* This is called early in setup.c.
* Use it to setup page table ppc_md stuff as well as udbg.
*/
void pSeriesLP_init_early(void)
{
ppc_md.hpte_invalidate = hpte_invalidate_pSeriesLP;
ppc_md.hpte_updatepp = hpte_updatepp_pSeriesLP;
ppc_md.hpte_updateboltedpp = hpte_updateboltedpp_pSeriesLP;
ppc_md.hpte_getword0 = hpte_getword0_pSeriesLP;
ppc_md.hpte_selectslot = hpte_selectslot_pSeriesLP;
ppc_md.hpte_create_valid = hpte_create_valid_pSeriesLP;
ppc_md.hpte_find = hpte_find_pSeriesLP;
pSeries_lpar_mm_init();
ppc_md.tce_build = tce_build_pSeriesLP;
ppc_md.tce_free = tce_free_pSeriesLP;
......@@ -892,3 +535,315 @@ int hvc_count(int *start_termno)
}
return 0;
}
/*
 * Create a pte - LPAR .  Used during initialization only.
 * We assume the PTE will fit in the primary PTEG.
 *
 * htab is not used here; the entry is entered with plpar_pte_enter() and
 * flags of zero, so any entry in the group may be chosen.  The hypervisor
 * return code is deliberately not checked (see the note at the end).
 */
void pSeries_lpar_make_pte(HPTE *htab, unsigned long va, unsigned long pa,
			   int mode, unsigned long hash_mask, int large)
{
	HPTE local_hpte;
	unsigned long hash, slot, flags, lpar_rc, vpn;
	unsigned long dummy1, dummy2;	/* returned values are ignored */

	if (large)
		vpn = va >> LARGE_PAGE_SHIFT;
	else
		vpn = va >> PAGE_SHIFT;

	hash = hpt_hash(vpn, large);

	slot = ((hash & hash_mask)*HPTES_PER_GROUP);

	local_hpte.dw1.dword1 = pa | mode;
	local_hpte.dw0.dword0 = 0;
	local_hpte.dw0.dw0.avpn = va >> 23;
	local_hpte.dw0.dw0.bolted = 1;		/* bolted */
	if (large) {
		local_hpte.dw0.dw0.l = 1;	/* large page */
		local_hpte.dw0.dw0.avpn &= ~0x1UL;
	}
	local_hpte.dw0.dw0.v = 1;

	/* Set CEC cookie to 0 */
	/* Zero page = 0 */
	/* I-cache Invalidate = 0 */
	/* I-cache synchronize = 0 */
	/* Exact = 0 - modify any entry in group */
	flags = 0;

	lpar_rc = plpar_pte_enter(flags, slot, local_hpte.dw0.dword0,
				  local_hpte.dw1.dword1, &dummy1, &dummy2);

#if 0
	/*
	 * NOTE: we explicitly do not check return status here because it is
	 * "normal" for early boot code to map io regions for which a partition
	 * has no access. However, we will die if we actually fault on these
	 * "permission denied" pages.
	 */
	if (lpar_rc != H_Success) {
		udbg_printf("flags=%lx, slot=%lx, dword0=%lx, dword1=%lx, rc=%d\n", flags, slot, local_hpte.dw0.dword0,local_hpte.dw1.dword1, lpar_rc);
		BUG();
	}
#endif
}
/*
 * Insert an HPTE into a group through the hypervisor.
 *
 * hpte_group: passed to the hypervisor as the slot argument; with flags of
 *             zero (not exact) the hypervisor picks the entry
 * vpn:        virtual page number; vpn >> 11 becomes the AVPN
 * prpn:       physical page number, converted with physRpn_to_absRpn()
 * secondary:  stored in the H bit
 * hpteflags:  stored in the flags field of the second doubleword
 * bolted:     stored in the bolted bit
 * large:      non-zero sets the L bit
 *
 * Returns the value the hypervisor leaves in r4 (the slot), or -1 when it
 * reports H_PTEG_Full.  Any other failure panics.
 */
static long pSeries_lpar_insert_hpte(unsigned long hpte_group,
				     unsigned long vpn, unsigned long prpn,
				     int secondary, unsigned long hpteflags,
				     int bolted, int large)
{
	unsigned long avpn = vpn >> 11;
	unsigned long arpn = physRpn_to_absRpn(prpn);
	unsigned long lpar_rc;
	unsigned long flags;
	unsigned long slot;
	HPTE lhpte;

	/* Fill in the local HPTE with absolute rpn, avpn and flags */
	lhpte.dw1.dword1      = 0;
	lhpte.dw1.dw1.rpn     = arpn;
	lhpte.dw1.flags.flags = hpteflags;

	lhpte.dw0.dword0      = 0;
	lhpte.dw0.dw0.avpn    = avpn;
	lhpte.dw0.dw0.h       = secondary;
	lhpte.dw0.dw0.bolted  = bolted;
	lhpte.dw0.dw0.v       = 1;

	if (large)
		lhpte.dw0.dw0.l = 1;

	/* Now fill in the actual HPTE */
	/* Set CEC cookie to 0         */
	/* Large page = 0              */
	/* Zero page = 0               */
	/* I-cache Invalidate = 0      */
	/* I-cache synchronize = 0     */
	/* Exact = 0                   */
	flags = 0;

	/* XXX why is this here? - Anton */
	if (hpteflags & (_PAGE_GUARDED|_PAGE_NO_CACHE))
		lhpte.dw1.flags.flags &= ~_PAGE_COHERENT;

	/*
	 * Arguments are moved into r4-r7; afterwards r3 is copied to lpar_rc
	 * and r4 to slot.
	 */
	__asm__ __volatile__ (
		H_ENTER_r3
		"mr 4, %2\n"
		"mr 5, %3\n"
		"mr 6, %4\n"
		"mr 7, %5\n"
		HSC
		"mr %0, 3\n"
		"mr %1, 4\n"
		: "=r" (lpar_rc), "=r" (slot)
		: "r" (flags), "r" (hpte_group), "r" (lhpte.dw0.dword0),
		"r" (lhpte.dw1.dword1)
		: "r3", "r4", "r5", "r6", "r7", "cc");

	/* Group full is an expected outcome, reported to the caller as -1 */
	if (lpar_rc == H_PTEG_Full)
		return -1;

	if (lpar_rc != H_Success) {
		udbg_printf("error on pte enter lpar rc = %ld\n",lpar_rc);
		udbg_printf("ent: s=%lx, dw0=%lx, dw1=%lx\n", slot,
			    lhpte.dw0.dword0, lhpte.dw1.dword1);
		PPCDBG_ENTER_DEBUGGER();
		panic("error on pte enter");
	}

	return slot;
}
/* Taken with interrupts disabled around hypervisor flushes in this file */
static spinlock_t pSeries_lpar_tlbie_lock = SPIN_LOCK_UNLOCKED;
/*
 * Not implemented for LPAR yet: any call panics.
 */
static long pSeries_lpar_remove_hpte(unsigned long hpte_group)
{
	/* XXX take spinlock */
	panic("pSeries_lpar_remove_hpte");
}
/*
 * Change the protection bits of the HPTE at 'slot' through the hypervisor,
 * but only if its AVPN still matches va (H_AVPN).
 *
 * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and
 * the low bits of flags happen to line up.  So no transform is needed.
 * Only newpp & 3 is passed on.  We can probably optimize here and assume
 * the high bits of newpp are already zero.  For now I am paranoid.
 *
 * Returns 0 on success and -1 if the hypervisor reports H_Not_Found.
 * Hangs on any other hypervisor error.
 *
 * NOTE(review): 'large' is ignored; the vpn is always derived with
 * PAGE_SHIFT, unlike pSeries_lpar_hpte_invalidate().  Confirm which is
 * intended before large pages are used.
 */
static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp,
				       unsigned long va, int large)
{
	unsigned long lpar_rc;
	unsigned long flags;
	unsigned long vpn;

	/* All declarations now come before the first statement */
	flags = (newpp & 3) | H_AVPN;
	vpn = va >> PAGE_SHIFT;

	udbg_printf("updatepp\n");

	lpar_rc = plpar_pte_protect(flags, slot, (vpn >> 4) & ~0x7fUL);

	if (lpar_rc == H_Not_Found) {
		udbg_printf("updatepp missed\n");
		return -1;
	}

	if (lpar_rc != H_Success) {
		udbg_printf("bad return code from pte protect rc = %lx\n",
			    lpar_rc);
		for (;;);
	}

	return 0;
}
/*
 * Read the first doubleword of the HPTE at 'slot' through the hypervisor.
 * Hangs if the read fails.
 */
static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot)
{
	unsigned long first_dword, second_dword;
	/*
	 * Flags of zero: read 1 pte at a time, no RPN to logical page
	 * translation, no cross CEC PFT access.
	 */
	unsigned long no_flags = 0;
	unsigned long rc;

	rc = plpar_pte_read(no_flags, slot, &first_dword, &second_dword);
	if (rc == H_Success)
		return first_dword;

	udbg_printf("error on pte read in get_hpte0 rc = %lx\n",
		    rc);
	for (;;)
		;
}
/*
 * Look up the HPTE for small-page 'vpn', reading each candidate's first
 * doubleword through pSeries_lpar_hpte_getword0().
 *
 * Returns the slot index for a primary-group match, the negated slot index
 * for a secondary-group match, or -1 if nothing matches.
 */
static long pSeries_lpar_hpte_find(unsigned long vpn)
{
	union {
		unsigned long dword0;
		Hpte_dword0 dw0;
	} word0;
	const unsigned long want_avpn = vpn >> 11;
	unsigned long hash = hpt_hash(vpn, 0);
	unsigned long pass, n;

	/* pass 0 probes the primary hash, pass 1 its complement */
	for (pass = 0; pass < 2; pass++, hash = ~hash) {
		long slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;

		for (n = 0; n < HPTES_PER_GROUP; n++, slot++) {
			Hpte_dword0 dw0;

			word0.dword0 = pSeries_lpar_hpte_getword0(slot);
			dw0 = word0.dw0;

			if (dw0.avpn != want_avpn || !dw0.v || dw0.h != pass)
				continue;

			/* HPTE matches */
			return pass ? -slot : slot;
		}
	}

	return -1;
}
/*
 * Change the protection bits of the bolted HPTE that maps kernel
 * effective address 'ea'.  Only newpp & 3 is passed to the hypervisor.
 *
 * Panics if no HPTE is found for the page; hangs on a hypervisor error.
 *
 * NOTE(review): pSeries_lpar_hpte_find() returns a negated slot for a
 * secondary-group match; that case is passed through unchanged here.
 * Presumably bolted entries always sit in the primary group - confirm.
 */
static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
					     unsigned long ea)
{
	unsigned long lpar_rc;
	unsigned long vsid, va, vpn, flags;
	long slot;

	vsid = get_kernel_vsid(ea);
	va = (vsid << 28) | (ea & 0x0fffffff);
	vpn = va >> PAGE_SHIFT;

	slot = pSeries_lpar_hpte_find(vpn);
	if (slot == -1)
		panic("could not find page to bolt\n");

	flags = newpp & 3;
	lpar_rc = plpar_pte_protect(flags, slot, 0);

	if (lpar_rc != H_Success) {
		udbg_printf("bad return code from pte bolted protect rc = %lx\n", lpar_rc);
		for (;;);
	}
}
/*
 * Remove the HPTE at 'slot' through the hypervisor, but only if its AVPN
 * still matches va (H_AVPN).  A miss (H_Not_Found) is logged and ignored;
 * any other failure hangs.
 *
 * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
 * lock.
 *
 * large: selects LARGE_PAGE_SHIFT or PAGE_SHIFT when deriving the vpn
 * local: not used by this implementation
 */
static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
					 int large, int local)
{
	unsigned long vpn;
	unsigned long lpar_rc;
	unsigned long flags;
	unsigned long dummy1, dummy2;	/* old HPTE contents, ignored */

	if (large)
		vpn = va >> LARGE_PAGE_SHIFT;
	else
		vpn = va >> PAGE_SHIFT;

	spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
	lpar_rc = plpar_pte_remove(H_AVPN, slot, (vpn >> 4) & ~0x7fUL, &dummy1,
				   &dummy2);
	spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);

	if (lpar_rc == H_Not_Found) {
		udbg_printf("invalidate missed\n");
		return;
	}

	if (lpar_rc != H_Success) {
		udbg_printf("bad return code from invalidate rc = %lx\n",
			    lpar_rc);
		for (;;);
	}
}
/*
 * Flush the first 'number' entries of this CPU's tlb_batch_array row, one
 * flush_hash_page() call per entry.
 *
 * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
 * lock.
 *
 * NOTE(review): pSeries_lpar_hpte_invalidate() takes the same
 * pSeries_lpar_tlbie_lock.  If flush_hash_page() reaches it through
 * ppc_md.hpte_invalidate this would self-deadlock - verify before
 * installing this function in ppc_md (pSeries_lpar_mm_init() does not).
 */
void pSeries_lpar_flush_hash_range(unsigned long context, unsigned long number,
				   int local)
{
	int i;
	struct tlb_batch_data *ptes =
		&tlb_batch_array[smp_processor_id()][0];
	unsigned long flags;

	spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
	for (i = 0; i < number; i++) {
		flush_hash_page(context, ptes->addr, ptes->pte, local);
		ptes++;
	}
	spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
}
/*
 * Install the pSeries LPAR hash table operations into ppc_md.
 * ppc_md.flush_hash_range is not set here.
 */
void pSeries_lpar_mm_init(void)
{
	ppc_md.hpte_invalidate     = pSeries_lpar_hpte_invalidate;
	ppc_md.hpte_updatepp       = pSeries_lpar_hpte_updatepp;
	ppc_md.hpte_updateboltedpp = pSeries_lpar_hpte_updateboltedpp;
	ppc_md.insert_hpte         = pSeries_lpar_insert_hpte;
	ppc_md.remove_hpte         = pSeries_lpar_remove_hpte;
	ppc_md.make_pte            = pSeries_lpar_make_pte;
}
......@@ -253,7 +253,7 @@ static void map_io_page(unsigned long ea, unsigned long pa, int flags)
* entry in the hardware page table.
*/
vsid = get_kernel_vsid(ea);
make_pte(htab_data.htab,
ppc_md.make_pte(htab_data.htab,
(vsid << 28) | (ea & 0xFFFFFFF), // va (NOT the ea)
pa,
_PAGE_NO_CACHE | _PAGE_GUARDED | PP_RWXX,
......@@ -261,30 +261,24 @@ static void map_io_page(unsigned long ea, unsigned long pa, int flags)
}
}
/*
 * Flush the range VMALLOC_START..VMALLOC_END; no mm is passed.
 */
void
local_flush_tlb_all(void)
{
	/* Implemented to just flush the vmalloc area.
	 * vmalloc is the only user of flush_tlb_all.
	 */
	local_flush_tlb_range( NULL, VMALLOC_START, VMALLOC_END );
}
void
local_flush_tlb_mm(struct mm_struct *mm)
{
if ( mm->map_count ) {
if (mm->map_count) {
struct vm_area_struct *mp;
for ( mp = mm->mmap; mp != NULL; mp = mp->vm_next )
local_flush_tlb_range( mm, mp->vm_start, mp->vm_end );
}
else /* MIKEC: It is not clear why this is needed */
for (mp = mm->mmap; mp != NULL; mp = mp->vm_next)
local_flush_tlb_range(mm, mp->vm_start, mp->vm_end);
} else {
/* MIKEC: It is not clear why this is needed */
/* paulus: it is needed to clear out stale HPTEs
* when an address space (represented by an mm_struct)
* is being destroyed. */
local_flush_tlb_range( mm, USER_START, USER_END );
}
local_flush_tlb_range(mm, USER_START, USER_END);
}
/* XXX are there races with checking cpu_vm_mask? - Anton */
mm->cpu_vm_mask = 0;
}
/*
* Callers should hold the mm->page_table_lock
......@@ -297,6 +291,8 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
pmd_t *pmd;
pte_t *ptep;
pte_t pte;
unsigned long flags;
int local = 0;
switch( REGION_ID(vmaddr) ) {
case VMALLOC_REGION_ID:
......@@ -308,13 +304,17 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
case USER_REGION_ID:
pgd = pgd_offset( vma->vm_mm, vmaddr );
context = vma->vm_mm->context;
/* XXX are there races with checking cpu_vm_mask? - Anton */
if (vma->vm_mm->cpu_vm_mask == (1 << smp_processor_id()))
local = 1;
break;
default:
panic("local_flush_tlb_page: invalid region 0x%016lx", vmaddr);
}
if (!pgd_none(*pgd)) {
pmd = pmd_offset(pgd, vmaddr);
if (!pmd_none(*pmd)) {
......@@ -322,12 +322,14 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
/* Check if HPTE might exist and flush it if so */
pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0));
if ( pte_val(pte) & _PAGE_HASHPTE ) {
flush_hash_page(context, vmaddr, pte);
flush_hash_page(context, vmaddr, pte, local);
}
}
}
}
/*
 * One row per CPU (indexed by smp_processor_id()) holding up to
 * MAX_BATCH_FLUSH queued entries (addr, pte) for flush_hash_range().
 */
struct tlb_batch_data tlb_batch_array[NR_CPUS][MAX_BATCH_FLUSH];
void
local_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end)
{
......@@ -337,6 +339,10 @@ local_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long e
pte_t pte;
unsigned long pgd_end, pmd_end;
unsigned long context;
unsigned long flags;
int i = 0;
struct tlb_batch_data *ptes = &tlb_batch_array[smp_processor_id()][0];
int local = 0;
if ( start >= end )
panic("flush_tlb_range: start (%016lx) greater than end (%016lx)\n", start, end );
......@@ -356,6 +362,12 @@ local_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long e
case USER_REGION_ID:
pgd = pgd_offset( mm, start );
context = mm->context;
/* XXX are there races with checking cpu_vm_mask? - Anton */
if (mm->cpu_vm_mask == (1 << smp_processor_id())) {
local = 1;
}
break;
default:
panic("flush_tlb_range: invalid region for start (%016lx) and end (%016lx)\n", start, end);
......@@ -377,8 +389,17 @@ local_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long e
do {
if ( pte_val(*ptep) & _PAGE_HASHPTE ) {
pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0));
if ( pte_val(pte) & _PAGE_HASHPTE )
flush_hash_page( context, start, pte );
if ( pte_val(pte) & _PAGE_HASHPTE ) {
ptes->pte = pte;
ptes->addr = start;
ptes++;
i++;
if (i == MAX_BATCH_FLUSH) {
flush_hash_range(context, MAX_BATCH_FLUSH, local);
i = 0;
ptes = &tlb_batch_array[smp_processor_id()][0];
}
}
}
start += PAGE_SIZE;
++ptep;
......@@ -393,6 +414,9 @@ local_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long e
start = pgd_end;
++pgd;
} while ( start < end );
if (i)
flush_hash_range(context, i, local);
}
......@@ -643,3 +667,30 @@ void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
maddr = (unsigned long)page_address(page) + (addr & ~PAGE_MASK);
flush_icache_range(maddr, maddr + len);
}
/*
 * This is called at the end of handling a user page fault, when the
 * fault has been handled by updating a PTE in the linux page tables.
 * We use it to preload an HPTE into the hash table corresponding to
 * the updated linux PTE.
 *
 * vma: mapping the fault happened in; its mm supplies pgd and context
 * ea:  effective address that faulted
 * pte: the linux PTE value that was just installed
 *
 * Nothing is preloaded if the PTE is not young, the mm has no pgd, or
 * the linux PTE can no longer be found.
 */
void update_mmu_cache(struct vm_area_struct *vma, unsigned long ea,
		      pte_t pte)
{
	unsigned long vsid;
	void *pgdir;
	pte_t *ptep;

	/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
	if (!pte_young(pte))
		return;

	pgdir = vma->vm_mm->pgd;
	if (pgdir == NULL)
		return;

	ptep = find_linux_pte(pgdir, ea);
	/* Do not pass a missing linux PTE down to __hash_page() */
	if (ptep == NULL)
		return;

	vsid = get_vsid(vma->vm_mm->context, ea);

	__hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep);
}
......@@ -24,26 +24,33 @@ struct machdep_calls {
/* High use functions in the first cachelines, low use functions
* follow. DRENG collect profile data.
*/
void (*hpte_invalidate)(unsigned long slot);
void (*hpte_updatepp)(long slot,
void (*hpte_invalidate)(unsigned long slot,
unsigned long va,
int large,
int local);
long (*hpte_updatepp)(unsigned long slot,
unsigned long newpp,
unsigned long va);
unsigned long va,
int large);
void (*hpte_updateboltedpp)(unsigned long newpp,
unsigned long ea);
unsigned long (*hpte_getword0)(unsigned long slot);
long (*hpte_find)( unsigned long vpn );
long (*hpte_selectslot)(unsigned long vpn);
void (*hpte_create_valid)(unsigned long slot,
long (*insert_hpte)(unsigned long hpte_group,
unsigned long vpn,
unsigned long prpn,
unsigned hash,
void * ptep,
unsigned hpteflags,
unsigned bolted);
int secondary,
unsigned long hpteflags,
int bolted,
int large);
long (*remove_hpte)(unsigned long hpte_group);
void (*flush_hash_range)(unsigned long context,
unsigned long number,
int local);
void (*make_pte)(void *htab, unsigned long va,
unsigned long pa,
int mode,
unsigned long hash_mask,
int large);
void (*tce_build)(struct TceTable * tbl,
long tcenum,
unsigned long uaddr,
......
......@@ -77,7 +77,7 @@ typedef struct {
unsigned long resv0: 7; /* Padding to a 64b boundary */
} slb_dword1;
typedef struct _SLBE {
typedef struct {
union {
unsigned long dword0;
slb_dword0 dw0;
......@@ -107,25 +107,12 @@ typedef struct {
unsigned long avpn:57; /* vsid | api == avpn */
unsigned long : 2; /* Software use */
unsigned long bolted: 1; /* HPTE is "bolted" */
unsigned long : 1; /* Software use */
unsigned long lock: 1; /* lock on pSeries SMP */
unsigned long l: 1; /* Virtual page is large (L=1) or 4 KB (L=0) */
unsigned long h: 1; /* Hash function identifier */
unsigned long v: 1; /* Valid (v=1) or invalid (v=0) */
} Hpte_dword0;
/*
 * Bit-field view of a group of HPTE flag bits.  The declared fields
 * add up to 16 bits: six bits of padding, the single-bit ac, r, c, w,
 * i, m, g and n flags, and the two-bit pp (page protection) field.
 */
typedef struct {
unsigned long : 6; /* unused - padding */
unsigned long ac: 1; /* Address compare */
unsigned long r: 1; /* Referenced */
unsigned long c: 1; /* Changed */
unsigned long w: 1; /* Write-thru cache mode */
unsigned long i: 1; /* Cache inhibited */
unsigned long m: 1; /* Memory coherence required */
unsigned long g: 1; /* Guarded */
unsigned long n: 1; /* No-execute */
unsigned long pp: 2; /* Page protection bits 1:2 */
} Hpte_flags;
typedef struct {
unsigned long pp0: 1; /* Page protection bit 0 */
unsigned long : 1; /* Reserved */
......@@ -148,7 +135,7 @@ typedef struct {
unsigned long flags: 10; /* HPTE flags */
} Hpte_dword1_flags;
typedef struct _HPTE {
typedef struct {
union {
unsigned long dword0;
Hpte_dword0 dw0;
......@@ -156,21 +143,8 @@ typedef struct _HPTE {
union {
unsigned long dword1;
struct {
unsigned long pp0: 1; /* Page protection bit 0 */
unsigned long ts: 1; /* Tag set bit */
unsigned long rpn: 50; /* Real page number */
unsigned long : 2; /* Unused */
unsigned long ac: 1; /* Address compare bit */
unsigned long r: 1; /* Referenced */
unsigned long c: 1; /* Changed */
unsigned long w: 1; /* Write-thru cache mode */
unsigned long i: 1; /* Cache inhibited */
unsigned long m: 1; /* Memory coherence */
unsigned long g: 1; /* Guarded */
unsigned long n: 1; /* No-execute page if N=1 */
unsigned long pp: 2; /* Page protection bit 1:2 */
} dw1;
Hpte_dword1 dw1;
Hpte_dword1_flags flags;
} dw1;
} HPTE;
......@@ -204,6 +178,8 @@ void create_valid_hpte( unsigned long slot, unsigned long vpn,
#define PT_SHIFT (12) /* Page Table */
#define PT_MASK 0x02FF
#define LARGE_PAGE_SHIFT 24
static inline unsigned long hpt_hash(unsigned long vpn, int large)
{
unsigned long vsid;
......@@ -220,18 +196,34 @@ static inline unsigned long hpt_hash(unsigned long vpn, int large)
return (vsid & 0x7fffffffff) ^ page;
}
#define PG_SHIFT (12) /* Page Entry */
/*
 * Invalidate the TLB entry for virtual address va with the tlbie
 * instruction.
 *
 * va:    virtual address to invalidate; clrldi clears its upper
 *        16 bits before it is handed to tlbie.
 * large: non-zero issues "tlbie va,1", zero issues "tlbie va,0"
 *        (presumably selecting large vs 4 KB pages -- confirm against
 *        the architecture manual).
 *
 * The invalidate is preceded by a ptesync and followed by an
 * "eieio; tlbsync; ptesync" sequence.
 *
 * NOTE(review): clrldi writes %0, yet va is listed as an input-only
 * operand.  GCC expects operands the asm modifies to be declared as
 * outputs (e.g. "+r") -- confirm and fix if needed.
 *
 * NOTE(review): the "extern __inline__ void _tlbie( unsigned long va )"
 * line inside the body appears to be the previous declaration carried
 * over from the diff, not part of the new function.
 */
static inline void _tlbie(unsigned long va, int large)
{
asm volatile("ptesync": : :"memory");
extern __inline__ void _tlbie( unsigned long va )
if (large) {
asm volatile("clrldi %0,%0,16\n\
tlbie %0,1" : : "r"(va) : "memory");
} else {
asm volatile("clrldi %0,%0,16\n\
tlbie %0,0" : : "r"(va) : "memory");
}
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
/*
 * Invalidate the TLB entry for virtual address va with the tlbiel
 * instruction (the commit description says tlbiel is used when the mm
 * has only run locally).
 *
 * va:    virtual address to invalidate; clrldi clears its upper
 *        16 bits before it is handed to tlbiel.
 * large: non-zero issues "tlbiel va,1", zero issues "tlbiel va,0"
 *        (presumably selecting large vs 4 KB pages -- confirm against
 *        the architecture manual).
 *
 * The invalidate is bracketed by a ptesync before and after; unlike
 * _tlbie there is no eieio/tlbsync in the trailing sequence.
 *
 * NOTE(review): clrldi writes %0, yet va is listed as an input-only
 * operand.  GCC expects operands the asm modifies to be declared as
 * outputs (e.g. "+r") -- confirm and fix if needed.
 *
 * NOTE(review): the __asm__ __volatile__ statement at the top of the
 * body (clrldi/ptesync/tlbie/eieio/tlbsync/ptesync) appears to be the
 * previous _tlbie body carried over from the diff, not part of the new
 * function.
 */
static inline void _tlbiel(unsigned long va, int large)
{
__asm__ __volatile__ ( " \n\
clrldi %0,%0,16 \n\
ptesync \n\
tlbie %0 \n\
eieio \n\
tlbsync \n\
ptesync"
: : "r" (va) : "memory" );
asm volatile("ptesync": : :"memory");
if (large) {
asm volatile("clrldi %0,%0,16\n\
tlbiel %0,1" : : "r"(va) : "memory");
} else {
asm volatile("clrldi %0,%0,16\n\
tlbiel %0,0" : : "r"(va) : "memory");
}
asm volatile("ptesync": : :"memory");
}
#endif /* __ASSEMBLY__ */
......
......@@ -7,6 +7,7 @@
*/
#ifndef __ASSEMBLY__
#include <linux/threads.h>
#include <asm/processor.h> /* For TASK_SIZE */
#include <asm/mmu.h>
#include <asm/page.h>
......@@ -93,13 +94,15 @@
#define _PAGE_WRITETHRU 0x040UL /* W: cache write-through */
#define _PAGE_DIRTY 0x080UL /* C: page changed */
#define _PAGE_ACCESSED 0x100UL /* R: page referenced */
#if 0
#define _PAGE_HPTENOIX 0x200UL /* software: pte HPTE slot unknown */
#endif
#define _PAGE_HASHPTE 0x400UL /* software: pte has an associated HPTE */
#define _PAGE_EXEC 0x800UL /* software: i-cache coherence required */
#define _PAGE_SECONDARY 0x8000UL /* software: HPTE is in secondary group */
#define _PAGE_GROUP_IX 0x7000UL /* software: HPTE index within group */
/* Bits 0x7000 identify the index within an HPT Group */
#define _PAGE_HPTEFLAGS (_PAGE_HASHPTE | _PAGE_HPTENOIX | _PAGE_SECONDARY | _PAGE_GROUP_IX)
#define _PAGE_HPTEFLAGS (_PAGE_HASHPTE | _PAGE_SECONDARY | _PAGE_GROUP_IX)
/* PAGE_MASK gives the right answer below, but only by accident */
/* It should be preserving the high 48 bits and then specifically */
/* preserving _PAGE_SECONDARY | _PAGE_GROUP_IX */
......@@ -397,6 +400,7 @@ extern void paging_init(void);
* as entries are faulted into the hash table by the low-level
* data/instruction access exception handlers.
*/
#if 0
/*
* We won't be able to use update_mmu_cache to update the
* hardware page table because we need to update the pte
......@@ -404,9 +408,29 @@ extern void paging_init(void);
* its value.
*/
#define update_mmu_cache(vma, addr, pte) do { } while (0)
#else
/*
* This gets called at the end of handling a page fault, when
* the kernel has put a new PTE into the page table for the process.
* We use it to put a corresponding HPTE into the hash table
* ahead of time, instead of waiting for the inevitable extra
* hash-table miss exception.
*/
extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
#endif
extern void flush_hash_segments(unsigned low_vsid, unsigned high_vsid);
extern void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte);
extern void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte,
int local);
void flush_hash_range(unsigned long context, unsigned long number, int local);
/* TLB flush batching */
#define MAX_BATCH_FLUSH 128
/*
 * Element type of tlb_batch_array[NR_CPUS][MAX_BATCH_FLUSH]: pairs a
 * linux PTE value with an address.  Presumably one queued entry in a
 * per-CPU batch of pending TLB flushes -- confirm against the code
 * that fills and drains the array.
 */
struct tlb_batch_data {
pte_t pte; /* linux PTE value recorded for this entry */
unsigned long addr; /* address recorded alongside the PTE */
};
extern struct tlb_batch_data tlb_batch_array[NR_CPUS][MAX_BATCH_FLUSH];
/* Encode and de-code a swap entry */
#define SWP_TYPE(entry) (((entry).val >> 1) & 0x3f)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment