Commit 121f0a4e authored by David S. Miller's avatar David S. Miller

[SPARC64]: Implement tlb flush batching just like ppc64.

Signed-off-by: default avatarDavid S. Miller <davem@redhat.com>
parent 2ae23d29
......@@ -152,7 +152,12 @@ __handle_signal:
.globl rtrap_irq, rtrap_clr_l6, rtrap, irqsz_patchme, rtrap_xcall
rtrap_irq:
rtrap_clr_l6: clr %l6
rtrap: ldub [%g6 + TI_CPU], %l0
rtrap:
#if 1
call tlb_batch_rtrap_debug
add %sp, PTREGS_OFF, %o0
#endif
ldub [%g6 + TI_CPU], %l0
sethi %hi(irq_stat), %l2 ! &softirq_active
or %l2, %lo(irq_stat), %l2 ! &softirq_active
irqsz_patchme: sllx %l0, 0, %l0
......
......@@ -35,6 +35,7 @@
#include <asm/uaccess.h>
#include <asm/timer.h>
#include <asm/starfire.h>
#include <asm/tlb.h>
extern int linux_num_cpus;
extern void calibrate_delay(void);
......@@ -635,9 +636,8 @@ void smp_call_function_client(int irq, struct pt_regs *regs)
}
}
extern unsigned long xcall_flush_tlb_page;
extern unsigned long xcall_flush_tlb_mm;
extern unsigned long xcall_flush_tlb_range;
extern unsigned long xcall_flush_tlb_pending;
extern unsigned long xcall_flush_tlb_kernel_range;
extern unsigned long xcall_flush_tlb_all_spitfire;
extern unsigned long xcall_flush_tlb_all_cheetah;
......@@ -835,7 +835,6 @@ void smp_flush_tlb_mm(struct mm_struct *mm)
int cpu = get_cpu();
if (atomic_read(&mm->mm_users) == 1) {
/* See smp_flush_tlb_page for info about this. */
mm->cpu_vm_mask = cpumask_of_cpu(cpu);
goto local_flush_and_out;
}
......@@ -851,27 +850,40 @@ void smp_flush_tlb_mm(struct mm_struct *mm)
}
}
void smp_flush_tlb_range(struct mm_struct *mm, unsigned long start,
unsigned long end)
void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long *vaddrs)
{
u32 ctx = CTX_HWBITS(mm->context);
int cpu = get_cpu();
start &= PAGE_MASK;
end = PAGE_ALIGN(end);
if (mm == current->active_mm && atomic_read(&mm->mm_users) == 1) {
mm->cpu_vm_mask = cpumask_of_cpu(cpu);
goto local_flush_and_out;
} else {
/* This optimization is not valid. Normally
* we will be holding the page_table_lock, but
* there is an exception which is copy_page_range()
* when forking. The lock is held during the individual
* page table updates in the parent, but not at the
* top level, which is where we are invoked.
*/
if (0) {
cpumask_t this_cpu_mask = cpumask_of_cpu(cpu);
/* By virtue of running under the mm->page_table_lock,
* and mmu_context.h:switch_mm doing the same, the
* following operation is safe.
*/
if (cpus_equal(mm->cpu_vm_mask, this_cpu_mask))
goto local_flush_and_out;
}
}
smp_cross_call_masked(&xcall_flush_tlb_range,
ctx, start, end,
smp_cross_call_masked(&xcall_flush_tlb_pending,
ctx, nr, (unsigned long) vaddrs,
mm->cpu_vm_mask);
local_flush_and_out:
__flush_tlb_range(ctx, start, SECONDARY_CONTEXT,
end, PAGE_SIZE, (end-start));
local_flush_and_out:
__flush_tlb_pending(ctx, nr, vaddrs);
put_cpu();
}
......@@ -888,55 +900,6 @@ void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end)
}
}
void smp_flush_tlb_page(struct mm_struct *mm, unsigned long page)
{
{
u32 ctx = CTX_HWBITS(mm->context);
int cpu = get_cpu();
page &= PAGE_MASK;
if (mm == current->active_mm &&
atomic_read(&mm->mm_users) == 1) {
/* By virtue of being the current address space, and
* having the only reference to it, the following
* operation is safe.
*
* It would not be a win to perform the xcall tlb
* flush in this case, because even if we switch back
* to one of the other processors in cpu_vm_mask it
* is almost certain that all TLB entries for this
* context will be replaced by the time that happens.
*/
mm->cpu_vm_mask = cpumask_of_cpu(cpu);
goto local_flush_and_out;
} else {
cpumask_t this_cpu_mask = cpumask_of_cpu(cpu);
/* By virtue of running under the mm->page_table_lock,
* and mmu_context.h:switch_mm doing the same, the
* following operation is safe.
*/
if (cpus_equal(mm->cpu_vm_mask, this_cpu_mask))
goto local_flush_and_out;
}
/* OK, we have to actually perform the cross call. Most
* likely this is a cloned mm or kswapd is kicking out pages
* for a task which has run recently on another cpu.
*/
smp_cross_call_masked(&xcall_flush_tlb_page,
ctx, page, 0,
mm->cpu_vm_mask);
if (!cpu_isset(cpu, mm->cpu_vm_mask))
return;
local_flush_and_out:
__flush_tlb_page(ctx, page, SECONDARY_CONTEXT);
put_cpu();
}
}
/* CPU capture. */
/* #define CAPTURE_DEBUG */
extern unsigned long xcall_capture;
......
......@@ -259,7 +259,7 @@ EXPORT_SYMBOL(verify_compat_iovec);
EXPORT_SYMBOL(dump_thread);
EXPORT_SYMBOL(dump_fpu);
EXPORT_SYMBOL(pte_alloc_one_kernel);
EXPORT_SYMBOL(__pte_alloc_one_kernel);
#ifndef CONFIG_SMP
EXPORT_SYMBOL(pgt_quicklists);
#endif
......
......@@ -5,6 +5,6 @@
EXTRA_AFLAGS := -ansi
EXTRA_CFLAGS := -Werror
obj-y := ultra.o fault.o init.o generic.o extable.o
obj-y := ultra.o tlb.o fault.o init.o generic.o extable.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
......@@ -37,8 +37,6 @@
#include <asm/spitfire.h>
#include <asm/sections.h>
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
extern void device_scan(void);
struct sparc_phys_banks sp_banks[SPARC_PHYS_BANKS];
......@@ -252,87 +250,6 @@ void flush_dcache_page(struct page *page)
put_cpu();
}
/* When shared+writable mmaps of files go away, we lose all dirty
* page state, so we have to deal with D-cache aliasing here.
*
* This code relies on the fact that flush_cache_range() is always
* called for an area composed by a single VMA. It also assumes that
* the MM's page_table_lock is held.
*/
static inline void flush_cache_pte_range(struct mm_struct *mm, pmd_t *pmd, unsigned long address, unsigned long size)
{
unsigned long offset;
pte_t *ptep;
if (pmd_none(*pmd))
return;
ptep = pte_offset_map(pmd, address);
offset = address & ~PMD_MASK;
if (offset + size > PMD_SIZE)
size = PMD_SIZE - offset;
size &= PAGE_MASK;
for (offset = 0; offset < size; ptep++, offset += PAGE_SIZE) {
pte_t pte = *ptep;
if (pte_none(pte))
continue;
if (pte_present(pte) && pte_dirty(pte)) {
struct page *page;
unsigned long pgaddr, uaddr;
unsigned long pfn = pte_pfn(pte);
if (!pfn_valid(pfn))
continue;
page = pfn_to_page(pfn);
if (PageReserved(page) || !page_mapping(page))
continue;
pgaddr = (unsigned long) page_address(page);
uaddr = address + offset;
if ((pgaddr ^ uaddr) & (1 << 13))
flush_dcache_page_all(mm, page);
}
}
pte_unmap(ptep - 1);
}
static inline void flush_cache_pmd_range(struct mm_struct *mm, pgd_t *dir, unsigned long address, unsigned long size)
{
pmd_t *pmd;
unsigned long end;
if (pgd_none(*dir))
return;
pmd = pmd_offset(dir, address);
end = address + size;
if (end > ((address + PGDIR_SIZE) & PGDIR_MASK))
end = ((address + PGDIR_SIZE) & PGDIR_MASK);
do {
flush_cache_pte_range(mm, pmd, address, end - address);
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
} while (address < end);
}
void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
struct mm_struct *mm = vma->vm_mm;
pgd_t *dir = pgd_offset(mm, start);
if (mm == current->mm)
flushw_user();
if (vma->vm_file == NULL ||
((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE)))
return;
do {
flush_cache_pmd_range(mm, dir, start, end - start);
start = (start + PGDIR_SIZE) & PGDIR_MASK;
dir++;
} while (start && (start < end));
}
void flush_icache_range(unsigned long start, unsigned long end)
{
/* Cheetah has coherent I-cache. */
......@@ -1173,7 +1090,7 @@ struct pgtable_cache_struct pgt_quicklists;
#else
#define DC_ALIAS_SHIFT 0
#endif
pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
pte_t *__pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
struct page *page;
unsigned long color;
......
/* arch/sparc64/mm/tlb.c
*
* Copyright (C) 2004 David S. Miller <davem@redhat.com>
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
#include <asm/tlb.h>
/* Heavily inspired by the ppc64 code. */
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers) =
{ NULL, 0, 0, 0, 0, 0, { 0 }, { NULL }, };
void flush_tlb_pending(void)
{
struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
if (mp->tlb_nr) {
unsigned long context = mp->mm->context;
if (CTX_VALID(context)) {
#ifdef CONFIG_SMP
smp_flush_tlb_pending(mp->mm, mp->tlb_nr,
&mp->vaddrs[0]);
#else
__flush_tlb_pending(CTX_HWBITS(context), mp->tlb_nr,
&mp->vaddrs[0]);
#endif
}
mp->tlb_nr = 0;
}
}
void tlb_batch_rtrap_debug(struct pt_regs *regs)
{
/* If we are returning to userspace and have pending
* tlb batch work on this processor, all hope is lost.
*/
if (!(regs->tstate & TSTATE_PRIV)) {
struct mmu_gather *mp = &get_cpu_var(mmu_gathers);
if (mp->tlb_nr)
BUG();
put_cpu_var(mmu_gathers);
}
}
void tlb_batch_add(pte_t *ptep, pte_t orig)
{
struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
struct page *ptepage;
struct mm_struct *mm;
unsigned long vaddr, nr;
ptepage = virt_to_page(ptep);
mm = (struct mm_struct *) ptepage->mapping;
/* It is more efficient to let flush_tlb_kernel_range()
* handle these cases.
*/
if (mm == &init_mm)
return;
vaddr = ptepage->index +
(((unsigned long)ptep & ~PAGE_MASK) * PTRS_PER_PTE);
if (pte_exec(orig))
vaddr |= 0x1UL;
if (pte_dirty(orig)) {
unsigned long paddr, pfn = pte_pfn(orig);
struct address_space *mapping;
struct page *page;
if (!pfn_valid(pfn))
goto no_cache_flush;
page = pfn_to_page(pfn);
if (PageReserved(page))
goto no_cache_flush;
/* A real file page? */
mapping = page_mapping(page);
if (!mapping || mapping == &swapper_space)
goto no_cache_flush;
paddr = (unsigned long) page_address(page);
if ((paddr ^ vaddr) & (1 << 13))
flush_dcache_page_all(mm, page);
}
no_cache_flush:
if (mp->tlb_frozen)
return;
nr = mp->tlb_nr;
if (unlikely(nr != 0 && mm != mp->mm)) {
flush_tlb_pending();
nr = 0;
}
if (nr == 0)
mp->mm = mm;
mp->vaddrs[nr] = vaddr;
mp->tlb_nr = ++nr;
if (nr >= TLB_BATCH_NR)
flush_tlb_pending();
}
void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
{
struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
unsigned long nr = mp->tlb_nr;
long s = start, e = end, vpte_base;
if (mp->tlb_frozen)
return;
/* Nobody should call us with start below VM hole and end above.
* See if it is really true.
*/
BUG_ON(s > e);
#if 0
/* Currently free_pgtables guarantees this. */
s &= PMD_MASK;
e = (e + PMD_SIZE - 1) & PMD_MASK;
#endif
vpte_base = (tlb_type == spitfire ?
VPTE_BASE_SPITFIRE :
VPTE_BASE_CHEETAH);
if (unlikely(nr != 0 && mm != mp->mm)) {
flush_tlb_pending();
nr = 0;
}
if (nr == 0)
mp->mm = mm;
start = vpte_base + (s >> (PAGE_SHIFT - 3));
end = vpte_base + (e >> (PAGE_SHIFT - 3));
while (start < end) {
mp->vaddrs[nr] = start;
mp->tlb_nr = ++nr;
if (nr >= TLB_BATCH_NR) {
flush_tlb_pending();
nr = 0;
}
start += PAGE_SIZE;
}
if (nr)
flush_tlb_pending();
}
unsigned long __ptrs_per_pmd(void)
{
if (test_thread_flag(TIF_32BIT))
return (1UL << (32 - (PAGE_SHIFT-3) - PAGE_SHIFT));
return REAL_PTRS_PER_PMD;
}
......@@ -26,25 +26,7 @@
*/
.text
.align 32
.globl __flush_tlb_page, __flush_tlb_mm, __flush_tlb_range
__flush_tlb_page: /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=page&PAGE_MASK, %o2=SECONDARY_CONTEXT */
ldxa [%o2] ASI_DMMU, %g2
cmp %g2, %o0
bne,pn %icc, __spitfire_flush_tlb_page_slow
or %o1, 0x10, %g3
stxa %g0, [%g3] ASI_DMMU_DEMAP
stxa %g0, [%g3] ASI_IMMU_DEMAP
retl
flush %g6
nop
nop
nop
nop
nop
nop
nop
nop
.globl __flush_tlb_mm
__flush_tlb_mm: /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
ldxa [%o1] ASI_DMMU, %g2
cmp %g2, %o0
......@@ -63,84 +45,31 @@ __flush_tlb_mm: /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
nop
nop
__flush_tlb_range: /* %o0=(ctx&TAG_CONTEXT_BITS), %o1=start&PAGE_MASK, %o2=SECONDARY_CONTEXT,
* %o3=end&PAGE_MASK, %o4=PAGE_SIZE, %o5=(end - start)
*/
#define TLB_MAGIC 207 /* Students, do you know how I calculated this? -DaveM */
cmp %o5, %o4
bleu,pt %xcc, __flush_tlb_page
srlx %o5, PAGE_SHIFT, %g5
cmp %g5, TLB_MAGIC
bgeu,pn %icc, __spitfire_flush_tlb_range_constant_time
or %o1, 0x10, %g5
ldxa [%o2] ASI_DMMU, %g2
cmp %g2, %o0
__spitfire_flush_tlb_range_page_by_page:
bne,pn %icc, __spitfire_flush_tlb_range_pbp_slow
sub %o5, %o4, %o5
1: stxa %g0, [%g5 + %o5] ASI_DMMU_DEMAP
stxa %g0, [%g5 + %o5] ASI_IMMU_DEMAP
brnz,pt %o5, 1b
sub %o5, %o4, %o5
retl
flush %g6
__spitfire_flush_tlb_range_constant_time: /* %o0=ctx, %o1=start, %o3=end */
rdpr %pstate, %g1
wrpr %g1, PSTATE_IE, %pstate
mov TLB_TAG_ACCESS, %g3
mov ((SPITFIRE_HIGHEST_LOCKED_TLBENT-1) << 3), %g2
/* Spitfire Errata #32 workaround. */
mov 0x8, %o4
stxa %g0, [%o4] ASI_DMMU
flush %g6
1: ldxa [%g2] ASI_ITLB_TAG_READ, %o4
and %o4, TAG_CONTEXT_BITS, %o5
cmp %o5, %o0
bne,pt %icc, 2f
andn %o4, TAG_CONTEXT_BITS, %o4
cmp %o4, %o1
blu,pt %xcc, 2f
cmp %o4, %o3
blu,pn %xcc, 4f
2: ldxa [%g2] ASI_DTLB_TAG_READ, %o4
and %o4, TAG_CONTEXT_BITS, %o5
cmp %o5, %o0
andn %o4, TAG_CONTEXT_BITS, %o4
bne,pt %icc, 3f
cmp %o4, %o1
blu,pt %xcc, 3f
cmp %o4, %o3
blu,pn %xcc, 5f
nop
3: brnz,pt %g2, 1b
sub %g2, (1 << 3), %g2
retl
wrpr %g1, 0x0, %pstate
4: stxa %g0, [%g3] ASI_IMMU
stxa %g0, [%g2] ASI_ITLB_DATA_ACCESS
flush %g6
/* Spitfire Errata #32 workaround. */
mov 0x8, %o4
stxa %g0, [%o4] ASI_DMMU
flush %g6
ba,pt %xcc, 2b
.align 32
.globl __flush_tlb_pending
__flush_tlb_pending:
/* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
rdpr %pstate, %g5
sllx %o1, 3, %o1
andn %g5, PSTATE_IE, %g2
wrpr %g2, %pstate
mov SECONDARY_CONTEXT, %o4
ldxa [%o4] ASI_DMMU, %g2
stxa %o0, [%o4] ASI_DMMU
1: sub %o1, (1 << 3), %o1
ldx [%o2 + %o1], %o3
andcc %o3, 1, %g0
be,pn %icc, 2f
andn %o3, 1, %o3
stxa %g0, [%o3] ASI_IMMU_DEMAP
2: stxa %g0, [%o3] ASI_DMMU_DEMAP
brnz,pt %o1, 1b
nop
5: stxa %g0, [%g3] ASI_DMMU
stxa %g0, [%g2] ASI_DTLB_DATA_ACCESS
flush %g6
/* Spitfire Errata #32 workaround. */
mov 0x8, %o4
stxa %g0, [%o4] ASI_DMMU
stxa %g2, [%o2] ASI_DMMU
flush %g6
ba,pt %xcc, 3b
nop
retl
wrpr %g5, 0x0, %pstate
.align 32
.globl __flush_tlb_kernel_range
......@@ -171,33 +100,6 @@ __spitfire_flush_tlb_mm_slow:
retl
wrpr %g1, 0, %pstate
__spitfire_flush_tlb_page_slow:
rdpr %pstate, %g1
wrpr %g1, PSTATE_IE, %pstate
stxa %o0, [%o2] ASI_DMMU
stxa %g0, [%g3] ASI_DMMU_DEMAP
stxa %g0, [%g3] ASI_IMMU_DEMAP
flush %g6
stxa %g2, [%o2] ASI_DMMU
flush %g6
retl
wrpr %g1, 0, %pstate
__spitfire_flush_tlb_range_pbp_slow:
rdpr %pstate, %g1
wrpr %g1, PSTATE_IE, %pstate
stxa %o0, [%o2] ASI_DMMU
2: stxa %g0, [%g5 + %o5] ASI_DMMU_DEMAP
stxa %g0, [%g5 + %o5] ASI_IMMU_DEMAP
brnz,pt %o5, 2b
sub %o5, %o4, %o5
flush %g6
stxa %g2, [%o2] ASI_DMMU
flush %g6
retl
wrpr %g1, 0x0, %pstate
/*
* The following code flushes one page_size worth.
*/
......@@ -356,22 +258,6 @@ __update_mmu_cache: /* %o0=hw_context, %o1=address, %o2=pte, %o3=fault_code */
ba,a,pt %xcc, __prefill_itlb
/* Cheetah specific versions, patched at boot time. */
__cheetah_flush_tlb_page: /* 14 insns */
rdpr %pstate, %g5
andn %g5, PSTATE_IE, %g2
wrpr %g2, 0x0, %pstate
wrpr %g0, 1, %tl
mov PRIMARY_CONTEXT, %o2
ldxa [%o2] ASI_DMMU, %g2
stxa %o0, [%o2] ASI_DMMU
stxa %g0, [%o1] ASI_DMMU_DEMAP
stxa %g0, [%o1] ASI_IMMU_DEMAP
stxa %g2, [%o2] ASI_DMMU
flush %g6
wrpr %g0, 0, %tl
retl
wrpr %g5, 0x0, %pstate
__cheetah_flush_tlb_mm: /* 15 insns */
rdpr %pstate, %g5
andn %g5, PSTATE_IE, %g2
......@@ -389,26 +275,29 @@ __cheetah_flush_tlb_mm: /* 15 insns */
retl
wrpr %g5, 0x0, %pstate
__cheetah_flush_tlb_range: /* 20 insns */
cmp %o5, %o4
blu,pt %xcc, 9f
__cheetah_flush_tlb_pending: /* 22 insns */
/* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
rdpr %pstate, %g5
sllx %o1, 3, %o1
andn %g5, PSTATE_IE, %g2
wrpr %g2, 0x0, %pstate
wrpr %g0, 1, %tl
mov PRIMARY_CONTEXT, %o2
sub %o5, %o4, %o5
ldxa [%o2] ASI_DMMU, %g2
stxa %o0, [%o2] ASI_DMMU
1: stxa %g0, [%o1 + %o5] ASI_DMMU_DEMAP
stxa %g0, [%o1 + %o5] ASI_IMMU_DEMAP
mov PRIMARY_CONTEXT, %o4
ldxa [%o4] ASI_DMMU, %g2
stxa %o0, [%o4] ASI_DMMU
1: sub %o1, (1 << 3), %o1
ldx [%o2 + %o1], %o3
andcc %o3, 1, %g0
be,pn %icc, 2f
andn %o3, 1, %o3
stxa %g0, [%o3] ASI_IMMU_DEMAP
2: stxa %g0, [%o3] ASI_DMMU_DEMAP
brnz,pt %o1, 1b
membar #Sync
brnz,pt %o5, 1b
sub %o5, %o4, %o5
stxa %g2, [%o2] ASI_DMMU
stxa %g2, [%o4] ASI_DMMU
flush %g6
wrpr %g0, 0, %tl
9: retl
retl
wrpr %g5, 0x0, %pstate
flush_dcpage_cheetah: /* 11 insns */
......@@ -439,13 +328,6 @@ cheetah_patch_one:
cheetah_patch_cachetlbops:
save %sp, -128, %sp
sethi %hi(__flush_tlb_page), %o0
or %o0, %lo(__flush_tlb_page), %o0
sethi %hi(__cheetah_flush_tlb_page), %o1
or %o1, %lo(__cheetah_flush_tlb_page), %o1
call cheetah_patch_one
mov 14, %o2
sethi %hi(__flush_tlb_mm), %o0
or %o0, %lo(__flush_tlb_mm), %o0
sethi %hi(__cheetah_flush_tlb_mm), %o1
......@@ -453,12 +335,12 @@ cheetah_patch_cachetlbops:
call cheetah_patch_one
mov 15, %o2
sethi %hi(__flush_tlb_range), %o0
or %o0, %lo(__flush_tlb_range), %o0
sethi %hi(__cheetah_flush_tlb_range), %o1
or %o1, %lo(__cheetah_flush_tlb_range), %o1
sethi %hi(__flush_tlb_pending), %o0
or %o0, %lo(__flush_tlb_pending), %o0
sethi %hi(__cheetah_flush_tlb_pending), %o1
or %o1, %lo(__cheetah_flush_tlb_pending), %o1
call cheetah_patch_one
mov 20, %o2
mov 22, %o2
sethi %hi(__flush_dcache_page), %o0
or %o0, %lo(__flush_dcache_page), %o0
......@@ -487,17 +369,7 @@ cheetah_patch_cachetlbops:
* TODO: Make xcall TLB range flushes use the tricks above... -DaveM
*/
.align 32
.globl xcall_flush_tlb_page, xcall_flush_tlb_mm, xcall_flush_tlb_range
xcall_flush_tlb_page:
mov PRIMARY_CONTEXT, %g2
ldxa [%g2] ASI_DMMU, %g3
stxa %g5, [%g2] ASI_DMMU
stxa %g0, [%g1] ASI_DMMU_DEMAP
stxa %g0, [%g1] ASI_IMMU_DEMAP
stxa %g3, [%g2] ASI_DMMU
retry
nop
.globl xcall_flush_tlb_mm
xcall_flush_tlb_mm:
mov PRIMARY_CONTEXT, %g2
mov 0x40, %g4
......@@ -508,34 +380,25 @@ xcall_flush_tlb_mm:
stxa %g3, [%g2] ASI_DMMU
retry
xcall_flush_tlb_range:
sethi %hi(PAGE_SIZE - 1), %g2
or %g2, %lo(PAGE_SIZE - 1), %g2
andn %g1, %g2, %g1
andn %g7, %g2, %g7
sub %g7, %g1, %g3
add %g2, 1, %g2
srlx %g3, PAGE_SHIFT, %g4
cmp %g4, 96
bgu,pn %icc, xcall_flush_tlb_mm
.globl xcall_flush_tlb_pending
xcall_flush_tlb_pending:
/* %g5=context, %g1=nr, %g7=vaddrs[] */
sllx %g1, 3, %g1
mov PRIMARY_CONTEXT, %g4
ldxa [%g4] ASI_DMMU, %g7
sub %g3, %g2, %g3
ldxa [%g4] ASI_DMMU, %g2
stxa %g5, [%g4] ASI_DMMU
nop
nop
nop
1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP
stxa %g0, [%g1 + %g3] ASI_IMMU_DEMAP
1: sub %g1, (1 << 3), %g1
ldx [%g7 + %g1], %g5
andcc %g5, 0x1, %g0
be,pn %icc, 2f
andn %g5, 0x1, %g5
stxa %g0, [%g5] ASI_IMMU_DEMAP
2: stxa %g0, [%g5] ASI_DMMU_DEMAP
brnz,pt %g1, 1b
membar #Sync
brnz,pt %g3, 1b
sub %g3, %g2, %g3
stxa %g7, [%g4] ASI_DMMU
stxa %g2, [%g4] ASI_DMMU
retry
nop
nop
.globl xcall_flush_tlb_kernel_range
xcall_flush_tlb_kernel_range:
......@@ -555,7 +418,6 @@ xcall_flush_tlb_kernel_range:
retry
nop
nop
nop
/* This runs in a very controlled environment, so we do
* not need to worry about BH races etc.
......
......@@ -9,7 +9,8 @@
/* These are the same regardless of whether this is an SMP kernel or not. */
#define flush_cache_mm(__mm) \
do { if ((__mm) == current->mm) flushw_user(); } while(0)
extern void flush_cache_range(struct vm_area_struct *, unsigned long, unsigned long);
#define flush_cache_range(vma, start, end) \
flush_cache_mm((vma)->vm_mm)
#define flush_cache_page(vma, page) \
flush_cache_mm((vma)->vm_mm)
......
......@@ -142,8 +142,6 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str
spin_unlock(&mm->page_table_lock);
}
extern void __flush_tlb_mm(unsigned long, unsigned long);
#define deactivate_mm(tsk,mm) do { } while (0)
/* Activate a new MM instance for the current task. */
......
......@@ -188,14 +188,29 @@ static __inline__ void free_pmd_slow(pmd_t *pmd)
#define pmd_populate(MM,PMD,PTE_PAGE) \
pmd_populate_kernel(MM,PMD,page_address(PTE_PAGE))
extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address);
extern pte_t *__pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address);
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
pte_t *pte = __pte_alloc_one_kernel(mm, address);
if (pte) {
struct page *page = virt_to_page(pte);
page->mapping = (void *) mm;
page->index = address & PMD_MASK;
}
return pte;
}
static inline struct page *
pte_alloc_one(struct mm_struct *mm, unsigned long addr)
{
pte_t *pte = pte_alloc_one_kernel(mm, addr);
if (pte)
return virt_to_page(pte);
pte_t *pte = __pte_alloc_one_kernel(mm, addr);
if (pte) {
struct page *page = virt_to_page(pte);
page->mapping = (void *) mm;
page->index = addr & PMD_MASK;
return page;
}
return NULL;
}
......@@ -230,8 +245,18 @@ static __inline__ void free_pte_slow(pte_t *pte)
free_page((unsigned long)pte);
}
#define pte_free_kernel(pte) free_pte_fast(pte)
#define pte_free(pte) free_pte_fast(page_address(pte))
static inline void pte_free_kernel(pte_t *pte)
{
virt_to_page(pte)->mapping = NULL;
free_pte_fast(pte);
}
static inline void pte_free(struct page *ptepage)
{
ptepage->mapping = NULL;
free_pte_fast(page_address(ptepage));
}
#define pmd_free(pmd) free_pmd_fast(pmd)
#define pgd_free(pgd) free_pgd_fast(pgd)
#define pgd_alloc(mm) get_pgd_fast()
......
......@@ -67,12 +67,6 @@
#include <linux/sched.h>
/* Certain architectures need to do special things when pte's
* within a page table are directly modified. Thus, the following
* hook is made available.
*/
#define set_pte(pteptr, pteval) ((*(pteptr)) = (pteval))
/* Entries per page directory level. */
#define PTRS_PER_PTE (1UL << (PAGE_SHIFT-3))
......@@ -80,9 +74,12 @@
* is different so we can optimize correctly for 32-bit tasks.
*/
#define REAL_PTRS_PER_PMD (1UL << PMD_BITS)
#define PTRS_PER_PMD ((const int)(test_thread_flag(TIF_32BIT) ? \
(1UL << (32 - (PAGE_SHIFT-3) - PAGE_SHIFT)) : \
(REAL_PTRS_PER_PMD)))
/* This is gross, but unless we do this gcc retests the
* thread flag every interation in pmd traversal loops.
*/
extern unsigned long __ptrs_per_pmd(void) __attribute_const__;
#define PTRS_PER_PMD __ptrs_per_pmd()
/*
* We cannot use the top address range because VPTE table lives there. This
......@@ -273,7 +270,6 @@ static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot)
((unsigned long) __va((((unsigned long)pgd_val(pgd))<<11UL)))
#define pte_none(pte) (!pte_val(pte))
#define pte_present(pte) (pte_val(pte) & _PAGE_PRESENT)
#define pte_clear(pte) (pte_val(*(pte)) = 0UL)
#define pmd_none(pmd) (!pmd_val(pmd))
#define pmd_bad(pmd) (0)
#define pmd_present(pmd) (pmd_val(pmd) != 0U)
......@@ -287,7 +283,7 @@ static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot)
* Undefined behaviour if not..
*/
#define pte_read(pte) (pte_val(pte) & _PAGE_READ)
#define pte_exec(pte) pte_read(pte)
#define pte_exec(pte) (pte_val(pte) & _PAGE_EXEC)
#define pte_write(pte) (pte_val(pte) & _PAGE_WRITE)
#define pte_dirty(pte) (pte_val(pte) & _PAGE_MODIFIED)
#define pte_young(pte) (pte_val(pte) & _PAGE_ACCESSED)
......@@ -329,6 +325,20 @@ static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot)
#define pte_unmap(pte) do { } while (0)
#define pte_unmap_nested(pte) do { } while (0)
/* Actual page table PTE updates. */
extern void tlb_batch_add(pte_t *ptep, pte_t orig);
static inline void set_pte(pte_t *ptep, pte_t pte)
{
pte_t orig = *ptep;
*ptep = pte;
if (pte_present(orig))
tlb_batch_add(ptep, orig);
}
#define pte_clear(ptep) set_pte((ptep), __pte(0UL))
extern pgd_t swapper_pg_dir[1];
/* These do nothing with the way I have things setup. */
......
#ifndef _SPARC64_TLB_H
#define _SPARC64_TLB_H
#define tlb_flush(tlb) \
do { if ((tlb)->fullmm) \
flush_tlb_mm((tlb)->mm);\
} while (0)
#include <linux/config.h>
#include <linux/swap.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#define tlb_start_vma(tlb, vma) \
do { if (!(tlb)->fullmm) \
flush_cache_range(vma, vma->vm_start, vma->vm_end); \
} while (0)
#define TLB_BATCH_NR 192
#define tlb_end_vma(tlb, vma) \
do { if (!(tlb)->fullmm) \
flush_tlb_range(vma, vma->vm_start, vma->vm_end); \
} while (0)
/*
* For UP we don't need to worry about TLB flush
* and page free order so much..
*/
#ifdef CONFIG_SMP
#define FREE_PTE_NR 506
#define tlb_fast_mode(bp) ((bp)->pages_nr == ~0U)
#else
#define FREE_PTE_NR 1
#define tlb_fast_mode(bp) 1
#endif
#define __tlb_remove_tlb_entry(tlb, ptep, address) \
do { } while (0)
struct mmu_gather {
struct mm_struct *mm;
unsigned int pages_nr;
unsigned int need_flush;
unsigned int tlb_frozen;
unsigned int tlb_nr;
unsigned long freed;
unsigned long vaddrs[TLB_BATCH_NR];
struct page *pages[FREE_PTE_NR];
};
#include <asm-generic/tlb.h>
DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
#define __pmd_free_tlb(tlb, pmd) pmd_free(pmd)
#define __pte_free_tlb(tlb, pte) pte_free(pte)
#ifdef CONFIG_SMP
extern void smp_flush_tlb_pending(struct mm_struct *,
unsigned long, unsigned long *);
#endif
extern void __flush_tlb_pending(unsigned long, unsigned long, unsigned long *);
extern void flush_tlb_pending(void);
static inline struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
{
struct mmu_gather *mp = &per_cpu(mmu_gathers, smp_processor_id());
BUG_ON(mp->tlb_nr);
mp->mm = mm;
mp->pages_nr = num_online_cpus() > 1 ? 0U : ~0U;
mp->tlb_frozen = full_mm_flush;
mp->freed = 0;
return mp;
}
static inline void tlb_flush_mmu(struct mmu_gather *mp)
{
if (mp->need_flush) {
mp->need_flush = 0;
if (!tlb_fast_mode(mp)) {
free_pages_and_swap_cache(mp->pages, mp->pages_nr);
mp->pages_nr = 0;
}
}
}
#ifdef CONFIG_SMP
extern void smp_flush_tlb_mm(struct mm_struct *mm);
#define do_flush_tlb_mm(mm) smp_flush_tlb_mm(mm)
#else
#define do_flush_tlb_mm(mm) __flush_tlb_mm(CTX_HWBITS(mm), SECONDARY_CONTEXT)
#endif
static inline void tlb_finish_mmu(struct mmu_gather *mp, unsigned long start, unsigned long end)
{
unsigned long freed = mp->freed;
struct mm_struct *mm = mp->mm;
unsigned long rss = mm->rss;
if (rss < freed)
freed = rss;
mm->rss = rss - freed;
tlb_flush_mmu(mp);
if (mp->tlb_frozen) {
unsigned long context = mm->context;
if (CTX_VALID(context))
do_flush_tlb_mm(mm);
mp->tlb_frozen = 0;
} else
flush_tlb_pending();
/* keep the page table cache within bounds */
check_pgt_cache();
}
static inline unsigned int tlb_is_full_mm(struct mmu_gather *mp)
{
return mp->tlb_frozen;
}
static inline void tlb_remove_page(struct mmu_gather *mp, struct page *page)
{
mp->need_flush = 1;
if (tlb_fast_mode(mp)) {
free_page_and_swap_cache(page);
return;
}
mp->pages[mp->pages_nr++] = page;
if (mp->pages_nr >= FREE_PTE_NR)
tlb_flush_mmu(mp);
}
#define tlb_remove_tlb_entry(mp,ptep,addr) do { } while (0)
#define pte_free_tlb(mp,ptepage) pte_free(ptepage)
#define pmd_free_tlb(mp,pmdp) pmd_free(pmdp)
#define tlb_migrate_finish(mm) do { } while (0)
#define tlb_start_vma(tlb, vma) do { } while (0)
#define tlb_end_vma(tlb, vma) do { } while (0)
#endif /* _SPARC64_TLB_H */
......@@ -7,11 +7,14 @@
/* TLB flush operations. */
extern void flush_tlb_pending(void);
#define flush_tlb_range(vma,start,end) \
do { (void)(start); flush_tlb_pending(); } while (0)
#define flush_tlb_page(vma,addr) flush_tlb_pending()
#define flush_tlb_mm(mm) flush_tlb_pending()
extern void __flush_tlb_all(void);
extern void __flush_tlb_mm(unsigned long context, unsigned long r);
extern void __flush_tlb_range(unsigned long context, unsigned long start,
unsigned long r, unsigned long end,
unsigned long pgsz, unsigned long size);
extern void __flush_tlb_page(unsigned long context, unsigned long page, unsigned long r);
extern void __flush_tlb_kernel_range(unsigned long start, unsigned long end);
......@@ -22,89 +25,17 @@ extern void __flush_tlb_kernel_range(unsigned long start, unsigned long end);
#define flush_tlb_kernel_range(start,end) \
__flush_tlb_kernel_range(start,end)
#define flush_tlb_mm(__mm) \
do { if (CTX_VALID((__mm)->context)) \
__flush_tlb_mm(CTX_HWBITS((__mm)->context), SECONDARY_CONTEXT); \
} while (0)
#define flush_tlb_range(__vma, start, end) \
do { if (CTX_VALID((__vma)->vm_mm->context)) { \
unsigned long __start = (start)&PAGE_MASK; \
unsigned long __end = PAGE_ALIGN(end); \
__flush_tlb_range(CTX_HWBITS((__vma)->vm_mm->context), __start, \
SECONDARY_CONTEXT, __end, PAGE_SIZE, \
(__end - __start)); \
} \
} while (0)
#define flush_tlb_vpte_range(__mm, start, end) \
do { if (CTX_VALID((__mm)->context)) { \
unsigned long __start = (start)&PAGE_MASK; \
unsigned long __end = PAGE_ALIGN(end); \
__flush_tlb_range(CTX_HWBITS((__mm)->context), __start, \
SECONDARY_CONTEXT, __end, PAGE_SIZE, \
(__end - __start)); \
} \
} while (0)
#define flush_tlb_page(vma, page) \
do { struct mm_struct *__mm = (vma)->vm_mm; \
if (CTX_VALID(__mm->context)) \
__flush_tlb_page(CTX_HWBITS(__mm->context), (page)&PAGE_MASK, \
SECONDARY_CONTEXT); \
} while (0)
#define flush_tlb_vpte_page(mm, addr) \
do { struct mm_struct *__mm = (mm); \
if (CTX_VALID(__mm->context)) \
__flush_tlb_page(CTX_HWBITS(__mm->context), (addr)&PAGE_MASK, \
SECONDARY_CONTEXT); \
} while (0)
#else /* CONFIG_SMP */
extern void smp_flush_tlb_all(void);
extern void smp_flush_tlb_mm(struct mm_struct *mm);
extern void smp_flush_tlb_range(struct mm_struct *mm, unsigned long start,
unsigned long end);
extern void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end);
extern void smp_flush_tlb_page(struct mm_struct *mm, unsigned long page);
#define flush_tlb_all() smp_flush_tlb_all()
#define flush_tlb_mm(mm) smp_flush_tlb_mm(mm)
#define flush_tlb_range(vma, start, end) \
smp_flush_tlb_range((vma)->vm_mm, start, end)
#define flush_tlb_vpte_range(mm, start, end) \
smp_flush_tlb_range(mm, start, end)
#define flush_tlb_kernel_range(start, end) \
smp_flush_tlb_kernel_range(start, end)
#define flush_tlb_page(vma, page) \
smp_flush_tlb_page((vma)->vm_mm, page)
#define flush_tlb_vpte_page(mm, page) \
smp_flush_tlb_page((mm), page)
#endif /* ! CONFIG_SMP */
static __inline__ void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start,
unsigned long end)
{
/* Note the signed type. */
long s = start, e = end, vpte_base;
/* Nobody should call us with start below VM hole and end above.
See if it is really true. */
BUG_ON(s > e);
#if 0
/* Currently free_pgtables guarantees this. */
s &= PMD_MASK;
e = (e + PMD_SIZE - 1) & PMD_MASK;
#endif
vpte_base = (tlb_type == spitfire ?
VPTE_BASE_SPITFIRE :
VPTE_BASE_CHEETAH);
flush_tlb_vpte_range(mm,
vpte_base + (s >> (PAGE_SHIFT - 3)),
vpte_base + (e >> (PAGE_SHIFT - 3)));
}
extern void flush_tlb_pgtables(struct mm_struct *, unsigned long, unsigned long);
#endif /* _SPARC64_TLBFLUSH_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment