Commit 930bff88 authored by Thomas Bogendoerfer, committed by Ralf Baechle

[MIPS] IP28: added cache barrier to assembly routines

IP28 needs special treatment to avoid speculative accesses. gcc
takes care of this for .c code, but for assembly code we need to
do it manually.

This is taken from Peter Fuerst's IP28 patches.
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
parent 2064ba23
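The new R10KCBARRIER() macro is dropped into the hand-written user-copy (__copy_user/__rmemcpy), memset/__bzero and __strncpy_from_user routines ahead of their load/store groups; it becomes a cache-barrier instruction only on IP28 (CONFIG_SGI_IP28), where a speculative R10000 store to an invalid or uncached DMA address would be destructive. A minimal, hypothetical sketch of the pattern, not code from this patch (the example_copy label and the byte-copy loop are made up for illustration):

/* Hypothetical illustration only, not part of the commit: a byte-copy
 * loop in the style of the routines below, with the IP28 cache barrier
 * issued before the store so the store cannot be executed speculatively.
 */
#include <asm/asm.h>
#include <asm/regdef.h>

	.text
	.set	noreorder
LEAF(example_copy)		/* a0 = dst, a1 = src, a2 = src end */
1:	lbu	t0, (a1)	/* load next source byte */
	PTR_ADDIU a1, 1
	R10KCBARRIER(0(ra))	/* cache op on IP28, expands to nothing elsewhere */
	sb	t0, (a0)	/* store only issues once it is non-speculative */
	bne	a1, a2, 1b
	 PTR_ADDIU a0, 1	/* branch delay slot: advance dst */
	jr	ra
	 nop
	END(example_copy)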
@@ -199,6 +199,7 @@ FEXPORT(__copy_user)
*/
#define rem t8
R10KCBARRIER(0(ra))
/*
* The "issue break"s below are very approximate.
* Issue delays for dcache fills will perturb the schedule, as will
@@ -231,6 +232,7 @@ both_aligned:
PREF( 1, 3*32(dst) )
.align 4
1:
R10KCBARRIER(0(ra))
EXC( LOAD t0, UNIT(0)(src), l_exc)
EXC( LOAD t1, UNIT(1)(src), l_exc_copy)
EXC( LOAD t2, UNIT(2)(src), l_exc_copy)
@@ -272,6 +274,7 @@ EXC( LOAD t2, UNIT(2)(src), l_exc_copy)
EXC( LOAD t3, UNIT(3)(src), l_exc_copy)
SUB len, len, 4*NBYTES
ADD src, src, 4*NBYTES
R10KCBARRIER(0(ra))
EXC( STORE t0, UNIT(0)(dst), s_exc_p4u)
EXC( STORE t1, UNIT(1)(dst), s_exc_p3u)
EXC( STORE t2, UNIT(2)(dst), s_exc_p2u)
@@ -287,6 +290,7 @@ less_than_4units:
beq rem, len, copy_bytes
nop
1:
R10KCBARRIER(0(ra))
EXC( LOAD t0, 0(src), l_exc)
ADD src, src, NBYTES
SUB len, len, NBYTES
@@ -334,6 +338,7 @@ EXC( LDFIRST t3, FIRST(0)(src), l_exc)
EXC( LDREST t3, REST(0)(src), l_exc_copy)
SUB t2, t2, t1 # t2 = number of bytes copied
xor match, t0, t1
R10KCBARRIER(0(ra))
EXC( STFIRST t3, FIRST(0)(dst), s_exc)
beq len, t2, done
SUB len, len, t2
@@ -354,6 +359,7 @@ src_unaligned_dst_aligned:
* It's OK to load FIRST(N+1) before REST(N) because the two addresses
* are to the same unit (unless src is aligned, but it's not).
*/
R10KCBARRIER(0(ra))
EXC( LDFIRST t0, FIRST(0)(src), l_exc)
EXC( LDFIRST t1, FIRST(1)(src), l_exc_copy)
SUB len, len, 4*NBYTES
@@ -384,6 +390,7 @@ cleanup_src_unaligned:
beq rem, len, copy_bytes
nop
1:
R10KCBARRIER(0(ra))
EXC( LDFIRST t0, FIRST(0)(src), l_exc)
EXC( LDREST t0, REST(0)(src), l_exc_copy)
ADD src, src, NBYTES
@@ -399,6 +406,7 @@ copy_bytes_checklen:
nop
copy_bytes:
/* 0 < len < NBYTES */
R10KCBARRIER(0(ra))
#define COPY_BYTE(N) \
EXC( lb t0, N(src), l_exc); \
SUB len, len, 1; \
@@ -528,6 +536,7 @@ LEAF(__rmemcpy) /* a0=dst a1=src a2=len */
ADD a1, a2 # src = src + len
r_end_bytes:
R10KCBARRIER(0(ra))
lb t0, -1(a1)
SUB a2, a2, 0x1
sb t0, -1(a0)
@@ -542,6 +551,7 @@ r_out:
move a2, zero
r_end_bytes_up:
R10KCBARRIER(0(ra))
lb t0, (a1)
SUB a2, a2, 0x1
sb t0, (a0)
...
@@ -86,6 +86,7 @@ FEXPORT(__bzero)
.set at
#endif
R10KCBARRIER(0(ra))
#ifdef __MIPSEB__
EX(LONG_S_L, a1, (a0), first_fixup) /* make word/dword aligned */
#endif
@@ -103,11 +104,13 @@ FEXPORT(__bzero)
PTR_ADDU t1, a0 /* end address */
.set reorder
1: PTR_ADDIU a0, 64
R10KCBARRIER(0(ra))
f_fill64 a0, -64, a1, fwd_fixup
bne t1, a0, 1b
.set noreorder
memset_partial:
R10KCBARRIER(0(ra))
PTR_LA t1, 2f /* where to start */
#if LONGSIZE == 4
PTR_SUBU t1, t0
@@ -129,6 +132,7 @@ memset_partial:
beqz a2, 1f
PTR_ADDU a0, a2 /* What's left */
R10KCBARRIER(0(ra))
#ifdef __MIPSEB__
EX(LONG_S_R, a1, -1(a0), last_fixup)
#endif
@@ -143,6 +147,7 @@ small_memset:
PTR_ADDU t1, a0, a2
1: PTR_ADDIU a0, 1 /* fill bytewise */
R10KCBARRIER(0(ra))
bne t1, a0, 1b
sb a1, -1(a0)
...
@@ -38,6 +38,7 @@ FEXPORT(__strncpy_from_user_nocheck_asm)
.set noreorder
1: EX(lbu, t0, (v1), fault)
PTR_ADDIU v1, 1
R10KCBARRIER(0(ra))
beqz t0, 2f
sb t0, (a0)
PTR_ADDIU v0, 1
...
@@ -398,4 +398,12 @@ symbol = value
#define SSNOP sll zero, zero, 1
#ifdef CONFIG_SGI_IP28
/* Inhibit speculative stores to volatile (e.g.DMA) or invalid addresses. */
#include <asm/cacheops.h>
#define R10KCBARRIER(addr) cache Cache_Barrier, addr;
#else
#define R10KCBARRIER(addr)
#endif
#endif /* __ASM_ASM_H */
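For kernels built without CONFIG_SGI_IP28 the macro expands to nothing, so the added barriers cost no instructions on other systems; with it, each use becomes a single cache op. A rough sketch of the expansion at one of the store sites above (operand name per the asm/cacheops.h include in the hunk; this is an illustration, not generated assembler output):

	R10KCBARRIER(0(ra))
	/* CONFIG_SGI_IP28=y -> cache Cache_Barrier, 0(ra) */
	/* otherwise         -> nothing emitted             */
EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p4u)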