Commit 04ed7d9c authored by Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc

Pull sparc fixes from David Miller:
 "Several sparc64 bug fixes here:

  1) Make the user copy routines on sparc64 return a properly accurate
     residual length when an exception occurs.

  2) We can get enormous kernel TLB range flush requests from vmalloc
     unmaps, so handle these more gracefully by doing full flushes
     instead of going page-by-page.

  3) Cope properly with negative branch offsets in sparc jump-label
     support, from James Clarke.

  4) Some old-style decl GCC warning fixups from Tobias Klauser"

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc:
  sparc64: Handle extremely large kernel TLB range flushes more gracefully.
  sparc64: Fix illegal relative branches in hypervisor patched TLB cross-call code.
  sparc64: Fix instruction count in comment for __hypervisor_flush_tlb_pending.
  sparc64: Handle extremely large kernel TSB range flushes sanely.
  sparc: Handle negative offsets in arch_jump_label_transform
  sparc64: Fix illegal relative branches in hypervisor patched TLB code.
  sparc64: Delete now unused user copy fixup functions.
  sparc64: Delete now unused user copy assembler helpers.
  sparc64: Convert U3copy_{from,to}_user to accurate exception reporting.
  sparc64: Convert NG2copy_{from,to}_user to accurate exception reporting.
  sparc64: Convert NGcopy_{from,to}_user to accurate exception reporting.
  sparc64: Convert NG4copy_{from,to}_user to accurate exception reporting.
  sparc64: Convert U1copy_{from,to}_user to accurate exception reporting.
  sparc64: Convert GENcopy_{from,to}_user to accurate exception reporting.
  sparc64: Convert copy_in_user to accurate exception reporting.
  sparc64: Prepare to move to more saner user copy exception handling.
  sparc64: Delete __ret_efault.
  sparc32: Fix old style declaration GCC warnings
  sparc64: Fix old style declaration GCC warnings
  sparc64: Setup a scheduling domain for highest level cache.
parents 2a26d99b a74ad5e6
@@ -24,9 +24,10 @@ typedef struct {
unsigned int icache_line_size;
unsigned int ecache_size;
unsigned int ecache_line_size;
-unsigned short sock_id;
+unsigned short sock_id; /* physical package */
unsigned short core_id;
-int proc_id;
+unsigned short max_cache_id; /* groupings of highest shared cache */
+unsigned short proc_id; /* strand (aka HW thread) id */
} cpuinfo_sparc;
DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data);
...
@@ -134,7 +134,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
*(volatile __u32 *)&lp->lock = ~0U;
}
-static void inline arch_write_unlock(arch_rwlock_t *lock)
+static inline void arch_write_unlock(arch_rwlock_t *lock)
{
__asm__ __volatile__(
" st %%g0, [%0]"
...
@@ -96,7 +96,7 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long fla
/* Multi-reader locks, these are much saner than the 32-bit Sparc ones... */
-static void inline arch_read_lock(arch_rwlock_t *lock)
+static inline void arch_read_lock(arch_rwlock_t *lock)
{
unsigned long tmp1, tmp2;
@@ -119,7 +119,7 @@ static void inline arch_read_lock(arch_rwlock_t *lock)
: "memory");
}
-static int inline arch_read_trylock(arch_rwlock_t *lock)
+static inline int arch_read_trylock(arch_rwlock_t *lock)
{
int tmp1, tmp2;
@@ -140,7 +140,7 @@ static int inline arch_read_trylock(arch_rwlock_t *lock)
return tmp1;
}
-static void inline arch_read_unlock(arch_rwlock_t *lock)
+static inline void arch_read_unlock(arch_rwlock_t *lock)
{
unsigned long tmp1, tmp2;
@@ -156,7 +156,7 @@ static void inline arch_read_unlock(arch_rwlock_t *lock)
: "memory");
}
-static void inline arch_write_lock(arch_rwlock_t *lock)
+static inline void arch_write_lock(arch_rwlock_t *lock)
{
unsigned long mask, tmp1, tmp2;
@@ -181,7 +181,7 @@ static void inline arch_write_lock(arch_rwlock_t *lock)
: "memory");
}
-static void inline arch_write_unlock(arch_rwlock_t *lock)
+static inline void arch_write_unlock(arch_rwlock_t *lock)
{
__asm__ __volatile__(
" stw %%g0, [%0]"
@@ -190,7 +190,7 @@ static void inline arch_write_unlock(arch_rwlock_t *lock)
: "memory");
}
-static int inline arch_write_trylock(arch_rwlock_t *lock)
+static inline int arch_write_trylock(arch_rwlock_t *lock)
{
unsigned long mask, tmp1, tmp2, result;
...
@@ -44,14 +44,20 @@ int __node_distance(int, int);
#define topology_physical_package_id(cpu) (cpu_data(cpu).proc_id)
#define topology_core_id(cpu) (cpu_data(cpu).core_id)
#define topology_core_cpumask(cpu) (&cpu_core_sib_map[cpu])
+#define topology_core_cache_cpumask(cpu) (&cpu_core_sib_cache_map[cpu])
#define topology_sibling_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu))
#endif /* CONFIG_SMP */
extern cpumask_t cpu_core_map[NR_CPUS];
extern cpumask_t cpu_core_sib_map[NR_CPUS];
+extern cpumask_t cpu_core_sib_cache_map[NR_CPUS];
+/**
+ * Return cores that shares the last level cache.
+ */
static inline const struct cpumask *cpu_coregroup_mask(int cpu)
{
-return &cpu_core_map[cpu];
+return &cpu_core_sib_cache_map[cpu];
}
#endif /* _ASM_SPARC64_TOPOLOGY_H */
@@ -82,7 +82,6 @@ static inline int access_ok(int type, const void __user * addr, unsigned long si
return 1;
}
-void __ret_efault(void);
void __retl_efault(void);
/* Uh, these should become the main single-value transfer routines..
@@ -189,55 +188,34 @@ int __get_user_bad(void);
unsigned long __must_check ___copy_from_user(void *to,
const void __user *from,
unsigned long size);
-unsigned long copy_from_user_fixup(void *to, const void __user *from,
-unsigned long size);
static inline unsigned long __must_check
copy_from_user(void *to, const void __user *from, unsigned long size)
{
-unsigned long ret;
check_object_size(to, size, false);
-ret = ___copy_from_user(to, from, size);
-if (unlikely(ret))
-ret = copy_from_user_fixup(to, from, size);
-return ret;
+return ___copy_from_user(to, from, size);
}
#define __copy_from_user copy_from_user
unsigned long __must_check ___copy_to_user(void __user *to,
const void *from,
unsigned long size);
-unsigned long copy_to_user_fixup(void __user *to, const void *from,
-unsigned long size);
static inline unsigned long __must_check
copy_to_user(void __user *to, const void *from, unsigned long size)
{
-unsigned long ret;
check_object_size(from, size, true);
-ret = ___copy_to_user(to, from, size);
-if (unlikely(ret))
-ret = copy_to_user_fixup(to, from, size);
-return ret;
+return ___copy_to_user(to, from, size);
}
#define __copy_to_user copy_to_user
unsigned long __must_check ___copy_in_user(void __user *to,
const void __user *from,
unsigned long size);
-unsigned long copy_in_user_fixup(void __user *to, void __user *from,
-unsigned long size);
static inline unsigned long __must_check
copy_in_user(void __user *to, void __user *from, unsigned long size)
{
-unsigned long ret = ___copy_in_user(to, from, size);
-if (unlikely(ret))
-ret = copy_in_user_fixup(to, from, size);
-return ret;
+return ___copy_in_user(to, from, size);
}
#define __copy_in_user copy_in_user
...
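The shape of this change, in C terms: the low-level ___copy_* routines (and their assembly exception fixups, converted later in the series) now hand back the exact number of bytes left uncopied, so the inline wrappers simply propagate that value instead of re-running a byte-at-a-time copy_*_user_fixup() pass. A minimal userspace sketch of that contract (illustrative only, not kernel code; raw_copy(), fault_at and copy_from_user_model() are made-up stand-ins):

#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Stand-in for ___copy_from_user(): copies until a simulated fault,
 * then reports exactly how many bytes were NOT copied. */
static size_t raw_copy(void *to, const void *from, size_t size, size_t fault_at)
{
    size_t done = size < fault_at ? size : fault_at;

    memcpy(to, from, done);
    return size - done;
}

/* New-style wrapper: no fixup re-copy, just propagate the residual. */
static size_t copy_from_user_model(void *to, const void *from, size_t size, size_t fault_at)
{
    return raw_copy(to, from, size, fault_at);
}

int main(void)
{
    char src[16] = "hello, sparc64!";
    char dst[16] = { 0 };

    /* A fault 6 bytes into a 15-byte copy leaves 9 bytes uncopied. */
    printf("residual = %zu\n", copy_from_user_model(dst, src, 15, 6));
    return 0;
}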
@@ -926,48 +926,11 @@ tlb_type: .word 0 /* Must NOT end up in BSS */
EXPORT_SYMBOL(tlb_type)
.section ".fixup",#alloc,#execinstr
-.globl __ret_efault, __retl_efault, __ret_one, __retl_one
-ENTRY(__ret_efault)
-ret
-restore %g0, -EFAULT, %o0
-ENDPROC(__ret_efault)
-EXPORT_SYMBOL(__ret_efault)
ENTRY(__retl_efault)
retl
mov -EFAULT, %o0
ENDPROC(__retl_efault)
-ENTRY(__retl_one)
-retl
-mov 1, %o0
-ENDPROC(__retl_one)
-ENTRY(__retl_one_fp)
-VISExitHalf
-retl
-mov 1, %o0
-ENDPROC(__retl_one_fp)
-ENTRY(__ret_one_asi)
-wr %g0, ASI_AIUS, %asi
-ret
-restore %g0, 1, %o0
-ENDPROC(__ret_one_asi)
-ENTRY(__retl_one_asi)
-wr %g0, ASI_AIUS, %asi
-retl
-mov 1, %o0
-ENDPROC(__retl_one_asi)
-ENTRY(__retl_one_asi_fp)
-wr %g0, ASI_AIUS, %asi
-VISExitHalf
-retl
-mov 1, %o0
-ENDPROC(__retl_one_asi_fp)
ENTRY(__retl_o1)
retl
mov %o1, %o0
...
@@ -13,19 +13,30 @@
void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
{
-u32 val;
u32 *insn = (u32 *) (unsigned long) entry->code;
+u32 val;
if (type == JUMP_LABEL_JMP) {
s32 off = (s32)entry->target - (s32)entry->code;
+bool use_v9_branch = false;
+BUG_ON(off & 3);
#ifdef CONFIG_SPARC64
-/* ba,pt %xcc, . + (off << 2) */
-val = 0x10680000 | ((u32) off >> 2);
-#else
-/* ba . + (off << 2) */
-val = 0x10800000 | ((u32) off >> 2);
+if (off <= 0xfffff && off >= -0x100000)
+use_v9_branch = true;
#endif
+if (use_v9_branch) {
+/* WDISP19 - target is . + immed << 2 */
+/* ba,pt %xcc, . + off */
+val = 0x10680000 | (((u32) off >> 2) & 0x7ffff);
+} else {
+/* WDISP22 - target is . + immed << 2 */
+BUG_ON(off > 0x7fffff);
+BUG_ON(off < -0x800000);
+/* ba . + off */
+val = 0x10800000 | (((u32) off >> 2) & 0x3fffff);
+}
} else {
val = 0x01000000;
}
...
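The jump-label fix boils down to masking the (possibly negative) word displacement to the branch's immediate field before OR-ing it into the opcode, and picking the 19-bit v9 form only when the offset fits. A standalone sketch of that encoding decision (the constants mirror the hunk above; this is not the kernel function itself):

#include <stdint.h>
#include <stdio.h>

static uint32_t encode_branch(int32_t off)  /* off = target - pc, in bytes */
{
    if (off <= 0xfffff && off >= -0x100000)
        /* ba,pt %xcc: 19-bit displacement (WDISP19) */
        return 0x10680000 | (((uint32_t)off >> 2) & 0x7ffff);
    /* plain ba: 22-bit displacement (WDISP22) */
    return 0x10800000 | (((uint32_t)off >> 2) & 0x3fffff);
}

int main(void)
{
    /* Without the & mask, the sign bits of a backward branch would
     * spill into the opcode field and produce an illegal instruction. */
    printf("forward  +0x40: %08x\n", encode_branch(0x40));
    printf("backward -0x40: %08x\n", encode_branch(-0x40));
    return 0;
}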
@@ -645,13 +645,20 @@ static void __mark_core_id(struct mdesc_handle *hp, u64 node,
cpu_data(*id).core_id = core_id;
}
-static void __mark_sock_id(struct mdesc_handle *hp, u64 node,
-int sock_id)
+static void __mark_max_cache_id(struct mdesc_handle *hp, u64 node,
+int max_cache_id)
{
const u64 *id = mdesc_get_property(hp, node, "id", NULL);
-if (*id < num_possible_cpus())
-cpu_data(*id).sock_id = sock_id;
+if (*id < num_possible_cpus()) {
+cpu_data(*id).max_cache_id = max_cache_id;
+/**
+ * On systems without explicit socket descriptions socket
+ * is max_cache_id
+ */
+cpu_data(*id).sock_id = max_cache_id;
+}
}
static void mark_core_ids(struct mdesc_handle *hp, u64 mp,
@@ -660,10 +667,11 @@ static void mark_core_ids(struct mdesc_handle *hp, u64 mp,
find_back_node_value(hp, mp, "cpu", __mark_core_id, core_id, 10);
}
-static void mark_sock_ids(struct mdesc_handle *hp, u64 mp,
-int sock_id)
+static void mark_max_cache_ids(struct mdesc_handle *hp, u64 mp,
+int max_cache_id)
{
-find_back_node_value(hp, mp, "cpu", __mark_sock_id, sock_id, 10);
+find_back_node_value(hp, mp, "cpu", __mark_max_cache_id,
+max_cache_id, 10);
}
static void set_core_ids(struct mdesc_handle *hp)
@@ -694,14 +702,15 @@ static void set_core_ids(struct mdesc_handle *hp)
}
}
-static int set_sock_ids_by_cache(struct mdesc_handle *hp, int level)
+static int set_max_cache_ids_by_cache(struct mdesc_handle *hp, int level)
{
u64 mp;
int idx = 1;
int fnd = 0;
-/* Identify unique sockets by looking for cpus backpointed to by
-* shared level n caches.
+/**
+* Identify unique highest level of shared cache by looking for cpus
+* backpointed to by shared level N caches.
*/
mdesc_for_each_node_by_name(hp, mp, "cache") {
const u64 *cur_lvl;
@@ -709,8 +718,7 @@ static int set_sock_ids_by_cache(struct mdesc_handle *hp, int level)
cur_lvl = mdesc_get_property(hp, mp, "level", NULL);
if (*cur_lvl != level)
continue;
-mark_sock_ids(hp, mp, idx);
+mark_max_cache_ids(hp, mp, idx);
idx++;
fnd = 1;
}
@@ -745,15 +753,17 @@ static void set_sock_ids(struct mdesc_handle *hp)
{
u64 mp;
-/* If machine description exposes sockets data use it.
-* Otherwise fallback to use shared L3 or L2 caches.
+/**
+* Find the highest level of shared cache which pre-T7 is also
+* the socket.
*/
+if (!set_max_cache_ids_by_cache(hp, 3))
+set_max_cache_ids_by_cache(hp, 2);
+/* If machine description exposes sockets data use it.*/
mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "sockets");
if (mp != MDESC_NODE_NULL)
-return set_sock_ids_by_socket(hp, mp);
-if (!set_sock_ids_by_cache(hp, 3))
-set_sock_ids_by_cache(hp, 2);
+set_sock_ids_by_socket(hp, mp);
}
static void mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id)
...
@@ -63,9 +63,13 @@ cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
cpumask_t cpu_core_sib_map[NR_CPUS] __read_mostly = {
[0 ... NR_CPUS-1] = CPU_MASK_NONE };
+cpumask_t cpu_core_sib_cache_map[NR_CPUS] __read_mostly = {
+[0 ... NR_CPUS - 1] = CPU_MASK_NONE };
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
EXPORT_SYMBOL(cpu_core_map);
EXPORT_SYMBOL(cpu_core_sib_map);
+EXPORT_SYMBOL(cpu_core_sib_cache_map);
static cpumask_t smp_commenced_mask;
@@ -1265,6 +1269,10 @@ void smp_fill_in_sib_core_maps(void)
unsigned int j;
for_each_present_cpu(j) {
+if (cpu_data(i).max_cache_id ==
+cpu_data(j).max_cache_id)
+cpumask_set_cpu(j, &cpu_core_sib_cache_map[i]);
if (cpu_data(i).sock_id == cpu_data(j).sock_id)
cpumask_set_cpu(j, &cpu_core_sib_map[i]);
}
...
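In effect, every present CPU j whose max_cache_id matches CPU i lands in cpu_core_sib_cache_map[i], which is the mask cpu_coregroup_mask() now returns so a scheduling domain can be built per last-level cache. A toy userspace sketch of that grouping (plain bitmasks instead of cpumask_t; the example topology is made up):

#include <stdint.h>
#include <stdio.h>

#define NCPU 8

int main(void)
{
    /* Hypothetical box: CPUs 0-3 share one L3, CPUs 4-7 another. */
    int max_cache_id[NCPU] = { 0, 0, 0, 0, 1, 1, 1, 1 };
    uint32_t cache_sib_mask[NCPU] = { 0 };

    for (int i = 0; i < NCPU; i++)
        for (int j = 0; j < NCPU; j++)
            if (max_cache_id[i] == max_cache_id[j])
                cache_sib_mask[i] |= 1u << j;

    printf("cpu0 LLC siblings: 0x%02x\n", cache_sib_mask[0]);  /* 0x0f */
    printf("cpu4 LLC siblings: 0x%02x\n", cache_sib_mask[4]);  /* 0xf0 */
    return 0;
}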
@@ -3,11 +3,11 @@
* Copyright (C) 2007 David S. Miller (davem@davemloft.net)
*/
-#define EX_LD(x) \
+#define EX_LD(x,y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
-.word 98b, __retl_one; \
+.word 98b, y; \
.text; \
.align 4;
...
@@ -3,11 +3,11 @@
* Copyright (C) 2007 David S. Miller (davem@davemloft.net)
*/
-#define EX_ST(x) \
+#define EX_ST(x,y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
-.word 98b, __retl_one; \
+.word 98b, y; \
.text; \
.align 4;
...
@@ -4,21 +4,18 @@
*/
#ifdef __KERNEL__
+#include <linux/linkage.h>
#define GLOBAL_SPARE %g7
#else
#define GLOBAL_SPARE %g5
#endif
#ifndef EX_LD
-#define EX_LD(x) x
+#define EX_LD(x,y) x
#endif
#ifndef EX_ST
-#define EX_ST(x) x
-#endif
-#ifndef EX_RETVAL
-#define EX_RETVAL(x) x
+#define EX_ST(x,y) x
#endif
#ifndef LOAD
@@ -45,6 +42,29 @@
.register %g3,#scratch
.text
#ifndef EX_RETVAL
#define EX_RETVAL(x) x
ENTRY(GEN_retl_o4_1)
add %o4, %o2, %o4
retl
add %o4, 1, %o0
ENDPROC(GEN_retl_o4_1)
ENTRY(GEN_retl_g1_8)
add %g1, %o2, %g1
retl
add %g1, 8, %o0
ENDPROC(GEN_retl_g1_8)
ENTRY(GEN_retl_o2_4)
retl
add %o2, 4, %o0
ENDPROC(GEN_retl_o2_4)
ENTRY(GEN_retl_o2_1)
retl
add %o2, 1, %o0
ENDPROC(GEN_retl_o2_1)
#endif
.align 64
.globl FUNC_NAME
@@ -73,8 +93,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
sub %g0, %o4, %o4
sub %o2, %o4, %o2
1: subcc %o4, 1, %o4
-EX_LD(LOAD(ldub, %o1, %g1))
-EX_ST(STORE(stb, %g1, %o0))
+EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o4_1)
+EX_ST(STORE(stb, %g1, %o0),GEN_retl_o4_1)
add %o1, 1, %o1
bne,pt %XCC, 1b
add %o0, 1, %o0
@@ -82,8 +102,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
andn %o2, 0x7, %g1
sub %o2, %g1, %o2
1: subcc %g1, 0x8, %g1
-EX_LD(LOAD(ldx, %o1, %g2))
-EX_ST(STORE(stx, %g2, %o0))
+EX_LD(LOAD(ldx, %o1, %g2),GEN_retl_g1_8)
+EX_ST(STORE(stx, %g2, %o0),GEN_retl_g1_8)
add %o1, 0x8, %o1
bne,pt %XCC, 1b
add %o0, 0x8, %o0
@@ -100,8 +120,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1:
subcc %o2, 4, %o2
-EX_LD(LOAD(lduw, %o1, %g1))
-EX_ST(STORE(stw, %g1, %o1 + %o3))
+EX_LD(LOAD(lduw, %o1, %g1),GEN_retl_o2_4)
+EX_ST(STORE(stw, %g1, %o1 + %o3),GEN_retl_o2_4)
bgu,pt %XCC, 1b
add %o1, 4, %o1
@@ -111,8 +131,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
.align 32
90:
subcc %o2, 1, %o2
-EX_LD(LOAD(ldub, %o1, %g1))
-EX_ST(STORE(stb, %g1, %o1 + %o3))
+EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o2_1)
+EX_ST(STORE(stb, %g1, %o1 + %o3),GEN_retl_o2_1)
bgu,pt %XCC, 90b
add %o1, 1, %o1
retl
...
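The GEN_retl_* stubs added above encode, in their names, how to reconstruct the residual count at the faulting instruction: GEN_retl_o4_1 returns %o4 + %o2 + 1, GEN_retl_o2_4 returns %o2 + 4, and so on, and the two-argument EX_LD()/EX_ST() macros record exactly that stub in the __ex_table entry for each load/store. A toy C model of the idea (made-up struct and names; the real lookup is the kernel's exception-table search at trap time):

#include <stddef.h>
#include <stdio.h>

struct regs_at_fault { size_t o2, o4; };  /* stand-ins for the live registers */

typedef size_t (*fixup_t)(const struct regs_at_fault *);

/* Mirrors GEN_retl_o4_1: residual = %o4 + %o2 + 1 */
static size_t gen_retl_o4_1(const struct regs_at_fault *r) { return r->o4 + r->o2 + 1; }
/* Mirrors GEN_retl_o2_4: residual = %o2 + 4 */
static size_t gen_retl_o2_4(const struct regs_at_fault *r) { return r->o2 + 4; }

int main(void)
{
    struct regs_at_fault r = { .o2 = 8, .o4 = 3 };
    fixup_t per_site[] = { gen_retl_o4_1, gen_retl_o2_4 };  /* one handler per faulting site */

    for (size_t i = 0; i < sizeof(per_site) / sizeof(per_site[0]); i++)
        printf("residual = %zu\n", per_site[i](&r));
    return 0;
}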
@@ -38,7 +38,7 @@ lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o
lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o
lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o
-lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o
+lib-$(CONFIG_SPARC64) += copy_in_user.o memmove.o
lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
obj-$(CONFIG_SPARC64) += iomap.o
...
@@ -3,19 +3,19 @@
* Copyright (C) 2007 David S. Miller (davem@davemloft.net)
*/
-#define EX_LD(x) \
+#define EX_LD(x,y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
-.word 98b, __retl_one_asi;\
+.word 98b, y; \
.text; \
.align 4;
-#define EX_LD_FP(x) \
+#define EX_LD_FP(x,y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
-.word 98b, __retl_one_asi_fp;\
+.word 98b, y##_fp; \
.text; \
.align 4;
...
@@ -3,19 +3,19 @@
* Copyright (C) 2007 David S. Miller (davem@davemloft.net)
*/
-#define EX_ST(x) \
+#define EX_ST(x,y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
-.word 98b, __retl_one_asi;\
+.word 98b, y; \
.text; \
.align 4;
-#define EX_ST_FP(x) \
+#define EX_ST_FP(x,y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
-.word 98b, __retl_one_asi_fp;\
+.word 98b, y##_fp; \
.text; \
.align 4;
...
@@ -4,6 +4,7 @@
*/
#ifdef __KERNEL__
+#include <linux/linkage.h>
#include <asm/visasm.h>
#include <asm/asi.h>
#define GLOBAL_SPARE %g7
@@ -32,21 +33,17 @@
#endif
#ifndef EX_LD
-#define EX_LD(x) x
+#define EX_LD(x,y) x
#endif
#ifndef EX_LD_FP
-#define EX_LD_FP(x) x
+#define EX_LD_FP(x,y) x
#endif
#ifndef EX_ST
-#define EX_ST(x) x
+#define EX_ST(x,y) x
#endif
#ifndef EX_ST_FP
-#define EX_ST_FP(x) x
-#endif
-#ifndef EX_RETVAL
-#define EX_RETVAL(x) x
+#define EX_ST_FP(x,y) x
#endif
#ifndef LOAD
@@ -140,45 +137,110 @@
fsrc2 %x6, %f12; \
fsrc2 %x7, %f14;
#define FREG_LOAD_1(base, x0) \ #define FREG_LOAD_1(base, x0) \
EX_LD_FP(LOAD(ldd, base + 0x00, %x0)) EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1)
#define FREG_LOAD_2(base, x0, x1) \ #define FREG_LOAD_2(base, x0, x1) \
EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1);
#define FREG_LOAD_3(base, x0, x1, x2) \ #define FREG_LOAD_3(base, x0, x1, x2) \
EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1);
#define FREG_LOAD_4(base, x0, x1, x2, x3) \ #define FREG_LOAD_4(base, x0, x1, x2, x3) \
EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1);
#define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \ #define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \
EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1);
#define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \ #define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \
EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \ EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1);
#define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \ #define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \
EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \ EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); \ EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); \
EX_LD_FP(LOAD(ldd, base + 0x30, %x6)); EX_LD_FP(LOAD(ldd, base + 0x30, %x6), NG2_retl_o2_plus_g1);
.register %g2,#scratch .register %g2,#scratch
.register %g3,#scratch .register %g3,#scratch
.text .text
#ifndef EX_RETVAL
#define EX_RETVAL(x) x
__restore_fp:
VISExitHalf
__restore_asi:
retl
wr %g0, ASI_AIUS, %asi
ENTRY(NG2_retl_o2)
ba,pt %xcc, __restore_asi
mov %o2, %o0
ENDPROC(NG2_retl_o2)
ENTRY(NG2_retl_o2_plus_1)
ba,pt %xcc, __restore_asi
add %o2, 1, %o0
ENDPROC(NG2_retl_o2_plus_1)
ENTRY(NG2_retl_o2_plus_4)
ba,pt %xcc, __restore_asi
add %o2, 4, %o0
ENDPROC(NG2_retl_o2_plus_4)
ENTRY(NG2_retl_o2_plus_8)
ba,pt %xcc, __restore_asi
add %o2, 8, %o0
ENDPROC(NG2_retl_o2_plus_8)
ENTRY(NG2_retl_o2_plus_o4_plus_1)
add %o4, 1, %o4
ba,pt %xcc, __restore_asi
add %o2, %o4, %o0
ENDPROC(NG2_retl_o2_plus_o4_plus_1)
ENTRY(NG2_retl_o2_plus_o4_plus_8)
add %o4, 8, %o4
ba,pt %xcc, __restore_asi
add %o2, %o4, %o0
ENDPROC(NG2_retl_o2_plus_o4_plus_8)
ENTRY(NG2_retl_o2_plus_o4_plus_16)
add %o4, 16, %o4
ba,pt %xcc, __restore_asi
add %o2, %o4, %o0
ENDPROC(NG2_retl_o2_plus_o4_plus_16)
ENTRY(NG2_retl_o2_plus_g1_fp)
ba,pt %xcc, __restore_fp
add %o2, %g1, %o0
ENDPROC(NG2_retl_o2_plus_g1_fp)
ENTRY(NG2_retl_o2_plus_g1_plus_64_fp)
add %g1, 64, %g1
ba,pt %xcc, __restore_fp
add %o2, %g1, %o0
ENDPROC(NG2_retl_o2_plus_g1_plus_64_fp)
ENTRY(NG2_retl_o2_plus_g1_plus_1)
add %g1, 1, %g1
ba,pt %xcc, __restore_asi
add %o2, %g1, %o0
ENDPROC(NG2_retl_o2_plus_g1_plus_1)
ENTRY(NG2_retl_o2_and_7_plus_o4)
and %o2, 7, %o2
ba,pt %xcc, __restore_asi
add %o2, %o4, %o0
ENDPROC(NG2_retl_o2_and_7_plus_o4)
ENTRY(NG2_retl_o2_and_7_plus_o4_plus_8)
and %o2, 7, %o2
add %o4, 8, %o4
ba,pt %xcc, __restore_asi
add %o2, %o4, %o0
ENDPROC(NG2_retl_o2_and_7_plus_o4_plus_8)
#endif
.align 64 .align 64
.globl FUNC_NAME .globl FUNC_NAME
...@@ -230,8 +292,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -230,8 +292,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
sub %g0, %o4, %o4 ! bytes to align dst sub %g0, %o4, %o4 ! bytes to align dst
sub %o2, %o4, %o2 sub %o2, %o4, %o2
1: subcc %o4, 1, %o4 1: subcc %o4, 1, %o4
EX_LD(LOAD(ldub, %o1, %g1)) EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_o4_plus_1)
EX_ST(STORE(stb, %g1, %o0)) EX_ST(STORE(stb, %g1, %o0), NG2_retl_o2_plus_o4_plus_1)
add %o1, 1, %o1 add %o1, 1, %o1
bne,pt %XCC, 1b bne,pt %XCC, 1b
add %o0, 1, %o0 add %o0, 1, %o0
...@@ -281,11 +343,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -281,11 +343,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
nop nop
/* fall through for 0 < low bits < 8 */ /* fall through for 0 < low bits < 8 */
110: sub %o4, 64, %g2 110: sub %o4, 64, %g2
EX_LD_FP(LOAD_BLK(%g2, %f0)) EX_LD_FP(LOAD_BLK(%g2, %f0), NG2_retl_o2_plus_g1)
1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
EX_LD_FP(LOAD_BLK(%o4, %f16)) EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16) FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16)
EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30) FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30)
subcc %g1, 64, %g1 subcc %g1, 64, %g1
add %o4, 64, %o4 add %o4, 64, %o4
...@@ -296,10 +358,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -296,10 +358,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
120: sub %o4, 56, %g2 120: sub %o4, 56, %g2
FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12) FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12)
1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
EX_LD_FP(LOAD_BLK(%o4, %f16)) EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18) FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18)
EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30) FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30)
subcc %g1, 64, %g1 subcc %g1, 64, %g1
add %o4, 64, %o4 add %o4, 64, %o4
...@@ -310,10 +372,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -310,10 +372,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
130: sub %o4, 48, %g2 130: sub %o4, 48, %g2
FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10) FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10)
1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
EX_LD_FP(LOAD_BLK(%o4, %f16)) EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20) FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20)
EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
FREG_MOVE_6(f20, f22, f24, f26, f28, f30) FREG_MOVE_6(f20, f22, f24, f26, f28, f30)
subcc %g1, 64, %g1 subcc %g1, 64, %g1
add %o4, 64, %o4 add %o4, 64, %o4
...@@ -324,10 +386,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -324,10 +386,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
140: sub %o4, 40, %g2 140: sub %o4, 40, %g2
FREG_LOAD_5(%g2, f0, f2, f4, f6, f8) FREG_LOAD_5(%g2, f0, f2, f4, f6, f8)
1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
EX_LD_FP(LOAD_BLK(%o4, %f16)) EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22) FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22)
EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
FREG_MOVE_5(f22, f24, f26, f28, f30) FREG_MOVE_5(f22, f24, f26, f28, f30)
subcc %g1, 64, %g1 subcc %g1, 64, %g1
add %o4, 64, %o4 add %o4, 64, %o4
...@@ -338,10 +400,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -338,10 +400,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
150: sub %o4, 32, %g2 150: sub %o4, 32, %g2
FREG_LOAD_4(%g2, f0, f2, f4, f6) FREG_LOAD_4(%g2, f0, f2, f4, f6)
1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
EX_LD_FP(LOAD_BLK(%o4, %f16)) EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24) FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24)
EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
FREG_MOVE_4(f24, f26, f28, f30) FREG_MOVE_4(f24, f26, f28, f30)
subcc %g1, 64, %g1 subcc %g1, 64, %g1
add %o4, 64, %o4 add %o4, 64, %o4
...@@ -352,10 +414,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -352,10 +414,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
160: sub %o4, 24, %g2 160: sub %o4, 24, %g2
FREG_LOAD_3(%g2, f0, f2, f4) FREG_LOAD_3(%g2, f0, f2, f4)
1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
EX_LD_FP(LOAD_BLK(%o4, %f16)) EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26) FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26)
EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
FREG_MOVE_3(f26, f28, f30) FREG_MOVE_3(f26, f28, f30)
subcc %g1, 64, %g1 subcc %g1, 64, %g1
add %o4, 64, %o4 add %o4, 64, %o4
...@@ -366,10 +428,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -366,10 +428,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
170: sub %o4, 16, %g2 170: sub %o4, 16, %g2
FREG_LOAD_2(%g2, f0, f2) FREG_LOAD_2(%g2, f0, f2)
1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
EX_LD_FP(LOAD_BLK(%o4, %f16)) EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28) FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28)
EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
FREG_MOVE_2(f28, f30) FREG_MOVE_2(f28, f30)
subcc %g1, 64, %g1 subcc %g1, 64, %g1
add %o4, 64, %o4 add %o4, 64, %o4
...@@ -380,10 +442,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -380,10 +442,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
180: sub %o4, 8, %g2 180: sub %o4, 8, %g2
FREG_LOAD_1(%g2, f0) FREG_LOAD_1(%g2, f0)
1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
EX_LD_FP(LOAD_BLK(%o4, %f16)) EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30) FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30)
EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
FREG_MOVE_1(f30) FREG_MOVE_1(f30)
subcc %g1, 64, %g1 subcc %g1, 64, %g1
add %o4, 64, %o4 add %o4, 64, %o4
...@@ -393,10 +455,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -393,10 +455,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
nop nop
190: 190:
1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
subcc %g1, 64, %g1 subcc %g1, 64, %g1
EX_LD_FP(LOAD_BLK(%o4, %f0)) EX_LD_FP(LOAD_BLK(%o4, %f0), NG2_retl_o2_plus_g1_plus_64)
EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1_plus_64)
add %o4, 64, %o4 add %o4, 64, %o4
bne,pt %xcc, 1b bne,pt %xcc, 1b
LOAD(prefetch, %o4 + 64, #one_read) LOAD(prefetch, %o4 + 64, #one_read)
...@@ -423,28 +485,28 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -423,28 +485,28 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
andn %o2, 0xf, %o4 andn %o2, 0xf, %o4
and %o2, 0xf, %o2 and %o2, 0xf, %o2
1: subcc %o4, 0x10, %o4 1: subcc %o4, 0x10, %o4
EX_LD(LOAD(ldx, %o1, %o5)) EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_o4_plus_16)
add %o1, 0x08, %o1 add %o1, 0x08, %o1
EX_LD(LOAD(ldx, %o1, %g1)) EX_LD(LOAD(ldx, %o1, %g1), NG2_retl_o2_plus_o4_plus_16)
sub %o1, 0x08, %o1 sub %o1, 0x08, %o1
EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_16)
add %o1, 0x8, %o1 add %o1, 0x8, %o1
EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE)) EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_8)
bgu,pt %XCC, 1b bgu,pt %XCC, 1b
add %o1, 0x8, %o1 add %o1, 0x8, %o1
73: andcc %o2, 0x8, %g0 73: andcc %o2, 0x8, %g0
be,pt %XCC, 1f be,pt %XCC, 1f
nop nop
sub %o2, 0x8, %o2 sub %o2, 0x8, %o2
EX_LD(LOAD(ldx, %o1, %o5)) EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_8)
EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_8)
add %o1, 0x8, %o1 add %o1, 0x8, %o1
1: andcc %o2, 0x4, %g0 1: andcc %o2, 0x4, %g0
be,pt %XCC, 1f be,pt %XCC, 1f
nop nop
sub %o2, 0x4, %o2 sub %o2, 0x4, %o2
EX_LD(LOAD(lduw, %o1, %o5)) EX_LD(LOAD(lduw, %o1, %o5), NG2_retl_o2_plus_4)
EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE)) EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
add %o1, 0x4, %o1 add %o1, 0x4, %o1
1: cmp %o2, 0 1: cmp %o2, 0
be,pt %XCC, 85f be,pt %XCC, 85f
...@@ -460,8 +522,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -460,8 +522,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
sub %o2, %g1, %o2 sub %o2, %g1, %o2
1: subcc %g1, 1, %g1 1: subcc %g1, 1, %g1
EX_LD(LOAD(ldub, %o1, %o5)) EX_LD(LOAD(ldub, %o1, %o5), NG2_retl_o2_plus_g1_plus_1)
EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE)) EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_g1_plus_1)
bgu,pt %icc, 1b bgu,pt %icc, 1b
add %o1, 1, %o1 add %o1, 1, %o1
...@@ -477,16 +539,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -477,16 +539,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
8: mov 64, GLOBAL_SPARE 8: mov 64, GLOBAL_SPARE
andn %o1, 0x7, %o1 andn %o1, 0x7, %o1
EX_LD(LOAD(ldx, %o1, %g2)) EX_LD(LOAD(ldx, %o1, %g2), NG2_retl_o2)
sub GLOBAL_SPARE, %g1, GLOBAL_SPARE sub GLOBAL_SPARE, %g1, GLOBAL_SPARE
andn %o2, 0x7, %o4 andn %o2, 0x7, %o4
sllx %g2, %g1, %g2 sllx %g2, %g1, %g2
1: add %o1, 0x8, %o1 1: add %o1, 0x8, %o1
EX_LD(LOAD(ldx, %o1, %g3)) EX_LD(LOAD(ldx, %o1, %g3), NG2_retl_o2_and_7_plus_o4)
subcc %o4, 0x8, %o4 subcc %o4, 0x8, %o4
srlx %g3, GLOBAL_SPARE, %o5 srlx %g3, GLOBAL_SPARE, %o5
or %o5, %g2, %o5 or %o5, %g2, %o5
EX_ST(STORE(stx, %o5, %o0)) EX_ST(STORE(stx, %o5, %o0), NG2_retl_o2_and_7_plus_o4_plus_8)
add %o0, 0x8, %o0 add %o0, 0x8, %o0
bgu,pt %icc, 1b bgu,pt %icc, 1b
sllx %g3, %g1, %g2 sllx %g3, %g1, %g2
...@@ -506,8 +568,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -506,8 +568,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1: 1:
subcc %o2, 4, %o2 subcc %o2, 4, %o2
EX_LD(LOAD(lduw, %o1, %g1)) EX_LD(LOAD(lduw, %o1, %g1), NG2_retl_o2_plus_4)
EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE)) EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
bgu,pt %XCC, 1b bgu,pt %XCC, 1b
add %o1, 4, %o1 add %o1, 4, %o1
...@@ -517,8 +579,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -517,8 +579,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
.align 32 .align 32
90: 90:
subcc %o2, 1, %o2 subcc %o2, 1, %o2
EX_LD(LOAD(ldub, %o1, %g1)) EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_1)
EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE)) EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_1)
bgu,pt %XCC, 90b bgu,pt %XCC, 90b
add %o1, 1, %o1 add %o1, 1, %o1
retl retl
......
@@ -3,19 +3,19 @@
* Copyright (C) 2012 David S. Miller (davem@davemloft.net)
*/
-#define EX_LD(x) \
+#define EX_LD(x, y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
-.word 98b, __retl_one_asi;\
+.word 98b, y; \
.text; \
.align 4;
-#define EX_LD_FP(x) \
+#define EX_LD_FP(x,y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
-.word 98b, __retl_one_asi_fp;\
+.word 98b, y##_fp; \
.text; \
.align 4;
...
@@ -3,19 +3,19 @@
* Copyright (C) 2012 David S. Miller (davem@davemloft.net)
*/
-#define EX_ST(x) \
+#define EX_ST(x,y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
-.word 98b, __retl_one_asi;\
+.word 98b, y; \
.text; \
.align 4;
-#define EX_ST_FP(x) \
+#define EX_ST_FP(x,y) \
98: x; \
.section __ex_table,"a";\
.align 4; \
-.word 98b, __retl_one_asi_fp;\
+.word 98b, y##_fp; \
.text; \
.align 4;
...
@@ -4,6 +4,7 @@
*/
#ifdef __KERNEL__
+#include <linux/linkage.h>
#include <asm/visasm.h>
#include <asm/asi.h>
#define GLOBAL_SPARE %g7
@@ -46,22 +47,19 @@
#endif
#ifndef EX_LD
-#define EX_LD(x) x
+#define EX_LD(x,y) x
#endif
#ifndef EX_LD_FP
-#define EX_LD_FP(x) x
+#define EX_LD_FP(x,y) x
#endif
#ifndef EX_ST
-#define EX_ST(x) x
+#define EX_ST(x,y) x
#endif
#ifndef EX_ST_FP
-#define EX_ST_FP(x) x
+#define EX_ST_FP(x,y) x
#endif
-#ifndef EX_RETVAL
-#define EX_RETVAL(x) x
-#endif
#ifndef LOAD
#define LOAD(type,addr,dest) type [addr], dest
@@ -94,6 +92,158 @@
.register %g3,#scratch
.text
#ifndef EX_RETVAL
#define EX_RETVAL(x) x
__restore_asi_fp:
VISExitHalf
__restore_asi:
retl
wr %g0, ASI_AIUS, %asi
ENTRY(NG4_retl_o2)
ba,pt %xcc, __restore_asi
mov %o2, %o0
ENDPROC(NG4_retl_o2)
ENTRY(NG4_retl_o2_plus_1)
ba,pt %xcc, __restore_asi
add %o2, 1, %o0
ENDPROC(NG4_retl_o2_plus_1)
ENTRY(NG4_retl_o2_plus_4)
ba,pt %xcc, __restore_asi
add %o2, 4, %o0
ENDPROC(NG4_retl_o2_plus_4)
ENTRY(NG4_retl_o2_plus_o5)
ba,pt %xcc, __restore_asi
add %o2, %o5, %o0
ENDPROC(NG4_retl_o2_plus_o5)
ENTRY(NG4_retl_o2_plus_o5_plus_4)
add %o5, 4, %o5
ba,pt %xcc, __restore_asi
add %o2, %o5, %o0
ENDPROC(NG4_retl_o2_plus_o5_plus_4)
ENTRY(NG4_retl_o2_plus_o5_plus_8)
add %o5, 8, %o5
ba,pt %xcc, __restore_asi
add %o2, %o5, %o0
ENDPROC(NG4_retl_o2_plus_o5_plus_8)
ENTRY(NG4_retl_o2_plus_o5_plus_16)
add %o5, 16, %o5
ba,pt %xcc, __restore_asi
add %o2, %o5, %o0
ENDPROC(NG4_retl_o2_plus_o5_plus_16)
ENTRY(NG4_retl_o2_plus_o5_plus_24)
add %o5, 24, %o5
ba,pt %xcc, __restore_asi
add %o2, %o5, %o0
ENDPROC(NG4_retl_o2_plus_o5_plus_24)
ENTRY(NG4_retl_o2_plus_o5_plus_32)
add %o5, 32, %o5
ba,pt %xcc, __restore_asi
add %o2, %o5, %o0
ENDPROC(NG4_retl_o2_plus_o5_plus_32)
ENTRY(NG4_retl_o2_plus_g1)
ba,pt %xcc, __restore_asi
add %o2, %g1, %o0
ENDPROC(NG4_retl_o2_plus_g1)
ENTRY(NG4_retl_o2_plus_g1_plus_1)
add %g1, 1, %g1
ba,pt %xcc, __restore_asi
add %o2, %g1, %o0
ENDPROC(NG4_retl_o2_plus_g1_plus_1)
ENTRY(NG4_retl_o2_plus_g1_plus_8)
add %g1, 8, %g1
ba,pt %xcc, __restore_asi
add %o2, %g1, %o0
ENDPROC(NG4_retl_o2_plus_g1_plus_8)
ENTRY(NG4_retl_o2_plus_o4)
ba,pt %xcc, __restore_asi
add %o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4)
ENTRY(NG4_retl_o2_plus_o4_plus_8)
add %o4, 8, %o4
ba,pt %xcc, __restore_asi
add %o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_8)
ENTRY(NG4_retl_o2_plus_o4_plus_16)
add %o4, 16, %o4
ba,pt %xcc, __restore_asi
add %o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_16)
ENTRY(NG4_retl_o2_plus_o4_plus_24)
add %o4, 24, %o4
ba,pt %xcc, __restore_asi
add %o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_24)
ENTRY(NG4_retl_o2_plus_o4_plus_32)
add %o4, 32, %o4
ba,pt %xcc, __restore_asi
add %o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_32)
ENTRY(NG4_retl_o2_plus_o4_plus_40)
add %o4, 40, %o4
ba,pt %xcc, __restore_asi
add %o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_40)
ENTRY(NG4_retl_o2_plus_o4_plus_48)
add %o4, 48, %o4
ba,pt %xcc, __restore_asi
add %o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_48)
ENTRY(NG4_retl_o2_plus_o4_plus_56)
add %o4, 56, %o4
ba,pt %xcc, __restore_asi
add %o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_56)
ENTRY(NG4_retl_o2_plus_o4_plus_64)
add %o4, 64, %o4
ba,pt %xcc, __restore_asi
add %o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_64)
ENTRY(NG4_retl_o2_plus_o4_fp)
ba,pt %xcc, __restore_asi_fp
add %o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_fp)
ENTRY(NG4_retl_o2_plus_o4_plus_8_fp)
add %o4, 8, %o4
ba,pt %xcc, __restore_asi_fp
add %o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_8_fp)
ENTRY(NG4_retl_o2_plus_o4_plus_16_fp)
add %o4, 16, %o4
ba,pt %xcc, __restore_asi_fp
add %o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_16_fp)
ENTRY(NG4_retl_o2_plus_o4_plus_24_fp)
add %o4, 24, %o4
ba,pt %xcc, __restore_asi_fp
add %o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_24_fp)
ENTRY(NG4_retl_o2_plus_o4_plus_32_fp)
add %o4, 32, %o4
ba,pt %xcc, __restore_asi_fp
add %o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_32_fp)
ENTRY(NG4_retl_o2_plus_o4_plus_40_fp)
add %o4, 40, %o4
ba,pt %xcc, __restore_asi_fp
add %o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_40_fp)
ENTRY(NG4_retl_o2_plus_o4_plus_48_fp)
add %o4, 48, %o4
ba,pt %xcc, __restore_asi_fp
add %o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_48_fp)
ENTRY(NG4_retl_o2_plus_o4_plus_56_fp)
add %o4, 56, %o4
ba,pt %xcc, __restore_asi_fp
add %o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_56_fp)
ENTRY(NG4_retl_o2_plus_o4_plus_64_fp)
add %o4, 64, %o4
ba,pt %xcc, __restore_asi_fp
add %o2, %o4, %o0
ENDPROC(NG4_retl_o2_plus_o4_plus_64_fp)
#endif
.align 64 .align 64
.globl FUNC_NAME .globl FUNC_NAME
...@@ -124,12 +274,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -124,12 +274,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
brz,pt %g1, 51f brz,pt %g1, 51f
sub %o2, %g1, %o2 sub %o2, %g1, %o2
1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
add %o1, 1, %o1 add %o1, 1, %o1
subcc %g1, 1, %g1 subcc %g1, 1, %g1
add %o0, 1, %o0 add %o0, 1, %o0
bne,pt %icc, 1b bne,pt %icc, 1b
EX_ST(STORE(stb, %g2, %o0 - 0x01)) EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong) 51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong)
LOAD(prefetch, %o1 + 0x080, #n_reads_strong) LOAD(prefetch, %o1 + 0x080, #n_reads_strong)
...@@ -154,43 +305,43 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -154,43 +305,43 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
brz,pt %g1, .Llarge_aligned brz,pt %g1, .Llarge_aligned
sub %o2, %g1, %o2 sub %o2, %g1, %o2
1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2)) 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
add %o1, 8, %o1 add %o1, 8, %o1
subcc %g1, 8, %g1 subcc %g1, 8, %g1
add %o0, 8, %o0 add %o0, 8, %o0
bne,pt %icc, 1b bne,pt %icc, 1b
EX_ST(STORE(stx, %g2, %o0 - 0x08)) EX_ST(STORE(stx, %g2, %o0 - 0x08), NG4_retl_o2_plus_g1_plus_8)
.Llarge_aligned: .Llarge_aligned:
/* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */ /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */
andn %o2, 0x3f, %o4 andn %o2, 0x3f, %o4
sub %o2, %o4, %o2 sub %o2, %o4, %o2
1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o4)
add %o1, 0x40, %o1 add %o1, 0x40, %o1
EX_LD(LOAD(ldx, %o1 - 0x38, %g2)) EX_LD(LOAD(ldx, %o1 - 0x38, %g2), NG4_retl_o2_plus_o4)
subcc %o4, 0x40, %o4 subcc %o4, 0x40, %o4
EX_LD(LOAD(ldx, %o1 - 0x30, %g3)) EX_LD(LOAD(ldx, %o1 - 0x30, %g3), NG4_retl_o2_plus_o4_plus_64)
EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE)) EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_64)
EX_LD(LOAD(ldx, %o1 - 0x20, %o5)) EX_LD(LOAD(ldx, %o1 - 0x20, %o5), NG4_retl_o2_plus_o4_plus_64)
EX_ST(STORE_INIT(%g1, %o0)) EX_ST(STORE_INIT(%g1, %o0), NG4_retl_o2_plus_o4_plus_64)
add %o0, 0x08, %o0 add %o0, 0x08, %o0
EX_ST(STORE_INIT(%g2, %o0)) EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_56)
add %o0, 0x08, %o0 add %o0, 0x08, %o0
EX_LD(LOAD(ldx, %o1 - 0x18, %g2)) EX_LD(LOAD(ldx, %o1 - 0x18, %g2), NG4_retl_o2_plus_o4_plus_48)
EX_ST(STORE_INIT(%g3, %o0)) EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_48)
add %o0, 0x08, %o0 add %o0, 0x08, %o0
EX_LD(LOAD(ldx, %o1 - 0x10, %g3)) EX_LD(LOAD(ldx, %o1 - 0x10, %g3), NG4_retl_o2_plus_o4_plus_40)
EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_40)
add %o0, 0x08, %o0 add %o0, 0x08, %o0
EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE)) EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_32)
EX_ST(STORE_INIT(%o5, %o0)) EX_ST(STORE_INIT(%o5, %o0), NG4_retl_o2_plus_o4_plus_32)
add %o0, 0x08, %o0 add %o0, 0x08, %o0
EX_ST(STORE_INIT(%g2, %o0)) EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_24)
add %o0, 0x08, %o0 add %o0, 0x08, %o0
EX_ST(STORE_INIT(%g3, %o0)) EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_16)
add %o0, 0x08, %o0 add %o0, 0x08, %o0
EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_8)
add %o0, 0x08, %o0 add %o0, 0x08, %o0
bne,pt %icc, 1b bne,pt %icc, 1b
LOAD(prefetch, %o1 + 0x200, #n_reads_strong) LOAD(prefetch, %o1 + 0x200, #n_reads_strong)
...@@ -216,17 +367,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -216,17 +367,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
sub %o2, %o4, %o2 sub %o2, %o4, %o2
alignaddr %o1, %g0, %g1 alignaddr %o1, %g0, %g1
add %o1, %o4, %o1 add %o1, %o4, %o1
EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0)) EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), NG4_retl_o2_plus_o4)
1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2)) 1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), NG4_retl_o2_plus_o4)
subcc %o4, 0x40, %o4 subcc %o4, 0x40, %o4
EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4)) EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), NG4_retl_o2_plus_o4_plus_64)
EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6)) EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), NG4_retl_o2_plus_o4_plus_64)
EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8)) EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), NG4_retl_o2_plus_o4_plus_64)
EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10)) EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), NG4_retl_o2_plus_o4_plus_64)
EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12)) EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), NG4_retl_o2_plus_o4_plus_64)
EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14)) EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), NG4_retl_o2_plus_o4_plus_64)
faligndata %f0, %f2, %f16 faligndata %f0, %f2, %f16
EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0)) EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), NG4_retl_o2_plus_o4_plus_64)
faligndata %f2, %f4, %f18 faligndata %f2, %f4, %f18
add %g1, 0x40, %g1 add %g1, 0x40, %g1
faligndata %f4, %f6, %f20 faligndata %f4, %f6, %f20
...@@ -235,14 +386,14 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -235,14 +386,14 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
faligndata %f10, %f12, %f26 faligndata %f10, %f12, %f26
faligndata %f12, %f14, %f28 faligndata %f12, %f14, %f28
faligndata %f14, %f0, %f30 faligndata %f14, %f0, %f30
EX_ST_FP(STORE(std, %f16, %o0 + 0x00)) EX_ST_FP(STORE(std, %f16, %o0 + 0x00), NG4_retl_o2_plus_o4_plus_64)
EX_ST_FP(STORE(std, %f18, %o0 + 0x08)) EX_ST_FP(STORE(std, %f18, %o0 + 0x08), NG4_retl_o2_plus_o4_plus_56)
EX_ST_FP(STORE(std, %f20, %o0 + 0x10)) EX_ST_FP(STORE(std, %f20, %o0 + 0x10), NG4_retl_o2_plus_o4_plus_48)
EX_ST_FP(STORE(std, %f22, %o0 + 0x18)) EX_ST_FP(STORE(std, %f22, %o0 + 0x18), NG4_retl_o2_plus_o4_plus_40)
EX_ST_FP(STORE(std, %f24, %o0 + 0x20)) EX_ST_FP(STORE(std, %f24, %o0 + 0x20), NG4_retl_o2_plus_o4_plus_32)
EX_ST_FP(STORE(std, %f26, %o0 + 0x28)) EX_ST_FP(STORE(std, %f26, %o0 + 0x28), NG4_retl_o2_plus_o4_plus_24)
EX_ST_FP(STORE(std, %f28, %o0 + 0x30)) EX_ST_FP(STORE(std, %f28, %o0 + 0x30), NG4_retl_o2_plus_o4_plus_16)
EX_ST_FP(STORE(std, %f30, %o0 + 0x38)) EX_ST_FP(STORE(std, %f30, %o0 + 0x38), NG4_retl_o2_plus_o4_plus_8)
add %o0, 0x40, %o0 add %o0, 0x40, %o0
bne,pt %icc, 1b bne,pt %icc, 1b
LOAD(prefetch, %g1 + 0x200, #n_reads_strong) LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
...@@ -270,37 +421,38 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -270,37 +421,38 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
andncc %o2, 0x20 - 1, %o5 andncc %o2, 0x20 - 1, %o5
be,pn %icc, 2f be,pn %icc, 2f
sub %o2, %o5, %o2 sub %o2, %o5, %o2
1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
EX_LD(LOAD(ldx, %o1 + 0x08, %g2)) EX_LD(LOAD(ldx, %o1 + 0x08, %g2), NG4_retl_o2_plus_o5)
EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE)) EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), NG4_retl_o2_plus_o5)
EX_LD(LOAD(ldx, %o1 + 0x18, %o4)) EX_LD(LOAD(ldx, %o1 + 0x18, %o4), NG4_retl_o2_plus_o5)
add %o1, 0x20, %o1 add %o1, 0x20, %o1
subcc %o5, 0x20, %o5 subcc %o5, 0x20, %o5
EX_ST(STORE(stx, %g1, %o0 + 0x00)) EX_ST(STORE(stx, %g1, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_32)
EX_ST(STORE(stx, %g2, %o0 + 0x08)) EX_ST(STORE(stx, %g2, %o0 + 0x08), NG4_retl_o2_plus_o5_plus_24)
EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10)) EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), NG4_retl_o2_plus_o5_plus_24)
EX_ST(STORE(stx, %o4, %o0 + 0x18)) EX_ST(STORE(stx, %o4, %o0 + 0x18), NG4_retl_o2_plus_o5_plus_8)
bne,pt %icc, 1b bne,pt %icc, 1b
add %o0, 0x20, %o0 add %o0, 0x20, %o0
2: andcc %o2, 0x18, %o5 2: andcc %o2, 0x18, %o5
be,pt %icc, 3f be,pt %icc, 3f
sub %o2, %o5, %o2 sub %o2, %o5, %o2
1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
add %o1, 0x08, %o1 add %o1, 0x08, %o1
add %o0, 0x08, %o0 add %o0, 0x08, %o0
subcc %o5, 0x08, %o5 subcc %o5, 0x08, %o5
bne,pt %icc, 1b bne,pt %icc, 1b
EX_ST(STORE(stx, %g1, %o0 - 0x08)) EX_ST(STORE(stx, %g1, %o0 - 0x08), NG4_retl_o2_plus_o5_plus_8)
3: brz,pt %o2, .Lexit 3: brz,pt %o2, .Lexit
cmp %o2, 0x04 cmp %o2, 0x04
bl,pn %icc, .Ltiny bl,pn %icc, .Ltiny
nop nop
EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2)
add %o1, 0x04, %o1 add %o1, 0x04, %o1
add %o0, 0x04, %o0 add %o0, 0x04, %o0
subcc %o2, 0x04, %o2 subcc %o2, 0x04, %o2
bne,pn %icc, .Ltiny bne,pn %icc, .Ltiny
EX_ST(STORE(stw, %g1, %o0 - 0x04)) EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_4)
ba,a,pt %icc, .Lexit ba,a,pt %icc, .Lexit
.Lmedium_unaligned: .Lmedium_unaligned:
/* First get dest 8 byte aligned. */ /* First get dest 8 byte aligned. */
...@@ -309,12 +461,12 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -309,12 +461,12 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
brz,pt %g1, 2f brz,pt %g1, 2f
sub %o2, %g1, %o2 sub %o2, %g1, %o2
1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
add %o1, 1, %o1 add %o1, 1, %o1
subcc %g1, 1, %g1 subcc %g1, 1, %g1
add %o0, 1, %o0 add %o0, 1, %o0
bne,pt %icc, 1b bne,pt %icc, 1b
EX_ST(STORE(stb, %g2, %o0 - 0x01)) EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
2: 2:
and %o1, 0x7, %g1 and %o1, 0x7, %g1
brz,pn %g1, .Lmedium_noprefetch brz,pn %g1, .Lmedium_noprefetch
...@@ -322,16 +474,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -322,16 +474,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
mov 64, %g2 mov 64, %g2
sub %g2, %g1, %g2 sub %g2, %g1, %g2
andn %o1, 0x7, %o1 andn %o1, 0x7, %o1
EX_LD(LOAD(ldx, %o1 + 0x00, %o4)) EX_LD(LOAD(ldx, %o1 + 0x00, %o4), NG4_retl_o2)
sllx %o4, %g1, %o4 sllx %o4, %g1, %o4
andn %o2, 0x08 - 1, %o5 andn %o2, 0x08 - 1, %o5
sub %o2, %o5, %o2 sub %o2, %o5, %o2
1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3)) 1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3), NG4_retl_o2_plus_o5)
add %o1, 0x08, %o1 add %o1, 0x08, %o1
subcc %o5, 0x08, %o5 subcc %o5, 0x08, %o5
srlx %g3, %g2, GLOBAL_SPARE srlx %g3, %g2, GLOBAL_SPARE
or GLOBAL_SPARE, %o4, GLOBAL_SPARE or GLOBAL_SPARE, %o4, GLOBAL_SPARE
EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00)) EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_8)
add %o0, 0x08, %o0 add %o0, 0x08, %o0
bne,pt %icc, 1b bne,pt %icc, 1b
sllx %g3, %g1, %o4 sllx %g3, %g1, %o4
...@@ -342,17 +494,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -342,17 +494,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
ba,pt %icc, .Lsmall_unaligned ba,pt %icc, .Lsmall_unaligned
.Ltiny: .Ltiny:
EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
subcc %o2, 1, %o2 subcc %o2, 1, %o2
be,pn %icc, .Lexit be,pn %icc, .Lexit
EX_ST(STORE(stb, %g1, %o0 + 0x00)) EX_ST(STORE(stb, %g1, %o0 + 0x00), NG4_retl_o2_plus_1)
EX_LD(LOAD(ldub, %o1 + 0x01, %g1)) EX_LD(LOAD(ldub, %o1 + 0x01, %g1), NG4_retl_o2)
subcc %o2, 1, %o2 subcc %o2, 1, %o2
be,pn %icc, .Lexit be,pn %icc, .Lexit
EX_ST(STORE(stb, %g1, %o0 + 0x01)) EX_ST(STORE(stb, %g1, %o0 + 0x01), NG4_retl_o2_plus_1)
EX_LD(LOAD(ldub, %o1 + 0x02, %g1)) EX_LD(LOAD(ldub, %o1 + 0x02, %g1), NG4_retl_o2)
ba,pt %icc, .Lexit ba,pt %icc, .Lexit
EX_ST(STORE(stb, %g1, %o0 + 0x02)) EX_ST(STORE(stb, %g1, %o0 + 0x02), NG4_retl_o2)
.Lsmall: .Lsmall:
andcc %g2, 0x3, %g0 andcc %g2, 0x3, %g0
...@@ -360,22 +512,22 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -360,22 +512,22 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
andn %o2, 0x4 - 1, %o5 andn %o2, 0x4 - 1, %o5
sub %o2, %o5, %o2 sub %o2, %o5, %o2
1: 1:
EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
add %o1, 0x04, %o1 add %o1, 0x04, %o1
subcc %o5, 0x04, %o5 subcc %o5, 0x04, %o5
add %o0, 0x04, %o0 add %o0, 0x04, %o0
bne,pt %icc, 1b bne,pt %icc, 1b
EX_ST(STORE(stw, %g1, %o0 - 0x04)) EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_o5_plus_4)
brz,pt %o2, .Lexit brz,pt %o2, .Lexit
nop nop
ba,a,pt %icc, .Ltiny ba,a,pt %icc, .Ltiny
.Lsmall_unaligned: .Lsmall_unaligned:
1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
add %o1, 1, %o1 add %o1, 1, %o1
add %o0, 1, %o0 add %o0, 1, %o0
subcc %o2, 1, %o2 subcc %o2, 1, %o2
bne,pt %icc, 1b bne,pt %icc, 1b
EX_ST(STORE(stb, %g1, %o0 - 0x01)) EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1)
ba,a,pt %icc, .Lexit ba,a,pt %icc, .Lexit
.size FUNC_NAME, .-FUNC_NAME .size FUNC_NAME, .-FUNC_NAME
...@@ -3,11 +3,11 @@ ...@@ -3,11 +3,11 @@
* Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net) * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
*/ */
#define EX_LD(x) \ #define EX_LD(x,y) \
98: x; \ 98: x; \
.section __ex_table,"a";\ .section __ex_table,"a";\
.align 4; \ .align 4; \
.word 98b, __ret_one_asi;\ .word 98b, y; \
.text; \ .text; \
.align 4; .align 4;
......
...@@ -3,11 +3,11 @@ ...@@ -3,11 +3,11 @@
* Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net) * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
*/ */
#define EX_ST(x) \ #define EX_ST(x,y) \
98: x; \ 98: x; \
.section __ex_table,"a";\ .section __ex_table,"a";\
.align 4; \ .align 4; \
.word 98b, __ret_one_asi;\ .word 98b, y; \
.text; \ .text; \
.align 4; .align 4;
......
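The old single-argument EX_LD()/EX_ST() macros above pointed every faulting load or store at one generic handler (__ret_one_asi), which could only guess at the residual. The two-argument form lets each access name its own fixup label, so the handler reached through the exception table can report exactly how many bytes were still uncopied at that point. A rough C model of the mechanism follows; the field names, widths and find_fixup() helper are illustrative, not the real arch/sparc exception_table_entry layout or lookup code.

struct ex_entry {
	unsigned int insn;	/* location of the "98:" load/store that may fault */
	unsigned int fixup;	/* per-site handler, e.g. NG4_retl_o2_plus_o5 */
};

/* On a fault inside the copy routine, the trap path looks up the faulting
 * PC and continues at the matching fixup, which computes the residual
 * length and returns it to the caller. */
static unsigned int find_fixup(const struct ex_entry *tab, int n, unsigned int pc)
{
	int i;

	for (i = 0; i < n; i++)
		if (tab[i].insn == pc)
			return tab[i].fixup;
	return 0;
}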
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
*/ */
#ifdef __KERNEL__ #ifdef __KERNEL__
#include <linux/linkage.h>
#include <asm/asi.h> #include <asm/asi.h>
#include <asm/thread_info.h> #include <asm/thread_info.h>
#define GLOBAL_SPARE %g7 #define GLOBAL_SPARE %g7
...@@ -27,15 +28,11 @@ ...@@ -27,15 +28,11 @@
#endif #endif
#ifndef EX_LD #ifndef EX_LD
#define EX_LD(x) x #define EX_LD(x,y) x
#endif #endif
#ifndef EX_ST #ifndef EX_ST
#define EX_ST(x) x #define EX_ST(x,y) x
#endif
#ifndef EX_RETVAL
#define EX_RETVAL(x) x
#endif #endif
#ifndef LOAD #ifndef LOAD
...@@ -79,6 +76,92 @@ ...@@ -79,6 +76,92 @@
.register %g3,#scratch .register %g3,#scratch
.text .text
#ifndef EX_RETVAL
#define EX_RETVAL(x) x
__restore_asi:
ret
wr %g0, ASI_AIUS, %asi
restore
ENTRY(NG_ret_i2_plus_i4_plus_1)
ba,pt %xcc, __restore_asi
add %i2, %i5, %i0
ENDPROC(NG_ret_i2_plus_i4_plus_1)
ENTRY(NG_ret_i2_plus_g1)
ba,pt %xcc, __restore_asi
add %i2, %g1, %i0
ENDPROC(NG_ret_i2_plus_g1)
ENTRY(NG_ret_i2_plus_g1_minus_8)
sub %g1, 8, %g1
ba,pt %xcc, __restore_asi
add %i2, %g1, %i0
ENDPROC(NG_ret_i2_plus_g1_minus_8)
ENTRY(NG_ret_i2_plus_g1_minus_16)
sub %g1, 16, %g1
ba,pt %xcc, __restore_asi
add %i2, %g1, %i0
ENDPROC(NG_ret_i2_plus_g1_minus_16)
ENTRY(NG_ret_i2_plus_g1_minus_24)
sub %g1, 24, %g1
ba,pt %xcc, __restore_asi
add %i2, %g1, %i0
ENDPROC(NG_ret_i2_plus_g1_minus_24)
ENTRY(NG_ret_i2_plus_g1_minus_32)
sub %g1, 32, %g1
ba,pt %xcc, __restore_asi
add %i2, %g1, %i0
ENDPROC(NG_ret_i2_plus_g1_minus_32)
ENTRY(NG_ret_i2_plus_g1_minus_40)
sub %g1, 40, %g1
ba,pt %xcc, __restore_asi
add %i2, %g1, %i0
ENDPROC(NG_ret_i2_plus_g1_minus_40)
ENTRY(NG_ret_i2_plus_g1_minus_48)
sub %g1, 48, %g1
ba,pt %xcc, __restore_asi
add %i2, %g1, %i0
ENDPROC(NG_ret_i2_plus_g1_minus_48)
ENTRY(NG_ret_i2_plus_g1_minus_56)
sub %g1, 56, %g1
ba,pt %xcc, __restore_asi
add %i2, %g1, %i0
ENDPROC(NG_ret_i2_plus_g1_minus_56)
ENTRY(NG_ret_i2_plus_i4)
ba,pt %xcc, __restore_asi
add %i2, %i4, %i0
ENDPROC(NG_ret_i2_plus_i4)
ENTRY(NG_ret_i2_plus_i4_minus_8)
sub %i4, 8, %i4
ba,pt %xcc, __restore_asi
add %i2, %i4, %i0
ENDPROC(NG_ret_i2_plus_i4_minus_8)
ENTRY(NG_ret_i2_plus_8)
ba,pt %xcc, __restore_asi
add %i2, 8, %i0
ENDPROC(NG_ret_i2_plus_8)
ENTRY(NG_ret_i2_plus_4)
ba,pt %xcc, __restore_asi
add %i2, 4, %i0
ENDPROC(NG_ret_i2_plus_4)
ENTRY(NG_ret_i2_plus_1)
ba,pt %xcc, __restore_asi
add %i2, 1, %i0
ENDPROC(NG_ret_i2_plus_1)
ENTRY(NG_ret_i2_plus_g1_plus_1)
add %g1, 1, %g1
ba,pt %xcc, __restore_asi
add %i2, %g1, %i0
ENDPROC(NG_ret_i2_plus_g1_plus_1)
ENTRY(NG_ret_i2)
ba,pt %xcc, __restore_asi
mov %i2, %i0
ENDPROC(NG_ret_i2)
ENTRY(NG_ret_i2_and_7_plus_i4)
and %i2, 7, %i2
ba,pt %xcc, __restore_asi
add %i2, %i4, %i0
ENDPROC(NG_ret_i2_and_7_plus_i4)
#endif
.align 64 .align 64
.globl FUNC_NAME .globl FUNC_NAME
...@@ -126,8 +209,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ ...@@ -126,8 +209,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
sub %g0, %i4, %i4 ! bytes to align dst sub %g0, %i4, %i4 ! bytes to align dst
sub %i2, %i4, %i2 sub %i2, %i4, %i2
1: subcc %i4, 1, %i4 1: subcc %i4, 1, %i4
EX_LD(LOAD(ldub, %i1, %g1)) EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_i4_plus_1)
EX_ST(STORE(stb, %g1, %o0)) EX_ST(STORE(stb, %g1, %o0), NG_ret_i2_plus_i4_plus_1)
add %i1, 1, %i1 add %i1, 1, %i1
bne,pt %XCC, 1b bne,pt %XCC, 1b
add %o0, 1, %o0 add %o0, 1, %o0
...@@ -160,7 +243,7 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ ...@@ -160,7 +243,7 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
and %i4, 0x7, GLOBAL_SPARE and %i4, 0x7, GLOBAL_SPARE
sll GLOBAL_SPARE, 3, GLOBAL_SPARE sll GLOBAL_SPARE, 3, GLOBAL_SPARE
mov 64, %i5 mov 64, %i5
EX_LD(LOAD_TWIN(%i1, %g2, %g3)) EX_LD(LOAD_TWIN(%i1, %g2, %g3), NG_ret_i2_plus_g1)
sub %i5, GLOBAL_SPARE, %i5 sub %i5, GLOBAL_SPARE, %i5
mov 16, %o4 mov 16, %o4
mov 32, %o5 mov 32, %o5
...@@ -178,31 +261,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ ...@@ -178,31 +261,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
srlx WORD3, PRE_SHIFT, TMP; \ srlx WORD3, PRE_SHIFT, TMP; \
or WORD2, TMP, WORD2; or WORD2, TMP, WORD2;
8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3)) 8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1) MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
LOAD(prefetch, %i1 + %i3, #one_read) LOAD(prefetch, %i1 + %i3, #one_read)
EX_ST(STORE_INIT(%g2, %o0 + 0x00)) EX_ST(STORE_INIT(%g2, %o0 + 0x00), NG_ret_i2_plus_g1)
EX_ST(STORE_INIT(%g3, %o0 + 0x08)) EX_ST(STORE_INIT(%g3, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3)) EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1) MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
EX_ST(STORE_INIT(%o2, %o0 + 0x10)) EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
EX_ST(STORE_INIT(%o3, %o0 + 0x18)) EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1) MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
EX_ST(STORE_INIT(%g2, %o0 + 0x20)) EX_ST(STORE_INIT(%g2, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
EX_ST(STORE_INIT(%g3, %o0 + 0x28)) EX_ST(STORE_INIT(%g3, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3)) EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
add %i1, 64, %i1 add %i1, 64, %i1
MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1) MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
EX_ST(STORE_INIT(%o2, %o0 + 0x30)) EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
EX_ST(STORE_INIT(%o3, %o0 + 0x38)) EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
subcc %g1, 64, %g1 subcc %g1, 64, %g1
bne,pt %XCC, 8b bne,pt %XCC, 8b
...@@ -211,31 +294,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ ...@@ -211,31 +294,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
ba,pt %XCC, 60f ba,pt %XCC, 60f
add %i1, %i4, %i1 add %i1, %i4, %i1
9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3)) 9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1) MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
LOAD(prefetch, %i1 + %i3, #one_read) LOAD(prefetch, %i1 + %i3, #one_read)
EX_ST(STORE_INIT(%g3, %o0 + 0x00)) EX_ST(STORE_INIT(%g3, %o0 + 0x00), NG_ret_i2_plus_g1)
EX_ST(STORE_INIT(%o2, %o0 + 0x08)) EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3)) EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1) MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
EX_ST(STORE_INIT(%o3, %o0 + 0x10)) EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
EX_ST(STORE_INIT(%g2, %o0 + 0x18)) EX_ST(STORE_INIT(%g2, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1) MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
EX_ST(STORE_INIT(%g3, %o0 + 0x20)) EX_ST(STORE_INIT(%g3, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
EX_ST(STORE_INIT(%o2, %o0 + 0x28)) EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3)) EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
add %i1, 64, %i1 add %i1, 64, %i1
MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1) MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
EX_ST(STORE_INIT(%o3, %o0 + 0x30)) EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
EX_ST(STORE_INIT(%g2, %o0 + 0x38)) EX_ST(STORE_INIT(%g2, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
subcc %g1, 64, %g1 subcc %g1, 64, %g1
bne,pt %XCC, 9b bne,pt %XCC, 9b
...@@ -249,25 +332,25 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ ...@@ -249,25 +332,25 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
* one twin load ahead, then add 8 back into source when * one twin load ahead, then add 8 back into source when
* we finish the loop. * we finish the loop.
*/ */
EX_LD(LOAD_TWIN(%i1, %o4, %o5)) EX_LD(LOAD_TWIN(%i1, %o4, %o5), NG_ret_i2_plus_g1)
mov 16, %o7 mov 16, %o7
mov 32, %g2 mov 32, %g2
mov 48, %g3 mov 48, %g3
mov 64, %o1 mov 64, %o1
1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) 1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
LOAD(prefetch, %i1 + %o1, #one_read) LOAD(prefetch, %i1 + %o1, #one_read)
EX_ST(STORE_INIT(%o5, %o0 + 0x00)) ! initializes cache line EX_ST(STORE_INIT(%o5, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line
EX_ST(STORE_INIT(%o2, %o0 + 0x08)) EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5)) EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
EX_ST(STORE_INIT(%o3, %o0 + 0x10)) EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
EX_ST(STORE_INIT(%o4, %o0 + 0x18)) EX_ST(STORE_INIT(%o4, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3)) EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
EX_ST(STORE_INIT(%o5, %o0 + 0x20)) EX_ST(STORE_INIT(%o5, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
EX_ST(STORE_INIT(%o2, %o0 + 0x28)) EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5)) EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5), NG_ret_i2_plus_g1_minus_48)
add %i1, 64, %i1 add %i1, 64, %i1
EX_ST(STORE_INIT(%o3, %o0 + 0x30)) EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
EX_ST(STORE_INIT(%o4, %o0 + 0x38)) EX_ST(STORE_INIT(%o4, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
subcc %g1, 64, %g1 subcc %g1, 64, %g1
bne,pt %XCC, 1b bne,pt %XCC, 1b
add %o0, 64, %o0 add %o0, 64, %o0
...@@ -282,20 +365,20 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ ...@@ -282,20 +365,20 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
mov 32, %g2 mov 32, %g2
mov 48, %g3 mov 48, %g3
mov 64, %o1 mov 64, %o1
1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5)) 1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5), NG_ret_i2_plus_g1)
EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
LOAD(prefetch, %i1 + %o1, #one_read) LOAD(prefetch, %i1 + %o1, #one_read)
EX_ST(STORE_INIT(%o4, %o0 + 0x00)) ! initializes cache line EX_ST(STORE_INIT(%o4, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line
EX_ST(STORE_INIT(%o5, %o0 + 0x08)) EX_ST(STORE_INIT(%o5, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5)) EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
EX_ST(STORE_INIT(%o2, %o0 + 0x10)) EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
EX_ST(STORE_INIT(%o3, %o0 + 0x18)) EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3)) EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
add %i1, 64, %i1 add %i1, 64, %i1
EX_ST(STORE_INIT(%o4, %o0 + 0x20)) EX_ST(STORE_INIT(%o4, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
EX_ST(STORE_INIT(%o5, %o0 + 0x28)) EX_ST(STORE_INIT(%o5, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
EX_ST(STORE_INIT(%o2, %o0 + 0x30)) EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
EX_ST(STORE_INIT(%o3, %o0 + 0x38)) EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
subcc %g1, 64, %g1 subcc %g1, 64, %g1
bne,pt %XCC, 1b bne,pt %XCC, 1b
add %o0, 64, %o0 add %o0, 64, %o0
...@@ -321,28 +404,28 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ ...@@ -321,28 +404,28 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
andn %i2, 0xf, %i4 andn %i2, 0xf, %i4
and %i2, 0xf, %i2 and %i2, 0xf, %i2
1: subcc %i4, 0x10, %i4 1: subcc %i4, 0x10, %i4
EX_LD(LOAD(ldx, %i1, %o4)) EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4)
add %i1, 0x08, %i1 add %i1, 0x08, %i1
EX_LD(LOAD(ldx, %i1, %g1)) EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4)
sub %i1, 0x08, %i1 sub %i1, 0x08, %i1
EX_ST(STORE(stx, %o4, %i1 + %i3)) EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4)
add %i1, 0x8, %i1 add %i1, 0x8, %i1
EX_ST(STORE(stx, %g1, %i1 + %i3)) EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_minus_8)
bgu,pt %XCC, 1b bgu,pt %XCC, 1b
add %i1, 0x8, %i1 add %i1, 0x8, %i1
73: andcc %i2, 0x8, %g0 73: andcc %i2, 0x8, %g0
be,pt %XCC, 1f be,pt %XCC, 1f
nop nop
sub %i2, 0x8, %i2 sub %i2, 0x8, %i2
EX_LD(LOAD(ldx, %i1, %o4)) EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_8)
EX_ST(STORE(stx, %o4, %i1 + %i3)) EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_8)
add %i1, 0x8, %i1 add %i1, 0x8, %i1
1: andcc %i2, 0x4, %g0 1: andcc %i2, 0x4, %g0
be,pt %XCC, 1f be,pt %XCC, 1f
nop nop
sub %i2, 0x4, %i2 sub %i2, 0x4, %i2
EX_LD(LOAD(lduw, %i1, %i5)) EX_LD(LOAD(lduw, %i1, %i5), NG_ret_i2_plus_4)
EX_ST(STORE(stw, %i5, %i1 + %i3)) EX_ST(STORE(stw, %i5, %i1 + %i3), NG_ret_i2_plus_4)
add %i1, 0x4, %i1 add %i1, 0x4, %i1
1: cmp %i2, 0 1: cmp %i2, 0
be,pt %XCC, 85f be,pt %XCC, 85f
...@@ -358,8 +441,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ ...@@ -358,8 +441,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
sub %i2, %g1, %i2 sub %i2, %g1, %i2
1: subcc %g1, 1, %g1 1: subcc %g1, 1, %g1
EX_LD(LOAD(ldub, %i1, %i5)) EX_LD(LOAD(ldub, %i1, %i5), NG_ret_i2_plus_g1_plus_1)
EX_ST(STORE(stb, %i5, %i1 + %i3)) EX_ST(STORE(stb, %i5, %i1 + %i3), NG_ret_i2_plus_g1_plus_1)
bgu,pt %icc, 1b bgu,pt %icc, 1b
add %i1, 1, %i1 add %i1, 1, %i1
...@@ -375,16 +458,16 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ ...@@ -375,16 +458,16 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
8: mov 64, %i3 8: mov 64, %i3
andn %i1, 0x7, %i1 andn %i1, 0x7, %i1
EX_LD(LOAD(ldx, %i1, %g2)) EX_LD(LOAD(ldx, %i1, %g2), NG_ret_i2)
sub %i3, %g1, %i3 sub %i3, %g1, %i3
andn %i2, 0x7, %i4 andn %i2, 0x7, %i4
sllx %g2, %g1, %g2 sllx %g2, %g1, %g2
1: add %i1, 0x8, %i1 1: add %i1, 0x8, %i1
EX_LD(LOAD(ldx, %i1, %g3)) EX_LD(LOAD(ldx, %i1, %g3), NG_ret_i2_and_7_plus_i4)
subcc %i4, 0x8, %i4 subcc %i4, 0x8, %i4
srlx %g3, %i3, %i5 srlx %g3, %i3, %i5
or %i5, %g2, %i5 or %i5, %g2, %i5
EX_ST(STORE(stx, %i5, %o0)) EX_ST(STORE(stx, %i5, %o0), NG_ret_i2_and_7_plus_i4)
add %o0, 0x8, %o0 add %o0, 0x8, %o0
bgu,pt %icc, 1b bgu,pt %icc, 1b
sllx %g3, %g1, %g2 sllx %g3, %g1, %g2
...@@ -404,8 +487,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ ...@@ -404,8 +487,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
1: 1:
subcc %i2, 4, %i2 subcc %i2, 4, %i2
EX_LD(LOAD(lduw, %i1, %g1)) EX_LD(LOAD(lduw, %i1, %g1), NG_ret_i2_plus_4)
EX_ST(STORE(stw, %g1, %i1 + %i3)) EX_ST(STORE(stw, %g1, %i1 + %i3), NG_ret_i2_plus_4)
bgu,pt %XCC, 1b bgu,pt %XCC, 1b
add %i1, 4, %i1 add %i1, 4, %i1
...@@ -415,8 +498,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ ...@@ -415,8 +498,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
.align 32 .align 32
90: 90:
subcc %i2, 1, %i2 subcc %i2, 1, %i2
EX_LD(LOAD(ldub, %i1, %g1)) EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_1)
EX_ST(STORE(stb, %g1, %i1 + %i3)) EX_ST(STORE(stb, %g1, %i1 + %i3), NG_ret_i2_plus_1)
bgu,pt %XCC, 90b bgu,pt %XCC, 90b
add %i1, 1, %i1 add %i1, 1, %i1
ret ret
......
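The NG_ret_* stubs added at the top of this file encode, in their names, the expression for the bytes not yet copied: the named registers are summed (after restoring %asi via __restore_asi) and the constant suffix corrects for stores that had already landed when the fault hit. An illustrative C model of one of them, assuming the register roles used in the 64-byte block loop above (%i2 holding the bytes outside the current block, %g1 the bytes still owed by the block loop); the function name is a sketch, not code from the patch.

/* Sketch only: what NG_ret_i2_plus_g1_minus_8 hands back in %i0 when the
 * second STORE_INIT of a pair faults -- the first 8 bytes of that pair
 * were already stored, hence the "-8". */
static unsigned long ng_ret_i2_plus_g1_minus_8(unsigned long i2, unsigned long g1)
{
	return i2 + g1 - 8;	/* bytes the caller must treat as not copied */
}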
...@@ -3,19 +3,19 @@ ...@@ -3,19 +3,19 @@
* Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
*/ */
#define EX_LD(x) \ #define EX_LD(x,y) \
98: x; \ 98: x; \
.section __ex_table,"a";\ .section __ex_table,"a";\
.align 4; \ .align 4; \
.word 98b, __retl_one; \ .word 98b, y; \
.text; \ .text; \
.align 4; .align 4;
#define EX_LD_FP(x) \ #define EX_LD_FP(x,y) \
98: x; \ 98: x; \
.section __ex_table,"a";\ .section __ex_table,"a";\
.align 4; \ .align 4; \
.word 98b, __retl_one_fp;\ .word 98b, y; \
.text; \ .text; \
.align 4; .align 4;
......
...@@ -3,19 +3,19 @@ ...@@ -3,19 +3,19 @@
* Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
*/ */
#define EX_ST(x) \ #define EX_ST(x,y) \
98: x; \ 98: x; \
.section __ex_table,"a";\ .section __ex_table,"a";\
.align 4; \ .align 4; \
.word 98b, __retl_one; \ .word 98b, y; \
.text; \ .text; \
.align 4; .align 4;
#define EX_ST_FP(x) \ #define EX_ST_FP(x,y) \
98: x; \ 98: x; \
.section __ex_table,"a";\ .section __ex_table,"a";\
.align 4; \ .align 4; \
.word 98b, __retl_one_fp;\ .word 98b, y; \
.text; \ .text; \
.align 4; .align 4;
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
*/ */
#ifdef __KERNEL__ #ifdef __KERNEL__
#include <linux/linkage.h>
#include <asm/visasm.h> #include <asm/visasm.h>
#include <asm/asi.h> #include <asm/asi.h>
#include <asm/export.h> #include <asm/export.h>
...@@ -24,21 +25,17 @@ ...@@ -24,21 +25,17 @@
#endif #endif
#ifndef EX_LD #ifndef EX_LD
#define EX_LD(x) x #define EX_LD(x,y) x
#endif #endif
#ifndef EX_LD_FP #ifndef EX_LD_FP
#define EX_LD_FP(x) x #define EX_LD_FP(x,y) x
#endif #endif
#ifndef EX_ST #ifndef EX_ST
#define EX_ST(x) x #define EX_ST(x,y) x
#endif #endif
#ifndef EX_ST_FP #ifndef EX_ST_FP
#define EX_ST_FP(x) x #define EX_ST_FP(x,y) x
#endif
#ifndef EX_RETVAL
#define EX_RETVAL(x) x
#endif #endif
#ifndef LOAD #ifndef LOAD
...@@ -79,53 +76,169 @@ ...@@ -79,53 +76,169 @@
faligndata %f7, %f8, %f60; \ faligndata %f7, %f8, %f60; \
faligndata %f8, %f9, %f62; faligndata %f8, %f9, %f62;
#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \ #define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt) \
EX_LD_FP(LOAD_BLK(%src, %fdest)); \ EX_LD_FP(LOAD_BLK(%src, %fdest), U1_gs_80_fp); \
EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \
add %src, 0x40, %src; \ add %src, 0x40, %src; \
subcc %len, 0x40, %len; \ subcc %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE; \
be,pn %xcc, jmptgt; \ be,pn %xcc, jmptgt; \
add %dest, 0x40, %dest; \ add %dest, 0x40, %dest; \
#define LOOP_CHUNK1(src, dest, len, branch_dest) \ #define LOOP_CHUNK1(src, dest, branch_dest) \
MAIN_LOOP_CHUNK(src, dest, f0, f48, len, branch_dest) MAIN_LOOP_CHUNK(src, dest, f0, f48, branch_dest)
#define LOOP_CHUNK2(src, dest, len, branch_dest) \ #define LOOP_CHUNK2(src, dest, branch_dest) \
MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest) MAIN_LOOP_CHUNK(src, dest, f16, f48, branch_dest)
#define LOOP_CHUNK3(src, dest, len, branch_dest) \ #define LOOP_CHUNK3(src, dest, branch_dest) \
MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) MAIN_LOOP_CHUNK(src, dest, f32, f48, branch_dest)
#define DO_SYNC membar #Sync; #define DO_SYNC membar #Sync;
#define STORE_SYNC(dest, fsrc) \ #define STORE_SYNC(dest, fsrc) \
EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \
add %dest, 0x40, %dest; \ add %dest, 0x40, %dest; \
DO_SYNC DO_SYNC
#define STORE_JUMP(dest, fsrc, target) \ #define STORE_JUMP(dest, fsrc, target) \
EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_40_fp); \
add %dest, 0x40, %dest; \ add %dest, 0x40, %dest; \
ba,pt %xcc, target; \ ba,pt %xcc, target; \
nop; nop;
#define FINISH_VISCHUNK(dest, f0, f1, left) \ #define FINISH_VISCHUNK(dest, f0, f1) \
subcc %left, 8, %left;\ subcc %g3, 8, %g3; \
bl,pn %xcc, 95f; \ bl,pn %xcc, 95f; \
faligndata %f0, %f1, %f48; \ faligndata %f0, %f1, %f48; \
EX_ST_FP(STORE(std, %f48, %dest)); \ EX_ST_FP(STORE(std, %f48, %dest), U1_g3_8_fp); \
add %dest, 8, %dest; add %dest, 8, %dest;
#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ #define UNEVEN_VISCHUNK_LAST(dest, f0, f1) \
subcc %left, 8, %left; \ subcc %g3, 8, %g3; \
bl,pn %xcc, 95f; \ bl,pn %xcc, 95f; \
fsrc2 %f0, %f1; fsrc2 %f0, %f1;
#define UNEVEN_VISCHUNK(dest, f0, f1, left) \ #define UNEVEN_VISCHUNK(dest, f0, f1) \
UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ UNEVEN_VISCHUNK_LAST(dest, f0, f1) \
ba,a,pt %xcc, 93f; ba,a,pt %xcc, 93f;
.register %g2,#scratch .register %g2,#scratch
.register %g3,#scratch .register %g3,#scratch
.text .text
#ifndef EX_RETVAL
#define EX_RETVAL(x) x
ENTRY(U1_g1_1_fp)
VISExitHalf
add %g1, 1, %g1
add %g1, %g2, %g1
retl
add %g1, %o2, %o0
ENDPROC(U1_g1_1_fp)
ENTRY(U1_g2_0_fp)
VISExitHalf
retl
add %g2, %o2, %o0
ENDPROC(U1_g2_0_fp)
ENTRY(U1_g2_8_fp)
VISExitHalf
add %g2, 8, %g2
retl
add %g2, %o2, %o0
ENDPROC(U1_g2_8_fp)
ENTRY(U1_gs_0_fp)
VISExitHalf
add %GLOBAL_SPARE, %g3, %o0
retl
add %o0, %o2, %o0
ENDPROC(U1_gs_0_fp)
ENTRY(U1_gs_80_fp)
VISExitHalf
add %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
add %GLOBAL_SPARE, %g3, %o0
retl
add %o0, %o2, %o0
ENDPROC(U1_gs_80_fp)
ENTRY(U1_gs_40_fp)
VISExitHalf
add %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE
add %GLOBAL_SPARE, %g3, %o0
retl
add %o0, %o2, %o0
ENDPROC(U1_gs_40_fp)
ENTRY(U1_g3_0_fp)
VISExitHalf
retl
add %g3, %o2, %o0
ENDPROC(U1_g3_0_fp)
ENTRY(U1_g3_8_fp)
VISExitHalf
add %g3, 8, %g3
retl
add %g3, %o2, %o0
ENDPROC(U1_g3_8_fp)
ENTRY(U1_o2_0_fp)
VISExitHalf
retl
mov %o2, %o0
ENDPROC(U1_o2_0_fp)
ENTRY(U1_o2_1_fp)
VISExitHalf
retl
add %o2, 1, %o0
ENDPROC(U1_o2_1_fp)
ENTRY(U1_gs_0)
VISExitHalf
retl
add %GLOBAL_SPARE, %o2, %o0
ENDPROC(U1_gs_0)
ENTRY(U1_gs_8)
VISExitHalf
add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE
retl
add %GLOBAL_SPARE, 0x8, %o0
ENDPROC(U1_gs_8)
ENTRY(U1_gs_10)
VISExitHalf
add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE
retl
add %GLOBAL_SPARE, 0x10, %o0
ENDPROC(U1_gs_10)
ENTRY(U1_o2_0)
retl
mov %o2, %o0
ENDPROC(U1_o2_0)
ENTRY(U1_o2_8)
retl
add %o2, 8, %o0
ENDPROC(U1_o2_8)
ENTRY(U1_o2_4)
retl
add %o2, 4, %o0
ENDPROC(U1_o2_4)
ENTRY(U1_o2_1)
retl
add %o2, 1, %o0
ENDPROC(U1_o2_1)
ENTRY(U1_g1_0)
retl
add %g1, %o2, %o0
ENDPROC(U1_g1_0)
ENTRY(U1_g1_1)
add %g1, 1, %g1
retl
add %g1, %o2, %o0
ENDPROC(U1_g1_1)
ENTRY(U1_gs_0_o2_adj)
and %o2, 7, %o2
retl
add %GLOBAL_SPARE, %o2, %o0
ENDPROC(U1_gs_0_o2_adj)
ENTRY(U1_gs_8_o2_adj)
and %o2, 7, %o2
add %GLOBAL_SPARE, 8, %GLOBAL_SPARE
retl
add %GLOBAL_SPARE, %o2, %o0
ENDPROC(U1_gs_8_o2_adj)
#endif
.align 64 .align 64
.globl FUNC_NAME .globl FUNC_NAME
...@@ -167,8 +280,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -167,8 +280,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
and %g2, 0x38, %g2 and %g2, 0x38, %g2
1: subcc %g1, 0x1, %g1 1: subcc %g1, 0x1, %g1
EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3)) EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U1_g1_1_fp)
EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE)) EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE), U1_g1_1_fp)
bgu,pt %XCC, 1b bgu,pt %XCC, 1b
add %o1, 0x1, %o1 add %o1, 0x1, %o1
...@@ -179,20 +292,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -179,20 +292,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
be,pt %icc, 3f be,pt %icc, 3f
alignaddr %o1, %g0, %o1 alignaddr %o1, %g0, %o1
EX_LD_FP(LOAD(ldd, %o1, %f4)) EX_LD_FP(LOAD(ldd, %o1, %f4), U1_g2_0_fp)
1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6)) 1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U1_g2_0_fp)
add %o1, 0x8, %o1 add %o1, 0x8, %o1
subcc %g2, 0x8, %g2 subcc %g2, 0x8, %g2
faligndata %f4, %f6, %f0 faligndata %f4, %f6, %f0
EX_ST_FP(STORE(std, %f0, %o0)) EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
be,pn %icc, 3f be,pn %icc, 3f
add %o0, 0x8, %o0 add %o0, 0x8, %o0
EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4)) EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U1_g2_0_fp)
add %o1, 0x8, %o1 add %o1, 0x8, %o1
subcc %g2, 0x8, %g2 subcc %g2, 0x8, %g2
faligndata %f6, %f4, %f0 faligndata %f6, %f4, %f0
EX_ST_FP(STORE(std, %f0, %o0)) EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
bne,pt %icc, 1b bne,pt %icc, 1b
add %o0, 0x8, %o0 add %o0, 0x8, %o0
...@@ -215,13 +328,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -215,13 +328,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
add %g1, %GLOBAL_SPARE, %g1 add %g1, %GLOBAL_SPARE, %g1
subcc %o2, %g3, %o2 subcc %o2, %g3, %o2
EX_LD_FP(LOAD_BLK(%o1, %f0)) EX_LD_FP(LOAD_BLK(%o1, %f0), U1_gs_0_fp)
add %o1, 0x40, %o1 add %o1, 0x40, %o1
add %g1, %g3, %g1 add %g1, %g3, %g1
EX_LD_FP(LOAD_BLK(%o1, %f16)) EX_LD_FP(LOAD_BLK(%o1, %f16), U1_gs_0_fp)
add %o1, 0x40, %o1 add %o1, 0x40, %o1
sub %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE sub %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
EX_LD_FP(LOAD_BLK(%o1, %f32)) EX_LD_FP(LOAD_BLK(%o1, %f32), U1_gs_80_fp)
add %o1, 0x40, %o1 add %o1, 0x40, %o1
/* There are 8 instances of the unrolled loop, /* There are 8 instances of the unrolled loop,
...@@ -241,11 +354,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -241,11 +354,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
.align 64 .align 64
1: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) 1: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) LOOP_CHUNK1(o1, o0, 1f)
FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) LOOP_CHUNK2(o1, o0, 2f)
FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) LOOP_CHUNK3(o1, o0, 3f)
ba,pt %xcc, 1b+4 ba,pt %xcc, 1b+4
faligndata %f0, %f2, %f48 faligndata %f0, %f2, %f48
1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) 1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
...@@ -262,11 +375,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -262,11 +375,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
STORE_JUMP(o0, f48, 56f) STORE_JUMP(o0, f48, 56f)
1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) 1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) LOOP_CHUNK1(o1, o0, 1f)
FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) LOOP_CHUNK2(o1, o0, 2f)
FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) LOOP_CHUNK3(o1, o0, 3f)
ba,pt %xcc, 1b+4 ba,pt %xcc, 1b+4
faligndata %f2, %f4, %f48 faligndata %f2, %f4, %f48
1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) 1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
...@@ -283,11 +396,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -283,11 +396,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
STORE_JUMP(o0, f48, 57f) STORE_JUMP(o0, f48, 57f)
1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) 1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) LOOP_CHUNK1(o1, o0, 1f)
FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) LOOP_CHUNK2(o1, o0, 2f)
FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) LOOP_CHUNK3(o1, o0, 3f)
ba,pt %xcc, 1b+4 ba,pt %xcc, 1b+4
faligndata %f4, %f6, %f48 faligndata %f4, %f6, %f48
1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) 1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
...@@ -304,11 +417,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -304,11 +417,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
STORE_JUMP(o0, f48, 58f) STORE_JUMP(o0, f48, 58f)
1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) 1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) LOOP_CHUNK1(o1, o0, 1f)
FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) LOOP_CHUNK2(o1, o0, 2f)
FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) LOOP_CHUNK3(o1, o0, 3f)
ba,pt %xcc, 1b+4 ba,pt %xcc, 1b+4
faligndata %f6, %f8, %f48 faligndata %f6, %f8, %f48
1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) 1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
...@@ -325,11 +438,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -325,11 +438,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
STORE_JUMP(o0, f48, 59f) STORE_JUMP(o0, f48, 59f)
1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) 1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) LOOP_CHUNK1(o1, o0, 1f)
FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) LOOP_CHUNK2(o1, o0, 2f)
FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) LOOP_CHUNK3(o1, o0, 3f)
ba,pt %xcc, 1b+4 ba,pt %xcc, 1b+4
faligndata %f8, %f10, %f48 faligndata %f8, %f10, %f48
1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) 1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
...@@ -346,11 +459,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -346,11 +459,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
STORE_JUMP(o0, f48, 60f) STORE_JUMP(o0, f48, 60f)
1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) 1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) LOOP_CHUNK1(o1, o0, 1f)
FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) LOOP_CHUNK2(o1, o0, 2f)
FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) LOOP_CHUNK3(o1, o0, 3f)
ba,pt %xcc, 1b+4 ba,pt %xcc, 1b+4
faligndata %f10, %f12, %f48 faligndata %f10, %f12, %f48
1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) 1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
...@@ -367,11 +480,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -367,11 +480,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
STORE_JUMP(o0, f48, 61f) STORE_JUMP(o0, f48, 61f)
1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) 1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) LOOP_CHUNK1(o1, o0, 1f)
FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) LOOP_CHUNK2(o1, o0, 2f)
FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) LOOP_CHUNK3(o1, o0, 3f)
ba,pt %xcc, 1b+4 ba,pt %xcc, 1b+4
faligndata %f12, %f14, %f48 faligndata %f12, %f14, %f48
1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) 1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
...@@ -388,11 +501,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -388,11 +501,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
STORE_JUMP(o0, f48, 62f) STORE_JUMP(o0, f48, 62f)
1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) 1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) LOOP_CHUNK1(o1, o0, 1f)
FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) LOOP_CHUNK2(o1, o0, 2f)
FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) LOOP_CHUNK3(o1, o0, 3f)
ba,pt %xcc, 1b+4 ba,pt %xcc, 1b+4
faligndata %f14, %f16, %f48 faligndata %f14, %f16, %f48
1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) 1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
...@@ -408,53 +521,53 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -408,53 +521,53 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
STORE_JUMP(o0, f48, 63f) STORE_JUMP(o0, f48, 63f)
40: FINISH_VISCHUNK(o0, f0, f2, g3) 40: FINISH_VISCHUNK(o0, f0, f2)
41: FINISH_VISCHUNK(o0, f2, f4, g3) 41: FINISH_VISCHUNK(o0, f2, f4)
42: FINISH_VISCHUNK(o0, f4, f6, g3) 42: FINISH_VISCHUNK(o0, f4, f6)
43: FINISH_VISCHUNK(o0, f6, f8, g3) 43: FINISH_VISCHUNK(o0, f6, f8)
44: FINISH_VISCHUNK(o0, f8, f10, g3) 44: FINISH_VISCHUNK(o0, f8, f10)
45: FINISH_VISCHUNK(o0, f10, f12, g3) 45: FINISH_VISCHUNK(o0, f10, f12)
46: FINISH_VISCHUNK(o0, f12, f14, g3) 46: FINISH_VISCHUNK(o0, f12, f14)
47: UNEVEN_VISCHUNK(o0, f14, f0, g3) 47: UNEVEN_VISCHUNK(o0, f14, f0)
48: FINISH_VISCHUNK(o0, f16, f18, g3) 48: FINISH_VISCHUNK(o0, f16, f18)
49: FINISH_VISCHUNK(o0, f18, f20, g3) 49: FINISH_VISCHUNK(o0, f18, f20)
50: FINISH_VISCHUNK(o0, f20, f22, g3) 50: FINISH_VISCHUNK(o0, f20, f22)
51: FINISH_VISCHUNK(o0, f22, f24, g3) 51: FINISH_VISCHUNK(o0, f22, f24)
52: FINISH_VISCHUNK(o0, f24, f26, g3) 52: FINISH_VISCHUNK(o0, f24, f26)
53: FINISH_VISCHUNK(o0, f26, f28, g3) 53: FINISH_VISCHUNK(o0, f26, f28)
54: FINISH_VISCHUNK(o0, f28, f30, g3) 54: FINISH_VISCHUNK(o0, f28, f30)
55: UNEVEN_VISCHUNK(o0, f30, f0, g3) 55: UNEVEN_VISCHUNK(o0, f30, f0)
56: FINISH_VISCHUNK(o0, f32, f34, g3) 56: FINISH_VISCHUNK(o0, f32, f34)
57: FINISH_VISCHUNK(o0, f34, f36, g3) 57: FINISH_VISCHUNK(o0, f34, f36)
58: FINISH_VISCHUNK(o0, f36, f38, g3) 58: FINISH_VISCHUNK(o0, f36, f38)
59: FINISH_VISCHUNK(o0, f38, f40, g3) 59: FINISH_VISCHUNK(o0, f38, f40)
60: FINISH_VISCHUNK(o0, f40, f42, g3) 60: FINISH_VISCHUNK(o0, f40, f42)
61: FINISH_VISCHUNK(o0, f42, f44, g3) 61: FINISH_VISCHUNK(o0, f42, f44)
62: FINISH_VISCHUNK(o0, f44, f46, g3) 62: FINISH_VISCHUNK(o0, f44, f46)
63: UNEVEN_VISCHUNK_LAST(o0, f46, f0, g3) 63: UNEVEN_VISCHUNK_LAST(o0, f46, f0)
93: EX_LD_FP(LOAD(ldd, %o1, %f2)) 93: EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_fp)
add %o1, 8, %o1 add %o1, 8, %o1
subcc %g3, 8, %g3 subcc %g3, 8, %g3
faligndata %f0, %f2, %f8 faligndata %f0, %f2, %f8
EX_ST_FP(STORE(std, %f8, %o0)) EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
bl,pn %xcc, 95f bl,pn %xcc, 95f
add %o0, 8, %o0 add %o0, 8, %o0
EX_LD_FP(LOAD(ldd, %o1, %f0)) EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_fp)
add %o1, 8, %o1 add %o1, 8, %o1
subcc %g3, 8, %g3 subcc %g3, 8, %g3
faligndata %f2, %f0, %f8 faligndata %f2, %f0, %f8
EX_ST_FP(STORE(std, %f8, %o0)) EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
bge,pt %xcc, 93b bge,pt %xcc, 93b
add %o0, 8, %o0 add %o0, 8, %o0
95: brz,pt %o2, 2f 95: brz,pt %o2, 2f
mov %g1, %o1 mov %g1, %o1
1: EX_LD_FP(LOAD(ldub, %o1, %o3)) 1: EX_LD_FP(LOAD(ldub, %o1, %o3), U1_o2_0_fp)
add %o1, 1, %o1 add %o1, 1, %o1
subcc %o2, 1, %o2 subcc %o2, 1, %o2
EX_ST_FP(STORE(stb, %o3, %o0)) EX_ST_FP(STORE(stb, %o3, %o0), U1_o2_1_fp)
bne,pt %xcc, 1b bne,pt %xcc, 1b
add %o0, 1, %o0 add %o0, 1, %o0
...@@ -470,27 +583,27 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -470,27 +583,27 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
72: andn %o2, 0xf, %GLOBAL_SPARE 72: andn %o2, 0xf, %GLOBAL_SPARE
and %o2, 0xf, %o2 and %o2, 0xf, %o2
1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) 1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U1_gs_0)
EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U1_gs_0)
subcc %GLOBAL_SPARE, 0x10, %GLOBAL_SPARE subcc %GLOBAL_SPARE, 0x10, %GLOBAL_SPARE
EX_ST(STORE(stx, %o5, %o1 + %o3)) EX_ST(STORE(stx, %o5, %o1 + %o3), U1_gs_10)
add %o1, 0x8, %o1 add %o1, 0x8, %o1
EX_ST(STORE(stx, %g1, %o1 + %o3)) EX_ST(STORE(stx, %g1, %o1 + %o3), U1_gs_8)
bgu,pt %XCC, 1b bgu,pt %XCC, 1b
add %o1, 0x8, %o1 add %o1, 0x8, %o1
73: andcc %o2, 0x8, %g0 73: andcc %o2, 0x8, %g0
be,pt %XCC, 1f be,pt %XCC, 1f
nop nop
EX_LD(LOAD(ldx, %o1, %o5)) EX_LD(LOAD(ldx, %o1, %o5), U1_o2_0)
sub %o2, 0x8, %o2 sub %o2, 0x8, %o2
EX_ST(STORE(stx, %o5, %o1 + %o3)) EX_ST(STORE(stx, %o5, %o1 + %o3), U1_o2_8)
add %o1, 0x8, %o1 add %o1, 0x8, %o1
1: andcc %o2, 0x4, %g0 1: andcc %o2, 0x4, %g0
be,pt %XCC, 1f be,pt %XCC, 1f
nop nop
EX_LD(LOAD(lduw, %o1, %o5)) EX_LD(LOAD(lduw, %o1, %o5), U1_o2_0)
sub %o2, 0x4, %o2 sub %o2, 0x4, %o2
EX_ST(STORE(stw, %o5, %o1 + %o3)) EX_ST(STORE(stw, %o5, %o1 + %o3), U1_o2_4)
add %o1, 0x4, %o1 add %o1, 0x4, %o1
1: cmp %o2, 0 1: cmp %o2, 0
be,pt %XCC, 85f be,pt %XCC, 85f
...@@ -504,9 +617,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -504,9 +617,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
sub %g0, %g1, %g1 sub %g0, %g1, %g1
sub %o2, %g1, %o2 sub %o2, %g1, %o2
1: EX_LD(LOAD(ldub, %o1, %o5)) 1: EX_LD(LOAD(ldub, %o1, %o5), U1_g1_0)
subcc %g1, 1, %g1 subcc %g1, 1, %g1
EX_ST(STORE(stb, %o5, %o1 + %o3)) EX_ST(STORE(stb, %o5, %o1 + %o3), U1_g1_1)
bgu,pt %icc, 1b bgu,pt %icc, 1b
add %o1, 1, %o1 add %o1, 1, %o1
...@@ -522,16 +635,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -522,16 +635,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
8: mov 64, %o3 8: mov 64, %o3
andn %o1, 0x7, %o1 andn %o1, 0x7, %o1
EX_LD(LOAD(ldx, %o1, %g2)) EX_LD(LOAD(ldx, %o1, %g2), U1_o2_0)
sub %o3, %g1, %o3 sub %o3, %g1, %o3
andn %o2, 0x7, %GLOBAL_SPARE andn %o2, 0x7, %GLOBAL_SPARE
sllx %g2, %g1, %g2 sllx %g2, %g1, %g2
1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) 1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U1_gs_0_o2_adj)
subcc %GLOBAL_SPARE, 0x8, %GLOBAL_SPARE subcc %GLOBAL_SPARE, 0x8, %GLOBAL_SPARE
add %o1, 0x8, %o1 add %o1, 0x8, %o1
srlx %g3, %o3, %o5 srlx %g3, %o3, %o5
or %o5, %g2, %o5 or %o5, %g2, %o5
EX_ST(STORE(stx, %o5, %o0)) EX_ST(STORE(stx, %o5, %o0), U1_gs_8_o2_adj)
add %o0, 0x8, %o0 add %o0, 0x8, %o0
bgu,pt %icc, 1b bgu,pt %icc, 1b
sllx %g3, %g1, %g2 sllx %g3, %g1, %g2
...@@ -549,9 +662,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -549,9 +662,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
bne,pn %XCC, 90f bne,pn %XCC, 90f
sub %o0, %o1, %o3 sub %o0, %o1, %o3
1: EX_LD(LOAD(lduw, %o1, %g1)) 1: EX_LD(LOAD(lduw, %o1, %g1), U1_o2_0)
subcc %o2, 4, %o2 subcc %o2, 4, %o2
EX_ST(STORE(stw, %g1, %o1 + %o3)) EX_ST(STORE(stw, %g1, %o1 + %o3), U1_o2_4)
bgu,pt %XCC, 1b bgu,pt %XCC, 1b
add %o1, 4, %o1 add %o1, 4, %o1
...@@ -559,9 +672,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -559,9 +672,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
mov EX_RETVAL(%o4), %o0 mov EX_RETVAL(%o4), %o0
.align 32 .align 32
90: EX_LD(LOAD(ldub, %o1, %g1)) 90: EX_LD(LOAD(ldub, %o1, %g1), U1_o2_0)
subcc %o2, 1, %o2 subcc %o2, 1, %o2
EX_ST(STORE(stb, %g1, %o1 + %o3)) EX_ST(STORE(stb, %g1, %o1 + %o3), U1_o2_1)
bgu,pt %XCC, 90b bgu,pt %XCC, 90b
add %o1, 1, %o1 add %o1, 1, %o1
retl retl
......
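U1memcpy.S keeps two flavors of fixup: the *_fp labels are reached from EX_LD_FP()/EX_ST_FP() sites where VIS registers are live, so they run VISExitHalf before computing the residual, while the plain labels return directly. A minimal C sketch of that distinction, using a hypothetical vis_exit_half() as a stand-in for the VISExitHalf assembler macro; the function names mirror the U1_o2_0 and U1_o2_0_fp labels above but are otherwise illustrative.

static void vis_exit_half(void)
{
	/* stand-in for the VISExitHalf assembler macro, which pops the
	 * half-saved FPU/VIS state before returning to the caller */
}

static unsigned long u1_o2_0(unsigned long o2)
{
	return o2;		/* plain path: whole remaining length uncopied */
}

static unsigned long u1_o2_0_fp(unsigned long o2)
{
	vis_exit_half();	/* FP path: unwind live VIS state first */
	return o2;
}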
...@@ -3,19 +3,19 @@ ...@@ -3,19 +3,19 @@
* Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
*/ */
#define EX_LD(x) \ #define EX_LD(x,y) \
98: x; \ 98: x; \
.section __ex_table,"a";\ .section __ex_table,"a";\
.align 4; \ .align 4; \
.word 98b, __retl_one; \ .word 98b, y; \
.text; \ .text; \
.align 4; .align 4;
#define EX_LD_FP(x) \ #define EX_LD_FP(x,y) \
98: x; \ 98: x; \
.section __ex_table,"a";\ .section __ex_table,"a";\
.align 4; \ .align 4; \
.word 98b, __retl_one_fp;\ .word 98b, y##_fp; \
.text; \ .text; \
.align 4; .align 4;
......
...@@ -3,19 +3,19 @@ ...@@ -3,19 +3,19 @@
* Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
*/ */
#define EX_ST(x) \ #define EX_ST(x,y) \
98: x; \ 98: x; \
.section __ex_table,"a";\ .section __ex_table,"a";\
.align 4; \ .align 4; \
.word 98b, __retl_one; \ .word 98b, y; \
.text; \ .text; \
.align 4; .align 4;
#define EX_ST_FP(x) \ #define EX_ST_FP(x,y) \
98: x; \ 98: x; \
.section __ex_table,"a";\ .section __ex_table,"a";\
.align 4; \ .align 4; \
.word 98b, __retl_one_fp;\ .word 98b, y##_fp; \
.text; \ .text; \
.align 4; .align 4;
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
*/ */
#ifdef __KERNEL__ #ifdef __KERNEL__
#include <linux/linkage.h>
#include <asm/visasm.h> #include <asm/visasm.h>
#include <asm/asi.h> #include <asm/asi.h>
#define GLOBAL_SPARE %g7 #define GLOBAL_SPARE %g7
...@@ -22,21 +23,17 @@ ...@@ -22,21 +23,17 @@
#endif #endif
#ifndef EX_LD #ifndef EX_LD
#define EX_LD(x) x #define EX_LD(x,y) x
#endif #endif
#ifndef EX_LD_FP #ifndef EX_LD_FP
#define EX_LD_FP(x) x #define EX_LD_FP(x,y) x
#endif #endif
#ifndef EX_ST #ifndef EX_ST
#define EX_ST(x) x #define EX_ST(x,y) x
#endif #endif
#ifndef EX_ST_FP #ifndef EX_ST_FP
#define EX_ST_FP(x) x #define EX_ST_FP(x,y) x
#endif
#ifndef EX_RETVAL
#define EX_RETVAL(x) x
#endif #endif
#ifndef LOAD #ifndef LOAD
...@@ -77,6 +74,87 @@ ...@@ -77,6 +74,87 @@
*/ */
.text .text
#ifndef EX_RETVAL
#define EX_RETVAL(x) x
__restore_fp:
VISExitHalf
retl
nop
ENTRY(U3_retl_o2_plus_g2_plus_g1_plus_1_fp)
add %g1, 1, %g1
add %g2, %g1, %g2
ba,pt %xcc, __restore_fp
add %o2, %g2, %o0
ENDPROC(U3_retl_o2_plus_g2_plus_g1_plus_1_fp)
ENTRY(U3_retl_o2_plus_g2_fp)
ba,pt %xcc, __restore_fp
add %o2, %g2, %o0
ENDPROC(U3_retl_o2_plus_g2_fp)
ENTRY(U3_retl_o2_plus_g2_plus_8_fp)
add %g2, 8, %g2
ba,pt %xcc, __restore_fp
add %o2, %g2, %o0
ENDPROC(U3_retl_o2_plus_g2_plus_8_fp)
ENTRY(U3_retl_o2)
retl
mov %o2, %o0
ENDPROC(U3_retl_o2)
ENTRY(U3_retl_o2_plus_1)
retl
add %o2, 1, %o0
ENDPROC(U3_retl_o2_plus_1)
ENTRY(U3_retl_o2_plus_4)
retl
add %o2, 4, %o0
ENDPROC(U3_retl_o2_plus_4)
ENTRY(U3_retl_o2_plus_8)
retl
add %o2, 8, %o0
ENDPROC(U3_retl_o2_plus_8)
ENTRY(U3_retl_o2_plus_g1_plus_1)
add %g1, 1, %g1
retl
add %o2, %g1, %o0
ENDPROC(U3_retl_o2_plus_g1_plus_1)
ENTRY(U3_retl_o2_fp)
ba,pt %xcc, __restore_fp
mov %o2, %o0
ENDPROC(U3_retl_o2_fp)
ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp)
sll %o3, 6, %o3
add %o3, 0x80, %o3
ba,pt %xcc, __restore_fp
add %o2, %o3, %o0
ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp)
ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp)
sll %o3, 6, %o3
add %o3, 0x40, %o3
ba,pt %xcc, __restore_fp
add %o2, %o3, %o0
ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp)
ENTRY(U3_retl_o2_plus_GS_plus_0x10)
add GLOBAL_SPARE, 0x10, GLOBAL_SPARE
retl
add %o2, GLOBAL_SPARE, %o0
ENDPROC(U3_retl_o2_plus_GS_plus_0x10)
ENTRY(U3_retl_o2_plus_GS_plus_0x08)
add GLOBAL_SPARE, 0x08, GLOBAL_SPARE
retl
add %o2, GLOBAL_SPARE, %o0
ENDPROC(U3_retl_o2_plus_GS_plus_0x08)
ENTRY(U3_retl_o2_and_7_plus_GS)
and %o2, 7, %o2
retl
add %o2, GLOBAL_SPARE, %o0
ENDPROC(U3_retl_o2_and_7_plus_GS)
ENTRY(U3_retl_o2_and_7_plus_GS_plus_8)
add GLOBAL_SPARE, 8, GLOBAL_SPARE
and %o2, 7, %o2
retl
add %o2, GLOBAL_SPARE, %o0
ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8)
#endif
.align 64 .align 64
/* The cheetah's flexible spine, oversized liver, enlarged heart, /* The cheetah's flexible spine, oversized liver, enlarged heart,
...@@ -126,8 +204,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -126,8 +204,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
and %g2, 0x38, %g2 and %g2, 0x38, %g2
1: subcc %g1, 0x1, %g1 1: subcc %g1, 0x1, %g1
EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3)) EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U3_retl_o2_plus_g2_plus_g1_plus_1)
EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE)) EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE), U3_retl_o2_plus_g2_plus_g1_plus_1)
bgu,pt %XCC, 1b bgu,pt %XCC, 1b
add %o1, 0x1, %o1 add %o1, 0x1, %o1
...@@ -138,20 +216,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -138,20 +216,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
be,pt %icc, 3f be,pt %icc, 3f
alignaddr %o1, %g0, %o1 alignaddr %o1, %g0, %o1
EX_LD_FP(LOAD(ldd, %o1, %f4)) EX_LD_FP(LOAD(ldd, %o1, %f4), U3_retl_o2_plus_g2)
1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6)) 1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U3_retl_o2_plus_g2)
add %o1, 0x8, %o1 add %o1, 0x8, %o1
subcc %g2, 0x8, %g2 subcc %g2, 0x8, %g2
faligndata %f4, %f6, %f0 faligndata %f4, %f6, %f0
EX_ST_FP(STORE(std, %f0, %o0)) EX_ST_FP(STORE(std, %f0, %o0), U3_retl_o2_plus_g2_plus_8)
be,pn %icc, 3f be,pn %icc, 3f
add %o0, 0x8, %o0 add %o0, 0x8, %o0
EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4)) EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U3_retl_o2_plus_g2)
add %o1, 0x8, %o1 add %o1, 0x8, %o1
subcc %g2, 0x8, %g2 subcc %g2, 0x8, %g2
faligndata %f6, %f4, %f2 faligndata %f6, %f4, %f2
EX_ST_FP(STORE(std, %f2, %o0)) EX_ST_FP(STORE(std, %f2, %o0), U3_retl_o2_plus_g2_plus_8)
bne,pt %icc, 1b bne,pt %icc, 1b
add %o0, 0x8, %o0 add %o0, 0x8, %o0
...@@ -161,25 +239,25 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -161,25 +239,25 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
LOAD(prefetch, %o1 + 0x080, #one_read) LOAD(prefetch, %o1 + 0x080, #one_read)
LOAD(prefetch, %o1 + 0x0c0, #one_read) LOAD(prefetch, %o1 + 0x0c0, #one_read)
LOAD(prefetch, %o1 + 0x100, #one_read) LOAD(prefetch, %o1 + 0x100, #one_read)
EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0)) EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0), U3_retl_o2)
LOAD(prefetch, %o1 + 0x140, #one_read) LOAD(prefetch, %o1 + 0x140, #one_read)
EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2)
LOAD(prefetch, %o1 + 0x180, #one_read) LOAD(prefetch, %o1 + 0x180, #one_read)
EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2)
LOAD(prefetch, %o1 + 0x1c0, #one_read) LOAD(prefetch, %o1 + 0x1c0, #one_read)
faligndata %f0, %f2, %f16 faligndata %f0, %f2, %f16
EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2)
faligndata %f2, %f4, %f18 faligndata %f2, %f4, %f18
EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2)
faligndata %f4, %f6, %f20 faligndata %f4, %f6, %f20
EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2)
faligndata %f6, %f8, %f22 faligndata %f6, %f8, %f22
EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2)
faligndata %f8, %f10, %f24 faligndata %f8, %f10, %f24
EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2)
faligndata %f10, %f12, %f26 faligndata %f10, %f12, %f26
EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2)
subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE
add %o1, 0x40, %o1 add %o1, 0x40, %o1
...@@ -190,26 +268,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -190,26 +268,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
.align 64 .align 64
1: 1:
EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80)
faligndata %f12, %f14, %f28 faligndata %f12, %f14, %f28
EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80)
faligndata %f14, %f0, %f30 faligndata %f14, %f0, %f30
EX_ST_FP(STORE_BLK(%f16, %o0)) EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40)
faligndata %f0, %f2, %f16 faligndata %f0, %f2, %f16
add %o0, 0x40, %o0 add %o0, 0x40, %o0
EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40)
faligndata %f2, %f4, %f18 faligndata %f2, %f4, %f18
EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40)
faligndata %f4, %f6, %f20 faligndata %f4, %f6, %f20
EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40)
subcc %o3, 0x01, %o3 subcc %o3, 0x01, %o3
faligndata %f6, %f8, %f22 faligndata %f6, %f8, %f22
EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x80)
faligndata %f8, %f10, %f24 faligndata %f8, %f10, %f24
EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
LOAD(prefetch, %o1 + 0x1c0, #one_read) LOAD(prefetch, %o1 + 0x1c0, #one_read)
faligndata %f10, %f12, %f26 faligndata %f10, %f12, %f26
bg,pt %XCC, 1b bg,pt %XCC, 1b
...@@ -217,29 +295,29 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -217,29 +295,29 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
/* Finally we copy the last full 64-byte block. */ /* Finally we copy the last full 64-byte block. */
2: 2:
EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80)
faligndata %f12, %f14, %f28 faligndata %f12, %f14, %f28
EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80)
faligndata %f14, %f0, %f30 faligndata %f14, %f0, %f30
EX_ST_FP(STORE_BLK(%f16, %o0)) EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40)
faligndata %f0, %f2, %f16 faligndata %f0, %f2, %f16
EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40)
faligndata %f2, %f4, %f18 faligndata %f2, %f4, %f18
EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40)
faligndata %f4, %f6, %f20 faligndata %f4, %f6, %f20
EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40)
faligndata %f6, %f8, %f22 faligndata %f6, %f8, %f22
EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x40)
faligndata %f8, %f10, %f24 faligndata %f8, %f10, %f24
cmp %g1, 0 cmp %g1, 0
be,pt %XCC, 1f be,pt %XCC, 1f
add %o0, 0x40, %o0 add %o0, 0x40, %o0
EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x40)
1: faligndata %f10, %f12, %f26 1: faligndata %f10, %f12, %f26
faligndata %f12, %f14, %f28 faligndata %f12, %f14, %f28
faligndata %f14, %f0, %f30 faligndata %f14, %f0, %f30
EX_ST_FP(STORE_BLK(%f16, %o0)) EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x40)
add %o0, 0x40, %o0 add %o0, 0x40, %o0
add %o1, 0x40, %o1 add %o1, 0x40, %o1
membar #Sync membar #Sync
...@@ -259,20 +337,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -259,20 +337,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
sub %o2, %g2, %o2 sub %o2, %g2, %o2
be,a,pt %XCC, 1f be,a,pt %XCC, 1f
EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0)) EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0), U3_retl_o2_plus_g2)
1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2)) 1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2), U3_retl_o2_plus_g2)
add %o1, 0x8, %o1 add %o1, 0x8, %o1
subcc %g2, 0x8, %g2 subcc %g2, 0x8, %g2
faligndata %f0, %f2, %f8 faligndata %f0, %f2, %f8
EX_ST_FP(STORE(std, %f8, %o0)) EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8)
be,pn %XCC, 2f be,pn %XCC, 2f
add %o0, 0x8, %o0 add %o0, 0x8, %o0
EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0)) EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0), U3_retl_o2_plus_g2)
add %o1, 0x8, %o1 add %o1, 0x8, %o1
subcc %g2, 0x8, %g2 subcc %g2, 0x8, %g2
faligndata %f2, %f0, %f8 faligndata %f2, %f0, %f8
EX_ST_FP(STORE(std, %f8, %o0)) EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8)
bne,pn %XCC, 1b bne,pn %XCC, 1b
add %o0, 0x8, %o0 add %o0, 0x8, %o0
...@@ -292,30 +370,33 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -292,30 +370,33 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
andcc %o2, 0x8, %g0 andcc %o2, 0x8, %g0
be,pt %icc, 1f be,pt %icc, 1f
nop nop
EX_LD(LOAD(ldx, %o1, %o5)) EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2)
EX_ST(STORE(stx, %o5, %o1 + %o3)) EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2)
add %o1, 0x8, %o1 add %o1, 0x8, %o1
sub %o2, 8, %o2
1: andcc %o2, 0x4, %g0 1: andcc %o2, 0x4, %g0
be,pt %icc, 1f be,pt %icc, 1f
nop nop
EX_LD(LOAD(lduw, %o1, %o5)) EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2)
EX_ST(STORE(stw, %o5, %o1 + %o3)) EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2)
add %o1, 0x4, %o1 add %o1, 0x4, %o1
sub %o2, 4, %o2
1: andcc %o2, 0x2, %g0 1: andcc %o2, 0x2, %g0
be,pt %icc, 1f be,pt %icc, 1f
nop nop
EX_LD(LOAD(lduh, %o1, %o5)) EX_LD(LOAD(lduh, %o1, %o5), U3_retl_o2)
EX_ST(STORE(sth, %o5, %o1 + %o3)) EX_ST(STORE(sth, %o5, %o1 + %o3), U3_retl_o2)
add %o1, 0x2, %o1 add %o1, 0x2, %o1
sub %o2, 2, %o2
1: andcc %o2, 0x1, %g0 1: andcc %o2, 0x1, %g0
be,pt %icc, 85f be,pt %icc, 85f
nop nop
EX_LD(LOAD(ldub, %o1, %o5)) EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2)
ba,pt %xcc, 85f ba,pt %xcc, 85f
EX_ST(STORE(stb, %o5, %o1 + %o3)) EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2)
.align 64 .align 64
70: /* 16 < len <= 64 */ 70: /* 16 < len <= 64 */
...@@ -326,26 +407,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -326,26 +407,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
andn %o2, 0xf, GLOBAL_SPARE andn %o2, 0xf, GLOBAL_SPARE
and %o2, 0xf, %o2 and %o2, 0xf, %o2
1: subcc GLOBAL_SPARE, 0x10, GLOBAL_SPARE 1: subcc GLOBAL_SPARE, 0x10, GLOBAL_SPARE
EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U3_retl_o2_plus_GS_plus_0x10)
EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U3_retl_o2_plus_GS_plus_0x10)
EX_ST(STORE(stx, %o5, %o1 + %o3)) EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x10)
add %o1, 0x8, %o1 add %o1, 0x8, %o1
EX_ST(STORE(stx, %g1, %o1 + %o3)) EX_ST(STORE(stx, %g1, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x08)
bgu,pt %XCC, 1b bgu,pt %XCC, 1b
add %o1, 0x8, %o1 add %o1, 0x8, %o1
73: andcc %o2, 0x8, %g0 73: andcc %o2, 0x8, %g0
be,pt %XCC, 1f be,pt %XCC, 1f
nop nop
sub %o2, 0x8, %o2 sub %o2, 0x8, %o2
EX_LD(LOAD(ldx, %o1, %o5)) EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2_plus_8)
EX_ST(STORE(stx, %o5, %o1 + %o3)) EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_8)
add %o1, 0x8, %o1 add %o1, 0x8, %o1
1: andcc %o2, 0x4, %g0 1: andcc %o2, 0x4, %g0
be,pt %XCC, 1f be,pt %XCC, 1f
nop nop
sub %o2, 0x4, %o2 sub %o2, 0x4, %o2
EX_LD(LOAD(lduw, %o1, %o5)) EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2_plus_4)
EX_ST(STORE(stw, %o5, %o1 + %o3)) EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2_plus_4)
add %o1, 0x4, %o1 add %o1, 0x4, %o1
1: cmp %o2, 0 1: cmp %o2, 0
be,pt %XCC, 85f be,pt %XCC, 85f
...@@ -361,8 +442,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -361,8 +442,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
sub %o2, %g1, %o2 sub %o2, %g1, %o2
1: subcc %g1, 1, %g1 1: subcc %g1, 1, %g1
EX_LD(LOAD(ldub, %o1, %o5)) EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2_plus_g1_plus_1)
EX_ST(STORE(stb, %o5, %o1 + %o3)) EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2_plus_g1_plus_1)
bgu,pt %icc, 1b bgu,pt %icc, 1b
add %o1, 1, %o1 add %o1, 1, %o1
...@@ -378,16 +459,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -378,16 +459,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
8: mov 64, %o3 8: mov 64, %o3
andn %o1, 0x7, %o1 andn %o1, 0x7, %o1
EX_LD(LOAD(ldx, %o1, %g2)) EX_LD(LOAD(ldx, %o1, %g2), U3_retl_o2)
sub %o3, %g1, %o3 sub %o3, %g1, %o3
andn %o2, 0x7, GLOBAL_SPARE andn %o2, 0x7, GLOBAL_SPARE
sllx %g2, %g1, %g2 sllx %g2, %g1, %g2
1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) 1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U3_retl_o2_and_7_plus_GS)
subcc GLOBAL_SPARE, 0x8, GLOBAL_SPARE subcc GLOBAL_SPARE, 0x8, GLOBAL_SPARE
add %o1, 0x8, %o1 add %o1, 0x8, %o1
srlx %g3, %o3, %o5 srlx %g3, %o3, %o5
or %o5, %g2, %o5 or %o5, %g2, %o5
EX_ST(STORE(stx, %o5, %o0)) EX_ST(STORE(stx, %o5, %o0), U3_retl_o2_and_7_plus_GS_plus_8)
add %o0, 0x8, %o0 add %o0, 0x8, %o0
bgu,pt %icc, 1b bgu,pt %icc, 1b
sllx %g3, %g1, %g2 sllx %g3, %g1, %g2
...@@ -407,8 +488,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -407,8 +488,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
1: 1:
subcc %o2, 4, %o2 subcc %o2, 4, %o2
EX_LD(LOAD(lduw, %o1, %g1)) EX_LD(LOAD(lduw, %o1, %g1), U3_retl_o2_plus_4)
EX_ST(STORE(stw, %g1, %o1 + %o3)) EX_ST(STORE(stw, %g1, %o1 + %o3), U3_retl_o2_plus_4)
bgu,pt %XCC, 1b bgu,pt %XCC, 1b
add %o1, 4, %o1 add %o1, 4, %o1
...@@ -418,8 +499,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -418,8 +499,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
.align 32 .align 32
90: 90:
subcc %o2, 1, %o2 subcc %o2, 1, %o2
EX_LD(LOAD(ldub, %o1, %g1)) EX_LD(LOAD(ldub, %o1, %g1), U3_retl_o2_plus_1)
EX_ST(STORE(stb, %g1, %o1 + %o3)) EX_ST(STORE(stb, %g1, %o1 + %o3), U3_retl_o2_plus_1)
bgu,pt %XCC, 90b bgu,pt %XCC, 90b
add %o1, 1, %o1 add %o1, 1, %o1
retl retl
......
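The new "sub %o2, 8/4/2, %o2" instructions in the 16-byte-or-less tail path keep %o2 honest as each chunk lands, so a later fault can report plain %o2 (U3_retl_o2) as the exact residual. A loose C model of that bookkeeping, for orientation only; copy_tail_model() is a made-up name and the chunk copies are elided.

/* Sketch of the tail-copy bookkeeping: %o2 is decremented only after a
 * chunk's load and store both succeed, so a fault inside a chunk still
 * counts that chunk in the residual reported by U3_retl_o2. */
static unsigned long copy_tail_model(unsigned long o2)
{
	if (o2 & 8) { /* 8-byte chunk copied */ o2 -= 8; }
	if (o2 & 4) { /* 4-byte chunk copied */ o2 -= 4; }
	if (o2 & 2) { /* 2-byte chunk copied */ o2 -= 2; }
	return o2;	/* residual if a fault hits the final byte copy */
}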
...@@ -9,18 +9,33 @@ ...@@ -9,18 +9,33 @@
#define XCC xcc #define XCC xcc
#define EX(x,y) \ #define EX(x,y,z) \
98: x,y; \ 98: x,y; \
.section __ex_table,"a";\ .section __ex_table,"a";\
.align 4; \ .align 4; \
.word 98b, __retl_one; \ .word 98b, z; \
.text; \ .text; \
.align 4; .align 4;
#define EX_O4(x,y) EX(x,y,__retl_o4_plus_8)
#define EX_O2_4(x,y) EX(x,y,__retl_o2_plus_4)
#define EX_O2_1(x,y) EX(x,y,__retl_o2_plus_1)
.register %g2,#scratch .register %g2,#scratch
.register %g3,#scratch .register %g3,#scratch
.text .text
__retl_o4_plus_8:
add %o4, %o2, %o4
retl
add %o4, 8, %o0
__retl_o2_plus_4:
retl
add %o2, 4, %o0
__retl_o2_plus_1:
retl
add %o2, 1, %o0
.align 32 .align 32
/* Don't try to get too fancy here, just nice and /* Don't try to get too fancy here, just nice and
...@@ -45,8 +60,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ ...@@ -45,8 +60,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
andn %o2, 0x7, %o4 andn %o2, 0x7, %o4
and %o2, 0x7, %o2 and %o2, 0x7, %o2
1: subcc %o4, 0x8, %o4 1: subcc %o4, 0x8, %o4
EX(ldxa [%o1] %asi, %o5) EX_O4(ldxa [%o1] %asi, %o5)
EX(stxa %o5, [%o0] %asi) EX_O4(stxa %o5, [%o0] %asi)
add %o1, 0x8, %o1 add %o1, 0x8, %o1
bgu,pt %XCC, 1b bgu,pt %XCC, 1b
add %o0, 0x8, %o0 add %o0, 0x8, %o0
...@@ -54,8 +69,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ ...@@ -54,8 +69,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
be,pt %XCC, 1f be,pt %XCC, 1f
nop nop
sub %o2, 0x4, %o2 sub %o2, 0x4, %o2
EX(lduwa [%o1] %asi, %o5) EX_O2_4(lduwa [%o1] %asi, %o5)
EX(stwa %o5, [%o0] %asi) EX_O2_4(stwa %o5, [%o0] %asi)
add %o1, 0x4, %o1 add %o1, 0x4, %o1
add %o0, 0x4, %o0 add %o0, 0x4, %o0
1: cmp %o2, 0 1: cmp %o2, 0
...@@ -71,8 +86,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ ...@@ -71,8 +86,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
82: 82:
subcc %o2, 4, %o2 subcc %o2, 4, %o2
EX(lduwa [%o1] %asi, %g1) EX_O2_4(lduwa [%o1] %asi, %g1)
EX(stwa %g1, [%o0] %asi) EX_O2_4(stwa %g1, [%o0] %asi)
add %o1, 4, %o1 add %o1, 4, %o1
bgu,pt %XCC, 82b bgu,pt %XCC, 82b
add %o0, 4, %o0 add %o0, 4, %o0
...@@ -83,8 +98,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ ...@@ -83,8 +98,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */
.align 32 .align 32
90: 90:
subcc %o2, 1, %o2 subcc %o2, 1, %o2
EX(lduba [%o1] %asi, %g1) EX_O2_1(lduba [%o1] %asi, %g1)
EX(stba %g1, [%o0] %asi) EX_O2_1(stba %g1, [%o0] %asi)
add %o1, 1, %o1 add %o1, 1, %o1
bgu,pt %XCC, 90b bgu,pt %XCC, 90b
add %o0, 1, %o0 add %o0, 1, %o0
......
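Aside (not part of the commit): the ___copy_in_user changes follow the same pattern. In that routine %o4 holds the 8-byte-aligned portion still to copy (already decremented by 8 for the word in flight) and %o2 holds the sub-8-byte tail, so a fault under EX_O4 must report %o4 + %o2 + 8, while EX_O2_4 and EX_O2_1 report %o2 + 4 and %o2 + 1. A minimal C model of that arithmetic, purely to make the register roles explicit (nothing here is kernel API):

```c
/* Stand-alone model of the residuals computed by the new fixup stubs in
 * copy_in_user.S.  The struct fields mirror the SPARC registers at the
 * moment a fault is taken; all names are illustrative only. */
struct copy_regs {
	unsigned long o2;	/* tail bytes (len % 8), or current short-loop count */
	unsigned long o4;	/* bytes left in the 8-byte loop, already decremented */
};

/* __retl_o4_plus_8: fault inside the 8-byte word loop */
static unsigned long retl_o4_plus_8(const struct copy_regs *r)
{
	return r->o4 + r->o2 + 8;	/* pending words + tail + word in flight */
}

/* __retl_o2_plus_4: fault while moving a 4-byte word */
static unsigned long retl_o2_plus_4(const struct copy_regs *r)
{
	return r->o2 + 4;
}

/* __retl_o2_plus_1: fault in the byte-at-a-time loop */
static unsigned long retl_o2_plus_1(const struct copy_regs *r)
{
	return r->o2 + 1;
}
```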
/* user_fixup.c: Fix up user copy faults.
*
* Copyright (C) 2004 David S. Miller <davem@redhat.com>
*/
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/module.h>
#include <asm/uaccess.h>
/* Calculating the exact fault address when using
* block loads and stores can be very complicated.
*
* Instead of trying to be clever and handling all
* of the cases, just fix things up simply here.
*/
static unsigned long compute_size(unsigned long start, unsigned long size, unsigned long *offset)
{
unsigned long fault_addr = current_thread_info()->fault_address;
unsigned long end = start + size;
if (fault_addr < start || fault_addr >= end) {
*offset = 0;
} else {
*offset = fault_addr - start;
size = end - fault_addr;
}
return size;
}
unsigned long copy_from_user_fixup(void *to, const void __user *from, unsigned long size)
{
unsigned long offset;
size = compute_size((unsigned long) from, size, &offset);
if (likely(size))
memset(to + offset, 0, size);
return size;
}
EXPORT_SYMBOL(copy_from_user_fixup);
unsigned long copy_to_user_fixup(void __user *to, const void *from, unsigned long size)
{
unsigned long offset;
return compute_size((unsigned long) to, size, &offset);
}
EXPORT_SYMBOL(copy_to_user_fixup);
unsigned long copy_in_user_fixup(void __user *to, void __user *from, unsigned long size)
{
unsigned long fault_addr = current_thread_info()->fault_address;
unsigned long start = (unsigned long) to;
unsigned long end = start + size;
if (fault_addr >= start && fault_addr < end)
return end - fault_addr;
start = (unsigned long) from;
end = start + size;
if (fault_addr >= start && fault_addr < end)
return end - fault_addr;
return size;
}
EXPORT_SYMBOL(copy_in_user_fixup);
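Aside (not part of the commit): user_fixup.c above is deleted because its window-based estimate is exactly what the new per-instruction fixup stubs make unnecessary. A stand-alone model of the coarse residual it used to report, with purely illustrative numbers:

```c
/* Model of the old compute_size()/copy_in_user_fixup() residual:
 * everything from the faulting address to the end of the copy window
 * counts as "not copied", even if some of it had in fact been moved. */
#include <stdio.h>

static unsigned long coarse_residual(unsigned long start, unsigned long size,
				     unsigned long fault_addr)
{
	unsigned long end = start + size;

	if (fault_addr < start || fault_addr >= end)
		return size;		/* fault outside the copy window */
	return end - fault_addr;	/* bytes at and beyond the fault */
}

int main(void)
{
	/* A 256-byte copy that faults 64 bytes in reports 192 bytes left,
	 * regardless of how much of that tail actually made it across. */
	printf("%lu\n", coarse_residual(0x1000, 256, 0x1040));
	return 0;
}
```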
...@@ -27,6 +27,20 @@ static inline int tag_compare(unsigned long tag, unsigned long vaddr) ...@@ -27,6 +27,20 @@ static inline int tag_compare(unsigned long tag, unsigned long vaddr)
return (tag == (vaddr >> 22)); return (tag == (vaddr >> 22));
} }
static void flush_tsb_kernel_range_scan(unsigned long start, unsigned long end)
{
unsigned long idx;
for (idx = 0; idx < KERNEL_TSB_NENTRIES; idx++) {
struct tsb *ent = &swapper_tsb[idx];
unsigned long match = idx << 13;
match |= (ent->tag << 22);
if (match >= start && match < end)
ent->tag = (1UL << TSB_TAG_INVALID_BIT);
}
}
/* TSB flushes need only occur on the processor initiating the address /* TSB flushes need only occur on the processor initiating the address
* space modification, not on each cpu the address space has run on. * space modification, not on each cpu the address space has run on.
* Only the TLB flush needs that treatment. * Only the TLB flush needs that treatment.
...@@ -36,6 +50,9 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end) ...@@ -36,6 +50,9 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end)
{ {
unsigned long v; unsigned long v;
if ((end - start) >> PAGE_SHIFT >= 2 * KERNEL_TSB_NENTRIES)
return flush_tsb_kernel_range_scan(start, end);
for (v = start; v < end; v += PAGE_SIZE) { for (v = start; v < end; v += PAGE_SIZE) {
unsigned long hash = tsb_hash(v, PAGE_SHIFT, unsigned long hash = tsb_hash(v, PAGE_SHIFT,
KERNEL_TSB_NENTRIES); KERNEL_TSB_NENTRIES);
......
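Aside (not part of the commit): the tsb.c change above bounds the work of a kernel TSB flush. Once the range spans at least twice as many pages as the kernel TSB has entries, per-page hashing would walk the direct-mapped table more than once over, so a single linear scan of the entries is the cheaper, bounded option. A rough cost model of that threshold, with nentries standing in for KERNEL_TSB_NENTRIES (illustrative only, not kernel code):

```c
/* Illustrative cost model for the threshold in flush_tsb_kernel_range():
 * probes performed as a function of the range size in pages. */
static unsigned long tsb_flush_probes(unsigned long range_pages,
				      unsigned long nentries)
{
	if (range_pages >= 2 * nentries)
		return nentries;	/* one bounded scan of the whole TSB */
	return range_pages;		/* otherwise, one hash probe per page */
}
```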
...@@ -30,7 +30,7 @@ ...@@ -30,7 +30,7 @@
.text .text
.align 32 .align 32
.globl __flush_tlb_mm .globl __flush_tlb_mm
__flush_tlb_mm: /* 18 insns */ __flush_tlb_mm: /* 19 insns */
/* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */ /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
ldxa [%o1] ASI_DMMU, %g2 ldxa [%o1] ASI_DMMU, %g2
cmp %g2, %o0 cmp %g2, %o0
...@@ -81,7 +81,7 @@ __flush_tlb_page: /* 22 insns */ ...@@ -81,7 +81,7 @@ __flush_tlb_page: /* 22 insns */
.align 32 .align 32
.globl __flush_tlb_pending .globl __flush_tlb_pending
__flush_tlb_pending: /* 26 insns */ __flush_tlb_pending: /* 27 insns */
/* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
rdpr %pstate, %g7 rdpr %pstate, %g7
sllx %o1, 3, %o1 sllx %o1, 3, %o1
...@@ -113,12 +113,14 @@ __flush_tlb_pending: /* 26 insns */ ...@@ -113,12 +113,14 @@ __flush_tlb_pending: /* 26 insns */
.align 32 .align 32
.globl __flush_tlb_kernel_range .globl __flush_tlb_kernel_range
__flush_tlb_kernel_range: /* 16 insns */ __flush_tlb_kernel_range: /* 31 insns */
/* %o0=start, %o1=end */ /* %o0=start, %o1=end */
cmp %o0, %o1 cmp %o0, %o1
be,pn %xcc, 2f be,pn %xcc, 2f
sethi %hi(PAGE_SIZE), %o4
sub %o1, %o0, %o3 sub %o1, %o0, %o3
srlx %o3, 18, %o4
brnz,pn %o4, __spitfire_flush_tlb_kernel_range_slow
sethi %hi(PAGE_SIZE), %o4
sub %o3, %o4, %o3 sub %o3, %o4, %o3
or %o0, 0x20, %o0 ! Nucleus or %o0, 0x20, %o0 ! Nucleus
1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP 1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP
...@@ -131,6 +133,41 @@ __flush_tlb_kernel_range: /* 16 insns */ ...@@ -131,6 +133,41 @@ __flush_tlb_kernel_range: /* 16 insns */
retl retl
nop nop
nop nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
__spitfire_flush_tlb_kernel_range_slow:
mov 63 * 8, %o4
1: ldxa [%o4] ASI_ITLB_DATA_ACCESS, %o3
andcc %o3, 0x40, %g0 /* _PAGE_L_4U */
bne,pn %xcc, 2f
mov TLB_TAG_ACCESS, %o3
stxa %g0, [%o3] ASI_IMMU
stxa %g0, [%o4] ASI_ITLB_DATA_ACCESS
membar #Sync
2: ldxa [%o4] ASI_DTLB_DATA_ACCESS, %o3
andcc %o3, 0x40, %g0
bne,pn %xcc, 2f
mov TLB_TAG_ACCESS, %o3
stxa %g0, [%o3] ASI_DMMU
stxa %g0, [%o4] ASI_DTLB_DATA_ACCESS
membar #Sync
2: sub %o4, 8, %o4
brgez,pt %o4, 1b
nop
retl
nop
__spitfire_flush_tlb_mm_slow: __spitfire_flush_tlb_mm_slow:
rdpr %pstate, %g1 rdpr %pstate, %g1
...@@ -285,6 +322,40 @@ __cheetah_flush_tlb_pending: /* 27 insns */ ...@@ -285,6 +322,40 @@ __cheetah_flush_tlb_pending: /* 27 insns */
retl retl
wrpr %g7, 0x0, %pstate wrpr %g7, 0x0, %pstate
__cheetah_flush_tlb_kernel_range: /* 31 insns */
/* %o0=start, %o1=end */
cmp %o0, %o1
be,pn %xcc, 2f
sub %o1, %o0, %o3
srlx %o3, 18, %o4
brnz,pn %o4, 3f
sethi %hi(PAGE_SIZE), %o4
sub %o3, %o4, %o3
or %o0, 0x20, %o0 ! Nucleus
1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP
stxa %g0, [%o0 + %o3] ASI_IMMU_DEMAP
membar #Sync
brnz,pt %o3, 1b
sub %o3, %o4, %o3
2: sethi %hi(KERNBASE), %o3
flush %o3
retl
nop
3: mov 0x80, %o4
stxa %g0, [%o4] ASI_DMMU_DEMAP
membar #Sync
stxa %g0, [%o4] ASI_IMMU_DEMAP
membar #Sync
retl
nop
nop
nop
nop
nop
nop
nop
nop
#ifdef DCACHE_ALIASING_POSSIBLE #ifdef DCACHE_ALIASING_POSSIBLE
__cheetah_flush_dcache_page: /* 11 insns */ __cheetah_flush_dcache_page: /* 11 insns */
sethi %hi(PAGE_OFFSET), %g1 sethi %hi(PAGE_OFFSET), %g1
...@@ -309,19 +380,28 @@ __hypervisor_tlb_tl0_error: ...@@ -309,19 +380,28 @@ __hypervisor_tlb_tl0_error:
ret ret
restore restore
__hypervisor_flush_tlb_mm: /* 10 insns */ __hypervisor_flush_tlb_mm: /* 19 insns */
mov %o0, %o2 /* ARG2: mmu context */ mov %o0, %o2 /* ARG2: mmu context */
mov 0, %o0 /* ARG0: CPU lists unimplemented */ mov 0, %o0 /* ARG0: CPU lists unimplemented */
mov 0, %o1 /* ARG1: CPU lists unimplemented */ mov 0, %o1 /* ARG1: CPU lists unimplemented */
mov HV_MMU_ALL, %o3 /* ARG3: flags */ mov HV_MMU_ALL, %o3 /* ARG3: flags */
mov HV_FAST_MMU_DEMAP_CTX, %o5 mov HV_FAST_MMU_DEMAP_CTX, %o5
ta HV_FAST_TRAP ta HV_FAST_TRAP
brnz,pn %o0, __hypervisor_tlb_tl0_error brnz,pn %o0, 1f
mov HV_FAST_MMU_DEMAP_CTX, %o1 mov HV_FAST_MMU_DEMAP_CTX, %o1
retl retl
nop nop
1: sethi %hi(__hypervisor_tlb_tl0_error), %o5
jmpl %o5 + %lo(__hypervisor_tlb_tl0_error), %g0
nop
nop
nop
nop
nop
nop
nop
__hypervisor_flush_tlb_page: /* 11 insns */ __hypervisor_flush_tlb_page: /* 22 insns */
/* %o0 = context, %o1 = vaddr */ /* %o0 = context, %o1 = vaddr */
mov %o0, %g2 mov %o0, %g2
mov %o1, %o0 /* ARG0: vaddr + IMMU-bit */ mov %o1, %o0 /* ARG0: vaddr + IMMU-bit */
...@@ -330,12 +410,23 @@ __hypervisor_flush_tlb_page: /* 11 insns */ ...@@ -330,12 +410,23 @@ __hypervisor_flush_tlb_page: /* 11 insns */
srlx %o0, PAGE_SHIFT, %o0 srlx %o0, PAGE_SHIFT, %o0
sllx %o0, PAGE_SHIFT, %o0 sllx %o0, PAGE_SHIFT, %o0
ta HV_MMU_UNMAP_ADDR_TRAP ta HV_MMU_UNMAP_ADDR_TRAP
brnz,pn %o0, __hypervisor_tlb_tl0_error brnz,pn %o0, 1f
mov HV_MMU_UNMAP_ADDR_TRAP, %o1 mov HV_MMU_UNMAP_ADDR_TRAP, %o1
retl retl
nop nop
1: sethi %hi(__hypervisor_tlb_tl0_error), %o2
jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
nop
nop
nop
nop
nop
nop
nop
nop
nop
__hypervisor_flush_tlb_pending: /* 16 insns */ __hypervisor_flush_tlb_pending: /* 27 insns */
/* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
sllx %o1, 3, %g1 sllx %o1, 3, %g1
mov %o2, %g2 mov %o2, %g2
...@@ -347,31 +438,57 @@ __hypervisor_flush_tlb_pending: /* 16 insns */ ...@@ -347,31 +438,57 @@ __hypervisor_flush_tlb_pending: /* 16 insns */
srlx %o0, PAGE_SHIFT, %o0 srlx %o0, PAGE_SHIFT, %o0
sllx %o0, PAGE_SHIFT, %o0 sllx %o0, PAGE_SHIFT, %o0
ta HV_MMU_UNMAP_ADDR_TRAP ta HV_MMU_UNMAP_ADDR_TRAP
brnz,pn %o0, __hypervisor_tlb_tl0_error brnz,pn %o0, 1f
mov HV_MMU_UNMAP_ADDR_TRAP, %o1 mov HV_MMU_UNMAP_ADDR_TRAP, %o1
brnz,pt %g1, 1b brnz,pt %g1, 1b
nop nop
retl retl
nop nop
1: sethi %hi(__hypervisor_tlb_tl0_error), %o2
jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
nop
nop
nop
nop
nop
nop
nop
nop
nop
__hypervisor_flush_tlb_kernel_range: /* 16 insns */ __hypervisor_flush_tlb_kernel_range: /* 31 insns */
/* %o0=start, %o1=end */ /* %o0=start, %o1=end */
cmp %o0, %o1 cmp %o0, %o1
be,pn %xcc, 2f be,pn %xcc, 2f
sethi %hi(PAGE_SIZE), %g3 sub %o1, %o0, %g2
srlx %g2, 18, %g3
brnz,pn %g3, 4f
mov %o0, %g1 mov %o0, %g1
sub %o1, %g1, %g2 sethi %hi(PAGE_SIZE), %g3
sub %g2, %g3, %g2 sub %g2, %g3, %g2
1: add %g1, %g2, %o0 /* ARG0: virtual address */ 1: add %g1, %g2, %o0 /* ARG0: virtual address */
mov 0, %o1 /* ARG1: mmu context */ mov 0, %o1 /* ARG1: mmu context */
mov HV_MMU_ALL, %o2 /* ARG2: flags */ mov HV_MMU_ALL, %o2 /* ARG2: flags */
ta HV_MMU_UNMAP_ADDR_TRAP ta HV_MMU_UNMAP_ADDR_TRAP
brnz,pn %o0, __hypervisor_tlb_tl0_error brnz,pn %o0, 3f
mov HV_MMU_UNMAP_ADDR_TRAP, %o1 mov HV_MMU_UNMAP_ADDR_TRAP, %o1
brnz,pt %g2, 1b brnz,pt %g2, 1b
sub %g2, %g3, %g2 sub %g2, %g3, %g2
2: retl 2: retl
nop nop
3: sethi %hi(__hypervisor_tlb_tl0_error), %o2
jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
nop
4: mov 0, %o0 /* ARG0: CPU lists unimplemented */
mov 0, %o1 /* ARG1: CPU lists unimplemented */
mov 0, %o2 /* ARG2: mmu context == nucleus */
mov HV_MMU_ALL, %o3 /* ARG3: flags */
mov HV_FAST_MMU_DEMAP_CTX, %o5
ta HV_FAST_TRAP
brnz,pn %o0, 3b
mov HV_FAST_MMU_DEMAP_CTX, %o1
retl
nop
#ifdef DCACHE_ALIASING_POSSIBLE #ifdef DCACHE_ALIASING_POSSIBLE
/* XXX Niagara and friends have an 8K cache, so no aliasing is /* XXX Niagara and friends have an 8K cache, so no aliasing is
...@@ -394,43 +511,6 @@ tlb_patch_one: ...@@ -394,43 +511,6 @@ tlb_patch_one:
retl retl
nop nop
.globl cheetah_patch_cachetlbops
cheetah_patch_cachetlbops:
save %sp, -128, %sp
sethi %hi(__flush_tlb_mm), %o0
or %o0, %lo(__flush_tlb_mm), %o0
sethi %hi(__cheetah_flush_tlb_mm), %o1
or %o1, %lo(__cheetah_flush_tlb_mm), %o1
call tlb_patch_one
mov 19, %o2
sethi %hi(__flush_tlb_page), %o0
or %o0, %lo(__flush_tlb_page), %o0
sethi %hi(__cheetah_flush_tlb_page), %o1
or %o1, %lo(__cheetah_flush_tlb_page), %o1
call tlb_patch_one
mov 22, %o2
sethi %hi(__flush_tlb_pending), %o0
or %o0, %lo(__flush_tlb_pending), %o0
sethi %hi(__cheetah_flush_tlb_pending), %o1
or %o1, %lo(__cheetah_flush_tlb_pending), %o1
call tlb_patch_one
mov 27, %o2
#ifdef DCACHE_ALIASING_POSSIBLE
sethi %hi(__flush_dcache_page), %o0
or %o0, %lo(__flush_dcache_page), %o0
sethi %hi(__cheetah_flush_dcache_page), %o1
or %o1, %lo(__cheetah_flush_dcache_page), %o1
call tlb_patch_one
mov 11, %o2
#endif /* DCACHE_ALIASING_POSSIBLE */
ret
restore
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
/* These are all called by the slaves of a cross call, at /* These are all called by the slaves of a cross call, at
* trap level 1, with interrupts fully disabled. * trap level 1, with interrupts fully disabled.
...@@ -447,7 +527,7 @@ cheetah_patch_cachetlbops: ...@@ -447,7 +527,7 @@ cheetah_patch_cachetlbops:
*/ */
.align 32 .align 32
.globl xcall_flush_tlb_mm .globl xcall_flush_tlb_mm
xcall_flush_tlb_mm: /* 21 insns */ xcall_flush_tlb_mm: /* 24 insns */
mov PRIMARY_CONTEXT, %g2 mov PRIMARY_CONTEXT, %g2
ldxa [%g2] ASI_DMMU, %g3 ldxa [%g2] ASI_DMMU, %g3
srlx %g3, CTX_PGSZ1_NUC_SHIFT, %g4 srlx %g3, CTX_PGSZ1_NUC_SHIFT, %g4
...@@ -469,9 +549,12 @@ xcall_flush_tlb_mm: /* 21 insns */ ...@@ -469,9 +549,12 @@ xcall_flush_tlb_mm: /* 21 insns */
nop nop
nop nop
nop nop
nop
nop
nop
.globl xcall_flush_tlb_page .globl xcall_flush_tlb_page
xcall_flush_tlb_page: /* 17 insns */ xcall_flush_tlb_page: /* 20 insns */
/* %g5=context, %g1=vaddr */ /* %g5=context, %g1=vaddr */
mov PRIMARY_CONTEXT, %g4 mov PRIMARY_CONTEXT, %g4
ldxa [%g4] ASI_DMMU, %g2 ldxa [%g4] ASI_DMMU, %g2
...@@ -490,14 +573,19 @@ xcall_flush_tlb_page: /* 17 insns */ ...@@ -490,14 +573,19 @@ xcall_flush_tlb_page: /* 17 insns */
retry retry
nop nop
nop nop
nop
nop
nop
.globl xcall_flush_tlb_kernel_range .globl xcall_flush_tlb_kernel_range
xcall_flush_tlb_kernel_range: /* 25 insns */ xcall_flush_tlb_kernel_range: /* 44 insns */
sethi %hi(PAGE_SIZE - 1), %g2 sethi %hi(PAGE_SIZE - 1), %g2
or %g2, %lo(PAGE_SIZE - 1), %g2 or %g2, %lo(PAGE_SIZE - 1), %g2
andn %g1, %g2, %g1 andn %g1, %g2, %g1
andn %g7, %g2, %g7 andn %g7, %g2, %g7
sub %g7, %g1, %g3 sub %g7, %g1, %g3
srlx %g3, 18, %g2
brnz,pn %g2, 2f
add %g2, 1, %g2 add %g2, 1, %g2
sub %g3, %g2, %g3 sub %g3, %g2, %g3
or %g1, 0x20, %g1 ! Nucleus or %g1, 0x20, %g1 ! Nucleus
...@@ -507,8 +595,25 @@ xcall_flush_tlb_kernel_range: /* 25 insns */ ...@@ -507,8 +595,25 @@ xcall_flush_tlb_kernel_range: /* 25 insns */
brnz,pt %g3, 1b brnz,pt %g3, 1b
sub %g3, %g2, %g3 sub %g3, %g2, %g3
retry retry
2: mov 63 * 8, %g1
1: ldxa [%g1] ASI_ITLB_DATA_ACCESS, %g2
andcc %g2, 0x40, %g0 /* _PAGE_L_4U */
bne,pn %xcc, 2f
mov TLB_TAG_ACCESS, %g2
stxa %g0, [%g2] ASI_IMMU
stxa %g0, [%g1] ASI_ITLB_DATA_ACCESS
membar #Sync
2: ldxa [%g1] ASI_DTLB_DATA_ACCESS, %g2
andcc %g2, 0x40, %g0
bne,pn %xcc, 2f
mov TLB_TAG_ACCESS, %g2
stxa %g0, [%g2] ASI_DMMU
stxa %g0, [%g1] ASI_DTLB_DATA_ACCESS
membar #Sync
2: sub %g1, 8, %g1
brgez,pt %g1, 1b
nop nop
nop retry
nop nop
nop nop
nop nop
...@@ -637,6 +742,52 @@ xcall_fetch_glob_pmu_n4: ...@@ -637,6 +742,52 @@ xcall_fetch_glob_pmu_n4:
retry retry
__cheetah_xcall_flush_tlb_kernel_range: /* 44 insns */
sethi %hi(PAGE_SIZE - 1), %g2
or %g2, %lo(PAGE_SIZE - 1), %g2
andn %g1, %g2, %g1
andn %g7, %g2, %g7
sub %g7, %g1, %g3
srlx %g3, 18, %g2
brnz,pn %g2, 2f
add %g2, 1, %g2
sub %g3, %g2, %g3
or %g1, 0x20, %g1 ! Nucleus
1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP
stxa %g0, [%g1 + %g3] ASI_IMMU_DEMAP
membar #Sync
brnz,pt %g3, 1b
sub %g3, %g2, %g3
retry
2: mov 0x80, %g2
stxa %g0, [%g2] ASI_DMMU_DEMAP
membar #Sync
stxa %g0, [%g2] ASI_IMMU_DEMAP
membar #Sync
retry
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
#ifdef DCACHE_ALIASING_POSSIBLE #ifdef DCACHE_ALIASING_POSSIBLE
.align 32 .align 32
.globl xcall_flush_dcache_page_cheetah .globl xcall_flush_dcache_page_cheetah
...@@ -700,7 +851,7 @@ __hypervisor_tlb_xcall_error: ...@@ -700,7 +851,7 @@ __hypervisor_tlb_xcall_error:
ba,a,pt %xcc, rtrap ba,a,pt %xcc, rtrap
.globl __hypervisor_xcall_flush_tlb_mm .globl __hypervisor_xcall_flush_tlb_mm
__hypervisor_xcall_flush_tlb_mm: /* 21 insns */ __hypervisor_xcall_flush_tlb_mm: /* 24 insns */
/* %g5=ctx, g1,g2,g3,g4,g7=scratch, %g6=unusable */ /* %g5=ctx, g1,g2,g3,g4,g7=scratch, %g6=unusable */
mov %o0, %g2 mov %o0, %g2
mov %o1, %g3 mov %o1, %g3
...@@ -714,7 +865,7 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */ ...@@ -714,7 +865,7 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */
mov HV_FAST_MMU_DEMAP_CTX, %o5 mov HV_FAST_MMU_DEMAP_CTX, %o5
ta HV_FAST_TRAP ta HV_FAST_TRAP
mov HV_FAST_MMU_DEMAP_CTX, %g6 mov HV_FAST_MMU_DEMAP_CTX, %g6
brnz,pn %o0, __hypervisor_tlb_xcall_error brnz,pn %o0, 1f
mov %o0, %g5 mov %o0, %g5
mov %g2, %o0 mov %g2, %o0
mov %g3, %o1 mov %g3, %o1
...@@ -723,9 +874,12 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */ ...@@ -723,9 +874,12 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */
mov %g7, %o5 mov %g7, %o5
membar #Sync membar #Sync
retry retry
1: sethi %hi(__hypervisor_tlb_xcall_error), %g4
jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
nop
.globl __hypervisor_xcall_flush_tlb_page .globl __hypervisor_xcall_flush_tlb_page
__hypervisor_xcall_flush_tlb_page: /* 17 insns */ __hypervisor_xcall_flush_tlb_page: /* 20 insns */
/* %g5=ctx, %g1=vaddr */ /* %g5=ctx, %g1=vaddr */
mov %o0, %g2 mov %o0, %g2
mov %o1, %g3 mov %o1, %g3
...@@ -737,42 +891,64 @@ __hypervisor_xcall_flush_tlb_page: /* 17 insns */ ...@@ -737,42 +891,64 @@ __hypervisor_xcall_flush_tlb_page: /* 17 insns */
sllx %o0, PAGE_SHIFT, %o0 sllx %o0, PAGE_SHIFT, %o0
ta HV_MMU_UNMAP_ADDR_TRAP ta HV_MMU_UNMAP_ADDR_TRAP
mov HV_MMU_UNMAP_ADDR_TRAP, %g6 mov HV_MMU_UNMAP_ADDR_TRAP, %g6
brnz,a,pn %o0, __hypervisor_tlb_xcall_error brnz,a,pn %o0, 1f
mov %o0, %g5 mov %o0, %g5
mov %g2, %o0 mov %g2, %o0
mov %g3, %o1 mov %g3, %o1
mov %g4, %o2 mov %g4, %o2
membar #Sync membar #Sync
retry retry
1: sethi %hi(__hypervisor_tlb_xcall_error), %g4
jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
nop
.globl __hypervisor_xcall_flush_tlb_kernel_range .globl __hypervisor_xcall_flush_tlb_kernel_range
__hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */ __hypervisor_xcall_flush_tlb_kernel_range: /* 44 insns */
/* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */ /* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */
sethi %hi(PAGE_SIZE - 1), %g2 sethi %hi(PAGE_SIZE - 1), %g2
or %g2, %lo(PAGE_SIZE - 1), %g2 or %g2, %lo(PAGE_SIZE - 1), %g2
andn %g1, %g2, %g1 andn %g1, %g2, %g1
andn %g7, %g2, %g7 andn %g7, %g2, %g7
sub %g7, %g1, %g3 sub %g7, %g1, %g3
srlx %g3, 18, %g7
add %g2, 1, %g2 add %g2, 1, %g2
sub %g3, %g2, %g3 sub %g3, %g2, %g3
mov %o0, %g2 mov %o0, %g2
mov %o1, %g4 mov %o1, %g4
brnz,pn %g7, 2f
mov %o2, %g7 mov %o2, %g7
1: add %g1, %g3, %o0 /* ARG0: virtual address */ 1: add %g1, %g3, %o0 /* ARG0: virtual address */
mov 0, %o1 /* ARG1: mmu context */ mov 0, %o1 /* ARG1: mmu context */
mov HV_MMU_ALL, %o2 /* ARG2: flags */ mov HV_MMU_ALL, %o2 /* ARG2: flags */
ta HV_MMU_UNMAP_ADDR_TRAP ta HV_MMU_UNMAP_ADDR_TRAP
mov HV_MMU_UNMAP_ADDR_TRAP, %g6 mov HV_MMU_UNMAP_ADDR_TRAP, %g6
brnz,pn %o0, __hypervisor_tlb_xcall_error brnz,pn %o0, 1f
mov %o0, %g5 mov %o0, %g5
sethi %hi(PAGE_SIZE), %o2 sethi %hi(PAGE_SIZE), %o2
brnz,pt %g3, 1b brnz,pt %g3, 1b
sub %g3, %o2, %g3 sub %g3, %o2, %g3
mov %g2, %o0 5: mov %g2, %o0
mov %g4, %o1 mov %g4, %o1
mov %g7, %o2 mov %g7, %o2
membar #Sync membar #Sync
retry retry
1: sethi %hi(__hypervisor_tlb_xcall_error), %g4
jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
nop
2: mov %o3, %g1
mov %o5, %g3
mov 0, %o0 /* ARG0: CPU lists unimplemented */
mov 0, %o1 /* ARG1: CPU lists unimplemented */
mov 0, %o2 /* ARG2: mmu context == nucleus */
mov HV_MMU_ALL, %o3 /* ARG3: flags */
mov HV_FAST_MMU_DEMAP_CTX, %o5
ta HV_FAST_TRAP
mov %g1, %o3
brz,pt %o0, 5b
mov %g3, %o5
mov HV_FAST_MMU_DEMAP_CTX, %g6
ba,pt %xcc, 1b
clr %g5
/* These just get rescheduled to PIL vectors. */ /* These just get rescheduled to PIL vectors. */
.globl xcall_call_function .globl xcall_call_function
...@@ -809,6 +985,58 @@ xcall_kgdb_capture: ...@@ -809,6 +985,58 @@ xcall_kgdb_capture:
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
.globl cheetah_patch_cachetlbops
cheetah_patch_cachetlbops:
save %sp, -128, %sp
sethi %hi(__flush_tlb_mm), %o0
or %o0, %lo(__flush_tlb_mm), %o0
sethi %hi(__cheetah_flush_tlb_mm), %o1
or %o1, %lo(__cheetah_flush_tlb_mm), %o1
call tlb_patch_one
mov 19, %o2
sethi %hi(__flush_tlb_page), %o0
or %o0, %lo(__flush_tlb_page), %o0
sethi %hi(__cheetah_flush_tlb_page), %o1
or %o1, %lo(__cheetah_flush_tlb_page), %o1
call tlb_patch_one
mov 22, %o2
sethi %hi(__flush_tlb_pending), %o0
or %o0, %lo(__flush_tlb_pending), %o0
sethi %hi(__cheetah_flush_tlb_pending), %o1
or %o1, %lo(__cheetah_flush_tlb_pending), %o1
call tlb_patch_one
mov 27, %o2
sethi %hi(__flush_tlb_kernel_range), %o0
or %o0, %lo(__flush_tlb_kernel_range), %o0
sethi %hi(__cheetah_flush_tlb_kernel_range), %o1
or %o1, %lo(__cheetah_flush_tlb_kernel_range), %o1
call tlb_patch_one
mov 31, %o2
#ifdef DCACHE_ALIASING_POSSIBLE
sethi %hi(__flush_dcache_page), %o0
or %o0, %lo(__flush_dcache_page), %o0
sethi %hi(__cheetah_flush_dcache_page), %o1
or %o1, %lo(__cheetah_flush_dcache_page), %o1
call tlb_patch_one
mov 11, %o2
#endif /* DCACHE_ALIASING_POSSIBLE */
#ifdef CONFIG_SMP
sethi %hi(xcall_flush_tlb_kernel_range), %o0
or %o0, %lo(xcall_flush_tlb_kernel_range), %o0
sethi %hi(__cheetah_xcall_flush_tlb_kernel_range), %o1
or %o1, %lo(__cheetah_xcall_flush_tlb_kernel_range), %o1
call tlb_patch_one
mov 44, %o2
#endif /* CONFIG_SMP */
ret
restore
.globl hypervisor_patch_cachetlbops .globl hypervisor_patch_cachetlbops
hypervisor_patch_cachetlbops: hypervisor_patch_cachetlbops:
...@@ -819,28 +1047,28 @@ hypervisor_patch_cachetlbops: ...@@ -819,28 +1047,28 @@ hypervisor_patch_cachetlbops:
sethi %hi(__hypervisor_flush_tlb_mm), %o1 sethi %hi(__hypervisor_flush_tlb_mm), %o1
or %o1, %lo(__hypervisor_flush_tlb_mm), %o1 or %o1, %lo(__hypervisor_flush_tlb_mm), %o1
call tlb_patch_one call tlb_patch_one
mov 10, %o2 mov 19, %o2
sethi %hi(__flush_tlb_page), %o0 sethi %hi(__flush_tlb_page), %o0
or %o0, %lo(__flush_tlb_page), %o0 or %o0, %lo(__flush_tlb_page), %o0
sethi %hi(__hypervisor_flush_tlb_page), %o1 sethi %hi(__hypervisor_flush_tlb_page), %o1
or %o1, %lo(__hypervisor_flush_tlb_page), %o1 or %o1, %lo(__hypervisor_flush_tlb_page), %o1
call tlb_patch_one call tlb_patch_one
mov 11, %o2 mov 22, %o2
sethi %hi(__flush_tlb_pending), %o0 sethi %hi(__flush_tlb_pending), %o0
or %o0, %lo(__flush_tlb_pending), %o0 or %o0, %lo(__flush_tlb_pending), %o0
sethi %hi(__hypervisor_flush_tlb_pending), %o1 sethi %hi(__hypervisor_flush_tlb_pending), %o1
or %o1, %lo(__hypervisor_flush_tlb_pending), %o1 or %o1, %lo(__hypervisor_flush_tlb_pending), %o1
call tlb_patch_one call tlb_patch_one
mov 16, %o2 mov 27, %o2
sethi %hi(__flush_tlb_kernel_range), %o0 sethi %hi(__flush_tlb_kernel_range), %o0
or %o0, %lo(__flush_tlb_kernel_range), %o0 or %o0, %lo(__flush_tlb_kernel_range), %o0
sethi %hi(__hypervisor_flush_tlb_kernel_range), %o1 sethi %hi(__hypervisor_flush_tlb_kernel_range), %o1
or %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1 or %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1
call tlb_patch_one call tlb_patch_one
mov 16, %o2 mov 31, %o2
#ifdef DCACHE_ALIASING_POSSIBLE #ifdef DCACHE_ALIASING_POSSIBLE
sethi %hi(__flush_dcache_page), %o0 sethi %hi(__flush_dcache_page), %o0
...@@ -857,21 +1085,21 @@ hypervisor_patch_cachetlbops: ...@@ -857,21 +1085,21 @@ hypervisor_patch_cachetlbops:
sethi %hi(__hypervisor_xcall_flush_tlb_mm), %o1 sethi %hi(__hypervisor_xcall_flush_tlb_mm), %o1
or %o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1 or %o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1
call tlb_patch_one call tlb_patch_one
mov 21, %o2 mov 24, %o2
sethi %hi(xcall_flush_tlb_page), %o0 sethi %hi(xcall_flush_tlb_page), %o0
or %o0, %lo(xcall_flush_tlb_page), %o0 or %o0, %lo(xcall_flush_tlb_page), %o0
sethi %hi(__hypervisor_xcall_flush_tlb_page), %o1 sethi %hi(__hypervisor_xcall_flush_tlb_page), %o1
or %o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1 or %o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1
call tlb_patch_one call tlb_patch_one
mov 17, %o2 mov 20, %o2
sethi %hi(xcall_flush_tlb_kernel_range), %o0 sethi %hi(xcall_flush_tlb_kernel_range), %o0
or %o0, %lo(xcall_flush_tlb_kernel_range), %o0 or %o0, %lo(xcall_flush_tlb_kernel_range), %o0
sethi %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1 sethi %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1
or %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1 or %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1
call tlb_patch_one call tlb_patch_one
mov 25, %o2 mov 44, %o2
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
ret ret
......
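Aside (not part of the commit): the `srlx %o3, 18` / `brnz` sequences added to the __flush_tlb_kernel_range variants implement a size cutoff. Any range of 256 KiB (2^18 bytes) or more takes a full-flush path (a nucleus-context demap on Cheetah and sun4v, or a full TLB entry walk on Spitfire) instead of demapping page by page, which is what makes enormous vmalloc unmaps tractable. A rough C model of that dispatch; the helper names are placeholders for the assembly paths in this series, not real kernel functions:

```c
/* Rough model of the patched __flush_tlb_kernel_range dispatch. */
#define LARGE_FLUSH_THRESHOLD	(1UL << 18)	/* 256 KiB, from "srlx ..., 18" */

static void flush_tlb_kernel_range_model(unsigned long start, unsigned long end,
					 void (*flush_page)(unsigned long va),
					 void (*flush_all)(void))
{
	unsigned long va;

	if (end - start >= LARGE_FLUSH_THRESHOLD) {
		/* Huge ranges: one full flush beats thousands of
		 * page-by-page demap operations. */
		flush_all();
		return;
	}
	for (va = start; va < end; va += 8192)	/* 8 KiB sparc64 base pages */
		flush_page(va);
}
```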