Commit e86c348c, authored by Rob Radez, committed by Keith M. Wesolowski

[SPARC32]: Non-controversial gcc-3.3 build fixes.

parent 4700c8b0
......@@ -145,36 +145,39 @@ cpout: retl ! get outta here
.globl C_LABEL(__csum_partial_copy_start), C_LABEL(__csum_partial_copy_end)
C_LABEL(__csum_partial_copy_start):
#define EX(x,y,a,b,z) \
/* Work around cpp -rob */
#define ALLOC #alloc
#define EXECINSTR #execinstr
#define EX(x,y,a,b) \
98: x,y; \
.section .fixup,z##alloc,z##execinstr; \
.section .fixup,ALLOC,EXECINSTR; \
.align 4; \
99: ba 30f; \
a, b, %o3; \
.section __ex_table,z##alloc; \
.section __ex_table,ALLOC; \
.align 4; \
.word 98b, 99b; \
.text; \
.align 4
#define EX2(x,y,z) \
#define EX2(x,y) \
98: x,y; \
.section __ex_table,z##alloc; \
.section __ex_table,ALLOC; \
.align 4; \
.word 98b, 30f; \
.text; \
.align 4
#define EX3(x,y,z) \
#define EX3(x,y) \
98: x,y; \
.section __ex_table,z##alloc; \
.section __ex_table,ALLOC; \
.align 4; \
.word 98b, 96f; \
.text; \
.align 4
#define EXT(start,end,handler,z) \
.section __ex_table,z##alloc; \
#define EXT(start,end,handler) \
.section __ex_table,ALLOC; \
.align 4; \
.word start, 0, end, handler; \
.text; \
......@@ -247,21 +250,21 @@ C_LABEL(__csum_partial_copy_start):
cc_end_cruft:
be 1f
andcc %o3, 4, %g0
EX(ldd [%o0 + 0x00], %g2, and %o3, 0xf,#)
EX(ldd [%o0 + 0x00], %g2, and %o3, 0xf)
add %o1, 8, %o1
addcc %g2, %g7, %g7
add %o0, 8, %o0
addxcc %g3, %g7, %g7
EX2(st %g2, [%o1 - 0x08],#)
EX2(st %g2, [%o1 - 0x08])
addx %g0, %g7, %g7
andcc %o3, 4, %g0
EX2(st %g3, [%o1 - 0x04],#)
EX2(st %g3, [%o1 - 0x04])
1: be 1f
andcc %o3, 3, %o3
EX(ld [%o0 + 0x00], %g2, add %o3, 4,#)
EX(ld [%o0 + 0x00], %g2, add %o3, 4)
add %o1, 4, %o1
addcc %g2, %g7, %g7
EX2(st %g2, [%o1 - 0x04],#)
EX2(st %g2, [%o1 - 0x04])
addx %g0, %g7, %g7
andcc %o3, 3, %g0
add %o0, 4, %o0
......@@ -271,14 +274,14 @@ cc_end_cruft:
subcc %o3, 2, %o3
b 4f
or %g0, %g0, %o4
2: EX(lduh [%o0 + 0x00], %o4, add %o3, 2,#)
2: EX(lduh [%o0 + 0x00], %o4, add %o3, 2)
add %o0, 2, %o0
EX2(sth %o4, [%o1 + 0x00],#)
EX2(sth %o4, [%o1 + 0x00])
be 6f
add %o1, 2, %o1
sll %o4, 16, %o4
4: EX(ldub [%o0 + 0x00], %o5, add %g0, 1,#)
EX2(stb %o5, [%o1 + 0x00],#)
4: EX(ldub [%o0 + 0x00], %o5, add %g0, 1)
EX2(stb %o5, [%o1 + 0x00])
sll %o5, 8, %o5
or %o5, %o4, %o4
6: addcc %o4, %g7, %g7
......@@ -295,9 +298,9 @@ cc_dword_align:
andcc %o0, 0x2, %g0
be 1f
andcc %o0, 0x4, %g0
EX(lduh [%o0 + 0x00], %g4, add %g1, 0,#)
EX(lduh [%o0 + 0x00], %g4, add %g1, 0)
sub %g1, 2, %g1
EX2(sth %g4, [%o1 + 0x00],#)
EX2(sth %g4, [%o1 + 0x00])
add %o0, 2, %o0
sll %g4, 16, %g4
addcc %g4, %g7, %g7
......@@ -311,9 +314,9 @@ cc_dword_align:
or %g3, %g7, %g7
1: be 3f
andcc %g1, 0xffffff80, %g0
EX(ld [%o0 + 0x00], %g4, add %g1, 0,#)
EX(ld [%o0 + 0x00], %g4, add %g1, 0)
sub %g1, 4, %g1
EX2(st %g4, [%o1 + 0x00],#)
EX2(st %g4, [%o1 + 0x00])
add %o0, 4, %o0
addcc %g4, %g7, %g7
add %o1, 4, %o1
......@@ -342,7 +345,7 @@ C_LABEL(__csum_partial_copy_sparc_generic):
CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
10: EXT(5b, 10b, 20f,#) ! note for exception handling
10: EXT(5b, 10b, 20f) ! note for exception handling
sub %g1, 128, %g1 ! detract from length
addx %g0, %g7, %g7 ! add in last carry bit
andcc %g1, 0xffffff80, %g0 ! more to csum?
......@@ -367,7 +370,7 @@ cctbl: CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x68,%g2,%g3,%g4,%g5)
CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x28,%g2,%g3,%g4,%g5)
CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x18,%g2,%g3,%g4,%g5)
CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x08,%g2,%g3,%g4,%g5)
12: EXT(cctbl, 12b, 22f,#) ! note for exception table handling
12: EXT(cctbl, 12b, 22f) ! note for exception table handling
addx %g0, %g7, %g7
andcc %o3, 0xf, %g0 ! check for low bits set
ccte: bne cc_end_cruft ! something left, handle it out of band
......@@ -378,7 +381,7 @@ ccdbl: CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x00,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o
CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
11: EXT(ccdbl, 11b, 21f,#) ! note for exception table handling
11: EXT(ccdbl, 11b, 21f) ! note for exception table handling
sub %g1, 128, %g1 ! detract from length
addx %g0, %g7, %g7 ! add in last carry bit
andcc %g1, 0xffffff80, %g0 ! more to csum?
......@@ -395,9 +398,9 @@ ccslow: cmp %g1, 0
be,a 1f
srl %g1, 1, %g4
sub %g1, 1, %g1
EX(ldub [%o0], %g5, add %g1, 1,#)
EX(ldub [%o0], %g5, add %g1, 1)
add %o0, 1, %o0
EX2(stb %g5, [%o1],#)
EX2(stb %g5, [%o1])
srl %g1, 1, %g4
add %o1, 1, %o1
1: cmp %g4, 0
......@@ -406,34 +409,34 @@ ccslow: cmp %g1, 0
andcc %o0, 2, %g0
be,a 1f
srl %g4, 1, %g4
EX(lduh [%o0], %o4, add %g1, 0,#)
EX(lduh [%o0], %o4, add %g1, 0)
sub %g1, 2, %g1
srl %o4, 8, %g2
sub %g4, 1, %g4
EX2(stb %g2, [%o1],#)
EX2(stb %g2, [%o1])
add %o4, %g5, %g5
EX2(stb %o4, [%o1 + 1],#)
EX2(stb %o4, [%o1 + 1])
add %o0, 2, %o0
srl %g4, 1, %g4
add %o1, 2, %o1
1: cmp %g4, 0
be,a 2f
andcc %g1, 2, %g0
EX3(ld [%o0], %o4,#)
EX3(ld [%o0], %o4)
5: srl %o4, 24, %g2
srl %o4, 16, %g3
EX2(stb %g2, [%o1],#)
EX2(stb %g2, [%o1])
srl %o4, 8, %g2
EX2(stb %g3, [%o1 + 1],#)
EX2(stb %g3, [%o1 + 1])
add %o0, 4, %o0
EX2(stb %g2, [%o1 + 2],#)
EX2(stb %g2, [%o1 + 2])
addcc %o4, %g5, %g5
EX2(stb %o4, [%o1 + 3],#)
EX2(stb %o4, [%o1 + 3])
addx %g5, %g0, %g5 ! I am now too lazy to optimize this (questionable
add %o1, 4, %o1 ! whether it is worth it). Maybe some day - with the sll/srl
subcc %g4, 1, %g4 ! tricks
bne,a 5b
EX3(ld [%o0], %o4,#)
EX3(ld [%o0], %o4)
sll %g5, 16, %g2
srl %g5, 16, %g5
srl %g2, 16, %g2
......@@ -441,19 +444,19 @@ ccslow: cmp %g1, 0
add %g2, %g5, %g5
2: be,a 3f
andcc %g1, 1, %g0
EX(lduh [%o0], %o4, and %g1, 3,#)
EX(lduh [%o0], %o4, and %g1, 3)
andcc %g1, 1, %g0
srl %o4, 8, %g2
add %o0, 2, %o0
EX2(stb %g2, [%o1],#)
EX2(stb %g2, [%o1])
add %g5, %o4, %g5
EX2(stb %o4, [%o1 + 1],#)
EX2(stb %o4, [%o1 + 1])
add %o1, 2, %o1
3: be,a 1f
sll %g5, 16, %o4
EX(ldub [%o0], %g2, add %g0, 1,#)
EX(ldub [%o0], %g2, add %g0, 1)
sll %g2, 8, %o4
EX2(stb %g2, [%o1],#)
EX2(stb %g2, [%o1])
add %g5, %o4, %g5
sll %g5, 16, %o4
1: addcc %o4, %g5, %g5
......
......@@ -16,41 +16,44 @@
#include <asm/asmmacro.h>
#include <asm/page.h>
#define EX(x,y,a,b,z) \
/* Work around cpp -rob */
#define ALLOC #alloc
#define EXECINSTR #execinstr
#define EX(x,y,a,b) \
98: x,y; \
.section .fixup,z##alloc,z##execinstr; \
.section .fixup,ALLOC,EXECINSTR; \
.align 4; \
99: ba fixupretl; \
a, b, %g3; \
.section __ex_table,z##alloc; \
.section __ex_table,ALLOC; \
.align 4; \
.word 98b, 99b; \
.text; \
.align 4
#define EX2(x,y,c,d,e,a,b,z) \
#define EX2(x,y,c,d,e,a,b) \
98: x,y; \
.section .fixup,z##alloc,z##execinstr; \
.section .fixup,ALLOC,EXECINSTR; \
.align 4; \
99: c, d, e; \
ba fixupretl; \
a, b, %g3; \
.section __ex_table,z##alloc; \
.section __ex_table,ALLOC; \
.align 4; \
.word 98b, 99b; \
.text; \
.align 4
#define EXO2(x,y,z) \
98: x,##y; \
.section __ex_table,z##alloc; \
#define EXO2(x,y) \
98: x, y; \
.section __ex_table,ALLOC; \
.align 4; \
.word 98b, 97f; \
.text; \
.align 4
#define EXT(start,end,handler,z) \
.section __ex_table,z##alloc; \
#define EXT(start,end,handler) \
.section __ex_table,ALLOC; \
.align 4; \
.word start, 0, end, handler; \
.text; \
......@@ -121,23 +124,23 @@ dword_align:
be 4f
andcc %o1, 2, %g0
EXO2(ldub [%o1], %g2,#)
EXO2(ldub [%o1], %g2)
add %o1, 1, %o1
EXO2(stb %g2, [%o0],#)
EXO2(stb %g2, [%o0])
sub %o2, 1, %o2
bne 3f
add %o0, 1, %o0
EXO2(lduh [%o1], %g2,#)
EXO2(lduh [%o1], %g2)
add %o1, 2, %o1
EXO2(sth %g2, [%o0],#)
EXO2(sth %g2, [%o0])
sub %o2, 2, %o2
b 3f
add %o0, 2, %o0
4:
EXO2(lduh [%o1], %g2,#)
EXO2(lduh [%o1], %g2)
add %o1, 2, %o1
EXO2(sth %g2, [%o0],#)
EXO2(sth %g2, [%o0])
sub %o2, 2, %o2
b 3f
add %o0, 2, %o0
......@@ -160,9 +163,9 @@ C_LABEL(__copy_user): /* %o0=dst %o1=src %o2=len */
be 2f
mov %o2, %g1
EXO2(ld [%o1], %o4,#)
EXO2(ld [%o1], %o4)
sub %g1, 4, %g1
EXO2(st %o4, [%o0],#)
EXO2(st %o4, [%o0])
add %o1, 4, %o1
add %o0, 4, %o0
2:
......@@ -177,7 +180,7 @@ C_LABEL(__copy_user): /* %o0=dst %o1=src %o2=len */
MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
80:
EXT(5b, 80b, 50f,#)
EXT(5b, 80b, 50f)
subcc %g7, 128, %g7
add %o1, 128, %o1
bne 5b
......@@ -204,37 +207,37 @@ copy_user_table:
MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
copy_user_table_end:
EXT(copy_user_table, copy_user_table_end, 51f,#)
EXT(copy_user_table, copy_user_table_end, 51f)
be copy_user_last7
andcc %g1, 4, %g0
EX(ldd [%o1], %g2, and %g1, 0xf,#)
EX(ldd [%o1], %g2, and %g1, 0xf)
add %o0, 8, %o0
add %o1, 8, %o1
EX(st %g2, [%o0 - 0x08], and %g1, 0xf,#)
EX2(st %g3, [%o0 - 0x04], and %g1, 0xf, %g1, sub %g1, 4,#)
EX(st %g2, [%o0 - 0x08], and %g1, 0xf)
EX2(st %g3, [%o0 - 0x04], and %g1, 0xf, %g1, sub %g1, 4)
copy_user_last7:
be 1f
andcc %g1, 2, %g0
EX(ld [%o1], %g2, and %g1, 7,#)
EX(ld [%o1], %g2, and %g1, 7)
add %o1, 4, %o1
EX(st %g2, [%o0], and %g1, 7,#)
EX(st %g2, [%o0], and %g1, 7)
add %o0, 4, %o0
1:
be 1f
andcc %g1, 1, %g0
EX(lduh [%o1], %g2, and %g1, 3,#)
EX(lduh [%o1], %g2, and %g1, 3)
add %o1, 2, %o1
EX(sth %g2, [%o0], and %g1, 3,#)
EX(sth %g2, [%o0], and %g1, 3)
add %o0, 2, %o0
1:
be 1f
nop
EX(ldub [%o1], %g2, add %g0, 1,#)
EX(stb %g2, [%o0], add %g0, 1,#)
EX(ldub [%o1], %g2, add %g0, 1)
EX(stb %g2, [%o0], add %g0, 1)
1:
retl
clr %o0
......@@ -245,7 +248,7 @@ ldd_std:
MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
81:
EXT(ldd_std, 81b, 52f,#)
EXT(ldd_std, 81b, 52f)
subcc %g7, 128, %g7
add %o1, 128, %o1
bne ldd_std
......@@ -274,9 +277,9 @@ cannot_optimize:
be 10f
nop
EXO2(ldub [%o1], %g2,#)
EXO2(ldub [%o1], %g2)
add %o1, 1, %o1
EXO2(stb %g2, [%o0],#)
EXO2(stb %g2, [%o0])
sub %o2, 1, %o2
andcc %o2, 0xfffffff0, %o3
be short_end
......@@ -285,7 +288,7 @@ cannot_optimize:
MOVE_HALFCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
MOVE_HALFCHUNK(o1, o0, 0x08, g2, g3, g4, g5)
82:
EXT(10b, 82b, 53f,#)
EXT(10b, 82b, 53f)
subcc %o3, 0x10, %o3
add %o1, 0x10, %o1
bne 10b
......@@ -303,7 +306,7 @@ byte_chunk:
MOVE_SHORTCHUNK(o1, o0, -0x0e, g2, g3)
MOVE_SHORTCHUNK(o1, o0, -0x10, g2, g3)
83:
EXT(byte_chunk, 83b, 54f,#)
EXT(byte_chunk, 83b, 54f)
subcc %o3, 0x10, %o3
add %o1, 0x10, %o1
bne byte_chunk
......@@ -328,11 +331,11 @@ short_end:
MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
short_table_end:
EXT(84b, short_table_end, 55f,#)
EXT(84b, short_table_end, 55f)
be 1f
nop
EX(ldub [%o1], %g2, add %g0, 1,#)
EX(stb %g2, [%o0], add %g0, 1,#)
EX(ldub [%o1], %g2, add %g0, 1)
EX(stb %g2, [%o0], add %g0, 1)
1:
retl
clr %o0
......@@ -344,11 +347,11 @@ short_aligned_end:
be 1f
andcc %o2, 4, %g0
EXO2(ld [%o1 + 0x00], %g2,#)
EXO2(ld [%o1 + 0x04], %g3,#)
EXO2(ld [%o1 + 0x00], %g2)
EXO2(ld [%o1 + 0x04], %g3)
add %o1, 8, %o1
EXO2(st %g2, [%o0 + 0x00],#)
EX(st %g3, [%o0 + 0x04], sub %o2, 4,#)
EXO2(st %g2, [%o0 + 0x00])
EX(st %g3, [%o0 + 0x04], sub %o2, 4)
add %o0, 8, %o0
1:
b copy_user_last7
......
......@@ -10,20 +10,23 @@
#include <asm/cprefix.h>
#include <asm/ptrace.h>
#define EX(x,y,a,b,z) \
/* Work around cpp -rob */
#define ALLOC #alloc
#define EXECINSTR #execinstr
#define EX(x,y,a,b) \
98: x,y; \
.section .fixup,z##alloc,z##execinstr; \
.section .fixup,ALLOC,EXECINSTR; \
.align 4; \
99: ba 30f; \
a, b, %o0; \
.section __ex_table,z##alloc; \
.section __ex_table,ALLOC; \
.align 4; \
.word 98b, 99b; \
.text; \
.align 4
#define EXT(start,end,handler,z) \
.section __ex_table,z##alloc; \
#define EXT(start,end,handler) \
.section __ex_table,ALLOC; \
.align 4; \
.word start, 0, end, handler; \
.text; \
......@@ -74,13 +77,13 @@ C_LABEL(memset):
3:
cmp %o2, 3
be 2f
EX(stb %g3, [%o0], sub %o1, 0,#)
EX(stb %g3, [%o0], sub %o1, 0)
cmp %o2, 2
be 2f
EX(stb %g3, [%o0 + 0x01], sub %o1, 1,#)
EX(stb %g3, [%o0 + 0x01], sub %o1, 1)
EX(stb %g3, [%o0 + 0x02], sub %o1, 2,#)
EX(stb %g3, [%o0 + 0x02], sub %o1, 2)
2:
sub %o2, 4, %o2
add %o1, %o2, %o1
......@@ -101,7 +104,7 @@ C_LABEL(__bzero):
be 2f
mov %g3, %g2
EX(st %g3, [%o0], sub %o1, 0,#)
EX(st %g3, [%o0], sub %o1, 0)
sub %o1, 4, %o1
add %o0, 4, %o0
2:
......@@ -113,7 +116,7 @@ C_LABEL(__bzero):
subcc %o3, 128, %o3
ZERO_BIG_BLOCK(%o0, 0x40, %g2)
11:
EXT(10b, 11b, 20f,#)
EXT(10b, 11b, 20f)
bne 10b
add %o0, 128, %o0
......@@ -138,17 +141,17 @@ C_LABEL(__bzero):
be 1f
andcc %o1, 2, %g0
EX(st %g3, [%o0], and %o1, 7,#)
EX(st %g3, [%o0], and %o1, 7)
add %o0, 4, %o0
1:
be 1f
andcc %o1, 1, %g0
EX(sth %g3, [%o0], and %o1, 3,#)
EX(sth %g3, [%o0], and %o1, 3)
add %o0, 2, %o0
1:
bne,a 8f
EX(stb %g3, [%o0], and %o1, 1,#)
EX(stb %g3, [%o0], and %o1, 1)
8:
retl
clr %o0
......@@ -161,7 +164,7 @@ C_LABEL(__bzero):
add %o0, 1, %o0
subcc %o1, 1, %o1
bne,a 8b
EX(stb %g3, [%o0 - 1], add %o1, 1,#)
EX(stb %g3, [%o0 - 1], add %o1, 1)
0:
retl
clr %o0
......
......@@ -4,8 +4,8 @@
#include <asm/byteorder.h>
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
__asm__ ("addcc %r4,%5,%1
addx %r2,%3,%0" \
__asm__ ("addcc %r4,%5,%1\n\t" \
"addx %r2,%3,%0\n" \
: "=r" ((USItype)(sh)), \
"=&r" ((USItype)(sl)) \
: "%rJ" ((USItype)(ah)), \
......@@ -14,8 +14,8 @@
"rI" ((USItype)(bl)) \
: "cc")
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
__asm__ ("subcc %r4,%5,%1
subx %r2,%3,%0" \
__asm__ ("subcc %r4,%5,%1\n\t" \
"subx %r2,%3,%0\n" \
: "=r" ((USItype)(sh)), \
"=&r" ((USItype)(sl)) \
: "rJ" ((USItype)(ah)), \
......@@ -25,46 +25,46 @@
: "cc")
#define umul_ppmm(w1, w0, u, v) \
__asm__ ("! Inlined umul_ppmm
wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr
sra %3,31,%%g2 ! Don't move this insn
and %2,%%g2,%%g2 ! Don't move this insn
andcc %%g0,0,%%g1 ! Don't move this insn
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,%3,%%g1
mulscc %%g1,0,%%g1
add %%g1,%%g2,%0
rd %%y,%1" \
__asm__ ("! Inlined umul_ppmm\n\t" \
"wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n\t" \
"sra %3,31,%%g2 ! Don't move this insn\n\t" \
"and %2,%%g2,%%g2 ! Don't move this insn\n\t" \
"andcc %%g0,0,%%g1 ! Don't move this insn\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,0,%%g1\n\t" \
"add %%g1,%%g2,%0\n\t" \
"rd %%y,%1\n" \
: "=r" ((USItype)(w1)), \
"=r" ((USItype)(w0)) \
: "%rI" ((USItype)(u)), \
......@@ -74,30 +74,30 @@
/* It's quite necessary to add this much assembler for the sparc.
The default udiv_qrnnd (in C) is more than 10 times slower! */
#define udiv_qrnnd(q, r, n1, n0, d) \
__asm__ ("! Inlined udiv_qrnnd
mov 32,%%g1
subcc %1,%2,%%g0
1: bcs 5f
addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb
sub %1,%2,%1 ! this kills msb of n
addx %1,%1,%1 ! so this can't give carry
subcc %%g1,1,%%g1
2: bne 1b
subcc %1,%2,%%g0
bcs 3f
addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb
b 3f
sub %1,%2,%1 ! this kills msb of n
4: sub %1,%2,%1
5: addxcc %1,%1,%1
bcc 2b
subcc %%g1,1,%%g1
! Got carry from n. Subtract next step to cancel this carry.
bne 4b
addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb
sub %1,%2,%1
3: xnor %0,0,%0
! End of inline udiv_qrnnd" \
__asm__ ("! Inlined udiv_qrnnd\n\t" \
"mov 32,%%g1\n\t" \
"subcc %1,%2,%%g0\n\t" \
"1: bcs 5f\n\t" \
"addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \
"sub %1,%2,%1 ! this kills msb of n\n\t" \
"addx %1,%1,%1 ! so this can't give carry\n\t" \
"subcc %%g1,1,%%g1\n\t" \
"2: bne 1b\n\t" \
"subcc %1,%2,%%g0\n\t" \
"bcs 3f\n\t" \
"addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \
"b 3f\n\t" \
"sub %1,%2,%1 ! this kills msb of n\n\t" \
"4: sub %1,%2,%1\n\t" \
"5: addxcc %1,%1,%1\n\t" \
"bcc 2b\n\t" \
"subcc %%g1,1,%%g1\n\t" \
"! Got carry from n. Subtract next step to cancel this carry.\n\t" \
"bne 4b\n\t" \
"addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n\t" \
"sub %1,%2,%1\n\t" \
"3: xnor %0,0,%0\n\t" \
"! End of inline udiv_qrnnd\n" \
: "=&r" ((USItype)(q)), \
"=&r" ((USItype)(r)) \
: "r" ((USItype)(d)), \
......
......@@ -77,9 +77,9 @@
/* Some assembly to speed things up. */
#define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) \
__asm__ ("addcc %r7,%8,%2
addxcc %r5,%6,%1
addx %r3,%4,%0" \
__asm__ ("addcc %r7,%8,%2\n\t" \
"addxcc %r5,%6,%1\n\t" \
"addx %r3,%4,%0\n" \
: "=r" ((USItype)(r2)), \
"=&r" ((USItype)(r1)), \
"=&r" ((USItype)(r0)) \
......@@ -92,9 +92,9 @@
: "cc")
#define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) \
__asm__ ("subcc %r7,%8,%2
subxcc %r5,%6,%1
subx %r3,%4,%0" \
__asm__ ("subcc %r7,%8,%2\n\t" \
"subxcc %r5,%6,%1\n\t" \
"subx %r3,%4,%0\n" \
: "=r" ((USItype)(r2)), \
"=&r" ((USItype)(r1)), \
"=&r" ((USItype)(r0)) \
......@@ -111,11 +111,11 @@
/* We need to fool gcc, as we need to pass more than 10 \
input/outputs. */ \
register USItype _t1 __asm__ ("g1"), _t2 __asm__ ("g2"); \
__asm__ __volatile__ ("
addcc %r8,%9,%1
addxcc %r6,%7,%0
addxcc %r4,%5,%%g2
addx %r2,%3,%%g1" \
__asm__ __volatile__ ( \
"addcc %r8,%9,%1\n\t" \
"addxcc %r6,%7,%0\n\t" \
"addxcc %r4,%5,%%g2\n\t" \
"addx %r2,%3,%%g1\n\t" \
: "=&r" ((USItype)(r1)), \
"=&r" ((USItype)(r0)) \
: "%rJ" ((USItype)(x3)), \
......@@ -136,11 +136,11 @@
/* We need to fool gcc, as we need to pass more than 10 \
input/outputs. */ \
register USItype _t1 __asm__ ("g1"), _t2 __asm__ ("g2"); \
__asm__ __volatile__ ("
subcc %r8,%9,%1
subxcc %r6,%7,%0
subxcc %r4,%5,%%g2
subx %r2,%3,%%g1" \
__asm__ __volatile__ ( \
"subcc %r8,%9,%1\n\t" \
"subxcc %r6,%7,%0\n\t" \
"subxcc %r4,%5,%%g2\n\t" \
"subx %r2,%3,%%g1\n\t" \
: "=&r" ((USItype)(r1)), \
"=&r" ((USItype)(r0)) \
: "%rJ" ((USItype)(x3)), \
......@@ -161,10 +161,10 @@
#define __FP_FRAC_DEC_4(x3,x2,x1,x0,y3,y2,y1,y0) __FP_FRAC_SUB_4(x3,x2,x1,x0,x3,x2,x1,x0,y3,y2,y1,y0)
#define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i) \
__asm__ ("addcc %3,%4,%3
addxcc %2,%%g0,%2
addxcc %1,%%g0,%1
addx %0,%%g0,%0" \
__asm__ ("addcc %3,%4,%3\n\t" \
"addxcc %2,%%g0,%2\n\t" \
"addxcc %1,%%g0,%1\n\t" \
"addx %0,%%g0,%0\n\t" \
: "=&r" ((USItype)(x3)), \
"=&r" ((USItype)(x2)), \
"=&r" ((USItype)(x1)), \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment