Commit ed99e2bc authored by Atsushi Nemoto, committed by Ralf Baechle

[MIPS] Optimize csum_partial for 64bit kernel

Make csum_partial 64-bit capable: on 64-bit kernels, do the bulk loads and additions in 64-bit units instead of 32-bit units, and fold the wider sum back down at the end.
Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
parent 773ff788
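The idea, sketched in C rather than MIPS assembly before the diff below (an illustration with made-up helper names, not the kernel's code): on a 64-bit kernel the inner loops can consume the buffer in 8-byte units with daddu instead of 4-byte units with addu, folding carries back into the sum exactly as before.

#include <stdint.h>

/*
 * Illustrative C equivalent of the ADDC macro: add a word into the running
 * sum and fold any carry back in (end-around carry).  addc64() is a made-up
 * name used only for this sketch.
 */
static uint64_t addc64(uint64_t sum, uint64_t word)
{
	sum += word;
	if (sum < word)		/* unsigned overflow, i.e. a carry out */
		sum += 1;	/* end-around carry */
	return sum;
}

/*
 * One CSUM_BIGCHUNK still covers 32 bytes, but with USE_DOUBLE it takes
 * four 8-byte loads instead of eight 4-byte loads.  Assumes an aligned
 * 32-byte buffer, purely for illustration.
 */
static uint64_t sum_32byte_chunk(const uint64_t *p, uint64_t sum)
{
	for (int i = 0; i < 4; i++)
		sum = addc64(sum, p[i]);
	return sum;
}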
@@ -29,30 +29,49 @@
 #define t5	$13
 #define t6	$14
 #define t7	$15
+
+#define USE_DOUBLE
 #endif
+
+#ifdef USE_DOUBLE
+
+#define LOAD   ld
+#define ADD    daddu
+#define NBYTES 8
+
+#else
+
+#define LOAD   lw
+#define ADD    addu
+#define NBYTES 4
+
+#endif /* USE_DOUBLE */
+
+#define UNIT(unit)  ((unit)*NBYTES)
+
 #define ADDC(sum,reg)					\
-	addu	sum, reg;				\
+	ADD	sum, reg;				\
 	sltu	v1, sum, reg;				\
-	addu	sum, v1
+	ADD	sum, v1
 
-#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\
-	lw	_t0, (offset + 0x00)(src);		\
-	lw	_t1, (offset + 0x04)(src);		\
-	lw	_t2, (offset + 0x08)(src);		\
-	lw	_t3, (offset + 0x0c)(src);		\
-	ADDC(sum, _t0);					\
-	ADDC(sum, _t1);					\
-	ADDC(sum, _t2);					\
-	ADDC(sum, _t3);					\
-	lw	_t0, (offset + 0x10)(src);		\
-	lw	_t1, (offset + 0x14)(src);		\
-	lw	_t2, (offset + 0x18)(src);		\
-	lw	_t3, (offset + 0x1c)(src);		\
+#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)	\
+	LOAD	_t0, (offset + UNIT(0))(src);		\
+	LOAD	_t1, (offset + UNIT(1))(src);		\
+	LOAD	_t2, (offset + UNIT(2))(src);		\
+	LOAD	_t3, (offset + UNIT(3))(src);		\
 	ADDC(sum, _t0);					\
 	ADDC(sum, _t1);					\
 	ADDC(sum, _t2);					\
 	ADDC(sum, _t3)
+
+#ifdef USE_DOUBLE
+#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\
+	CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)
+#else
+#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\
+	CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3);	\
+	CSUM_BIGCHUNK1(src, offset + 0x10, sum, _t0, _t1, _t2, _t3)
+#endif
 
 /*
  * a0: source address
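A note for reading the remaining hunks: CSUM_BIGCHUNK keeps its 32-byte granularity in both configurations. With USE_DOUBLE a single CSUM_BIGCHUNK1 (four 8-byte loads) covers the chunk, while the 32-bit build chains two CSUM_BIGCHUNK1 expansions at offset and offset + 0x10 (eight 4-byte loads), so the call sites elsewhere in the file (not shown in this diff) can stay as they are.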
@@ -117,11 +136,17 @@ qword_align:
 	beqz	t8, oword_align
 	andi	t8, src, 0x10
 
+#ifdef USE_DOUBLE
+	ld	t0, 0x00(src)
+	LONG_SUBU	a1, a1, 0x8
+	ADDC(sum, t0)
+#else
 	lw	t0, 0x00(src)
 	lw	t1, 0x04(src)
 	LONG_SUBU	a1, a1, 0x8
 	ADDC(sum, t0)
 	ADDC(sum, t1)
+#endif
 	PTR_ADDU	src, src, 0x8
 	andi	t8, src, 0x10
@@ -129,14 +154,14 @@ oword_align:
 	beqz	t8, begin_movement
 	LONG_SRL	t8, a1, 0x7
 
-	lw	t3, 0x08(src)
-	lw	t4, 0x0c(src)
-	lw	t0, 0x00(src)
-	lw	t1, 0x04(src)
-	ADDC(sum, t3)
-	ADDC(sum, t4)
+#ifdef USE_DOUBLE
+	ld	t0, 0x00(src)
+	ld	t1, 0x08(src)
 	ADDC(sum, t0)
 	ADDC(sum, t1)
+#else
+	CSUM_BIGCHUNK1(src, 0x00, sum, t0, t1, t3, t4)
+#endif
 	LONG_SUBU	a1, a1, 0x10
 	PTR_ADDU	src, src, 0x10
 	LONG_SRL	t8, a1, 0x7
@@ -219,6 +244,13 @@ small_csumcpy:
 1:	ADDC(sum, t1)
 
 	/* fold checksum */
+#ifdef USE_DOUBLE
+	dsll32	v1, sum, 0
+	daddu	sum, v1
+	sltu	v1, sum, v1
+	dsra32	sum, sum, 0
+	addu	sum, v1
+#endif
 	sll	v1, sum, 16
 	addu	sum, v1
 	sltu	v1, sum, v1
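The block added in this last hunk is the one genuinely new step on the result path: with USE_DOUBLE the accumulator is 64 bits wide, so it must be folded down to 32 bits (again with an end-around carry) before the pre-existing 16-bit fold that follows. A rough C rendering of the dsll32/daddu/sltu/dsra32/addu sequence (illustrative only, fold64 is a made-up helper name):

#include <stdint.h>

/* Illustrative only: fold the 64-bit accumulator to 32 bits. */
static uint32_t fold64(uint64_t sum)
{
	uint64_t hi_copy = sum << 32;		/* dsll32 v1, sum, 0  */
	uint64_t folded  = sum + hi_copy;	/* daddu  sum, v1     */
	uint32_t carry   = folded < hi_copy;	/* sltu   v1, sum, v1 */

	/* dsra32 keeps the upper word; addu adds the carry back in */
	return (uint32_t)(folded >> 32) + carry;
}

The folded 32-bit value then feeds the unchanged 16-bit fold below.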