Commit 57ed1dbf authored by Asit K. Mallick's avatar Asit K. Mallick Committed by David Mosberger

[PATCH] ia64: move the RAID xor routines into a separate assembly file

parent ade39b5b
......@@ -124,6 +124,16 @@ EXPORT_SYMBOL_NOVERS(__udivdi3);
EXPORT_SYMBOL_NOVERS(__moddi3);
EXPORT_SYMBOL_NOVERS(__umoddi3);
/*
 * RAID-5 xor routines implemented in assembly (arch/ia64/lib/xor.S).
 * Exported so the modular RAID/MD driver can use the xor_block_ia64
 * template; NOVERS = exported without symbol versioning.
 */
extern void xor_ia64_2(void);
extern void xor_ia64_3(void);
extern void xor_ia64_4(void);
extern void xor_ia64_5(void);
EXPORT_SYMBOL_NOVERS(xor_ia64_2);
EXPORT_SYMBOL_NOVERS(xor_ia64_3);
EXPORT_SYMBOL_NOVERS(xor_ia64_4);
EXPORT_SYMBOL_NOVERS(xor_ia64_5);
extern unsigned long ia64_iobase;
EXPORT_SYMBOL(ia64_iobase);
......
......@@ -9,7 +9,7 @@ obj-y := __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \
checksum.o clear_page.o csum_partial_copy.o copy_page.o \
clear_user.o strncpy_from_user.o strlen_user.o strnlen_user.o \
flush.o io.o ip_fast_csum.o do_csum.o \
memset.o strlen.o swiotlb.o
memset.o strlen.o swiotlb.o xor.o
obj-$(CONFIG_ITANIUM) += copy_page.o copy_user.o memcpy.o
obj-$(CONFIG_MCKINLEY) += copy_page_mck.o memcpy_mck.o
......
/*
* arch/ia64/lib/xor.S
*
* Optimized RAID-5 checksumming functions for IA-64.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* You should have received a copy of the GNU General Public License
* (for example /usr/src/linux/COPYING); if not, write to the Free
* Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <asm/asmmacro.h>
// void xor_ia64_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
// XORs bytes/8 64-bit words of p2 into p1 (p1 is both the first source and
// the destination: r8 stores back to in1 while r16 reads from it).
// The loop is software-pipelined with rotating registers and predicates,
// scheduled for a 6-cycle load latency; ld8.nta/st8.nta use the
// non-temporal hint since RAID buffers are streamed, not reused.
GLOBAL_ENTRY(xor_ia64_2)
.prologue
.fframe 0
.save ar.pfs, r31
alloc r31 = ar.pfs, 3, 0, 13, 16	// 16-reg frame, all rotating: s1[7]+s2[7]+d[2]
.save ar.lc, r30
mov r30 = ar.lc		// preserve loop counter
.save pr, r29
mov r29 = pr		// preserve predicates
;;
.body
mov r8 = in1		// r8 = store pointer into p1
mov ar.ec = 6 + 2	// epilogue count: 8 stages (6-cycle load + xor + store)
shr in0 = in0, 3	// byte count -> 8-byte word count
;;
adds in0 = -1, in0	// ar.lc counts iterations minus one
mov r16 = in1		// r16 = load pointer into p1
mov r17 = in2		// r17 = load pointer into p2
;;
mov ar.lc = in0
mov pr.rot = 1 << 16	// prime rotating predicates: only p16 set
;;
.rotr s1[6+1], s2[6+1], s3[6+1], d[2]
.rotp p[6+2]
0:
(p[0]) ld8.nta s1[0] = [r16], 8		// stage 0: load p1 word
(p[0]) ld8.nta s2[0] = [r17], 8		// stage 0: load p2 word
(p[6]) xor d[0] = s1[6], s2[6]		// stage 6: xor once loads have retired
(p[6+1])st8.nta [r8] = d[1], 8		// stage 7: store result back to p1
nop.f 0
br.ctop.dptk.few 0b	// modulo-scheduled counted-loop branch
;;
mov ar.lc = r30		// restore caller's ar.lc
mov pr = r29, -1	// restore caller's predicates
br.ret.sptk.few rp
END(xor_ia64_2)
// void xor_ia64_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
//                 unsigned long *p3)
// Three-source variant: p1[i] ^= p2[i] ^ p3[i] over bytes/8 words, with p1
// doubling as the destination.  Same 8-stage software pipeline as
// xor_ia64_2, with a second xor chained in stage 6.
GLOBAL_ENTRY(xor_ia64_3)
.prologue
.fframe 0
.save ar.pfs, r31
alloc r31 = ar.pfs, 4, 0, 20, 24	// rotating region s1/s2/s3[7]+d[2]=23, rounded to 24
.save ar.lc, r30
mov r30 = ar.lc		// preserve loop counter
.save pr, r29
mov r29 = pr		// preserve predicates
;;
.body
mov r8 = in1		// r8 = store pointer into p1
mov ar.ec = 6 + 2	// 8-stage pipeline epilogue count
shr in0 = in0, 3	// byte count -> 8-byte word count
;;
adds in0 = -1, in0	// ar.lc counts iterations minus one
mov r16 = in1		// r16 = load pointer into p1
mov r17 = in2		// r17 = load pointer into p2
;;
mov r18 = in3		// r18 = load pointer into p3
mov ar.lc = in0
mov pr.rot = 1 << 16	// prime rotating predicates: only p16 set
;;
.rotr s1[6+1], s2[6+1], s3[6+1], d[2]
.rotp p[6+2]
0:
(p[0]) ld8.nta s1[0] = [r16], 8		// stage 0: load p1 word
(p[0]) ld8.nta s2[0] = [r17], 8		// stage 0: load p2 word
(p[6]) xor d[0] = s1[6], s2[6]		// stage 6: first xor
;;
(p[0]) ld8.nta s3[0] = [r18], 8		// stage 0: load p3 word
(p[6+1])st8.nta [r8] = d[1], 8		// stage 7: store previous iteration's result
(p[6]) xor d[0] = d[0], s3[6]		// stage 6: fold in third source
br.ctop.dptk.few 0b
;;
mov ar.lc = r30		// restore caller's ar.lc
mov pr = r29, -1	// restore caller's predicates
br.ret.sptk.few rp
END(xor_ia64_3)
// void xor_ia64_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
//                 unsigned long *p3, unsigned long *p4)
// Four-source variant: p1[i] ^= p2[i] ^ p3[i] ^ p4[i].  The s3^s4 partial
// uses scratch r20 (a static register: it is consumed one bundle later in
// the same iteration, so it need not rotate).
GLOBAL_ENTRY(xor_ia64_4)
.prologue
.fframe 0
.save ar.pfs, r31
alloc r31 = ar.pfs, 5, 0, 27, 32	// rotating region s1..s4[7]+d[2]=30, rounded to 32
.save ar.lc, r30
mov r30 = ar.lc		// preserve loop counter
.save pr, r29
mov r29 = pr		// preserve predicates
;;
.body
mov r8 = in1		// r8 = store pointer into p1
mov ar.ec = 6 + 2	// 8-stage pipeline epilogue count
shr in0 = in0, 3	// byte count -> 8-byte word count
;;
adds in0 = -1, in0	// ar.lc counts iterations minus one
mov r16 = in1		// r16 = load pointer into p1
mov r17 = in2		// r17 = load pointer into p2
;;
mov r18 = in3		// r18 = load pointer into p3
mov ar.lc = in0
mov pr.rot = 1 << 16	// prime rotating predicates: only p16 set
mov r19 = in4		// r19 = load pointer into p4
;;
.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]
.rotp p[6+2]
0:
(p[0]) ld8.nta s1[0] = [r16], 8		// stage 0: load p1 word
(p[0]) ld8.nta s2[0] = [r17], 8		// stage 0: load p2 word
(p[6]) xor d[0] = s1[6], s2[6]		// stage 6: s1^s2
(p[0]) ld8.nta s3[0] = [r18], 8		// stage 0: load p3 word
(p[0]) ld8.nta s4[0] = [r19], 8		// stage 0: load p4 word
(p[6]) xor r20 = s3[6], s4[6]		// stage 6: s3^s4 into scratch
;;
(p[6+1])st8.nta [r8] = d[1], 8		// stage 7: store previous iteration's result
(p[6]) xor d[0] = d[0], r20		// stage 6: combine the two partials
br.ctop.dptk.few 0b
;;
mov ar.lc = r30		// restore caller's ar.lc
mov pr = r29, -1	// restore caller's predicates
br.ret.sptk.few rp
END(xor_ia64_4)
// void xor_ia64_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
//                 unsigned long *p3, unsigned long *p4, unsigned long *p5)
// Five-source variant: p1[i] ^= p2[i] ^ p3[i] ^ p4[i] ^ p5[i].  As in
// xor_ia64_4, r21 holds a same-iteration partial and so stays static;
// the fifth source rotates through s5.
GLOBAL_ENTRY(xor_ia64_5)
.prologue
.fframe 0
.save ar.pfs, r31
alloc r31 = ar.pfs, 6, 0, 34, 40	// rotating region s1..s5[7]+d[2]=37, rounded to 40
.save ar.lc, r30
mov r30 = ar.lc		// preserve loop counter
.save pr, r29
mov r29 = pr		// preserve predicates
;;
.body
mov r8 = in1		// r8 = store pointer into p1
mov ar.ec = 6 + 2	// 8-stage pipeline epilogue count
shr in0 = in0, 3	// byte count -> 8-byte word count
;;
adds in0 = -1, in0	// ar.lc counts iterations minus one
mov r16 = in1		// r16 = load pointer into p1
mov r17 = in2		// r17 = load pointer into p2
;;
mov r18 = in3		// r18 = load pointer into p3
mov ar.lc = in0
mov pr.rot = 1 << 16	// prime rotating predicates: only p16 set
mov r19 = in4		// r19 = load pointer into p4
mov r20 = in5		// r20 = load pointer into p5
;;
.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]
.rotp p[6+2]
0:
(p[0]) ld8.nta s1[0] = [r16], 8		// stage 0: load p1 word
(p[0]) ld8.nta s2[0] = [r17], 8		// stage 0: load p2 word
(p[6]) xor d[0] = s1[6], s2[6]		// stage 6: s1^s2
(p[0]) ld8.nta s3[0] = [r18], 8		// stage 0: load p3 word
(p[0]) ld8.nta s4[0] = [r19], 8		// stage 0: load p4 word
(p[6]) xor r21 = s3[6], s4[6]		// stage 6: s3^s4 into scratch
;;
(p[0]) ld8.nta s5[0] = [r20], 8		// stage 0: load p5 word
(p[6+1])st8.nta [r8] = d[1], 8		// stage 7: store previous iteration's result
(p[6]) xor d[0] = d[0], r21		// stage 6: combine partials
;;
(p[6]) xor d[0] = d[0], s5[6]		// stage 6: fold in fifth source
nop.f 0
br.ctop.dptk.few 0b
;;
mov ar.lc = r30		// restore caller's ar.lc
mov pr = r29, -1	// restore caller's predicates
br.ret.sptk.few rp
END(xor_ia64_5)
......@@ -22,256 +22,6 @@ extern void xor_ia64_4(unsigned long, unsigned long *, unsigned long *,
extern void xor_ia64_5(unsigned long, unsigned long *, unsigned long *,
unsigned long *, unsigned long *, unsigned long *);
/*
 * NOTE(review): legacy inline-assembly copies of xor_ia64_{2,3,4,5}, being
 * removed by this patch in favor of the standalone arch/ia64/lib/xor.S.
 * The argument below is a single C string literal passed verbatim to the
 * assembler, so its contents are deliberately left byte-for-byte untouched;
 * see the .S file for commented versions of the same routines.
 */
asm ("
.text
// Assume L2 memory latency of 6 cycles.
.proc xor_ia64_2
xor_ia64_2:
.prologue
.fframe 0
{ .mii
.save ar.pfs, r31
alloc r31 = ar.pfs, 3, 0, 13, 16
.save ar.lc, r30
mov r30 = ar.lc
.save pr, r29
mov r29 = pr
;;
}
.body
{ .mii
mov r8 = in1
mov ar.ec = 6 + 2
shr in0 = in0, 3
;;
}
{ .mmi
adds in0 = -1, in0
mov r16 = in1
mov r17 = in2
;;
}
{ .mii
mov ar.lc = in0
mov pr.rot = 1 << 16
;;
}
.rotr s1[6+1], s2[6+1], d[2]
.rotp p[6+2]
0: { .mmi
(p[0]) ld8.nta s1[0] = [r16], 8
(p[0]) ld8.nta s2[0] = [r17], 8
(p[6]) xor d[0] = s1[6], s2[6]
}
{ .mfb
(p[6+1]) st8.nta [r8] = d[1], 8
nop.f 0
br.ctop.dptk.few 0b
;;
}
{ .mii
mov ar.lc = r30
mov pr = r29, -1
}
{ .bbb
br.ret.sptk.few rp
}
.endp xor_ia64_2
.proc xor_ia64_3
xor_ia64_3:
.prologue
.fframe 0
{ .mii
.save ar.pfs, r31
alloc r31 = ar.pfs, 4, 0, 20, 24
.save ar.lc, r30
mov r30 = ar.lc
.save pr, r29
mov r29 = pr
;;
}
.body
{ .mii
mov r8 = in1
mov ar.ec = 6 + 2
shr in0 = in0, 3
;;
}
{ .mmi
adds in0 = -1, in0
mov r16 = in1
mov r17 = in2
;;
}
{ .mii
mov r18 = in3
mov ar.lc = in0
mov pr.rot = 1 << 16
;;
}
.rotr s1[6+1], s2[6+1], s3[6+1], d[2]
.rotp p[6+2]
0: { .mmi
(p[0]) ld8.nta s1[0] = [r16], 8
(p[0]) ld8.nta s2[0] = [r17], 8
(p[6]) xor d[0] = s1[6], s2[6]
;;
}
{ .mmi
(p[0]) ld8.nta s3[0] = [r18], 8
(p[6+1]) st8.nta [r8] = d[1], 8
(p[6]) xor d[0] = d[0], s3[6]
}
{ .bbb
br.ctop.dptk.few 0b
;;
}
{ .mii
mov ar.lc = r30
mov pr = r29, -1
}
{ .bbb
br.ret.sptk.few rp
}
.endp xor_ia64_3
.proc xor_ia64_4
xor_ia64_4:
.prologue
.fframe 0
{ .mii
.save ar.pfs, r31
alloc r31 = ar.pfs, 5, 0, 27, 32
.save ar.lc, r30
mov r30 = ar.lc
.save pr, r29
mov r29 = pr
;;
}
.body
{ .mii
mov r8 = in1
mov ar.ec = 6 + 2
shr in0 = in0, 3
;;
}
{ .mmi
adds in0 = -1, in0
mov r16 = in1
mov r17 = in2
;;
}
{ .mii
mov r18 = in3
mov ar.lc = in0
mov pr.rot = 1 << 16
}
{ .mfb
mov r19 = in4
;;
}
.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]
.rotp p[6+2]
0: { .mmi
(p[0]) ld8.nta s1[0] = [r16], 8
(p[0]) ld8.nta s2[0] = [r17], 8
(p[6]) xor d[0] = s1[6], s2[6]
}
{ .mmi
(p[0]) ld8.nta s3[0] = [r18], 8
(p[0]) ld8.nta s4[0] = [r19], 8
(p[6]) xor r20 = s3[6], s4[6]
;;
}
{ .mib
(p[6+1]) st8.nta [r8] = d[1], 8
(p[6]) xor d[0] = d[0], r20
br.ctop.dptk.few 0b
;;
}
{ .mii
mov ar.lc = r30
mov pr = r29, -1
}
{ .bbb
br.ret.sptk.few rp
}
.endp xor_ia64_4
.proc xor_ia64_5
xor_ia64_5:
.prologue
.fframe 0
{ .mii
.save ar.pfs, r31
alloc r31 = ar.pfs, 6, 0, 34, 40
.save ar.lc, r30
mov r30 = ar.lc
.save pr, r29
mov r29 = pr
;;
}
.body
{ .mii
mov r8 = in1
mov ar.ec = 6 + 2
shr in0 = in0, 3
;;
}
{ .mmi
adds in0 = -1, in0
mov r16 = in1
mov r17 = in2
;;
}
{ .mii
mov r18 = in3
mov ar.lc = in0
mov pr.rot = 1 << 16
}
{ .mib
mov r19 = in4
mov r20 = in5
;;
}
.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]
.rotp p[6+2]
0: { .mmi
(p[0]) ld8.nta s1[0] = [r16], 8
(p[0]) ld8.nta s2[0] = [r17], 8
(p[6]) xor d[0] = s1[6], s2[6]
}
{ .mmi
(p[0]) ld8.nta s3[0] = [r18], 8
(p[0]) ld8.nta s4[0] = [r19], 8
(p[6]) xor r21 = s3[6], s4[6]
;;
}
{ .mmi
(p[0]) ld8.nta s5[0] = [r20], 8
(p[6+1]) st8.nta [r8] = d[1], 8
(p[6]) xor d[0] = d[0], r21
;;
}
{ .mfb
(p[6]) xor d[0] = d[0], s5[6]
nop.f 0
br.ctop.dptk.few 0b
;;
}
{ .mii
mov ar.lc = r30
mov pr = r29, -1
}
{ .bbb
br.ret.sptk.few rp
}
.endp xor_ia64_5
");
static struct xor_block_template xor_block_ia64 = {
name: "ia64",
do_2: xor_ia64_2,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment