Commit 61c3c39f authored by Alexandre Oliva, committed by Linus Torvalds

[PATCH] Improve code generation for x86 raid XOR functions

This helps avoid doing push/pop pairs on register contents that we just
don't care about. After we've done the xor, the pointers are dead anyway.
parent 05c14f17
@@ -182,11 +182,15 @@ xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 
 	kernel_fpu_begin();
 
-	/* need to save/restore p4/p5 manually otherwise gcc's 10 argument
-	   limit gets exceeded (+ counts as two arguments) */
+	/* Make sure GCC forgets anything it knows about p4 or p5,
+	   such that it won't pass to the asm volatile below a
+	   register that is shared with any other variable.  That's
+	   because we modify p4 and p5 there, but we can't mark them
+	   as read/write, otherwise we'd overflow the 10-asm-operands
+	   limit of GCC < 3.1.  */
+	__asm__ ("" : "+r" (p4), "+r" (p5));
+
 	__asm__ __volatile__ (
-	" pushl %4\n"
-	" pushl %5\n"
 #undef BLOCK
 #define BLOCK(i) \
 	LD(i,0)					\
@@ -229,13 +233,16 @@ xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	"       addl $128, %5         ;\n"
 	"       decl %0               ;\n"
 	"       jnz 1b                ;\n"
-	"	popl %5\n"
-	"	popl %4\n"
 	: "+r" (lines),
 	  "+r" (p1), "+r" (p2), "+r" (p3)
 	: "r" (p4), "r" (p5)
 	: "memory");
 
+	/* p4 and p5 were modified, and now the variables are dead.
+	   Clobber them just to be sure nobody does something stupid
+	   like assuming they have some legal value.  */
+	__asm__ ("" : "=r" (p4), "=r" (p5));
+
 	kernel_fpu_end();
 }
@@ -425,10 +432,15 @@ xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 
 	kernel_fpu_begin();
 
-	/* need to save p4/p5 manually to not exceed gcc's 10 argument limit */
+	/* Make sure GCC forgets anything it knows about p4 or p5,
+	   such that it won't pass to the asm volatile below a
+	   register that is shared with any other variable.  That's
+	   because we modify p4 and p5 there, but we can't mark them
+	   as read/write, otherwise we'd overflow the 10-asm-operands
+	   limit of GCC < 3.1.  */
+	__asm__ ("" : "+r" (p4), "+r" (p5));
+
 	__asm__ __volatile__ (
-	" pushl %4\n"
-	" pushl %5\n"
 	" .align 32,0x90             ;\n"
 	" 1:                         ;\n"
 	"       movq   (%1), %%mm0   ;\n"
@@ -487,13 +499,16 @@ xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	"       addl $64, %5          ;\n"
 	"       decl %0               ;\n"
 	"       jnz 1b                ;\n"
-	"	popl %5\n"
-	"	popl %4\n"
 	: "+r" (lines),
 	  "+r" (p1), "+r" (p2), "+r" (p3)
 	: "r" (p4), "r" (p5)
 	: "memory");
 
+	/* p4 and p5 were modified, and now the variables are dead.
+	   Clobber them just to be sure nobody does something stupid
+	   like assuming they have some legal value.  */
+	__asm__ ("" : "=r" (p4), "=r" (p5));
+
 	kernel_fpu_end();
 }
@@ -757,10 +772,15 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 
 	XMMS_SAVE;
 
-	/* need to save p4/p5 manually to not exceed gcc's 10 argument limit */
+	/* Make sure GCC forgets anything it knows about p4 or p5,
+	   such that it won't pass to the asm volatile below a
+	   register that is shared with any other variable.  That's
+	   because we modify p4 and p5 there, but we can't mark them
+	   as read/write, otherwise we'd overflow the 10-asm-operands
+	   limit of GCC < 3.1.  */
+	__asm__ ("" : "+r" (p4), "+r" (p5));
+
 	__asm__ __volatile__ (
-	" pushl %4\n"
-	" pushl %5\n"
 #undef BLOCK
 #define BLOCK(i) \
 	PF1(i)					\
@@ -817,13 +837,16 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	"       addl $256, %5         ;\n"
 	"       decl %0               ;\n"
 	"       jnz 1b                ;\n"
-	"	popl %5\n"
-	"	popl %4\n"
 	: "+r" (lines),
 	  "+r" (p1), "+r" (p2), "+r" (p3)
 	: "r" (p4), "r" (p5)
 	: "memory");
 
+	/* p4 and p5 were modified, and now the variables are dead.
+	   Clobber them just to be sure nobody does something stupid
+	   like assuming they have some legal value.  */
+	__asm__ ("" : "=r" (p4), "=r" (p5));
+
 	XMMS_RESTORE;
 }
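
To make the pattern easier to follow in isolation, here is a minimal, self-contained sketch of the same idea with hypothetical names (xor_two, dst, src1, src2), not code from this patch. It is x86-32 AT&T-syntax inline asm, so build it with gcc -m32. The first empty asm makes GCC forget everything it knows about src1 and src2 before they are handed to the main asm as plain "r" inputs, the loop then advances them freely, and the second empty asm clobbers the now-dead values. With only four operands the trick is not actually needed here; it just shows the shape of what the patch does for p4 and p5.

#include <stdio.h>

/* XOR 'lines' 32-bit words of src1 and src2 into dst (dst ^= src1 ^ src2). */
static void xor_two(unsigned long lines, unsigned long *dst,
		    unsigned long *src1, unsigned long *src2)
{
	/* Break any register sharing between src1/src2 and other variables,
	   since the asm below will modify the registers they live in. */
	__asm__ ("" : "+r" (src1), "+r" (src2));

	__asm__ __volatile__ (
	"1:\n\t"
	"	movl	(%1), %%eax\n\t"
	"	xorl	(%2), %%eax\n\t"
	"	xorl	(%3), %%eax\n\t"
	"	movl	%%eax, (%1)\n\t"
	"	addl	$4, %1\n\t"
	"	addl	$4, %2\n\t"	/* src1 advanced in place...      */
	"	addl	$4, %3\n\t"	/* ...and src2 too, although both */
	"	decl	%0\n\t"		/* are only listed as read-only   */
	"	jnz	1b"		/* inputs below.                  */
	: "+r" (lines), "+r" (dst)
	: "r" (src1), "r" (src2)
	: "eax", "memory", "cc");

	/* src1 and src2 are dead now; clobber them so nobody relies on the
	   stale values the loop left behind. */
	__asm__ ("" : "=r" (src1), "=r" (src2));
}

int main(void)
{
	unsigned long d[4] = { 1, 2, 3, 4 };
	unsigned long a[4] = { 5, 6, 7, 8 };
	unsigned long b[4] = { 9, 10, 11, 12 };

	xor_two(4, d, a, b);
	printf("%lu %lu %lu %lu\n", d[0], d[1], d[2], d[3]);	/* 13 14 15 0 */
	return 0;
}

On GCC 3.1 and newer the cleaner spelling is simply to list the modified pointers as "+r" outputs of the main asm; the empty-asm dance exists only to stay under the older 10-operand limit mentioned in the comments above.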