Commit cb8095bb authored by Jan Beulich, committed by H. Peter Anvin

x86: atomic64 assembly improvements

In the "xchg" implementation, %ebx and %ecx don't need to be copied
into %eax and %edx respectively (that copy is only necessary when the
intent is merely to read the stored value).

In the "add_unless" implementation, swapping the use of %ecx and %esi
for passing arguments allows %esi to become an input only (i.e.
permitting the register to be re-used to address the same object
without reload).

In "{add,sub}_return", doing the initial read64 through the passed in
%ecx decreases a register dependency.

In "inc_not_zero", a branch can be eliminated by or-ing together the
two halves of the current (64-bit) value, and code size can be further
reduced by adjusting the arithmetic slightly.

v2: Undo the folding of "xchg" and "set".
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Link: http://lkml.kernel.org/r/4F19A2BC020000780006E0DC@nat28.tlf.novell.com
Cc: Luca Barbieri <luca@luca-barbieri.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
parent 819165fb
...@@ -288,9 +288,8 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) ...@@ -288,9 +288,8 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
unsigned low = (unsigned)u; unsigned low = (unsigned)u;
unsigned high = (unsigned)(u >> 32); unsigned high = (unsigned)(u >> 32);
alternative_atomic64(add_unless, alternative_atomic64(add_unless,
ASM_OUTPUT2("+A" (a), "+c" (v), ASM_OUTPUT2("+A" (a), "+c" (low), "+D" (high)),
"+S" (low), "+D" (high)), "S" (v) : "memory");
ASM_NO_INPUT_CLOBBER("memory"));
return (int)a; return (int)a;
} }
......
...@@ -137,13 +137,13 @@ BEGIN(dec_return) ...@@ -137,13 +137,13 @@ BEGIN(dec_return)
RET_ENDP RET_ENDP
#undef v #undef v
#define v %ecx #define v %esi
BEGIN(add_unless) BEGIN(add_unless)
addl %eax, %esi addl %eax, %ecx
adcl %edx, %edi adcl %edx, %edi
addl (v), %eax addl (v), %eax
adcl 4(v), %edx adcl 4(v), %edx
cmpl %eax, %esi cmpl %eax, %ecx
je 3f je 3f
1: 1:
movl %eax, (v) movl %eax, (v)
......
...@@ -55,8 +55,6 @@ ENDPROC(atomic64_set_cx8) ...@@ -55,8 +55,6 @@ ENDPROC(atomic64_set_cx8)
ENTRY(atomic64_xchg_cx8) ENTRY(atomic64_xchg_cx8)
CFI_STARTPROC CFI_STARTPROC
movl %ebx, %eax
movl %ecx, %edx
1: 1:
LOCK_PREFIX LOCK_PREFIX
cmpxchg8b (%esi) cmpxchg8b (%esi)
...@@ -78,7 +76,7 @@ ENTRY(atomic64_\func\()_return_cx8) ...@@ -78,7 +76,7 @@ ENTRY(atomic64_\func\()_return_cx8)
movl %edx, %edi movl %edx, %edi
movl %ecx, %ebp movl %ecx, %ebp
read64 %ebp read64 %ecx
1: 1:
movl %eax, %ebx movl %eax, %ebx
movl %edx, %ecx movl %edx, %ecx
...@@ -159,23 +157,22 @@ ENTRY(atomic64_add_unless_cx8) ...@@ -159,23 +157,22 @@ ENTRY(atomic64_add_unless_cx8)
SAVE ebx SAVE ebx
/* these just push these two parameters on the stack */ /* these just push these two parameters on the stack */
SAVE edi SAVE edi
SAVE esi SAVE ecx
movl %ecx, %ebp movl %eax, %ebp
movl %eax, %esi
movl %edx, %edi movl %edx, %edi
read64 %ebp read64 %esi
1: 1:
cmpl %eax, 0(%esp) cmpl %eax, 0(%esp)
je 4f je 4f
2: 2:
movl %eax, %ebx movl %eax, %ebx
movl %edx, %ecx movl %edx, %ecx
addl %esi, %ebx addl %ebp, %ebx
adcl %edi, %ecx adcl %edi, %ecx
LOCK_PREFIX LOCK_PREFIX
cmpxchg8b (%ebp) cmpxchg8b (%esi)
jne 1b jne 1b
movl $1, %eax movl $1, %eax
...@@ -199,13 +196,13 @@ ENTRY(atomic64_inc_not_zero_cx8) ...@@ -199,13 +196,13 @@ ENTRY(atomic64_inc_not_zero_cx8)
read64 %esi read64 %esi
1: 1:
testl %eax, %eax movl %eax, %ecx
je 4f orl %edx, %ecx
2: jz 3f
movl %eax, %ebx movl %eax, %ebx
movl %edx, %ecx xorl %ecx, %ecx
addl $1, %ebx addl $1, %ebx
adcl $0, %ecx adcl %edx, %ecx
LOCK_PREFIX LOCK_PREFIX
cmpxchg8b (%esi) cmpxchg8b (%esi)
jne 1b jne 1b
...@@ -214,9 +211,5 @@ ENTRY(atomic64_inc_not_zero_cx8) ...@@ -214,9 +211,5 @@ ENTRY(atomic64_inc_not_zero_cx8)
3: 3:
RESTORE ebx RESTORE ebx
ret ret
4:
testl %edx, %edx
jne 2b
jmp 3b
CFI_ENDPROC CFI_ENDPROC
ENDPROC(atomic64_inc_not_zero_cx8) ENDPROC(atomic64_inc_not_zero_cx8)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment