diff --git a/arch/sparc64/lib/atomic.S b/arch/sparc64/lib/atomic.S
index 26463d8a467ae8cc0923a37b0d1719e15fb91d6d..0b6cd475557d58a8442660b4de969a9c1edd01ff 100644
--- a/arch/sparc64/lib/atomic.S
+++ b/arch/sparc64/lib/atomic.S
@@ -7,8 +7,22 @@
 #include <asm/asi.h>
 
 	.text
-	.align	64
+	/* We use these stubs for the uncommon case
+	 * of contention on the atomic value.  This is
+	 * so that we can keep the main fast path 8
+	 * instructions long and thus fit into a single
+	 * L2 cache line.
+	 */
+__atomic_add_membar:
+	ba,pt	%xcc, __atomic_add
+	 membar	#StoreLoad | #StoreStore
+
+__atomic_sub_membar:
+	ba,pt	%xcc, __atomic_sub
+	 membar	#StoreLoad | #StoreStore
+
+	.align	64
 
 	.globl	__atomic_add
 	.type	__atomic_add,#function
 __atomic_add: /* %o0 = increment, %o1 = atomic_ptr */
@@ -16,10 +30,10 @@ __atomic_add: /* %o0 = increment, %o1 = atomic_ptr */
 	add	%g5, %o0, %g7
 	cas	[%o1], %g5, %g7
 	cmp	%g5, %g7
-	bne,pn	%icc, __atomic_add
-	 membar	#StoreLoad | #StoreStore
+	bne,pn	%icc, __atomic_add_membar
+	 add	%g7, %o0, %g7
 	retl
-	 add	%g7, %o0, %o0
+	 sra	%g7, 0, %o0
 	.size	__atomic_add, .-__atomic_add
 
 	.globl	__atomic_sub
@@ -29,10 +43,10 @@ __atomic_sub: /* %o0 = increment, %o1 = atomic_ptr */
 	sub	%g5, %o0, %g7
 	cas	[%o1], %g5, %g7
 	cmp	%g5, %g7
-	bne,pn	%icc, __atomic_sub
-	 membar	#StoreLoad | #StoreStore
+	bne,pn	%icc, __atomic_sub_membar
+	 sub	%g7, %o0, %g7
 	retl
-	 sub	%g7, %o0, %o0
+	 sra	%g7, 0, %o0
 	.size	__atomic_sub, .-__atomic_sub
 
 	.globl	__atomic64_add
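
For readers who don't speak SPARC assembly: the fast path above is a plain
compare-and-swap retry loop, with the memory barrier moved into an out-of-line
stub that only the contended retry path executes.  Below is a minimal C sketch
of the same pattern, assuming GCC's __sync_val_compare_and_swap builtin as a
stand-in for the raw cas instruction; the function name atomic32_add_return is
hypothetical, and this is an illustration of the technique, not the kernel's
implementation:

	#include <stdint.h>

	/* Atomically add `increment` to *ptr and return the new value as a
	 * signed 32-bit quantity, as the `sra %g7, 0, %o0` above does.
	 * Illustrative sketch only -- the kernel uses the hand-written asm.
	 */
	static int32_t atomic32_add_return(int32_t increment, int32_t *ptr)
	{
		for (;;) {
			int32_t old_val = *ptr;			/* lduw [%o1], %g5    */
			int32_t new_val = old_val + increment;	/* add  %g5, %o0, %g7 */

			/* cas [%o1], %g5, %g7: store new_val only if *ptr
			 * still holds old_val; returns what was in memory.
			 */
			if (__sync_val_compare_and_swap(ptr, old_val, new_val) == old_val)
				return new_val;		/* uncontended fast path */

			/* Contended: the patch issues the membar in the
			 * __atomic_*_membar stub before retrying, keeping the
			 * barrier off the fast path.  A full barrier here is a
			 * (stronger) stand-in for membar #StoreLoad | #StoreStore.
			 */
			__sync_synchronize();
		}
	}

The payoff of the stub trick is visible in the hunks: the success path is now
exactly eight instructions (lduw, add, cas, cmp, bne, add, retl, sra) and so
fits in one L2 cache line, while the barrier is paid only under contention.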