Commit 3a22d332 authored by Davi Arnaut's avatar Davi Arnaut

Bug#22320: my_atomic-t unit test fails

Bug#52261: 64 bit atomic operations do not work on Solaris i386
           gcc in debug compilation

One of the various problems was that the source operand to
CMPXCHG8b was marked as an input/output operand, causing GCC
to use the EBX register as the destination register for the
CMPXCHG8b instruction. This could lead to crashes as the EBX
register is also implicitly used by the instruction, causing
the value to be potentially corrupted and a protection fault
once the value is used to access a position in memory.

Another problem was the lack of proper clobbers for the atomic
operations and, also, a discrepancy between the implementations
for the Compare and Set operation. The specific problems are
described and fixed by Kristian Nielsen patches:

Patch 1:

Fix bugs in my_atomic_cas*(val,cmp,new) where *cmp is accessed
after the CAS succeeds.

In the gcc builtin implementation, problem was that *cmp was
read again after atomic CAS to check if old *val == *cmp;
this fails if CAS is successful and another thread modifies
*cmp in-between.

In the x86-gcc implementation, problem was that *cmp was set
also in the case of successful CAS; this means there is a
window where it can clobber a value written by another thread
after successful CAS.

Patch 2:

Add a GCC asm "memory" clobber to primitives that imply a
memory barrier.

This signifies to GCC that any potentially aliased memory
must be flushed before the operation, and re-read after the
operation, so that read or modification in other threads of
such memory values will work as intended.

In effect, it makes these primitives work as memory barriers
for the compiler as well as the CPU. This is better and more
correct than adding "volatile" to variables.
parent 75437057
...@@ -22,8 +22,9 @@ ...@@ -22,8 +22,9 @@
v= __sync_lock_test_and_set(a, v); v= __sync_lock_test_and_set(a, v);
#define make_atomic_cas_body(S) \ #define make_atomic_cas_body(S) \
int ## S sav; \ int ## S sav; \
sav= __sync_val_compare_and_swap(a, *cmp, set); \ int ## S cmp_val= *cmp; \
if (!(ret= (sav == *cmp))) *cmp= sav; sav= __sync_val_compare_and_swap(a, cmp_val, set);\
if (!(ret= (sav == cmp_val))) *cmp= sav
#ifdef MY_ATOMIC_MODE_DUMMY #ifdef MY_ATOMIC_MODE_DUMMY
#define make_atomic_load_body(S) ret= *a #define make_atomic_load_body(S) ret= *a
......
...@@ -29,21 +29,22 @@ ...@@ -29,21 +29,22 @@
We choose implementation as follows: We choose implementation as follows:
------------------------------------ ------------------------------------
On Windows using Visual C++ the native implementation should be On Windows using Visual C++ the native implementation should be
preferrable. When using gcc we prefer the native x86 implementation, preferrable. When using gcc we prefer the Solaris implementation
we prefer the Solaris implementation before the gcc because of before the gcc because of stability preference, we choose gcc
stability preference, we choose gcc implementation if nothing else builtins if available, otherwise we choose the somewhat broken
works on gcc. If neither Visual C++ or gcc we still choose the native x86 implementation. If neither Visual C++ or gcc we still
Solaris implementation on Solaris (mainly for SunStudio compiles. choose the Solaris implementation on Solaris (mainly for SunStudio
compilers).
*/ */
# if defined(_MSV_VER) # if defined(_MSV_VER)
# include "generic-msvc.h" # include "generic-msvc.h"
# elif __GNUC__ # elif __GNUC__
# if defined(__i386__) || defined(__x86_64__) # if defined(HAVE_SOLARIS_ATOMIC)
# include "x86-gcc.h"
# elif defined(HAVE_SOLARIS_ATOMIC)
# include "solaris.h" # include "solaris.h"
# elif defined(HAVE_GCC_ATOMIC_BUILTINS) # elif defined(HAVE_GCC_ATOMIC_BUILTINS)
# include "gcc_builtins.h" # include "gcc_builtins.h"
# elif defined(__i386__) || defined(__x86_64__)
# include "x86-gcc.h"
# endif # endif
# elif defined(HAVE_SOLARIS_ATOMIC) # elif defined(HAVE_SOLARIS_ATOMIC)
# include "solaris.h" # include "solaris.h"
......
...@@ -53,18 +53,29 @@ ...@@ -53,18 +53,29 @@
#endif #endif
#define make_atomic_add_body32 \ #define make_atomic_add_body32 \
asm volatile (LOCK_prefix "; xadd %0, %1;" : "+r" (v) , "+m" (*a)) asm volatile (LOCK_prefix "; xadd %0, %1;" \
: "+r" (v), "=m" (*a) \
: "m" (*a) \
: "memory")
#define make_atomic_cas_body32 \ #define make_atomic_cas_body32 \
__typeof__(*cmp) sav; \
asm volatile (LOCK_prefix "; cmpxchg %3, %0; setz %2;" \ asm volatile (LOCK_prefix "; cmpxchg %3, %0; setz %2;" \
: "+m" (*a), "+a" (*cmp), "=q" (ret): "r" (set)) : "=m" (*a), "=a" (sav), "=q" (ret) \
: "r" (set), "m" (*a), "a" (*cmp) \
: "memory"); \
if (!ret) \
*cmp= sav
#ifdef __x86_64__ #ifdef __x86_64__
#define make_atomic_add_body64 make_atomic_add_body32 #define make_atomic_add_body64 make_atomic_add_body32
#define make_atomic_cas_body64 make_atomic_cas_body32 #define make_atomic_cas_body64 make_atomic_cas_body32
#define make_atomic_fas_body(S) \ #define make_atomic_fas_body(S) \
asm volatile ("xchg %0, %1;" : "+r" (v) , "+m" (*a)) asm volatile ("xchg %0, %1;" \
: "+r" (v), "=m" (*a) \
: "m" (*a) \
: "memory")
/* /*
Actually 32-bit reads/writes are always atomic on x86 Actually 32-bit reads/writes are always atomic on x86
...@@ -73,9 +84,14 @@ ...@@ -73,9 +84,14 @@
#define make_atomic_load_body(S) \ #define make_atomic_load_body(S) \
ret=0; \ ret=0; \
asm volatile (LOCK_prefix "; cmpxchg %2, %0" \ asm volatile (LOCK_prefix "; cmpxchg %2, %0" \
: "+m" (*a), "+a" (ret): "r" (ret)) : "=m" (*a), "=a" (ret) \
: "r" (ret), "m" (*a) \
: "memory")
#define make_atomic_store_body(S) \ #define make_atomic_store_body(S) \
asm volatile ("; xchg %0, %1;" : "+m" (*a), "+r" (v)) asm volatile ("; xchg %0, %1;" \
: "=m" (*a), "+r" (v) \
: "m" (*a) \
: "memory")
#else #else
/* /*
...@@ -104,12 +120,13 @@ ...@@ -104,12 +120,13 @@
platforms the much simpler make_atomic_cas_body32 will work platforms the much simpler make_atomic_cas_body32 will work
fine. fine.
*/ */
#define make_atomic_cas_body64 \ #define make_atomic_cas_body64 \
int32 ebx=(set & 0xFFFFFFFF), ecx=(set >> 32); \ int32 ebx=(set & 0xFFFFFFFF), ecx=(set >> 32); \
asm volatile ("push %%ebx; movl %3, %%ebx;" \ asm volatile ("push %%ebx; movl %3, %%ebx;" \
LOCK_prefix "; cmpxchg8b %0; setz %2; pop %%ebx"\ LOCK_prefix "; cmpxchg8b %0; setz %2; pop %%ebx" \
: "+m" (*a), "+A" (*cmp), "=c" (ret) \ : "=m" (*a), "+A" (*cmp), "=c" (ret) \
:"m" (ebx), "c" (ecx)) : "m" (ebx), "c" (ecx), "m" (*a) \
: "memory", "esp")
#endif #endif
/* /*
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
This header defines five atomic operations: This header defines five atomic operations:
my_atomic_add#(&var, what) my_atomic_add#(&var, what)
'Fetch and Add'
add 'what' to *var, and return the old value of *var add 'what' to *var, and return the old value of *var
my_atomic_fas#(&var, what) my_atomic_fas#(&var, what)
...@@ -27,9 +28,10 @@ ...@@ -27,9 +28,10 @@
store 'what' in *var, and return the old value of *var store 'what' in *var, and return the old value of *var
my_atomic_cas#(&var, &old, new) my_atomic_cas#(&var, &old, new)
'Compare And Swap' An odd variation of 'Compare And Set/Swap'
if *var is equal to *old, then store 'new' in *var, and return TRUE if *var is equal to *old, then store 'new' in *var, and return TRUE
otherwise store *var in *old, and return FALSE otherwise store *var in *old, and return FALSE
Usually, &old should not be accessed if the operation is successful.
my_atomic_load#(&var) my_atomic_load#(&var)
return *var return *var
......
...@@ -15,13 +15,6 @@ ...@@ -15,13 +15,6 @@
#include "thr_template.c" #include "thr_template.c"
/* at least gcc 3.4.5 and 3.4.6 (but not 3.2.3) on RHEL */
#if __GNUC__ == 3 && __GNUC_MINOR__ == 4
#define GCC_BUG_WORKAROUND volatile
#else
#define GCC_BUG_WORKAROUND
#endif
volatile uint32 b32; volatile uint32 b32;
volatile int32 c32; volatile int32 c32;
my_atomic_rwlock_t rwl; my_atomic_rwlock_t rwl;
...@@ -29,8 +22,8 @@ my_atomic_rwlock_t rwl; ...@@ -29,8 +22,8 @@ my_atomic_rwlock_t rwl;
/* add and sub a random number in a loop. Must get 0 at the end */ /* add and sub a random number in a loop. Must get 0 at the end */
pthread_handler_t test_atomic_add(void *arg) pthread_handler_t test_atomic_add(void *arg)
{ {
int m= (*(int *)arg)/2; int m= (*(int *)arg)/2;
GCC_BUG_WORKAROUND int32 x; int32 x;
for (x= ((int)(intptr)(&m)); m ; m--) for (x= ((int)(intptr)(&m)); m ; m--)
{ {
x= (x*m+0x87654321) & INT_MAX32; x= (x*m+0x87654321) & INT_MAX32;
...@@ -52,8 +45,8 @@ volatile int64 a64; ...@@ -52,8 +45,8 @@ volatile int64 a64;
/* add and sub a random number in a loop. Must get 0 at the end */ /* add and sub a random number in a loop. Must get 0 at the end */
pthread_handler_t test_atomic_add64(void *arg) pthread_handler_t test_atomic_add64(void *arg)
{ {
int m= (*(int *)arg)/2; int m= (*(int *)arg)/2;
GCC_BUG_WORKAROUND int64 x; int64 x;
for (x= ((int64)(intptr)(&m)); m ; m--) for (x= ((int64)(intptr)(&m)); m ; m--)
{ {
x= (x*m+0xfdecba987654321LL) & INT_MAX64; x= (x*m+0xfdecba987654321LL) & INT_MAX64;
...@@ -128,8 +121,8 @@ pthread_handler_t test_atomic_fas(void *arg) ...@@ -128,8 +121,8 @@ pthread_handler_t test_atomic_fas(void *arg)
*/ */
pthread_handler_t test_atomic_cas(void *arg) pthread_handler_t test_atomic_cas(void *arg)
{ {
int m= (*(int *)arg)/2, ok= 0; int m= (*(int *)arg)/2, ok= 0;
GCC_BUG_WORKAROUND int32 x, y; int32 x, y;
for (x= ((int)(intptr)(&m)); m ; m--) for (x= ((int)(intptr)(&m)); m ; m--)
{ {
my_atomic_rwlock_wrlock(&rwl); my_atomic_rwlock_wrlock(&rwl);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment