Commit 4aea170b authored by Vladislav Vaintroub's avatar Vladislav Vaintroub

A follow-up to the patch for bug #56405 "Deadlock in the MDL deadlock

detector". This patch addresses performance regression in OLTP_RO/MyISAM
test on Windows introduced by the fix for bug #56405. Thus it makes
original patch acceptable as a solution for bug #56585 "Slowdown of
readonly sysbench benchmarks (e.g point_select) on Windows 5.5".

With this patch, MySQL will use native Windows condition variables and 
reader-writer locks  if  they are supported by the OS.

This speeds up MyISAM and the effect comes mostly from using native
rwlocks. Native conditions improve scalability with higher number of 
concurrent users in other situations, e.g for prlocks.

Benchmark numbers for this patch as measured on Win2008R2 quad
core machine are attached to the bug report.
( direct link http://bugs.mysql.com/file.php?id=15883 )

Note that currently we require at least Windows7/WS2008R2 for 
reader-writer locks, even though native rwlock is available also on Vista.
Reason is that "trylock" APIs are missing on Vista, and trylock is used in
the server (in a single place in query cache).

While this patch could have been written differently, to enable the native
rwlock optimization also on Vista/WS2008 (e.g using native locks everywhere
but portable implementation in query cache), this would come at the 
expense of the code clarity, as it would introduce a new  "try-able" rwlock
type, to handle Vista case.

Another way to improve performance for the special case 
(OLTP_RO/MYISAM/Vista) would be to eliminate "trylock" usage from server,
 but this is outside of the scope here.


Native conditions variables are used beginning with Vista though the effect
of using condition variables alone is not measurable in this benchmark.
But when used together with native rwlocks on Win7, native conditions improve 
performance in high-concurrency OLTP_RO/MyISAM (128 and more sysbench 
users).
parent f5ecd2af
......@@ -48,19 +48,30 @@ typedef struct st_pthread_link {
struct st_pthread_link *next;
} pthread_link;
typedef struct {
/**
Implementation of Windows condition variables.
We use native conditions on Vista and later, and fallback to own
implementation on earlier OS version.
*/
typedef union
{
/* Native condition (used on Vista and later) */
CONDITION_VARIABLE native_cond;
/* Own implementation (used on XP) */
struct
{
uint32 waiting;
CRITICAL_SECTION lock_waiting;
enum {
enum
{
SIGNAL= 0,
BROADCAST= 1,
MAX_EVENTS= 2
} EVENTS;
HANDLE events[MAX_EVENTS];
HANDLE broadcast_block_event;
};
} pthread_cond_t;
......@@ -679,6 +690,47 @@ extern int rw_pr_destroy(rw_pr_lock_t *);
#ifdef NEED_MY_RW_LOCK
#ifdef _WIN32
/**
Implementation of Windows rwlock.
We use native (slim) rwlocks on Win7 and later, and fallback to portable
implementation on earlier Windows.
slim rwlock are also available on Vista/WS2008, but we do not use it
("trylock" APIs are missing on Vista)
*/
typedef union
{
/* Native rwlock (is_srwlock == TRUE) */
struct
{
SRWLOCK srwlock; /* native reader writer lock */
BOOL have_exclusive_srwlock; /* used for unlock */
};
/*
Portable implementation (is_srwlock == FALSE)
Fields are identical with Unix my_rw_lock_t fields.
*/
struct
{
pthread_mutex_t lock; /* lock for structure */
pthread_cond_t readers; /* waiting readers */
pthread_cond_t writers; /* waiting writers */
int state; /* -1:writer,0:free,>0:readers */
int waiters; /* number of waiting writers */
#ifdef SAFE_MUTEX
pthread_t write_thread;
#endif
};
} my_rw_lock_t;
#else /* _WIN32 */
/*
On systems which don't support native read/write locks we have
to use own implementation.
......@@ -694,6 +746,8 @@ typedef struct st_my_rw_lock_t {
#endif
} my_rw_lock_t;
#endif /*! _WIN32 */
extern int my_rw_init(my_rw_lock_t *);
extern int my_rw_destroy(my_rw_lock_t *);
extern int my_rw_rdlock(my_rw_lock_t *);
......
......@@ -24,7 +24,108 @@
#include <process.h>
#include <sys/timeb.h>
int pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr)
/*
Windows native condition variables. We use runtime loading / function
pointers, because they are not available on XP
*/
/* Prototypes and function pointers for condition variable functions */
typedef VOID (WINAPI * InitializeConditionVariableProc)
(PCONDITION_VARIABLE ConditionVariable);
typedef BOOL (WINAPI * SleepConditionVariableCSProc)
(PCONDITION_VARIABLE ConditionVariable,
PCRITICAL_SECTION CriticalSection,
DWORD dwMilliseconds);
typedef VOID (WINAPI * WakeAllConditionVariableProc)
(PCONDITION_VARIABLE ConditionVariable);
typedef VOID (WINAPI * WakeConditionVariableProc)
(PCONDITION_VARIABLE ConditionVariable);
static InitializeConditionVariableProc my_InitializeConditionVariable;
static SleepConditionVariableCSProc my_SleepConditionVariableCS;
static WakeAllConditionVariableProc my_WakeAllConditionVariable;
static WakeConditionVariableProc my_WakeConditionVariable;
/**
Indicates if we have native condition variables,
initialized first time pthread_cond_init is called.
*/
static BOOL have_native_conditions= FALSE;
/**
Check if native conditions can be used, load function pointers
*/
static void check_native_cond_availability(void)
{
HMODULE module= GetModuleHandle("kernel32");
my_InitializeConditionVariable= (InitializeConditionVariableProc)
GetProcAddress(module, "InitializeConditionVariable");
my_SleepConditionVariableCS= (SleepConditionVariableCSProc)
GetProcAddress(module, "SleepConditionVariableCS");
my_WakeAllConditionVariable= (WakeAllConditionVariableProc)
GetProcAddress(module, "WakeAllConditionVariable");
my_WakeConditionVariable= (WakeConditionVariableProc)
GetProcAddress(module, "WakeConditionVariable");
if (my_InitializeConditionVariable)
have_native_conditions= TRUE;
}
/**
Convert abstime to milliseconds
*/
static DWORD get_milliseconds(const struct timespec *abstime)
{
long long millis;
union ft64 now;
if (abstime == NULL)
return INFINITE;
GetSystemTimeAsFileTime(&now.ft);
/*
Calculate time left to abstime
- subtract start time from current time(values are in 100ns units)
- convert to millisec by dividing with 10000
*/
millis= (abstime->tv.i64 - now.i64) / 10000;
/* Don't allow the timeout to be negative */
if (millis < 0)
return 0;
/*
Make sure the calculated timeout does not exceed original timeout
value which could cause "wait for ever" if system time changes
*/
if (millis > abstime->max_timeout_msec)
millis= abstime->max_timeout_msec;
if (millis > UINT_MAX)
millis= UINT_MAX;
return (DWORD)millis;
}
/*
Old (pre-vista) implementation using events
*/
static int legacy_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr)
{
cond->waiting= 0;
InitializeCriticalSection(&cond->lock_waiting);
......@@ -53,7 +154,8 @@ int pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr)
return 0;
}
int pthread_cond_destroy(pthread_cond_t *cond)
static int legacy_cond_destroy(pthread_cond_t *cond)
{
DeleteCriticalSection(&cond->lock_waiting);
......@@ -65,48 +167,13 @@ int pthread_cond_destroy(pthread_cond_t *cond)
}
int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
{
return pthread_cond_timedwait(cond,mutex,NULL);
}
int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex,
static int legacy_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex,
struct timespec *abstime)
{
int result;
long timeout;
union ft64 now;
if( abstime != NULL )
{
GetSystemTimeAsFileTime(&now.ft);
/*
Calculate time left to abstime
- subtract start time from current time(values are in 100ns units)
- convert to millisec by dividing with 10000
*/
timeout= (long)((abstime->tv.i64 - now.i64) / 10000);
/* Don't allow the timeout to be negative */
if (timeout < 0)
timeout= 0L;
/*
Make sure the calucated timeout does not exceed original timeout
value which could cause "wait for ever" if system time changes
*/
if (timeout > abstime->max_timeout_msec)
timeout= abstime->max_timeout_msec;
}
else
{
/* No time specified; don't expire */
timeout= INFINITE;
}
DWORD timeout;
timeout= get_milliseconds(abstime);
/*
Block access if previous broadcast hasn't finished.
This is just for safety and should normally not
......@@ -142,7 +209,7 @@ int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex,
return result == WAIT_TIMEOUT ? ETIMEDOUT : 0;
}
int pthread_cond_signal(pthread_cond_t *cond)
static int legacy_cond_signal(pthread_cond_t *cond)
{
EnterCriticalSection(&cond->lock_waiting);
......@@ -155,7 +222,7 @@ int pthread_cond_signal(pthread_cond_t *cond)
}
int pthread_cond_broadcast(pthread_cond_t *cond)
static int legacy_cond_broadcast(pthread_cond_t *cond)
{
EnterCriticalSection(&cond->lock_waiting);
/*
......@@ -177,6 +244,87 @@ int pthread_cond_broadcast(pthread_cond_t *cond)
}
/*
Posix API functions. Just choose between native and legacy implementation.
*/
int pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr)
{
/*
Once initialization is used here rather than in my_init(), to
1) avoid my_init() pitfalls- undefined order in which initialization should
run
2) be potentially useful C++ (in static constructors that run before main())
3) just to simplify the API.
Also, the overhead of my_pthread_once is very small.
*/
static my_pthread_once_t once_control= MY_PTHREAD_ONCE_INIT;
my_pthread_once(&once_control, check_native_cond_availability);
if (have_native_conditions)
{
my_InitializeConditionVariable(&cond->native_cond);
return 0;
}
else
return legacy_cond_init(cond, attr);
}
int pthread_cond_destroy(pthread_cond_t *cond)
{
if (have_native_conditions)
return 0; /* no destroy function */
else
return legacy_cond_destroy(cond);
}
int pthread_cond_broadcast(pthread_cond_t *cond)
{
if (have_native_conditions)
{
my_WakeAllConditionVariable(&cond->native_cond);
return 0;
}
else
return legacy_cond_broadcast(cond);
}
int pthread_cond_signal(pthread_cond_t *cond)
{
if (have_native_conditions)
{
my_WakeConditionVariable(&cond->native_cond);
return 0;
}
else
return legacy_cond_signal(cond);
}
int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex,
struct timespec *abstime)
{
if (have_native_conditions)
{
DWORD timeout= get_milliseconds(abstime);
if (!my_SleepConditionVariableCS(&cond->native_cond, mutex, timeout))
return ETIMEDOUT;
return 0;
}
else
return legacy_cond_timedwait(cond, mutex, abstime);
}
int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
{
return pthread_cond_timedwait(cond, mutex, NULL);
}
int pthread_attr_init(pthread_attr_t *connect_att)
{
connect_att->dwStackSize = 0;
......
......@@ -155,8 +155,19 @@ int pthread_cancel(pthread_t thread)
int my_pthread_once(my_pthread_once_t *once_control,
void (*init_routine)(void))
{
LONG state= InterlockedCompareExchange(once_control, MY_PTHREAD_ONCE_INPROGRESS,
LONG state;
/*
Do "dirty" read to find out if initialization is already done, to
save an interlocked operation in common case. Memory barriers are ensured by
Visual C++ volatile implementation.
*/
if (*once_control == MY_PTHREAD_ONCE_DONE)
return 0;
state= InterlockedCompareExchange(once_control, MY_PTHREAD_ONCE_INPROGRESS,
MY_PTHREAD_ONCE_INIT);
switch(state)
{
case MY_PTHREAD_ONCE_INIT:
......
......@@ -20,6 +20,119 @@
#if defined(NEED_MY_RW_LOCK)
#include <errno.h>
#ifdef _WIN32
static BOOL have_srwlock= FALSE;
/* Prototypes and function pointers for windows functions */
typedef VOID (WINAPI* srw_func) (PSRWLOCK SRWLock);
typedef BOOL (WINAPI* srw_bool_func) (PSRWLOCK SRWLock);
static srw_func my_InitializeSRWLock;
static srw_func my_AcquireSRWLockExclusive;
static srw_func my_ReleaseSRWLockExclusive;
static srw_func my_AcquireSRWLockShared;
static srw_func my_ReleaseSRWLockShared;
static srw_bool_func my_TryAcquireSRWLockExclusive;
static srw_bool_func my_TryAcquireSRWLockShared;
/**
Check for presence of Windows slim reader writer lock function.
Load function pointers.
*/
static void check_srwlock_availability(void)
{
HMODULE module= GetModuleHandle("kernel32");
my_InitializeSRWLock= (srw_func) GetProcAddress(module,
"InitializeSRWLock");
my_AcquireSRWLockExclusive= (srw_func) GetProcAddress(module,
"AcquireSRWLockExclusive");
my_AcquireSRWLockShared= (srw_func) GetProcAddress(module,
"AcquireSRWLockShared");
my_ReleaseSRWLockExclusive= (srw_func) GetProcAddress(module,
"ReleaseSRWLockExclusive");
my_ReleaseSRWLockShared= (srw_func) GetProcAddress(module,
"ReleaseSRWLockShared");
my_TryAcquireSRWLockExclusive= (srw_bool_func) GetProcAddress(module,
"TryAcquireSRWLockExclusive");
my_TryAcquireSRWLockShared= (srw_bool_func) GetProcAddress(module,
"TryAcquireSRWLockShared");
/*
We currently require TryAcquireSRWLockExclusive. This API is missing on
Vista, this means SRWLock are only used starting with Win7.
If "trylock" usage for rwlocks is eliminated from server codebase (it is used
in a single place currently, in query cache), then SRWLock can be enabled on
Vista too. In this case condition below needs to be changed to e.g check
for my_InitializeSRWLock.
*/
if (my_TryAcquireSRWLockExclusive)
have_srwlock= TRUE;
}
static int srw_init(my_rw_lock_t *rwp)
{
my_InitializeSRWLock(&rwp->srwlock);
rwp->have_exclusive_srwlock = FALSE;
return 0;
}
static int srw_rdlock(my_rw_lock_t *rwp)
{
my_AcquireSRWLockShared(&rwp->srwlock);
return 0;
}
static int srw_tryrdlock(my_rw_lock_t *rwp)
{
if (!my_TryAcquireSRWLockShared(&rwp->srwlock))
return EBUSY;
return 0;
}
static int srw_wrlock(my_rw_lock_t *rwp)
{
my_AcquireSRWLockExclusive(&rwp->srwlock);
rwp->have_exclusive_srwlock= TRUE;
return 0;
}
static int srw_trywrlock(my_rw_lock_t *rwp)
{
if (!my_TryAcquireSRWLockExclusive(&rwp->srwlock))
return EBUSY;
rwp->have_exclusive_srwlock= TRUE;
return 0;
}
static int srw_unlock(my_rw_lock_t *rwp)
{
if (rwp->have_exclusive_srwlock)
{
rwp->have_exclusive_srwlock= FALSE;
my_ReleaseSRWLockExclusive(&rwp->srwlock);
}
else
{
my_ReleaseSRWLockShared(&rwp->srwlock);
}
return 0;
}
#endif /*_WIN32 */
/*
Source base from Sun Microsystems SPILT, simplified for MySQL use
-- Joshua Chamas
......@@ -63,6 +176,22 @@ int my_rw_init(my_rw_lock_t *rwp)
{
pthread_condattr_t cond_attr;
#ifdef _WIN32
/*
Once initialization is used here rather than in my_init(), in order to
- avoid my_init() pitfalls- (undefined order in which initialization should
run)
- be potentially useful C++ (static constructors)
- just to simplify the API.
Also, the overhead is of my_pthread_once is very small.
*/
static my_pthread_once_t once_control= MY_PTHREAD_ONCE_INIT;
my_pthread_once(&once_control, check_srwlock_availability);
if (have_srwlock)
return srw_init(rwp);
#endif
pthread_mutex_init( &rwp->lock, MY_MUTEX_INIT_FAST);
pthread_condattr_init( &cond_attr );
pthread_cond_init( &rwp->readers, &cond_attr );
......@@ -81,6 +210,10 @@ int my_rw_init(my_rw_lock_t *rwp)
int my_rw_destroy(my_rw_lock_t *rwp)
{
#ifdef _WIN32
if (have_srwlock)
return 0; /* no destroy function */
#endif
DBUG_ASSERT(rwp->state == 0);
pthread_mutex_destroy( &rwp->lock );
pthread_cond_destroy( &rwp->readers );
......@@ -91,6 +224,11 @@ int my_rw_destroy(my_rw_lock_t *rwp)
int my_rw_rdlock(my_rw_lock_t *rwp)
{
#ifdef _WIN32
if (have_srwlock)
return srw_rdlock(rwp);
#endif
pthread_mutex_lock(&rwp->lock);
/* active or queued writers */
......@@ -105,6 +243,12 @@ int my_rw_rdlock(my_rw_lock_t *rwp)
int my_rw_tryrdlock(my_rw_lock_t *rwp)
{
int res;
#ifdef _WIN32
if (have_srwlock)
return srw_tryrdlock(rwp);
#endif
pthread_mutex_lock(&rwp->lock);
if ((rwp->state < 0 ) || rwp->waiters)
res= EBUSY; /* Can't get lock */
......@@ -120,6 +264,11 @@ int my_rw_tryrdlock(my_rw_lock_t *rwp)
int my_rw_wrlock(my_rw_lock_t *rwp)
{
#ifdef _WIN32
if (have_srwlock)
return srw_wrlock(rwp);
#endif
pthread_mutex_lock(&rwp->lock);
rwp->waiters++; /* another writer queued */
......@@ -140,6 +289,12 @@ int my_rw_wrlock(my_rw_lock_t *rwp)
int my_rw_trywrlock(my_rw_lock_t *rwp)
{
int res;
#ifdef _WIN32
if (have_srwlock)
return srw_trywrlock(rwp);
#endif
pthread_mutex_lock(&rwp->lock);
if (rwp->state)
res= EBUSY; /* Can't get lock */
......@@ -158,6 +313,11 @@ int my_rw_trywrlock(my_rw_lock_t *rwp)
int my_rw_unlock(my_rw_lock_t *rwp)
{
#ifdef _WIN32
if (have_srwlock)
return srw_unlock(rwp);
#endif
DBUG_PRINT("rw_unlock",
("state: %d waiters: %d", rwp->state, rwp->waiters));
pthread_mutex_lock(&rwp->lock);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment