Commit 3f4aa5f7 authored by unknown

Merge bk-internal.mysql.com:/home/bk/mysql-maria

into  janus.mylan:/usr/home/serg/Abk/mysql-maria

parents 3becab22 915cebdd
...@@ -117,8 +117,9 @@ valgrind_flags="-USAFEMALLOC -UFORCE_INIT_OF_VARS -DHAVE_purify " ...@@ -117,8 +117,9 @@ valgrind_flags="-USAFEMALLOC -UFORCE_INIT_OF_VARS -DHAVE_purify "
valgrind_flags="$valgrind_flags -DMYSQL_SERVER_SUFFIX=-valgrind-max" valgrind_flags="$valgrind_flags -DMYSQL_SERVER_SUFFIX=-valgrind-max"
# #
# Used in -debug builds # Used in -debug builds
debug_cflags="-DUNIV_MUST_NOT_INLINE -DEXTRA_DEBUG -DFORCE_INIT_OF_VARS " debug_cflags="-DUNIV_MUST_NOT_INLINE -DEXTRA_DEBUG -DFORCE_INIT_OF_VARS"
debug_cflags="$debug_cflags -DSAFEMALLOC -DPEDANTIC_SAFEMALLOC -DSAFE_MUTEX" debug_cflags="$debug_cflags -DSAFEMALLOC -DPEDANTIC_SAFEMALLOC -DSAFE_MUTEX"
debug_cflags="$debug_cflags -DMY_LF_EXTRA_DEBUG"
error_inject="--with-error-inject " error_inject="--with-error-inject "
# #
# Base C++ flags for all builds # Base C++ flags for all builds
......
...@@ -32,7 +32,7 @@ ...@@ -32,7 +32,7 @@
#ifdef make_atomic_cas_body #ifdef make_atomic_cas_body
typedef struct { } my_atomic_rwlock_t; typedef struct { } my_atomic_rwlock_t __attribute__ ((unused));
#define my_atomic_rwlock_destroy(name) #define my_atomic_rwlock_destroy(name)
#define my_atomic_rwlock_init(name) #define my_atomic_rwlock_init(name)
#define my_atomic_rwlock_rdlock(name) #define my_atomic_rwlock_rdlock(name)
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
func() is a _func() protected by my_atomic_rwlock_wrlock() func() is a _func() protected by my_atomic_rwlock_wrlock()
*/ */
#define lock_wrap(f,t,proto_args, args, lock) \ #define lock_wrap(f, t, proto_args, args, lock) \
t _ ## f proto_args; \ t _ ## f proto_args; \
static inline t f proto_args \ static inline t f proto_args \
{ \ { \
...@@ -35,7 +35,7 @@ static inline t f proto_args \ ...@@ -35,7 +35,7 @@ static inline t f proto_args \
return ret; \ return ret; \
} }
#define lock_wrap_void(f,proto_args, args, lock) \ #define lock_wrap_void(f, proto_args, args, lock) \
void _ ## f proto_args; \ void _ ## f proto_args; \
static inline void f proto_args \ static inline void f proto_args \
{ \ { \
...@@ -44,14 +44,14 @@ static inline void f proto_args \ ...@@ -44,14 +44,14 @@ static inline void f proto_args \
my_atomic_rwlock_wrunlock(lock); \ my_atomic_rwlock_wrunlock(lock); \
} }
#define nolock_wrap(f,t,proto_args, args) \ #define nolock_wrap(f, t, proto_args, args) \
t _ ## f proto_args; \ t _ ## f proto_args; \
static inline t f proto_args \ static inline t f proto_args \
{ \ { \
return _ ## f args; \ return _ ## f args; \
} }
#define nolock_wrap_void(f,proto_args, args) \ #define nolock_wrap_void(f, proto_args, args) \
void _ ## f proto_args; \ void _ ## f proto_args; \
static inline void f proto_args \ static inline void f proto_args \
{ \ { \
...@@ -80,14 +80,14 @@ void lf_dynarray_destroy(LF_DYNARRAY *array); ...@@ -80,14 +80,14 @@ void lf_dynarray_destroy(LF_DYNARRAY *array);
nolock_wrap(lf_dynarray_value, void *, nolock_wrap(lf_dynarray_value, void *,
(LF_DYNARRAY *array, uint idx), (LF_DYNARRAY *array, uint idx),
(array,idx)); (array, idx));
lock_wrap(lf_dynarray_lvalue, void *, lock_wrap(lf_dynarray_lvalue, void *,
(LF_DYNARRAY *array, uint idx), (LF_DYNARRAY *array, uint idx),
(array,idx), (array, idx),
&array->lock); &array->lock);
nolock_wrap(lf_dynarray_iterate, int, nolock_wrap(lf_dynarray_iterate, int,
(LF_DYNARRAY *array, lf_dynarray_func func, void *arg), (LF_DYNARRAY *array, lf_dynarray_func func, void *arg),
(array,func,arg)); (array, func, arg));
/* /*
pin manager for memory allocator, lf_alloc-pin.c pin manager for memory allocator, lf_alloc-pin.c
...@@ -115,9 +115,14 @@ typedef struct { ...@@ -115,9 +115,14 @@ typedef struct {
uint32 volatile link; uint32 volatile link;
/* we want sizeof(LF_PINS) to be 128 to avoid false sharing */ /* we want sizeof(LF_PINS) to be 128 to avoid false sharing */
char pad[128-sizeof(uint32)*2 char pad[128-sizeof(uint32)*2
-sizeof(void *)*(LF_PINBOX_PINS+2)]; -sizeof(LF_PINBOX *)
-sizeof(void *)*(LF_PINBOX_PINS+1)];
} LF_PINS; } LF_PINS;
/*
shortcut macros to do an atomic_wrlock on a structure that uses pins
(e.g. lf_hash).
*/
#define lf_rwlock_by_pins(PINS) \ #define lf_rwlock_by_pins(PINS) \
my_atomic_rwlock_wrlock(&(PINS)->pinbox->pinstack.lock) my_atomic_rwlock_wrlock(&(PINS)->pinbox->pinstack.lock)
#define lf_rwunlock_by_pins(PINS) \ #define lf_rwunlock_by_pins(PINS) \
...@@ -131,11 +136,11 @@ typedef struct { ...@@ -131,11 +136,11 @@ typedef struct {
#if defined(__GNUC__) && defined(MY_LF_EXTRA_DEBUG) #if defined(__GNUC__) && defined(MY_LF_EXTRA_DEBUG)
#define LF_REQUIRE_PINS(N) \ #define LF_REQUIRE_PINS(N) \
static const char require_pins[LF_PINBOX_PINS-N]; \ static const char require_pins[LF_PINBOX_PINS-N]; \
static const int LF_NUM_PINS_IN_THIS_FILE=N; static const int LF_NUM_PINS_IN_THIS_FILE= N;
#define _lf_pin(PINS, PIN, ADDR) \ #define _lf_pin(PINS, PIN, ADDR) \
( \ ( \
my_atomic_storeptr(&(PINS)->pin[PIN], (ADDR)), \ assert(PIN < LF_NUM_PINS_IN_THIS_FILE), \
assert(PIN < LF_NUM_PINS_IN_THIS_FILE) \ my_atomic_storeptr(&(PINS)->pin[PIN], (ADDR)) \
) )
#else #else
#define LF_REQUIRE_PINS(N) #define LF_REQUIRE_PINS(N)
...@@ -151,7 +156,7 @@ typedef struct { ...@@ -151,7 +156,7 @@ typedef struct {
} while (0) } while (0)
#define lf_unpin(PINS, PIN) lf_pin(PINS, PIN, NULL) #define lf_unpin(PINS, PIN) lf_pin(PINS, PIN, NULL)
#define _lf_assert_pin(PINS, PIN) assert((PINS)->pin[PIN] != 0) #define _lf_assert_pin(PINS, PIN) assert((PINS)->pin[PIN] != 0)
#define _lf_assert_unpin(PINS, PIN) assert((PINS)->pin[PIN]==0) #define _lf_assert_unpin(PINS, PIN) assert((PINS)->pin[PIN] == 0)
void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset, void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset,
lf_pinbox_free_func *free_func, void * free_func_arg); lf_pinbox_free_func *free_func, void * free_func_arg);
...@@ -167,16 +172,20 @@ lock_wrap_void(lf_pinbox_put_pins, ...@@ -167,16 +172,20 @@ lock_wrap_void(lf_pinbox_put_pins,
&pins->pinbox->pinstack.lock); &pins->pinbox->pinstack.lock);
lock_wrap_void(lf_pinbox_free, lock_wrap_void(lf_pinbox_free,
(LF_PINS *pins, void *addr), (LF_PINS *pins, void *addr),
(pins,addr), (pins, addr),
&pins->pinbox->pinstack.lock); &pins->pinbox->pinstack.lock);
/* /*
memory allocator, lf_alloc-pin.c memory allocator, lf_alloc-pin.c
*/ */
struct st_lf_alloc_node {
struct st_lf_alloc_node *next;
};
typedef struct st_lf_allocator { typedef struct st_lf_allocator {
LF_PINBOX pinbox; LF_PINBOX pinbox;
void * volatile top; struct st_lf_alloc_node * volatile top;
uint element_size; uint element_size;
uint32 volatile mallocs; uint32 volatile mallocs;
} LF_ALLOCATOR; } LF_ALLOCATOR;
...@@ -184,13 +193,17 @@ typedef struct st_lf_allocator { ...@@ -184,13 +193,17 @@ typedef struct st_lf_allocator {
void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset); void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset);
void lf_alloc_destroy(LF_ALLOCATOR *allocator); void lf_alloc_destroy(LF_ALLOCATOR *allocator);
uint lf_alloc_in_pool(LF_ALLOCATOR *allocator); uint lf_alloc_in_pool(LF_ALLOCATOR *allocator);
/*
shortcut macros to access underlying pinbox functions from an LF_ALLOCATOR
see _lf_pinbox_get_pins() and _lf_pinbox_put_pins()
*/
#define _lf_alloc_free(PINS, PTR) _lf_pinbox_free((PINS), (PTR)) #define _lf_alloc_free(PINS, PTR) _lf_pinbox_free((PINS), (PTR))
#define lf_alloc_free(PINS, PTR) lf_pinbox_free((PINS), (PTR)) #define lf_alloc_free(PINS, PTR) lf_pinbox_free((PINS), (PTR))
#define _lf_alloc_get_pins(ALLOC) _lf_pinbox_get_pins(&(ALLOC)->pinbox) #define _lf_alloc_get_pins(ALLOC) _lf_pinbox_get_pins(&(ALLOC)->pinbox)
#define lf_alloc_get_pins(ALLOC) lf_pinbox_get_pins(&(ALLOC)->pinbox) #define lf_alloc_get_pins(ALLOC) lf_pinbox_get_pins(&(ALLOC)->pinbox)
#define _lf_alloc_put_pins(PINS) _lf_pinbox_put_pins(PINS) #define _lf_alloc_put_pins(PINS) _lf_pinbox_put_pins(PINS)
#define lf_alloc_put_pins(PINS) lf_pinbox_put_pins(PINS) #define lf_alloc_put_pins(PINS) lf_pinbox_put_pins(PINS)
#define lf_alloc_real_free(ALLOC,ADDR) my_free((gptr)(ADDR), MYF(0)) #define lf_alloc_real_free(ALLOC, ADDR) my_free((gptr)(ADDR), MYF(0))
lock_wrap(lf_alloc_new, void *, lock_wrap(lf_alloc_new, void *,
(LF_PINS *pins), (LF_PINS *pins),
...@@ -222,6 +235,10 @@ void lf_hash_destroy(LF_HASH *hash); ...@@ -222,6 +235,10 @@ void lf_hash_destroy(LF_HASH *hash);
int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data); int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data);
void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen); void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen);
int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen); int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen);
/*
shortcut macros to access underlying pinbox functions from an LF_HASH
see _lf_pinbox_get_pins() and _lf_pinbox_put_pins()
*/
#define _lf_hash_get_pins(HASH) _lf_alloc_get_pins(&(HASH)->alloc) #define _lf_hash_get_pins(HASH) _lf_alloc_get_pins(&(HASH)->alloc)
#define lf_hash_get_pins(HASH) lf_alloc_get_pins(&(HASH)->alloc) #define lf_hash_get_pins(HASH) lf_alloc_get_pins(&(HASH)->alloc)
#define _lf_hash_put_pins(PINS) _lf_pinbox_put_pins(PINS) #define _lf_hash_put_pins(PINS) _lf_pinbox_put_pins(PINS)
......
...@@ -173,14 +173,6 @@ make_atomic_fas(ptr) ...@@ -173,14 +173,6 @@ make_atomic_fas(ptr)
#define LF_BACKOFF (1) #define LF_BACKOFF (1)
#endif #endif
#if SIZEOF_CHARP == SIZEOF_INT
typedef int intptr;
#elif SIZEOF_CHARP == SIZEOF_LONG
typedef long intptr;
#else
#error
#endif
#define MY_ATOMIC_OK 0 #define MY_ATOMIC_OK 0
#define MY_ATOMIC_NOT_1CPU 1 #define MY_ATOMIC_NOT_1CPU 1
extern int my_atomic_initialize(); extern int my_atomic_initialize();
......
...@@ -432,7 +432,8 @@ C_MODE_END ...@@ -432,7 +432,8 @@ C_MODE_END
#define compile_time_assert(X) \ #define compile_time_assert(X) \
do \ do \
{ \ { \
char compile_time_assert[(X) ? 1 : -1]; \ char compile_time_assert[(X) ? 1 : -1] \
__attribute__ ((unused)); \
} while(0) } while(0)
/* Go around some bugs in different OS and compilers */ /* Go around some bugs in different OS and compilers */
...@@ -964,6 +965,14 @@ typedef unsigned __int64 my_ulonglong; ...@@ -964,6 +965,14 @@ typedef unsigned __int64 my_ulonglong;
typedef unsigned long long my_ulonglong; typedef unsigned long long my_ulonglong;
#endif #endif
#if SIZEOF_CHARP == SIZEOF_INT
typedef int intptr;
#elif SIZEOF_CHARP == SIZEOF_LONG
typedef long intptr;
#else
#error
#endif
#ifdef USE_RAID #ifdef USE_RAID
/* /*
The following is done with a if to not get problems with pre-processors The following is done with a if to not get problems with pre-processors
......
...@@ -91,7 +91,7 @@ static void _lf_pinbox_real_free(LF_PINS *pins); ...@@ -91,7 +91,7 @@ static void _lf_pinbox_real_free(LF_PINS *pins);
See the latter for details. See the latter for details.
*/ */
void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset, void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset,
lf_pinbox_free_func *free_func,void *free_func_arg) lf_pinbox_free_func *free_func, void *free_func_arg)
{ {
DBUG_ASSERT(sizeof(LF_PINS) == 128); DBUG_ASSERT(sizeof(LF_PINS) == 128);
DBUG_ASSERT(free_ptr_offset % sizeof(void *) == 0); DBUG_ASSERT(free_ptr_offset % sizeof(void *) == 0);
...@@ -306,7 +306,7 @@ static void _lf_pinbox_real_free(LF_PINS *pins) ...@@ -306,7 +306,7 @@ static void _lf_pinbox_real_free(LF_PINS *pins)
{ {
if (addr) /* use binary search */ if (addr) /* use binary search */
{ {
void **a,**b,**c; void **a, **b, **c;
for (a= addr, b= addr+npins-1, c= a+(b-a)/2; b-a>1; c= a+(b-a)/2) for (a= addr, b= addr+npins-1, c= a+(b-a)/2; b-a>1; c= a+(b-a)/2)
if (cur == *c) if (cur == *c)
a= b= c; a= b= c;
...@@ -337,13 +337,13 @@ static void _lf_pinbox_real_free(LF_PINS *pins) ...@@ -337,13 +337,13 @@ static void _lf_pinbox_real_free(LF_PINS *pins)
callback for _lf_pinbox_real_free to free an unpinned object - callback for _lf_pinbox_real_free to free an unpinned object -
add it back to the allocator stack add it back to the allocator stack
*/ */
static void alloc_free(void *node, LF_ALLOCATOR *allocator) static void alloc_free(struct st_lf_alloc_node *node, LF_ALLOCATOR *allocator)
{ {
void *tmp; struct st_lf_alloc_node *tmp;
tmp= allocator->top; tmp= allocator->top;
do do
{ {
(*(void **)node)= tmp; node->next= tmp;
} while (!my_atomic_casptr((void **)&allocator->top, (void **)&tmp, node) && } while (!my_atomic_casptr((void **)&allocator->top, (void **)&tmp, node) &&
LF_BACKOFF); LF_BACKOFF);
} }
...@@ -379,12 +379,12 @@ void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset) ...@@ -379,12 +379,12 @@ void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset)
*/ */
void lf_alloc_destroy(LF_ALLOCATOR *allocator) void lf_alloc_destroy(LF_ALLOCATOR *allocator)
{ {
void *el= allocator->top; struct st_lf_alloc_node *node= allocator->top;
while (el) while (node)
{ {
void *tmp= *(void **)el; struct st_lf_alloc_node *tmp= node->next;
my_free(el, MYF(0)); my_free((void *)node, MYF(0));
el= tmp; node= tmp;
} }
lf_pinbox_destroy(&allocator->pinbox); lf_pinbox_destroy(&allocator->pinbox);
allocator->top= 0; allocator->top= 0;
...@@ -400,7 +400,7 @@ void lf_alloc_destroy(LF_ALLOCATOR *allocator) ...@@ -400,7 +400,7 @@ void lf_alloc_destroy(LF_ALLOCATOR *allocator)
void *_lf_alloc_new(LF_PINS *pins) void *_lf_alloc_new(LF_PINS *pins)
{ {
LF_ALLOCATOR *allocator= (LF_ALLOCATOR *)(pins->pinbox->free_func_arg); LF_ALLOCATOR *allocator= (LF_ALLOCATOR *)(pins->pinbox->free_func_arg);
void *node; struct st_lf_alloc_node *node;
for (;;) for (;;)
{ {
do do
...@@ -410,7 +410,8 @@ void *_lf_alloc_new(LF_PINS *pins) ...@@ -410,7 +410,8 @@ void *_lf_alloc_new(LF_PINS *pins)
} while (node != allocator->top && LF_BACKOFF); } while (node != allocator->top && LF_BACKOFF);
if (!node) if (!node)
{ {
if (!(node= my_malloc(allocator->element_size, MYF(MY_WME|MY_ZEROFILL)))) if (!(node= (void *)my_malloc(allocator->element_size,
MYF(MY_WME|MY_ZEROFILL))))
break; break;
#ifdef MY_LF_EXTRA_DEBUG #ifdef MY_LF_EXTRA_DEBUG
my_atomic_add32(&allocator->mallocs, 1); my_atomic_add32(&allocator->mallocs, 1);
...@@ -434,8 +435,8 @@ void *_lf_alloc_new(LF_PINS *pins) ...@@ -434,8 +435,8 @@ void *_lf_alloc_new(LF_PINS *pins)
uint lf_alloc_in_pool(LF_ALLOCATOR *allocator) uint lf_alloc_in_pool(LF_ALLOCATOR *allocator)
{ {
uint i; uint i;
void *node; struct st_lf_alloc_node *node;
for (node= allocator->top, i= 0; node; node= *(void **)node, i++) for (node= allocator->top, i= 0; node; node= node->next, i++)
/* no op */; /* no op */;
return i; return i;
} }
......
...@@ -19,9 +19,9 @@ ...@@ -19,9 +19,9 @@
(so no pointer into the array may ever become invalid). (so no pointer into the array may ever become invalid).
Memory is allocated in non-contiguous chunks. Memory is allocated in non-contiguous chunks.
This data structure is not space efficient for sparce arrays. This data structure is not space efficient for sparse arrays.
The number of elements is limited to 2^16 The number of elements is limited to 4311810304
Every element is aligned to sizeof(element) boundary Every element is aligned to sizeof(element) boundary
(to avoid false sharing if element is big enough). (to avoid false sharing if element is big enough).
...@@ -49,7 +49,8 @@ void lf_dynarray_init(LF_DYNARRAY *array, uint element_size) ...@@ -49,7 +49,8 @@ void lf_dynarray_init(LF_DYNARRAY *array, uint element_size)
static void recursive_free(void **alloc, int level) static void recursive_free(void **alloc, int level)
{ {
if (!alloc) return; if (!alloc)
return;
if (level) if (level)
{ {
...@@ -68,10 +69,9 @@ void lf_dynarray_destroy(LF_DYNARRAY *array) ...@@ -68,10 +69,9 @@ void lf_dynarray_destroy(LF_DYNARRAY *array)
for (i= 0; i < LF_DYNARRAY_LEVELS; i++) for (i= 0; i < LF_DYNARRAY_LEVELS; i++)
recursive_free(array->level[i], i); recursive_free(array->level[i], i);
my_atomic_rwlock_destroy(&array->lock); my_atomic_rwlock_destroy(&array->lock);
bzero(array, sizeof(*array));
} }
static const long dynarray_idxes_in_prev_level[LF_DYNARRAY_LEVELS]= static const ulong dynarray_idxes_in_prev_levels[LF_DYNARRAY_LEVELS]=
{ {
0, /* +1 here to avoid -1's below */ 0, /* +1 here to avoid -1's below */
LF_DYNARRAY_LEVEL_LENGTH, LF_DYNARRAY_LEVEL_LENGTH,
...@@ -82,6 +82,15 @@ static const long dynarray_idxes_in_prev_level[LF_DYNARRAY_LEVELS]= ...@@ -82,6 +82,15 @@ static const long dynarray_idxes_in_prev_level[LF_DYNARRAY_LEVELS]=
LF_DYNARRAY_LEVEL_LENGTH + LF_DYNARRAY_LEVEL_LENGTH LF_DYNARRAY_LEVEL_LENGTH + LF_DYNARRAY_LEVEL_LENGTH
}; };
static const ulong dynarray_idxes_in_prev_level[LF_DYNARRAY_LEVELS]=
{
0, /* +1 here to avoid -1's below */
LF_DYNARRAY_LEVEL_LENGTH,
LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH,
LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH *
LF_DYNARRAY_LEVEL_LENGTH,
};
/* /*
Returns a valid lvalue pointer to the element number 'idx'. Returns a valid lvalue pointer to the element number 'idx'.
Allocates memory if necessary. Allocates memory if necessary.
...@@ -91,16 +100,17 @@ void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx) ...@@ -91,16 +100,17 @@ void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx)
void * ptr, * volatile * ptr_ptr= 0; void * ptr, * volatile * ptr_ptr= 0;
int i; int i;
for (i= 3; idx < dynarray_idxes_in_prev_level[i]; i--) /* no-op */; for (i= LF_DYNARRAY_LEVELS-1; idx < dynarray_idxes_in_prev_levels[i]; i--)
/* no-op */;
ptr_ptr= &array->level[i]; ptr_ptr= &array->level[i];
idx-= dynarray_idxes_in_prev_level[i]; idx-= dynarray_idxes_in_prev_levels[i];
for (; i > 0; i--) for (; i > 0; i--)
{ {
if (!(ptr= *ptr_ptr)) if (!(ptr= *ptr_ptr))
{ {
void *alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * sizeof(void *), void *alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * sizeof(void *),
MYF(MY_WME|MY_ZEROFILL)); MYF(MY_WME|MY_ZEROFILL));
if (!alloc) if (unlikely(!alloc))
return(NULL); return(NULL);
if (my_atomic_casptr(ptr_ptr, &ptr, alloc)) if (my_atomic_casptr(ptr_ptr, &ptr, alloc))
ptr= alloc; ptr= alloc;
...@@ -116,7 +126,7 @@ void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx) ...@@ -116,7 +126,7 @@ void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx)
alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * array->size_of_element + alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * array->size_of_element +
max(array->size_of_element, sizeof(void *)), max(array->size_of_element, sizeof(void *)),
MYF(MY_WME|MY_ZEROFILL)); MYF(MY_WME|MY_ZEROFILL));
if (!alloc) if (unlikely(!alloc))
return(NULL); return(NULL);
/* reserve the space for free() address */ /* reserve the space for free() address */
data= alloc + sizeof(void *); data= alloc + sizeof(void *);
...@@ -143,9 +153,10 @@ void *_lf_dynarray_value(LF_DYNARRAY *array, uint idx) ...@@ -143,9 +153,10 @@ void *_lf_dynarray_value(LF_DYNARRAY *array, uint idx)
void * ptr, * volatile * ptr_ptr= 0; void * ptr, * volatile * ptr_ptr= 0;
int i; int i;
for (i= 3; idx < dynarray_idxes_in_prev_level[i]; i--) /* no-op */; for (i= LF_DYNARRAY_LEVELS-1; idx < dynarray_idxes_in_prev_levels[i]; i--)
/* no-op */;
ptr_ptr= &array->level[i]; ptr_ptr= &array->level[i];
idx-= dynarray_idxes_in_prev_level[i]; idx-= dynarray_idxes_in_prev_levels[i];
for (; i > 0; i--) for (; i > 0; i--)
{ {
if (!(ptr= *ptr_ptr)) if (!(ptr= *ptr_ptr))
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
(but how to do it in lf_hash_delete ?) (but how to do it in lf_hash_delete ?)
*/ */
#include <my_global.h> #include <my_global.h>
#include <m_string.h>
#include <my_sys.h> #include <my_sys.h>
#include <my_bit.h> #include <my_bit.h>
#include <lf.h> #include <lf.h>
...@@ -33,7 +34,7 @@ LF_REQUIRE_PINS(3); ...@@ -33,7 +34,7 @@ LF_REQUIRE_PINS(3);
typedef struct { typedef struct {
intptr volatile link; /* a pointer to the next element in a list and a flag */ intptr volatile link; /* a pointer to the next element in a list and a flag */
uint32 hashnr; /* reversed hash number, for sorting */ uint32 hashnr; /* reversed hash number, for sorting */
const uchar *key; const byte *key;
uint keylen; uint keylen;
} LF_SLIST; } LF_SLIST;
...@@ -67,31 +68,31 @@ typedef struct { ...@@ -67,31 +68,31 @@ typedef struct {
pins[0..2] are used, they are NOT removed on return pins[0..2] are used, they are NOT removed on return
*/ */
static int lfind(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr, static int lfind(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr,
const uchar *key, uint keylen, CURSOR *cursor, LF_PINS *pins) const byte *key, uint keylen, CURSOR *cursor, LF_PINS *pins)
{ {
uint32 cur_hashnr; uint32 cur_hashnr;
const uchar *cur_key; const byte *cur_key;
uint cur_keylen; uint cur_keylen;
intptr link; intptr link;
retry: retry:
cursor->prev=(intptr *)head; cursor->prev= (intptr *)head;
do { do {
cursor->curr=PTR(*cursor->prev); cursor->curr= PTR(*cursor->prev);
_lf_pin(pins,1,cursor->curr); _lf_pin(pins, 1, cursor->curr);
} while(*cursor->prev != (intptr)cursor->curr && LF_BACKOFF); } while(*cursor->prev != (intptr)cursor->curr && LF_BACKOFF);
for (;;) for (;;)
{ {
if (!cursor->curr) if (!cursor->curr)
return 0; return 0;
do { // XXX or goto retry ? do { // XXX or goto retry ?
link=cursor->curr->link; link= cursor->curr->link;
cursor->next=PTR(link); cursor->next= PTR(link);
_lf_pin(pins, 0, cursor->next); _lf_pin(pins, 0, cursor->next);
} while(link != cursor->curr->link && LF_BACKOFF); } while(link != cursor->curr->link && LF_BACKOFF);
cur_hashnr=cursor->curr->hashnr; cur_hashnr= cursor->curr->hashnr;
cur_key=cursor->curr->key; cur_key= cursor->curr->key;
cur_keylen=cursor->curr->keylen; cur_keylen= cursor->curr->keylen;
if (*cursor->prev != (intptr)cursor->curr) if (*cursor->prev != (intptr)cursor->curr)
{ {
LF_BACKOFF; LF_BACKOFF;
...@@ -101,12 +102,12 @@ static int lfind(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr, ...@@ -101,12 +102,12 @@ static int lfind(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr,
{ {
if (cur_hashnr >= hashnr) if (cur_hashnr >= hashnr)
{ {
int r=1; int r= 1;
if (cur_hashnr > hashnr || if (cur_hashnr > hashnr ||
(r=my_strnncoll(cs, cur_key, cur_keylen, key, keylen)) >= 0) (r= my_strnncoll(cs, cur_key, cur_keylen, key, keylen)) >= 0)
return !r; return !r;
} }
cursor->prev=&(cursor->curr->link); cursor->prev= &(cursor->curr->link);
_lf_pin(pins, 2, cursor->curr); _lf_pin(pins, 2, cursor->curr);
} }
else else
...@@ -120,7 +121,7 @@ static int lfind(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr, ...@@ -120,7 +121,7 @@ static int lfind(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr,
goto retry; goto retry;
} }
} }
cursor->curr=cursor->next; cursor->curr= cursor->next;
_lf_pin(pins, 1, cursor->curr); _lf_pin(pins, 1, cursor->curr);
} }
} }
...@@ -141,21 +142,21 @@ static LF_SLIST *linsert(LF_SLIST * volatile *head, CHARSET_INFO *cs, ...@@ -141,21 +142,21 @@ static LF_SLIST *linsert(LF_SLIST * volatile *head, CHARSET_INFO *cs,
LF_SLIST *node, LF_PINS *pins, uint flags) LF_SLIST *node, LF_PINS *pins, uint flags)
{ {
CURSOR cursor; CURSOR cursor;
int res=-1; int res= -1;
do do
{ {
if (lfind(head, cs, node->hashnr, node->key, node->keylen, if (lfind(head, cs, node->hashnr, node->key, node->keylen,
&cursor, pins) && &cursor, pins) &&
(flags & LF_HASH_UNIQUE)) (flags & LF_HASH_UNIQUE))
res=0; /* duplicate found */ res= 0; /* duplicate found */
else else
{ {
node->link=(intptr)cursor.curr; node->link= (intptr)cursor.curr;
assert(node->link != (intptr)node); assert(node->link != (intptr)node);
assert(cursor.prev != &node->link); assert(cursor.prev != &node->link);
if (my_atomic_casptr((void **)cursor.prev, (void **)&cursor.curr, node)) if (my_atomic_casptr((void **)cursor.prev, (void **)&cursor.curr, node))
res=1; /* inserted ok */ res= 1; /* inserted ok */
} }
} while (res == -1); } while (res == -1);
_lf_unpin(pins, 0); _lf_unpin(pins, 0);
...@@ -177,10 +178,10 @@ static LF_SLIST *linsert(LF_SLIST * volatile *head, CHARSET_INFO *cs, ...@@ -177,10 +178,10 @@ static LF_SLIST *linsert(LF_SLIST * volatile *head, CHARSET_INFO *cs,
it uses pins[0..2], on return all pins are removed. it uses pins[0..2], on return all pins are removed.
*/ */
static int ldelete(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr, static int ldelete(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr,
const uchar *key, uint keylen, LF_PINS *pins) const byte *key, uint keylen, LF_PINS *pins)
{ {
CURSOR cursor; CURSOR cursor;
int res=-1; int res= -1;
do do
{ {
...@@ -218,30 +219,30 @@ static int ldelete(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr, ...@@ -218,30 +219,30 @@ static int ldelete(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr,
all other pins are removed. all other pins are removed.
*/ */
static LF_SLIST *lsearch(LF_SLIST * volatile *head, CHARSET_INFO *cs, static LF_SLIST *lsearch(LF_SLIST * volatile *head, CHARSET_INFO *cs,
uint32 hashnr, const uchar *key, uint keylen, uint32 hashnr, const byte *key, uint keylen,
LF_PINS *pins) LF_PINS *pins)
{ {
CURSOR cursor; CURSOR cursor;
int res=lfind(head, cs, hashnr, key, keylen, &cursor, pins); int res= lfind(head, cs, hashnr, key, keylen, &cursor, pins);
if (res) _lf_pin(pins, 2, cursor.curr); if (res) _lf_pin(pins, 2, cursor.curr);
_lf_unpin(pins, 0); _lf_unpin(pins, 0);
_lf_unpin(pins, 1); _lf_unpin(pins, 1);
return res ? cursor.curr : 0; return res ? cursor.curr : 0;
} }
static inline const uchar* hash_key(const LF_HASH *hash, static inline const byte* hash_key(const LF_HASH *hash,
const uchar *record, uint *length) const byte *record, uint *length)
{ {
if (hash->get_key) if (hash->get_key)
return (*hash->get_key)(record,length,0); return (*hash->get_key)(record, length, 0);
*length=hash->key_length; *length= hash->key_length;
return record + hash->key_offset; return record + hash->key_offset;
} }
static inline uint calc_hash(LF_HASH *hash, const uchar *key, uint keylen) static inline uint calc_hash(LF_HASH *hash, const byte *key, uint keylen)
{ {
ulong nr1=1, nr2=4; ulong nr1= 1, nr2= 4;
hash->charset->coll->hash_sort(hash->charset,key,keylen,&nr1,&nr2); hash->charset->coll->hash_sort(hash->charset, key, keylen, &nr1, &nr2);
return nr1 & INT_MAX32; return nr1 & INT_MAX32;
} }
...@@ -258,28 +259,28 @@ void lf_hash_init(LF_HASH *hash, uint element_size, uint flags, ...@@ -258,28 +259,28 @@ void lf_hash_init(LF_HASH *hash, uint element_size, uint flags,
lf_alloc_init(&hash->alloc, sizeof(LF_SLIST)+element_size, lf_alloc_init(&hash->alloc, sizeof(LF_SLIST)+element_size,
offsetof(LF_SLIST, key)); offsetof(LF_SLIST, key));
lf_dynarray_init(&hash->array, sizeof(LF_SLIST **)); lf_dynarray_init(&hash->array, sizeof(LF_SLIST **));
hash->size=1; hash->size= 1;
hash->count=0; hash->count= 0;
hash->element_size=element_size; hash->element_size= element_size;
hash->flags=flags; hash->flags= flags;
hash->charset=charset ? charset : &my_charset_bin; hash->charset= charset ? charset : &my_charset_bin;
hash->key_offset=key_offset; hash->key_offset= key_offset;
hash->key_length=key_length; hash->key_length= key_length;
hash->get_key=get_key; hash->get_key= get_key;
DBUG_ASSERT(get_key ? !key_offset && !key_length : key_length); DBUG_ASSERT(get_key ? !key_offset && !key_length : key_length);
} }
void lf_hash_destroy(LF_HASH *hash) void lf_hash_destroy(LF_HASH *hash)
{ {
LF_SLIST *el=*(LF_SLIST **)_lf_dynarray_lvalue(&hash->array, 0); LF_SLIST *el= *(LF_SLIST **)_lf_dynarray_lvalue(&hash->array, 0);
while (el) while (el)
{ {
intptr next=el->link; intptr next= el->link;
if (el->hashnr & 1) if (el->hashnr & 1)
lf_alloc_real_free(&hash->alloc, el); lf_alloc_real_free(&hash->alloc, el);
else else
my_free((void *)el, MYF(0)); my_free((void *)el, MYF(0));
el=(LF_SLIST *)next; el= (LF_SLIST *)next;
} }
lf_alloc_destroy(&hash->alloc); lf_alloc_destroy(&hash->alloc);
lf_dynarray_destroy(&hash->array); lf_dynarray_destroy(&hash->array);
...@@ -299,19 +300,19 @@ void lf_hash_destroy(LF_HASH *hash) ...@@ -299,19 +300,19 @@ void lf_hash_destroy(LF_HASH *hash)
*/ */
int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data) int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data)
{ {
uint csize, bucket, hashnr; int csize, bucket, hashnr;
LF_SLIST *node, * volatile *el; LF_SLIST *node, * volatile *el;
lf_rwlock_by_pins(pins); lf_rwlock_by_pins(pins);
node=(LF_SLIST *)_lf_alloc_new(pins); node= (LF_SLIST *)_lf_alloc_new(pins);
memcpy(node+1, data, hash->element_size); memcpy(node+1, data, hash->element_size);
node->key= hash_key(hash, (uchar *)(node+1), &node->keylen); node->key= hash_key(hash, (byte *)(node+1), &node->keylen);
hashnr= calc_hash(hash, node->key, node->keylen); hashnr= calc_hash(hash, node->key, node->keylen);
bucket= hashnr % hash->size; bucket= hashnr % hash->size;
el=_lf_dynarray_lvalue(&hash->array, bucket); el= _lf_dynarray_lvalue(&hash->array, bucket);
if (*el == NULL) if (*el == NULL)
initialize_bucket(hash, el, bucket, pins); initialize_bucket(hash, el, bucket, pins);
node->hashnr=my_reverse_bits(hashnr) | 1; node->hashnr= my_reverse_bits(hashnr) | 1;
if (linsert(el, hash->charset, node, pins, hash->flags)) if (linsert(el, hash->charset, node, pins, hash->flags))
{ {
_lf_alloc_free(pins, node); _lf_alloc_free(pins, node);
...@@ -335,15 +336,15 @@ int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data) ...@@ -335,15 +336,15 @@ int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data)
int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen)
{ {
LF_SLIST * volatile *el; LF_SLIST * volatile *el;
uint bucket, hashnr=calc_hash(hash, (uchar *)key, keylen); uint bucket, hashnr= calc_hash(hash, (byte *)key, keylen);
bucket= hashnr % hash->size; bucket= hashnr % hash->size;
lf_rwlock_by_pins(pins); lf_rwlock_by_pins(pins);
el=_lf_dynarray_lvalue(&hash->array, bucket); el= _lf_dynarray_lvalue(&hash->array, bucket);
if (*el == NULL) if (*el == NULL)
initialize_bucket(hash, el, bucket, pins); initialize_bucket(hash, el, bucket, pins);
if (ldelete(el, hash->charset, my_reverse_bits(hashnr) | 1, if (ldelete(el, hash->charset, my_reverse_bits(hashnr) | 1,
(uchar *)key, keylen, pins)) (byte *)key, keylen, pins))
{ {
lf_rwunlock_by_pins(pins); lf_rwunlock_by_pins(pins);
return 1; return 1;
...@@ -360,33 +361,33 @@ int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) ...@@ -360,33 +361,33 @@ int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen)
void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen)
{ {
LF_SLIST * volatile *el, *found; LF_SLIST * volatile *el, *found;
uint bucket, hashnr=calc_hash(hash, (uchar *)key, keylen); uint bucket, hashnr= calc_hash(hash, (byte *)key, keylen);
bucket= hashnr % hash->size; bucket= hashnr % hash->size;
lf_rwlock_by_pins(pins); lf_rwlock_by_pins(pins);
el=_lf_dynarray_lvalue(&hash->array, bucket); el= _lf_dynarray_lvalue(&hash->array, bucket);
if (*el == NULL) if (*el == NULL)
initialize_bucket(hash, el, bucket, pins); initialize_bucket(hash, el, bucket, pins);
found= lsearch(el, hash->charset, my_reverse_bits(hashnr) | 1, found= lsearch(el, hash->charset, my_reverse_bits(hashnr) | 1,
(uchar *)key, keylen, pins); (byte *)key, keylen, pins);
lf_rwunlock_by_pins(pins); lf_rwunlock_by_pins(pins);
return found ? found+1 : 0; return found ? found+1 : 0;
} }
static char *dummy_key=""; static char *dummy_key= "";
static void initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node, static void initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node,
uint bucket, LF_PINS *pins) uint bucket, LF_PINS *pins)
{ {
uint parent= my_clear_highest_bit(bucket); uint parent= my_clear_highest_bit(bucket);
LF_SLIST *dummy=(LF_SLIST *)my_malloc(sizeof(LF_SLIST), MYF(MY_WME)); LF_SLIST *dummy= (LF_SLIST *)my_malloc(sizeof(LF_SLIST), MYF(MY_WME));
LF_SLIST **tmp=0, *cur; LF_SLIST **tmp= 0, *cur;
LF_SLIST * volatile *el=_lf_dynarray_lvalue(&hash->array, parent); LF_SLIST * volatile *el= _lf_dynarray_lvalue(&hash->array, parent);
if (*el == NULL && bucket) if (*el == NULL && bucket)
initialize_bucket(hash, el, parent, pins); initialize_bucket(hash, el, parent, pins);
dummy->hashnr=my_reverse_bits(bucket); dummy->hashnr= my_reverse_bits(bucket);
dummy->key=dummy_key; dummy->key= dummy_key;
dummy->keylen=0; dummy->keylen= 0;
if ((cur= linsert(el, hash->charset, dummy, pins, 0))) if ((cur= linsert(el, hash->charset, dummy, pins, 0)))
{ {
my_free((void *)dummy, MYF(0)); my_free((void *)dummy, MYF(0));
......
...@@ -35,10 +35,6 @@ ulonglong my_getsystime() ...@@ -35,10 +35,6 @@ ulonglong my_getsystime()
LARGE_INTEGER t_cnt; LARGE_INTEGER t_cnt;
if (!offset) if (!offset)
{ {
/* strictly speaking there should be a mutex to protect
initialization section. But my_getsystime() is called from
UUID() code, and UUID() calls are serialized with a mutex anyway
*/
LARGE_INTEGER li; LARGE_INTEGER li;
FILETIME ft; FILETIME ft;
GetSystemTimeAsFileTime(&ft); GetSystemTimeAsFileTime(&ft);
......
...@@ -32,7 +32,7 @@ ...@@ -32,7 +32,7 @@
SLX - Shared + Loose eXclusive SLX - Shared + Loose eXclusive
LSIX - Loose Shared + Intention eXclusive LSIX - Loose Shared + Intention eXclusive
*/ */
enum lock_type { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX }; enum lock_type { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX, LOCK_TYPE_LAST };
struct lockman_lock; struct lockman_lock;
...@@ -55,9 +55,10 @@ typedef struct { ...@@ -55,9 +55,10 @@ typedef struct {
uint lock_timeout; uint lock_timeout;
loid_to_lo_func *loid_to_lo; loid_to_lo_func *loid_to_lo;
} LOCKMAN; } LOCKMAN;
#define DIDNT_GET_THE_LOCK 0
enum lockman_getlock_result { enum lockman_getlock_result {
DIDNT_GET_THE_LOCK=0, GOT_THE_LOCK, NO_MEMORY_FOR_LOCK=1, DEADLOCK, LOCK_TIMEOUT,
GOT_THE_LOCK,
GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE, GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE,
GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE
}; };
......
// TODO - allocate everything from dynarrays !!! (benchmark) #warning TODO - allocate everything from dynarrays !!! (benchmark)
// automatically place S instead of LS if possible #warning automatically place S instead of LS if possible
/* Copyright (C) 2006 MySQL AB /* Copyright (C) 2006 MySQL AB
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
...@@ -16,10 +16,8 @@ ...@@ -16,10 +16,8 @@
along with this program; if not, write to the Free Software along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#include <my_global.h> #include <my_base.h>
#include <my_sys.h> #include <hash.h>
#include <my_bit.h>
#include <lf.h>
#include "tablockman.h" #include "tablockman.h"
/* /*
...@@ -53,45 +51,54 @@ ...@@ -53,45 +51,54 @@
resource, check if a conflicting lock exists, if yes - find who owns it. resource, check if a conflicting lock exists, if yes - find who owns it.
Solution: every resource has a structure with Solution: every resource has a structure with
1. Hash of "active" (see below for the description of "active") granted 1. Hash of latest (see the lock upgrade section below) granted locks with
locks with loid as a key. Thus, checking if a given transaction has a loid as a key. Thus, checking if a given transaction has a lock on
lock on this resource is O(1) operation. this resource is O(1) operation.
2. Doubly-linked lists of all granted locks - one list for every lock 2. Doubly-linked lists of all granted locks - one list for every lock
type. Thus, checking if a conflicting lock exists is a check whether type. Thus, checking if a conflicting lock exists is a check whether
an appropriate list head pointer is not null, also O(1). an appropriate list head pointer is not null, also O(1).
3. Every lock has a loid of the owner, thus checking who owns a 3. Every lock has a loid of the owner, thus checking who owns a
conflicting lock is also O(1). conflicting lock is also O(1).
4. Deque of waiting locks. It's a deque not a fifo, because for lock 4. Deque of waiting locks. It's a deque (double-ended queue) not a fifo,
upgrades requests are added to the queue head, not tail. There's never because for lock upgrades requests are added to the queue head, not
a need to scan the queue. tail. This is the single place where it gets O(N) in the number
of locks - when a transaction wakes up from waiting on a condition,
Result: adding or removing a lock is always a O(1) operation, it does not it may need to scan the queue backward to the beginning to find
depend on the number of locks on the resource, or number of transactions, a conflicting lock. It is guaranteed though that "all transactions
or number of resources. It _does_ depend on the number of different lock before it" received the same - or earlier - signal. In other words a
levels - O(number_of_lock_levels) - but it's a constant. transaction needs to scan all transactions before it that received the
signal but didn't have a chance to resume the execution yet, so
in practice the OS scheduler won't let the scan be O(N).
Waiting: if there is a conflicting lock or if wait queue is not empty, a Waiting: if there is a conflicting lock or if wait queue is not empty, a
requested lock cannot be granted at once. It is added to the end of the requested lock cannot be granted at once. It is added to the end of the
wait queue. If there is a conflicting lock - the "blocker" transaction is wait queue. If a queue was empty and there is a conflicting lock - the
the owner of this lock. If there's no conflict but a queue was not empty, "blocker" transaction is the owner of this lock. If a queue is not empty,
than the "blocker" is the transaction that the owner of the lock at the an owner of the previous lock in the queue is the "blocker". But if the
end of the queue is waiting for (in other words, our lock is added to the previous lock is compatible with the request, then the "blocker" is the
end of the wait queue, and our blocker is the same as of the lock right transaction that the owner of the lock at the end of the queue is waiting
before us). for (in other words, our lock is added to the end of the wait queue, and
our blocker is the same as of the lock right before us).
Lock upgrades: when a thread that has a lock on a given resource, Lock upgrades: when a thread that has a lock on a given resource,
requests a new lock on the same resource and the old lock is not enough requests a new lock on the same resource and the old lock is not enough
to satisfy new lock requirements (which is defined by to satisfy new lock requirements (which is defined by
lock_combining_matrix[old_lock][new_lock] != old_lock), a new lock lock_combining_matrix[old_lock][new_lock] != old_lock), a new lock
(defineded by lock_combining_matrix as above) is placed. Depending on (defined by lock_combining_matrix as above) is placed. Depending on
other granted locks it is immediately active or it has to wait. Here the other granted locks it is immediately granted or it has to wait. Here the
lock is added to the start of the waiting queue, not to the end. Old lock is added to the start of the waiting queue, not to the end. Old
lock, is removed from the hash, but not from the doubly-linked lists. lock, is removed from the hash, but not from the doubly-linked lists.
(indeed, a transaction checks "do I have a lock on this resource ?" by (indeed, a transaction checks "do I have a lock on this resource ?" by
looking in a hash, and it should find a latest lock, so old locks must be looking in a hash, and it should find a latest lock, so old locks must be
removed; but a transaction checks "are the conflicting locks ?" by removed; but a transaction checks "are there conflicting locks ?" by
checking doubly-linked lists, it doesn't matter if it will find an old checking doubly-linked lists, it doesn't matter if it will find an old
lock - if it would be removed, a new lock would be also a conflict). lock - if it would be removed, a new lock would be also a conflict).
So, a hash contains only "latest" locks - there can be only one latest
lock per resource per transaction. But doubly-linked lists contain all
locks, even "obsolete" ones, because it doesn't hurt. Note that old
locks can not be freed early, in particular they stay in the
'active_locks' list of a lock owner, because they may be "re-enabled"
on a savepoint rollback.
To better support table-row relations where one needs to lock the table To better support table-row relations where one needs to lock the table
with an intention lock before locking the row, extended diagnostics is with an intention lock before locking the row, extended diagnostics is
...@@ -107,6 +114,18 @@ ...@@ -107,6 +114,18 @@
Instant duration locks are not supported. Though they're trivial to add, Instant duration locks are not supported. Though they're trivial to add,
they are normally only used on rows, not on tables. So, presumably, they are normally only used on rows, not on tables. So, presumably,
they are not needed here. they are not needed here.
Mutexes: there're table mutexes (LOCKED_TABLE::mutex), lock owner mutexes
(TABLE_LOCK_OWNER::mutex), and a pool mutex (TABLOCKMAN::pool_mutex).
table mutex protects operations on the table lock structures, and lock
owner pointers waiting_for and waiting_for_loid.
lock owner mutex is only used to wait on lock owner condition
(TABLE_LOCK_OWNER::cond), there's no need to protect owner's lock
structures, and only lock owner itself may access them.
The pool mutex protects a pool of unused locks. Note the locking order:
first the table mutex, then the owner mutex or a pool mutex.
Table mutex lock cannot be attempted when owner or pool mutex are locked.
No mutex lock can be attempted if owner or pool mutex are locked.
*/ */
/* /*
...@@ -122,9 +141,9 @@ ...@@ -122,9 +141,9 @@
0 - incompatible 0 - incompatible
-1 - "impossible", so that we can assert the impossibility. -1 - "impossible", so that we can assert the impossibility.
*/ */
static int lock_compatibility_matrix[10][10]= static const int lock_compatibility_matrix[10][10]=
{ /* N S X IS IX SIX LS LX SLX LSIX */ { /* N S X IS IX SIX LS LX SLX LSIX */
{ -1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* N */ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, /* N */
{ -1, 1, 0, 1, 0, 0, 1, 0, 0, 0 }, /* S */ { -1, 1, 0, 1, 0, 0, 1, 0, 0, 0 }, /* S */
{ -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* X */ { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* X */
{ -1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, /* IS */ { -1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, /* IS */
...@@ -144,18 +163,18 @@ static int lock_compatibility_matrix[10][10]= ...@@ -144,18 +163,18 @@ static int lock_compatibility_matrix[10][10]=
One should never get N from it, we assert the impossibility One should never get N from it, we assert the impossibility
*/ */
static enum lock_type lock_combining_matrix[10][10]= static const enum lock_type lock_combining_matrix[10][10]=
{/* N S X IS IX SIX LS LX SLX LSIX */ {/* N S X IS IX SIX LS LX SLX LSIX */
{ N, S, X, IS, IX, SIX, S, SLX, SLX, SIX}, /* N */ { N, N, N, N, N, N, N, N, N, N}, /* N */
{ S, S, X, S, SIX, SIX, S, SLX, SLX, SIX}, /* S */ { N, S, X, S, SIX, SIX, S, SLX, SLX, SIX}, /* S */
{ X, X, X, X, X, X, X, X, X, X}, /* X */ { N, X, X, X, X, X, X, X, X, X}, /* X */
{ IS, S, X, IS, IX, SIX, LS, LX, SLX, LSIX}, /* IS */ { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX}, /* IS */
{ IX, SIX, X, IX, IX, SIX, LSIX, LX, SLX, LSIX}, /* IX */ { N, SIX, X, IX, IX, SIX, LSIX, LX, SLX, LSIX}, /* IX */
{ SIX, SIX, X, SIX, SIX, SIX, SIX, SLX, SLX, SIX}, /* SIX */ { N, SIX, X, SIX, SIX, SIX, SIX, SLX, SLX, SIX}, /* SIX */
{ LS, S, X, LS, LSIX, SIX, LS, LX, SLX, LSIX}, /* LS */ { N, S, X, LS, LSIX, SIX, LS, LX, SLX, LSIX}, /* LS */
{ LX, SLX, X, LX, LX, SLX, LX, LX, SLX, LX}, /* LX */ { N, SLX, X, LX, LX, SLX, LX, LX, SLX, LX}, /* LX */
{ SLX, SLX, X, SLX, SLX, SLX, SLX, SLX, SLX, SLX}, /* SLX */ { N, SLX, X, SLX, SLX, SLX, SLX, SLX, SLX, SLX}, /* SLX */
{ LSIX, SIX, X, LSIX, LSIX, SIX, LSIX, LX, SLX, LSIX} /* LSIX */ { N, SIX, X, LSIX, LSIX, SIX, LSIX, LX, SLX, LSIX} /* LSIX */
}; };
/* /*
...@@ -176,7 +195,7 @@ static enum lock_type lock_combining_matrix[10][10]= ...@@ -176,7 +195,7 @@ static enum lock_type lock_combining_matrix[10][10]=
#define L GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE #define L GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE
#define A GOT_THE_LOCK #define A GOT_THE_LOCK
#define x GOT_THE_LOCK #define x GOT_THE_LOCK
static enum lockman_getlock_result getlock_result[10][10]= static const enum lockman_getlock_result getlock_result[10][10]=
{/* N S X IS IX SIX LS LX SLX LSIX */ {/* N S X IS IX SIX LS LX SLX LSIX */
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, /* N */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, /* N */
{ 0, x, 0, A, 0, 0, x, 0, 0, 0}, /* S */ { 0, x, 0, A, 0, 0, x, 0, 0, 0}, /* S */
...@@ -200,37 +219,47 @@ static enum lockman_getlock_result getlock_result[10][10]= ...@@ -200,37 +219,47 @@ static enum lockman_getlock_result getlock_result[10][10]=
*/ */
struct st_table_lock { struct st_table_lock {
#warning do we need upgraded_from ?
struct st_table_lock *next_in_lo, *upgraded_from, *next, *prev; struct st_table_lock *next_in_lo, *upgraded_from, *next, *prev;
struct st_locked_table *table; struct st_locked_table *table;
uint16 loid; uint16 loid;
char lock_type; uchar lock_type;
}; };
#define hash_insert my_hash_insert /* for consistency :) */ #define hash_insert my_hash_insert /* for consistency :) */
#define remove_from_wait_queue(LOCK, TABLE) \
do \ static inline
{ \ TABLE_LOCK *find_loid(LOCKED_TABLE *table, uint16 loid)
if ((LOCK)->prev) \ {
{ \ return (TABLE_LOCK *)hash_search(& table->latest_locks,
DBUG_ASSERT((TABLE)->wait_queue_out != (LOCK)); \ (byte *)& loid, sizeof(loid));
(LOCK)->prev->next= (LOCK)->next; \ }
} \
else \ static inline
{ \ void remove_from_wait_queue(TABLE_LOCK *lock, LOCKED_TABLE *table)
DBUG_ASSERT((TABLE)->wait_queue_out == (LOCK)); \ {
(TABLE)->wait_queue_out= (LOCK)->next; \ DBUG_ASSERT(table == lock->table);
} \ if (lock->prev)
if ((LOCK)->next) \ {
{ \ DBUG_ASSERT(table->wait_queue_out != lock);
DBUG_ASSERT((TABLE)->wait_queue_in != (LOCK)); \ lock->prev->next= lock->next;
(LOCK)->next->prev= (LOCK)->prev; \ }
} \ else
else \ {
{ \ DBUG_ASSERT(table->wait_queue_out == lock);
DBUG_ASSERT((TABLE)->wait_queue_in == (LOCK)); \ table->wait_queue_out= lock->next;
(TABLE)->wait_queue_in= (LOCK)->prev; \ }
} \ if (lock->next)
} while (0) {
DBUG_ASSERT(table->wait_queue_in != lock);
lock->next->prev= lock->prev;
}
else
{
DBUG_ASSERT(table->wait_queue_in == lock);
table->wait_queue_in= lock->prev;
}
}
/* /*
DESCRIPTION DESCRIPTION
...@@ -243,24 +272,31 @@ enum lockman_getlock_result ...@@ -243,24 +272,31 @@ enum lockman_getlock_result
tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo,
LOCKED_TABLE *table, enum lock_type lock) LOCKED_TABLE *table, enum lock_type lock)
{ {
TABLE_LOCK *old, *new, *blocker; TABLE_LOCK *old, *new, *blocker, *blocker2;
TABLE_LOCK_OWNER *wait_for; TABLE_LOCK_OWNER *wait_for;
ulonglong deadline; ulonglong deadline;
struct timespec timeout; struct timespec timeout;
enum lock_type new_lock; enum lock_type new_lock;
enum lockman_getlock_result res;
int i; int i;
DBUG_ASSERT(lo->waiting_lock == 0);
DBUG_ASSERT(lo->waiting_for == 0);
DBUG_ASSERT(lo->waiting_for_loid == 0);
pthread_mutex_lock(& table->mutex); pthread_mutex_lock(& table->mutex);
/* do we alreasy have a lock on this resource ? */ /* do we already have a lock on this resource ? */
old= (TABLE_LOCK *)hash_search(& table->active, (byte *)&lo->loid, old= find_loid(table, lo->loid);
sizeof(lo->loid));
/* and if yes, is it enough to satisfy the new request */ /* calculate the level of the upgraded lock, if yes */
if (old && lock_combining_matrix[old->lock_type][lock] == old->lock_type) new_lock= old ? lock_combining_matrix[old->lock_type][lock] : lock;
/* and check if old lock is enough to satisfy the new request */
if (old && new_lock == old->lock_type)
{ {
/* yes */ /* yes */
pthread_mutex_unlock(& table->mutex); res= getlock_result[old->lock_type][lock];
return getlock_result[old->lock_type][lock]; goto ret;
} }
/* no, placing a new lock. first - take a free lock structure from the pool */ /* no, placing a new lock. first - take a free lock structure from the pool */
...@@ -275,48 +311,81 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, ...@@ -275,48 +311,81 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo,
{ {
pthread_mutex_unlock(& lm->pool_mutex); pthread_mutex_unlock(& lm->pool_mutex);
new= (TABLE_LOCK *)my_malloc(sizeof(*new), MYF(MY_WME)); new= (TABLE_LOCK *)my_malloc(sizeof(*new), MYF(MY_WME));
if (!new) if (unlikely(!new))
{ {
pthread_mutex_unlock(& table->mutex); res= NO_MEMORY_FOR_LOCK;
return DIDNT_GET_THE_LOCK; goto ret;
} }
} }
/* calculate the level of the upgraded lock */
new_lock= old ? lock_combining_matrix[old->lock_type][lock] : lock;
new->loid= lo->loid; new->loid= lo->loid;
new->lock_type= new_lock; new->lock_type= new_lock;
new->table= table; new->table= table;
/* and try to place it */ /* and try to place it */
for (new->prev= table->wait_queue_in ; ; ) for (new->prev= table->wait_queue_in;;)
{ {
/* waiting queue is not empty and we're not upgrading */ wait_for= 0;
if (!old && new->prev) if (!old)
{ {
/* need to wait */ /* not upgrading - a lock must be added to the _end_ of the wait queue */
DBUG_ASSERT(table->wait_queue_out); for (blocker= new->prev; blocker && !wait_for; blocker= blocker->prev)
DBUG_ASSERT(table->wait_queue_in); {
blocker= new->prev; TABLE_LOCK_OWNER *tmp= lm->loid_to_tlo(blocker->loid);
/* wait for a previous lock in the queue or for a lock it's waiting for */
if (lock_compatibility_matrix[blocker->lock_type][lock]) /* find a blocking lock */
wait_for= lm->loid_to_tlo(blocker->loid)->waiting_for; DBUG_ASSERT(table->wait_queue_out);
else DBUG_ASSERT(table->wait_queue_in);
wait_for= lm->loid_to_tlo(blocker->loid); if (!lock_compatibility_matrix[blocker->lock_type][lock])
{
/* found! */
wait_for= tmp;
}
else
{
/*
hmm, the lock before doesn't block us, let's look one step further.
the condition below means:
if we never waited on a condition yet
OR
the lock before ours (blocker) waits on a lock (blocker2) that is
present in the hash AND conflicts with 'blocker'
the condition after OR may fail if 'blocker2' was removed from
the hash, its signal woke us up, but 'blocker' itself didn't see
the signal yet.
*/
if (!lo->waiting_lock ||
((blocker2= find_loid(table, tmp->waiting_for_loid)) &&
!lock_compatibility_matrix[blocker2->lock_type]
[blocker->lock_type]))
{
/* but it's waiting for a real lock. we'll wait for the same lock */
wait_for= tmp->waiting_for;
}
/*
otherwise - a lock it's waiting for doesn't exist.
We've no choice but to scan the wait queue backwards, looking
for a conflicting lock or a lock waiting for a real lock.
QQ is there a way to avoid this scanning ?
*/
}
}
} }
else
if (wait_for == 0)
{ {
/* checking for compatibility with existing locks */ /* checking for compatibility with existing locks */
for (blocker= 0, i= 0; i < LOCK_TYPES; i++) for (blocker= 0, i= 0; i < LOCK_TYPES; i++)
{ {
if (table->active_locks[i] && !lock_compatibility_matrix[i+1][lock]) if (table->active_locks[i] && !lock_compatibility_matrix[i+1][lock])
{ {
/* the first lock in the list may be our own - skip it */ blocker= table->active_locks[i];
for (blocker= table->active_locks[i]; /* if the first lock in the list is our own - skip it */
blocker && blocker->loid == lo->loid; if (blocker->loid == lo->loid)
blocker= blocker->next) /* no-op */; blocker= blocker->next;
if (blocker) if (blocker) /* found a conflicting lock, need to wait */
break; break;
} }
} }
...@@ -327,6 +396,7 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, ...@@ -327,6 +396,7 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo,
/* ok, we're here - the wait is inevitable */ /* ok, we're here - the wait is inevitable */
lo->waiting_for= wait_for; lo->waiting_for= wait_for;
lo->waiting_for_loid= wait_for->loid;
if (!lo->waiting_lock) /* first iteration of the for() loop */ if (!lo->waiting_lock) /* first iteration of the for() loop */
{ {
/* lock upgrade or new lock request ? */ /* lock upgrade or new lock request ? */
...@@ -338,7 +408,7 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, ...@@ -338,7 +408,7 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo,
new->next->prev= new; new->next->prev= new;
table->wait_queue_out= new; table->wait_queue_out= new;
if (!table->wait_queue_in) if (!table->wait_queue_in)
table->wait_queue_in=table->wait_queue_out; table->wait_queue_in= table->wait_queue_out;
} }
else else
{ {
...@@ -348,7 +418,7 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, ...@@ -348,7 +418,7 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo,
new->prev->next= new; new->prev->next= new;
table->wait_queue_in= new; table->wait_queue_in= new;
if (!table->wait_queue_out) if (!table->wait_queue_out)
table->wait_queue_out=table->wait_queue_in; table->wait_queue_out= table->wait_queue_in;
} }
lo->waiting_lock= new; lo->waiting_lock= new;
...@@ -356,22 +426,28 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, ...@@ -356,22 +426,28 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo,
timeout.tv_sec= deadline/10000000; timeout.tv_sec= deadline/10000000;
timeout.tv_nsec= (deadline % 10000000) * 100; timeout.tv_nsec= (deadline % 10000000) * 100;
} }
else
{
if (my_getsystime() > deadline)
{
pthread_mutex_unlock(& table->mutex);
return DIDNT_GET_THE_LOCK;
}
}
/* now really wait */ /*
prepare to wait.
we must lock blocker's mutex to wait on blocker's cond.
and we must release table's mutex.
note that blocker's mutex is locked _before_ table's mutex is released
*/
pthread_mutex_lock(wait_for->mutex); pthread_mutex_lock(wait_for->mutex);
pthread_mutex_unlock(& table->mutex); pthread_mutex_unlock(& table->mutex);
pthread_cond_timedwait(wait_for->cond, wait_for->mutex, &timeout); /* now really wait */
i= pthread_cond_timedwait(wait_for->cond, wait_for->mutex, & timeout);
pthread_mutex_unlock(wait_for->mutex); pthread_mutex_unlock(wait_for->mutex);
if (i == ETIMEDOUT || i == ETIME)
{
/* we rely on the caller to rollback and release all locks */
res= LOCK_TIMEOUT;
goto ret2;
}
pthread_mutex_lock(& table->mutex); pthread_mutex_lock(& table->mutex);
/* ... and repeat from the beginning */ /* ... and repeat from the beginning */
...@@ -384,6 +460,7 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, ...@@ -384,6 +460,7 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo,
remove_from_wait_queue(new, table); remove_from_wait_queue(new, table);
lo->waiting_lock= 0; lo->waiting_lock= 0;
lo->waiting_for= 0; lo->waiting_for= 0;
lo->waiting_for_loid= 0;
} }
/* add it to the list of all locks of this lock owner */ /* add it to the list of all locks of this lock owner */
...@@ -396,20 +473,20 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, ...@@ -396,20 +473,20 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo,
new->next->prev= new; new->next->prev= new;
table->active_locks[new_lock-1]= new; table->active_locks[new_lock-1]= new;
/* remove the old lock from the hash, if upgrading */ /* update the latest_locks hash */
if (old) if (old)
{ hash_delete(& table->latest_locks, (byte *)old);
new->upgraded_from= old; hash_insert(& table->latest_locks, (byte *)new);
hash_delete(& table->active, (byte *)old);
}
else
new->upgraded_from= 0;
/* and add a new lock to the hash, voila */ new->upgraded_from= old;
hash_insert(& table->active, (byte *)new);
res= getlock_result[lock][lock];
ret:
pthread_mutex_unlock(& table->mutex); pthread_mutex_unlock(& table->mutex);
return getlock_result[lock][lock]; ret2:
DBUG_ASSERT(res);
return res;
} }
/* /*
...@@ -443,6 +520,17 @@ void tablockman_release_locks(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo) ...@@ -443,6 +520,17 @@ void tablockman_release_locks(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo)
Signal our blocker to release this next lock (after we removed our Signal our blocker to release this next lock (after we removed our
lock from the wait queue, of course). lock from the wait queue, of course).
*/ */
/*
An example to clarify the above:
trn1> S-lock the table. Granted.
trn2> IX-lock the table. Added to the wait queue. trn2 waits on trn1
trn3> IS-lock the table. The queue is not empty, so IS-lock is added
to the queue. It's compatible with the waiting IX-lock, so trn3
waits for trn2->waiting_for, that is trn1.
if trn1 releases the lock it signals trn1->cond and both waiting
transactions are awakened. But if trn2 times out, trn3 must be notified
too (as IS and S locks are compatible). So trn2 must signal trn1->cond.
*/
if (lock->prev && if (lock->prev &&
lock_compatibility_matrix[lock->prev->lock_type][lock->lock_type]) lock_compatibility_matrix[lock->prev->lock_type][lock->lock_type])
{ {
...@@ -451,6 +539,7 @@ void tablockman_release_locks(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo) ...@@ -451,6 +539,7 @@ void tablockman_release_locks(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo)
pthread_mutex_unlock(lo->waiting_for->mutex); pthread_mutex_unlock(lo->waiting_for->mutex);
} }
lo->waiting_for= 0; lo->waiting_for= 0;
lo->waiting_for_loid= 0;
pthread_mutex_unlock(& lock->table->mutex); pthread_mutex_unlock(& lock->table->mutex);
lock->next= local_pool; lock->next= local_pool;
...@@ -465,11 +554,12 @@ void tablockman_release_locks(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo) ...@@ -465,11 +554,12 @@ void tablockman_release_locks(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo)
pthread_mutex_t *mutex= & lock->table->mutex; pthread_mutex_t *mutex= & lock->table->mutex;
DBUG_ASSERT(cur->loid == lo->loid); DBUG_ASSERT(cur->loid == lo->loid);
DBUG_ASSERT(lock != lock->next_in_lo);
lock= lock->next_in_lo; lock= lock->next_in_lo;
/* TODO ? group locks by table to reduce the number of mutex locks */ /* TODO ? group locks by table to reduce the number of mutex locks */
pthread_mutex_lock(mutex); pthread_mutex_lock(mutex);
hash_delete(& cur->table->active, (byte *)cur); hash_delete(& cur->table->latest_locks, (byte *)cur);
if (cur->prev) if (cur->prev)
cur->prev->next= cur->next; cur->prev->next= cur->next;
...@@ -506,7 +596,8 @@ void tablockman_init(TABLOCKMAN *lm, loid_to_tlo_func *func, uint timeout) ...@@ -506,7 +596,8 @@ void tablockman_init(TABLOCKMAN *lm, loid_to_tlo_func *func, uint timeout)
lm->pool= 0; lm->pool= 0;
lm->loid_to_tlo= func; lm->loid_to_tlo= func;
lm->lock_timeout= timeout; lm->lock_timeout= timeout;
pthread_mutex_init(&lm->pool_mutex, MY_MUTEX_INIT_FAST); pthread_mutex_init(& lm->pool_mutex, MY_MUTEX_INIT_FAST);
my_getsystime(); /* ensure that my_getsystime() is initialized */
} }
void tablockman_destroy(TABLOCKMAN *lm) void tablockman_destroy(TABLOCKMAN *lm)
...@@ -517,36 +608,54 @@ void tablockman_destroy(TABLOCKMAN *lm) ...@@ -517,36 +608,54 @@ void tablockman_destroy(TABLOCKMAN *lm)
lm->pool= tmp->next; lm->pool= tmp->next;
my_free((void *)tmp, MYF(0)); my_free((void *)tmp, MYF(0));
} }
pthread_mutex_destroy(&lm->pool_mutex); pthread_mutex_destroy(& lm->pool_mutex);
} }
/*
initialize a LOCKED_TABLE structure
SYNOPSYS
lt a LOCKED_TABLE to initialize
initial_hash_size initial size for 'latest_locks' hash
*/
void tablockman_init_locked_table(LOCKED_TABLE *lt, int initial_hash_size) void tablockman_init_locked_table(LOCKED_TABLE *lt, int initial_hash_size)
{ {
TABLE_LOCK *unused;
bzero(lt, sizeof(*lt)); bzero(lt, sizeof(*lt));
pthread_mutex_init(& lt->mutex, MY_MUTEX_INIT_FAST); pthread_mutex_init(& lt->mutex, MY_MUTEX_INIT_FAST);
hash_init(& lt->active, &my_charset_bin, initial_hash_size, hash_init(& lt->latest_locks, & my_charset_bin, initial_hash_size,
offsetof(TABLE_LOCK, loid), sizeof(unused->loid), 0, 0, 0); offsetof(TABLE_LOCK, loid),
sizeof(((TABLE_LOCK*)0)->loid), 0, 0, 0);
} }
void tablockman_destroy_locked_table(LOCKED_TABLE *lt) void tablockman_destroy_locked_table(LOCKED_TABLE *lt)
{ {
hash_free(& lt->active); int i;
DBUG_ASSERT(lt->wait_queue_out == 0);
DBUG_ASSERT(lt->wait_queue_in == 0);
DBUG_ASSERT(lt->latest_locks.records == 0);
for (i= 0; i<LOCK_TYPES; i++)
DBUG_ASSERT(lt->active_locks[i] == 0);
hash_free(& lt->latest_locks);
pthread_mutex_destroy(& lt->mutex); pthread_mutex_destroy(& lt->mutex);
} }
#ifdef EXTRA_DEBUG #ifdef EXTRA_DEBUG
static char *lock2str[LOCK_TYPES+1]= {"N", "S", "X", "IS", "IX", "SIX", static const char *lock2str[LOCK_TYPES+1]= {"N", "S", "X", "IS", "IX", "SIX",
"LS", "LX", "SLX", "LSIX"}; "LS", "LX", "SLX", "LSIX"};
void print_tlo(TABLE_LOCK_OWNER *lo) void tablockman_print_tlo(TABLE_LOCK_OWNER *lo)
{ {
TABLE_LOCK *lock; TABLE_LOCK *lock;
printf("lo%d>", lo->loid); printf("lo%d>", lo->loid);
if ((lock= lo->waiting_lock)) if ((lock= lo->waiting_lock))
printf(" (%s.%p)", lock2str[lock->lock_type], lock->table); printf(" (%s.0x%lx)", lock2str[lock->lock_type], (intptr)lock->table);
for (lock= lo->active_locks; lock && lock != lock->next_in_lo; lock= lock->next_in_lo) for (lock= lo->active_locks;
printf(" %s.%p", lock2str[lock->lock_type], lock->table); lock && lock != lock->next_in_lo;
lock= lock->next_in_lo)
printf(" %s.0x%lx", lock2str[lock->lock_type], (intptr)lock->table);
if (lock && lock == lock->next_in_lo) if (lock && lock == lock->next_in_lo)
printf("!"); printf("!");
printf("\n"); printf("\n");
......
...@@ -33,45 +33,45 @@ ...@@ -33,45 +33,45 @@
LSIX - Loose Shared + Intention eXclusive LSIX - Loose Shared + Intention eXclusive
*/ */
#ifndef _lockman_h #ifndef _lockman_h
enum lock_type { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX }; #warning TODO remove N-locks
enum lock_type { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX, LOCK_TYPE_LAST };
enum lockman_getlock_result { enum lockman_getlock_result {
DIDNT_GET_THE_LOCK=0, GOT_THE_LOCK, NO_MEMORY_FOR_LOCK=1, DEADLOCK, LOCK_TIMEOUT,
GOT_THE_LOCK,
GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE, GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE,
GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE
}; };
#endif #endif
#define LOCK_TYPES LSIX #define LOCK_TYPES (LOCK_TYPE_LAST-1)
typedef struct st_table_lock_owner TABLE_LOCK_OWNER;
typedef struct st_table_lock TABLE_LOCK; typedef struct st_table_lock TABLE_LOCK;
typedef struct st_locked_table LOCKED_TABLE;
typedef struct st_table_lock_owner {
TABLE_LOCK *active_locks; /* list of active locks */
TABLE_LOCK *waiting_lock; /* waiting lock (one lock only) */
struct st_table_lock_owner *waiting_for; /* transaction we're waiting for */
pthread_cond_t *cond; /* transactions waiting for us, wait on 'cond' */
pthread_mutex_t *mutex; /* mutex is required to use 'cond' */
uint16 loid, waiting_for_loid; /* Lock Owner IDentifier */
} TABLE_LOCK_OWNER;
typedef struct st_locked_table {
pthread_mutex_t mutex; /* mutex for everything below */
HASH latest_locks; /* latest locks in a hash */
TABLE_LOCK *active_locks[LOCK_TYPES]; /* dl-list of locks per type */
TABLE_LOCK *wait_queue_in, *wait_queue_out; /* wait deque (double-end queue)*/
} LOCKED_TABLE;
typedef TABLE_LOCK_OWNER *loid_to_tlo_func(uint16); typedef TABLE_LOCK_OWNER *loid_to_tlo_func(uint16);
typedef struct { typedef struct {
pthread_mutex_t pool_mutex; pthread_mutex_t pool_mutex;
TABLE_LOCK *pool; /* lifo pool of free locks */ TABLE_LOCK *pool; /* lifo pool of free locks */
uint lock_timeout; uint lock_timeout; /* lock timeout in milliseconds */
loid_to_tlo_func *loid_to_tlo; /* for mapping loid to TABLE_LOCK_OWNER */ loid_to_tlo_func *loid_to_tlo; /* for mapping loid to TABLE_LOCK_OWNER */
} TABLOCKMAN; } TABLOCKMAN;
/*
  Lock owner descriptor used by the table lock manager.

  Keeps the locks an owner currently holds, the single lock it may be
  waiting on, the owner it is waiting for, and the condition variable
  (with its mutex) that other owners wait on.
*/
struct st_table_lock_owner {
TABLE_LOCK *active_locks; /* list of active locks */
TABLE_LOCK *waiting_lock; /* waiting lock (one lock only) */
TABLE_LOCK_OWNER *waiting_for; /* transaction we're waiting for */
pthread_cond_t *cond; /* transactions waiting for us, wait on 'cond' */
pthread_mutex_t *mutex; /* mutex is required to use 'cond' */
uint16 loid; /* Lock Owner IDentifier */
};
/*
  Lock state of one lockable table.

  'mutex' protects every other member of the structure.
*/
struct st_locked_table {
pthread_mutex_t mutex; /* mutex for everything below */
HASH active; /* active locks in a hash */
/* one list head per lock type, LOCK_TYPES entries in total */
TABLE_LOCK *active_locks[LOCK_TYPES]; /* dl-list of locks per type */
/* the two ends of the queue of waiting locks */
TABLE_LOCK *wait_queue_in, *wait_queue_out; /* wait deque */
};
void tablockman_init(TABLOCKMAN *, loid_to_tlo_func *, uint); void tablockman_init(TABLOCKMAN *, loid_to_tlo_func *, uint);
void tablockman_destroy(TABLOCKMAN *); void tablockman_destroy(TABLOCKMAN *);
enum lockman_getlock_result tablockman_getlock(TABLOCKMAN *, TABLE_LOCK_OWNER *, enum lockman_getlock_result tablockman_getlock(TABLOCKMAN *, TABLE_LOCK_OWNER *,
...@@ -81,7 +81,7 @@ void tablockman_init_locked_table(LOCKED_TABLE *, int); ...@@ -81,7 +81,7 @@ void tablockman_init_locked_table(LOCKED_TABLE *, int);
void tablockman_destroy_locked_table(LOCKED_TABLE *); void tablockman_destroy_locked_table(LOCKED_TABLE *);
#ifdef EXTRA_DEBUG #ifdef EXTRA_DEBUG
void print_tlo(TABLE_LOCK_OWNER *); void tablockman_print_tlo(TABLE_LOCK_OWNER *);
#endif #endif
#endif #endif
......
...@@ -69,7 +69,8 @@ static TRN *short_trid_to_TRN(uint16 short_trid) ...@@ -69,7 +69,8 @@ static TRN *short_trid_to_TRN(uint16 short_trid)
return (TRN *)trn; return (TRN *)trn;
} }
static byte *trn_get_hash_key(const byte *trn, uint* len, my_bool unused) static byte *trn_get_hash_key(const byte *trn, uint* len,
my_bool unused __attribute__ ((unused)))
{ {
*len= sizeof(TrID); *len= sizeof(TrID);
return (byte *) & ((*((TRN **)trn))->trid); return (byte *) & ((*((TRN **)trn))->trid);
......
...@@ -14,6 +14,10 @@ ...@@ -14,6 +14,10 @@
along with this program; if not, write to the Free Software along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/*
lockman for row and table locks
*/
//#define EXTRA_VERBOSE //#define EXTRA_VERBOSE
#include <tap.h> #include <tap.h>
......
...@@ -14,6 +14,10 @@ ...@@ -14,6 +14,10 @@
along with this program; if not, write to the Free Software along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/*
lockman for row locks, tablockman for table locks
*/
//#define EXTRA_VERBOSE //#define EXTRA_VERBOSE
#include <tap.h> #include <tap.h>
...@@ -64,7 +68,7 @@ TABLE_LOCK_OWNER *loid2lo1(uint16 loid) ...@@ -64,7 +68,7 @@ TABLE_LOCK_OWNER *loid2lo1(uint16 loid)
#define lock_ok_l(O, R, L) \ #define lock_ok_l(O, R, L) \
test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE) test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE)
#define lock_conflict(O, R, L) \ #define lock_conflict(O, R, L) \
test_lock(O, R, L, "cannot ", DIDNT_GET_THE_LOCK); test_lock(O, R, L, "cannot ", LOCK_TIMEOUT);
void test_tablockman_simple() void test_tablockman_simple()
{ {
...@@ -164,8 +168,11 @@ int Ntables= 10; ...@@ -164,8 +168,11 @@ int Ntables= 10;
int table_lock_ratio= 10; int table_lock_ratio= 10;
enum lock_type lock_array[6]= {S, X, LS, LX, IS, IX}; enum lock_type lock_array[6]= {S, X, LS, LX, IS, IX};
char *lock2str[6]= {"S", "X", "LS", "LX", "IS", "IX"}; char *lock2str[6]= {"S", "X", "LS", "LX", "IS", "IX"};
char *res2str[4]= { char *res2str[]= {
"DIDN'T GET THE LOCK", "DIDN'T GET THE LOCK",
"OUT OF MEMORY",
"DEADLOCK",
"LOCK TIMEOUT",
"GOT THE LOCK", "GOT THE LOCK",
"GOT THE LOCK NEED TO LOCK A SUBRESOURCE", "GOT THE LOCK NEED TO LOCK A SUBRESOURCE",
"GOT THE LOCK NEED TO INSTANT LOCK A SUBRESOURCE"}; "GOT THE LOCK NEED TO INSTANT LOCK A SUBRESOURCE"};
...@@ -191,7 +198,7 @@ pthread_handler_t test_lockman(void *arg) ...@@ -191,7 +198,7 @@ pthread_handler_t test_lockman(void *arg)
res= tablockman_getlock(&tablockman, lo1, ltarray+table, lock_array[locklevel]); res= tablockman_getlock(&tablockman, lo1, ltarray+table, lock_array[locklevel]);
DIAG(("loid %2d, table %d, lock %s, res %s", loid, table, DIAG(("loid %2d, table %d, lock %s, res %s", loid, table,
lock2str[locklevel], res2str[res])); lock2str[locklevel], res2str[res]));
if (res == DIDNT_GET_THE_LOCK) if (res < GOT_THE_LOCK)
{ {
lockman_release_locks(&lockman, lo); tablockman_release_locks(&tablockman, lo1); lockman_release_locks(&lockman, lo); tablockman_release_locks(&tablockman, lo1);
DIAG(("loid %2d, release all locks", loid)); DIAG(("loid %2d, release all locks", loid));
...@@ -208,11 +215,6 @@ pthread_handler_t test_lockman(void *arg) ...@@ -208,11 +215,6 @@ pthread_handler_t test_lockman(void *arg)
lock2str[locklevel+4], res2str[res])); lock2str[locklevel+4], res2str[res]));
switch (res) switch (res)
{ {
case DIDNT_GET_THE_LOCK:
lockman_release_locks(&lockman, lo); tablockman_release_locks(&tablockman, lo1);
DIAG(("loid %2d, release all locks", loid));
timeout++;
continue;
case GOT_THE_LOCK: case GOT_THE_LOCK:
continue; continue;
case GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE: case GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE:
...@@ -232,7 +234,10 @@ pthread_handler_t test_lockman(void *arg) ...@@ -232,7 +234,10 @@ pthread_handler_t test_lockman(void *arg)
DBUG_ASSERT(res == GOT_THE_LOCK); DBUG_ASSERT(res == GOT_THE_LOCK);
continue; continue;
default: default:
DBUG_ASSERT(0); lockman_release_locks(&lockman, lo); tablockman_release_locks(&tablockman, lo1);
DIAG(("loid %2d, release all locks", loid));
timeout++;
continue;
} }
} }
} }
......
...@@ -14,6 +14,10 @@ ...@@ -14,6 +14,10 @@
along with this program; if not, write to the Free Software along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/*
tablockman for row and table locks
*/
//#define EXTRA_VERBOSE //#define EXTRA_VERBOSE
#include <tap.h> #include <tap.h>
...@@ -57,7 +61,7 @@ TABLE_LOCK_OWNER *loid2lo1(uint16 loid) ...@@ -57,7 +61,7 @@ TABLE_LOCK_OWNER *loid2lo1(uint16 loid)
#define lock_ok_l(O, R, L) \ #define lock_ok_l(O, R, L) \
test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE) test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE)
#define lock_conflict(O, R, L) \ #define lock_conflict(O, R, L) \
test_lock(O, R, L, "cannot ", DIDNT_GET_THE_LOCK); test_lock(O, R, L, "cannot ", LOCK_TIMEOUT);
void test_tablockman_simple() void test_tablockman_simple()
{ {
...@@ -165,14 +169,34 @@ void run_test(const char *test, pthread_handler handler, int n, int m) ...@@ -165,14 +169,34 @@ void run_test(const char *test, pthread_handler handler, int n, int m)
my_free((void*)threads, MYF(0)); my_free((void*)threads, MYF(0));
} }
/*
  Return a lock owner to a clean state by releasing all the locks it holds.

  @param lm  table lock manager the owner's locks belong to
  @param lo  lock owner to reset

  Only the lock release is performed. A fuller reset - destroying and
  re-creating lo->mutex and lo->cond and zeroing the structure while
  keeping its mutex, cond and loid - was sketched here but never enabled,
  so no copy of the owner is taken.
*/
static void reinit_tlo(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo)
{
  tablockman_release_locks(lm, lo);
}
pthread_mutex_t rt_mutex; pthread_mutex_t rt_mutex;
int Nrows= 100; int Nrows= 100;
int Ntables= 10; int Ntables= 10;
int table_lock_ratio= 10; int table_lock_ratio= 10;
enum lock_type lock_array[6]= {S, X, LS, LX, IS, IX}; enum lock_type lock_array[6]= {S, X, LS, LX, IS, IX};
char *lock2str[6]= {"S", "X", "LS", "LX", "IS", "IX"}; char *lock2str[6]= {"S", "X", "LS", "LX", "IS", "IX"};
char *res2str[4]= { char *res2str[]= {
"DIDN'T GET THE LOCK", 0,
"OUT OF MEMORY",
"DEADLOCK",
"LOCK TIMEOUT",
"GOT THE LOCK", "GOT THE LOCK",
"GOT THE LOCK NEED TO LOCK A SUBRESOURCE", "GOT THE LOCK NEED TO LOCK A SUBRESOURCE",
"GOT THE LOCK NEED TO INSTANT LOCK A SUBRESOURCE"}; "GOT THE LOCK NEED TO INSTANT LOCK A SUBRESOURCE"};
...@@ -200,9 +224,9 @@ pthread_handler_t test_lockman(void *arg) ...@@ -200,9 +224,9 @@ pthread_handler_t test_lockman(void *arg)
res= tablockman_getlock(&tablockman, lo1, ltarray+table, lock_array[locklevel]); res= tablockman_getlock(&tablockman, lo1, ltarray+table, lock_array[locklevel]);
DIAG(("loid %2d, table %d, lock %s, res %s", loid, table, DIAG(("loid %2d, table %d, lock %s, res %s", loid, table,
lock2str[locklevel], res2str[res])); lock2str[locklevel], res2str[res]));
if (res == DIDNT_GET_THE_LOCK) if (res < GOT_THE_LOCK)
{ {
tablockman_release_locks(&tablockman, lo1); reinit_tlo(&tablockman, lo1);
DIAG(("loid %2d, release all locks", loid)); DIAG(("loid %2d, release all locks", loid));
timeout++; timeout++;
continue; continue;
...@@ -217,11 +241,6 @@ pthread_handler_t test_lockman(void *arg) ...@@ -217,11 +241,6 @@ pthread_handler_t test_lockman(void *arg)
lock2str[locklevel+4], res2str[res])); lock2str[locklevel+4], res2str[res]));
switch (res) switch (res)
{ {
case DIDNT_GET_THE_LOCK:
tablockman_release_locks(&tablockman, lo1);
DIAG(("loid %2d, release all locks", loid));
timeout++;
continue;
case GOT_THE_LOCK: case GOT_THE_LOCK:
continue; continue;
case GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE: case GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE:
...@@ -230,9 +249,9 @@ pthread_handler_t test_lockman(void *arg) ...@@ -230,9 +249,9 @@ pthread_handler_t test_lockman(void *arg)
res= tablockman_getlock(&tablockman, lo1, ltarray+row, lock_array[locklevel]); res= tablockman_getlock(&tablockman, lo1, ltarray+row, lock_array[locklevel]);
DIAG(("loid %2d, ROW %d, lock %s, res %s", loid, row, DIAG(("loid %2d, ROW %d, lock %s, res %s", loid, row,
lock2str[locklevel], res2str[res])); lock2str[locklevel], res2str[res]));
if (res == DIDNT_GET_THE_LOCK) if (res < GOT_THE_LOCK)
{ {
tablockman_release_locks(&tablockman, lo1); reinit_tlo(&tablockman, lo1);
DIAG(("loid %2d, release all locks", loid)); DIAG(("loid %2d, release all locks", loid));
timeout++; timeout++;
continue; continue;
...@@ -240,12 +259,15 @@ pthread_handler_t test_lockman(void *arg) ...@@ -240,12 +259,15 @@ pthread_handler_t test_lockman(void *arg)
DBUG_ASSERT(res == GOT_THE_LOCK); DBUG_ASSERT(res == GOT_THE_LOCK);
continue; continue;
default: default:
DBUG_ASSERT(0); reinit_tlo(&tablockman, lo1);
DIAG(("loid %2d, release all locks", loid));
timeout++;
continue;
} }
} }
} }
tablockman_release_locks(&tablockman, lo1); reinit_tlo(&tablockman, lo1);
pthread_mutex_lock(&rt_mutex); pthread_mutex_lock(&rt_mutex);
rt_num_threads--; rt_num_threads--;
...@@ -264,7 +286,7 @@ int main() ...@@ -264,7 +286,7 @@ int main()
my_init(); my_init();
pthread_mutex_init(&rt_mutex, 0); pthread_mutex_init(&rt_mutex, 0);
plan(39); plan(40);
if (my_atomic_initialize()) if (my_atomic_initialize())
return exit_status(); return exit_status();
...@@ -299,7 +321,7 @@ int main() ...@@ -299,7 +321,7 @@ int main()
Nrows= 100; Nrows= 100;
Ntables= 10; Ntables= 10;
table_lock_ratio= 10; table_lock_ratio= 10;
//run_test("\"random lock\" stress test", test_lockman, THREADS, CYCLES); run_test("\"random lock\" stress test", test_lockman, THREADS, CYCLES);
#if 0 #if 0
/* "real-life" simulation - many rows, no table locks */ /* "real-life" simulation - many rows, no table locks */
Nrows= 1000000; Nrows= 1000000;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment