Commit 3f4aa5f7 authored by unknown

Merge bk-internal.mysql.com:/home/bk/mysql-maria

into  janus.mylan:/usr/home/serg/Abk/mysql-maria

parents 3becab22 915cebdd
......@@ -117,8 +117,9 @@ valgrind_flags="-USAFEMALLOC -UFORCE_INIT_OF_VARS -DHAVE_purify "
valgrind_flags="$valgrind_flags -DMYSQL_SERVER_SUFFIX=-valgrind-max"
#
# Used in -debug builds
debug_cflags="-DUNIV_MUST_NOT_INLINE -DEXTRA_DEBUG -DFORCE_INIT_OF_VARS "
debug_cflags="-DUNIV_MUST_NOT_INLINE -DEXTRA_DEBUG -DFORCE_INIT_OF_VARS"
debug_cflags="$debug_cflags -DSAFEMALLOC -DPEDANTIC_SAFEMALLOC -DSAFE_MUTEX"
debug_cflags="$debug_cflags -DMY_LF_EXTRA_DEBUG"
error_inject="--with-error-inject "
#
# Base C++ flags for all builds
......
......@@ -32,7 +32,7 @@
#ifdef make_atomic_cas_body
typedef struct { } my_atomic_rwlock_t;
typedef struct { } my_atomic_rwlock_t __attribute__ ((unused));
#define my_atomic_rwlock_destroy(name)
#define my_atomic_rwlock_init(name)
#define my_atomic_rwlock_rdlock(name)
......
......@@ -24,7 +24,7 @@
func() is a _func() protected by my_atomic_rwlock_wrlock()
*/
#define lock_wrap(f,t,proto_args, args, lock) \
#define lock_wrap(f, t, proto_args, args, lock) \
t _ ## f proto_args; \
static inline t f proto_args \
{ \
......@@ -35,7 +35,7 @@ static inline t f proto_args \
return ret; \
}
#define lock_wrap_void(f,proto_args, args, lock) \
#define lock_wrap_void(f, proto_args, args, lock) \
void _ ## f proto_args; \
static inline void f proto_args \
{ \
......@@ -44,14 +44,14 @@ static inline void f proto_args \
my_atomic_rwlock_wrunlock(lock); \
}
#define nolock_wrap(f,t,proto_args, args) \
#define nolock_wrap(f, t, proto_args, args) \
t _ ## f proto_args; \
static inline t f proto_args \
{ \
return _ ## f args; \
}
#define nolock_wrap_void(f,proto_args, args) \
#define nolock_wrap_void(f, proto_args, args) \
void _ ## f proto_args; \
static inline void f proto_args \
{ \
......@@ -80,14 +80,14 @@ void lf_dynarray_destroy(LF_DYNARRAY *array);
nolock_wrap(lf_dynarray_value, void *,
(LF_DYNARRAY *array, uint idx),
(array,idx));
(array, idx));
lock_wrap(lf_dynarray_lvalue, void *,
(LF_DYNARRAY *array, uint idx),
(array,idx),
(array, idx),
&array->lock);
nolock_wrap(lf_dynarray_iterate, int,
(LF_DYNARRAY *array, lf_dynarray_func func, void *arg),
(array,func,arg));
(array, func, arg));
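For illustration, this is roughly what the lock_wrap() invocation for
lf_dynarray_lvalue above expands to (a sketch, not the verbatim expansion):
a declaration of the internal _lf_dynarray_lvalue() plus an inline wrapper
that takes the rwlock around the call:

void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx);
static inline void *lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx)
{
  void *ret;
  my_atomic_rwlock_wrlock(&array->lock);   /* serialize when CAS is absent */
  ret= _lf_dynarray_lvalue(array, idx);    /* call the real implementation */
  my_atomic_rwlock_wrunlock(&array->lock);
  return ret;
}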
/*
pin manager for memory allocator, lf_alloc-pin.c
......@@ -115,9 +115,14 @@ typedef struct {
uint32 volatile link;
/* we want sizeof(LF_PINS) to be 128 to avoid false sharing */
char pad[128-sizeof(uint32)*2
-sizeof(void *)*(LF_PINBOX_PINS+2)];
-sizeof(LF_PINBOX *)
-sizeof(void *)*(LF_PINBOX_PINS+1)];
} LF_PINS;
/*
shortcut macros to do an atomic_wrlock on a structure that uses pins
(e.g. lf_hash).
*/
#define lf_rwlock_by_pins(PINS) \
my_atomic_rwlock_wrlock(&(PINS)->pinbox->pinstack.lock)
#define lf_rwunlock_by_pins(PINS) \
......@@ -131,11 +136,11 @@ typedef struct {
#if defined(__GNUC__) && defined(MY_LF_EXTRA_DEBUG)
#define LF_REQUIRE_PINS(N) \
static const char require_pins[LF_PINBOX_PINS-N]; \
static const int LF_NUM_PINS_IN_THIS_FILE=N;
static const int LF_NUM_PINS_IN_THIS_FILE= N;
#define _lf_pin(PINS, PIN, ADDR) \
( \
my_atomic_storeptr(&(PINS)->pin[PIN], (ADDR)), \
assert(PIN < LF_NUM_PINS_IN_THIS_FILE) \
assert(PIN < LF_NUM_PINS_IN_THIS_FILE), \
my_atomic_storeptr(&(PINS)->pin[PIN], (ADDR)) \
)
#else
#define LF_REQUIRE_PINS(N)
......@@ -151,7 +156,7 @@ typedef struct {
} while (0)
#define lf_unpin(PINS, PIN) lf_pin(PINS, PIN, NULL)
#define _lf_assert_pin(PINS, PIN) assert((PINS)->pin[PIN] != 0)
#define _lf_assert_unpin(PINS, PIN) assert((PINS)->pin[PIN]==0)
#define _lf_assert_unpin(PINS, PIN) assert((PINS)->pin[PIN] == 0)
void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset,
lf_pinbox_free_func *free_func, void * free_func_arg);
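To make the pin discipline concrete, a minimal sketch of the
read-pin-verify idiom these macros support (the same pattern lfind() in
lf_hash.c uses); the function name and 'head' are illustrative only:

static void *pin_shared_ptr(void * volatile *head, LF_PINS *pins)
{
  void *p;
  do
  {
    p= *head;                           /* read the shared pointer       */
    lf_pin(pins, 0, p);                 /* publish it in pin slot 0      */
  } while (p != *head && LF_BACKOFF);   /* re-check: was it unlinked ?   */
  return p;   /* safe to dereference until lf_unpin(pins, 0) */
}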
......@@ -167,16 +172,20 @@ lock_wrap_void(lf_pinbox_put_pins,
&pins->pinbox->pinstack.lock);
lock_wrap_void(lf_pinbox_free,
(LF_PINS *pins, void *addr),
(pins,addr),
(pins, addr),
&pins->pinbox->pinstack.lock);
/*
memory allocator, lf_alloc-pin.c
*/
struct st_lf_alloc_node {
struct st_lf_alloc_node *next;
};
typedef struct st_lf_allocator {
LF_PINBOX pinbox;
void * volatile top;
struct st_lf_alloc_node * volatile top;
uint element_size;
uint32 volatile mallocs;
} LF_ALLOCATOR;
......@@ -184,13 +193,17 @@ typedef struct st_lf_allocator {
void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset);
void lf_alloc_destroy(LF_ALLOCATOR *allocator);
uint lf_alloc_in_pool(LF_ALLOCATOR *allocator);
/*
shortcut macros to access underlying pinbox functions from an LF_ALLOCATOR
see _lf_pinbox_get_pins() and _lf_pinbox_put_pins()
*/
#define _lf_alloc_free(PINS, PTR) _lf_pinbox_free((PINS), (PTR))
#define lf_alloc_free(PINS, PTR) lf_pinbox_free((PINS), (PTR))
#define _lf_alloc_get_pins(ALLOC) _lf_pinbox_get_pins(&(ALLOC)->pinbox)
#define lf_alloc_get_pins(ALLOC) lf_pinbox_get_pins(&(ALLOC)->pinbox)
#define _lf_alloc_put_pins(PINS) _lf_pinbox_put_pins(PINS)
#define lf_alloc_put_pins(PINS) lf_pinbox_put_pins(PINS)
#define lf_alloc_real_free(ALLOC,ADDR) my_free((gptr)(ADDR), MYF(0))
#define lf_alloc_real_free(ALLOC, ADDR) my_free((gptr)(ADDR), MYF(0))
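A sketch of the allocator's intended life cycle through the macros above;
MY_NODE and its layout (the reusable free pointer at offset 0) are
illustrative assumptions:

typedef struct my_node { struct my_node *next_free; int payload; } MY_NODE;

static void allocator_example(void)
{
  LF_ALLOCATOR alloc;
  LF_PINS *pins;
  MY_NODE *node;

  lf_alloc_init(&alloc, sizeof(MY_NODE), offsetof(MY_NODE, next_free));
  pins= lf_alloc_get_pins(&alloc);      /* a per-thread set of pins       */
  node= (MY_NODE *)lf_alloc_new(pins);  /* pop from pool, malloc if empty */
  /* ... use node ... */
  lf_alloc_free(pins, node);            /* freed only when no one pins it */
  lf_alloc_put_pins(pins);
  lf_alloc_destroy(&alloc);
}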
lock_wrap(lf_alloc_new, void *,
(LF_PINS *pins),
......@@ -222,6 +235,10 @@ void lf_hash_destroy(LF_HASH *hash);
int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data);
void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen);
int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen);
/*
shortcut macros to access underlying pinbox functions from an LF_HASH
see _lf_pinbox_get_pins() and _lf_pinbox_put_pins()
*/
#define _lf_hash_get_pins(HASH) _lf_alloc_get_pins(&(HASH)->alloc)
#define lf_hash_get_pins(HASH) lf_alloc_get_pins(&(HASH)->alloc)
#define _lf_hash_put_pins(PINS) _lf_pinbox_put_pins(PINS)
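And a sketch of an LF_HASH round trip through the functions declared above;
the record layout and the init parameters in the comment are illustrative
assumptions, and lf_hash_put_pins() is the unprefixed counterpart of the
macro above:

typedef struct { uint32 id; uint32 value; } MY_REC;   /* key is 'id' */

/* assumes the hash was created with lf_hash_init() using element_size
   sizeof(MY_REC), key_offset 0 and key_length sizeof(uint32) */
static void hash_example(LF_HASH *hash)
{
  MY_REC rec= { 1, 42 }, *found;
  LF_PINS *pins= lf_hash_get_pins(hash);

  lf_hash_insert(hash, pins, &rec);     /* the record is copied in        */
  found= (MY_REC *)lf_hash_search(hash, pins, &rec.id, sizeof(rec.id));
  /* 'found' is pinned (pin 2) by the search, so it cannot go away here */
  lf_hash_delete(hash, pins, &rec.id, sizeof(rec.id));
  lf_hash_put_pins(pins);
}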
......
......@@ -173,14 +173,6 @@ make_atomic_fas(ptr)
#define LF_BACKOFF (1)
#endif
#if SIZEOF_CHARP == SIZEOF_INT
typedef int intptr;
#elif SIZEOF_CHARP == SIZEOF_LONG
typedef long intptr;
#else
#error
#endif
#define MY_ATOMIC_OK 0
#define MY_ATOMIC_NOT_1CPU 1
extern int my_atomic_initialize();
......
......@@ -432,7 +432,8 @@ C_MODE_END
#define compile_time_assert(X) \
do \
{ \
char compile_time_assert[(X) ? 1 : -1]; \
char compile_time_assert[(X) ? 1 : -1] \
__attribute__ ((unused)); \
} while(0)
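For illustration, a typical use of the macro (it is a statement, so it has
to appear inside a function body); the unused attribute added above
silences the warning about the otherwise-unreferenced array:

static void compile_time_assert_example(void)
{
  compile_time_assert(sizeof(void *) >= sizeof(int)); /* ok: size 1 array */
  /* compile_time_assert(sizeof(char) == 2);  would fail: size -1 array */
}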
/* Go around some bugs in different OS and compilers */
......@@ -964,6 +965,14 @@ typedef unsigned __int64 my_ulonglong;
typedef unsigned long long my_ulonglong;
#endif
#if SIZEOF_CHARP == SIZEOF_INT
typedef int intptr;
#elif SIZEOF_CHARP == SIZEOF_LONG
typedef long intptr;
#else
#error
#endif
#ifdef USE_RAID
/*
The following is done with a if to not get problems with pre-processors
......
......@@ -91,7 +91,7 @@ static void _lf_pinbox_real_free(LF_PINS *pins);
See the latter for details.
*/
void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset,
lf_pinbox_free_func *free_func,void *free_func_arg)
lf_pinbox_free_func *free_func, void *free_func_arg)
{
DBUG_ASSERT(sizeof(LF_PINS) == 128);
DBUG_ASSERT(free_ptr_offset % sizeof(void *) == 0);
......@@ -306,7 +306,7 @@ static void _lf_pinbox_real_free(LF_PINS *pins)
{
if (addr) /* use binary search */
{
void **a,**b,**c;
void **a, **b, **c;
for (a= addr, b= addr+npins-1, c= a+(b-a)/2; b-a>1; c= a+(b-a)/2)
if (cur == *c)
a= b= c;
......@@ -337,13 +337,13 @@ found:
callback for _lf_pinbox_real_free to free an unpinned object -
add it back to the allocator stack
*/
static void alloc_free(void *node, LF_ALLOCATOR *allocator)
static void alloc_free(struct st_lf_alloc_node *node, LF_ALLOCATOR *allocator)
{
void *tmp;
struct st_lf_alloc_node *tmp;
tmp= allocator->top;
do
{
(*(void **)node)= tmp;
node->next= tmp;
} while (!my_atomic_casptr((void **)&allocator->top, (void **)&tmp, node) &&
LF_BACKOFF);
}
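(The retry loop above relies on the my_atomic_casptr() contract, stated
here as an assumption: on failure the CAS writes the observed value of
allocator->top back into 'tmp', so each iteration only has to re-link
node->next before trying again.)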
......@@ -379,12 +379,12 @@ void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset)
*/
void lf_alloc_destroy(LF_ALLOCATOR *allocator)
{
void *el= allocator->top;
while (el)
struct st_lf_alloc_node *node= allocator->top;
while (node)
{
void *tmp= *(void **)el;
my_free(el, MYF(0));
el= tmp;
struct st_lf_alloc_node *tmp= node->next;
my_free((void *)node, MYF(0));
node= tmp;
}
lf_pinbox_destroy(&allocator->pinbox);
allocator->top= 0;
......@@ -400,7 +400,7 @@ void lf_alloc_destroy(LF_ALLOCATOR *allocator)
void *_lf_alloc_new(LF_PINS *pins)
{
LF_ALLOCATOR *allocator= (LF_ALLOCATOR *)(pins->pinbox->free_func_arg);
void *node;
struct st_lf_alloc_node *node;
for (;;)
{
do
......@@ -410,7 +410,8 @@ void *_lf_alloc_new(LF_PINS *pins)
} while (node != allocator->top && LF_BACKOFF);
if (!node)
{
if (!(node= my_malloc(allocator->element_size, MYF(MY_WME|MY_ZEROFILL))))
if (!(node= (void *)my_malloc(allocator->element_size,
MYF(MY_WME|MY_ZEROFILL))))
break;
#ifdef MY_LF_EXTRA_DEBUG
my_atomic_add32(&allocator->mallocs, 1);
......@@ -434,8 +435,8 @@ void *_lf_alloc_new(LF_PINS *pins)
uint lf_alloc_in_pool(LF_ALLOCATOR *allocator)
{
uint i;
void *node;
for (node= allocator->top, i= 0; node; node= *(void **)node, i++)
struct st_lf_alloc_node *node;
for (node= allocator->top, i= 0; node; node= node->next, i++)
/* no op */;
return i;
}
......
......@@ -19,9 +19,9 @@
(so no pointer into the array may ever become invalid).
Memory is allocated in non-contiguous chunks.
This data structure is not space efficient for sparce arrays.
This data structure is not space efficient for sparse arrays.
The number of elements is limited to 2^16
The number of elements is limited to 4311810304
Every element is aligned to sizeof(element) boundary
(to avoid false sharing if element is big enough).
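(Assuming LF_DYNARRAY_LEVEL_LENGTH is 256 and LF_DYNARRAY_LEVELS is 4, as
in lf.h, the new limit is just the sum of the four level capacities:
256 + 256^2 + 256^3 + 256^4 = 4311810304.)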
......@@ -49,7 +49,8 @@ void lf_dynarray_init(LF_DYNARRAY *array, uint element_size)
static void recursive_free(void **alloc, int level)
{
if (!alloc) return;
if (!alloc)
return;
if (level)
{
......@@ -68,10 +69,9 @@ void lf_dynarray_destroy(LF_DYNARRAY *array)
for (i= 0; i < LF_DYNARRAY_LEVELS; i++)
recursive_free(array->level[i], i);
my_atomic_rwlock_destroy(&array->lock);
bzero(array, sizeof(*array));
}
static const long dynarray_idxes_in_prev_level[LF_DYNARRAY_LEVELS]=
static const ulong dynarray_idxes_in_prev_levels[LF_DYNARRAY_LEVELS]=
{
0, /* +1 here to avoid -1's below */
LF_DYNARRAY_LEVEL_LENGTH,
......@@ -82,6 +82,15 @@ static const long dynarray_idxes_in_prev_level[LF_DYNARRAY_LEVELS]=
LF_DYNARRAY_LEVEL_LENGTH + LF_DYNARRAY_LEVEL_LENGTH
};
static const ulong dynarray_idxes_in_prev_level[LF_DYNARRAY_LEVELS]=
{
0, /* +1 here to avoid -1's below */
LF_DYNARRAY_LEVEL_LENGTH,
LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH,
LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH *
LF_DYNARRAY_LEVEL_LENGTH,
};
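In other words (under the same 256-per-level assumption), level i holds
256^(i+1) elements, and the arrays above convert a global index into a
level: indexes 0-255 live in level 0, 256-65791 in level 1,
65792-16843007 in level 2, and everything above that in level 3 - which
is exactly what the lookup loops below compute.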
/*
Returns a valid lvalue pointer to the element number 'idx'.
Allocates memory if necessary.
......@@ -91,16 +100,17 @@ void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx)
void * ptr, * volatile * ptr_ptr= 0;
int i;
for (i= 3; idx < dynarray_idxes_in_prev_level[i]; i--) /* no-op */;
for (i= LF_DYNARRAY_LEVELS-1; idx < dynarray_idxes_in_prev_levels[i]; i--)
/* no-op */;
ptr_ptr= &array->level[i];
idx-= dynarray_idxes_in_prev_level[i];
idx-= dynarray_idxes_in_prev_levels[i];
for (; i > 0; i--)
{
if (!(ptr= *ptr_ptr))
{
void *alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * sizeof(void *),
MYF(MY_WME|MY_ZEROFILL));
if (!alloc)
MYF(MY_WME|MY_ZEROFILL));
if (unlikely(!alloc))
return(NULL);
if (my_atomic_casptr(ptr_ptr, &ptr, alloc))
ptr= alloc;
......@@ -116,7 +126,7 @@ void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx)
alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * array->size_of_element +
max(array->size_of_element, sizeof(void *)),
MYF(MY_WME|MY_ZEROFILL));
if (!alloc)
if (unlikely(!alloc))
return(NULL);
/* reserve the space for free() address */
data= alloc + sizeof(void *);
......@@ -143,9 +153,10 @@ void *_lf_dynarray_value(LF_DYNARRAY *array, uint idx)
void * ptr, * volatile * ptr_ptr= 0;
int i;
for (i= 3; idx < dynarray_idxes_in_prev_level[i]; i--) /* no-op */;
for (i= LF_DYNARRAY_LEVELS-1; idx < dynarray_idxes_in_prev_levels[i]; i--)
/* no-op */;
ptr_ptr= &array->level[i];
idx-= dynarray_idxes_in_prev_level[i];
idx-= dynarray_idxes_in_prev_levels[i];
for (; i > 0; i--)
{
if (!(ptr= *ptr_ptr))
......
......@@ -23,6 +23,7 @@
(but how to do it in lf_hash_delete ?)
*/
#include <my_global.h>
#include <m_string.h>
#include <my_sys.h>
#include <my_bit.h>
#include <lf.h>
......@@ -33,7 +34,7 @@ LF_REQUIRE_PINS(3);
typedef struct {
intptr volatile link; /* a pointer to the next element in a list and a flag */
uint32 hashnr; /* reversed hash number, for sorting */
const uchar *key;
const byte *key;
uint keylen;
} LF_SLIST;
......@@ -67,31 +68,31 @@ typedef struct {
pins[0..2] are used, they are NOT removed on return
*/
static int lfind(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr,
const uchar *key, uint keylen, CURSOR *cursor, LF_PINS *pins)
const byte *key, uint keylen, CURSOR *cursor, LF_PINS *pins)
{
uint32 cur_hashnr;
const uchar *cur_key;
const byte *cur_key;
uint cur_keylen;
intptr link;
retry:
cursor->prev=(intptr *)head;
cursor->prev= (intptr *)head;
do {
cursor->curr=PTR(*cursor->prev);
_lf_pin(pins,1,cursor->curr);
cursor->curr= PTR(*cursor->prev);
_lf_pin(pins, 1, cursor->curr);
} while(*cursor->prev != (intptr)cursor->curr && LF_BACKOFF);
for (;;)
{
if (!cursor->curr)
return 0;
do { // XXX or goto retry ?
link=cursor->curr->link;
cursor->next=PTR(link);
link= cursor->curr->link;
cursor->next= PTR(link);
_lf_pin(pins, 0, cursor->next);
} while(link != cursor->curr->link && LF_BACKOFF);
cur_hashnr=cursor->curr->hashnr;
cur_key=cursor->curr->key;
cur_keylen=cursor->curr->keylen;
cur_hashnr= cursor->curr->hashnr;
cur_key= cursor->curr->key;
cur_keylen= cursor->curr->keylen;
if (*cursor->prev != (intptr)cursor->curr)
{
LF_BACKOFF;
......@@ -101,12 +102,12 @@ retry:
{
if (cur_hashnr >= hashnr)
{
int r=1;
int r= 1;
if (cur_hashnr > hashnr ||
(r=my_strnncoll(cs, cur_key, cur_keylen, key, keylen)) >= 0)
(r= my_strnncoll(cs, cur_key, cur_keylen, key, keylen)) >= 0)
return !r;
}
cursor->prev=&(cursor->curr->link);
cursor->prev= &(cursor->curr->link);
_lf_pin(pins, 2, cursor->curr);
}
else
......@@ -120,7 +121,7 @@ retry:
goto retry;
}
}
cursor->curr=cursor->next;
cursor->curr= cursor->next;
_lf_pin(pins, 1, cursor->curr);
}
}
......@@ -141,21 +142,21 @@ static LF_SLIST *linsert(LF_SLIST * volatile *head, CHARSET_INFO *cs,
LF_SLIST *node, LF_PINS *pins, uint flags)
{
CURSOR cursor;
int res=-1;
int res= -1;
do
{
if (lfind(head, cs, node->hashnr, node->key, node->keylen,
&cursor, pins) &&
(flags & LF_HASH_UNIQUE))
res=0; /* duplicate found */
res= 0; /* duplicate found */
else
{
node->link=(intptr)cursor.curr;
node->link= (intptr)cursor.curr;
assert(node->link != (intptr)node);
assert(cursor.prev != &node->link);
if (my_atomic_casptr((void **)cursor.prev, (void **)&cursor.curr, node))
res=1; /* inserted ok */
res= 1; /* inserted ok */
}
} while (res == -1);
_lf_unpin(pins, 0);
......@@ -177,10 +178,10 @@ static LF_SLIST *linsert(LF_SLIST * volatile *head, CHARSET_INFO *cs,
it uses pins[0..2], on return all pins are removed.
*/
static int ldelete(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr,
const uchar *key, uint keylen, LF_PINS *pins)
const byte *key, uint keylen, LF_PINS *pins)
{
CURSOR cursor;
int res=-1;
int res= -1;
do
{
......@@ -218,30 +219,30 @@ static int ldelete(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr,
all other pins are removed.
*/
static LF_SLIST *lsearch(LF_SLIST * volatile *head, CHARSET_INFO *cs,
uint32 hashnr, const uchar *key, uint keylen,
uint32 hashnr, const byte *key, uint keylen,
LF_PINS *pins)
{
CURSOR cursor;
int res=lfind(head, cs, hashnr, key, keylen, &cursor, pins);
int res= lfind(head, cs, hashnr, key, keylen, &cursor, pins);
if (res) _lf_pin(pins, 2, cursor.curr);
_lf_unpin(pins, 0);
_lf_unpin(pins, 1);
return res ? cursor.curr : 0;
}
static inline const uchar* hash_key(const LF_HASH *hash,
const uchar *record, uint *length)
static inline const byte* hash_key(const LF_HASH *hash,
const byte *record, uint *length)
{
if (hash->get_key)
return (*hash->get_key)(record,length,0);
*length=hash->key_length;
return (*hash->get_key)(record, length, 0);
*length= hash->key_length;
return record + hash->key_offset;
}
static inline uint calc_hash(LF_HASH *hash, const uchar *key, uint keylen)
static inline uint calc_hash(LF_HASH *hash, const byte *key, uint keylen)
{
ulong nr1=1, nr2=4;
hash->charset->coll->hash_sort(hash->charset,key,keylen,&nr1,&nr2);
ulong nr1= 1, nr2= 4;
hash->charset->coll->hash_sort(hash->charset, key, keylen, &nr1, &nr2);
return nr1 & INT_MAX32;
}
......@@ -258,28 +259,28 @@ void lf_hash_init(LF_HASH *hash, uint element_size, uint flags,
lf_alloc_init(&hash->alloc, sizeof(LF_SLIST)+element_size,
offsetof(LF_SLIST, key));
lf_dynarray_init(&hash->array, sizeof(LF_SLIST **));
hash->size=1;
hash->count=0;
hash->element_size=element_size;
hash->flags=flags;
hash->charset=charset ? charset : &my_charset_bin;
hash->key_offset=key_offset;
hash->key_length=key_length;
hash->get_key=get_key;
hash->size= 1;
hash->count= 0;
hash->element_size= element_size;
hash->flags= flags;
hash->charset= charset ? charset : &my_charset_bin;
hash->key_offset= key_offset;
hash->key_length= key_length;
hash->get_key= get_key;
DBUG_ASSERT(get_key ? !key_offset && !key_length : key_length);
}
void lf_hash_destroy(LF_HASH *hash)
{
LF_SLIST *el=*(LF_SLIST **)_lf_dynarray_lvalue(&hash->array, 0);
LF_SLIST *el= *(LF_SLIST **)_lf_dynarray_lvalue(&hash->array, 0);
while (el)
{
intptr next=el->link;
intptr next= el->link;
if (el->hashnr & 1)
lf_alloc_real_free(&hash->alloc, el);
else
my_free((void *)el, MYF(0));
el=(LF_SLIST *)next;
el= (LF_SLIST *)next;
}
lf_alloc_destroy(&hash->alloc);
lf_dynarray_destroy(&hash->array);
......@@ -299,19 +300,19 @@ void lf_hash_destroy(LF_HASH *hash)
*/
int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data)
{
uint csize, bucket, hashnr;
int csize, bucket, hashnr;
LF_SLIST *node, * volatile *el;
lf_rwlock_by_pins(pins);
node=(LF_SLIST *)_lf_alloc_new(pins);
node= (LF_SLIST *)_lf_alloc_new(pins);
memcpy(node+1, data, hash->element_size);
node->key= hash_key(hash, (uchar *)(node+1), &node->keylen);
node->key= hash_key(hash, (byte *)(node+1), &node->keylen);
hashnr= calc_hash(hash, node->key, node->keylen);
bucket= hashnr % hash->size;
el=_lf_dynarray_lvalue(&hash->array, bucket);
el= _lf_dynarray_lvalue(&hash->array, bucket);
if (*el == NULL)
initialize_bucket(hash, el, bucket, pins);
node->hashnr=my_reverse_bits(hashnr) | 1;
node->hashnr= my_reverse_bits(hashnr) | 1;
if (linsert(el, hash->charset, node, pins, hash->flags))
{
_lf_alloc_free(pins, node);
......@@ -335,15 +336,15 @@ int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data)
int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen)
{
LF_SLIST * volatile *el;
uint bucket, hashnr=calc_hash(hash, (uchar *)key, keylen);
uint bucket, hashnr= calc_hash(hash, (byte *)key, keylen);
bucket= hashnr % hash->size;
lf_rwlock_by_pins(pins);
el=_lf_dynarray_lvalue(&hash->array, bucket);
el= _lf_dynarray_lvalue(&hash->array, bucket);
if (*el == NULL)
initialize_bucket(hash, el, bucket, pins);
if (ldelete(el, hash->charset, my_reverse_bits(hashnr) | 1,
(uchar *)key, keylen, pins))
(byte *)key, keylen, pins))
{
lf_rwunlock_by_pins(pins);
return 1;
......@@ -360,33 +361,33 @@ int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen)
void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen)
{
LF_SLIST * volatile *el, *found;
uint bucket, hashnr=calc_hash(hash, (uchar *)key, keylen);
uint bucket, hashnr= calc_hash(hash, (byte *)key, keylen);
bucket= hashnr % hash->size;
lf_rwlock_by_pins(pins);
el=_lf_dynarray_lvalue(&hash->array, bucket);
el= _lf_dynarray_lvalue(&hash->array, bucket);
if (*el == NULL)
initialize_bucket(hash, el, bucket, pins);
found= lsearch(el, hash->charset, my_reverse_bits(hashnr) | 1,
(uchar *)key, keylen, pins);
(byte *)key, keylen, pins);
lf_rwunlock_by_pins(pins);
return found ? found+1 : 0;
}
static char *dummy_key="";
static char *dummy_key= "";
static void initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node,
uint bucket, LF_PINS *pins)
{
uint parent= my_clear_highest_bit(bucket);
LF_SLIST *dummy=(LF_SLIST *)my_malloc(sizeof(LF_SLIST), MYF(MY_WME));
LF_SLIST **tmp=0, *cur;
LF_SLIST * volatile *el=_lf_dynarray_lvalue(&hash->array, parent);
LF_SLIST *dummy= (LF_SLIST *)my_malloc(sizeof(LF_SLIST), MYF(MY_WME));
LF_SLIST **tmp= 0, *cur;
LF_SLIST * volatile *el= _lf_dynarray_lvalue(&hash->array, parent);
if (*el == NULL && bucket)
initialize_bucket(hash, el, parent, pins);
dummy->hashnr=my_reverse_bits(bucket);
dummy->key=dummy_key;
dummy->keylen=0;
dummy->hashnr= my_reverse_bits(bucket);
dummy->key= dummy_key;
dummy->keylen= 0;
if ((cur= linsert(el, hash->charset, dummy, pins, 0)))
{
my_free((void *)dummy, MYF(0));
......
......@@ -35,10 +35,6 @@ ulonglong my_getsystime()
LARGE_INTEGER t_cnt;
if (!offset)
{
/* strictly speaking there should be a mutex to protect
initialization section. But my_getsystime() is called from
UUID() code, and UUID() calls are serialized with a mutex anyway
*/
LARGE_INTEGER li;
FILETIME ft;
GetSystemTimeAsFileTime(&ft);
......
......@@ -32,7 +32,7 @@
SLX - Shared + Loose eXclusive
LSIX - Loose Shared + Intention eXclusive
*/
enum lock_type { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX };
enum lock_type { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX, LOCK_TYPE_LAST };
struct lockman_lock;
......@@ -55,9 +55,10 @@ typedef struct {
uint lock_timeout;
loid_to_lo_func *loid_to_lo;
} LOCKMAN;
#define DIDNT_GET_THE_LOCK 0
enum lockman_getlock_result {
DIDNT_GET_THE_LOCK=0, GOT_THE_LOCK,
NO_MEMORY_FOR_LOCK=1, DEADLOCK, LOCK_TIMEOUT,
GOT_THE_LOCK,
GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE,
GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE
};
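The ordering is deliberate: every failure code now sorts before
GOT_THE_LOCK, so a caller can test the outcome with a single comparison,
as the unit tests below do ('lm', 'lo' and 'table' in this sketch are
illustrative):

enum lockman_getlock_result res= tablockman_getlock(lm, lo, table, IX);
if (res < GOT_THE_LOCK)
{
  /* NO_MEMORY_FOR_LOCK, DEADLOCK or LOCK_TIMEOUT: release all locks,
     roll back the transaction, possibly retry */
}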
......
// TODO - allocate everything from dynarrays !!! (benchmark)
// automatically place S instead of LS if possible
#warning TODO - allocate everything from dynarrays !!! (benchmark)
#warning automatically place S instead of LS if possible
/* Copyright (C) 2006 MySQL AB
This program is free software; you can redistribute it and/or modify
......@@ -16,10 +16,8 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#include <my_global.h>
#include <my_sys.h>
#include <my_bit.h>
#include <lf.h>
#include <my_base.h>
#include <hash.h>
#include "tablockman.h"
/*
......@@ -53,45 +51,54 @@
resource, check if a conflicting lock exists, if yes - find who owns it.
Solution: every resource has a structure with
1. Hash of "active" (see below for the description of "active") granted
locks with loid as a key. Thus, checking if a given transaction has a
lock on this resource is O(1) operation.
1. Hash of latest (see the lock upgrade section below) granted locks with
loid as a key. Thus, checking if a given transaction has a lock on
this resource is O(1) operation.
2. Doubly-linked lists of all granted locks - one list for every lock
type. Thus, checking if a conflicting lock exists is a check whether
an appropriate list head pointer is not null, also O(1).
3. Every lock has a loid of the owner, thus checking who owns a
conflicting lock is also O(1).
4. Deque of waiting locks. It's a deque not a fifo, because for lock
upgrades requests are added to the queue head, not tail. There's never
a need to scan the queue.
Result: adding or removing a lock is always a O(1) operation, it does not
depend on the number of locks on the resource, or number of transactions,
or number of resources. It _does_ depend on the number of different lock
levels - O(number_of_lock_levels) - but it's a constant.
4. Deque of waiting locks. It's a deque (double-ended queue) not a fifo,
because for lock upgrades requests are added to the queue head, not
tail. This is the single place where it gets O(N) in the number
of locks - when a transaction wakes up from waiting on a condition,
it may need to scan the queue backward to the beginning to find
a conflicting lock. It is guaranteed though that "all transactions
before it" received the same - or earlier - signal. In other words a
transaction needs to scan all transactions before it that received the
signal but didn't have a chance to resume the execution yet, so
in practice the OS scheduler won't let the scan be O(N).
Waiting: if there is a conflicting lock or if wait queue is not empty, a
requested lock cannot be granted at once. It is added to the end of the
wait queue. If there is a conflicting lock - the "blocker" transaction is
the owner of this lock. If there's no conflict but a queue was not empty,
than the "blocker" is the transaction that the owner of the lock at the
end of the queue is waiting for (in other words, our lock is added to the
end of the wait queue, and our blocker is the same as of the lock right
before us).
wait queue. If a queue was empty and there is a conflicting lock - the
"blocker" transaction is the owner of this lock. If a queue is not empty,
an owner of the previous lock in the queue is the "blocker". But if the
previous lock is compatible with the request, then the "blocker" is the
transaction that the owner of the lock at the end of the queue is waiting
for (in other words, our lock is added to the end of the wait queue, and
our blocker is the same as of the lock right before us).
Lock upgrades: when a thread that has a lock on a given resource,
requests a new lock on the same resource and the old lock is not enough
to satisfy new lock requirements (which is defined by
lock_combining_matrix[old_lock][new_lock] != old_lock), a new lock
(defineded by lock_combining_matrix as above) is placed. Depending on
other granted locks it is immediately active or it has to wait. Here the
(defined by lock_combining_matrix as above) is placed. Depending on
other granted locks it is immediately granted or it has to wait. Here the
lock is added to the start of the waiting queue, not to the end. Old
lock, is removed from the hash, but not from the doubly-linked lists.
(indeed, a transaction checks "do I have a lock on this resource ?" by
looking in a hash, and it should find a latest lock, so old locks must be
removed; but a transaction checks "are the conflicting locks ?" by
removed; but a transaction checks "are there conflicting locks ?" by
checking doubly-linked lists, it doesn't matter if it will find an old
lock - if it would be removed, a new lock would be also a conflict).
So, a hash contains only "latest" locks - there can be only one latest
lock per resource per transaction. But doubly-linked lists contain all
locks, even "obsolete" ones, because it doesn't hurt. Note that old
locks can not be freed early, in particular they stay in the
'active_locks' list of a lock owner, because they may be "re-enabled"
on a savepoint rollback.
To better support table-row relations where one needs to lock the table
with an intention lock before locking the row, extended diagnostics is
......@@ -107,6 +114,18 @@
Instant duration locks are not supported. Though they're trivial to add,
they are normally only used on rows, not on tables. So, presumably,
they are not needed here.
Mutexes: there're table mutexes (LOCKED_TABLE::mutex), lock owner mutexes
(TABLE_LOCK_OWNER::mutex), and a pool mutex (TABLOCKMAN::pool_mutex).
table mutex protects operations on the table lock structures, and lock
owner pointers waiting_for and waiting_for_loid.
lock owner mutex is only used to wait on lock owner condition
(TABLE_LOCK_OWNER::cond), there's no need to protect owner's lock
structures, and only lock owner itself may access them.
The pool mutex protects a pool of unused locks. Note the locking order:
first the table mutex, then the owner mutex or a pool mutex.
Table mutex lock cannot be attempted when owner or pool mutex are locked.
No mutex lock can be attempted if owner or pool mutex are locked.
*/
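To make the upgrade rule above concrete, a small sketch (lock names as in
the enum; the matrices themselves are defined right below):

static void upgrade_example(void)
{
  /* a transaction that holds IS on a table now requests IX on it */
  enum lock_type old_lock= IS, request= IX;
  enum lock_type new_lock= lock_combining_matrix[old_lock][request]; /* IX */

  if (new_lock == old_lock)
  {
    /* the granted lock already covers the request - nothing to place */
  }
  else
  {
    /* place new_lock at the _head_ of the wait queue; drop old_lock from
       the latest_locks hash but keep it in the per-type lists */
  }
}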
/*
......@@ -122,9 +141,9 @@
0 - incompatible
-1 - "impossible", so that we can assert the impossibility.
*/
static int lock_compatibility_matrix[10][10]=
static const int lock_compatibility_matrix[10][10]=
{ /* N S X IS IX SIX LS LX SLX LSIX */
{ -1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* N */
{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, /* N */
{ -1, 1, 0, 1, 0, 0, 1, 0, 0, 0 }, /* S */
{ -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* X */
{ -1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, /* IS */
......@@ -144,18 +163,18 @@ static int lock_compatibility_matrix[10][10]=
One should never get N from it, we assert the impossibility
*/
static enum lock_type lock_combining_matrix[10][10]=
static const enum lock_type lock_combining_matrix[10][10]=
{/* N S X IS IX SIX LS LX SLX LSIX */
{ N, S, X, IS, IX, SIX, S, SLX, SLX, SIX}, /* N */
{ S, S, X, S, SIX, SIX, S, SLX, SLX, SIX}, /* S */
{ X, X, X, X, X, X, X, X, X, X}, /* X */
{ IS, S, X, IS, IX, SIX, LS, LX, SLX, LSIX}, /* IS */
{ IX, SIX, X, IX, IX, SIX, LSIX, LX, SLX, LSIX}, /* IX */
{ SIX, SIX, X, SIX, SIX, SIX, SIX, SLX, SLX, SIX}, /* SIX */
{ LS, S, X, LS, LSIX, SIX, LS, LX, SLX, LSIX}, /* LS */
{ LX, SLX, X, LX, LX, SLX, LX, LX, SLX, LX}, /* LX */
{ SLX, SLX, X, SLX, SLX, SLX, SLX, SLX, SLX, SLX}, /* SLX */
{ LSIX, SIX, X, LSIX, LSIX, SIX, LSIX, LX, SLX, LSIX} /* LSIX */
{ N, N, N, N, N, N, N, N, N, N}, /* N */
{ N, S, X, S, SIX, SIX, S, SLX, SLX, SIX}, /* S */
{ N, X, X, X, X, X, X, X, X, X}, /* X */
{ N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX}, /* IS */
{ N, SIX, X, IX, IX, SIX, LSIX, LX, SLX, LSIX}, /* IX */
{ N, SIX, X, SIX, SIX, SIX, SIX, SLX, SLX, SIX}, /* SIX */
{ N, S, X, LS, LSIX, SIX, LS, LX, SLX, LSIX}, /* LS */
{ N, SLX, X, LX, LX, SLX, LX, LX, SLX, LX}, /* LX */
{ N, SLX, X, SLX, SLX, SLX, SLX, SLX, SLX, SLX}, /* SLX */
{ N, SIX, X, LSIX, LSIX, SIX, LSIX, LX, SLX, LSIX} /* LSIX */
};
/*
......@@ -176,7 +195,7 @@ static enum lock_type lock_combining_matrix[10][10]=
#define L GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE
#define A GOT_THE_LOCK
#define x GOT_THE_LOCK
static enum lockman_getlock_result getlock_result[10][10]=
static const enum lockman_getlock_result getlock_result[10][10]=
{/* N S X IS IX SIX LS LX SLX LSIX */
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, /* N */
{ 0, x, 0, A, 0, 0, x, 0, 0, 0}, /* S */
......@@ -200,37 +219,47 @@ static enum lockman_getlock_result getlock_result[10][10]=
*/
struct st_table_lock {
#warning do we need upgraded_from ?
struct st_table_lock *next_in_lo, *upgraded_from, *next, *prev;
struct st_locked_table *table;
uint16 loid;
char lock_type;
uchar lock_type;
};
#define hash_insert my_hash_insert /* for consistency :) */
#define remove_from_wait_queue(LOCK, TABLE) \
do \
{ \
if ((LOCK)->prev) \
{ \
DBUG_ASSERT((TABLE)->wait_queue_out != (LOCK)); \
(LOCK)->prev->next= (LOCK)->next; \
} \
else \
{ \
DBUG_ASSERT((TABLE)->wait_queue_out == (LOCK)); \
(TABLE)->wait_queue_out= (LOCK)->next; \
} \
if ((LOCK)->next) \
{ \
DBUG_ASSERT((TABLE)->wait_queue_in != (LOCK)); \
(LOCK)->next->prev= (LOCK)->prev; \
} \
else \
{ \
DBUG_ASSERT((TABLE)->wait_queue_in == (LOCK)); \
(TABLE)->wait_queue_in= (LOCK)->prev; \
} \
} while (0)
static inline
TABLE_LOCK *find_loid(LOCKED_TABLE *table, uint16 loid)
{
return (TABLE_LOCK *)hash_search(& table->latest_locks,
(byte *)& loid, sizeof(loid));
}
static inline
void remove_from_wait_queue(TABLE_LOCK *lock, LOCKED_TABLE *table)
{
DBUG_ASSERT(table == lock->table);
if (lock->prev)
{
DBUG_ASSERT(table->wait_queue_out != lock);
lock->prev->next= lock->next;
}
else
{
DBUG_ASSERT(table->wait_queue_out == lock);
table->wait_queue_out= lock->next;
}
if (lock->next)
{
DBUG_ASSERT(table->wait_queue_in != lock);
lock->next->prev= lock->prev;
}
else
{
DBUG_ASSERT(table->wait_queue_in == lock);
table->wait_queue_in= lock->prev;
}
}
/*
DESCRIPTION
......@@ -243,24 +272,31 @@ enum lockman_getlock_result
tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo,
LOCKED_TABLE *table, enum lock_type lock)
{
TABLE_LOCK *old, *new, *blocker;
TABLE_LOCK *old, *new, *blocker, *blocker2;
TABLE_LOCK_OWNER *wait_for;
ulonglong deadline;
struct timespec timeout;
enum lock_type new_lock;
enum lockman_getlock_result res;
int i;
DBUG_ASSERT(lo->waiting_lock == 0);
DBUG_ASSERT(lo->waiting_for == 0);
DBUG_ASSERT(lo->waiting_for_loid == 0);
pthread_mutex_lock(& table->mutex);
/* do we alreasy have a lock on this resource ? */
old= (TABLE_LOCK *)hash_search(& table->active, (byte *)&lo->loid,
sizeof(lo->loid));
/* do we already have a lock on this resource ? */
old= find_loid(table, lo->loid);
/* and if yes, is it enough to satisfy the new request */
if (old && lock_combining_matrix[old->lock_type][lock] == old->lock_type)
/* calculate the level of the upgraded lock, if yes */
new_lock= old ? lock_combining_matrix[old->lock_type][lock] : lock;
/* and check if old lock is enough to satisfy the new request */
if (old && new_lock == old->lock_type)
{
/* yes */
pthread_mutex_unlock(& table->mutex);
return getlock_result[old->lock_type][lock];
res= getlock_result[old->lock_type][lock];
goto ret;
}
/* no, placing a new lock. first - take a free lock structure from the pool */
......@@ -275,48 +311,81 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo,
{
pthread_mutex_unlock(& lm->pool_mutex);
new= (TABLE_LOCK *)my_malloc(sizeof(*new), MYF(MY_WME));
if (!new)
if (unlikely(!new))
{
pthread_mutex_unlock(& table->mutex);
return DIDNT_GET_THE_LOCK;
res= NO_MEMORY_FOR_LOCK;
goto ret;
}
}
/* calculate the level of the upgraded lock */
new_lock= old ? lock_combining_matrix[old->lock_type][lock] : lock;
new->loid= lo->loid;
new->lock_type= new_lock;
new->table= table;
/* and try to place it */
for (new->prev= table->wait_queue_in ; ; )
for (new->prev= table->wait_queue_in;;)
{
/* waiting queue is not empty and we're not upgrading */
if (!old && new->prev)
wait_for= 0;
if (!old)
{
/* need to wait */
DBUG_ASSERT(table->wait_queue_out);
DBUG_ASSERT(table->wait_queue_in);
blocker= new->prev;
/* wait for a previous lock in the queue or for a lock it's waiting for */
if (lock_compatibility_matrix[blocker->lock_type][lock])
wait_for= lm->loid_to_tlo(blocker->loid)->waiting_for;
else
wait_for= lm->loid_to_tlo(blocker->loid);
/* not upgrading - a lock must be added to the _end_ of the wait queue */
for (blocker= new->prev; blocker && !wait_for; blocker= blocker->prev)
{
TABLE_LOCK_OWNER *tmp= lm->loid_to_tlo(blocker->loid);
/* find a blocking lock */
DBUG_ASSERT(table->wait_queue_out);
DBUG_ASSERT(table->wait_queue_in);
if (!lock_compatibility_matrix[blocker->lock_type][lock])
{
/* found! */
wait_for= tmp;
}
else
{
/*
hmm, the lock before doesn't block us, let's look one step further.
the condition below means:
if we never waited on a condition yet
OR
the lock before ours (blocker) waits on a lock (blocker2) that is
present in the hash AND conflicts with 'blocker'
the condition after OR may fail if 'blocker2' was removed from
the hash, its signal woke us up, but 'blocker' itself didn't see
the signal yet.
*/
if (!lo->waiting_lock ||
((blocker2= find_loid(table, tmp->waiting_for_loid)) &&
!lock_compatibility_matrix[blocker2->lock_type]
[blocker->lock_type]))
{
/* but it's waiting for a real lock. we'll wait for the same lock */
wait_for= tmp->waiting_for;
}
/*
otherwise - a lock it's waiting for doesn't exist.
We've no choice but to scan the wait queue backwards, looking
for a conflicting lock or a lock waiting for a real lock.
QQ is there a way to avoid this scanning ?
*/
}
}
}
else
if (wait_for == 0)
{
/* checking for compatibility with existing locks */
for (blocker= 0, i= 0; i < LOCK_TYPES; i++)
{
if (table->active_locks[i] && !lock_compatibility_matrix[i+1][lock])
{
/* the first lock in the list may be our own - skip it */
for (blocker= table->active_locks[i];
blocker && blocker->loid == lo->loid;
blocker= blocker->next) /* no-op */;
if (blocker)
blocker= table->active_locks[i];
/* if the first lock in the list is our own - skip it */
if (blocker->loid == lo->loid)
blocker= blocker->next;
if (blocker) /* found a conflicting lock, need to wait */
break;
}
}
......@@ -327,6 +396,7 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo,
/* ok, we're here - the wait is inevitable */
lo->waiting_for= wait_for;
lo->waiting_for_loid= wait_for->loid;
if (!lo->waiting_lock) /* first iteration of the for() loop */
{
/* lock upgrade or new lock request ? */
......@@ -338,7 +408,7 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo,
new->next->prev= new;
table->wait_queue_out= new;
if (!table->wait_queue_in)
table->wait_queue_in=table->wait_queue_out;
table->wait_queue_in= table->wait_queue_out;
}
else
{
......@@ -348,7 +418,7 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo,
new->prev->next= new;
table->wait_queue_in= new;
if (!table->wait_queue_out)
table->wait_queue_out=table->wait_queue_in;
table->wait_queue_out= table->wait_queue_in;
}
lo->waiting_lock= new;
......@@ -356,22 +426,28 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo,
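/* my_getsystime() counts in 100 ns units, i.e. 10^7 ticks per second: */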
timeout.tv_sec= deadline/10000000;
timeout.tv_nsec= (deadline % 10000000) * 100;
}
else
{
if (my_getsystime() > deadline)
{
pthread_mutex_unlock(& table->mutex);
return DIDNT_GET_THE_LOCK;
}
}
/* now really wait */
/*
prepare to wait.
we must lock blocker's mutex to wait on blocker's cond.
and we must release table's mutex.
note that blocker's mutex is locked _before_ table's mutex is released
*/
pthread_mutex_lock(wait_for->mutex);
pthread_mutex_unlock(& table->mutex);
pthread_cond_timedwait(wait_for->cond, wait_for->mutex, &timeout);
/* now really wait */
i= pthread_cond_timedwait(wait_for->cond, wait_for->mutex, & timeout);
pthread_mutex_unlock(wait_for->mutex);
if (i == ETIMEDOUT || i == ETIME)
{
/* we rely on the caller to rollback and release all locks */
res= LOCK_TIMEOUT;
goto ret2;
}
pthread_mutex_lock(& table->mutex);
/* ... and repeat from the beginning */
......@@ -384,6 +460,7 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo,
remove_from_wait_queue(new, table);
lo->waiting_lock= 0;
lo->waiting_for= 0;
lo->waiting_for_loid= 0;
}
/* add it to the list of all locks of this lock owner */
......@@ -396,20 +473,20 @@ tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo,
new->next->prev= new;
table->active_locks[new_lock-1]= new;
/* remove the old lock from the hash, if upgrading */
/* update the latest_locks hash */
if (old)
{
new->upgraded_from= old;
hash_delete(& table->active, (byte *)old);
}
else
new->upgraded_from= 0;
hash_delete(& table->latest_locks, (byte *)old);
hash_insert(& table->latest_locks, (byte *)new);
/* and add a new lock to the hash, voila */
hash_insert(& table->active, (byte *)new);
new->upgraded_from= old;
res= getlock_result[lock][lock];
ret:
pthread_mutex_unlock(& table->mutex);
return getlock_result[lock][lock];
ret2:
DBUG_ASSERT(res);
return res;
}
/*
......@@ -443,6 +520,17 @@ void tablockman_release_locks(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo)
Signal our blocker to release this next lock (after we removed our
lock from the wait queue, of course).
*/
/*
An example to clarify the above:
trn1> S-lock the table. Granted.
trn2> IX-lock the table. Added to the wait queue. trn2 waits on trn1
trn3> IS-lock the table. The queue is not empty, so IS-lock is added
to the queue. It's compatible with the waiting IX-lock, so trn3
waits for trn2->waiting_for, that is trn1.
if trn1 releases the lock it signals trn1->cond and both waiting
transactions are awakened. But if trn2 times out, trn3 must be notified
too (as IS and S locks are compatible). So trn2 must signal trn1->cond.
*/
if (lock->prev &&
lock_compatibility_matrix[lock->prev->lock_type][lock->lock_type])
{
......@@ -451,6 +539,7 @@ void tablockman_release_locks(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo)
pthread_mutex_unlock(lo->waiting_for->mutex);
}
lo->waiting_for= 0;
lo->waiting_for_loid= 0;
pthread_mutex_unlock(& lock->table->mutex);
lock->next= local_pool;
......@@ -465,11 +554,12 @@ void tablockman_release_locks(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo)
pthread_mutex_t *mutex= & lock->table->mutex;
DBUG_ASSERT(cur->loid == lo->loid);
DBUG_ASSERT(lock != lock->next_in_lo);
lock= lock->next_in_lo;
/* TODO ? group locks by table to reduce the number of mutex locks */
pthread_mutex_lock(mutex);
hash_delete(& cur->table->active, (byte *)cur);
hash_delete(& cur->table->latest_locks, (byte *)cur);
if (cur->prev)
cur->prev->next= cur->next;
......@@ -506,7 +596,8 @@ void tablockman_init(TABLOCKMAN *lm, loid_to_tlo_func *func, uint timeout)
lm->pool= 0;
lm->loid_to_tlo= func;
lm->lock_timeout= timeout;
pthread_mutex_init(&lm->pool_mutex, MY_MUTEX_INIT_FAST);
pthread_mutex_init(& lm->pool_mutex, MY_MUTEX_INIT_FAST);
my_getsystime(); /* ensure that my_getsystime() is initialized */
}
void tablockman_destroy(TABLOCKMAN *lm)
......@@ -517,36 +608,54 @@ void tablockman_destroy(TABLOCKMAN *lm)
lm->pool= tmp->next;
my_free((void *)tmp, MYF(0));
}
pthread_mutex_destroy(&lm->pool_mutex);
pthread_mutex_destroy(& lm->pool_mutex);
}
/*
initialize a LOCKED_TABLE structure
SYNOPSIS
lt a LOCKED_TABLE to initialize
initial_hash_size initial size for 'latest_locks' hash
*/
void tablockman_init_locked_table(LOCKED_TABLE *lt, int initial_hash_size)
{
TABLE_LOCK *unused;
bzero(lt, sizeof(*lt));
pthread_mutex_init(& lt->mutex, MY_MUTEX_INIT_FAST);
hash_init(& lt->active, &my_charset_bin, initial_hash_size,
offsetof(TABLE_LOCK, loid), sizeof(unused->loid), 0, 0, 0);
hash_init(& lt->latest_locks, & my_charset_bin, initial_hash_size,
offsetof(TABLE_LOCK, loid),
sizeof(((TABLE_LOCK*)0)->loid), 0, 0, 0);
}
void tablockman_destroy_locked_table(LOCKED_TABLE *lt)
{
hash_free(& lt->active);
int i;
DBUG_ASSERT(lt->wait_queue_out == 0);
DBUG_ASSERT(lt->wait_queue_in == 0);
DBUG_ASSERT(lt->latest_locks.records == 0);
for (i= 0; i<LOCK_TYPES; i++)
DBUG_ASSERT(lt->active_locks[i] == 0);
hash_free(& lt->latest_locks);
pthread_mutex_destroy(& lt->mutex);
}
#ifdef EXTRA_DEBUG
static char *lock2str[LOCK_TYPES+1]= {"N", "S", "X", "IS", "IX", "SIX",
static const char *lock2str[LOCK_TYPES+1]= {"N", "S", "X", "IS", "IX", "SIX",
"LS", "LX", "SLX", "LSIX"};
void print_tlo(TABLE_LOCK_OWNER *lo)
void tablockman_print_tlo(TABLE_LOCK_OWNER *lo)
{
TABLE_LOCK *lock;
printf("lo%d>", lo->loid);
if ((lock= lo->waiting_lock))
printf(" (%s.%p)", lock2str[lock->lock_type], lock->table);
for (lock= lo->active_locks; lock && lock != lock->next_in_lo; lock= lock->next_in_lo)
printf(" %s.%p", lock2str[lock->lock_type], lock->table);
printf(" (%s.0x%lx)", lock2str[lock->lock_type], (intptr)lock->table);
for (lock= lo->active_locks;
lock && lock != lock->next_in_lo;
lock= lock->next_in_lo)
printf(" %s.0x%lx", lock2str[lock->lock_type], (intptr)lock->table);
if (lock && lock == lock->next_in_lo)
printf("!");
printf("\n");
......
......@@ -33,45 +33,45 @@
LSIX - Loose Shared + Intention eXclusive
*/
#ifndef _lockman_h
enum lock_type { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX };
#warning TODO remove N-locks
enum lock_type { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX, LOCK_TYPE_LAST };
enum lockman_getlock_result {
DIDNT_GET_THE_LOCK=0, GOT_THE_LOCK,
NO_MEMORY_FOR_LOCK=1, DEADLOCK, LOCK_TIMEOUT,
GOT_THE_LOCK,
GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE,
GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE
};
#endif
#define LOCK_TYPES LSIX
#define LOCK_TYPES (LOCK_TYPE_LAST-1)
typedef struct st_table_lock_owner TABLE_LOCK_OWNER;
typedef struct st_table_lock TABLE_LOCK;
typedef struct st_locked_table LOCKED_TABLE;
typedef struct st_table_lock_owner {
TABLE_LOCK *active_locks; /* list of active locks */
TABLE_LOCK *waiting_lock; /* waiting lock (one lock only) */
struct st_table_lock_owner *waiting_for; /* transaction we're waiting for */
pthread_cond_t *cond; /* transactions waiting for us, wait on 'cond' */
pthread_mutex_t *mutex; /* mutex is required to use 'cond' */
uint16 loid, waiting_for_loid; /* Lock Owner IDentifier */
} TABLE_LOCK_OWNER;
typedef struct st_locked_table {
pthread_mutex_t mutex; /* mutex for everything below */
HASH latest_locks; /* latest locks in a hash */
TABLE_LOCK *active_locks[LOCK_TYPES]; /* dl-list of locks per type */
TABLE_LOCK *wait_queue_in, *wait_queue_out; /* wait deque (double-end queue)*/
} LOCKED_TABLE;
typedef TABLE_LOCK_OWNER *loid_to_tlo_func(uint16);
typedef struct {
pthread_mutex_t pool_mutex;
TABLE_LOCK *pool; /* lifo pool of free locks */
uint lock_timeout;
loid_to_tlo_func *loid_to_tlo; /* for mapping loid to TABLE_LOCK_OWNER */
TABLE_LOCK *pool; /* lifo pool of free locks */
uint lock_timeout; /* lock timeout in milliseconds */
loid_to_tlo_func *loid_to_tlo; /* for mapping loid to TABLE_LOCK_OWNER */
} TABLOCKMAN;
struct st_table_lock_owner {
TABLE_LOCK *active_locks; /* list of active locks */
TABLE_LOCK *waiting_lock; /* waiting lock (one lock only) */
TABLE_LOCK_OWNER *waiting_for; /* transaction we're wating for */
pthread_cond_t *cond; /* transactions waiting for us, wait on 'cond' */
pthread_mutex_t *mutex; /* mutex is required to use 'cond' */
uint16 loid; /* Lock Owner IDentifier */
};
struct st_locked_table {
pthread_mutex_t mutex; /* mutex for everything below */
HASH active; /* active locks ina hash */
TABLE_LOCK *active_locks[LOCK_TYPES]; /* dl-list of locks per type */
TABLE_LOCK *wait_queue_in, *wait_queue_out; /* wait deque */
};
void tablockman_init(TABLOCKMAN *, loid_to_tlo_func *, uint);
void tablockman_destroy(TABLOCKMAN *);
enum lockman_getlock_result tablockman_getlock(TABLOCKMAN *, TABLE_LOCK_OWNER *,
......@@ -81,7 +81,7 @@ void tablockman_init_locked_table(LOCKED_TABLE *, int);
void tablockman_destroy_locked_table(LOCKED_TABLE *);
#ifdef EXTRA_DEBUG
void print_tlo(TABLE_LOCK_OWNER *);
void tablockman_print_tlo(TABLE_LOCK_OWNER *);
#endif
#endif
......
......@@ -69,7 +69,8 @@ static TRN *short_trid_to_TRN(uint16 short_trid)
return (TRN *)trn;
}
static byte *trn_get_hash_key(const byte *trn, uint* len, my_bool unused)
static byte *trn_get_hash_key(const byte *trn, uint* len,
my_bool unused __attribute__ ((unused)))
{
*len= sizeof(TrID);
return (byte *) & ((*((TRN **)trn))->trid);
......
......@@ -14,6 +14,10 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/*
lockman for row and table locks
*/
//#define EXTRA_VERBOSE
#include <tap.h>
......
......@@ -14,6 +14,10 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/*
lockman for row locks, tablockman for table locks
*/
//#define EXTRA_VERBOSE
#include <tap.h>
......@@ -64,7 +68,7 @@ TABLE_LOCK_OWNER *loid2lo1(uint16 loid)
#define lock_ok_l(O, R, L) \
test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE)
#define lock_conflict(O, R, L) \
test_lock(O, R, L, "cannot ", DIDNT_GET_THE_LOCK);
test_lock(O, R, L, "cannot ", LOCK_TIMEOUT);
void test_tablockman_simple()
{
......@@ -164,8 +168,11 @@ int Ntables= 10;
int table_lock_ratio= 10;
enum lock_type lock_array[6]= {S, X, LS, LX, IS, IX};
char *lock2str[6]= {"S", "X", "LS", "LX", "IS", "IX"};
char *res2str[4]= {
char *res2str[]= {
"DIDN'T GET THE LOCK",
"OUT OF MEMORY",
"DEADLOCK",
"LOCK TIMEOUT",
"GOT THE LOCK",
"GOT THE LOCK NEED TO LOCK A SUBRESOURCE",
"GOT THE LOCK NEED TO INSTANT LOCK A SUBRESOURCE"};
......@@ -191,7 +198,7 @@ pthread_handler_t test_lockman(void *arg)
res= tablockman_getlock(&tablockman, lo1, ltarray+table, lock_array[locklevel]);
DIAG(("loid %2d, table %d, lock %s, res %s", loid, table,
lock2str[locklevel], res2str[res]));
if (res == DIDNT_GET_THE_LOCK)
if (res < GOT_THE_LOCK)
{
lockman_release_locks(&lockman, lo); tablockman_release_locks(&tablockman, lo1);
DIAG(("loid %2d, release all locks", loid));
......@@ -208,11 +215,6 @@ pthread_handler_t test_lockman(void *arg)
lock2str[locklevel+4], res2str[res]));
switch (res)
{
case DIDNT_GET_THE_LOCK:
lockman_release_locks(&lockman, lo); tablockman_release_locks(&tablockman, lo1);
DIAG(("loid %2d, release all locks", loid));
timeout++;
continue;
case GOT_THE_LOCK:
continue;
case GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE:
......@@ -232,7 +234,10 @@ pthread_handler_t test_lockman(void *arg)
DBUG_ASSERT(res == GOT_THE_LOCK);
continue;
default:
DBUG_ASSERT(0);
lockman_release_locks(&lockman, lo); tablockman_release_locks(&tablockman, lo1);
DIAG(("loid %2d, release all locks", loid));
timeout++;
continue;
}
}
}
......
......@@ -14,6 +14,10 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/*
tablockman for row and table locks
*/
//#define EXTRA_VERBOSE
#include <tap.h>
......@@ -57,7 +61,7 @@ TABLE_LOCK_OWNER *loid2lo1(uint16 loid)
#define lock_ok_l(O, R, L) \
test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE)
#define lock_conflict(O, R, L) \
test_lock(O, R, L, "cannot ", DIDNT_GET_THE_LOCK);
test_lock(O, R, L, "cannot ", LOCK_TIMEOUT);
void test_tablockman_simple()
{
......@@ -165,14 +169,34 @@ void run_test(const char *test, pthread_handler handler, int n, int m)
my_free((void*)threads, MYF(0));
}
static void reinit_tlo(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo)
{
TABLE_LOCK_OWNER backup= *lo;
tablockman_release_locks(lm, lo);
/*
pthread_mutex_destroy(lo->mutex);
pthread_cond_destroy(lo->cond);
bzero(lo, sizeof(*lo));
lo->mutex= backup.mutex;
lo->cond= backup.cond;
lo->loid= backup.loid;
pthread_mutex_init(lo->mutex, MY_MUTEX_INIT_FAST);
pthread_cond_init(lo->cond, 0);*/
}
pthread_mutex_t rt_mutex;
int Nrows= 100;
int Ntables= 10;
int table_lock_ratio= 10;
enum lock_type lock_array[6]= {S, X, LS, LX, IS, IX};
char *lock2str[6]= {"S", "X", "LS", "LX", "IS", "IX"};
char *res2str[4]= {
"DIDN'T GET THE LOCK",
char *res2str[]= {
0,
"OUT OF MEMORY",
"DEADLOCK",
"LOCK TIMEOUT",
"GOT THE LOCK",
"GOT THE LOCK NEED TO LOCK A SUBRESOURCE",
"GOT THE LOCK NEED TO INSTANT LOCK A SUBRESOURCE"};
......@@ -200,9 +224,9 @@ pthread_handler_t test_lockman(void *arg)
res= tablockman_getlock(&tablockman, lo1, ltarray+table, lock_array[locklevel]);
DIAG(("loid %2d, table %d, lock %s, res %s", loid, table,
lock2str[locklevel], res2str[res]));
if (res == DIDNT_GET_THE_LOCK)
if (res < GOT_THE_LOCK)
{
tablockman_release_locks(&tablockman, lo1);
reinit_tlo(&tablockman, lo1);
DIAG(("loid %2d, release all locks", loid));
timeout++;
continue;
......@@ -217,11 +241,6 @@ pthread_handler_t test_lockman(void *arg)
lock2str[locklevel+4], res2str[res]));
switch (res)
{
case DIDNT_GET_THE_LOCK:
tablockman_release_locks(&tablockman, lo1);
DIAG(("loid %2d, release all locks", loid));
timeout++;
continue;
case GOT_THE_LOCK:
continue;
case GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE:
......@@ -230,9 +249,9 @@ pthread_handler_t test_lockman(void *arg)
res= tablockman_getlock(&tablockman, lo1, ltarray+row, lock_array[locklevel]);
DIAG(("loid %2d, ROW %d, lock %s, res %s", loid, row,
lock2str[locklevel], res2str[res]));
if (res == DIDNT_GET_THE_LOCK)
if (res < GOT_THE_LOCK)
{
tablockman_release_locks(&tablockman, lo1);
reinit_tlo(&tablockman, lo1);
DIAG(("loid %2d, release all locks", loid));
timeout++;
continue;
......@@ -240,12 +259,15 @@ pthread_handler_t test_lockman(void *arg)
DBUG_ASSERT(res == GOT_THE_LOCK);
continue;
default:
DBUG_ASSERT(0);
reinit_tlo(&tablockman, lo1);
DIAG(("loid %2d, release all locks", loid));
timeout++;
continue;
}
}
}
tablockman_release_locks(&tablockman, lo1);
reinit_tlo(&tablockman, lo1);
pthread_mutex_lock(&rt_mutex);
rt_num_threads--;
......@@ -264,7 +286,7 @@ int main()
my_init();
pthread_mutex_init(&rt_mutex, 0);
plan(39);
plan(40);
if (my_atomic_initialize())
return exit_status();
......@@ -299,7 +321,7 @@ int main()
Nrows= 100;
Ntables= 10;
table_lock_ratio= 10;
//run_test("\"random lock\" stress test", test_lockman, THREADS, CYCLES);
run_test("\"random lock\" stress test", test_lockman, THREADS, CYCLES);
#if 0
/* "real-life" simulation - many rows, no table locks */
Nrows= 1000000;
......