Commit ed567bd2 authored by Guilhem Bichot's avatar Guilhem Bichot

Fix for BUG#39210 "Maria deadlock in _ma_bitmap_wait_or_flush". It was a thread

which nobody woke up (see comment of ma_bitmap.c). No testcase, this requires
multiple threads and is automatically tested at push time by maria_stress.yy (pushbuild2).

storage/maria/ma_bitmap.c:
  * _ma_bitmap_wait_or_flush() didn't publish that it was waiting for bitmap to not
  be over-allocated (i.e. didn't modify bitmap->flush_all_requested) so nobody
  (_ma_bitmap_flushable(), _ma_bitmap_release_unused()) knew it had to wake it up
  => it stalled (BUG#39210). In fact the wait in _ma_bitmap_wait_or_flush()
  is not needed, it's ok if this function sends the over-allocated bitmap to page
  cache and keeps pin on it (_ma_bitmap_unpin_all() will unpin it later, and
  the one who added _ma_bitmap_wait_or_flush() didn't know it). Function
  is thus deleted, as _ma_bitmap_flush() can do its job.
  * After fixing that, test runs longer and BUG 39665 happens, which looks like
  a separate page cache bug.
  * Smaller changes: _ma_bitmap_flush_all() called write_changed_bitmap() even
  though it might not be changed; added some DBUG calls in functions; split
  assertions.
  * In _ma_bitmap_release_unused(), it's more logical to test non_flushable_state
  than now_transactional to know if we have to decrement non_flushable
  (it's exactly per the definition of non_flushable_state).
storage/maria/ma_blockrec.c:
  _ma_bitmap_wait_or_flush() is not needed.
  ******
  new prototype and splitting assertion in three (3rd one fires: BUG 39665)
storage/maria/ma_blockrec.h:
  _ma_bitmap_wait_or_flush() is not needed.
parent 058916ae
......@@ -281,6 +281,14 @@ my_bool _ma_bitmap_end(MARIA_SHARE *share)
by this thread (ie, checking the changed flag is ok). The reason we
check it again in the mutex is that if someone else did a flush at the
same time, we don't have to do the write.
This is also ok for _ma_scan_init_block_record() which does not want to
miss rows: it cares only for committed rows, that is, rows for which there
was a commit before our transaction started; as commit and transaction's
start are protected by the same LOCK_trn_list mutex, we see memory at
least as new as at other transaction's commit time, so if the committed
rows caused bitmap->changed to be true, we see it; if we see 0 it really
means a flush happened since then. So, it's ok to read without bitmap's
mutex.
RETURN
0 ok
......@@ -305,38 +313,6 @@ my_bool _ma_bitmap_flush(MARIA_SHARE *share)
}
/*
@brief Send updated bitmap to the page cache if bitmap is free
@note
This is used by reader threads which don't unpin things
*/
my_bool _ma_bitmap_wait_or_flush(MARIA_SHARE *share)
{
my_bool res= 0;
MARIA_FILE_BITMAP *bitmap= &share->bitmap;
DBUG_ENTER("_ma_bitmap_flush");
if (bitmap->changed)
{
pthread_mutex_lock(&bitmap->bitmap_lock);
while (bitmap->non_flushable && bitmap->changed)
{
DBUG_PRINT("info", ("waiting for bitmap to be flushable"));
pthread_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock);
}
if (bitmap->changed)
{
bitmap->changed= 0;
res= write_changed_bitmap(share, bitmap);
}
pthread_mutex_unlock(&bitmap->bitmap_lock);
}
DBUG_RETURN(res);
}
/**
Dirty-page filtering criteria for bitmap pages
......@@ -386,8 +362,11 @@ my_bool _ma_bitmap_flush_all(MARIA_SHARE *share)
unpinned. We keep the mutex to preserve this situation, and flush to the
file.
*/
if (bitmap->changed)
{
res= write_changed_bitmap(share, bitmap);
bitmap->changed= FALSE;
}
/*
We do NOT use FLUSH_KEEP_LAZY because we must be sure that bitmap
pages have been flushed. That's a condition of correctness of
......@@ -424,6 +403,8 @@ my_bool _ma_bitmap_flush_all(MARIA_SHARE *share)
@return Operation status
@retval 0 ok
@note This unpins pages pinned by other threads.
*/
static void _ma_bitmap_unpin_all(MARIA_SHARE *share)
......@@ -2139,8 +2120,8 @@ my_bool _ma_bitmap_set_full_page_bits(MARIA_HA *info,
function first waits for the flush to be done.
@note
info->non_flushable_state is set to 1 if we have incremented
bitmap->info->non_flushable and not yet decremented it.
this sets info->non_flushable_state to 1 if we have incremented
bitmap->non_flushable and not yet decremented it.
@param share Table's share
@param non_flushable_inc Increment of MARIA_FILE_BITMAP::non_flushable
......@@ -2151,20 +2132,21 @@ void _ma_bitmap_flushable(MARIA_HA *info, int non_flushable_inc)
{
MARIA_SHARE *share= info->s;
MARIA_FILE_BITMAP *bitmap;
DBUG_ENTER("_ma_bitmap_flushable");
/*
Not transactional tables are never automaticly flushed and needs no
protection
*/
if (!share->now_transactional)
return;
DBUG_VOID_RETURN;
bitmap= &share->bitmap;
if (non_flushable_inc == -1)
{
pthread_mutex_lock(&bitmap->bitmap_lock);
DBUG_ASSERT((int) bitmap->non_flushable > 0 &&
info->non_flushable_state == 1);
DBUG_ASSERT((int) bitmap->non_flushable > 0);
DBUG_ASSERT(info->non_flushable_state == 1);
info->non_flushable_state= 0;
if (--bitmap->non_flushable == 0)
{
......@@ -2182,14 +2164,15 @@ void _ma_bitmap_flushable(MARIA_HA *info, int non_flushable_inc)
}
DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable));
pthread_mutex_unlock(&bitmap->bitmap_lock);
return;
DBUG_VOID_RETURN;
}
DBUG_ASSERT(non_flushable_inc == 1 && info->non_flushable_state == 0);
DBUG_ASSERT(non_flushable_inc == 1);
DBUG_ASSERT(info->non_flushable_state == 0);
/* It is a read without mutex because only an optimization */
if (unlikely(bitmap->flush_all_requested))
{
/*
_ma_bitmap_flush_all() is waiting for the bitmap to become
Some other thread is waiting for the bitmap to become
flushable. Not the moment to make the bitmap unflushable or more
unflushable; let's rather back off and wait. If we didn't do this, with
multiple writers, there may always be one thread causing the bitmap to
......@@ -2214,6 +2197,7 @@ void _ma_bitmap_flushable(MARIA_HA *info, int non_flushable_inc)
bitmap->non_flushable++;
info->non_flushable_state= 1;
DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable));
DBUG_VOID_RETURN;
}
......@@ -2321,10 +2305,10 @@ my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks)
goto err;
}
if (info->s->now_transactional)
/* This duplicates ma_bitmap_flushable(-1) except it already has mutex */
if (info->non_flushable_state)
{
DBUG_ASSERT((int) bitmap->non_flushable >= 0 &&
info->non_flushable_state);
DBUG_ASSERT((int) bitmap->non_flushable >= 0);
info->non_flushable_state= 0;
if (--bitmap->non_flushable == 0)
{
......
......@@ -4982,10 +4982,12 @@ my_bool _ma_scan_init_block_record(MARIA_HA *info)
info->scan.bitmap_pos= info->scan.bitmap_end;
info->scan.bitmap_page= (pgcache_page_no_t) 0 - share->bitmap.pages_covered;
/*
We have to flush bitmap as we will read the bitmap from the page cache
while scanning rows
We need to flush what's in memory (bitmap.map) to page cache otherwise, as
we are going to read bitmaps from page cache in table scan (see
_ma_scan_block_record()), we may miss recently inserted rows (bitmap page
in page cache would be too old).
*/
DBUG_RETURN(_ma_bitmap_wait_or_flush(info->s));
DBUG_RETURN(_ma_bitmap_flush(info->s));
}
......
......@@ -182,7 +182,6 @@ maria_page_get_lsn(uchar *page, pgcache_page_no_t page_no, uchar* data_ptr);
my_bool _ma_bitmap_init(MARIA_SHARE *share, File file);
my_bool _ma_bitmap_end(MARIA_SHARE *share);
my_bool _ma_bitmap_flush(MARIA_SHARE *share);
my_bool _ma_bitmap_wait_or_flush(MARIA_SHARE *share);
my_bool _ma_bitmap_flush_all(MARIA_SHARE *share);
void _ma_bitmap_reset_cache(MARIA_SHARE *share);
my_bool _ma_bitmap_find_place(MARIA_HA *info, MARIA_ROW *row,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment