Commit 1cc48d16 authored by unknown

WL#3072 Maria Recovery.

Updates to the bitmap flush/pin logic to prepare for when we support
multiple writers.


storage/maria/ma_bitmap.c:
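  Use PAGECACHE_LOCK_READ instead of PAGECACHE_LOCK_WRITE when pinning the
  bitmap page in write_changed_bitmap(): a read lock is less restrictive
  than a write lock.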
  Changing bitmap->flushable to a counter (renamed bitmap->non_flushable),
  to prepare for when multiple writers are allowed on a table.
  Using bitmap->flush_all_requested instead of share->in_checkpoint: the
  latter can be true for the duration of a whole checkpoint even though
  the checkpoint is not yet handling our table, or has already handled it.
  So, to decrease the number of broadcasts, we use a dedicated my_bool
  which is true only while the checkpoint is handling this table's bitmap.
  _ma_bitmap_flushable(share,+1) waits for a concurrent _ma_bitmap_flush_all()
  to finish before incrementing non_flushable; without this, with multiple
  writers, there might always be one thread making the bitmap unflushable,
  and thus the checkpoint would stall.
storage/maria/ma_blockrec.c:
  update to new prototype: "flushable is FALSE|TRUE" becomes "add 1|-1 to
  non_flushable".
storage/maria/ma_blockrec.h:
  new prototype
storage/maria/maria_def.h:
  MARIA_FILE_BITMAP::flushable becomes a counter,
  MARIA_FILE_BITMAP::non_flushable.
  New my_bool MARIA_FILE_BITMAP::flush_all_requested.
parent 105ce538
...@@ -147,8 +147,8 @@ static inline my_bool write_changed_bitmap(MARIA_SHARE *share, ...@@ -147,8 +147,8 @@ static inline my_bool write_changed_bitmap(MARIA_SHARE *share,
{ {
DBUG_ENTER("write_changed_bitmap"); DBUG_ENTER("write_changed_bitmap");
DBUG_ASSERT(share->pagecache->block_size == bitmap->block_size); DBUG_ASSERT(share->pagecache->block_size == bitmap->block_size);
DBUG_PRINT("info", ("bitmap->flushable: %d", bitmap->flushable)); DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable));
if (bitmap->flushable if ((bitmap->non_flushable == 0)
#ifdef WRONG_BITMAP_FLUSH #ifdef WRONG_BITMAP_FLUSH
|| 1 || 1
#endif #endif
...@@ -180,7 +180,7 @@ static inline my_bool write_changed_bitmap(MARIA_SHARE *share, ...@@ -180,7 +180,7 @@ static inline my_bool write_changed_bitmap(MARIA_SHARE *share,
int res= pagecache_write(share->pagecache, int res= pagecache_write(share->pagecache,
&bitmap->file, bitmap->page, 0, &bitmap->file, bitmap->page, 0,
(uchar*) bitmap->map, PAGECACHE_PLAIN_PAGE, (uchar*) bitmap->map, PAGECACHE_PLAIN_PAGE,
PAGECACHE_LOCK_WRITE, PAGECACHE_PIN, PAGECACHE_LOCK_READ, PAGECACHE_PIN,
PAGECACHE_WRITE_DELAY, &page_link.link, PAGECACHE_WRITE_DELAY, &page_link.link,
LSN_IMPOSSIBLE); LSN_IMPOSSIBLE);
page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK; page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
...@@ -231,7 +231,7 @@ my_bool _ma_bitmap_init(MARIA_SHARE *share, File file) ...@@ -231,7 +231,7 @@ my_bool _ma_bitmap_init(MARIA_SHARE *share, File file)
The +1 is to add the bitmap page, as this doesn't have to be covered The +1 is to add the bitmap page, as this doesn't have to be covered
*/ */
bitmap->pages_covered= aligned_bit_blocks * 16 + 1; bitmap->pages_covered= aligned_bit_blocks * 16 + 1;
bitmap->flushable= TRUE; bitmap->flush_all_requested= bitmap->non_flushable= 0;
/* Update size for bits */ /* Update size for bits */
/* TODO; Make this dependent of the row size */ /* TODO; Make this dependent of the row size */
...@@ -350,8 +350,9 @@ my_bool _ma_bitmap_flush_all(MARIA_SHARE *share) ...@@ -350,8 +350,9 @@ my_bool _ma_bitmap_flush_all(MARIA_SHARE *share)
pthread_mutex_lock(&bitmap->bitmap_lock); pthread_mutex_lock(&bitmap->bitmap_lock);
if (bitmap->changed) if (bitmap->changed)
{ {
bitmap->flush_all_requested= TRUE;
#ifndef WRONG_BITMAP_FLUSH #ifndef WRONG_BITMAP_FLUSH
while (!bitmap->flushable) while (bitmap->non_flushable > 0)
{ {
DBUG_PRINT("info", ("waiting for bitmap to be flushable")); DBUG_PRINT("info", ("waiting for bitmap to be flushable"));
pthread_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock); pthread_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock);
...@@ -373,7 +374,7 @@ my_bool _ma_bitmap_flush_all(MARIA_SHARE *share) ...@@ -373,7 +374,7 @@ my_bool _ma_bitmap_flush_all(MARIA_SHARE *share)
bitmap page was not flushed, as the REDOs about it will be skipped, it bitmap page was not flushed, as the REDOs about it will be skipped, it
will wrongly not be recovered. If bitmap pages had a rec_lsn it would will wrongly not be recovered. If bitmap pages had a rec_lsn it would
be different. be different.
There should be no pinned pages as bitmap->flushable is true. There should be no pinned pages as bitmap->non_flushable==0.
*/ */
if (flush_pagecache_blocks_with_filter(share->pagecache, if (flush_pagecache_blocks_with_filter(share->pagecache,
&bitmap->file, FLUSH_KEEP, &bitmap->file, FLUSH_KEEP,
...@@ -381,6 +382,13 @@ my_bool _ma_bitmap_flush_all(MARIA_SHARE *share) ...@@ -381,6 +382,13 @@ my_bool _ma_bitmap_flush_all(MARIA_SHARE *share)
&bitmap->pages_covered) & &bitmap->pages_covered) &
PCFLUSH_PINNED_AND_ERROR) PCFLUSH_PINNED_AND_ERROR)
res= TRUE; res= TRUE;
bitmap->flush_all_requested= FALSE;
/*
Some well-behaved threads may be waiting for flush_all_requested to
become false, wake them up.
*/
DBUG_PRINT("info", ("bitmap flusher waking up others"));
pthread_cond_broadcast(&bitmap->bitmap_cond);
} }
pthread_mutex_unlock(&bitmap->bitmap_lock); pthread_mutex_unlock(&bitmap->bitmap_lock);
DBUG_RETURN(res); DBUG_RETURN(res);
...@@ -2108,42 +2116,69 @@ my_bool _ma_bitmap_set_full_page_bits(MARIA_HA *info, ...@@ -2108,42 +2116,69 @@ my_bool _ma_bitmap_set_full_page_bits(MARIA_HA *info,
/** /**
Make a transition of MARIA_FILE_BITMAP::flushable. Make a transition of MARIA_FILE_BITMAP::non_flushable.
If the bitmap becomes flushable, which requires that REDO-UNDO has been If the bitmap becomes flushable, which requires that REDO-UNDO has been
logged and all bitmap pages touched by the thread have a correct logged and all bitmap pages touched by the thread have a correct
allocation, it unpins all bitmap pages, and if checkpoint is waiting, it allocation, it unpins all bitmap pages, and if _ma_bitmap_flush_all() is
wakes it up. waiting (in practice it is a checkpoint), it wakes it up.
If the bitmap becomes unflushable, it just records it. If the bitmap becomes or stays unflushable, the function merely records it
unless a concurrent _ma_bitmap_flush_all() is happening, in which case the
function first waits for the flush to be done.
@param share Table's share @param share Table's share
@param flushable New state @param non_flushable_inc Increment of MARIA_FILE_BITMAP::non_flushable
(-1 or +1).
*/ */
void _ma_bitmap_flushable(MARIA_SHARE *share, my_bool flushable) void _ma_bitmap_flushable(MARIA_SHARE *share, int non_flushable_inc)
{ {
MARIA_FILE_BITMAP *bitmap= &share->bitmap; MARIA_FILE_BITMAP *bitmap= &share->bitmap;
if (flushable) if (non_flushable_inc == -1)
{ {
pthread_mutex_lock(&bitmap->bitmap_lock); pthread_mutex_lock(&bitmap->bitmap_lock);
DBUG_ASSERT(bitmap->non_flushable > 0);
if (--bitmap->non_flushable == 0)
{
_ma_bitmap_unpin_all(share); _ma_bitmap_unpin_all(share);
bitmap->flushable= TRUE; if (unlikely(bitmap->flush_all_requested))
{
DBUG_PRINT("info", ("bitmap flushable waking up flusher"));
pthread_cond_broadcast(&bitmap->bitmap_cond);
}
}
DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable));
pthread_mutex_unlock(&bitmap->bitmap_lock); pthread_mutex_unlock(&bitmap->bitmap_lock);
return;
}
DBUG_ASSERT(non_flushable_inc == 1);
/* It is a read without mutex because only an optimization */
if (unlikely(bitmap->flush_all_requested))
{
/* /*
Ok to read in_checkpoint without mutex, as it is set before Checkpoint _ma_bitmap_flush_all() is waiting for the bitmap to become
calls _ma_bitmap_flush_all(). flushable. Not the moment to make the bitmap unflushable or more
unflushable; let's rather back off and wait. If we didn't do this, with
multiple writers, there may always be one thread causing the bitmap to
be unflushable and _ma_bitmap_flush_all() would wait for long.
There should not be a deadlock because if our thread increased
non_flushable (and thus _ma_bitmap_flush_all() is waiting for at least
our thread), it is not going to increase it more so is not going to come
here.
*/ */
if (share->in_checkpoint) pthread_mutex_lock(&bitmap->bitmap_lock);
while (bitmap->flush_all_requested)
{ {
DBUG_PRINT("info", ("bitmap ready waking up checkpoint")); DBUG_PRINT("info", ("waiting for bitmap flusher"));
pthread_cond_broadcast(&bitmap->bitmap_cond); pthread_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock);
} }
return; pthread_mutex_unlock(&bitmap->bitmap_lock);
} }
/* /*
Ok to set without mutex: we didn't touch the bitmap yet; when we touch it Ok to set without mutex: we didn't touch the bitmap's content yet; when we
we will take the mutex. touch it we will take the mutex.
*/ */
bitmap->flushable= FALSE; bitmap->non_flushable++;
DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable));
} }
...@@ -2240,14 +2275,18 @@ my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks) ...@@ -2240,14 +2275,18 @@ my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks)
goto err; goto err;
} }
if (--bitmap->non_flushable == 0)
{
_ma_bitmap_unpin_all(info->s); _ma_bitmap_unpin_all(info->s);
bitmap->flushable= TRUE; if (unlikely(bitmap->flush_all_requested))
pthread_mutex_unlock(&bitmap->bitmap_lock);
if (info->s->in_checkpoint)
{ {
DBUG_PRINT("info", ("bitmap ready waking up checkpoint")); DBUG_PRINT("info", ("bitmap flushable waking up flusher"));
pthread_cond_broadcast(&bitmap->bitmap_cond); pthread_cond_broadcast(&bitmap->bitmap_cond);
} }
}
DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable));
pthread_mutex_unlock(&bitmap->bitmap_lock);
DBUG_RETURN(0); DBUG_RETURN(0);
err: err:
......
...@@ -2692,7 +2692,7 @@ static my_bool allocate_and_write_block_record(MARIA_HA *info, ...@@ -2692,7 +2692,7 @@ static my_bool allocate_and_write_block_record(MARIA_HA *info,
MARIA_BITMAP_BLOCKS *blocks= &row->insert_blocks; MARIA_BITMAP_BLOCKS *blocks= &row->insert_blocks;
DBUG_ENTER("allocate_and_write_block_record"); DBUG_ENTER("allocate_and_write_block_record");
_ma_bitmap_flushable(info->s, FALSE); _ma_bitmap_flushable(info->s, 1);
if (_ma_bitmap_find_place(info, row, blocks)) if (_ma_bitmap_find_place(info, row, blocks))
goto err; /* Error reading bitmap */ goto err; /* Error reading bitmap */
...@@ -2729,7 +2729,7 @@ static my_bool allocate_and_write_block_record(MARIA_HA *info, ...@@ -2729,7 +2729,7 @@ static my_bool allocate_and_write_block_record(MARIA_HA *info,
DBUG_EXECUTE_IF("maria_over_alloc_bitmap", sleep(1000);); DBUG_EXECUTE_IF("maria_over_alloc_bitmap", sleep(1000););
DBUG_RETURN(0); DBUG_RETURN(0);
err: err:
_ma_bitmap_flushable(info->s, TRUE); _ma_bitmap_flushable(info->s, -1);
_ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE); _ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE);
DBUG_RETURN(1); DBUG_RETURN(1);
} }
...@@ -2801,7 +2801,7 @@ my_bool _ma_write_abort_block_record(MARIA_HA *info) ...@@ -2801,7 +2801,7 @@ my_bool _ma_write_abort_block_record(MARIA_HA *info)
MARIA_SHARE *share= info->s; MARIA_SHARE *share= info->s;
DBUG_ENTER("_ma_write_abort_block_record"); DBUG_ENTER("_ma_write_abort_block_record");
_ma_bitmap_flushable(share, FALSE); _ma_bitmap_flushable(share, 1);
if (delete_head_or_tail(info, if (delete_head_or_tail(info,
ma_recordpos_to_page(info->cur_row.lastpos), ma_recordpos_to_page(info->cur_row.lastpos),
ma_recordpos_to_dir_entry(info->cur_row.lastpos), 1, ma_recordpos_to_dir_entry(info->cur_row.lastpos), 1,
...@@ -2836,7 +2836,7 @@ my_bool _ma_write_abort_block_record(MARIA_HA *info) ...@@ -2836,7 +2836,7 @@ my_bool _ma_write_abort_block_record(MARIA_HA *info)
&lsn, (void*) 0)) &lsn, (void*) 0))
res= 1; res= 1;
} }
_ma_bitmap_flushable(share, TRUE); _ma_bitmap_flushable(share, -1);
_ma_unpin_all_pages_and_finalize_row(info, lsn); _ma_unpin_all_pages_and_finalize_row(info, lsn);
DBUG_RETURN(res); DBUG_RETURN(res);
} }
...@@ -2886,7 +2886,7 @@ static my_bool _ma_update_block_record2(MARIA_HA *info, ...@@ -2886,7 +2886,7 @@ static my_bool _ma_update_block_record2(MARIA_HA *info,
calc_record_size(info, record, new_row); calc_record_size(info, record, new_row);
page= ma_recordpos_to_page(record_pos); page= ma_recordpos_to_page(record_pos);
_ma_bitmap_flushable(share, FALSE); _ma_bitmap_flushable(share, 1);
DBUG_ASSERT(share->pagecache->block_size == block_size); DBUG_ASSERT(share->pagecache->block_size == block_size);
if (!(buff= pagecache_read(share->pagecache, if (!(buff= pagecache_read(share->pagecache,
&info->dfile, (pgcache_page_no_t) page, 0, &info->dfile, (pgcache_page_no_t) page, 0,
...@@ -2978,7 +2978,7 @@ static my_bool _ma_update_block_record2(MARIA_HA *info, ...@@ -2978,7 +2978,7 @@ static my_bool _ma_update_block_record2(MARIA_HA *info,
DBUG_RETURN(res); DBUG_RETURN(res);
err: err:
_ma_bitmap_flushable(share, TRUE); _ma_bitmap_flushable(share, -1);
_ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE); _ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE);
DBUG_RETURN(1); DBUG_RETURN(1);
} }
...@@ -3287,7 +3287,7 @@ my_bool _ma_delete_block_record(MARIA_HA *info, const uchar *record) ...@@ -3287,7 +3287,7 @@ my_bool _ma_delete_block_record(MARIA_HA *info, const uchar *record)
DBUG_PRINT("enter", ("Rowid: %lu (%lu:%u)", (ulong) info->cur_row.lastpos, DBUG_PRINT("enter", ("Rowid: %lu (%lu:%u)", (ulong) info->cur_row.lastpos,
(ulong) page, record_number)); (ulong) page, record_number));
_ma_bitmap_flushable(share, FALSE); _ma_bitmap_flushable(share, 1);
if (delete_head_or_tail(info, page, record_number, 1, 0) || if (delete_head_or_tail(info, page, record_number, 1, 0) ||
delete_tails(info, info->cur_row.tail_positions)) delete_tails(info, info->cur_row.tail_positions))
goto err; goto err;
...@@ -3334,12 +3334,12 @@ my_bool _ma_delete_block_record(MARIA_HA *info, const uchar *record) ...@@ -3334,12 +3334,12 @@ my_bool _ma_delete_block_record(MARIA_HA *info, const uchar *record)
} }
_ma_bitmap_flushable(share, TRUE); _ma_bitmap_flushable(share, -1);
_ma_unpin_all_pages_and_finalize_row(info, lsn); _ma_unpin_all_pages_and_finalize_row(info, lsn);
DBUG_RETURN(0); DBUG_RETURN(0);
err: err:
_ma_bitmap_flushable(share, TRUE); _ma_bitmap_flushable(share, -1);
_ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE); _ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE);
DBUG_RETURN(1); DBUG_RETURN(1);
} }
...@@ -5648,7 +5648,7 @@ my_bool _ma_apply_undo_row_insert(MARIA_HA *info, LSN undo_lsn, ...@@ -5648,7 +5648,7 @@ my_bool _ma_apply_undo_row_insert(MARIA_HA *info, LSN undo_lsn,
if (read_row_extent_info(info, buff, rownr)) if (read_row_extent_info(info, buff, rownr))
DBUG_RETURN(1); DBUG_RETURN(1);
_ma_bitmap_flushable(share, FALSE); _ma_bitmap_flushable(share, 1);
if (delete_head_or_tail(info, page, rownr, 1, 1) || if (delete_head_or_tail(info, page, rownr, 1, 1) ||
delete_tails(info, info->cur_row.tail_positions)) delete_tails(info, info->cur_row.tail_positions))
goto err; goto err;
...@@ -5665,7 +5665,7 @@ my_bool _ma_apply_undo_row_insert(MARIA_HA *info, LSN undo_lsn, ...@@ -5665,7 +5665,7 @@ my_bool _ma_apply_undo_row_insert(MARIA_HA *info, LSN undo_lsn,
res= 0; res= 0;
err: err:
_ma_bitmap_flushable(share, TRUE); _ma_bitmap_flushable(share, -1);
_ma_unpin_all_pages_and_finalize_row(info, lsn); _ma_unpin_all_pages_and_finalize_row(info, lsn);
DBUG_RETURN(res); DBUG_RETURN(res);
} }
......
...@@ -199,7 +199,7 @@ my_bool _ma_check_if_right_bitmap_type(MARIA_HA *info, ...@@ -199,7 +199,7 @@ my_bool _ma_check_if_right_bitmap_type(MARIA_HA *info,
uint *bitmap_pattern); uint *bitmap_pattern);
void _ma_bitmap_delete_all(MARIA_SHARE *share); void _ma_bitmap_delete_all(MARIA_SHARE *share);
int _ma_bitmap_create_first(MARIA_SHARE *share); int _ma_bitmap_create_first(MARIA_SHARE *share);
void _ma_bitmap_flushable(MARIA_SHARE *share, my_bool flushable); void _ma_bitmap_flushable(MARIA_SHARE *share, int non_flushable_inc);
#ifndef DBUG_OFF #ifndef DBUG_OFF
void _ma_print_bitmap(MARIA_FILE_BITMAP *bitmap, uchar *data, void _ma_print_bitmap(MARIA_FILE_BITMAP *bitmap, uchar *data,
ulonglong page); ulonglong page);
......
...@@ -217,7 +217,8 @@ typedef struct st_maria_file_bitmap ...@@ -217,7 +217,8 @@ typedef struct st_maria_file_bitmap
ulonglong page; /* Page number for current bitmap */ ulonglong page; /* Page number for current bitmap */
uint used_size; /* Size of bitmap head that is not 0 */ uint used_size; /* Size of bitmap head that is not 0 */
my_bool changed; /* 1 if page needs to be flushed */ my_bool changed; /* 1 if page needs to be flushed */
my_bool flushable; /**< If bitmap and log are in sync */ my_bool flush_all_requested; /**< If _ma_bitmap_flush_all waiting */
uint non_flushable; /**< 0 if bitmap and log are in sync */
PAGECACHE_FILE file; /* datafile where bitmap is stored */ PAGECACHE_FILE file; /* datafile where bitmap is stored */
#ifdef THREAD #ifdef THREAD
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment