Commit e63ead68 authored by Marko Mäkelä, committed by Marko Mäkelä

Bug#24346574 PAGE CLEANER THREAD, ASSERT BLOCK->N_POINTERS == 0

btr_search_drop_page_hash_index(): Do not return before ensuring
that block->index=NULL, even if !btr_search_enabled. We would
typically still skip acquiring the AHI latch when the AHI is
disabled, because block->index would already be NULL. Only if the AHI
is in the process of being disabled would we wait for the AHI latch,
and then notice that block->index=NULL and return.
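
In outline, the fixed function always reaches the block->index check
and only returns early based on that field. Below is a minimal
self-contained model of the intended control flow, using standard C++
stand-ins rather than the InnoDB types (the real code is in the
btr0sea.cc hunks further down):

#include <atomic>
#include <mutex>

struct dict_index_t {};                        // stand-in, not the real type

struct block_t {
    std::atomic<dict_index_t*> index{nullptr}; // models buf_block_t::index
};

std::mutex ahi_latch;                          // models btr_search_latches[]

void drop_page_hash_index(block_t* block)
{
    // Dirty read without holding the AHI latch. When the AHI is
    // disabled, index is normally already NULL, so we still return
    // cheaply. The removed "if (!btr_search_enabled) return;"
    // shortcut was unsafe: it could return while hash entries still
    // pointed to the block.
    if (block->index.load(std::memory_order_relaxed) == nullptr) {
        return;
    }

    std::lock_guard<std::mutex> latch(ahi_latch);
    if (block->index.load() == nullptr) {
        // The thread that is disabling the AHI dropped the entries
        // while we were waiting for the latch.
        return;
    }

    // ... drop the adaptive hash index entries for this page ...
    block->index.store(nullptr);
}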

The above bug was a regression caused in MySQL 5.7.9 by the fix of
Bug#21407023: DISABLING AHI SHOULD AVOID TAKING AHI LATCH

The rest of this patch improves diagnostics by adding assertions.

assert_block_ahi_valid(): A debug predicate for checking that
block->n_pointers!=0 implies block->index!=NULL.

assert_block_ahi_empty(): A debug predicate for checking that
block->n_pointers==0.
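
Expressed in standard C++, the two predicates amount to the following
model (the patch itself defines them as macros over
my_atomic_addlint(); see the buf0buf.h hunk below — the names here are
stand-ins):

#include <atomic>
#include <cassert>

struct dict_index_t;                            // stand-in forward declaration

struct block_model {
    dict_index_t*              index = nullptr; // models buf_block_t::index
    std::atomic<unsigned long> n_pointers{0};   // models buf_block_t::n_pointers
};

// assert_block_ahi_empty(): no AHI entries point to this block.
inline void model_ahi_empty(const block_model& b)
{
    assert(b.n_pointers.load() == 0);
}

// assert_block_ahi_valid(): n_pointers != 0 implies index != NULL,
// i.e. once block->index has been cleared, the counter must be zero.
inline void model_ahi_valid(const block_model& b)
{
    assert(b.index != nullptr || b.n_pointers.load() == 0);
}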

buf_block_init(): Instead of assigning block->n_pointers=0,
assert_block_ahi_empty(block).

buf_pool_clear_hash_index(): Clarify comments, and assign
block->n_pointers=0 before assigning block->index=NULL.
The wrong ordering could make block->n_pointers appear incorrect in
debug assertions. This bug was introduced in MySQL 5.1.52 by
Bug#13006367 62487: INNODB TAKES 3 MINUTES TO CLEAN UP THE
ADAPTIVE HASH INDEX AT SHUTDOWN
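
The ordering matters because debug readers evaluate the invariant
(index != NULL || n_pointers == 0) as a dirty read, without any latch.
A sketch of the two store orders, reusing the block_model type from
the sketch above:

// Fixed order: clear the counter first, then the index pointer.
// A dirty reader between the two stores still sees index != NULL,
// so assert_block_ahi_valid() holds at every intermediate point.
void clear_hash_index_fixed(block_model& b)
{
    b.n_pointers.store(0);
    b.index = nullptr;
}

// Pre-fix order: between the two stores a reader sees index == NULL
// while n_pointers != 0, and the debug assertion fires even though
// nothing is actually wrong.
void clear_hash_index_broken(block_model& b)
{
    b.index = nullptr;
    b.n_pointers.store(0);
}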

i_s_innodb_buffer_page_get_info(): Add a comment that
the IS_HASHED column in the INFORMATION_SCHEMA views
INNODB_BUFFER_PAGE and INNODB_BUFFER_PAGE_LRU may
show false positives (there may be no pointers after all).

ha_insert_for_fold_func(), ha_delete_hash_node(),
ha_search_and_update_if_found_func(): Use atomics for
updating buf_block_t::n_pointers. While buf_block_t::index is
always protected by btr_search_x_lock(index), in
ha_insert_for_fold_func() the n_pointers-- may belong to
another dict_index_t whose btr_search_latches[] we are not holding.
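
A model of the atomic accounting: my_atomic_addlint() returns the
value the variable had before the addition, so each assertion bounds
the old value. The bound and atomic types below are stand-ins (the
patch derives MAX_N_POINTERS as
UNIV_PAGE_SIZE_MAX / REC_N_NEW_EXTRA_BYTES; see the ha0ha.cc hunks
below for the real code):

#include <atomic>
#include <cassert>

constexpr unsigned long MAX_N_POINTERS = 65536 / 5; // illustrative value

// Moving one AHI entry between blocks, as in ha_insert_for_fold_func()
// when an existing fold is repointed to new data.
void move_one_pointer(std::atomic<unsigned long>& from,
                      std::atomic<unsigned long>& to)
{
    // fetch_sub/fetch_add return the previous value. The counter is
    // unsigned, so decrementing 0 wraps to a huge value, which a
    // later bound check will catch.
    assert(from.fetch_sub(1) < MAX_N_POINTERS);
    assert(to.fetch_add(1) < MAX_N_POINTERS);
}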

RB: 13879
Reviewed-by: Jimmy Yang <jimmy.yang@oracle.com>
parent a6adf567
......@@ -3683,6 +3683,8 @@ btr_cur_update_in_place(
btr_search_x_lock(index);
}
assert_block_ahi_valid(block);
#endif /* BTR_CUR_HASH_ADAPT */
row_upd_rec_in_place(rec, index, offsets, update, page_zip);
......
......@@ -611,6 +611,7 @@ btr_search_update_hash_ref(
|| rw_lock_own(&(block->lock), RW_LOCK_X));
ut_ad(page_align(btr_cur_get_rec(cursor))
== buf_block_get_frame(block));
assert_block_ahi_valid(block);
index = block->index;
......@@ -1122,14 +1123,13 @@ btr_search_drop_page_hash_index(buf_block_t* block)
rw_lock_t* latch;
btr_search_t* info;
- if (!btr_search_enabled) {
- return;
- }
retry:
/* Do a dirty check on block->index, return if the block is
not in the adaptive hash index. */
index = block->index;
/* This debug check uses a dirty read that could theoretically cause
false positives while buf_pool_clear_hash_index() is executing. */
assert_block_ahi_valid(block);
if (index == NULL) {
return;
......@@ -1156,6 +1156,7 @@ btr_search_drop_page_hash_index(buf_block_t* block)
ut_ad(!btr_search_own_any(RW_LOCK_X));
rw_lock_s_lock(latch);
assert_block_ahi_valid(block);
if (block->index == NULL) {
rw_lock_s_unlock(latch);
......@@ -1172,6 +1173,7 @@ btr_search_drop_page_hash_index(buf_block_t* block)
#ifdef MYSQL_INDEX_DISABLE_AHI
ut_ad(!index->disable_ahi);
#endif
ut_ad(btr_search_enabled);
ut_ad(block->page.id.space() == index->space);
ut_a(index_id == index->id);
......@@ -1290,23 +1292,8 @@ btr_search_drop_page_hash_index(buf_block_t* block)
MONITOR_INC_VALUE(MONITOR_ADAPTIVE_HASH_ROW_REMOVED, n_cached);
cleanup:
- #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- if (UNIV_UNLIKELY(block->n_pointers)) {
- /* Corruption */
- ib::error() << "Corruption of adaptive hash index."
- << " After dropping, the hash index to a page of "
- << index->name
- << ", still " << block->n_pointers
- << " hash nodes remain.";
- rw_lock_x_unlock(latch);
- ut_ad(btr_search_validate());
- } else {
- rw_lock_x_unlock(latch);
- }
- #else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
assert_block_ahi_valid(block);
rw_lock_x_unlock(latch);
- #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
ut_free(folds);
}
......@@ -1533,6 +1520,7 @@ btr_search_build_page_hash_index(
have to take care not to increment the counter in that
case. */
if (!block->index) {
assert_block_ahi_empty(block);
index->search_info->ref_count++;
}
......@@ -1551,6 +1539,7 @@ btr_search_build_page_hash_index(
MONITOR_INC(MONITOR_ADAPTIVE_HASH_PAGE_ADDED);
MONITOR_INC_VALUE(MONITOR_ADAPTIVE_HASH_ROW_ADDED, n_cached);
exit_func:
assert_block_ahi_valid(block);
btr_search_x_unlock(index);
ut_free(folds);
......@@ -1590,6 +1579,8 @@ btr_search_move_or_delete_hash_entries(
ut_a(!block->index || block->index == index);
ut_a(!(new_block->index || block->index)
|| !dict_index_is_ibuf(index));
assert_block_ahi_valid(block);
assert_block_ahi_valid(new_block);
if (new_block->index) {
......@@ -1650,6 +1641,7 @@ btr_search_update_hash_on_delete(btr_cur_t* cursor)
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_X));
assert_block_ahi_valid(block);
index = block->index;
if (!index) {
......@@ -1674,6 +1666,7 @@ btr_search_update_hash_on_delete(btr_cur_t* cursor)
}
btr_search_x_lock(index);
assert_block_ahi_valid(block);
if (block->index) {
ut_a(block->index == index);
......@@ -1684,6 +1677,8 @@ btr_search_update_hash_on_delete(btr_cur_t* cursor)
MONITOR_INC(
MONITOR_ADAPTIVE_HASH_ROW_REMOVE_NOT_FOUND);
}
assert_block_ahi_valid(block);
}
btr_search_x_unlock(index);
......@@ -1747,6 +1742,7 @@ btr_search_update_hash_node_on_insert(btr_cur_t* cursor)
}
func_exit:
assert_block_ahi_valid(block);
btr_search_x_unlock(index);
} else {
btr_search_x_unlock(index);
......@@ -1791,6 +1787,7 @@ btr_search_update_hash_on_insert(btr_cur_t* cursor)
block = btr_cur_get_block(cursor);
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_X));
assert_block_ahi_valid(block);
index = block->index;
......
......@@ -1496,6 +1496,10 @@ buf_block_init(
{
UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE);
/* This function should only be executed at database startup or by
buf_pool_resize(). Either way, the adaptive hash index must not exist. */
assert_block_ahi_empty(block);
block->frame = frame;
block->page.buf_pool_index = buf_pool_index(buf_pool);
......@@ -1526,11 +1530,6 @@ buf_block_init(
ut_d(block->in_unzip_LRU_list = FALSE);
ut_d(block->in_withdraw_list = FALSE);
- #ifdef BTR_CUR_HASH_ADAPT
- # if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- block->n_pointers = 0;
- # endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- #endif /* BTR_CUR_HASH_ADAPT */
page_zip_des_init(&block->page.zip);
mutex_create(LATCH_ID_BUF_BLOCK_MUTEX, &block->mutex);
......@@ -2244,6 +2243,10 @@ buf_page_realloc(
/* set other flags of buf_block_t */
#ifdef BTR_CUR_HASH_ADAPT
/* This code should only be executed by buf_pool_resize(),
while the adaptive hash index is disabled. */
assert_block_ahi_empty(block);
assert_block_ahi_empty(new_block);
ut_ad(!block->index);
new_block->index = NULL;
new_block->n_hash_helps = 0;
......@@ -3210,20 +3213,23 @@ buf_pool_clear_hash_index()
for (; i--; block++) {
dict_index_t* index = block->index;
assert_block_ahi_valid(block);
/* We can set block->index = NULL
- when we have an x-latch on search latch;
- see the comment in buf0buf.h */
and block->n_pointers = 0
when btr_search_own_all(RW_LOCK_X);
see the comments in buf0buf.h */
if (!index) {
/* Not hashed */
continue;
}
- block->index = NULL;
ut_ad(buf_block_get_state(block)
== BUF_BLOCK_FILE_PAGE);
# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
block->n_pointers = 0;
# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
block->index = NULL;
}
}
}
......@@ -3915,6 +3921,9 @@ buf_block_init_low(
{
block->skip_flush_check = false;
#ifdef BTR_CUR_HASH_ADAPT
/* No adaptive hash index entries may point to a previously
unused (and now freshly allocated) block. */
assert_block_ahi_empty(block);
block->index = NULL;
block->n_hash_helps = 0;
......
......@@ -295,21 +295,29 @@ buf_LRU_drop_page_hash_for_tablespace(
continue;
}
- mutex_enter(&((buf_block_t*) bpage)->mutex);
buf_block_t* block = reinterpret_cast<buf_block_t*>(bpage);
- {
- bool skip = bpage->buf_fix_count > 0
- || !((buf_block_t*) bpage)->index;
mutex_enter(&block->mutex);
- mutex_exit(&((buf_block_t*) bpage)->mutex);
/* This debug check uses a dirty read that could
theoretically cause false positives while
buf_pool_clear_hash_index() is executing.
(Other conflicting access paths to the adaptive hash
index should not be possible, because when a
tablespace is being discarded or dropped, there must
be no concurrent access to the contained tables.) */
assert_block_ahi_valid(block);
- if (skip) {
- /* Skip this block, because there are
- no adaptive hash index entries
- pointing to it, or because we cannot
- drop them due to the buffer-fix. */
- goto next_page;
- }
- }
bool skip = bpage->buf_fix_count > 0 || !block->index;
mutex_exit(&block->mutex);
if (skip) {
/* Skip this block, because there are
no adaptive hash index entries
pointing to it, or because we cannot
drop them due to the buffer-fix. */
goto next_page;
}
/* Store the page number so that we can drop the hash
......@@ -800,6 +808,17 @@ buf_LRU_remove_all_pages(
bpage->id, bpage->size);
goto scan_again;
} else {
/* This debug check uses a dirty read that could
theoretically cause false positives while
buf_pool_clear_hash_index() is executing,
if the writes to block->index=NULL and
block->n_pointers=0 are reordered.
(Other conflicting access paths to the adaptive hash
index should not be possible, because when a
tablespace is being discarded or dropped, there must
be no concurrent access to the contained tables.) */
assert_block_ahi_empty((buf_block_t*) bpage);
}
#endif /* BTR_CUR_HASH_ADAPT */
......@@ -1156,6 +1175,9 @@ buf_LRU_get_free_only(
|| !buf_block_will_withdrawn(buf_pool, block)) {
/* found valid free block */
buf_page_mutex_enter(block);
/* No adaptive hash index entries may point to
a free block. */
assert_block_ahi_empty(block);
buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);
UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
......@@ -2030,17 +2052,10 @@ buf_LRU_block_free_non_file_page(
case BUF_BLOCK_READY_FOR_USE:
break;
default:
ib::error() << "Block:" << block
<< " incorrect state:" << buf_get_state_name(block)
<< " in buf_LRU_block_free_non_file_page";
return; /* Continue */
ut_error;
}
- #ifdef BTR_CUR_HASH_ADAPT
- # if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- ut_a(block->n_pointers == 0);
- # endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- #endif /* BTR_CUR_HASH_ADAPT */
assert_block_ahi_empty(block);
ut_ad(!block->page.in_free_list);
ut_ad(!block->page.in_flush_list);
ut_ad(!block->page.in_LRU_list);
......
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -189,6 +190,12 @@ ha_clear(
}
#ifdef BTR_CUR_HASH_ADAPT
# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
/** Maximum number of records in a page */
static const lint MAX_N_POINTERS
= UNIV_PAGE_SIZE_MAX / REC_N_NEW_EXTRA_BYTES;
# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
/*************************************************************//**
Inserts an entry into a hash table. If an entry with the same fold number
is found, its node is updated to point to the new data, and no new node
......@@ -235,9 +242,11 @@ ha_insert_for_fold_func(
buf_block_t* prev_block = prev_node->block;
ut_a(prev_block->frame
== page_align(prev_node->data));
- ut_a(prev_block->n_pointers > 0);
- prev_block->n_pointers--;
- block->n_pointers++;
ut_a(my_atomic_addlint(
&prev_block->n_pointers, -1)
< MAX_N_POINTERS);
ut_a(my_atomic_addlint(&block->n_pointers, 1)
< MAX_N_POINTERS);
}
prev_node->block = block;
......@@ -268,7 +277,8 @@ ha_insert_for_fold_func(
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
if (table->adaptive) {
- block->n_pointers++;
ut_a(my_atomic_addlint(&block->n_pointers, 1)
< MAX_N_POINTERS);
}
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
......@@ -329,8 +339,8 @@ ha_delete_hash_node(
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
if (table->adaptive) {
ut_a(del_node->block->frame = page_align(del_node->data));
- ut_a(del_node->block->n_pointers > 0);
- del_node->block->n_pointers--;
ut_a(my_atomic_addlint(&del_node->block->n_pointers, -1)
< MAX_N_POINTERS);
}
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
......@@ -372,9 +382,10 @@ ha_search_and_update_if_found_func(
if (node) {
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
if (table->adaptive) {
- ut_a(node->block->n_pointers > 0);
- node->block->n_pointers--;
- new_block->n_pointers++;
ut_a(my_atomic_addlint(&node->block->n_pointers, -1)
< MAX_N_POINTERS);
ut_a(my_atomic_addlint(&new_block->n_pointers, 1)
< MAX_N_POINTERS);
}
node->block = new_block;
......
......@@ -5149,6 +5149,10 @@ i_s_innodb_buffer_page_get_info(
block = reinterpret_cast<const buf_block_t*>(bpage);
frame = block->frame;
#ifdef BTR_CUR_HASH_ADAPT
/* Note: this may be a false positive, that
is, block->index will not always be set to
NULL when the last adaptive hash index
reference is dropped. */
page_info->hashed = (block->index != NULL);
#endif /* BTR_CUR_HASH_ADAPT */
} else {
......
......@@ -3953,7 +3953,11 @@ ibuf_insert_to_index_page(
ut_ad(ibuf_inside(mtr));
ut_ad(dtuple_check_typed(entry));
#ifdef BTR_CUR_HASH_ADAPT
/* A change buffer merge must occur before users are granted
any access to the page. No adaptive hash index entries may
point to a freshly read page. */
- ut_ad(!block->index);
assert_block_ahi_empty(block);
#endif /* BTR_CUR_HASH_ADAPT */
ut_ad(mtr->is_named_space(block->page.id.space()));
......
......@@ -1836,23 +1836,53 @@ struct buf_block_t{
/* @} */
/** @name Hash search fields
- These 5 fields may only be modified when we have
- an x-latch on search system AND
- - we are holding an s-latch or x-latch on buf_block_t::lock or
- - we know that buf_block_t::buf_fix_count == 0.
These 5 fields may only be modified when:
we are holding the appropriate x-latch in btr_search_latches[], and
one of the following holds:
(1) the block state is BUF_BLOCK_FILE_PAGE, and
we are holding an s-latch or x-latch on buf_block_t::lock, or
(2) buf_block_t::buf_fix_count == 0, or
(3) the block state is BUF_BLOCK_REMOVE_HASH.
An exception to this is when we init or create a page
in the buffer pool in buf0buf.cc.
- Another exception is that assigning block->index = NULL
- is allowed whenever holding an x-latch on search system. */
Another exception for buf_pool_clear_hash_index() is that
assigning block->index = NULL (and block->n_pointers = 0)
is allowed whenever btr_search_own_all(RW_LOCK_X).
Another exception is that ha_insert_for_fold_func() may
decrement n_pointers without holding the appropriate latch
in btr_search_latches[]. Thus, n_pointers must be
protected by atomic memory access.
This implies that the fields may be read without race
condition whenever any of the following hold:
- the btr_search_latches[] s-latch or x-latch is being held, or
- the block state is not BUF_BLOCK_FILE_PAGE or BUF_BLOCK_REMOVE_HASH,
and holding some latch prevents the state from changing to that.
Some use of assert_block_ahi_empty() or assert_block_ahi_valid()
is prone to race conditions while buf_pool_clear_hash_index() is
executing (the adaptive hash index is being disabled). Such use
is explicitly commented. */
/* @{ */
# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
ulint n_pointers; /*!< used in debugging: the number of
pointers in the adaptive hash index
- pointing to this frame */
pointing to this frame;
protected by atomic memory access
or btr_search_own_all(). */
# define assert_block_ahi_empty(block) \
ut_a(my_atomic_addlint(&(block)->n_pointers, 0) == 0)
# define assert_block_ahi_valid(block) \
ut_a((block)->index \
|| my_atomic_addlint(&(block)->n_pointers, 0) == 0)
# else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
# define assert_block_ahi_empty(block) /* nothing */
# define assert_block_ahi_valid(block) /* nothing */
# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
unsigned curr_n_fields:10;/*!< prefix length for hash indexing:
number of full fields */
......@@ -1868,11 +1898,14 @@ struct buf_block_t{
complete, though: there may
have been hash collisions,
record deletions, etc. */
/* @} */
#else /* BTR_CUR_HASH_ADAPT */
# define assert_block_ahi_empty(block) /* nothing */
# define assert_block_ahi_valid(block) /* nothing */
#endif /* BTR_CUR_HASH_ADAPT */
bool skip_flush_check;
/*!< Skip check in buf_dblwr_check_block
during bulk load, protected by lock.*/
- /* @} */
# ifdef UNIV_DEBUG
/** @name Debug fields */
/* @{ */
......
......@@ -920,6 +920,7 @@ buf_block_modify_clock_inc(
RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX));
}
#endif /* UNIV_DEBUG */
assert_block_ahi_valid(block);
block->modify_clock++;
}
......