Commit bd5a6403 authored by Marko Mäkelä

MDEV-26033: Race condition between buf_pool.page_hash and resize()

The replacement of buf_pool.page_hash with a different type of
hash table in commit 5155a300 (MDEV-22871)
introduced a race condition with buffer pool resizing.

We have an execution trace where buf_pool.page_hash.array is changed
to point to something else while page_hash_latch::read_lock() is
executing. The same should also affect page_hash_latch::write_lock().
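As an illustration only (a deliberately racy miniature with invented names,
not the InnoDB code), the unsafe window is: the latch address is computed
from the shared array pointer, and a concurrent reallocation can invalidate
that pointer before the latch is acquired; re-checking n_cells afterwards,
as the old retry loop did, cannot undo the stale dereference.

  #include <atomic>
  #include <shared_mutex>
  #include <cstddef>

  /* Hypothetical miniature of the race; all names here are invented. */
  struct cell { std::shared_mutex latch; /* ... bucket contents ... */ };

  struct tiny_page_hash
  {
    std::atomic<std::size_t> n_cells;
    cell *array;                       /* repointed by a concurrent resize */

    std::shared_mutex &read_lock(std::size_t fold)
    {
      const std::size_t n= n_cells.load(std::memory_order_relaxed);
      cell *a= array;                  /* (1) load the array pointer */
      /* (2) a resize may now install a new array and free the old one */
      std::shared_mutex &m= a[fold % n].latch;
      m.lock_shared();                 /* (3) may latch freed memory */
      return m;
    }
  };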

We fix the race condition by never resizing (and reallocating) the
buf_pool.page_hash. We assume that resizing the buffer pool is
a rare operation. Yes, there might be a performance regression if a
server is first started up with a tiny buffer pool, which is later
enlarged. In that case, the tiny buf_pool.page_hash.array could cause
increased use of the hash bucket lists. That problem can be worked
around by initially starting up the server with a larger buffer pool
and then shrinking it: because buf_pool.page_hash is never reallocated,
it will remain sized for the larger buffer pool when the pool is
enlarged again.
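For a rough sense of the magnitude (assuming the default 16 KiB page size
and a page_hash of roughly 2 * curr_size cells, as in the removed
resize_hash() below): a pool started at 128 MiB holds about 8,192 pages,
giving on the order of 16,384 cells; if the pool is later grown to 64 GiB
(about 4 million pages), an average lookup would walk a bucket list of
roughly 4,000,000 / 16,384, i.e. about 250 nodes, instead of the usual
one or two.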

buf_pool_t::resize_hash(): Remove.

buf_pool_t::page_hash_table::lock(): Do not attempt to deal with
hash table resizing. If we really wanted that in a safe manner,
we would probably have to introduce a global rw-lock around the
operation, or at the very least, poll buf_pool.resizing, both of
which would be detrimental to performance.
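For comparison, a sketch (with invented, simplified names; not part of this
patch) of what such a global rw-lock would imply: every page_hash lookup
would take one extra shared latch that all threads contend on.

  #include <shared_mutex>
  #include <cstddef>

  /* Hypothetical sketch of the rejected design; names are invented. */
  struct bucket { std::shared_mutex latch; };

  std::shared_mutex resize_guard;      /* the global rw-lock in question */

  std::shared_mutex &guarded_read_lock(bucket *array, std::size_t n_cells,
                                       std::size_t fold)
  {
    /* Every lookup pays this shared acquisition, and all readers touch the
       same global lock word: the overhead the message above wants to avoid. */
    std::shared_lock<std::shared_mutex> guard(resize_guard);
    std::shared_mutex &m= array[fold % n_cells].latch;
    m.lock_shared();   /* a resize would hold resize_guard exclusively, so
                          array cannot be repointed within this window */
    return m;
  }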
parent 77926284
@@ -1536,13 +1536,6 @@ void buf_pool_t::close()
   ut_free(chunks);
   chunks= nullptr;
   page_hash.free();
-  while (page_hash_table *old_page_hash= freed_page_hash)
-  {
-    freed_page_hash= static_cast<page_hash_table*>
-      (old_page_hash->array[1].node);
-    old_page_hash->free();
-    UT_DELETE(old_page_hash);
-  }
   zip_hash.free();
 
   io_buf.close();
@@ -1829,57 +1822,6 @@ inline bool buf_pool_t::withdraw_blocks()
 	return(false);
 }
 
-/** resize page_hash and zip_hash */
-inline void buf_pool_t::resize_hash()
-{
-  page_hash_table *new_page_hash= UT_NEW_NOKEY(page_hash_table());
-  new_page_hash->create(2 * buf_pool.curr_size);
-  new_page_hash->write_lock_all();
-
-  for (auto i= page_hash.pad(page_hash.n_cells); i--; )
-  {
-    static_assert(!((page_hash_table::ELEMENTS_PER_LATCH + 1) &
-                    page_hash_table::ELEMENTS_PER_LATCH),
-                  "must be one less than a power of 2");
-    if (!(i & page_hash_table::ELEMENTS_PER_LATCH))
-    {
-      ut_ad(reinterpret_cast<page_hash_latch*>
-            (&page_hash.array[i])->is_write_locked());
-      continue;
-    }
-
-    while (buf_page_t *bpage= static_cast<buf_page_t*>
-           (page_hash.array[i].node))
-    {
-      ut_ad(bpage->in_page_hash);
-      const ulint fold= bpage->id().fold();
-      HASH_DELETE(buf_page_t, hash, &buf_pool.page_hash, fold, bpage);
-      HASH_INSERT(buf_page_t, hash, new_page_hash, fold, bpage);
-    }
-  }
-
-  buf_pool.page_hash.array[1].node= freed_page_hash;
-  std::swap(buf_pool.page_hash, *new_page_hash);
-  freed_page_hash= new_page_hash;
-
-  /* recreate zip_hash */
-  hash_table_t new_hash;
-  new_hash.create(2 * buf_pool.curr_size);
-
-  for (ulint i= 0; i < buf_pool.zip_hash.n_cells; i++)
-  {
-    while (buf_page_t *bpage= static_cast<buf_page_t*>
-           (HASH_GET_FIRST(&buf_pool.zip_hash, i)))
-    {
-      const ulint fold= BUF_POOL_ZIP_FOLD_BPAGE(bpage);
-      HASH_DELETE(buf_page_t, hash, &buf_pool.zip_hash, fold, bpage);
-      HASH_INSERT(buf_page_t, hash, &new_hash, fold, bpage);
-    }
-  }
-
-  std::swap(buf_pool.zip_hash.array, new_hash.array);
-  buf_pool.zip_hash.n_cells= new_hash.n_cells;
-  new_hash.free();
-}
-
 inline void buf_pool_t::page_hash_table::write_lock_all()
@@ -1904,26 +1846,6 @@ inline void buf_pool_t::page_hash_table::write_unlock_all()
 }
 
-inline void buf_pool_t::write_lock_all_page_hash()
-{
-  mysql_mutex_assert_owner(&mutex);
-  page_hash.write_lock_all();
-  for (page_hash_table *old_page_hash= freed_page_hash; old_page_hash;
-       old_page_hash= static_cast<page_hash_table*>
-       (old_page_hash->array[1].node))
-    old_page_hash->write_lock_all();
-}
-
-inline void buf_pool_t::write_unlock_all_page_hash()
-{
-  page_hash.write_unlock_all();
-  for (page_hash_table *old_page_hash= freed_page_hash; old_page_hash;
-       old_page_hash= static_cast<page_hash_table*>
-       (old_page_hash->array[1].node))
-    old_page_hash->write_unlock_all();
-}
-
 namespace
 {
@@ -2097,7 +2019,7 @@ inline void buf_pool_t::resize()
 	resizing.store(true, std::memory_order_relaxed);
 
 	mysql_mutex_lock(&mutex);
-	write_lock_all_page_hash();
+	page_hash.write_lock_all();
 
 	chunk_t::map_reg = UT_NEW_NOKEY(chunk_t::map());
@@ -2252,16 +2174,8 @@ inline void buf_pool_t::resize()
 		= srv_buf_pool_base_size > srv_buf_pool_size * 2
 		|| srv_buf_pool_base_size * 2 < srv_buf_pool_size;
 
-	/* Normalize page_hash and zip_hash,
-	if the new size is too different */
-	if (!warning && new_size_too_diff) {
-		buf_resize_status("Resizing hash table");
-		resize_hash();
-		ib::info() << "hash tables were resized";
-	}
-
 	mysql_mutex_unlock(&mutex);
-	write_unlock_all_page_hash();
+	page_hash.write_unlock_all();
 
 	UT_DELETE(chunk_map_old);
@@ -1895,22 +1895,14 @@ class buf_pool_t
     page_hash_latch *lock_get(ulint fold) const
     { return lock_get(fold, n_cells); }
 
-    /** Acquire an array latch, tolerating concurrent buf_pool_t::resize()
+    /** Acquire an array latch.
     @tparam exclusive whether the latch is to be acquired exclusively
     @param fold hash bucket key */
     template<bool exclusive> page_hash_latch *lock(ulint fold)
     {
-      for (;;)
-      {
-        auto n= n_cells;
-        page_hash_latch *latch= lock_get(fold, n);
-        latch->acquire<exclusive>();
-        /* Our latch prevents n_cells from changing. */
-        if (UNIV_LIKELY(n == n_cells))
-          return latch;
-        /* Retry, because buf_pool_t::resize_hash() affected us. */
-        latch->release<exclusive>();
-      }
+      page_hash_latch *latch= lock_get(fold, n_cells);
+      latch->acquire<exclusive>();
+      return latch;
     }
 
     /** Exclusively aqcuire all latches */
@@ -1920,19 +1912,6 @@ class buf_pool_t
     inline void write_unlock_all();
   };
 
-private:
-  /** Former page_hash that has been deleted during resize();
-  singly-linked list via freed_page_hash->array[1] */
-  page_hash_table *freed_page_hash;
-
-  /** Lock all page_hash, also freed_page_hash. */
-  inline void write_lock_all_page_hash();
-  /** Release all page_hash, also freed_page_hash. */
-  inline void write_unlock_all_page_hash();
-  /** Resize page_hash and zip_hash. */
-  inline void resize_hash();
-
-public:
   /** Hash table of file pages (buf_page_t::in_file() holds),
   indexed by page_id_t. Protected by both mutex and page_hash.lock_get(). */
   page_hash_table page_hash;