Commit 757e756d authored by Marko Mäkelä

MDEV-22850 Reduce buf_pool.page_hash latch contention

For reads, the buf_pool.page_hash is protected by buf_pool.mutex or
by the hash_lock. There is no need to compute or acquire hash_lock
if we are not modifying the buf_pool.page_hash.

However, the buf_pool.page_hash latch must be held exclusively
when changing buf_page_t::in_file(), or if we desire to prevent
buf_page_t::can_relocate() or buf_page_t::buf_fix_count()
from changing.
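
As a minimal, self-contained illustration of this rule (an analogue with invented names, not InnoDB code): writers of the mapping hold both the global mutex and the exclusive bucket latch, so a reader may rely on either one alone.

  // Sketch only: page_map, bucket_latch etc. are made up for this example.
  #include <cstdint>
  #include <mutex>
  #include <shared_mutex>
  #include <unordered_map>

  struct page_map
  {
    std::mutex global_mutex;                  // plays the role of buf_pool.mutex
    std::shared_mutex bucket_latch;           // plays the role of a hash_lock
    std::unordered_map<uint64_t,void*> hash;  // plays the role of page_hash

    // Caller must hold global_mutex or bucket_latch in either mode
    // (compare buf_pool_t::page_hash_get_low()).
    void *lookup_low(uint64_t id) const
    {
      auto it= hash.find(id);
      return it == hash.end() ? nullptr : it->second;
    }

    // Lookup without global_mutex: a shared bucket_latch suffices for reads.
    void *lookup(uint64_t id)
    {
      std::shared_lock<std::shared_mutex> s(bucket_latch);
      return lookup_low(id);
    }

    // Modifying the mapping requires the mutex and the exclusive latch.
    void insert(uint64_t id, void *block)
    {
      std::lock_guard<std::mutex> g(global_mutex);
      std::unique_lock<std::shared_mutex> x(bucket_latch);
      hash.emplace(id, block);
    }
  };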

rw_lock_lock_word_decr(): Add a comment that explains the polling logic.

buf_page_t::set_state(): When in_file() is to be changed, assert that
an exclusive buf_pool.page_hash latch is being held. Unfortunately
we cannot assert this for set_state(BUF_BLOCK_REMOVE_HASH) because
set_corrupt_id() may already have been called.

buf_LRU_free_page(): Check buf_page_t::can_relocate() before
acquiring the hash_lock.

buf_block_t::initialise(): Also initialize page.buf_fix_count().

buf_page_create(): Initialize buf_fix_count while not holding
any mutex or hash_lock. Acquire the hash_lock only for the
duration of inserting the block to the buf_pool.page_hash.

buf_LRU_old_init(), buf_LRU_add_block(),
buf_page_t::belongs_to_unzip_LRU(): Do not assert buf_page_t::in_file(),
because buf_page_create() will invoke buf_LRU_add_block()
before acquiring the hash_lock and invoking buf_page_t::set_state().

buf_pool_t::validate(): Rely on the buf_pool.mutex and do not
unnecessarily acquire any buf_pool.page_hash latches.

buf_page_init_for_read(): Clarify that we must acquire the hash_lock
upfront in order to prevent a race with buf_pool_t::watch_remove().
parent 0af1b0bd
@@ -3776,15 +3776,16 @@ buf_page_try_get_func(
}
/** Initialize the block.
@param page_id page id
@param zip_size ROW_FORMAT=COMPRESSED page size, or 0 */
void buf_block_t::initialise(const page_id_t page_id, ulint zip_size)
@param page_id page identifier
@param zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param fix initial buf_fix_count() */
void buf_block_t::initialise(const page_id_t page_id, ulint zip_size,
uint32_t fix)
{
ut_ad(page.state() != BUF_BLOCK_FILE_PAGE);
buf_block_init_low(this);
lock_hash_val= lock_rec_hash(page_id.space(), page_id.page_no());
page.init();
page.id_= page_id;
page.init(page_id, fix);
page_zip_set_size(&page.zip, zip_size);
}
@@ -3803,11 +3804,9 @@ buf_page_create(const page_id_t page_id, ulint zip_size, mtr_t *mtr)
ut_ad(page_id.space() != 0 || !zip_size);
buf_block_t *free_block= buf_LRU_get_free_block(false);
free_block->initialise(page_id, zip_size);
free_block->initialise(page_id, zip_size, 1);
rw_lock_t *hash_lock= buf_pool.hash_lock_get(page_id);
mutex_enter(&buf_pool.mutex);
rw_lock_x_lock(hash_lock);
buf_block_t *block= reinterpret_cast<buf_block_t*>
(buf_pool.page_hash_get_low(page_id));
@@ -3816,7 +3815,6 @@ buf_page_create(const page_id_t page_id, ulint zip_size, mtr_t *mtr)
!buf_pool.watch_is_sentinel(block->page))
{
/* Page can be found in buf_pool */
rw_lock_x_unlock(hash_lock);
buf_LRU_block_free_non_file_page(free_block);
mutex_exit(&buf_pool.mutex);
@@ -3846,11 +3844,17 @@ buf_page_create(const page_id_t page_id, ulint zip_size, mtr_t *mtr)
page_id.space(), page_id.page_no()));
block= free_block;
buf_block_buf_fix_inc(block, __FILE__, __LINE__);
/* Duplicate buf_block_buf_fix_inc_func() */
ut_ad(block->page.buf_fix_count() == 1);
ut_ad(fsp_is_system_temporary(page_id.space()) ||
rw_lock_s_lock_nowait(block->debug_latch, __FILE__, __LINE__));
/* The block must be put to the LRU list */
block->page.set_state(BUF_BLOCK_FILE_PAGE);
buf_LRU_add_block(&block->page, false);
rw_lock_t *hash_lock= buf_pool.hash_lock_get(page_id);
rw_lock_x_lock(hash_lock);
block->page.set_state(BUF_BLOCK_FILE_PAGE);
ut_d(block->page.in_page_hash= true);
HASH_INSERT(buf_page_t, hash, buf_pool.page_hash, page_id.fold(),
&block->page);
@@ -4401,7 +4405,6 @@ void buf_pool_t::validate()
ulint n_zip = 0;
mutex_enter(&mutex);
page_hash_lock_all();
chunk_t* chunk = chunks;
@@ -4492,7 +4495,6 @@ void buf_pool_t::validate()
ut_ad(UT_LIST_GET_LEN(flush_list) == n_flushing);
page_hash_unlock_all();
mutex_exit(&flush_list_mutex);
if (curr_size == old_size
......
@@ -940,7 +940,6 @@ static void buf_LRU_old_init()
bpage = UT_LIST_GET_PREV(LRU, bpage)) {
ut_ad(bpage->in_LRU_list);
ut_ad(bpage->in_file());
/* This loop temporarily violates the
assertions of buf_page_t::set_old(). */
@@ -1066,7 +1065,6 @@ buf_LRU_add_block(
the start, regardless of this parameter */
{
ut_ad(mutex_own(&buf_pool.mutex));
ut_a(bpage->in_file());
ut_ad(!bpage->in_LRU_list);
if (!old || (UT_LIST_GET_LEN(buf_pool.LRU) < BUF_LRU_OLD_MIN_LEN)) {
@@ -1153,10 +1151,18 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
ut_ad(bpage->in_file());
ut_ad(bpage->in_LRU_list);
/* First, perform a quick check before we acquire hash_lock. */
if (!bpage->can_relocate()) {
return false;
}
/* We must hold an exclusive hash_lock to prevent
bpage->can_relocate() from changing due to a concurrent
execution of buf_page_get_low(). */
rw_lock_t* hash_lock = buf_pool.hash_lock_get(id);
rw_lock_x_lock(hash_lock);
if (!bpage->can_relocate()) {
if (UNIV_UNLIKELY(!bpage->can_relocate())) {
/* Do not free buffer fixed and I/O-fixed blocks. */
goto func_exit;
}
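
The hunk above is the classic double-checked pattern: a cheap, racy test before the latch, repeated once the latch is held. A self-contained sketch of the same idea (invented names, not the InnoDB API), assuming that every thread which increments fix_count does so while holding the latch:

  #include <atomic>
  #include <cstdint>
  #include <mutex>

  struct relocatable
  {
    std::atomic<uint32_t> fix_count{0};   // stands in for buf_fix_count()
    std::mutex latch;                     // stands in for the exclusive hash_lock

    bool can_relocate() const
    { return fix_count.load(std::memory_order_relaxed) == 0; }

    bool try_free()
    {
      if (!can_relocate())                // quick check before the latch
        return false;
      std::lock_guard<std::mutex> x(latch);
      if (!can_relocate())                // recheck: the count may have grown
        return false;                     // before we acquired the latch
      /* ...relocate or free the object here... */
      return true;
    }
  };

The first test merely avoids the latch acquisition in the common case; only the second test, made under the latch, is authoritative.
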
......
@@ -121,6 +121,8 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
mutex_enter(&buf_pool.mutex);
/* We must acquire hash_lock this early to prevent
a race condition with buf_pool_t::watch_remove() */
rw_lock_t *hash_lock= buf_pool.hash_lock_get(page_id);
rw_lock_x_lock(hash_lock);
@@ -151,8 +153,8 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
buf_pool.watch_remove(hash_page);
}
block->page.set_state(BUF_BLOCK_FILE_PAGE);
block->page.set_io_fix(BUF_IO_READ);
block->page.set_state(BUF_BLOCK_FILE_PAGE);
ut_ad(!block->page.in_page_hash);
ut_d(block->page.in_page_hash= true);
HASH_INSERT(buf_page_t, hash, buf_pool.page_hash, page_id.fold(), bpage);
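
The comment added in the first hunk of this file can be illustrated with a self-contained analogue (invented names, not InnoDB code): the lookup that may find a sentinel watch and the subsequent insertion must happen under one continuous exclusive latch, otherwise a concurrent buf_pool_t::watch_remove() could invalidate the lookup result before the insert takes place.

  #include <cstdint>
  #include <mutex>
  #include <unordered_map>

  struct watch_table
  {
    std::mutex latch;                          // stands in for the hash_lock
    std::unordered_map<uint64_t,bool> hash;    // value: true = sentinel watch

    // Correct: check and replace inside a single critical section.
    void register_page(uint64_t id)
    {
      std::lock_guard<std::mutex> x(latch);    // taken before the lookup
      auto it= hash.find(id);
      if (it != hash.end() && it->second)      // a sentinel "watch" is present
        hash.erase(it);                        // watch_remove() analogue
      hash.emplace(id, false);                 // insert the real entry
    }
    // A racy variant would release the latch between find() and emplace(),
    // letting another thread remove the watch (or insert the page) in between.
  };
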
......
@@ -995,6 +995,14 @@ class buf_page_t
buf_fix_count_= buf_fix_count;
}
/** Initialize some more fields */
void init(page_id_t id, uint32_t buf_fix_count= 0)
{
init();
id_= id;
buf_fix_count_= buf_fix_count;
}
public:
const page_id_t &id() const { return id_; }
buf_page_state state() const { return state_; }
@@ -1010,8 +1018,7 @@ class buf_page_t
/** @return if this belongs to buf_pool.unzip_LRU */
bool belongs_to_unzip_LRU() const
{
ut_ad(in_file());
return zip.data && state() == BUF_BLOCK_FILE_PAGE;
return zip.data && state() != BUF_BLOCK_ZIP_PAGE;
}
inline void add_buf_fix_count(uint32_t count);
@@ -1277,9 +1284,10 @@ struct buf_block_t{
ulint zip_size() const { return page.zip_size(); }
/** Initialize the block.
@param page_id page id
@param zip_size ROW_FORMAT=COMPRESSED page size, or 0 */
void initialise(const page_id_t page_id, ulint zip_size);
@param page_id page identifier
@param zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param fix initial buf_fix_count() */
void initialise(const page_id_t page_id, ulint zip_size, uint32_t fix= 0);
};
/**********************************************************************//**
@@ -2134,6 +2142,29 @@ inline void buf_page_t::set_buf_fix_count(uint32_t count)
inline void buf_page_t::set_state(buf_page_state state)
{
ut_ad(mutex_own(&buf_pool.mutex));
#ifdef UNIV_DEBUG
switch (state) {
case BUF_BLOCK_REMOVE_HASH:
/* buf_pool_t::corrupted_evict() invokes set_corrupt_id()
before buf_LRU_free_one_page(), so we cannot assert that
we are holding the hash_lock. */
break;
case BUF_BLOCK_MEMORY:
if (!in_file()) break;
/* fall through */
case BUF_BLOCK_FILE_PAGE:
ut_ad(rw_lock_own(buf_pool.hash_lock_get(id_), RW_LOCK_X));
break;
case BUF_BLOCK_NOT_USED:
if (!in_file()) break;
/* fall through */
case BUF_BLOCK_ZIP_PAGE:
ut_ad((this >= &buf_pool.watch[0] &&
this <= &buf_pool.watch[UT_ARR_SIZE(buf_pool.watch)]) ||
rw_lock_own(buf_pool.hash_lock_get(id_), RW_LOCK_X));
break;
}
#endif
state_= state;
}
@@ -2212,7 +2243,6 @@ inline bool buf_page_t::is_old() const
/** Set whether a block is old in buf_pool.LRU */
inline void buf_page_t::set_old(bool old)
{
ut_ad(in_file());
ut_ad(mutex_own(&buf_pool.mutex));
ut_ad(in_LRU_list);
......
@@ -220,8 +220,31 @@ rw_lock_lock_word_decr(
return(true);
}
/* Note that lock_copy was reloaded above. We will
keep trying if a spurious conflict occurred, typically
caused by concurrent executions of
rw_lock_s_lock(). */
#if 1 /* FIXME: MDEV-22871 Spurious contention between rw_lock_s_lock() */
/* When the number of concurrently executing threads
exceeds the number of available processor cores,
multiple buf_pool.page_hash S-latch requests would
conflict here, mostly in buf_page_get_low(). We should
implement a simpler rw-lock where the S-latch
acquisition would be a simple fetch_add(1) followed by
either an optional load() loop to wait for the X-latch
to be released, or a fetch_sub(1) and a retry.
For now, we work around the problem with a delay in
this loop. It helped a little on some systems and was
reducing performance on others. */
(void) LF_BACKOFF();
#endif
}
/* A real conflict was detected. */
return(false);
}
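
For reference, here is a minimal self-contained sketch of the simpler rw-lock that the FIXME comment envisions, using the fetch_sub(1)-and-retry flavour of the S path. This is only an assumption about the intended design (compare MDEV-22871), not code from this commit:

  #include <atomic>
  #include <cstdint>
  #include <thread>

  class simple_sux_lock
  {
    static constexpr uint32_t X= 1U << 31;  // exclusive flag
    std::atomic<uint32_t> word{0};          // X flag | number of S holders
  public:
    void s_lock()
    {
      for (;;)
      {
        if (!(word.fetch_add(1, std::memory_order_acquire) & X))
          return;                                        // no X holder: done
        word.fetch_sub(1, std::memory_order_relaxed);    // undo and retry
        std::this_thread::yield();
      }
    }
    void s_unlock() { word.fetch_sub(1, std::memory_order_release); }

    void x_lock()
    {
      // Claim the X flag; wait if another thread already holds it.
      while (word.fetch_or(X, std::memory_order_acquire) & X)
        std::this_thread::yield();
      // Wait for the S holders to drain (word drops to exactly X).
      while (word.load(std::memory_order_acquire) != X)
        std::this_thread::yield();
    }
    void x_unlock() { word.fetch_and(~X, std::memory_order_release); }
  };

Unlike rw_lock_lock_word_decr(), the S path never issues a compare-and-swap, so concurrent S requests cannot spuriously fail against each other; they only retreat when a genuine X request is pending.
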
......