Commit 757e756d authored by Marko Mäkelä

MDEV-22850 Reduce buf_pool.page_hash latch contention

For reads, the buf_pool.page_hash is protected by buf_pool.mutex or
by the hash_lock. There is no need to compute or acquire hash_lock
if we are not modifying the buf_pool.page_hash.

However, the buf_pool.page_hash latch must be held exclusively
when changing buf_page_t::in_file(), or if we desire to prevent
buf_page_t::can_relocate() or buf_page_t::buf_fix_count()
from changing.
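To make the protocol concrete, here is a minimal self-contained C++ model of the rule (an illustration only, not InnoDB code: PagePool, pool_mutex and shard_latch are simplified stand-ins for buf_pool, buf_pool.mutex and one buf_pool.page_hash latch):

#include <cstdint>
#include <mutex>
#include <shared_mutex>
#include <unordered_map>

struct PagePool
{
  std::mutex pool_mutex;                        // stands in for buf_pool.mutex
  std::shared_mutex shard_latch;                // stands in for one page_hash latch
  std::unordered_map<uint64_t, int> page_hash;  // stands in for buf_pool.page_hash

  // Read path: holding pool_mutex alone is sufficient; the shard
  // latch is neither computed nor acquired, which is the saving
  // this commit exploits.
  int *lookup(uint64_t id)
  {
    std::lock_guard<std::mutex> g(pool_mutex);
    auto it= page_hash.find(id);
    return it == page_hash.end() ? nullptr : &it->second;
  }

  // Write path: the exclusive shard latch must be held, because it
  // is what a concurrent reader that holds only the shard latch in
  // shared mode (without pool_mutex) relies on.
  void insert(uint64_t id, int value)
  {
    std::lock_guard<std::mutex> g(pool_mutex);
    std::unique_lock<std::shared_mutex> x(shard_latch);
    page_hash.emplace(id, value);
  }
};

A reader that already holds pool_mutex can skip the per-shard latch entirely; only a thread that mutates the mapping, or that needs buf_fix_count() and friends to stay put, pays for the exclusive latch.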

rw_lock_lock_word_decr(): Add a comment that explains the polling logic.

buf_page_t::set_state(): When in_file() is to be changed, assert that
an exclusive buf_pool.page_hash latch is being held. Unfortunately
we cannot assert this for set_state(BUF_BLOCK_REMOVE_HASH) because
set_corrupt_id() may already have been called.

buf_LRU_free_page(): Check buf_page_t::can_relocate() before
acquiring the hash_lock.
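This is the classic check/latch/re-check pattern; a hedged self-contained sketch follows (hash_lock, fix_count and try_free_page() are illustrative names, not the actual InnoDB declarations):

#include <atomic>
#include <cstdint>
#include <shared_mutex>

std::shared_mutex hash_lock;          // models the page_hash latch
std::atomic<uint32_t> fix_count{0};   // models buf_fix_count()

bool can_relocate()
{
  return fix_count.load(std::memory_order_relaxed) == 0;
}

bool try_free_page()
{
  if (!can_relocate())
    return false;                     // cheap early exit; no latch acquired
  std::unique_lock<std::shared_mutex> x(hash_lock);
  if (!can_relocate())
    return false;                     // lost a race with a concurrent fix
  /* ... free or relocate the page while holding the X-latch ... */
  return true;
}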

buf_block_t::initialise(): Also initialize page.buf_fix_count().

buf_page_create(): Initialize buf_fix_count while not holding
any mutex or hash_lock. Acquire the hash_lock only for the
duration of inserting the block to the buf_pool.page_hash.

buf_LRU_old_init(), buf_LRU_add_block(),
buf_page_t::belongs_to_unzip_LRU(): Do not assert buf_page_t::in_file(),
because buf_page_create() will invoke buf_LRU_add_block()
before acquiring the hash_lock and invoking buf_page_t::set_state().
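A rough self-contained model of the resulting ordering may help (illustrative stand-ins again, not the real buf_page_create()):

#include <atomic>
#include <cstdint>
#include <list>
#include <mutex>
#include <shared_mutex>
#include <unordered_map>

struct Block { std::atomic<uint32_t> fix{0}; bool file_page= false; };

std::mutex pool_mutex;                         // models buf_pool.mutex
std::shared_mutex hash_latch;                  // models one page_hash latch
std::unordered_map<uint64_t, Block*> page_hash;
std::list<Block*> lru;

void create(Block *b, uint64_t id)
{
  // Step 1: no mutex or latch held; corresponds to
  // initialise(page_id, zip_size, 1).
  b->fix.store(1, std::memory_order_relaxed);

  // Step 2: the LRU list is covered by buf_pool.mutex alone, so the
  // block is linked in before its state says in_file(); this is why
  // the in_file() assertions had to be relaxed.
  std::lock_guard<std::mutex> g(pool_mutex);
  lru.push_front(b);

  // Step 3: the exclusive latch is taken only for the state change
  // and the hash insertion that make the block visible.
  std::unique_lock<std::shared_mutex> x(hash_latch);
  b->file_page= true;                 // models set_state(BUF_BLOCK_FILE_PAGE)
  page_hash.emplace(id, b);           // models HASH_INSERT
}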

buf_pool_t::validate(): Rely on the buf_pool.mutex and do not
unnecessarily acquire any buf_pool.page_hash latches.

buf_page_init_for_read(): Clarify that we must acquire the hash_lock
upfront in order to prevent a race with buf_pool_t::watch_remove().
parent 0af1b0bd
@@ -3776,15 +3776,16 @@ buf_page_try_get_func(
 }

 /** Initialize the block.
-@param page_id   page id
-@param zip_size  ROW_FORMAT=COMPRESSED page size, or 0 */
-void buf_block_t::initialise(const page_id_t page_id, ulint zip_size)
+@param page_id   page identifier
+@param zip_size  ROW_FORMAT=COMPRESSED page size, or 0
+@param fix       initial buf_fix_count() */
+void buf_block_t::initialise(const page_id_t page_id, ulint zip_size,
+                             uint32_t fix)
 {
   ut_ad(page.state() != BUF_BLOCK_FILE_PAGE);
   buf_block_init_low(this);
   lock_hash_val= lock_rec_hash(page_id.space(), page_id.page_no());
-  page.init();
-  page.id_= page_id;
+  page.init(page_id, fix);
   page_zip_set_size(&page.zip, zip_size);
 }
@@ -3803,11 +3804,9 @@ buf_page_create(const page_id_t page_id, ulint zip_size, mtr_t *mtr)
   ut_ad(page_id.space() != 0 || !zip_size);

   buf_block_t *free_block= buf_LRU_get_free_block(false);
-  free_block->initialise(page_id, zip_size);
+  free_block->initialise(page_id, zip_size, 1);

-  rw_lock_t *hash_lock= buf_pool.hash_lock_get(page_id);
   mutex_enter(&buf_pool.mutex);
-  rw_lock_x_lock(hash_lock);

   buf_block_t *block= reinterpret_cast<buf_block_t*>
     (buf_pool.page_hash_get_low(page_id));
@@ -3816,7 +3815,6 @@ buf_page_create(const page_id_t page_id, ulint zip_size, mtr_t *mtr)
       !buf_pool.watch_is_sentinel(block->page))
   {
     /* Page can be found in buf_pool */
-    rw_lock_x_unlock(hash_lock);
     buf_LRU_block_free_non_file_page(free_block);
     mutex_exit(&buf_pool.mutex);
@@ -3846,11 +3844,17 @@ buf_page_create(const page_id_t page_id, ulint zip_size, mtr_t *mtr)
                         page_id.space(), page_id.page_no()));

   block= free_block;
-  buf_block_buf_fix_inc(block, __FILE__, __LINE__);
+
+  /* Duplicate buf_block_buf_fix_inc_func() */
+  ut_ad(block->page.buf_fix_count() == 1);
+  ut_ad(fsp_is_system_temporary(page_id.space()) ||
+        rw_lock_s_lock_nowait(block->debug_latch, __FILE__, __LINE__));

   /* The block must be put to the LRU list */
-  block->page.set_state(BUF_BLOCK_FILE_PAGE);
   buf_LRU_add_block(&block->page, false);

+  rw_lock_t *hash_lock= buf_pool.hash_lock_get(page_id);
+  rw_lock_x_lock(hash_lock);
+  block->page.set_state(BUF_BLOCK_FILE_PAGE);
   ut_d(block->page.in_page_hash= true);
   HASH_INSERT(buf_page_t, hash, buf_pool.page_hash, page_id.fold(),
               &block->page);
@@ -4401,7 +4405,6 @@ void buf_pool_t::validate()
        ulint           n_zip = 0;

        mutex_enter(&mutex);
-       page_hash_lock_all();

        chunk_t* chunk = chunks;
@@ -4492,7 +4495,6 @@ void buf_pool_t::validate()
        ut_ad(UT_LIST_GET_LEN(flush_list) == n_flushing);

-       page_hash_unlock_all();
        mutex_exit(&flush_list_mutex);

        if (curr_size == old_size
...
@@ -940,7 +940,6 @@ static void buf_LRU_old_init()
             bpage = UT_LIST_GET_PREV(LRU, bpage)) {

                ut_ad(bpage->in_LRU_list);
-               ut_ad(bpage->in_file());

                /* This loop temporarily violates the
                assertions of buf_page_t::set_old(). */
@@ -1066,7 +1065,6 @@ buf_LRU_add_block(
                                the start, regardless of this parameter */
{
        ut_ad(mutex_own(&buf_pool.mutex));
-       ut_a(bpage->in_file());
        ut_ad(!bpage->in_LRU_list);

        if (!old || (UT_LIST_GET_LEN(buf_pool.LRU) < BUF_LRU_OLD_MIN_LEN)) {
@@ -1153,10 +1151,18 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
        ut_ad(bpage->in_file());
        ut_ad(bpage->in_LRU_list);

+       /* First, perform a quick check before we acquire hash_lock. */
+       if (!bpage->can_relocate()) {
+               return false;
+       }
+
+       /* We must hold an exclusive hash_lock to prevent
+       bpage->can_relocate() from changing due to a concurrent
+       execution of buf_page_get_low(). */
        rw_lock_t* hash_lock = buf_pool.hash_lock_get(id);
        rw_lock_x_lock(hash_lock);

-       if (!bpage->can_relocate()) {
+       if (UNIV_UNLIKELY(!bpage->can_relocate())) {
                /* Do not free buffer fixed and I/O-fixed blocks. */
                goto func_exit;
        }
...
@@ -121,6 +121,8 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
   mutex_enter(&buf_pool.mutex);

+  /* We must acquire hash_lock this early to prevent
+  a race condition with buf_pool_t::watch_remove() */
   rw_lock_t *hash_lock= buf_pool.hash_lock_get(page_id);
   rw_lock_x_lock(hash_lock);
@@ -151,8 +153,8 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
       buf_pool.watch_remove(hash_page);
     }

-    block->page.set_state(BUF_BLOCK_FILE_PAGE);
     block->page.set_io_fix(BUF_IO_READ);
+    block->page.set_state(BUF_BLOCK_FILE_PAGE);
     ut_ad(!block->page.in_page_hash);
     ut_d(block->page.in_page_hash= true);
     HASH_INSERT(buf_page_t, hash, buf_pool.page_hash, page_id.fold(), bpage);
@@ -202,8 +204,8 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
     {
       /* The block was added by some other thread. */
       rw_lock_x_unlock(hash_lock);
       buf_buddy_free(data, zip_size);
       goto func_exit;
     }
   }
...
@@ -995,6 +995,14 @@ class buf_page_t
     buf_fix_count_= buf_fix_count;
   }

+  /** Initialize some more fields */
+  void init(page_id_t id, uint32_t buf_fix_count= 0)
+  {
+    init();
+    id_= id;
+    buf_fix_count_= buf_fix_count;
+  }
+
 public:
   const page_id_t &id() const { return id_; }
   buf_page_state state() const { return state_; }
@@ -1010,8 +1018,7 @@ class buf_page_t
   /** @return if this belongs to buf_pool.unzip_LRU */
   bool belongs_to_unzip_LRU() const
   {
-    ut_ad(in_file());
-    return zip.data && state() == BUF_BLOCK_FILE_PAGE;
+    return zip.data && state() != BUF_BLOCK_ZIP_PAGE;
   }

   inline void add_buf_fix_count(uint32_t count);
@@ -1277,9 +1284,10 @@ struct buf_block_t{
   ulint zip_size() const { return page.zip_size(); }

   /** Initialize the block.
-  @param page_id   page id
-  @param zip_size  ROW_FORMAT=COMPRESSED page size, or 0 */
-  void initialise(const page_id_t page_id, ulint zip_size);
+  @param page_id   page identifier
+  @param zip_size  ROW_FORMAT=COMPRESSED page size, or 0
+  @param fix       initial buf_fix_count() */
+  void initialise(const page_id_t page_id, ulint zip_size, uint32_t fix= 0);
 };

 /**********************************************************************//**
@@ -2134,6 +2142,29 @@ inline void buf_page_t::set_buf_fix_count(uint32_t count)
 inline void buf_page_t::set_state(buf_page_state state)
 {
   ut_ad(mutex_own(&buf_pool.mutex));
+#ifdef UNIV_DEBUG
+  switch (state) {
+  case BUF_BLOCK_REMOVE_HASH:
+    /* buf_pool_t::corrupted_evict() invokes set_corrupt_id()
+    before buf_LRU_free_one_page(), so we cannot assert that
+    we are holding the hash_lock. */
+    break;
+  case BUF_BLOCK_MEMORY:
+    if (!in_file()) break;
+    /* fall through */
+  case BUF_BLOCK_FILE_PAGE:
+    ut_ad(rw_lock_own(buf_pool.hash_lock_get(id_), RW_LOCK_X));
+    break;
+  case BUF_BLOCK_NOT_USED:
+    if (!in_file()) break;
+    /* fall through */
+  case BUF_BLOCK_ZIP_PAGE:
+    ut_ad((this >= &buf_pool.watch[0] &&
+           this <= &buf_pool.watch[UT_ARR_SIZE(buf_pool.watch)]) ||
+          rw_lock_own(buf_pool.hash_lock_get(id_), RW_LOCK_X));
+    break;
+  }
+#endif
   state_= state;
 }
@@ -2212,7 +2243,6 @@ inline bool buf_page_t::is_old() const
 /** Set whether a block is old in buf_pool.LRU */
 inline void buf_page_t::set_old(bool old)
 {
-  ut_ad(in_file());
   ut_ad(mutex_own(&buf_pool.mutex));
   ut_ad(in_LRU_list);
...
@@ -220,8 +220,31 @@ rw_lock_lock_word_decr(
                        return(true);
                }

+               /* Note that lock_copy was reloaded above. We will
+               keep trying if a spurious conflict occurred, typically
+               caused by concurrent executions of
+               rw_lock_s_lock(). */
+#if 1 /* FIXME: MDEV-22871 Spurious contention between rw_lock_s_lock() */
+               /* When the number of concurrently executing threads
+               exceeds the number of available processor cores,
+               multiple buf_pool.page_hash S-latch requests would
+               conflict here, mostly in buf_page_get_low(). We should
+               implement a simpler rw-lock where the S-latch
+               acquisition would be a simple fetch_add(1) followed by
+               either an optional load() loop to wait for the X-latch
+               to be released, or a fetch_sub(1) and a retry.
+
+               For now, we work around the problem with a delay in
+               this loop. It helped a little on some systems and was
+               reducing performance on others. */
                (void) LF_BACKOFF();
+#endif
        }

+       /* A real conflict was detected. */
        return(false);
 }
...
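As a rough illustration of the simpler rw-lock proposed in the FIXME comment above, the S-latch acquisition could be a fetch_add(1) that backs out with fetch_sub(1) on conflict. This is one possible reading of the comment, not the MDEV-22871 implementation:

#include <atomic>
#include <cstdint>

class simple_rw_lock
{
  static constexpr uint32_t WRITER= 1U << 31;  // X-latch holder flag
  std::atomic<uint32_t> word{0};               // low bits: S-latch holders
public:
  void s_lock()
  {
    for (;;)
    {
      // Optimistically announce the S-latch with a plain fetch_add(1).
      if (!(word.fetch_add(1, std::memory_order_acquire) & WRITER))
        return;                                // no writer: latch granted
      // A writer is present: back out, wait for it to leave, retry.
      word.fetch_sub(1, std::memory_order_relaxed);
      while (word.load(std::memory_order_relaxed) & WRITER) {}
    }
  }
  void s_unlock() { word.fetch_sub(1, std::memory_order_release); }
  void x_lock()
  {
    // Claim WRITER only when no one holds or is acquiring the latch.
    uint32_t expected= 0;
    while (!word.compare_exchange_weak(expected, WRITER,
                                       std::memory_order_acquire))
      expected= 0;
  }
  void x_unlock() { word.fetch_sub(WRITER, std::memory_order_release); }
};

In this sketch a reader never spins inside the lock word update itself, so concurrent s_lock() calls no longer look like conflicts to each other, which is exactly the spurious contention the FIXME describes.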