Commit c091a0bc authored by Marko Mäkelä

MDEV-26826 Duplicated computations of buf_pool.page_hash addresses

Since commit bd5a6403 (MDEV-26033)
we can actually calculate the buf_pool.page_hash cell and latch
addresses while not holding buf_pool.mutex.

buf_page_alloc_descriptor(): Remove the MEM_UNDEFINED.
We now expect buf_page_t::hash to be zero-initialized.

buf_pool_t::hash_chain: Dedicated data type for buf_pool.page_hash.array.

buf_LRU_free_one_page(): Merged into its only caller,
buf_pool_t::corrupted_evict().
parent fdae71f8
......@@ -856,9 +856,10 @@ PageBulk::latch()
ut_ad(m_block->page.buf_fix_count());
/* In case the block is S-latched by page_cleaner. */
/* In case the block is U-latched by page_cleaner. */
if (!buf_page_optimistic_get(RW_X_LATCH, m_block, m_modify_clock,
&m_mtr)) {
/* FIXME: avoid another lookup */
m_block = buf_page_get_gen(page_id_t(m_index->table->space_id,
m_page_no),
0, RW_X_LATCH,
......
......@@ -1630,6 +1630,9 @@ btr_cur_search_to_nth_level_func(
ut_ad(cursor->thr);
switch (btr_op) {
default:
ut_error;
break;
case BTR_INSERT_OP:
case BTR_INSERT_IGNORE_UNIQUE_OP:
ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
......@@ -1662,6 +1665,8 @@ btr_cur_search_to_nth_level_func(
case BTR_DELETE_OP:
ut_ad(buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH);
ut_ad(!dict_index_is_spatial(index));
auto& chain = buf_pool.page_hash.cell_get(
page_id.fold());
if (!row_purge_poss_sec(cursor->purge_node,
index, tuple)) {
......@@ -1676,15 +1681,12 @@ btr_cur_search_to_nth_level_func(
cursor->flag = BTR_CUR_DELETE_IBUF;
} else {
/* The purge could not be buffered. */
buf_pool.watch_unset(page_id);
buf_pool.watch_unset(page_id, chain);
break;
}
buf_pool.watch_unset(page_id);
buf_pool.watch_unset(page_id, chain);
goto func_exit;
default:
ut_error;
}
/* Insert to the insert/delete buffer did not succeed, we
......@@ -6743,11 +6745,10 @@ static void btr_blob_free(buf_block_t *block, bool all, mtr_t *mtr)
ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX));
mtr->commit();
const ulint fold= page_id.fold();
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold());
mysql_mutex_lock(&buf_pool.mutex);
if (buf_page_t *bpage= buf_pool.page_hash_get_low(page_id, fold))
if (buf_page_t *bpage= buf_pool.page_hash.get(page_id, chain))
if (!buf_LRU_free_page(bpage, all) && all && bpage->zip.data)
/* Attempt to deallocate the redundant copy of the uncompressed page
if the whole ROW_FORMAT=COMPRESSED block cannot be deallocated. */
......
......@@ -1090,15 +1090,16 @@ btr_search_guess_on_hash(
buf_block_t* block = buf_pool.block_from_ahi(rec);
if (!ahi_latch) {
page_hash_latch* hash_lock = buf_pool.hash_lock_get(
block->page.id());
hash_lock->read_lock();
buf_pool_t::hash_chain& chain = buf_pool.page_hash.cell_get(
block->page.id().fold());
page_hash_latch&hash_lock = buf_pool.page_hash.lock_get(chain);
hash_lock.read_lock();
if (block->page.state() == BUF_BLOCK_REMOVE_HASH) {
/* Another thread is just freeing the block
from the LRU list of the buffer pool: do not
try to access this page. */
hash_lock->read_unlock();
hash_lock.read_unlock();
goto fail;
}
......@@ -1109,7 +1110,7 @@ btr_search_guess_on_hash(
DBUG_ASSERT(fail || block->page.status != buf_page_t::FREED);
buf_block_buf_fix_inc(block);
hash_lock->read_unlock();
hash_lock.read_unlock();
block->page.set_accessed();
buf_page_make_young_if_needed(&block->page);
......@@ -2209,8 +2210,9 @@ btr_search_hash_table_validate(ulint hash_table_id)
assertion and the comment below) */
const page_id_t id(block->page.id());
if (const buf_page_t* hash_page
= buf_pool.page_hash_get_low(
id, id.fold())) {
= buf_pool.page_hash.get(
id, buf_pool.page_hash.cell_get(
id.fold()))) {
ut_ad(hash_page == &block->page);
goto state_ok;
}
......
/*****************************************************************************
Copyright (c) 2020, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2020, MariaDB Corporation.
Copyright (c) 2020, 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License, version 2.0, as published by the
......@@ -46,14 +46,15 @@ void Block_hint::buffer_fix_block_if_still_valid()
validate m_block->state() to ensure that the block is not being freed. */
if (m_block)
{
const ulint fold= m_page_id.fold();
page_hash_latch *hash_lock= buf_pool.page_hash.lock<false>(fold);
auto &cell= buf_pool.page_hash.cell_get(m_page_id.fold());
page_hash_latch &latch= buf_pool.page_hash.lock_get(cell);
latch.read_lock();
if (buf_pool.is_uncompressed(m_block) && m_page_id == m_block->page.id() &&
m_block->page.state() == BUF_BLOCK_FILE_PAGE)
buf_block_buf_fix_inc(m_block);
else
clear();
hash_lock->read_unlock();
latch.read_unlock();
}
}
} // namespace buf
/*****************************************************************************
Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2018, 2020, MariaDB Corporation.
Copyright (c) 2018, 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -499,9 +499,10 @@ static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force)
ut_ad(space != BUF_BUDDY_STAMP_FREE);
const page_id_t page_id(space, offset);
const ulint fold= page_id.fold();
/* FIXME: we are computing this while holding buf_pool.mutex */
auto &cell= buf_pool.page_hash.cell_get(page_id.fold());
bpage = buf_pool.page_hash_get_low(page_id, fold);
bpage = buf_pool.page_hash.get(page_id, cell);
if (!bpage || bpage->zip.data != src) {
/* The block has probably been freshly
......@@ -546,8 +547,8 @@ static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force)
return false;
}
page_hash_latch *hash_lock = buf_pool.page_hash.lock_get(fold);
hash_lock->write_lock();
page_hash_latch &hash_lock = buf_pool.page_hash.lock_get(cell);
hash_lock.write_lock();
if (bpage->can_relocate()) {
/* Relocate the compressed page. */
......@@ -558,7 +559,7 @@ static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force)
memcpy(dst, src, size);
bpage->zip.data = reinterpret_cast<page_zip_t*>(dst);
hash_lock->write_unlock();
hash_lock.write_unlock();
buf_buddy_mem_invalid(
reinterpret_cast<buf_buddy_free_t*>(src), i);
......@@ -569,7 +570,7 @@ static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force)
return(true);
}
hash_lock->write_unlock();
hash_lock.write_unlock();
return(false);
}
......
......@@ -1147,7 +1147,7 @@ void buf_pool_t::page_hash_table::create(ulint n)
const size_t size= pad(n_cells) * sizeof *array;
void* v= aligned_malloc(size, CPU_LEVEL1_DCACHE_LINESIZE);
memset(v, 0, size);
array= static_cast<hash_cell_t*>(v);
array= static_cast<hash_chain*>(v);
}
/** Create the buffer pool.
......@@ -1336,9 +1336,10 @@ inline bool buf_pool_t::realloc(buf_block_t *block)
return(false); /* free list was not enough */
}
const page_id_t id(block->page.id());
page_hash_latch* hash_lock = hash_lock_get(id);
hash_lock->write_lock();
const page_id_t id{block->page.id()};
hash_chain& chain = page_hash.cell_get(id.fold());
page_hash_latch& hash_lock = page_hash.lock_get(chain);
hash_lock.write_lock();
if (block->page.can_relocate()) {
memcpy_aligned<OS_FILE_LOG_BLOCK_SIZE>(
......@@ -1382,14 +1383,10 @@ inline bool buf_pool_t::realloc(buf_block_t *block)
}
/* relocate page_hash */
ut_ad(block->page.in_page_hash);
ut_ad(new_block->page.in_page_hash);
const ulint fold = id.fold();
ut_ad(&block->page == page_hash_get_low(id, fold));
ut_d(block->page.in_page_hash = false);
HASH_REPLACE(buf_page_t, hash, &page_hash, fold,
&block->page, &new_block->page);
hash_chain& chain = page_hash.cell_get(id.fold());
ut_ad(&block->page == page_hash.get(id, chain));
buf_pool.page_hash.replace(chain, &block->page,
&new_block->page);
buf_block_modify_clock_inc(block);
static_assert(FIL_PAGE_OFFSET % 4 == 0, "alignment");
memset_aligned<4>(block->frame + FIL_PAGE_OFFSET, 0xff, 4);
......@@ -1424,7 +1421,7 @@ inline bool buf_pool_t::realloc(buf_block_t *block)
new_block = block;
}
hash_lock->write_unlock();
hash_lock.write_unlock();
buf_LRU_block_free_non_file_page(new_block);
return(true); /* free_list was enough */
}
......@@ -2049,13 +2046,14 @@ The caller must relocate bpage->list.
@param dpage destination control block */
static void buf_relocate(buf_page_t *bpage, buf_page_t *dpage)
{
const ulint fold= bpage->id().fold();
const page_id_t id= bpage->id();
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(id.fold());
ut_ad(bpage->state() == BUF_BLOCK_ZIP_PAGE);
mysql_mutex_assert_owner(&buf_pool.mutex);
ut_ad(buf_pool.hash_lock_get(bpage->id())->is_write_locked());
ut_ad(buf_pool.page_hash.lock_get(chain).is_write_locked());
ut_a(bpage->io_fix() == BUF_IO_NONE);
ut_a(!bpage->buf_fix_count());
ut_ad(bpage == buf_pool.page_hash_get_low(bpage->id(), fold));
ut_ad(bpage == buf_pool.page_hash.get(id, chain));
ut_ad(!buf_pool.watch_is_sentinel(*bpage));
ut_ad(bpage->state() == BUF_BLOCK_ZIP_PAGE);
......@@ -2090,29 +2088,24 @@ static void buf_relocate(buf_page_t *bpage, buf_page_t *dpage)
ut_d(CheckInLRUList::validate());
/* relocate buf_pool.page_hash */
ut_ad(bpage->in_page_hash);
ut_ad(dpage->in_page_hash);
ut_d(bpage->in_page_hash= false);
HASH_REPLACE(buf_page_t, hash, &buf_pool.page_hash, fold, bpage, dpage);
buf_pool.page_hash.replace(chain, bpage, dpage);
}
/** Register a watch for a page identifier. The caller must hold an
exclusive page hash latch. The *hash_lock may be released,
relocated, and reacquired.
@param id page identifier
@param hash_lock exclusively held page_hash latch
@param chain hash table chain with exclusively held page_hash latch
@return a buffer pool block corresponding to id
@retval nullptr if the block was not present, and a watch was installed */
inline buf_page_t *buf_pool_t::watch_set(const page_id_t id,
page_hash_latch **hash_lock)
buf_pool_t::hash_chain &chain)
{
const ulint fold= id.fold();
ut_ad(*hash_lock == page_hash.lock_get(fold));
ut_ad((*hash_lock)->is_write_locked());
ut_ad(&chain == &page_hash.cell_get(id.fold()));
ut_ad(page_hash.lock_get(chain).is_write_locked());
retry:
if (buf_page_t *bpage= page_hash_get_low(id, fold))
if (buf_page_t *bpage= page_hash.get(id, chain))
{
if (!watch_is_sentinel(*bpage))
/* The page was loaded meanwhile. */
......@@ -2122,7 +2115,7 @@ inline buf_page_t *buf_pool_t::watch_set(const page_id_t id,
return nullptr;
}
(*hash_lock)->write_unlock();
page_hash.lock_get(chain).write_unlock();
/* Allocate a watch[] and then try to insert it into the page_hash. */
mysql_mutex_lock(&mutex);
......@@ -2142,28 +2135,23 @@ inline buf_page_t *buf_pool_t::watch_set(const page_id_t id,
ut_ad(!w->buf_fix_count());
/* w is pointing to watch[], which is protected by mutex.
Normally, buf_page_t::id for objects that are reachable by
page_hash_get_low(id, fold) are protected by hash_lock. */
page_hash.get(id, chain) are protected by hash_lock. */
w->set_state(BUF_BLOCK_ZIP_PAGE);
w->id_= id;
*hash_lock= page_hash.lock_get(fold);
buf_page_t *bpage= page_hash_get_low(id, fold);
buf_page_t *bpage= page_hash.get(id, chain);
if (UNIV_LIKELY_NULL(bpage))
{
w->set_state(BUF_BLOCK_NOT_USED);
*hash_lock= page_hash.lock_get(fold);
(*hash_lock)->write_lock();
page_hash.lock_get(chain).write_lock();
mysql_mutex_unlock(&mutex);
goto retry;
}
(*hash_lock)->write_lock();
page_hash.lock_get(chain).write_lock();
ut_ad(!w->buf_fix_count_);
w->buf_fix_count_= 1;
ut_ad(!w->in_page_hash);
ut_d(w->in_page_hash= true);
HASH_INSERT(buf_page_t, hash, &page_hash, fold, w);
buf_pool.page_hash.append(chain, w);
mysql_mutex_unlock(&mutex);
return nullptr;
}
......@@ -2175,43 +2163,40 @@ inline buf_page_t *buf_pool_t::watch_set(const page_id_t id,
/** Stop watching whether a page has been read in.
watch_set(id) must have returned nullptr before.
@param id page identifier */
void buf_pool_t::watch_unset(const page_id_t id)
@param id page identifier
@param chain unlocked hash table chain */
void buf_pool_t::watch_unset(const page_id_t id, buf_pool_t::hash_chain &chain)
{
mysql_mutex_assert_not_owner(&mutex);
const ulint fold= id.fold();
page_hash_latch *hash_lock= page_hash.lock<true>(fold);
page_hash_latch &hash_lock= page_hash.lock_get(chain);
hash_lock.write_lock();
/* The page must exist because watch_set() increments buf_fix_count. */
buf_page_t *w= page_hash_get_low(id, fold);
buf_page_t *w= page_hash.get(id, chain);
const auto buf_fix_count= w->buf_fix_count();
ut_ad(buf_fix_count);
const bool must_remove= buf_fix_count == 1 && watch_is_sentinel(*w);
ut_ad(w->in_page_hash);
if (!must_remove)
w->unfix();
hash_lock->write_unlock();
hash_lock.write_unlock();
if (must_remove)
{
const auto old= w;
/* The following is based on buf_pool_t::watch_remove(). */
mysql_mutex_lock(&mutex);
w= page_hash_get_low(id, fold);
page_hash_latch *hash_lock= buf_pool.page_hash.lock_get(fold);
hash_lock->write_lock();
w= page_hash.get(id, chain);
hash_lock.write_lock();
if (w->unfix() == 0 && w == old)
{
ut_ad(w->in_page_hash);
ut_d(w->in_page_hash= false);
HASH_DELETE(buf_page_t, hash, &page_hash, fold, w);
page_hash.remove(chain, w);
// Now that the watch is detached from page_hash, release it to watch[].
ut_ad(w->id_ == id);
ut_ad(!w->buf_fix_count());
ut_ad(w->state() == BUF_BLOCK_ZIP_PAGE);
w->set_state(BUF_BLOCK_NOT_USED);
}
hash_lock->write_unlock();
mysql_mutex_unlock(&mutex);
hash_lock.write_unlock();
}
}
......@@ -2233,10 +2218,11 @@ void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr)
++buf_pool.stat.n_page_gets;
const page_id_t page_id(space->id, page);
const ulint fold= page_id.fold();
page_hash_latch *hash_lock= buf_pool.page_hash.lock<false>(fold);
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold());
page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain);
hash_lock.read_lock();
if (buf_block_t *block= reinterpret_cast<buf_block_t*>
(buf_pool.page_hash_get_low(page_id, fold)))
(buf_pool.page_hash.get(page_id, chain)))
{
if (block->page.state() != BUF_BLOCK_FILE_PAGE)
/* FIXME: convert, but avoid buf_zip_decompress() */;
......@@ -2244,7 +2230,7 @@ void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr)
{
buf_block_buf_fix_inc(block);
ut_ad(block->page.buf_fix_count());
hash_lock->read_unlock();
hash_lock.read_unlock();
mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX);
block->lock.x_lock();
......@@ -2254,7 +2240,7 @@ void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr)
}
}
hash_lock->read_unlock();
hash_lock.read_unlock();
}
/** Get read access to a compressed page (usually of type
......@@ -2274,16 +2260,18 @@ buf_page_t* buf_page_get_zip(const page_id_t page_id, ulint zip_size)
++buf_pool.stat.n_page_gets;
bool discard_attempted= false;
const ulint fold= page_id.fold();
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold());
page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain);
buf_page_t *bpage;
page_hash_latch *hash_lock;
for (;;)
{
lookup:
bpage= buf_pool.page_hash_get_locked<false>(page_id, fold, &hash_lock);
hash_lock.read_lock();
bpage= buf_pool.page_hash.get(page_id, chain);
if (bpage)
break;
hash_lock.read_unlock();
dberr_t err= buf_read_page(page_id, zip_size);
......@@ -2299,13 +2287,11 @@ buf_page_t* buf_page_get_zip(const page_id_t page_id, ulint zip_size)
#endif /* UNIV_DEBUG */
}
ut_ad(hash_lock->is_locked());
if (!bpage->zip.data)
{
/* There is no compressed page. */
err_exit:
hash_lock->read_unlock();
hash_lock.read_unlock();
return nullptr;
}
......@@ -2317,9 +2303,9 @@ buf_page_t* buf_page_get_zip(const page_id_t page_id, ulint zip_size)
if (!discard_attempted)
{
discard_attempted= true;
hash_lock->read_unlock();
hash_lock.read_unlock();
mysql_mutex_lock(&buf_pool.mutex);
if (buf_page_t *bpage= buf_pool.page_hash_get_low(page_id, fold))
if (buf_page_t *bpage= buf_pool.page_hash.get(page_id, chain))
buf_LRU_free_page(bpage, false);
mysql_mutex_unlock(&buf_pool.mutex);
goto lookup;
......@@ -2337,7 +2323,7 @@ buf_page_t* buf_page_get_zip(const page_id_t page_id, ulint zip_size)
got_block:
bool must_read= bpage->io_fix() == BUF_IO_READ;
hash_lock->read_unlock();
hash_lock.read_unlock();
DBUG_ASSERT(bpage->status != buf_page_t::FREED);
......@@ -2521,7 +2507,6 @@ buf_page_get_low(
buf_block_t* block;
unsigned access_time;
ulint retries = 0;
const ulint fold = page_id.fold();
ut_ad((mtr == NULL) == (mode == BUF_EVICT_IF_IN_POOL));
ut_ad(!mtr || mtr->is_active());
......@@ -2572,57 +2557,53 @@ buf_page_get_low(
|| ibuf_page_low(page_id, zip_size, FALSE, NULL));
++buf_pool.stat.n_page_gets;
auto& chain= buf_pool.page_hash.cell_get(page_id.fold());
page_hash_latch& hash_lock = buf_pool.page_hash.lock_get(chain);
loop:
buf_block_t* fix_block;
block = guess;
page_hash_latch* hash_lock = buf_pool.page_hash.lock<false>(fold);
if (block) {
hash_lock.read_lock();
/* If the guess is a compressed page descriptor that
has been allocated by buf_page_alloc_descriptor(),
it may have been freed by buf_relocate(). */
if (!buf_pool.is_uncompressed(block)
|| page_id != block->page.id()
|| block->page.state() != BUF_BLOCK_FILE_PAGE) {
/* Our guess was bogus or things have changed
since. */
guess = nullptr;
goto lookup;
} else {
ut_ad(!block->page.in_zip_hash);
if (guess && buf_pool.is_uncompressed(guess)
&& page_id == guess->page.id()
&& guess->page.state() == BUF_BLOCK_FILE_PAGE) {
ut_ad(!guess->page.in_zip_hash);
block = guess;
goto have_block;
}
} else {
lookup:
guess = nullptr;
block = reinterpret_cast<buf_block_t*>(
buf_pool.page_hash_get_low(page_id, fold));
}
buf_pool.page_hash.get(page_id, chain));
if (!block || buf_pool.watch_is_sentinel(block->page)) {
hash_lock->read_unlock();
block = nullptr;
}
if (block && !buf_pool.watch_is_sentinel(block->page)) {
have_block:
fix_block = block;
} else {
hash_lock.read_unlock();
fix_block = block = nullptr;
if (UNIV_UNLIKELY(!block)) {
/* Page not in buf_pool: needs to be read from file */
if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
hash_lock = buf_pool.page_hash.lock<true>(fold);
hash_lock.write_lock();
if (buf_page_t *bpage= buf_pool.watch_set(
page_id, &hash_lock)) {
if (buf_page_t *bpage= buf_pool.watch_set(page_id,
chain)) {
/* We can release hash_lock after we
increment the fix count to make
sure that no state change takes place. */
bpage->fix();
hash_lock->write_unlock();
hash_lock.write_unlock();
block = reinterpret_cast<buf_block_t*>(bpage);
fix_block = block;
goto got_block;
}
hash_lock->write_unlock();
hash_lock.write_unlock();
}
switch (mode) {
......@@ -2714,12 +2695,10 @@ buf_page_get_low(
if (!(++buf_dbg_counter % 5771)) buf_pool.validate();
#endif /* UNIV_DEBUG */
goto loop;
} else {
fix_block = block;
}
fix_block->fix();
hash_lock->read_unlock();
hash_lock.read_unlock();
got_block:
switch (mode) {
......@@ -2811,12 +2790,10 @@ buf_page_get_low(
buf_block_init_low(block);
mysql_mutex_lock(&buf_pool.mutex);
hash_lock = buf_pool.page_hash.lock_get(fold);
hash_lock->write_lock();
hash_lock.write_lock();
/* Buffer-fixing prevents the page_hash from changing. */
ut_ad(bpage == buf_pool.page_hash_get_low(page_id, fold));
ut_ad(bpage == buf_pool.page_hash.get(page_id, chain));
fix_block->unfix(); /* hash_lock protects us after this */
......@@ -2827,7 +2804,7 @@ buf_page_get_low(
This should be extremely unlikely, for example,
if buf_page_get_zip() was invoked. */
hash_lock->write_unlock();
hash_lock.write_unlock();
buf_LRU_block_free_non_file_page(block);
mysql_mutex_unlock(&buf_pool.mutex);
......@@ -2866,7 +2843,7 @@ buf_page_get_low(
MEM_UNDEFINED(bpage, sizeof *bpage);
mysql_mutex_unlock(&buf_pool.mutex);
hash_lock->write_unlock();
hash_lock.write_unlock();
buf_pool.n_pend_unzip++;
access_time = block->page.is_accessed();
......@@ -2923,17 +2900,16 @@ buf_page_get_low(
space->release();
if (evicted) {
hash_lock = buf_pool.page_hash.lock_get(fold);
hash_lock->write_lock();
hash_lock.write_lock();
mysql_mutex_unlock(&buf_pool.mutex);
/* We may set the watch, as it would have
been set if the page were not in the
buffer pool in the first place. */
block= reinterpret_cast<buf_block_t*>(
mode == BUF_GET_IF_IN_POOL_OR_WATCH
? buf_pool.watch_set(page_id, &hash_lock)
: buf_pool.page_hash_get_low(page_id, fold));
hash_lock->write_unlock();
? buf_pool.watch_set(page_id, chain)
: buf_pool.page_hash.get(page_id, chain));
hash_lock.write_unlock();
if (block != NULL) {
/* Either the page has been read in or
......@@ -3114,20 +3090,20 @@ buf_page_optimistic_get(
return FALSE;
}
const page_id_t id(block->page.id());
page_hash_latch *hash_lock = buf_pool.hash_lock_get(id);
hash_lock->read_lock();
const page_id_t id{block->page.id()};
buf_pool_t::hash_chain& chain = buf_pool.page_hash.cell_get(id.fold());
page_hash_latch& hash_lock = buf_pool.page_hash.lock_get(chain);
hash_lock.read_lock();
if (UNIV_UNLIKELY(id != block->page.id()
|| block->page.state() != BUF_BLOCK_FILE_PAGE
|| block->page.io_fix() != BUF_IO_NONE)) {
hash_lock->read_unlock();
hash_lock.read_unlock();
return(FALSE);
}
buf_block_buf_fix_inc(block);
hash_lock->read_unlock();
hash_lock.read_unlock();
block->page.set_accessed();
......@@ -3194,21 +3170,19 @@ buf_block_t *buf_page_try_get(const page_id_t page_id, mtr_t *mtr)
ut_ad(mtr);
ut_ad(mtr->is_active());
page_hash_latch *hash_lock;
buf_page_t *bpage= buf_pool.page_hash_get_locked<false>(page_id,
page_id.fold(),
&hash_lock);
if (!bpage)
return nullptr;
if (bpage->state() != BUF_BLOCK_FILE_PAGE)
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold());
page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain);
hash_lock.read_lock();
buf_page_t *bpage= buf_pool.page_hash.get(page_id, chain);
if (!bpage || bpage->state() != BUF_BLOCK_FILE_PAGE)
{
hash_lock->read_unlock();
hash_lock.read_unlock();
return nullptr;
}
buf_block_t *block= reinterpret_cast<buf_block_t*>(bpage);
buf_block_buf_fix_inc(block);
hash_lock->read_unlock();
hash_lock.read_unlock();
if (!block->lock.s_lock_try())
{
......@@ -3250,12 +3224,12 @@ static buf_block_t* buf_page_create_low(page_id_t page_id, ulint zip_size,
free_block->initialise(page_id, zip_size, 1);
const ulint fold= page_id.fold();
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold());
mysql_mutex_lock(&buf_pool.mutex);
loop:
buf_block_t *block= reinterpret_cast<buf_block_t*>
(buf_pool.page_hash_get_low(page_id, fold));
(buf_pool.page_hash.get(page_id, chain));
if (block && block->page.in_file() &&
!buf_pool.watch_is_sentinel(block->page))
......@@ -3294,11 +3268,11 @@ static buf_block_t* buf_page_create_low(page_id_t page_id, ulint zip_size,
#endif
break;
case BUF_BLOCK_ZIP_PAGE:
page_hash_latch *hash_lock= buf_pool.page_hash.lock_get(fold);
hash_lock->write_lock();
page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain);
hash_lock.write_lock();
if (block->page.io_fix() != BUF_IO_NONE)
{
hash_lock->write_unlock();
hash_lock.write_unlock();
/* Wait for buf_page_write_complete() to release the I/O fix. */
timespec abstime;
set_timespec_nsec(abstime, 1000000);
......@@ -3315,7 +3289,7 @@ static buf_block_t* buf_page_create_low(page_id_t page_id, ulint zip_size,
free_block->page.set_state(BUF_BLOCK_FILE_PAGE);
buf_unzip_LRU_add_block(free_block, FALSE);
hash_lock->write_unlock();
hash_lock.write_unlock();
buf_page_free_descriptor(&block->page);
block= free_block;
buf_block_buf_fix_inc(block);
......@@ -3351,11 +3325,10 @@ static buf_block_t* buf_page_create_low(page_id_t page_id, ulint zip_size,
/* The block must be put to the LRU list */
buf_LRU_add_block(&block->page, false);
page_hash_latch *hash_lock= buf_pool.page_hash.lock_get(fold);
hash_lock->write_lock();
page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain);
hash_lock.write_lock();
block->page.set_state(BUF_BLOCK_FILE_PAGE);
ut_d(block->page.in_page_hash= true);
HASH_INSERT(buf_page_t, hash, &buf_pool.page_hash, fold, &block->page);
buf_pool.page_hash.append(chain, &block->page);
block->lock.x_lock();
if (UNIV_UNLIKELY(zip_size))
......@@ -3364,7 +3337,7 @@ static buf_block_t* buf_page_create_low(page_id_t page_id, ulint zip_size,
release and reacquire buf_pool.mutex, by IO-fixing and X-latching
the block. */
block->page.set_io_fix(BUF_IO_READ);
hash_lock->write_unlock();
hash_lock.write_unlock();
/* buf_pool.mutex may be released and reacquired by
buf_buddy_alloc(). We must defer this operation until
......@@ -3381,7 +3354,7 @@ static buf_block_t* buf_page_create_low(page_id_t page_id, ulint zip_size,
block->page.set_io_fix(BUF_IO_NONE);
}
else
hash_lock->write_unlock();
hash_lock.write_unlock();
mysql_mutex_unlock(&buf_pool.mutex);
......@@ -3564,32 +3537,6 @@ static void buf_mark_space_corrupt(buf_page_t* bpage, const fil_space_t& space)
}
}
/** Release and evict a corrupted page.
@param bpage page that was being read */
ATTRIBUTE_COLD void buf_pool_t::corrupted_evict(buf_page_t *bpage)
{
const page_id_t id(bpage->id());
page_hash_latch *hash_lock= hash_lock_get(id);
mysql_mutex_lock(&mutex);
hash_lock->write_lock();
ut_ad(bpage->io_fix() == BUF_IO_READ);
ut_ad(!bpage->oldest_modification());
bpage->set_corrupt_id();
if (bpage->state() == BUF_BLOCK_FILE_PAGE)
reinterpret_cast<buf_block_t*>(bpage)->lock.x_unlock(true);
bpage->io_unfix();
/* remove from LRU and page_hash */
buf_LRU_free_one_page(bpage, id, hash_lock);
mysql_mutex_unlock(&mutex);
ut_d(auto n=) n_pend_reads--;
ut_ad(n > 0);
}
/** Mark a table corrupted.
@param[in] bpage Corrupted page
@param[in] node data file
......@@ -3955,7 +3902,8 @@ void buf_pool_t::validate()
case BUF_BLOCK_FILE_PAGE:
const page_id_t id = block->page.id();
ut_ad(page_hash_get_low(id, id.fold())
ut_ad(page_hash.get(id, page_hash.cell_get(
id.fold()))
== &block->page);
n_lru++;
break;
......@@ -3988,7 +3936,7 @@ void buf_pool_t::validate()
break;
}
const page_id_t id = b->id();
ut_ad(page_hash_get_low(id, id.fold()) == b);
ut_ad(page_hash.get(id, page_hash.cell_get(id.fold())) == b);
}
ut_ad(UT_LIST_GET_LEN(flush_list) == n_flushing);
......
......@@ -947,7 +947,9 @@ static bool buf_flush_check_neighbor(const page_id_t id, ulint fold, bool lru)
mysql_mutex_assert_owner(&buf_pool.mutex);
ut_ad(fold == id.fold());
buf_page_t *bpage= buf_pool.page_hash_get_low(id, fold);
/* FIXME: cell_get() is being invoked while holding buf_pool.mutex */
const buf_page_t *bpage=
buf_pool.page_hash.get(id, buf_pool.page_hash.cell_get(fold));
if (!bpage || buf_pool.watch_is_sentinel(*bpage))
return false;
......@@ -1107,9 +1109,10 @@ static ulint buf_flush_try_neighbors(fil_space_t *space,
id_fold= id.fold();
}
const buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(id_fold);
mysql_mutex_lock(&buf_pool.mutex);
if (buf_page_t *bpage= buf_pool.page_hash_get_low(id, id_fold))
if (buf_page_t *bpage= buf_pool.page_hash.get(id, chain))
{
ut_ad(bpage->in_file());
/* We avoid flushing 'non-old' blocks in an LRU flush,
......
......@@ -113,7 +113,7 @@ the object will be freed.
@param bpage buffer block
@param id page identifier
@param hash_lock buf_pool.page_hash latch (will be released here)
@param chain locked buf_pool.page_hash chain (will be released here)
@param zip whether bpage->zip of BUF_BLOCK_FILE_PAGE should be freed
If a compressed page is freed other compressed pages may be relocated.
......@@ -122,7 +122,8 @@ caller needs to free the page to the free list
@retval false if BUF_BLOCK_ZIP_PAGE was removed from page_hash. In
this case the block is already returned to the buddy allocator. */
static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id,
page_hash_latch *hash_lock, bool zip);
buf_pool_t::hash_chain &chain,
bool zip);
/** Free a block to buf_pool */
static void buf_LRU_block_free_hashed_page(buf_block_t *block)
......@@ -807,9 +808,9 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
/* We must hold an exclusive hash_lock to prevent
bpage->can_relocate() from changing due to a concurrent
execution of buf_page_get_low(). */
const ulint fold = id.fold();
page_hash_latch* hash_lock = buf_pool.page_hash.lock_get(fold);
hash_lock->write_lock();
buf_pool_t::hash_chain& chain= buf_pool.page_hash.cell_get(id.fold());
page_hash_latch& hash_lock = buf_pool.page_hash.lock_get(chain);
hash_lock.write_lock();
lsn_t oldest_modification = bpage->oldest_modification_acquire();
if (UNIV_UNLIKELY(!bpage->can_relocate())) {
......@@ -839,7 +840,7 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
} else if (oldest_modification
&& bpage->state() != BUF_BLOCK_FILE_PAGE) {
func_exit:
hash_lock->write_unlock();
hash_lock.write_unlock();
return(false);
} else if (bpage->state() == BUF_BLOCK_FILE_PAGE) {
......@@ -859,7 +860,7 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
ut_ad(bpage->can_relocate());
if (!buf_LRU_block_remove_hashed(bpage, id, hash_lock, zip)) {
if (!buf_LRU_block_remove_hashed(bpage, id, chain, zip)) {
ut_ad(!b);
mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex);
return(true);
......@@ -875,7 +876,7 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
if (UNIV_LIKELY_NULL(b)) {
buf_page_t* prev_b = UT_LIST_GET_PREV(LRU, b);
ut_ad(!buf_pool.page_hash_get_low(id, fold));
ut_ad(!buf_pool.page_hash.get(id, chain));
ut_ad(b->zip_size());
/* The field in_LRU_list of
......@@ -894,8 +895,10 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
ut_ad(!b->in_zip_hash);
ut_ad(b->in_LRU_list);
ut_ad(b->in_page_hash);
ut_d(b->in_page_hash = false);
b->hash = nullptr;
HASH_INSERT(buf_page_t, hash, &buf_pool.page_hash, fold, b);
buf_pool.page_hash.append(chain, b);
/* Insert b where bpage was in the LRU list. */
if (prev_b) {
......@@ -951,9 +954,9 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
decompressing the block while we release
hash_lock. */
b->set_io_fix(BUF_IO_PIN);
hash_lock->write_unlock();
hash_lock.write_unlock();
} else if (!zip) {
hash_lock->write_unlock();
hash_lock.write_unlock();
}
buf_block_t* block = reinterpret_cast<buf_block_t*>(bpage);
......@@ -1063,7 +1066,7 @@ the object will be freed.
@param bpage buffer block
@param id page identifier
@param hash_lock buf_pool.page_hash latch (will be released here)
@param chain locked buf_pool.page_hash chain (will be released here)
@param zip whether bpage->zip of BUF_BLOCK_FILE_PAGE should be freed
If a compressed page is freed other compressed pages may be relocated.
......@@ -1072,10 +1075,11 @@ caller needs to free the page to the free list
@retval false if BUF_BLOCK_ZIP_PAGE was removed from page_hash. In
this case the block is already returned to the buddy allocator. */
static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id,
page_hash_latch *hash_lock, bool zip)
buf_pool_t::hash_chain &chain,
bool zip)
{
mysql_mutex_assert_owner(&buf_pool.mutex);
ut_ad(hash_lock->is_write_locked());
ut_ad(buf_pool.page_hash.lock_get(chain).is_write_locked());
ut_a(bpage->io_fix() == BUF_IO_NONE);
ut_a(!bpage->buf_fix_count());
......@@ -1155,7 +1159,8 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id,
}
ut_ad(!bpage->in_zip_hash);
HASH_DELETE(buf_page_t, hash, &buf_pool.page_hash, id.fold(), bpage);
buf_pool.page_hash.remove(chain, bpage);
page_hash_latch& hash_lock = buf_pool.page_hash.lock_get(chain);
switch (bpage->state()) {
case BUF_BLOCK_ZIP_PAGE:
......@@ -1165,7 +1170,7 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id,
ut_a(bpage->zip.ssize);
ut_ad(!bpage->oldest_modification());
hash_lock->write_unlock();
hash_lock.write_unlock();
buf_pool_mutex_exit_forbid();
buf_buddy_free(bpage->zip.data, bpage->zip_size());
......@@ -1209,7 +1214,7 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id,
and by the time we'll release it in the caller we'd
have inserted the compressed only descriptor in the
page_hash. */
hash_lock->write_unlock();
hash_lock.write_unlock();
if (bpage->zip.data) {
/* Free the compressed page. */
......@@ -1240,20 +1245,38 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id,
return(false);
}
/** Remove one page from LRU list and put it to free list.
@param bpage file page to be freed
@param id page identifier
@param hash_lock buf_pool.page_hash latch (will be released here) */
void buf_LRU_free_one_page(buf_page_t *bpage, const page_id_t id,
page_hash_latch *hash_lock)
/** Release and evict a corrupted page.
@param bpage page that was being read */
ATTRIBUTE_COLD void buf_pool_t::corrupted_evict(buf_page_t *bpage)
{
const page_id_t id(bpage->id());
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(id.fold());
page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain);
mysql_mutex_lock(&mutex);
hash_lock.write_lock();
ut_ad(bpage->io_fix() == BUF_IO_READ);
ut_ad(!bpage->oldest_modification());
bpage->set_corrupt_id();
bpage->io_unfix();
if (bpage->state() == BUF_BLOCK_FILE_PAGE)
reinterpret_cast<buf_block_t*>(bpage)->lock.x_unlock(true);
while (bpage->buf_fix_count())
/* Wait for other threads to release the fix count
before releasing the bpage from LRU list. */
(void) LF_BACKOFF();
if (buf_LRU_block_remove_hashed(bpage, id, hash_lock, true))
/* remove from LRU and page_hash */
if (buf_LRU_block_remove_hashed(bpage, id, chain, true))
buf_LRU_block_free_hashed_page(reinterpret_cast<buf_block_t*>(bpage));
mysql_mutex_unlock(&mutex);
ut_d(auto n=) n_pend_reads--;
ut_ad(n > 0);
}
/** Update buf_pool.LRU_old_ratio.
......
......@@ -50,17 +50,17 @@ i/o-fixed buffer blocks */
/** Remove the sentinel block for the watch before replacing it with a
real block. watch_unset() or watch_occurred() will notice
that the block has been replaced with the real block.
@param watch sentinel */
inline void buf_pool_t::watch_remove(buf_page_t *watch)
@param watch sentinel
@param chain locked hash table chain */
inline void buf_pool_t::watch_remove(buf_page_t *watch,
buf_pool_t::hash_chain &chain)
{
mysql_mutex_assert_owner(&buf_pool.mutex);
ut_ad(hash_lock_get(watch->id())->is_write_locked());
ut_ad(page_hash.lock_get(chain).is_write_locked());
ut_a(watch_is_sentinel(*watch));
if (watch->buf_fix_count())
{
ut_ad(watch->in_page_hash);
ut_d(watch->in_page_hash= false);
HASH_DELETE(buf_page_t, hash, &page_hash, watch->id().fold(), watch);
page_hash.remove(chain, watch);
watch->set_buf_fix_count(0);
}
ut_ad(!watch->in_page_hash);
......@@ -114,11 +114,12 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
block->lock.x_lock(true);
}
const ulint fold= page_id.fold();
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold());
page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain);
mysql_mutex_lock(&buf_pool.mutex);
buf_page_t *hash_page= buf_pool.page_hash_get_low(page_id, fold);
buf_page_t *hash_page= buf_pool.page_hash.get(page_id, chain);
if (hash_page && !buf_pool.watch_is_sentinel(*hash_page))
{
/* The page is already in the buffer pool. */
......@@ -135,8 +136,7 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
bpage= &block->page;
/* Insert into the hash table of file pages */
page_hash_latch *hash_lock= buf_pool.page_hash.lock_get(fold);
hash_lock->write_lock();
hash_lock.write_lock();
if (hash_page)
{
......@@ -144,18 +144,16 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
auto buf_fix_count= hash_page->buf_fix_count();
ut_a(buf_fix_count > 0);
block->page.add_buf_fix_count(buf_fix_count);
buf_pool.watch_remove(hash_page);
buf_pool.watch_remove(hash_page, chain);
}
block->page.set_io_fix(BUF_IO_READ);
block->page.set_state(BUF_BLOCK_FILE_PAGE);
ut_ad(!block->page.in_page_hash);
ut_d(block->page.in_page_hash= true);
HASH_INSERT(buf_page_t, hash, &buf_pool.page_hash, fold, bpage);
hash_lock->write_unlock();
buf_pool.page_hash.append(chain, &block->page);
hash_lock.write_unlock();
/* The block must be put to the LRU list, to the old blocks */
buf_LRU_add_block(bpage, true/* to old blocks */);
buf_LRU_add_block(&block->page, true/* to old blocks */);
if (UNIV_UNLIKELY(zip_size))
{
......@@ -188,7 +186,7 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
check the page_hash again, as it may have been modified. */
if (UNIV_UNLIKELY(lru))
{
hash_page= buf_pool.page_hash_get_low(page_id, fold);
hash_page= buf_pool.page_hash.get(page_id, chain);
if (UNIV_UNLIKELY(hash_page && !buf_pool.watch_is_sentinel(*hash_page)))
{
......@@ -206,8 +204,7 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
bpage->init(BUF_BLOCK_ZIP_PAGE, page_id);
page_hash_latch *hash_lock= buf_pool.page_hash.lock_get(fold);
hash_lock->write_lock();
hash_lock.write_lock();
if (hash_page)
{
......@@ -215,14 +212,12 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
buf_pool_t::watch_unset() is executing concurrently,
waiting for buf_pool.mutex, which we are holding. */
bpage->add_buf_fix_count(hash_page->buf_fix_count());
buf_pool.watch_remove(hash_page);
buf_pool.watch_remove(hash_page, chain);
}
ut_ad(!bpage->in_page_hash);
ut_d(bpage->in_page_hash= true);
HASH_INSERT(buf_page_t, hash, &buf_pool.page_hash, fold, bpage);
buf_pool.page_hash.append(chain, bpage);
bpage->set_io_fix(BUF_IO_READ);
hash_lock->write_unlock();
hash_lock.write_unlock();
/* The block must be put to the LRU list, to the old blocks.
The zip size is already set into the page zip */
......@@ -408,11 +403,12 @@ buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf)
for (page_id_t i= low; i < high; ++i)
{
const ulint fold= i.fold();
page_hash_latch *hash_lock= buf_pool.page_hash.lock<false>(fold);
const buf_page_t *bpage= buf_pool.page_hash_get_low(i, fold);
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(i.fold());
page_hash_latch &latch= buf_pool.page_hash.lock_get(chain);
latch.read_lock();
const buf_page_t *bpage= buf_pool.page_hash.get(i, chain);
bool found= bpage && bpage->is_accessed() && buf_page_peek_if_young(bpage);
hash_lock->read_unlock();
latch.read_unlock();
if (found && !--count)
goto read_ahead;
}
......@@ -608,9 +604,10 @@ buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf)
unsigned prev_accessed= 0;
for (page_id_t i= low; i != high_1; ++i)
{
const ulint fold= i.fold();
page_hash_latch *hash_lock= buf_pool.page_hash.lock<false>(fold);
const buf_page_t* bpage= buf_pool.page_hash_get_low(i, fold);
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(i.fold());
page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain);
hash_lock.read_lock();
const buf_page_t* bpage= buf_pool.page_hash.get(i, chain);
if (i == page_id)
{
/* Read the natural predecessor and successor page addresses from
......@@ -621,7 +618,7 @@ buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf)
if (!bpage)
{
hard_fail:
hash_lock->read_unlock();
hash_lock.read_unlock();
goto fail;
}
const byte *f;
......@@ -661,7 +658,7 @@ buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf)
else if (!bpage)
{
failed:
hash_lock->read_unlock();
hash_lock.read_unlock();
if (--count)
continue;
goto fail;
......@@ -681,7 +678,7 @@ buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf)
prev_accessed= accessed;
if (fail)
goto failed;
hash_lock->read_unlock();
hash_lock.read_unlock();
}
/* If we got this far, read-ahead can be sensible: do it */
......
......@@ -1052,10 +1052,10 @@ fsp_page_create(fil_space_t *space, page_no_t offset, mtr_t *mtr)
if (UNIV_UNLIKELY(space->is_being_truncated))
{
const page_id_t page_id{space->id, offset};
const ulint fold= page_id.fold();
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold());
mysql_mutex_lock(&buf_pool.mutex);
block= reinterpret_cast<buf_block_t*>
(buf_pool.page_hash_get_low(page_id, fold));
(buf_pool.page_hash.get(page_id, chain));
if (block && block->page.oldest_modification() <= 1)
block= nullptr;
mysql_mutex_unlock(&buf_pool.mutex);
......
......@@ -3310,7 +3310,8 @@ ibuf_insert_low(
/* We check if the index page is suitable for buffered entries */
if (buf_pool.page_hash_contains(page_id)) {
if (buf_pool.page_hash_contains(
page_id, buf_pool.page_hash.cell_get(page_id.fold()))) {
commit_exit:
ibuf_mtr_commit(&bitmap_mtr);
goto fail_exit;
......@@ -3556,7 +3557,8 @@ ibuf_insert(
that the issuer of IBUF_OP_DELETE has called
buf_pool_t::watch_set(). */
if (buf_pool.page_hash_contains<true>(page_id)) {
if (buf_pool.page_hash_contains<true>(
page_id, buf_pool.page_hash.cell_get(page_id.fold()))) {
/* A buffer pool watch has been set or the
page has been read into the buffer pool.
Do not buffer the request. If a purge operation
......
......@@ -36,7 +36,6 @@ Created 11/5/1995 Heikki Tuuri
#include "assume_aligned.h"
#include "buf0types.h"
#ifndef UNIV_INNOCHECKSUM
#include "hash0hash.h"
#include "ut0byte.h"
#include "page0types.h"
#include "log0log.h"
......@@ -169,30 +168,10 @@ operator<<(
const page_id_t page_id);
#ifndef UNIV_INNOCHECKSUM
/*********************************************************************//**
Gets the current size of buffer buf_pool in bytes.
@return size in bytes */
UNIV_INLINE
ulint
buf_pool_get_curr_size(void);
/*========================*/
/********************************************************************//**
Allocates a buf_page_t descriptor. This function must succeed. In case
of failure we assert in this function. */
UNIV_INLINE
buf_page_t*
buf_page_alloc_descriptor(void)
/*===========================*/
MY_ATTRIBUTE((malloc));
/********************************************************************//**
Free a buf_page_t descriptor. */
UNIV_INLINE
void
buf_page_free_descriptor(
/*=====================*/
buf_page_t* bpage) /*!< in: bpage descriptor to free. */
MY_ATTRIBUTE((nonnull));
/** @return the current size of the buffer pool in bytes
(expands to srv_buf_pool_curr_size) */
# define buf_pool_get_curr_size() srv_buf_pool_curr_size
/** Allocate a buf_page_t descriptor with ut_zalloc_nokey(), so that the
descriptor (including buf_page_t::hash) starts out zero-filled.
@return the allocation, cast to buf_page_t* */
# define buf_page_alloc_descriptor() \
static_cast<buf_page_t*>(ut_zalloc_nokey(sizeof(buf_page_t)))
/** Release a buf_page_t descriptor by passing it to ut_free().
@param bpage descriptor to free */
# define buf_page_free_descriptor(bpage) ut_free(bpage)
/** Allocate a buffer block.
@return own: the allocated block, in state BUF_BLOCK_MEMORY */
......@@ -349,25 +328,6 @@ void buf_page_make_young(buf_page_t *bpage);
@param[in,out] mtr mini-transaction */
void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr);
/********************************************************************//**
Reads the freed_page_clock of a buffer block.
@return freed_page_clock */
UNIV_INLINE
unsigned
buf_page_get_freed_page_clock(
/*==========================*/
const buf_page_t* bpage) /*!< in: block */
MY_ATTRIBUTE((warn_unused_result));
/********************************************************************//**
Reads the freed_page_clock of a buffer block.
@return freed_page_clock */
UNIV_INLINE
unsigned
buf_block_get_freed_page_clock(
/*===========================*/
const buf_block_t* block) /*!< in: block */
MY_ATTRIBUTE((warn_unused_result));
/** Determine if a block is still close enough to the MRU end of the LRU list
meaning that it is not in danger of getting evicted and also implying
that it has been accessed recently.
......@@ -665,7 +625,7 @@ class buf_page_t
/* @{ */
public: // FIXME: fix fil_iterate()
/** Page id. Protected by buf_pool.hash_lock_get(id) when
/** Page id. Protected by buf_pool.page_hash.lock_get() when
the page is in buf_pool.page_hash. */
page_id_t id_;
private:
......@@ -687,13 +647,13 @@ class buf_page_t
Atomic_relaxed<buf_io_fix> io_fix_;
/** Block state. @see in_file().
State transitions between in_file() states and to
BUF_BLOCK_REMOVE_HASH are protected by buf_pool.hash_lock_get(id)
BUF_BLOCK_REMOVE_HASH are protected by buf_pool.page_hash.lock_get()
when the block is in buf_pool.page_hash.
Other transitions when in_LRU_list are protected by buf_pool.mutex. */
buf_page_state state_;
public:
/** buf_pool.page_hash link; protected by buf_pool.hash_lock_get(id) */
/** buf_pool.page_hash link; protected by buf_pool.page_hash.lock_get() */
buf_page_t *hash;
/* @} */
page_zip_des_t zip; /*!< compressed page; zip.data
......@@ -801,7 +761,6 @@ class buf_page_t
ut_d(in_free_list= false);
ut_d(in_LRU_list= false);
ut_d(in_page_hash= false);
HASH_INVALIDATE(this, hash);
}
/** Initialize some more fields */
......@@ -819,6 +778,7 @@ class buf_page_t
init();
id_= id;
buf_fix_count_= buf_fix_count;
hash= nullptr;
}
public:
......@@ -1347,7 +1307,14 @@ class buf_pool_t
inline const buf_block_t *not_freed() const;
#endif /* UNIV_DEBUG */
};
public:
/** Hash cell chain in page_hash_table.
One element of page_hash_table::array: the head of a singly-linked list
of block descriptors that are linked through buf_page_t::hash.
page_hash_table::lock_get() derives the covering latch from the address
of a chain and from sizeof(hash_chain), so the layout of this struct
must not be changed casually. */
struct hash_chain
{
  /** pointer to the first block; the chain continues via buf_page_t::hash */
  buf_page_t *first;
};
private:
/** Withdraw blocks from the buffer pool until meeting withdraw_target.
@return whether retry is needed */
inline bool withdraw_blocks();
......@@ -1509,89 +1476,29 @@ class buf_pool_t
return is_block_field(reinterpret_cast<const void*>(block));
}
/** Get the page_hash latch for a page */
page_hash_latch *hash_lock_get(const page_id_t id) const
{
return page_hash.lock_get(id.fold());
}
/** Look up a block descriptor.
@param id page identifier
@param fold id.fold()
@return block descriptor, possibly in watch[]
@retval nullptr if not found*/
buf_page_t *page_hash_get_low(const page_id_t id, const ulint fold)
{
ut_ad(id.fold() == fold);
#ifdef SAFE_MUTEX
DBUG_ASSERT(mysql_mutex_is_owner(&mutex) ||
page_hash.lock_get(fold)->is_locked());
#endif /* SAFE_MUTEX */
buf_page_t *bpage;
/* Look for the page in the hash table */
HASH_SEARCH(hash, &page_hash, fold, buf_page_t*, bpage,
ut_ad(bpage->in_page_hash), id == bpage->id());
return bpage;
}
private:
/** Look up a block descriptor.
@tparam exclusive whether the latch is to be acquired exclusively
public:
/** @return whether the buffer pool contains a page
@tparam watch whether to allow watch_is_sentinel()
@param page_id page identifier
@param fold page_id.fold()
@param hash_lock pointer to the acquired latch (to be released by caller)
@return pointer to the block
@retval nullptr if no block was found; !lock || !*lock will also hold */
template<bool exclusive,bool watch>
buf_page_t *page_hash_get_locked(const page_id_t page_id, ulint fold,
page_hash_latch **hash_lock)
{
ut_ad(hash_lock || !exclusive);
page_hash_latch *latch= page_hash.lock<exclusive>(fold);
buf_page_t *bpage= page_hash_get_low(page_id, fold);
@param chain hash table chain for page_id.fold() */
template<bool watch= false>
bool page_hash_contains(const page_id_t page_id, hash_chain &chain)
{
page_hash_latch &latch= page_hash.lock_get(chain);
latch.read_lock();
buf_page_t *bpage= page_hash.get(page_id, chain);
if (!bpage || watch_is_sentinel(*bpage))
{
if (exclusive)
latch->write_unlock();
else
latch->read_unlock();
if (hash_lock)
*hash_lock= nullptr;
latch.read_unlock();
return watch ? bpage : nullptr;
}
ut_ad(bpage->in_file());
ut_ad(page_id == bpage->id());
if (hash_lock)
*hash_lock= latch; /* to be released by the caller */
else if (exclusive)
latch->write_unlock();
else
latch->read_unlock();
latch.read_unlock();
return bpage;
}
public:
/** Look up a block descriptor.
@tparam exclusive whether the latch is to be acquired exclusively
@param page_id page identifier
@param fold page_id.fold()
@param hash_lock pointer to the acquired latch (to be released by caller)
@return pointer to the block
@retval nullptr if no block was found; !lock || !*lock will also hold */
template<bool exclusive>
buf_page_t *page_hash_get_locked(const page_id_t page_id, ulint fold,
page_hash_latch **hash_lock)
{ return page_hash_get_locked<exclusive,false>(page_id, fold, hash_lock); }
/** @return whether the buffer pool contains a page
@tparam watch whether to allow watch_is_sentinel()
@param page_id page identifier */
template<bool watch= false>
bool page_hash_contains(const page_id_t page_id)
{
return page_hash_get_locked<false,watch>(page_id, page_id.fold(), nullptr);
}
/** Determine if a block is a sentinel for a buffer pool watch.
@param bpage page descriptor
......@@ -1600,7 +1507,8 @@ class buf_pool_t
{
#ifdef SAFE_MUTEX
DBUG_ASSERT(mysql_mutex_is_owner(&mutex) ||
hash_lock_get(bpage.id())->is_locked());
page_hash.lock_get(page_hash.cell_get(bpage.id().fold())).
is_locked());
#endif /* SAFE_MUTEX */
ut_ad(bpage.in_file());
......@@ -1622,12 +1530,13 @@ class buf_pool_t
@return whether the page was read to the buffer pool */
bool watch_occurred(const page_id_t id)
{
const ulint fold= id.fold();
page_hash_latch *hash_lock= page_hash.lock<false>(fold);
hash_chain &chain= page_hash.cell_get(id.fold());
page_hash_latch &latch= page_hash.lock_get(chain);
latch.read_lock();
/* The page must exist because watch_set() increments buf_fix_count. */
buf_page_t *bpage= page_hash_get_low(id, fold);
buf_page_t *bpage= page_hash.get(id, chain);
const bool is_sentinel= watch_is_sentinel(*bpage);
hash_lock->read_unlock();
latch.read_unlock();
return !is_sentinel;
}
......@@ -1635,22 +1544,23 @@ class buf_pool_t
exclusive page hash latch. The *hash_lock may be released,
relocated, and reacquired.
@param id page identifier
@param hash_lock exclusively held page_hash latch
@param chain hash table chain with exclusively held page_hash
@return a buffer pool block corresponding to id
@retval nullptr if the block was not present, and a watch was installed */
inline buf_page_t *watch_set(const page_id_t id,
page_hash_latch **hash_lock);
inline buf_page_t *watch_set(const page_id_t id, hash_chain &chain);
/** Stop watching whether a page has been read in.
watch_set(id) must have returned nullptr before.
@param id page identifier */
void watch_unset(const page_id_t id);
@param id page identifier
@param chain unlocked hash table chain */
void watch_unset(const page_id_t id, hash_chain &chain);
/** Remove the sentinel block for the watch before replacing it with a
real block. watch_unset() or watch_occurred() will notice
that the block has been replaced with the real block.
@param watch sentinel */
inline void watch_remove(buf_page_t *watch);
@param watch sentinel
@param chain locked hash table chain */
inline void watch_remove(buf_page_t *watch, hash_chain &chain);
/** @return whether less than 1/4 of the buffer pool is available */
bool running_out() const
......@@ -1728,7 +1638,7 @@ class buf_pool_t
/** read-ahead request size in pages */
Atomic_counter<uint32_t> read_ahead_area;
/** Hash table with singly-linked overflow lists. @see hash_table_t */
/** Hash table with singly-linked overflow lists */
struct page_hash_table
{
static_assert(CPU_LEVEL1_DCACHE_LINESIZE >= 64, "less than 64 bytes");
......@@ -1744,7 +1654,7 @@ class buf_pool_t
/** number of payload elements in array[] */
Atomic_relaxed<ulint> n_cells;
/** the hash table, with pad(n_cells) elements, aligned to L1 cache size */
hash_cell_t *array;
hash_chain *array;
/** Create the hash table.
@param n the lower bound of n_cells */
......@@ -1771,32 +1681,72 @@ class buf_pool_t
{
return pad(hash(fold, n_cells));
}
/** Get a page_hash latch. */
page_hash_latch *lock_get(ulint fold, ulint n) const
public:
/** @return the latch covering a hash table chain */
static page_hash_latch &lock_get(hash_chain &chain)
{
static_assert(!((ELEMENTS_PER_LATCH + 1) & ELEMENTS_PER_LATCH),
"must be one less than a power of 2");
return reinterpret_cast<page_hash_latch*>
(&array[calc_hash(fold, n) & ~ELEMENTS_PER_LATCH]);
const size_t addr= reinterpret_cast<size_t>(&chain);
ut_ad(addr & (ELEMENTS_PER_LATCH * sizeof chain));
return *reinterpret_cast<page_hash_latch*>
(addr & ~(ELEMENTS_PER_LATCH * sizeof chain));
}
public:
/** Get a page_hash latch. */
page_hash_latch *lock_get(ulint fold) const
{ return lock_get(fold, n_cells); }
/** Acquire an array latch.
@tparam exclusive whether the latch is to be acquired exclusively
@param fold hash bucket key */
template<bool exclusive> page_hash_latch *lock(ulint fold)
{
page_hash_latch *latch= lock_get(fold, n_cells);
if (exclusive)
latch->write_lock();
else
latch->read_lock();
return latch;
/** Map a fold value to its hash bucket.
@param fold  hash key; call sites pass page_id_t::fold()
@return the element of array[] selected by calc_hash(fold, n_cells) */
hash_chain &cell_get(ulint fold) const
{
  const auto slot= calc_hash(fold, n_cells);
  return array[slot];
}
/** Link a block descriptor at the tail of a hash bucket chain.
@param chain  bucket to extend
@param bpage  descriptor to add; debug builds assert that it is not
              already in page_hash and that its hash link is null */
void append(hash_chain &chain, buf_page_t *bpage)
{
  ut_ad(!bpage->in_page_hash);
  ut_ad(!bpage->hash);
  ut_d(bpage->in_page_hash= true);
  /* Walk the links until we reach the terminating null pointer. */
  buf_page_t **tail;
  for (tail= &chain.first; *tail; tail= &(*tail)->hash)
  {
    ut_ad((*tail)->in_page_hash);
  }
  *tail= bpage;
}
/** Unlink a block descriptor from a hash bucket chain.
@param chain  bucket that contains bpage
@param bpage  descriptor to remove; it must be present in chain, because
              the search does not stop at the end of the list */
void remove(hash_chain &chain, buf_page_t *bpage)
{
  ut_ad(bpage->in_page_hash);
  /* Find the link (chain head or a predecessor's hash) pointing to bpage. */
  buf_page_t **link;
  for (link= &chain.first; *link != bpage; link= &(*link)->hash)
  {
    ut_ad((*link)->in_page_hash);
  }
  /* Bypass bpage, then reset its own link and debug flag. */
  *link= bpage->hash;
  ut_d(bpage->in_page_hash= false);
  bpage->hash= nullptr;
}
/** Substitute one block descriptor for another in a hash bucket chain.
@param chain  bucket that contains old
@param old    descriptor to take out; it must be present in chain
@param bpage  descriptor to put in its place; debug builds assert that
              its hash link already equals that of old and that its
              in_page_hash flag is already set */
void replace(hash_chain &chain, buf_page_t *old, buf_page_t *bpage)
{
  ut_ad(old->in_page_hash);
  ut_ad(bpage->in_page_hash);
  ut_d(old->in_page_hash= false);
  ut_ad(bpage->hash == old->hash);
  old->hash= nullptr;
  /* Find the link (chain head or a predecessor's hash) pointing to old. */
  buf_page_t **link;
  for (link= &chain.first; *link != old; link= &(*link)->hash)
  {
    ut_ad((*link)->in_page_hash);
  }
  *link= bpage;
}
/** Look up a page in a hash bucket chain. */
inline buf_page_t *get(const page_id_t id, const hash_chain &chain) const;
/** Exclusively acquire all latches */
inline void write_lock_all();
......@@ -2032,6 +1982,23 @@ class buf_pool_t
/** The InnoDB buffer pool */
extern buf_pool_t buf_pool;
/** Look up a page in a hash bucket chain.
With SAFE_MUTEX, asserts that the caller owns buf_pool.mutex or holds the
latch that covers the chain.
@param id     page identifier to search for
@param chain  hash bucket to search
@return the first descriptor in the chain whose id() equals id
@retval nullptr if the chain contains no such descriptor */
inline buf_page_t *buf_pool_t::page_hash_table::get(const page_id_t id,
                                                    const hash_chain &chain)
  const
{
#ifdef SAFE_MUTEX
  DBUG_ASSERT(mysql_mutex_is_owner(&buf_pool.mutex) ||
              lock_get(const_cast<hash_chain&>(chain)).is_locked());
#endif /* SAFE_MUTEX */
  buf_page_t *found= chain.first;
  while (found)
  {
    ut_ad(found->in_page_hash);
    if (found->id() == id)
      break;
    found= found->hash;
  }
  /* Either the matching descriptor, or nullptr at the end of the chain. */
  return found;
}
#ifdef SUX_LOCK_GENERIC
inline void page_hash_latch::read_lock()
{
......@@ -2070,18 +2037,17 @@ inline void buf_page_t::set_state(buf_page_state state)
we are holding the hash_lock. */
break;
case BUF_BLOCK_MEMORY:
if (!in_file()) break;
/* fall through */
case BUF_BLOCK_FILE_PAGE:
ut_ad(buf_pool.hash_lock_get(id_)->is_write_locked());
break;
case BUF_BLOCK_NOT_USED:
if (!in_file()) break;
/* fall through */
break;
case BUF_BLOCK_ZIP_PAGE:
ut_ad(buf_pool.hash_lock_get(id_)->is_write_locked() ||
(this >= &buf_pool.watch[0] &&
this <= &buf_pool.watch[UT_ARR_SIZE(buf_pool.watch)]));
if (this >= &buf_pool.watch[0] &&
this <= &buf_pool.watch[UT_ARR_SIZE(buf_pool.watch)])
break;
/* fall through */
case BUF_BLOCK_FILE_PAGE:
ut_ad(buf_pool.page_hash.lock_get(buf_pool.page_hash.cell_get(id_.fold())).
is_write_locked());
break;
}
#endif
......@@ -2113,7 +2079,8 @@ inline void buf_page_t::set_corrupt_id()
break;
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_FILE_PAGE:
ut_ad(buf_pool.hash_lock_get(id_)->is_write_locked());
ut_ad(buf_pool.page_hash.lock_get(buf_pool.page_hash.cell_get(id_.fold())).
is_write_locked());
break;
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_MEMORY:
......
......@@ -2,7 +2,7 @@
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Copyright (c) 2014, 2020, MariaDB Corporation.
Copyright (c) 2014, 2021, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
......@@ -37,42 +37,6 @@ Created 11/5/1995 Heikki Tuuri
#include "buf0rea.h"
#include "fsp0types.h"
/** Report the current size of the buffer pool.
@return srv_buf_pool_curr_size, the size in bytes */
UNIV_INLINE
ulint
buf_pool_get_curr_size(void)
{
	const ulint	curr_size = srv_buf_pool_curr_size;

	return curr_size;
}
/** Read the freed_page_clock of a buffer page descriptor.
@param bpage	page descriptor
@return bpage->freed_page_clock */
UNIV_INLINE
unsigned
buf_page_get_freed_page_clock(const buf_page_t* bpage)
{
	/* Callers do not always hold buf_pool.mutex when reading this. */
	const unsigned	freed_clock = bpage->freed_page_clock;

	return freed_clock;
}
/** Read the freed_page_clock of a buffer block.
@param block	buffer block
@return the freed_page_clock of the embedded page descriptor block->page */
UNIV_INLINE
unsigned
buf_block_get_freed_page_clock(const buf_block_t* block)
{
	const buf_page_t*	bpage = &block->page;

	return buf_page_get_freed_page_clock(bpage);
}
/** Determine if a block is still close enough to the MRU end of the LRU list
meaning that it is not in danger of getting evicted and also implying
that it has been accessed recently.
......@@ -154,35 +118,6 @@ ok:
}
#endif /* UNIV_DEBUG */
/********************************************************************//**
Allocates a zero-initialized buf_page_t descriptor. This function must
succeed; debug builds assert that the allocation is not null.
The memory is deliberately NOT marked with MEM_UNDEFINED: doing so right
after ut_zalloc_nokey() would tell memory checkers that the zero-filled
contents are uninitialized, while buf_page_t::hash is expected to be
zero-initialized by the hash chain code.
@return the allocated descriptor. */
UNIV_INLINE
buf_page_t*
buf_page_alloc_descriptor(void)
/*===========================*/
{
	buf_page_t*	bpage = static_cast<buf_page_t*>(
		ut_zalloc_nokey(sizeof(buf_page_t)));
	ut_ad(bpage);
	return(bpage);
}
/** Release a buf_page_t descriptor by handing it to ut_free().
@param bpage	descriptor to free */
UNIV_INLINE
void
buf_page_free_descriptor(buf_page_t* bpage)
{
	ut_free(bpage);
}
/** Allocate a buffer block.
@return own: the allocated block, in state BUF_BLOCK_MEMORY */
inline buf_block_t *buf_block_alloc()
......
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2020, MariaDB Corporation.
Copyright (c) 2017, 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -24,11 +24,10 @@ The database buffer pool LRU replacement algorithm
Created 11/5/1995 Heikki Tuuri
*******************************************************/
#ifndef buf0lru_h
#define buf0lru_h
#pragma once
#include "ut0byte.h"
#include "buf0types.h"
#include "hash0hash.h"
// Forward declaration
struct trx_t;
......@@ -132,14 +131,6 @@ policy at the end of each interval. */
void
buf_LRU_stat_update();
/** Remove one page from LRU list and put it to free list.
@param bpage file page to be freed
@param id page identifier
@param hash_lock buf_pool.page_hash latch (will be released here) */
void buf_LRU_free_one_page(buf_page_t *bpage, const page_id_t id,
page_hash_latch *hash_lock)
MY_ATTRIBUTE((nonnull));
#ifdef UNIV_DEBUG
/** Validate the LRU list. */
void buf_LRU_validate();
......@@ -200,5 +191,3 @@ Increments the I/O counter in buf_LRU_stat_cur. */
/********************************************************************//**
Increments the page_zip_decompress() counter in buf_LRU_stat_cur. */
#define buf_LRU_stat_inc_unzip() buf_LRU_stat_cur.unzip++
#endif
......@@ -181,12 +181,11 @@ enum rw_lock_type_t
#ifdef SUX_LOCK_GENERIC
class page_hash_latch : public rw_lock
{
public:
/** Wait for a shared lock */
void read_lock_wait();
/** Wait for an exclusive lock */
void write_lock_wait();
public:
/** Acquire a shared lock */
inline void read_lock();
/** Acquire an exclusive lock */
......
......@@ -117,18 +117,6 @@ do {\
HASH_INVALIDATE(DATA, NAME);\
} while (0)
/* Replaces DATA_OLD with DATA_NEW in the hash chain selected by FOLD.
First copies the chain link (field NAME) of DATA_OLD into DATA_NEW, then
walks the chain of TABLE->array[TABLE->calc_hash(FOLD)] through the NAME
links until it finds the pointer that refers to DATA_OLD, and overwrites
that pointer with DATA_NEW. DATA_OLD must be present in the chain: the
walk has no end-of-chain check. The NAME field of DATA_OLD is left
unchanged. */
#define HASH_REPLACE(TYPE, NAME, TABLE, FOLD, DATA_OLD, DATA_NEW) \
do { \
(DATA_NEW)->NAME = (DATA_OLD)->NAME; \
\
hash_cell_t& cell3333 \
= (TABLE)->array[(TABLE)->calc_hash(FOLD)]; \
TYPE** struct3333 = (TYPE**)&cell3333.node; \
while (*struct3333 != DATA_OLD) { \
struct3333 = &((*struct3333)->NAME); \
} \
*struct3333 = DATA_NEW; \
} while (0)
/*******************************************************************//**
Gets the first struct in a hash chain, NULL if none. */
......
......@@ -841,6 +841,8 @@ constexpr const char* const auto_event_names[] =
"buf0buf",
"buf0dblwr",
"buf0dump",
"buf0lru",
"buf0rea",
"dict0dict",
"dict0mem",
"dict0stats",
......
......@@ -2862,7 +2862,9 @@ static void recv_read_in_area(page_id_t page_id)
&& i->first.space() == page_id.space()
&& i->first.page_no() < up_limit; i++) {
if (i->second.state == page_recv_t::RECV_NOT_PROCESSED
&& !buf_pool.page_hash_contains(i->first)) {
&& !buf_pool.page_hash_contains(
i->first,
buf_pool.page_hash.cell_get(i->first.fold()))) {
i->second.state = page_recv_t::RECV_BEING_READ;
*p++ = i->first.page_no();
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment