Commit a4cda66e authored by Marko Mäkelä

MDEV-33588 buf::Block_hint is a performance hog

In so-called optimistic buffer pool lookups, we must not
dereference a block descriptor before we have made sure that
it is accessible. While buf_pool_t::resize() is running,
block descriptors could become invalid.

The buf::Block_hint class was essentially duplicating
a buf_pool.page_hash lookup that was executed in
buf_page_optimistic_get() anyway. For better locality of
reference, we had better execute that lookup only once.

buf_page_optimistic_fix(): Prepare for buf_page_optimistic_get().
This basically is a simpler version of buf::Block_hint.

buf_page_optimistic_get(): Assume that buf_page_optimistic_fix()
has been called and the page identifier verified. Should the block
be evicted, the block->modify_clock will be invalidated; we do not
need to check the block->page.id() again. It suffices to check
the block->modify_clock after acquiring the page latch.

btr_pcur_t::old_page_id: Store the expected page identifier
for buf_page_optimistic_fix().

btr_pcur_t::block_when_stored: Remove. This was duplicating
page_cur_t::block.

btr_pcur_optimistic_latch_leaves(): Remove redundant parameters.
First, invoke buf_page_optimistic_fix() on the requested page.
If needed, acquire a latch on the left page. Finally, acquire
a latch on the target page and recheck the block->modify_clock.
If the page had been freed while we were not holding a page latch,
fall back to the slow path. Validate the FIL_PAGE_PREV after
acquiring a latch on the current page. The block->modify_clock
is only being incremented when records are deleted or pages
reorganized or evicted; it does not guard against concurrent
page splits.

Reviewed by: Debarun Banerjee
parent d90a2b44
......@@ -143,7 +143,6 @@ SET(INNOBASE_SOURCES
btr/btr0pcur.cc
btr/btr0sea.cc
btr/btr0defragment.cc
buf/buf0block_hint.cc
buf/buf0buddy.cc
buf/buf0buf.cc
buf/buf0dblwr.cc
......
......@@ -264,6 +264,8 @@ btr_root_block_get(
mtr_t* mtr, /*!< in: mtr */
dberr_t* err) /*!< out: error code */
{
ut_ad(mode != RW_NO_LATCH);
if (!index->table || !index->table->space)
{
*err= DB_TABLESPACE_NOT_FOUND;
......@@ -285,8 +287,7 @@ btr_root_block_get(
if (UNIV_LIKELY(block != nullptr))
{
if (UNIV_UNLIKELY(mode == RW_NO_LATCH));
else if (!!page_is_comp(block->page.frame) !=
if (!!page_is_comp(block->page.frame) !=
index->table->not_redundant() ||
btr_page_get_index_id(block->page.frame) != index->id ||
!fil_page_index_page_check(block->page.frame) ||
......@@ -568,6 +569,31 @@ btr_page_alloc_for_ibuf(
return new_block;
}
static MY_ATTRIBUTE((nonnull, warn_unused_result))
/** Obtain the index root page for allocating or freeing pages.
The mini-transaction is first asked whether it already has the root page
registered (queried with MTR_MEMO_PAGE_SX_FIX); only if it does not is the
page requested through btr_root_block_get() with RW_SX_LATCH.
@param index   index tree
@param mtr     mini-transaction
@param err     error code
@return root page
@retval nullptr if an error occurred */
buf_block_t *btr_root_block_sx(dict_index_t *index, mtr_t *mtr, dberr_t *err)
{
  const page_id_t root_id{index->table->space_id, index->page};

  if (buf_block_t *latched=
      mtr->get_already_latched(root_id, MTR_MEMO_PAGE_SX_FIX))
  {
#ifdef BTR_CUR_HASH_ADAPT
    /* Debug check only: an attached adaptive hash index must not
    refer to an index that has been freed. */
    ut_ad(!latched->index || !latched->index->freed());
#endif
    return latched;
  }

  /* The mini-transaction did not report the page: fetch it now.
  A nullptr result is handed to the caller unchanged. */
  return btr_root_block_get(index, RW_SX_LATCH, mtr, err);
}
/**************************************************************//**
Allocates a new file page to be used in an index tree. NOTE: we assume
that the caller has made the reservation for free extents!
......@@ -589,21 +615,9 @@ btr_page_alloc_low(
page should be initialized. */
dberr_t* err) /*!< out: error code */
{
const auto savepoint= mtr->get_savepoint();
buf_block_t *root= btr_root_block_get(index, RW_NO_LATCH, mtr, err);
buf_block_t *root= btr_root_block_sx(index, mtr, err);
if (UNIV_UNLIKELY(!root))
return root;
const bool have_latch= mtr->have_u_or_x_latch(*root);
#ifdef BTR_CUR_HASH_ADAPT
ut_ad(!have_latch || !root->index || !root->index->freed());
#endif
mtr->rollback_to_savepoint(savepoint);
if (!have_latch &&
UNIV_UNLIKELY(!(root= btr_root_block_get(index, RW_SX_LATCH, mtr, err))))
return root;
fseg_header_t *seg_header= root->page.frame +
(level ? PAGE_HEADER + PAGE_BTR_SEG_TOP : PAGE_HEADER + PAGE_BTR_SEG_LEAF);
return fseg_alloc_free_page_general(seg_header, hint_page_no, file_direction,
......@@ -696,24 +710,16 @@ dberr_t btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr,
fil_space_t *space= index->table->space;
dberr_t err;
const auto savepoint= mtr->get_savepoint();
if (buf_block_t *root= btr_root_block_get(index, RW_NO_LATCH, mtr, &err))
if (buf_block_t *root= btr_root_block_sx(index, mtr, &err))
{
const bool have_latch= mtr->have_u_or_x_latch(*root);
#ifdef BTR_CUR_HASH_ADAPT
ut_ad(!have_latch || !root->index || !root->index->freed());
#endif
mtr->rollback_to_savepoint(savepoint);
if (have_latch ||
(root= btr_root_block_get(index, RW_SX_LATCH, mtr, &err)))
err= fseg_free_page(&root->page.frame[blob ||
page_is_leaf(block->page.frame)
? PAGE_HEADER + PAGE_BTR_SEG_LEAF
: PAGE_HEADER + PAGE_BTR_SEG_TOP],
space, page, mtr, space_latched);
}
if (err == DB_SUCCESS)
buf_page_free(space, page, mtr);
}
/* The page was marked free in the allocation bitmap, but it
should remain exclusively latched until mtr_t::commit() or until it
......
......@@ -837,7 +837,7 @@ PageBulk::release()
m_block->page.fix();
/* No other threads can modify this block. */
m_modify_clock = buf_block_get_modify_clock(m_block);
m_modify_clock = m_block->modify_clock;
m_mtr.commit();
}
......
......@@ -935,7 +935,7 @@ static inline page_cur_mode_t btr_cur_nonleaf_mode(page_cur_mode_t mode)
return PAGE_CUR_LE;
}
static MY_ATTRIBUTE((nonnull))
MY_ATTRIBUTE((nonnull,warn_unused_result))
/** Acquire a latch on the previous page without violating the latching order.
@param block index page
@param page_id page identifier with valid space identifier
......@@ -946,7 +946,8 @@ static MY_ATTRIBUTE((nonnull))
@retval 0 if an error occurred
@retval 1 if the page could be latched in the wrong order
@retval -1 if the latch on block was temporarily released */
int btr_latch_prev(buf_block_t *block, page_id_t page_id, ulint zip_size,
static int btr_latch_prev(buf_block_t *block, page_id_t page_id,
ulint zip_size,
rw_lock_type_t rw_latch, mtr_t *mtr, dberr_t *err)
{
ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_X_LATCH);
......@@ -955,47 +956,80 @@ int btr_latch_prev(buf_block_t *block, page_id_t page_id, ulint zip_size,
const auto prev_savepoint= mtr->get_savepoint();
ut_ad(block == mtr->at_savepoint(prev_savepoint - 1));
page_id.set_page_no(btr_page_get_prev(block->page.frame));
const page_t *const page= block->page.frame;
page_id.set_page_no(btr_page_get_prev(page));
/* We are holding a latch on the current page.
We will start by buffer-fixing the left sibling. Waiting for a latch
on it while holding a latch on the current page could lead to a
deadlock, because another thread could hold that latch and wait for
a right sibling page latch (the current page).
If there is a conflict, we will temporarily release our latch on the
current block while waiting for a latch on the left sibling. The
buffer-fixes on both blocks will prevent eviction. */
retry:
buf_block_t *prev= buf_page_get_gen(page_id, zip_size, RW_NO_LATCH, nullptr,
BUF_GET, mtr, err, false);
if (UNIV_UNLIKELY(!prev))
return 0;
int ret= 1;
if (UNIV_UNLIKELY(rw_latch == RW_S_LATCH))
static_assert(MTR_MEMO_PAGE_S_FIX == mtr_memo_type_t(BTR_SEARCH_LEAF), "");
static_assert(MTR_MEMO_PAGE_X_FIX == mtr_memo_type_t(BTR_MODIFY_LEAF), "");
if (rw_latch == RW_S_LATCH
? prev->page.lock.s_lock_try() : prev->page.lock.x_lock_try())
{
if (UNIV_LIKELY(prev->page.lock.s_lock_try()))
mtr->lock_register(prev_savepoint, mtr_memo_type_t(rw_latch));
if (UNIV_UNLIKELY(prev->page.id() != page_id))
{
mtr->lock_register(prev_savepoint, MTR_MEMO_PAGE_S_FIX);
goto prev_latched;
fail:
/* the page was just read and found to be corrupted */
mtr->rollback_to_savepoint(prev_savepoint);
return 0;
}
block->page.lock.s_unlock();
}
else
{
if (UNIV_LIKELY(prev->page.lock.x_lock_try()))
{
mtr->lock_register(prev_savepoint, MTR_MEMO_PAGE_X_FIX);
goto prev_latched;
}
ut_ad(mtr->at_savepoint(mtr->get_savepoint() - 1)->page.id() == page_id);
mtr->release_last_page();
if (rw_latch == RW_S_LATCH)
block->page.lock.s_unlock();
else
block->page.lock.x_unlock();
}
ret= -1;
mtr->lock_register(prev_savepoint - 1, MTR_MEMO_BUF_FIX);
mtr->rollback_to_savepoint(prev_savepoint);
prev= buf_page_get_gen(page_id, zip_size, rw_latch, prev,
BUF_GET, mtr, err, false);
BUF_GET, mtr, err);
if (rw_latch == RW_S_LATCH)
block->page.lock.s_lock();
else
block->page.lock.x_lock();
const page_id_t prev_page_id= page_id;
page_id.set_page_no(btr_page_get_prev(page));
if (UNIV_UNLIKELY(page_id != prev_page_id))
{
mtr->release_last_page();
if (page_id.page_no() == FIL_NULL)
return -1;
goto retry;
}
if (UNIV_UNLIKELY(!prev))
return 0;
mtr->upgrade_buffer_fix(prev_savepoint - 1, rw_latch);
prev_latched:
if (memcmp_aligned<2>(FIL_PAGE_TYPE + prev->page.frame,
FIL_PAGE_TYPE + block->page.frame, 2) ||
memcmp_aligned<2>(PAGE_HEADER + PAGE_INDEX_ID + prev->page.frame,
PAGE_HEADER + PAGE_INDEX_ID + block->page.frame, 8) ||
page_is_comp(prev->page.frame) != page_is_comp(block->page.frame))
goto fail;
ret= -1;
}
const page_t *const p= prev->page.frame;
if (memcmp_aligned<4>(FIL_PAGE_NEXT + p, FIL_PAGE_OFFSET + page, 4) ||
memcmp_aligned<2>(FIL_PAGE_TYPE + p, FIL_PAGE_TYPE + page, 2) ||
memcmp_aligned<2>(PAGE_HEADER + PAGE_INDEX_ID + p,
PAGE_HEADER + PAGE_INDEX_ID + page, 8) ||
page_is_comp(p) != page_is_comp(page))
{
ut_ad("corrupted" == 0); // FIXME: remove this
*err= DB_CORRUPTION;
......
......@@ -179,10 +179,8 @@ btr_pcur_store_position(
cursor->old_n_fields,
&cursor->old_rec_buf,
&cursor->buf_size);
cursor->block_when_stored.store(block);
/* Function try to check if block is S/X latch. */
cursor->modify_clock = buf_block_get_modify_clock(block);
cursor->old_page_id = block->page.id();
cursor->modify_clock = block->modify_clock;
}
/**************************************************************//**
......@@ -214,101 +212,80 @@ btr_pcur_copy_stored_position(
}
/** Optimistically latches the leaf page or pages requested.
@param[in] block guessed buffer block
@param[in,out] pcur cursor
@param[in,out] latch_mode BTR_SEARCH_LEAF, ...
@param[in,out] mtr mini-transaction
@return true if success */
@param pcur persistent cursor
@param latch_mode BTR_SEARCH_LEAF, ...
@param mtr mini-transaction
@return true on success */
TRANSACTIONAL_TARGET
static bool btr_pcur_optimistic_latch_leaves(buf_block_t *block,
btr_pcur_t *pcur,
static bool btr_pcur_optimistic_latch_leaves(btr_pcur_t *pcur,
btr_latch_mode *latch_mode,
mtr_t *mtr)
{
ut_ad(block->page.buf_fix_count());
ut_ad(block->page.in_file());
ut_ad(block->page.frame);
static_assert(BTR_SEARCH_PREV & BTR_SEARCH_LEAF, "");
static_assert(BTR_MODIFY_PREV & BTR_MODIFY_LEAF, "");
static_assert((BTR_SEARCH_PREV ^ BTR_MODIFY_PREV) ==
(RW_S_LATCH ^ RW_X_LATCH), "");
buf_block_t *const block=
buf_page_optimistic_fix(pcur->btr_cur.page_cur.block, pcur->old_page_id);
if (!block)
return false;
if (*latch_mode == BTR_SEARCH_LEAF || *latch_mode == BTR_MODIFY_LEAF)
return buf_page_optimistic_get(block, rw_lock_type_t(*latch_mode),
pcur->modify_clock, mtr);
ut_ad(*latch_mode == BTR_SEARCH_PREV || *latch_mode == BTR_MODIFY_PREV);
const rw_lock_type_t mode=
rw_lock_type_t(*latch_mode & (RW_X_LATCH | RW_S_LATCH));
switch (*latch_mode) {
default:
ut_ad(*latch_mode == BTR_SEARCH_LEAF || *latch_mode == BTR_MODIFY_LEAF);
return buf_page_optimistic_get(mode, block, pcur->modify_clock, mtr);
case BTR_SEARCH_PREV:
case BTR_MODIFY_PREV:
page_id_t id{0};
uint64_t modify_clock;
uint32_t left_page_no;
ulint zip_size;
buf_block_t *left_block= nullptr;
const page_t *const page= block->page.frame;
{
transactional_shared_lock_guard<block_lock> g{block->page.lock};
if (block->modify_clock != pcur->modify_clock)
return false;
id= block->page.id();
zip_size= block->zip_size();
left_page_no= btr_page_get_prev(block->page.frame);
modify_clock= block->modify_clock;
left_page_no= btr_page_get_prev(page);
}
if (left_page_no != FIL_NULL)
{
left_block=
buf_page_get_gen(page_id_t(id.space(), left_page_no), zip_size,
mode, nullptr, BUF_GET_POSSIBLY_FREED, mtr);
const auto savepoint= mtr->get_savepoint();
mtr->memo_push(block, MTR_MEMO_BUF_FIX);
if (!left_block);
else if (btr_page_get_next(left_block->page.frame) != id.page_no())
if (UNIV_UNLIKELY(modify_clock != pcur->modify_clock))
{
release_left_block:
mtr->release_last_page();
fail:
mtr->rollback_to_savepoint(savepoint);
return false;
}
else
buf_page_make_young_if_needed(&left_block->page);
}
if (buf_page_optimistic_get(mode, block, pcur->modify_clock, mtr))
{
if (btr_page_get_prev(block->page.frame) == left_page_no)
buf_block_t *prev;
if (left_page_no != FIL_NULL)
{
/* block was already buffer-fixed while entering the function and
buf_page_optimistic_get() buffer-fixes it again. */
ut_ad(2 <= block->page.buf_fix_count());
*latch_mode= btr_latch_mode(mode);
return true;
}
mtr->release_last_page();
prev= buf_page_get_gen(page_id_t(pcur->old_page_id.space(),
left_page_no), block->zip_size(),
mode, nullptr, BUF_GET_POSSIBLY_FREED, mtr);
if (!prev ||
page_is_comp(prev->page.frame) != page_is_comp(block->page.frame) ||
memcmp_aligned<2>(block->page.frame, prev->page.frame, 2) ||
memcmp_aligned<2>(block->page.frame + PAGE_HEADER + PAGE_INDEX_ID,
prev->page.frame + PAGE_HEADER + PAGE_INDEX_ID, 8))
goto fail;
}
else
prev= nullptr;
ut_ad(block->page.buf_fix_count());
if (left_block)
goto release_left_block;
return false;
}
}
mtr->upgrade_buffer_fix(savepoint, mode);
/** Structure acts as functor to do the latching of leaf pages.
It returns true if latching of leaf pages succeeded and false
otherwise. */
struct optimistic_latch_leaves
{
btr_pcur_t *const cursor;
btr_latch_mode *const latch_mode;
mtr_t *const mtr;
if (UNIV_UNLIKELY(block->modify_clock != modify_clock) ||
UNIV_UNLIKELY(block->page.is_freed()) ||
(prev &&
memcmp_aligned<4>(FIL_PAGE_NEXT + prev->page.frame,
FIL_PAGE_OFFSET + page, 4)))
goto fail;
bool operator()(buf_block_t *hint) const
{
return hint &&
btr_pcur_optimistic_latch_leaves(hint, cursor, latch_mode, mtr);
}
};
return true;
}
/** Restores the stored position of a persistent cursor bufferfixing
the page and obtaining the specified latches. If the cursor position
......@@ -331,6 +308,7 @@ btr_pcur_t::SAME_UNIQ cursor position is on user rec and points on the
record with the same unique field values as in the stored record,
btr_pcur_t::NOT_SAME cursor position is not on user rec or points on
the record with not the same unique field values as in the stored record */
TRANSACTIONAL_TARGET
btr_pcur_t::restore_status
btr_pcur_t::restore_position(btr_latch_mode restore_latch_mode, mtr_t *mtr)
{
......@@ -361,7 +339,6 @@ btr_pcur_t::restore_position(btr_latch_mode restore_latch_mode, mtr_t *mtr)
latch_mode =
BTR_LATCH_MODE_WITHOUT_INTENTION(restore_latch_mode);
pos_state = BTR_PCUR_IS_POSITIONED;
block_when_stored.clear();
return restore_status::NOT_SAME;
}
......@@ -378,9 +355,8 @@ btr_pcur_t::restore_position(btr_latch_mode restore_latch_mode, mtr_t *mtr)
case BTR_SEARCH_PREV:
case BTR_MODIFY_PREV:
/* Try optimistic restoration. */
if (block_when_stored.run_with_hint(
optimistic_latch_leaves{this, &restore_latch_mode,
mtr})) {
if (btr_pcur_optimistic_latch_leaves(this, &restore_latch_mode,
mtr)) {
pos_state = BTR_PCUR_IS_POSITIONED;
latch_mode = restore_latch_mode;
......@@ -485,9 +461,8 @@ btr_pcur_t::restore_position(btr_latch_mode restore_latch_mode, mtr_t *mtr)
since the cursor can now be on a different page!
But we can retain the value of old_rec */
block_when_stored.store(btr_pcur_get_block(this));
modify_clock= buf_block_get_modify_clock(
block_when_stored.block());
old_page_id = btr_cur.page_cur.block->page.id();
modify_clock = btr_cur.page_cur.block->modify_clock;
mem_heap_free(heap);
......@@ -612,40 +587,33 @@ btr_pcur_move_backward_from_page(
return true;
}
buf_block_t* block = btr_pcur_get_block(cursor);
if (page_has_prev(block->page.frame)) {
buf_block_t* left_block
= mtr->at_savepoint(mtr->get_savepoint() - 1);
const page_t* const left = left_block->page.frame;
if (memcmp_aligned<4>(left + FIL_PAGE_NEXT,
block->page.frame
+ FIL_PAGE_OFFSET, 4)) {
/* This should be the right sibling page, or
if there is none, the current block. */
ut_ad(left_block == block
|| !memcmp_aligned<4>(left + FIL_PAGE_PREV,
block->page.frame
+ FIL_PAGE_OFFSET, 4));
/* The previous one must be the left sibling. */
left_block
= mtr->at_savepoint(mtr->get_savepoint() - 2);
ut_ad(!memcmp_aligned<4>(left_block->page.frame
+ FIL_PAGE_NEXT,
block->page.frame
+ FIL_PAGE_OFFSET, 4));
}
buf_block_t* block = mtr->at_savepoint(0);
ut_ad(block == btr_pcur_get_block(cursor));
const page_t* const page = block->page.frame;
/* btr_pcur_optimistic_latch_leaves() will acquire a latch on
the preceding page if one exists;
if that fails, btr_cur_t::search_leaf() invoked by
btr_pcur_open_with_no_init() will also acquire a latch on the
succeeding page. Our caller only needs one page latch. */
ut_ad(mtr->get_savepoint() <= 3);
if (page_has_prev(page)) {
buf_block_t* const left_block = mtr->at_savepoint(1);
ut_ad(!memcmp_aligned<4>(page + FIL_PAGE_OFFSET,
left_block->page.frame
+ FIL_PAGE_NEXT, 4));
if (btr_pcur_is_before_first_on_page(cursor)) {
/* Reposition on the previous page. */
page_cur_set_after_last(left_block,
&cursor->btr_cur.page_cur);
/* Release the right sibling. */
} else {
/* Release the left sibling. */
mtr->rollback_to_savepoint(0, 1);
block = left_block;
}
mtr->release(*block);
}
mtr->rollback_to_savepoint(1);
ut_ad(block == mtr->at_savepoint(0));
cursor->latch_mode = latch_mode;
cursor->old_rec = nullptr;
return false;
......
/*****************************************************************************
Copyright (c) 2020, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2020, 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License, version 2.0, as published by the
Free Software Foundation.
This program is also distributed with certain software (including but not
limited to OpenSSL) that is licensed under separate terms, as designated in a
particular file or component or in included license documentation. The authors
of MySQL hereby grant you an additional permission to link the program and
your derivative works with the separately licensed software that they have
included with MySQL.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*****************************************************************************/
#include "buf0block_hint.h"
namespace buf {
/** Buffer-fix m_block if it still is a valid descriptor of the page
m_page_id; otherwise forget the hint by calling clear().
Does nothing when no block is remembered. */
TRANSACTIONAL_TARGET
void Block_hint::buffer_fix_block_if_still_valid()
{
  if (!m_block)
    return;

  /* Checking whether m_block belongs to the current buf_pool requires
  that memory is not freed during the check, nor before the block has
  been buffer-fixed. Holding any one of the many latches that
  buf_pool_t::resize() acquires is sufficient for that.

  As in buf_page_optimistic_get(), m_block->page.id() has to be
  validated after the hash_lock has been acquired: the object may have
  been freed and may not be attached to buf_pool.page_hash right now.
  (The block could have been reused for a different page, whose slice
  of buf_pool.page_hash might be covered by another hash_lock that we
  do not hold.)

  Lastly, the block must not be in the process of being freed. */
  auto &cell= buf_pool.page_hash.cell_get(m_page_id.fold());
  transactional_shared_lock_guard<page_hash_latch> g
    {buf_pool.page_hash.lock_get(cell)};

  const bool still_valid=
    buf_pool.is_uncompressed(m_block) &&
    m_page_id == m_block->page.id() &&
    m_block->page.frame &&
    m_block->page.in_file();

  if (!still_valid)
  {
    clear();
    return;
  }

  m_block->page.fix();
}
}  // namespace buf
......@@ -2562,9 +2562,10 @@ buf_page_get_low(
if (state > buf_page_t::READ_FIX && state < buf_page_t::WRITE_FIX) {
if (mode == BUF_PEEK_IF_IN_POOL) {
ignore_block:
block->unfix();
ignore_unfixed:
ut_ad(mode == BUF_GET_POSSIBLY_FREED
|| mode == BUF_PEEK_IF_IN_POOL);
block->unfix();
if (err) {
*err = DB_CORRUPTION;
}
......@@ -2585,9 +2586,17 @@ buf_page_get_low(
const page_id_t id{block->page.id()};
block->page.lock.s_unlock();
if (UNIV_UNLIKELY(id != page_id)) {
ut_ad(id == page_id_t{~0ULL});
if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED)) {
block->page.unfix();
if (UNIV_UNLIKELY(id == page_id)) {
/* The page read was completed, and
another thread marked the page as free
while we were waiting. */
goto ignore_unfixed;
}
ut_ad(id == page_id_t{~0ULL});
if (++retries < BUF_PAGE_READ_MAX_RETRIES) {
goto loop;
}
......@@ -2598,6 +2607,7 @@ buf_page_get_low(
return nullptr;
}
ut_ad(id == page_id);
} else if (mode != BUF_PEEK_IF_IN_POOL) {
} else if (!mtr) {
ut_ad(!block->page.oldest_modification());
......@@ -2804,83 +2814,72 @@ buf_page_get_low(
#endif /* UNIV_DEBUG */
ut_ad(block->page.frame);
/* The state = block->page.state() may be stale at this point,
and in fact, at any point of time if we consider its
buffer-fix component. If the block is being read into the
buffer pool, it is possible that buf_page_t::read_complete()
will invoke buf_pool_t::corrupted_evict() and therefore
invalidate it (invoke buf_page_t::set_corrupt_id() and set the
state to FREED). Therefore, after acquiring the page latch we
must recheck the state. */
if (state >= buf_page_t::UNFIXED
&& allow_ibuf_merge
&& fil_page_get_type(block->page.frame) == FIL_PAGE_INDEX
&& page_is_leaf(block->page.frame)) {
block->page.lock.x_lock();
ut_ad(block->page.id() == page_id
|| (state >= buf_page_t::READ_FIX
&& state < buf_page_t::WRITE_FIX));
#ifdef BTR_CUR_HASH_ADAPT
btr_search_drop_page_hash_index(block, true);
#endif /* BTR_CUR_HASH_ADAPT */
dberr_t e;
if (UNIV_UNLIKELY(block->page.id() != page_id)) {
page_id_mismatch:
state = block->page.state();
e = DB_CORRUPTION;
ibuf_merge_corrupted:
if (err) {
*err = e;
}
if (block->page.id().is_corrupted()) {
buf_pool.corrupted_evict(&block->page, state);
}
return nullptr;
}
state = block->page.state();
ut_ad(state < buf_page_t::READ_FIX);
if (state >= buf_page_t::IBUF_EXIST
&& state < buf_page_t::REINIT) {
block->page.clear_ibuf_exist();
e = ibuf_merge_or_delete_for_page(block, page_id,
block->zip_size());
if (UNIV_UNLIKELY(e != DB_SUCCESS)) {
goto ibuf_merge_corrupted;
if (dberr_t local_err =
ibuf_merge_or_delete_for_page(block, page_id,
block->zip_size())) {
if (err) {
*err = local_err;
}
goto release_and_ignore_block;
}
} else if (state < buf_page_t::UNFIXED) {
release_and_ignore_block:
block->page.lock.x_unlock();
goto ignore_block;
}
if (rw_latch == RW_X_LATCH) {
goto get_latch_valid;
} else {
#ifdef BTR_CUR_HASH_ADAPT
btr_search_drop_page_hash_index(block, true);
#endif /* BTR_CUR_HASH_ADAPT */
switch (rw_latch) {
case RW_NO_LATCH:
block->page.lock.x_unlock();
break;
case RW_S_LATCH:
block->page.lock.x_unlock();
goto get_latch;
block->page.lock.s_lock();
break;
case RW_SX_LATCH:
block->page.lock.x_u_downgrade();
break;
default:
ut_ad(rw_latch == RW_X_LATCH);
}
mtr->memo_push(block, mtr_memo_type_t(rw_latch));
} else {
get_latch:
switch (rw_latch) {
case RW_NO_LATCH:
mtr->memo_push(block, MTR_MEMO_BUF_FIX);
return block;
case RW_S_LATCH:
block->page.lock.s_lock();
ut_ad(!block->page.is_read_fixed());
if (UNIV_UNLIKELY(block->page.id() != page_id)) {
block->page.lock.s_unlock();
block->page.lock.x_lock();
goto page_id_mismatch;
}
get_latch_valid:
mtr->memo_push(block, mtr_memo_type_t(rw_latch));
#ifdef BTR_CUR_HASH_ADAPT
btr_search_drop_page_hash_index(block, true);
#endif /* BTR_CUR_HASH_ADAPT */
break;
case RW_SX_LATCH:
block->page.lock.u_lock();
ut_ad(!block->page.is_io_fixed());
if (UNIV_UNLIKELY(block->page.id() != page_id)) {
block->page.lock.u_x_upgrade();
goto page_id_mismatch;
}
goto get_latch_valid;
break;
default:
ut_ad(rw_latch == RW_X_LATCH);
if (block->page.lock.x_lock_upgraded()) {
......@@ -2889,17 +2888,26 @@ buf_page_get_low(
mtr->page_lock_upgrade(*block);
return block;
}
if (UNIV_UNLIKELY(block->page.id() != page_id)) {
goto page_id_mismatch;
}
goto get_latch_valid;
mtr->memo_push(block, mtr_memo_type_t(rw_latch));
state = block->page.state();
if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED)) {
mtr->release_last_page();
goto ignore_unfixed;
}
ut_ad(page_id_t(page_get_space_id(block->page.frame),
page_get_page_no(block->page.frame))
== page_id);
ut_ad(state < buf_page_t::READ_FIX
|| state > buf_page_t::WRITE_FIX);
#ifdef BTR_CUR_HASH_ADAPT
btr_search_drop_page_hash_index(block, true);
#endif /* BTR_CUR_HASH_ADAPT */
}
ut_ad(page_id_t(page_get_space_id(block->page.frame),
page_get_page_no(block->page.frame)) == page_id);
return block;
}
......@@ -2995,83 +3003,76 @@ buf_page_get_gen(
return block;
}
/********************************************************************//**
This is the general function used to get optimistic access to a database
page.
@return TRUE if success */
TRANSACTIONAL_TARGET
bool buf_page_optimistic_get(ulint rw_latch, buf_block_t *block,
uint64_t modify_clock, mtr_t *mtr)
buf_block_t *buf_page_optimistic_fix(buf_block_t *block, page_id_t id)
{
ut_ad(block);
ut_ad(mtr);
ut_ad(mtr->is_active());
ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_X_LATCH);
if (have_transactional_memory);
else if (UNIV_UNLIKELY(!block->page.frame))
return false;
else
{
const auto state= block->page.state();
if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED ||
state >= buf_page_t::READ_FIX))
return false;
}
bool success;
const page_id_t id{block->page.id()};
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(id.fold());
bool have_u_not_x= false;
{
transactional_shared_lock_guard<page_hash_latch> g
{buf_pool.page_hash.lock_get(chain)};
if (UNIV_UNLIKELY(id != block->page.id() || !block->page.frame))
return false;
if (UNIV_UNLIKELY(!buf_pool.is_uncompressed(block) ||
id != block->page.id() || !block->page.frame))
return nullptr;
const auto state= block->page.state();
if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED ||
state >= buf_page_t::READ_FIX))
return false;
return nullptr;
block->page.fix();
return block;
}
buf_block_t *buf_page_optimistic_get(buf_block_t *block,
rw_lock_type_t rw_latch,
uint64_t modify_clock, mtr_t *mtr)
{
ut_ad(mtr->is_active());
ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_X_LATCH);
ut_ad(block->page.buf_fix_count());
if (rw_latch == RW_S_LATCH)
success= block->page.lock.s_lock_try();
else
{
have_u_not_x= block->page.lock.have_u_not_x();
success= have_u_not_x || block->page.lock.x_lock_try();
}
if (!block->page.lock.s_lock_try())
{
fail:
block->page.unfix();
return nullptr;
}
if (!success)
return false;
ut_ad(!ibuf_inside(mtr) ||
ibuf_page(block->page.id(), block->zip_size(), nullptr));
if (have_u_not_x)
if (modify_clock != block->modify_clock || block->page.is_freed())
{
block->page.lock.s_unlock();
goto fail;
}
ut_ad(!block->page.is_read_fixed());
buf_page_make_young_if_needed(&block->page);
mtr->memo_push(block, MTR_MEMO_PAGE_S_FIX);
}
else if (block->page.lock.have_u_not_x())
{
block->page.lock.u_x_upgrade();
block->page.unfix();
mtr->page_lock_upgrade(*block);
ut_ad(id == block->page.id());
ut_ad(modify_clock == block->modify_clock);
}
else if (!block->page.lock.x_lock_try())
goto fail;
else
{
ut_ad(rw_latch == RW_S_LATCH || !block->page.is_io_fixed());
ut_ad(id == block->page.id());
ut_ad(!ibuf_inside(mtr) || ibuf_page(id, block->zip_size(), nullptr));
ut_ad(!block->page.is_io_fixed());
ut_ad(!ibuf_inside(mtr) ||
ibuf_page(block->page.id(), block->zip_size(), nullptr));
if (modify_clock != block->modify_clock || block->page.is_freed())
{
if (rw_latch == RW_S_LATCH)
block->page.lock.s_unlock();
else
block->page.lock.x_unlock();
return false;
goto fail;
}
block->page.fix();
ut_ad(!block->page.is_read_fixed());
buf_page_make_young_if_needed(&block->page);
mtr->memo_push(block, mtr_memo_type_t(rw_latch));
mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX);
}
ut_d(if (!(++buf_dbg_counter % 5771)) buf_pool.validate());
......@@ -3081,7 +3082,7 @@ bool buf_page_optimistic_get(ulint rw_latch, buf_block_t *block,
ut_ad(~buf_page_t::LRU_MASK & state);
ut_ad(block->page.frame);
return true;
return block;
}
/** Try to S-latch a page.
......
......@@ -672,8 +672,13 @@ dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple,
buf_mode, mtr, &err, false);
if (!block)
{
if (err)
{
err_exit:
if (err == DB_DECRYPTION_FAILED)
btr_decryption_failed(*index);
mtr->rollback_to_savepoint(savepoint);
}
func_exit:
if (UNIV_LIKELY_NULL(heap))
mem_heap_free(heap);
......@@ -737,7 +742,8 @@ dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple,
#endif
}
if (height == 0) {
if (height == 0)
{
if (rw_latch == RW_NO_LATCH)
{
ut_ad(block == mtr->at_savepoint(block_savepoint));
......@@ -821,7 +827,7 @@ dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple,
if (page_cur_search_with_match(tuple, page_mode, &up_match,
&low_match, &cur->page_cur, nullptr)) {
err= DB_CORRUPTION;
goto func_exit;
goto err_exit;
}
}
......@@ -1584,23 +1590,6 @@ rtr_check_discard_page(
lock_sys.prdt_page_free_from_discard(id, true);
}
/** Functor that attempts optimistic access to a hinted page.
Invoking it yields true only if a block was supplied and
buf_page_optimistic_get() succeeded on it with RW_X_LATCH. */
struct optimistic_get
{
  /** cursor whose stored modify_clock is passed on */
  btr_pcur_t *const r_cursor;
  /** mini-transaction passed on to buf_page_optimistic_get() */
  mtr_t *const mtr;

  optimistic_get(btr_pcur_t *r_cursor,mtr_t *mtr)
    : r_cursor(r_cursor), mtr(mtr)
  {}

  /** @param hint  candidate block, or nullptr
  @return whether the page was obtained */
  bool operator()(buf_block_t *hint) const
  {
    if (!hint)
      return false;
    return buf_page_optimistic_get(RW_X_LATCH, hint,
                                   r_cursor->modify_clock, mtr);
  }
};
/** Restore the stored position of a persistent cursor bufferfixing the page */
static
bool
......@@ -1632,8 +1621,11 @@ rtr_cur_restore_position(
r_cursor->modify_clock = 100;
);
if (r_cursor->block_when_stored.run_with_hint(
optimistic_get(r_cursor, mtr))) {
if (buf_page_optimistic_fix(r_cursor->btr_cur.page_cur.block,
r_cursor->old_page_id)
&& buf_page_optimistic_get(r_cursor->btr_cur.page_cur.block,
RW_X_LATCH, r_cursor->modify_clock,
mtr)) {
ut_ad(r_cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(r_cursor->rel_pos == BTR_PCUR_ON);
......
......@@ -28,7 +28,6 @@ Created 2/23/1996 Heikki Tuuri
#include "dict0dict.h"
#include "btr0cur.h"
#include "buf0block_hint.h"
#include "btr0btr.h"
#include "gis0rtree.h"
......@@ -332,8 +331,8 @@ struct btr_pcur_t
/** BTR_PCUR_ON, BTR_PCUR_BEFORE, or BTR_PCUR_AFTER, depending on
whether cursor was on, before, or after the old_rec record */
btr_pcur_pos_t rel_pos= btr_pcur_pos_t(0);
/** buffer block when the position was stored */
buf::Block_hint block_when_stored;
/** the page identifier of old_rec */
page_id_t old_page_id{0,0};
/** the modify clock value of the buffer block when the cursor position
was stored */
ib_uint64_t modify_clock= 0;
......
/*****************************************************************************
Copyright (c) 2020, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License, version 2.0, as published by the
Free Software Foundation.
This program is also distributed with certain software (including but not
limited to OpenSSL) that is licensed under separate terms, as designated in a
particular file or component or in included license documentation. The authors
of MySQL hereby grant you an additional permission to link the program and
your derivative works with the separately licensed software that they have
included with MySQL.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*****************************************************************************/
#pragma once
#include "buf0buf.h"
namespace buf {
class Block_hint {
public:
/** Stores the pointer to the block, which is currently buffer-fixed.
@param block a pointer to a buffer-fixed block to be stored */
inline void store(buf_block_t *block)
{
ut_ad(block->page.buf_fix_count());
m_block= block;
m_page_id= block->page.id();
}
/** Clears currently stored pointer. */
inline void clear() { m_block= nullptr; }
/** Invoke f on m_block(which may be null)
@param f The function to be executed. It will be passed the pointer.
If you wish to use the block pointer subsequently,
you need to ensure you buffer-fix it before returning from f.
@return the return value of f
*/
template <typename F>
bool run_with_hint(const F &f)
{
buffer_fix_block_if_still_valid();
/* m_block could be changed during f() call, so we use local
variable to remember which block we need to unfix */
buf_block_t *block= m_block;
bool res= f(block);
if (block)
block->page.unfix();
return res;
}
buf_block_t *block() const { return m_block; }
private:
/** The block pointer stored by store(). */
buf_block_t *m_block= nullptr;
/** If m_block is non-null, the m_block->page.id at time it was stored. */
page_id_t m_page_id{0, 0};
/** A helper function which checks if m_block is not a dangling pointer and
still points to block with page with m_page_id and if so, buffer-fixes it,
otherwise clear()s it */
void buffer_fix_block_if_still_valid();
};
} // namespace buf
......@@ -158,14 +158,25 @@ buf_block_free(
#define buf_page_get(ID, SIZE, LA, MTR) \
buf_page_get_gen(ID, SIZE, LA, NULL, BUF_GET, MTR)
/** Try to acquire a page latch.
@param rw_latch RW_S_LATCH or RW_X_LATCH
/** Try to buffer-fix a page.
@param block guessed block
@param id expected block->page.id()
@return block if it was buffer-fixed
@retval nullptr if the block no longer is valid */
buf_block_t *buf_page_optimistic_fix(buf_block_t *block, page_id_t id)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/** Try to acquire a page latch after buf_page_optimistic_fix().
@param block buffer-fixed block
@param rw_latch RW_S_LATCH or RW_X_LATCH
@param modify_clock expected value of block->modify_clock
@param mtr mini-transaction
@return whether the latch was acquired (the page is an allocated file page) */
bool buf_page_optimistic_get(ulint rw_latch, buf_block_t *block,
uint64_t modify_clock, mtr_t *mtr);
@return block if the latch was acquired
@retval nullptr if block->unfix() was called because it no longer is valid */
buf_block_t *buf_page_optimistic_get(buf_block_t *block,
rw_lock_type_t rw_latch,
uint64_t modify_clock, mtr_t *mtr)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/** Try to S-latch a page.
Suitable for using when holding the lock_sys latches (as it avoids deadlock).
......@@ -290,15 +301,6 @@ on the block. */
UNIV_INLINE
void
buf_block_modify_clock_inc(
/*=======================*/
buf_block_t* block); /*!< in: block */
/** Return the value of the modify clock of a buffer block.
The caller must hold an s-lock or x-lock on the block.
@param block   buffer block whose modify clock is read
@return the modify clock value */
UNIV_INLINE
ib_uint64_t
buf_block_get_modify_clock(
/*=======================*/
buf_block_t* block); /*!< in: block */
#endif /* !UNIV_INNOCHECKSUM */
......
......@@ -116,17 +116,3 @@ buf_block_modify_clock_inc(
block->modify_clock++;
}
/** Read the modify clock of a buffer block.
The caller must hold an s-lock or x-lock on the block; ut_ad() asserts
block->page.lock.have_any().
@param block   buffer block whose modify clock is read
@return the current value of block->modify_clock */
UNIV_INLINE
ib_uint64_t
buf_block_get_modify_clock(buf_block_t *block)
{
  ut_ad(block->page.lock.have_any());
  return block->modify_clock;
}
......@@ -985,7 +985,7 @@ void mtr_t::upgrade_buffer_fix(ulint savepoint, rw_lock_type_t rw_latch)
ut_ad(slot.type == MTR_MEMO_BUF_FIX);
buf_block_t *block= static_cast<buf_block_t*>(slot.object);
ut_d(const auto state= block->page.state());
ut_ad(state > buf_page_t::UNFIXED);
ut_ad(state > buf_page_t::FREED);
ut_ad(state > buf_page_t::WRITE_FIX || state < buf_page_t::READ_FIX);
static_assert(int{MTR_MEMO_PAGE_S_FIX} == int{RW_S_LATCH}, "");
static_assert(int{MTR_MEMO_PAGE_X_FIX} == int{RW_X_LATCH}, "");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment