Commit e7924a85 authored by Marko Mäkelä's avatar Marko Mäkelä

Remove code duplication around buf_pool->flush_rbt

The purpose of buf_pool->flush_rbt is to ensure that
buf_pool->flush_list is ordered by oldest_modification.
This should speed up multi-pass redo log application
(when the buffer pool is not large enough to accommodate
all pages that were modified since the latest log checkpoint).

The buf_pool->flush_rbt is not being used after redo log has
been applied. It could be better to always flush pages in
the ascending order of oldest_modification. Currently, whenever
a page is first modified, it will be moved to the start of the
buf_pool->flush_list, overtaking blocks whose oldest_modification
could be much older.

buf_flush_insert_sorted_into_flush_list(): Merge into
buf_flush_insert_into_flush_list().

buf_flush_recv_note_modification(): Remove.
The function buf_flush_note_modification() can be invoked instead.
parent 301bd62b
......@@ -427,137 +427,44 @@ buf_flush_insert_into_flush_list(
ut_ad(buf_page_mutex_own(block));
buf_flush_list_mutex_enter(buf_pool);
ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
|| (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
<= lsn));
/* If we are in the recovery then we need to update the flush
red-black tree as well. */
if (buf_pool->flush_rbt != NULL) {
buf_flush_list_mutex_exit(buf_pool);
buf_flush_insert_sorted_into_flush_list(buf_pool, block, lsn);
return;
}
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(!block->page.in_flush_list);
ut_d(block->page.in_flush_list = TRUE);
ut_ad(!block->page.oldest_modification);
block->page.oldest_modification = lsn;
UT_LIST_ADD_FIRST(buf_pool->flush_list, &block->page);
UNIV_MEM_ASSERT_RW(block->page.zip
? block->page.zip.data : block->frame,
block->page.size.physical());
incr_flush_list_size_in_bytes(block, buf_pool);
#ifdef UNIV_DEBUG_VALGRIND
void* p;
if (block->page.size.is_compressed()) {
p = block->page.zip.data;
} else {
p = block->frame;
}
UNIV_MEM_ASSERT_RW(p, block->page.size.physical());
#endif /* UNIV_DEBUG_VALGRIND */
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(buf_flush_validate_skip(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
buf_flush_list_mutex_exit(buf_pool);
}
/********************************************************************//**
Inserts a modified block into the flush list in the right sorted position.
This function is used by recovery, because there the modifications do not
necessarily come in the order of lsn's. */
void
buf_flush_insert_sorted_into_flush_list(
/*====================================*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
buf_block_t* block, /*!< in/out: block which is modified */
lsn_t lsn) /*!< in: oldest modification */
{
buf_page_t* prev_b;
buf_page_t* b;
ut_ad(srv_shutdown_state != SRV_SHUTDOWN_FLUSH_PHASE);
ut_ad(!buf_pool_mutex_own(buf_pool));
ut_ad(log_flush_order_mutex_own());
ut_ad(buf_page_mutex_own(block));
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
buf_flush_list_mutex_enter(buf_pool);
/* The field in_LRU_list is protected by buf_pool->mutex, which
we are not holding. However, while a block is in the flush
list, it is dirty and cannot be discarded, not from the
page_hash or from the LRU list. At most, the uncompressed
page frame of a compressed block may be discarded or created
(copying the block->page to or from a buf_page_t that is
dynamically allocated from buf_buddy_alloc()). Because those
transitions hold block->mutex and the flush list mutex (via
buf_flush_relocate_on_flush_list()), there is no possibility
of a race condition in the assertions below. */
ut_ad(block->page.in_LRU_list);
ut_ad(block->page.in_page_hash);
/* buf_buddy_block_register() will take a block in the
BUF_BLOCK_MEMORY state, not a file page. */
ut_ad(!block->page.in_zip_hash);
ut_ad(!block->page.in_flush_list);
ut_d(block->page.in_flush_list = TRUE);
block->page.oldest_modification = lsn;
#ifdef UNIV_DEBUG_VALGRIND
void* p;
if (block->page.size.is_compressed()) {
p = block->page.zip.data;
} else {
p = block->frame;
}
UNIV_MEM_ASSERT_RW(p, block->page.size.physical());
#endif /* UNIV_DEBUG_VALGRIND */
prev_b = NULL;
/* For the most part when this function is called the flush_rbt
should not be NULL. In a very rare boundary case it is possible
that the flush_rbt has already been freed by the recovery thread
before the last page was hooked up in the flush_list by the
io-handler thread. In that case we'll just do a simple
linear search in the else block. */
if (buf_pool->flush_rbt != NULL) {
prev_b = buf_flush_insert_in_flush_rbt(&block->page);
} else {
b = UT_LIST_GET_FIRST(buf_pool->flush_list);
while (b != NULL && b->oldest_modification
> block->page.oldest_modification) {
ut_ad(b->in_flush_list);
prev_b = b;
b = UT_LIST_GET_NEXT(list, b);
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
ut_ad(srv_shutdown_state != SRV_SHUTDOWN_FLUSH_PHASE);
/* The field in_LRU_list is protected by buf_pool->mutex, which
we are not holding. However, while a block is in the flush
list, it is dirty and cannot be discarded, not from the
page_hash or from the LRU list. At most, the uncompressed
page frame of a compressed block may be discarded or created
(copying the block->page to or from a buf_page_t that is
dynamically allocated from buf_buddy_alloc()). Because those
transitions hold block->mutex and the flush list mutex (via
buf_flush_relocate_on_flush_list()), there is no possibility
of a race condition in the assertions below. */
ut_ad(block->page.in_LRU_list);
ut_ad(block->page.in_page_hash);
/* buf_buddy_block_register() will take a block in the
BUF_BLOCK_MEMORY state, not a file page. */
ut_ad(!block->page.in_zip_hash);
if (buf_page_t* prev_b =
buf_flush_insert_in_flush_rbt(&block->page)) {
UT_LIST_INSERT_AFTER(buf_pool->flush_list, prev_b, &block->page);
goto func_exit;
}
}
if (prev_b == NULL) {
UT_LIST_ADD_FIRST(buf_pool->flush_list, &block->page);
} else {
UT_LIST_INSERT_AFTER(buf_pool->flush_list, prev_b, &block->page);
}
incr_flush_list_size_in_bytes(block, buf_pool);
UT_LIST_ADD_FIRST(buf_pool->flush_list, &block->page);
func_exit:
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(buf_flush_validate_low(buf_pool));
ut_a(buf_flush_validate_skip(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
buf_flush_list_mutex_exit(buf_pool);
......@@ -686,7 +593,7 @@ buf_flush_remove(
}
/* If the flush_rbt is active then delete from there as well. */
if (buf_pool->flush_rbt != NULL) {
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
buf_flush_delete_from_flush_rbt(bpage);
}
......@@ -754,7 +661,7 @@ buf_flush_relocate_on_flush_list(
/* If recovery is active we must swap the control blocks in
the flush_rbt as well. */
if (buf_pool->flush_rbt != NULL) {
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
buf_flush_delete_from_flush_rbt(bpage);
prev_b = buf_flush_insert_in_flush_rbt(dpage);
}
......@@ -3600,7 +3507,7 @@ buf_flush_validate_low(
/* If we are in recovery mode i.e.: flush_rbt != NULL
then each block in the flush_list must also be present
in the flush_rbt. */
if (buf_pool->flush_rbt != NULL) {
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
rnode = rbt_first(buf_pool->flush_rbt);
}
......@@ -3621,7 +3528,7 @@ buf_flush_validate_low(
|| buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH);
ut_a(om > 0);
if (buf_pool->flush_rbt != NULL) {
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
buf_page_t** prpage;
ut_a(rnode != NULL);
......
......@@ -191,18 +191,6 @@ buf_flush_note_modification(
lsn_t end_lsn, /*!< in: end lsn of the last mtr in the
set of mtr's */
FlushObserver* observer); /*!< in: flush observer */
/********************************************************************//**
This function should be called when recovery has modified a buffer page. */
UNIV_INLINE
void
buf_flush_recv_note_modification(
/*=============================*/
buf_block_t* block, /*!< in: block which is modified */
lsn_t start_lsn, /*!< in: start lsn of the first mtr in a
set of mtr's */
lsn_t end_lsn); /*!< in: end lsn of the last mtr in the
set of mtr's */
/********************************************************************//**
Returns TRUE if the file page block is immediately suitable for replacement,
i.e., transition FILE_PAGE => NOT_USED allowed.
......
......@@ -37,17 +37,6 @@ buf_flush_insert_into_flush_list(
buf_block_t* block, /*!< in/out: block which is modified */
lsn_t lsn); /*!< in: oldest modification */
/********************************************************************//**
Inserts a modified block into the flush list in the right sorted position.
This function is used by recovery, because there the modifications do not
necessarily come in the order of lsn's. */
void
buf_flush_insert_sorted_into_flush_list(
/*====================================*/
buf_pool_t* buf_pool, /*!< buffer pool instance */
buf_block_t* block, /*!< in/out: block which is modified */
lsn_t lsn); /*!< in: oldest modification */
/********************************************************************//**
This function should be called at a mini-transaction commit, if a page was
modified in it. Puts the block to the list of modified blocks, if it is not
......@@ -63,24 +52,11 @@ buf_flush_note_modification(
modified this block */
FlushObserver* observer) /*!< in: flush observer */
{
#ifdef UNIV_DEBUG
{
/* Allow write to proceed to shared temporary tablespace
in read-only mode. */
ut_ad(!srv_read_only_mode
|| fsp_is_system_temporary(block->page.id.space()));
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->page.buf_fix_count > 0);
buf_pool_t* buf_pool = buf_pool_from_block(block);
ut_ad(!buf_pool_mutex_own(buf_pool));
ut_ad(!buf_flush_list_mutex_own(buf_pool));
}
#endif /* UNIV_DEBUG */
mutex_enter(&block->mutex);
ut_ad(!srv_read_only_mode
|| fsp_is_system_temporary(block->page.id.space()));
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->page.buf_fix_count > 0);
ut_ad(block->page.newest_modification <= end_lsn);
block->page.newest_modification = end_lsn;
......@@ -98,52 +74,7 @@ buf_flush_note_modification(
ut_ad(block->page.oldest_modification <= start_lsn);
}
buf_page_mutex_exit(block);
mutex_exit(&block->mutex);
srv_stats.buf_pool_write_requests.inc();
}
/********************************************************************//**
This function should be called when recovery has modified a buffer page. */
UNIV_INLINE
void
buf_flush_recv_note_modification(
/*=============================*/
buf_block_t* block, /*!< in: block which is modified */
lsn_t start_lsn, /*!< in: start lsn of the first mtr in a
set of mtr's */
lsn_t end_lsn) /*!< in: end lsn of the last mtr in the
set of mtr's */
{
#ifdef UNIV_DEBUG
{
ut_ad(!srv_read_only_mode);
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->page.buf_fix_count > 0);
buf_pool_t* buf_pool = buf_pool_from_block(block);
ut_ad(!buf_pool_mutex_own(buf_pool));
ut_ad(!buf_flush_list_mutex_own(buf_pool));
ut_ad(start_lsn != 0);
ut_ad(block->page.newest_modification <= end_lsn);
}
#endif /* UNIV_DEBUG */
buf_page_mutex_enter(block);
block->page.newest_modification = end_lsn;
if (!block->page.oldest_modification) {
buf_pool_t* buf_pool = buf_pool_from_block(block);
buf_flush_insert_sorted_into_flush_list(
buf_pool, block, start_lsn);
} else {
ut_ad(block->page.oldest_modification <= start_lsn);
}
buf_page_mutex_exit(block);
}
......@@ -1925,7 +1925,7 @@ recv_recover_page(bool just_read_in, buf_block_t* block)
if (start_lsn) {
log_flush_order_mutex_enter();
buf_flush_recv_note_modification(block, start_lsn, end_lsn);
buf_flush_note_modification(block, start_lsn, end_lsn, NULL);
log_flush_order_mutex_exit();
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment