Commit e7924a85 authored by Marko Mäkelä

Remove code duplication around buf_pool->flush_rbt

The purpose of buf_pool->flush_rbt is to ensure that
buf_pool->flush_list is ordered by oldest_modification.
This should speed up multi-pass redo log application
(when the buffer pool is not large enough to accommodate
all pages that were modified since the latest log checkpoint).
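
To illustrate (a minimal sketch only, not InnoDB code): during recovery,
I/O completion determines the order in which modified pages reach the
flush list, so their oldest_modification values arrive out of order, and
an ordered container such as a red-black tree finds each page's sorted
position in O(log n) instead of by a linear scan. The sketch below uses
C++ std::multiset as a stand-in for the rbt; page_t and by_oldest are
made-up names, and InnoDB's descending list order is simplified away.

	#include <cstdint>
	#include <cstdio>
	#include <iterator>
	#include <set>

	typedef uint64_t lsn_t;

	struct page_t {
		lsn_t	oldest_modification;	/* LSN of the first change */
	};

	struct by_oldest {
		bool operator()(const page_t* a, const page_t* b) const {
			return a->oldest_modification < b->oldest_modification;
		}
	};

	int main() {
		/* flush_rbt stand-in: dirty pages keyed by oldest_modification */
		std::multiset<page_t*, by_oldest> rbt;
		page_t pages[] = { {300}, {100}, {200} };	/* out-of-order arrival */

		for (page_t& p : pages) {
			/* The insert position yields the neighbour after which
			the page would be linked into a sorted flush list. */
			std::multiset<page_t*, by_oldest>::iterator it
				= rbt.insert(&p);
			if (it == rbt.begin()) {
				std::printf("LSN %llu: oldest so far\n",
					    (unsigned long long)
					    p.oldest_modification);
			} else {
				std::printf("LSN %llu: after LSN %llu\n",
					    (unsigned long long)
					    p.oldest_modification,
					    (unsigned long long)
					    (*std::prev(it))->oldest_modification);
			}
		}
		return 0;
	}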

The buf_pool->flush_rbt is no longer used once the redo log has been
applied. It could be better to always flush pages in ascending order of
oldest_modification. Currently, whenever a page is first modified, it is
moved to the start of the buf_pool->flush_list, overtaking blocks whose
oldest_modification could be much older.
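
As a minimal sketch of the normal, non-recovery path (a C++ std::deque
of bare LSNs standing in for the list of buf_page_t): because LSNs are
assigned in mini-transaction commit order, inserting each newly dirtied
page at the head keeps the list ordered with the newest
oldest_modification first, and flushing from the tail reaches the oldest
pages first.

	#include <cassert>
	#include <cstdint>
	#include <deque>

	typedef uint64_t lsn_t;

	int main() {
		std::deque<lsn_t> flush_list;	/* front = newest oldest_modification */

		for (lsn_t lsn : {100, 250, 400}) {
			/* The invariant behind head insertion: any current
			head was dirtied at an LSN no larger than this one. */
			assert(flush_list.empty() || flush_list.front() <= lsn);
			flush_list.push_front(lsn);
		}

		/* Flushing in ascending order of oldest_modification means
		walking from the back, where the oldest pages sit. */
		while (!flush_list.empty()) {
			flush_list.pop_back();	/* flush the oldest page first */
		}
		return 0;
	}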

buf_flush_insert_sorted_into_flush_list(): Merge into
buf_flush_insert_into_flush_list().

buf_flush_recv_note_modification(): Remove.
The function buf_flush_note_modification() can be invoked instead.
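
At the call site in recv_recover_page(), the replacement is a one-line
change (shown in the last hunk below):

	/* before */
	buf_flush_recv_note_modification(block, start_lsn, end_lsn);
	/* after: NULL is passed for the FlushObserver parameter */
	buf_flush_note_modification(block, start_lsn, end_lsn, NULL);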
parent 301bd62b
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -427,137 +427,44 @@ buf_flush_insert_into_flush_list(
 	ut_ad(buf_page_mutex_own(block));
 
 	buf_flush_list_mutex_enter(buf_pool);
 
-	ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
-	      || (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
-		  <= lsn));
-
-	/* If we are in the recovery then we need to update the flush
-	red-black tree as well. */
-	if (buf_pool->flush_rbt != NULL) {
-		buf_flush_list_mutex_exit(buf_pool);
-		buf_flush_insert_sorted_into_flush_list(buf_pool, block, lsn);
-		return;
-	}
-
-	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 	ut_ad(!block->page.in_flush_list);
 	ut_d(block->page.in_flush_list = TRUE);
+	ut_ad(!block->page.oldest_modification);
 	block->page.oldest_modification = lsn;
-
-	UT_LIST_ADD_FIRST(buf_pool->flush_list, &block->page);
-
+	UNIV_MEM_ASSERT_RW(block->page.zip.data
+			   ? block->page.zip.data : block->frame,
+			   block->page.size.physical());
 	incr_flush_list_size_in_bytes(block, buf_pool);
 
-#ifdef UNIV_DEBUG_VALGRIND
-	void*	p;
-
-	if (block->page.size.is_compressed()) {
-		p = block->page.zip.data;
-	} else {
-		p = block->frame;
-	}
-
-	UNIV_MEM_ASSERT_RW(p, block->page.size.physical());
-#endif /* UNIV_DEBUG_VALGRIND */
+	if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
+		ut_ad(srv_shutdown_state != SRV_SHUTDOWN_FLUSH_PHASE);
+		/* The field in_LRU_list is protected by buf_pool->mutex, which
+		we are not holding. However, while a block is in the flush
+		list, it is dirty and cannot be discarded, not from the
+		page_hash or from the LRU list. At most, the uncompressed
+		page frame of a compressed block may be discarded or created
+		(copying the block->page to or from a buf_page_t that is
+		dynamically allocated from buf_buddy_alloc()). Because those
+		transitions hold block->mutex and the flush list mutex (via
+		buf_flush_relocate_on_flush_list()), there is no possibility
+		of a race condition in the assertions below. */
+		ut_ad(block->page.in_LRU_list);
+		ut_ad(block->page.in_page_hash);
+		/* buf_buddy_block_register() will take a block in the
+		BUF_BLOCK_MEMORY state, not a file page. */
+		ut_ad(!block->page.in_zip_hash);
 
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-	ut_a(buf_flush_validate_skip(buf_pool));
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-	buf_flush_list_mutex_exit(buf_pool);
-}
-
-/********************************************************************//**
-Inserts a modified block into the flush list in the right sorted position.
-This function is used by recovery, because there the modifications do not
-necessarily come in the order of lsn's. */
-void
-buf_flush_insert_sorted_into_flush_list(
-/*====================================*/
-	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
-	buf_block_t*	block,		/*!< in/out: block which is modified */
-	lsn_t		lsn)		/*!< in: oldest modification */
-{
-	buf_page_t*	prev_b;
-	buf_page_t*	b;
-
-	ut_ad(srv_shutdown_state != SRV_SHUTDOWN_FLUSH_PHASE);
-	ut_ad(!buf_pool_mutex_own(buf_pool));
-	ut_ad(log_flush_order_mutex_own());
-	ut_ad(buf_page_mutex_own(block));
-	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-
-	buf_flush_list_mutex_enter(buf_pool);
-
-	/* The field in_LRU_list is protected by buf_pool->mutex, which
-	we are not holding. However, while a block is in the flush
-	list, it is dirty and cannot be discarded, not from the
-	page_hash or from the LRU list. At most, the uncompressed
-	page frame of a compressed block may be discarded or created
-	(copying the block->page to or from a buf_page_t that is
-	dynamically allocated from buf_buddy_alloc()). Because those
-	transitions hold block->mutex and the flush list mutex (via
-	buf_flush_relocate_on_flush_list()), there is no possibility
-	of a race condition in the assertions below. */
-	ut_ad(block->page.in_LRU_list);
-	ut_ad(block->page.in_page_hash);
-	/* buf_buddy_block_register() will take a block in the
-	BUF_BLOCK_MEMORY state, not a file page. */
-	ut_ad(!block->page.in_zip_hash);
-	ut_ad(!block->page.in_flush_list);
-	ut_d(block->page.in_flush_list = TRUE);
-	block->page.oldest_modification = lsn;
-
-#ifdef UNIV_DEBUG_VALGRIND
-	void*	p;
-
-	if (block->page.size.is_compressed()) {
-		p = block->page.zip.data;
-	} else {
-		p = block->frame;
-	}
-
-	UNIV_MEM_ASSERT_RW(p, block->page.size.physical());
-#endif /* UNIV_DEBUG_VALGRIND */
-
-	prev_b = NULL;
-
-	/* For the most part when this function is called the flush_rbt
-	should not be NULL. In a very rare boundary case it is possible
-	that the flush_rbt has already been freed by the recovery thread
-	before the last page was hooked up in the flush_list by the
-	io-handler thread. In that case we'll just do a simple
-	linear search in the else block. */
-	if (buf_pool->flush_rbt != NULL) {
-		prev_b = buf_flush_insert_in_flush_rbt(&block->page);
-	} else {
-		b = UT_LIST_GET_FIRST(buf_pool->flush_list);
-
-		while (b != NULL && b->oldest_modification
-		       > block->page.oldest_modification) {
-			ut_ad(b->in_flush_list);
-			prev_b = b;
-			b = UT_LIST_GET_NEXT(list, b);
+		if (buf_page_t* prev_b =
+		    buf_flush_insert_in_flush_rbt(&block->page)) {
+			UT_LIST_INSERT_AFTER(buf_pool->flush_list,
					     prev_b, &block->page);
+			goto func_exit;
 		}
 	}
 
-	if (prev_b == NULL) {
-		UT_LIST_ADD_FIRST(buf_pool->flush_list, &block->page);
-	} else {
-		UT_LIST_INSERT_AFTER(buf_pool->flush_list, prev_b, &block->page);
-	}
-
-	incr_flush_list_size_in_bytes(block, buf_pool);
-
+	UT_LIST_ADD_FIRST(buf_pool->flush_list, &block->page);
+func_exit:
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-	ut_a(buf_flush_validate_low(buf_pool));
+	ut_a(buf_flush_validate_skip(buf_pool));
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
 	buf_flush_list_mutex_exit(buf_pool);
@@ -686,7 +593,7 @@ buf_flush_remove(
 	}
 
 	/* If the flush_rbt is active then delete from there as well. */
-	if (buf_pool->flush_rbt != NULL) {
+	if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
 		buf_flush_delete_from_flush_rbt(bpage);
 	}
 
@@ -754,7 +661,7 @@ buf_flush_relocate_on_flush_list(
 	/* If recovery is active we must swap the control blocks in
 	the flush_rbt as well. */
-	if (buf_pool->flush_rbt != NULL) {
+	if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
 		buf_flush_delete_from_flush_rbt(bpage);
 		prev_b = buf_flush_insert_in_flush_rbt(dpage);
 	}
 
@@ -3600,7 +3507,7 @@ buf_flush_validate_low(
 	/* If we are in recovery mode i.e.: flush_rbt != NULL
 	then each block in the flush_list must also be present
 	in the flush_rbt. */
-	if (buf_pool->flush_rbt != NULL) {
+	if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
 		rnode = rbt_first(buf_pool->flush_rbt);
 	}
 
@@ -3621,7 +3528,7 @@ buf_flush_validate_low(
 	       || buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH);
 	ut_a(om > 0);
 
-	if (buf_pool->flush_rbt != NULL) {
+	if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
 		buf_page_t**	prpage;
 
 		ut_a(rnode != NULL);
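
Note that the rewritten checks wrap buf_pool->flush_rbt in
UNIV_LIKELY_NULL, InnoDB's hint that the pointer is expected to be NULL,
since the tree exists only during recovery. A sketch of the idea,
assuming a GCC-compatible compiler; MY_LIKELY_NULL is a made-up name and
InnoDB's real macro is defined in univ.i:

	#include <stddef.h>

	/* Hint that the pointer is normally NULL, so the compiler keeps
	the recovery-only branch off the hot path. */
	#define MY_LIKELY_NULL(ptr) __builtin_expect((ptr) != NULL, 0)

	int uses_rbt(const void* flush_rbt) {
		if (MY_LIKELY_NULL(flush_rbt)) {
			return 1;	/* rare path: recovery tree exists */
		}
		return 0;		/* common path: no tree */
	}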
--- a/storage/innobase/include/buf0flu.h
+++ b/storage/innobase/include/buf0flu.h
@@ -191,18 +191,6 @@ buf_flush_note_modification(
 	lsn_t		end_lsn,	/*!< in: end lsn of the last mtr in the
 					set of mtr's */
 	FlushObserver*	observer);	/*!< in: flush observer */
-/********************************************************************//**
-This function should be called when recovery has modified a buffer page. */
-UNIV_INLINE
-void
-buf_flush_recv_note_modification(
-/*=============================*/
-	buf_block_t*	block,		/*!< in: block which is modified */
-	lsn_t		start_lsn,	/*!< in: start lsn of the first mtr in a
-					set of mtr's */
-	lsn_t		end_lsn);	/*!< in: end lsn of the last mtr in the
-					set of mtr's */
 /********************************************************************//**
 Returns TRUE if the file page block is immediately suitable for replacement,
 i.e., transition FILE_PAGE => NOT_USED allowed.
--- a/storage/innobase/include/buf0flu.ic
+++ b/storage/innobase/include/buf0flu.ic
@@ -37,17 +37,6 @@ buf_flush_insert_into_flush_list(
 	buf_block_t*	block,		/*!< in/out: block which is modified */
 	lsn_t		lsn);		/*!< in: oldest modification */
-/********************************************************************//**
-Inserts a modified block into the flush list in the right sorted position.
-This function is used by recovery, because there the modifications do not
-necessarily come in the order of lsn's. */
-void
-buf_flush_insert_sorted_into_flush_list(
-/*====================================*/
-	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
-	buf_block_t*	block,		/*!< in/out: block which is modified */
-	lsn_t		lsn);		/*!< in: oldest modification */
 /********************************************************************//**
 This function should be called at a mini-transaction commit, if a page was
 modified in it. Puts the block to the list of modified blocks, if it is not
@@ -63,24 +52,11 @@ buf_flush_note_modification(
 					modified this block */
 	FlushObserver*	observer)	/*!< in: flush observer */
 {
-#ifdef UNIV_DEBUG
-	{
-		/* Allow write to proceed to shared temporary tablespace
-		in read-only mode. */
-		ut_ad(!srv_read_only_mode
-		      || fsp_is_system_temporary(block->page.id.space()));
-		ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-		ut_ad(block->page.buf_fix_count > 0);
-
-		buf_pool_t*	buf_pool = buf_pool_from_block(block);
-
-		ut_ad(!buf_pool_mutex_own(buf_pool));
-		ut_ad(!buf_flush_list_mutex_own(buf_pool));
-	}
-#endif /* UNIV_DEBUG */
-
 	mutex_enter(&block->mutex);
+	ut_ad(!srv_read_only_mode
+	      || fsp_is_system_temporary(block->page.id.space()));
+	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+	ut_ad(block->page.buf_fix_count > 0);
 	ut_ad(block->page.newest_modification <= end_lsn);
 	block->page.newest_modification = end_lsn;
@@ -98,52 +74,7 @@ buf_flush_note_modification(
 		ut_ad(block->page.oldest_modification <= start_lsn);
 	}
 
-	buf_page_mutex_exit(block);
+	mutex_exit(&block->mutex);
 
 	srv_stats.buf_pool_write_requests.inc();
 }
-
-/********************************************************************//**
-This function should be called when recovery has modified a buffer page. */
-UNIV_INLINE
-void
-buf_flush_recv_note_modification(
-/*=============================*/
-	buf_block_t*	block,		/*!< in: block which is modified */
-	lsn_t		start_lsn,	/*!< in: start lsn of the first mtr in a
-					set of mtr's */
-	lsn_t		end_lsn)	/*!< in: end lsn of the last mtr in the
-					set of mtr's */
-{
-#ifdef UNIV_DEBUG
-	{
-		ut_ad(!srv_read_only_mode);
-		ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-		ut_ad(block->page.buf_fix_count > 0);
-
-		buf_pool_t*	buf_pool = buf_pool_from_block(block);
-
-		ut_ad(!buf_pool_mutex_own(buf_pool));
-		ut_ad(!buf_flush_list_mutex_own(buf_pool));
-
-		ut_ad(start_lsn != 0);
-		ut_ad(block->page.newest_modification <= end_lsn);
-	}
-#endif /* UNIV_DEBUG */
-
-	buf_page_mutex_enter(block);
-
-	block->page.newest_modification = end_lsn;
-
-	if (!block->page.oldest_modification) {
-		buf_pool_t*	buf_pool = buf_pool_from_block(block);
-
-		buf_flush_insert_sorted_into_flush_list(
-			buf_pool, block, start_lsn);
-	} else {
-		ut_ad(block->page.oldest_modification <= start_lsn);
-	}
-
-	buf_page_mutex_exit(block);
-}
--- a/storage/innobase/log/log0recv.cc
+++ b/storage/innobase/log/log0recv.cc
@@ -1925,7 +1925,7 @@ recv_recover_page(bool just_read_in, buf_block_t* block)
 	if (start_lsn) {
 		log_flush_order_mutex_enter();
-		buf_flush_recv_note_modification(block, start_lsn, end_lsn);
+		buf_flush_note_modification(block, start_lsn, end_lsn, NULL);
 		log_flush_order_mutex_exit();
 	}