Commit e7924a85 authored by Marko Mäkelä's avatar Marko Mäkelä

Remove code duplication around buf_pool->flush_rbt

The purpose of buf_pool->flush_rbt is to ensure that
buf_pool->flush_list is ordered by oldest_modification.
This should speed up multi-pass redo log application
(when the buffer pool is not large enough to accommodate
all pages that were modified since the latest log checkpoint).

The buf_pool->flush_rbt is not being used after redo log has
been applied. It could be better to always flush pages in
the ascending order of oldest_modification. Currently, whenever
a page is first modified, it will be moved to the start of the
buf_pool->flush_list, overtaking blocks whose oldest_modification
could be much older.

buf_flush_insert_sorted_into_flush_list(): Merge into
buf_flush_insert_into_flush_list().

buf_flush_recv_note_modification(): Remove.
The function buf_flush_note_modification() can be invoked instead.
parent 301bd62b
...@@ -427,70 +427,17 @@ buf_flush_insert_into_flush_list( ...@@ -427,70 +427,17 @@ buf_flush_insert_into_flush_list(
ut_ad(buf_page_mutex_own(block)); ut_ad(buf_page_mutex_own(block));
buf_flush_list_mutex_enter(buf_pool); buf_flush_list_mutex_enter(buf_pool);
ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
|| (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
<= lsn));
/* If we are in the recovery then we need to update the flush
red-black tree as well. */
if (buf_pool->flush_rbt != NULL) {
buf_flush_list_mutex_exit(buf_pool);
buf_flush_insert_sorted_into_flush_list(buf_pool, block, lsn);
return;
}
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(!block->page.in_flush_list); ut_ad(!block->page.in_flush_list);
ut_d(block->page.in_flush_list = TRUE); ut_d(block->page.in_flush_list = TRUE);
ut_ad(!block->page.oldest_modification);
block->page.oldest_modification = lsn; block->page.oldest_modification = lsn;
UNIV_MEM_ASSERT_RW(block->page.zip
UT_LIST_ADD_FIRST(buf_pool->flush_list, &block->page); ? block->page.zip.data : block->frame,
block->page.size.physical());
incr_flush_list_size_in_bytes(block, buf_pool); incr_flush_list_size_in_bytes(block, buf_pool);
#ifdef UNIV_DEBUG_VALGRIND if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
void* p;
if (block->page.size.is_compressed()) {
p = block->page.zip.data;
} else {
p = block->frame;
}
UNIV_MEM_ASSERT_RW(p, block->page.size.physical());
#endif /* UNIV_DEBUG_VALGRIND */
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(buf_flush_validate_skip(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
buf_flush_list_mutex_exit(buf_pool);
}
/********************************************************************//**
Inserts a modified block into the flush list in the right sorted position.
This function is used by recovery, because there the modifications do not
necessarily come in the order of lsn's. */
void
buf_flush_insert_sorted_into_flush_list(
/*====================================*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
buf_block_t* block, /*!< in/out: block which is modified */
lsn_t lsn) /*!< in: oldest modification */
{
buf_page_t* prev_b;
buf_page_t* b;
ut_ad(srv_shutdown_state != SRV_SHUTDOWN_FLUSH_PHASE); ut_ad(srv_shutdown_state != SRV_SHUTDOWN_FLUSH_PHASE);
ut_ad(!buf_pool_mutex_own(buf_pool));
ut_ad(log_flush_order_mutex_own());
ut_ad(buf_page_mutex_own(block));
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
buf_flush_list_mutex_enter(buf_pool);
/* The field in_LRU_list is protected by buf_pool->mutex, which /* The field in_LRU_list is protected by buf_pool->mutex, which
we are not holding. However, while a block is in the flush we are not holding. However, while a block is in the flush
list, it is dirty and cannot be discarded, not from the list, it is dirty and cannot be discarded, not from the
...@@ -507,57 +454,17 @@ buf_flush_insert_sorted_into_flush_list( ...@@ -507,57 +454,17 @@ buf_flush_insert_sorted_into_flush_list(
BUF_BLOCK_MEMORY state, not a file page. */ BUF_BLOCK_MEMORY state, not a file page. */
ut_ad(!block->page.in_zip_hash); ut_ad(!block->page.in_zip_hash);
ut_ad(!block->page.in_flush_list); if (buf_page_t* prev_b =
ut_d(block->page.in_flush_list = TRUE); buf_flush_insert_in_flush_rbt(&block->page)) {
block->page.oldest_modification = lsn; UT_LIST_INSERT_AFTER(buf_pool->flush_list, prev_b, &block->page);
goto func_exit;
#ifdef UNIV_DEBUG_VALGRIND
void* p;
if (block->page.size.is_compressed()) {
p = block->page.zip.data;
} else {
p = block->frame;
}
UNIV_MEM_ASSERT_RW(p, block->page.size.physical());
#endif /* UNIV_DEBUG_VALGRIND */
prev_b = NULL;
/* For the most part when this function is called the flush_rbt
should not be NULL. In a very rare boundary case it is possible
that the flush_rbt has already been freed by the recovery thread
before the last page was hooked up in the flush_list by the
io-handler thread. In that case we'll just do a simple
linear search in the else block. */
if (buf_pool->flush_rbt != NULL) {
prev_b = buf_flush_insert_in_flush_rbt(&block->page);
} else {
b = UT_LIST_GET_FIRST(buf_pool->flush_list);
while (b != NULL && b->oldest_modification
> block->page.oldest_modification) {
ut_ad(b->in_flush_list);
prev_b = b;
b = UT_LIST_GET_NEXT(list, b);
} }
} }
if (prev_b == NULL) {
UT_LIST_ADD_FIRST(buf_pool->flush_list, &block->page); UT_LIST_ADD_FIRST(buf_pool->flush_list, &block->page);
} else { func_exit:
UT_LIST_INSERT_AFTER(buf_pool->flush_list, prev_b, &block->page);
}
incr_flush_list_size_in_bytes(block, buf_pool);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(buf_flush_validate_low(buf_pool)); ut_a(buf_flush_validate_skip(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
buf_flush_list_mutex_exit(buf_pool); buf_flush_list_mutex_exit(buf_pool);
...@@ -686,7 +593,7 @@ buf_flush_remove( ...@@ -686,7 +593,7 @@ buf_flush_remove(
} }
/* If the flush_rbt is active then delete from there as well. */ /* If the flush_rbt is active then delete from there as well. */
if (buf_pool->flush_rbt != NULL) { if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
buf_flush_delete_from_flush_rbt(bpage); buf_flush_delete_from_flush_rbt(bpage);
} }
...@@ -754,7 +661,7 @@ buf_flush_relocate_on_flush_list( ...@@ -754,7 +661,7 @@ buf_flush_relocate_on_flush_list(
/* If recovery is active we must swap the control blocks in /* If recovery is active we must swap the control blocks in
the flush_rbt as well. */ the flush_rbt as well. */
if (buf_pool->flush_rbt != NULL) { if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
buf_flush_delete_from_flush_rbt(bpage); buf_flush_delete_from_flush_rbt(bpage);
prev_b = buf_flush_insert_in_flush_rbt(dpage); prev_b = buf_flush_insert_in_flush_rbt(dpage);
} }
...@@ -3600,7 +3507,7 @@ buf_flush_validate_low( ...@@ -3600,7 +3507,7 @@ buf_flush_validate_low(
/* If we are in recovery mode i.e.: flush_rbt != NULL /* If we are in recovery mode i.e.: flush_rbt != NULL
then each block in the flush_list must also be present then each block in the flush_list must also be present
in the flush_rbt. */ in the flush_rbt. */
if (buf_pool->flush_rbt != NULL) { if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
rnode = rbt_first(buf_pool->flush_rbt); rnode = rbt_first(buf_pool->flush_rbt);
} }
...@@ -3621,7 +3528,7 @@ buf_flush_validate_low( ...@@ -3621,7 +3528,7 @@ buf_flush_validate_low(
|| buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH); || buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH);
ut_a(om > 0); ut_a(om > 0);
if (buf_pool->flush_rbt != NULL) { if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
buf_page_t** prpage; buf_page_t** prpage;
ut_a(rnode != NULL); ut_a(rnode != NULL);
......
...@@ -191,18 +191,6 @@ buf_flush_note_modification( ...@@ -191,18 +191,6 @@ buf_flush_note_modification(
lsn_t end_lsn, /*!< in: end lsn of the last mtr in the lsn_t end_lsn, /*!< in: end lsn of the last mtr in the
set of mtr's */ set of mtr's */
FlushObserver* observer); /*!< in: flush observer */ FlushObserver* observer); /*!< in: flush observer */
/********************************************************************//**
This function should be called when recovery has modified a buffer page. */
UNIV_INLINE
void
buf_flush_recv_note_modification(
/*=============================*/
buf_block_t* block, /*!< in: block which is modified */
lsn_t start_lsn, /*!< in: start lsn of the first mtr in a
set of mtr's */
lsn_t end_lsn); /*!< in: end lsn of the last mtr in the
set of mtr's */
/********************************************************************//** /********************************************************************//**
Returns TRUE if the file page block is immediately suitable for replacement, Returns TRUE if the file page block is immediately suitable for replacement,
i.e., transition FILE_PAGE => NOT_USED allowed. i.e., transition FILE_PAGE => NOT_USED allowed.
......
...@@ -37,17 +37,6 @@ buf_flush_insert_into_flush_list( ...@@ -37,17 +37,6 @@ buf_flush_insert_into_flush_list(
buf_block_t* block, /*!< in/out: block which is modified */ buf_block_t* block, /*!< in/out: block which is modified */
lsn_t lsn); /*!< in: oldest modification */ lsn_t lsn); /*!< in: oldest modification */
/********************************************************************//**
Inserts a modified block into the flush list in the right sorted position.
This function is used by recovery, because there the modifications do not
necessarily come in the order of lsn's. */
void
buf_flush_insert_sorted_into_flush_list(
/*====================================*/
buf_pool_t* buf_pool, /*!< buffer pool instance */
buf_block_t* block, /*!< in/out: block which is modified */
lsn_t lsn); /*!< in: oldest modification */
/********************************************************************//** /********************************************************************//**
This function should be called at a mini-transaction commit, if a page was This function should be called at a mini-transaction commit, if a page was
modified in it. Puts the block to the list of modified blocks, if it is not modified in it. Puts the block to the list of modified blocks, if it is not
...@@ -63,24 +52,11 @@ buf_flush_note_modification( ...@@ -63,24 +52,11 @@ buf_flush_note_modification(
modified this block */ modified this block */
FlushObserver* observer) /*!< in: flush observer */ FlushObserver* observer) /*!< in: flush observer */
{ {
#ifdef UNIV_DEBUG mutex_enter(&block->mutex);
{
/* Allow write to proceed to shared temporary tablespace
in read-only mode. */
ut_ad(!srv_read_only_mode ut_ad(!srv_read_only_mode
|| fsp_is_system_temporary(block->page.id.space())); || fsp_is_system_temporary(block->page.id.space()));
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->page.buf_fix_count > 0); ut_ad(block->page.buf_fix_count > 0);
buf_pool_t* buf_pool = buf_pool_from_block(block);
ut_ad(!buf_pool_mutex_own(buf_pool));
ut_ad(!buf_flush_list_mutex_own(buf_pool));
}
#endif /* UNIV_DEBUG */
mutex_enter(&block->mutex);
ut_ad(block->page.newest_modification <= end_lsn); ut_ad(block->page.newest_modification <= end_lsn);
block->page.newest_modification = end_lsn; block->page.newest_modification = end_lsn;
...@@ -98,52 +74,7 @@ buf_flush_note_modification( ...@@ -98,52 +74,7 @@ buf_flush_note_modification(
ut_ad(block->page.oldest_modification <= start_lsn); ut_ad(block->page.oldest_modification <= start_lsn);
} }
buf_page_mutex_exit(block); mutex_exit(&block->mutex);
srv_stats.buf_pool_write_requests.inc(); srv_stats.buf_pool_write_requests.inc();
} }
/********************************************************************//**
This function should be called when recovery has modified a buffer page. */
UNIV_INLINE
void
buf_flush_recv_note_modification(
/*=============================*/
buf_block_t* block, /*!< in: block which is modified */
lsn_t start_lsn, /*!< in: start lsn of the first mtr in a
set of mtr's */
lsn_t end_lsn) /*!< in: end lsn of the last mtr in the
set of mtr's */
{
#ifdef UNIV_DEBUG
{
ut_ad(!srv_read_only_mode);
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->page.buf_fix_count > 0);
buf_pool_t* buf_pool = buf_pool_from_block(block);
ut_ad(!buf_pool_mutex_own(buf_pool));
ut_ad(!buf_flush_list_mutex_own(buf_pool));
ut_ad(start_lsn != 0);
ut_ad(block->page.newest_modification <= end_lsn);
}
#endif /* UNIV_DEBUG */
buf_page_mutex_enter(block);
block->page.newest_modification = end_lsn;
if (!block->page.oldest_modification) {
buf_pool_t* buf_pool = buf_pool_from_block(block);
buf_flush_insert_sorted_into_flush_list(
buf_pool, block, start_lsn);
} else {
ut_ad(block->page.oldest_modification <= start_lsn);
}
buf_page_mutex_exit(block);
}
...@@ -1925,7 +1925,7 @@ recv_recover_page(bool just_read_in, buf_block_t* block) ...@@ -1925,7 +1925,7 @@ recv_recover_page(bool just_read_in, buf_block_t* block)
if (start_lsn) { if (start_lsn) {
log_flush_order_mutex_enter(); log_flush_order_mutex_enter();
buf_flush_recv_note_modification(block, start_lsn, end_lsn); buf_flush_note_modification(block, start_lsn, end_lsn, NULL);
log_flush_order_mutex_exit(); log_flush_order_mutex_exit();
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment