MDEV-21572 buf_page_get_gen() should apply buffered page initialized

		redo log during recovery

- InnoDB unnecessarily reads a page from the data file even though the
buffered redo log records fully initialize it. Let buf_page_get_gen()
apply the page initialization redo log to such a page during recovery.
- Renamed buf_page_get_gen() to buf_page_get_low()
- Newly added buf_page_get_gen() will check for buffered redo log for
the particular page id during recovery
- Added new function buf_page_mtr_lock() which basically latches the page
for the given latch type.
- recv_recovery_create_page() is inline function which creates a page
if it has page initialization redo log records.
parent 1e6be693
...@@ -17,3 +17,19 @@ CHECK TABLE t2; ...@@ -17,3 +17,19 @@ CHECK TABLE t2;
Table Op Msg_type Msg_text Table Op Msg_type Msg_text
test.t2 check status OK test.t2 check status OK
DROP TABLE t1, t2; DROP TABLE t1, t2;
CREATE TABLE t1(pk SERIAL) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1),(2),(3);
connect con1,localhost,root,,;
BEGIN;
DELETE FROM t1 WHERE pk=1;
connection default;
SET GLOBAL innodb_flush_log_at_trx_commit=1;
DELETE FROM t1 WHERE pk=3;
# Kill the server
disconnect con1;
# Corrupt the pages
SELECT * FROM t1;
pk
1
2
DROP TABLE t1;
...@@ -62,3 +62,31 @@ SELECT * FROM t2; ...@@ -62,3 +62,31 @@ SELECT * FROM t2;
CHECK TABLE t2; CHECK TABLE t2;
DROP TABLE t1, t2; DROP TABLE t1, t2;
# MDEV-21572 buf_page_get_gen() should apply buffered page
# initialized redo log during recovery
#
# Outline: modify t1 without a checkpoint, kill the server, overwrite
# part of t1.ibd with junk, restart, and check that t1 is still readable.
# The recorded result expects the rows 1 and 2 after the restart.
--source ../include/no_checkpoint_start.inc
CREATE TABLE t1(pk SERIAL) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1),(2),(3);
# con1 opens a transaction that deletes pk=1 and never commits it.
connect (con1,localhost,root,,);
BEGIN;
DELETE FROM t1 WHERE pk=1;
connection default;
# The DELETE of pk=3 runs outside the explicit transaction; row 3 is
# absent from the expected result, so this change must survive the kill.
SET GLOBAL innodb_flush_log_at_trx_commit=1;
DELETE FROM t1 WHERE pk=3;
# NOTE(review): no_checkpoint_end.inc presumably kills the server (the
# result file prints "# Kill the server" here) and runs
# CLEANUP_IF_CHECKPOINT if a checkpoint happened - confirm in the include.
--let CLEANUP_IF_CHECKPOINT=DROP TABLE t1;
--source ../include/no_checkpoint_end.inc
disconnect con1;
--echo # Corrupt the pages
perl;
# Overwrite 4 bytes at byte offset 3 * INNODB_PAGE_SIZE of t1.ibd,
# i.e. the start of page number 3 of the file.
my $file = "$ENV{MYSQLD_DATADIR}/test/t1.ibd";
open(FILE, "+<$file") || die "Unable to open $file";
binmode FILE;
seek (FILE, $ENV{INNODB_PAGE_SIZE} * 3, SEEK_SET) or die "seek";
print FILE "junk";
close FILE or die "close";
EOF
--source include/start_mysqld.inc
# Must succeed despite the junk written above; presumably recovery
# rebuilds the damaged page from the redo log instead of reading it.
SELECT * FROM t1;
DROP TABLE t1;
...@@ -4190,7 +4190,45 @@ buf_wait_for_read( ...@@ -4190,7 +4190,45 @@ buf_wait_for_read(
} }
} }
/** This is the general function used to get access to a database page. /** Lock the page with the given latch type.
@param[in,out] block block to be locked
@param[in] rw_latch RW_S_LATCH, RW_SX_LATCH, RW_X_LATCH, RW_NO_LATCH
@param[in] mtr mini-transaction
@param[in] file file name
@param[in] line line where called
@return pointer to locked block */
static buf_block_t* buf_page_mtr_lock(buf_block_t *block,
                                      ulint rw_latch,
                                      mtr_t* mtr,
                                      const char *file,
                                      unsigned line)
{
  /* Memo entry type that corresponds to the latch acquired below. */
  mtr_memo_type_t memo_type;

  if (rw_latch == RW_NO_LATCH)
  {
    /* No latch is taken; the block is pushed as MTR_MEMO_BUF_FIX. */
    memo_type= MTR_MEMO_BUF_FIX;
  }
  else if (rw_latch == RW_S_LATCH)
  {
    rw_lock_s_lock_inline(&block->lock, 0, file, line);
    memo_type= MTR_MEMO_PAGE_S_FIX;
  }
  else if (rw_latch == RW_SX_LATCH)
  {
    rw_lock_sx_lock_inline(&block->lock, 0, file, line);
    memo_type= MTR_MEMO_PAGE_SX_FIX;
  }
  else
  {
    /* Anything else is treated as an X-latch request; debug builds
    assert that it really is one. */
    ut_ad(rw_latch == RW_X_LATCH);
    rw_lock_x_lock_inline(&block->lock, 0, file, line);
    memo_type= MTR_MEMO_PAGE_X_FIX;
  }

  /* Register the block in the mini-transaction memo under that type. */
  mtr_memo_push(mtr, block, memo_type);
  return block;
}
/** This is the low level function used to get access to a database page.
@param[in] page_id page id @param[in] page_id page id
@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH @param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
@param[in] guess guessed block or NULL @param[in] guess guessed block or NULL
...@@ -4201,7 +4239,7 @@ BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH ...@@ -4201,7 +4239,7 @@ BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH
@param[in] mtr mini-transaction @param[in] mtr mini-transaction
@return pointer to the block or NULL */ @return pointer to the block or NULL */
buf_block_t* buf_block_t*
buf_page_get_gen( buf_page_get_low(
const page_id_t page_id, const page_id_t page_id,
const page_size_t& page_size, const page_size_t& page_size,
ulint rw_latch, ulint rw_latch,
...@@ -4844,35 +4882,7 @@ buf_page_get_gen( ...@@ -4844,35 +4882,7 @@ buf_page_get_gen(
return NULL; return NULL;
} }
mtr_memo_type_t fix_type; fix_block = buf_page_mtr_lock(fix_block, rw_latch, mtr, file, line);
switch (rw_latch) {
case RW_NO_LATCH:
fix_type = MTR_MEMO_BUF_FIX;
break;
case RW_S_LATCH:
rw_lock_s_lock_inline(&fix_block->lock, 0, file, line);
fix_type = MTR_MEMO_PAGE_S_FIX;
break;
case RW_SX_LATCH:
rw_lock_sx_lock_inline(&fix_block->lock, 0, file, line);
fix_type = MTR_MEMO_PAGE_SX_FIX;
break;
default:
ut_ad(rw_latch == RW_X_LATCH);
rw_lock_x_lock_inline(&fix_block->lock, 0, file, line);
fix_type = MTR_MEMO_PAGE_X_FIX;
break;
}
mtr_memo_push(mtr, fix_block, fix_type);
if (mode != BUF_PEEK_IF_IN_POOL && !access_time) { if (mode != BUF_PEEK_IF_IN_POOL && !access_time) {
/* In the case of a first access, try to apply linear /* In the case of a first access, try to apply linear
...@@ -4887,6 +4897,42 @@ buf_page_get_gen( ...@@ -4887,6 +4897,42 @@ buf_page_get_gen(
return(fix_block); return(fix_block);
} }
/** This is the general function used to get access to a database page.
It first asks recv_recovery_create_page() for the page; if that returns
a block, the block is buffer-fixed, latched as requested and returned
without calling buf_page_get_low(). Otherwise the request is forwarded
unchanged to buf_page_get_low().
@param[in]	page_id		page id
@param[in]	page_size	page size
@param[in]	rw_latch	RW_S_LATCH, RW_SX_LATCH, RW_X_LATCH, RW_NO_LATCH
@param[in]	guess		guessed block or NULL
@param[in]	mode		BUF_GET, BUF_GET_IF_IN_POOL,
BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH
@param[in]	file		file name
@param[in]	line		line where called
@param[in]	mtr		mini-transaction
@param[out]	err		DB_SUCCESS or error code; may be NULL
@return pointer to the block or NULL */
buf_block_t*
buf_page_get_gen(
	const page_id_t		page_id,
	const page_size_t&	page_size,
	ulint			rw_latch,
	buf_block_t*		guess,
	ulint			mode,
	const char*		file,
	unsigned		line,
	mtr_t*			mtr,
	dberr_t*		err)
{
	if (buf_block_t* block = recv_recovery_create_page(page_id)) {
		buf_block_fix(block);
		/* NOTE(review): the debug latch is acquired inside ut_ad();
		presumably both the call and debug_latch exist only in
		debug builds - confirm. */
		ut_ad(rw_lock_s_lock_nowait(&block->debug_latch, file, line));

		/* This path bypasses buf_page_get_low(), so the documented
		output parameter has to be filled in here; otherwise a
		caller that checks *err would read an indeterminate value. */
		if (err) {
			*err = DB_SUCCESS;
		}

		return buf_page_mtr_lock(block, rw_latch, mtr, file, line);
	}

	return buf_page_get_low(page_id, page_size, rw_latch,
				guess, mode, file, line, mtr, err);
}
/********************************************************************//** /********************************************************************//**
This is the general function used to get optimistic access to a database This is the general function used to get optimistic access to a database
page. page.
......
...@@ -436,6 +436,7 @@ buf_page_get_zip( ...@@ -436,6 +436,7 @@ buf_page_get_zip(
const page_size_t& page_size); const page_size_t& page_size);
/** This is the general function used to get access to a database page. /** This is the general function used to get access to a database page.
It does page initialization and applies the buffered redo logs.
@param[in] page_id page id @param[in] page_id page id
@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH @param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
@param[in] guess guessed block or NULL @param[in] guess guessed block or NULL
...@@ -458,6 +459,29 @@ buf_page_get_gen( ...@@ -458,6 +459,29 @@ buf_page_get_gen(
mtr_t* mtr, mtr_t* mtr,
dberr_t* err); dberr_t* err);
/** This is the low level function used to get access to a database page.
@param[in] page_id page id
@param[in] page_size page size
@param[in] rw_latch RW_S_LATCH, RW_SX_LATCH, RW_X_LATCH, RW_NO_LATCH
@param[in] guess guessed block or NULL
@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL,
BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH
@param[in] file file name
@param[in] line line where called
@param[in] mtr mini-transaction
@param[out] err DB_SUCCESS or error code; callers may pass NULL
@return pointer to the block or NULL */
buf_block_t*
buf_page_get_low(
const page_id_t page_id,
const page_size_t& page_size,
ulint rw_latch,
buf_block_t* guess,
ulint mode,
const char* file,
unsigned line,
mtr_t* mtr,
dberr_t* err);
/** Initializes a page to the buffer buf_pool. The page is usually not read /** Initializes a page to the buffer buf_pool. The page is usually not read
from a file even if it cannot be found in the buffer buf_pool. This is one from a file even if it cannot be found in the buffer buf_pool. This is one
of the functions which perform to a block a state transition NOT_USED => of the functions which perform to a block a state transition NOT_USED =>
......
...@@ -342,4 +342,22 @@ times! */ ...@@ -342,4 +342,22 @@ times! */
roll-forward */ roll-forward */
#define RECV_SCAN_SIZE (4 * UNIV_PAGE_SIZE) #define RECV_SCAN_SIZE (4 * UNIV_PAGE_SIZE)
/** This is a low level function for the recovery system
to create a page which has buffered page initialization redo log records.
@param[in] page_id page to be created using redo logs
@return the created block, or NULL if no page was created */
buf_block_t* recv_recovery_create_page_low(const page_id_t page_id);
/** Create a page from its buffered page initialization redo log
records, if recovery is in progress.
Returns NULL immediately when recv_recovery_on is not set; otherwise the
work is delegated to recv_recovery_create_page_low().
@param[in]	page_id	page to be created using redo logs
@return the block returned by recv_recovery_create_page_low(), or NULL */
inline buf_block_t* recv_recovery_create_page(const page_id_t page_id)
{
  return UNIV_LIKELY(!recv_recovery_on)
    ? NULL
    : recv_recovery_create_page_low(page_id);
}
#endif #endif
...@@ -313,7 +313,7 @@ class mlog_init_t ...@@ -313,7 +313,7 @@ class mlog_init_t
if (!i->second.created) { if (!i->second.created) {
continue; continue;
} }
if (buf_block_t* block = buf_page_get_gen( if (buf_block_t* block = buf_page_get_low(
i->first, univ_page_size, RW_X_LATCH, NULL, i->first, univ_page_size, RW_X_LATCH, NULL,
BUF_GET_IF_IN_POOL, __FILE__, __LINE__, BUF_GET_IF_IN_POOL, __FILE__, __LINE__,
&mtr, NULL)) { &mtr, NULL)) {
...@@ -2293,6 +2293,99 @@ static void recv_read_in_area(const page_id_t page_id) ...@@ -2293,6 +2293,99 @@ static void recv_read_in_area(const page_id_t page_id)
mutex_enter(&recv_sys->mutex); mutex_enter(&recv_sys->mutex);
} }
/** Create a page in the buffer pool from its buffered page initialization
redo log records, using buf_page_create() instead of a read.
recv_recovery_create_page_low(page_id) calls this while holding
recv_sys->mutex.
@param[in] page_id page to be created using redo logs
@param[in,out] recv_addr hashed redo log records for the given page id;
its state is set to RECV_PROCESSED or RECV_NOT_PROCESSED on the paths
that return NULL
@return the block returned by buf_page_create(), or NULL if the records
were skipped or the page creation could not be used */
static buf_block_t* recv_recovery_create_page_low(const page_id_t page_id,
recv_addr_t* recv_addr)
{
mtr_t mtr;
mlog_init_t::init& i = mlog_init.last(page_id);
const lsn_t end_lsn = UT_LIST_GET_LAST(recv_addr->rec_list)->end_lsn;
/* The last buffered record ends before the LSN that mlog_init holds
for this page: mark the records as processed and skip them. */
if (end_lsn < i.lsn)
{
DBUG_LOG("ib_log", "skip log for page "
<< page_id
<< " LSN " << end_lsn
<< " < " << i.lsn);
recv_addr->state = RECV_PROCESSED;
/* Shared exit for the "nothing to apply" cases: one address fewer
is pending in recv_sys. Also reached by goto when the tablespace
cannot be acquired. */
ignore:
ut_a(recv_sys->n_addrs);
recv_sys->n_addrs--;
return NULL;
}
fil_space_t* space = fil_space_acquire(recv_addr->space);
if (!space)
{
recv_addr->state = RECV_PROCESSED;
goto ignore;
}
/* NOTE(review): the meaning of space->enable_lsn is not visible here;
when it is set the page is not created and the records are left as
RECV_NOT_PROCESSED. */
if (space->enable_lsn)
{
/* Shared exit for the cases where page creation cannot be used:
release the tablespace and leave the records unprocessed. */
init_fail:
fil_space_release(space);
recv_addr->state = RECV_NOT_PROCESSED;
return NULL;
}
/* Determine if a tablespace could be for an internal table
for FULLTEXT INDEX. For those tables, no MLOG_INDEX_LOAD record
used to be written when redo logging was disabled. Hence, we
cannot optimize away page reads, because all the redo
log records for initializing and modifying the page in the
past could be older than the page in the data file.
The check is too broad, causing all
tables whose names start with FTS_ to skip the optimization. */
if (strstr(space->name, "/FTS_"))
goto init_fail;
/* The mini-transaction runs with MTR_LOG_NONE. */
mtr.start();
mtr.set_log_mode(MTR_LOG_NONE);
buf_block_t* block = buf_page_create(page_id, page_size_t(space->flags),
&mtr);
if (recv_addr->state == RECV_PROCESSED)
/* The page happened to exist in the buffer pool, or it was
just being read in. Before buf_page_get_with_no_latch() returned,
all changes must have been applied to the page already. */
mtr.commit();
else
{
/* Record in mlog_init that this page was created here, then
X-latch the block and apply the buffered records. The assertion
below expects recv_recover_page() to have committed mtr. */
i.created = true;
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
mtr.x_latch_at_savepoint(0, block);
recv_recover_page(block, mtr, recv_addr, i.lsn);
ut_ad(mtr.has_committed());
}
fil_space_release(space);
return block;
}
/** Look up the buffered redo log records of a page and, if they are in
state RECV_WILL_NOT_READ, create the page from them.
The lookup and the page creation both run while recv_sys->mutex is held.
@param[in]	page_id	page to be created using redo logs
@return the created block, or NULL if no page was created */
buf_block_t* recv_recovery_create_page_low(const page_id_t page_id)
{
  mutex_enter(&recv_sys->mutex);

  buf_block_t* created= NULL;
  recv_addr_t* const addr= recv_get_fil_addr_struct(page_id.space(),
                                                    page_id.page_no());

  /* Only records in state RECV_WILL_NOT_READ are handled here. */
  if (addr != NULL && addr->state == RECV_WILL_NOT_READ)
    created= recv_recovery_create_page_low(page_id, addr);

  mutex_exit(&recv_sys->mutex);
  return created;
}
/** Apply the hash table of stored log records to persistent data pages. /** Apply the hash table of stored log records to persistent data pages.
@param[in] last_batch whether the change buffer merge will be @param[in] last_batch whether the change buffer merge will be
performed as part of the operation */ performed as part of the operation */
...@@ -2384,7 +2477,7 @@ void recv_apply_hashed_log_recs(bool last_batch) ...@@ -2384,7 +2477,7 @@ void recv_apply_hashed_log_recs(bool last_batch)
apply: apply:
mtr.start(); mtr.start();
mtr.set_log_mode(MTR_LOG_NONE); mtr.set_log_mode(MTR_LOG_NONE);
if (buf_block_t* block = buf_page_get_gen( if (buf_block_t* block = buf_page_get_low(
page_id, univ_page_size, page_id, univ_page_size,
RW_X_LATCH, NULL, RW_X_LATCH, NULL,
BUF_GET_IF_IN_POOL, BUF_GET_IF_IN_POOL,
...@@ -2398,77 +2491,9 @@ void recv_apply_hashed_log_recs(bool last_batch) ...@@ -2398,77 +2491,9 @@ void recv_apply_hashed_log_recs(bool last_batch)
mtr.commit(); mtr.commit();
recv_read_in_area(page_id); recv_read_in_area(page_id);
} }
} else { } else if (!recv_recovery_create_page_low(
mlog_init_t::init& i = mlog_init.last(page_id); page_id, recv_addr)) {
const lsn_t end_lsn = UT_LIST_GET_LAST( goto apply;
recv_addr->rec_list)->end_lsn;
if (end_lsn < i.lsn) {
DBUG_LOG("ib_log", "skip log for page "
<< page_id
<< " LSN " << end_lsn
<< " < " << i.lsn);
skip:
recv_addr->state = RECV_PROCESSED;
goto ignore;
}
fil_space_t* space = fil_space_acquire(
recv_addr->space);
if (!space) {
goto skip;
}
if (space->enable_lsn) {
do_read:
fil_space_release(space);
recv_addr->state = RECV_NOT_PROCESSED;
goto apply;
}
/* Determine if a tablespace could be
for an internal table for FULLTEXT INDEX.
For those tables, no MLOG_INDEX_LOAD record
used to be written when redo logging was
disabled. Hence, we cannot optimize
away page reads, because all the redo
log records for initializing and
modifying the page in the past could
be older than the page in the data
file.
The check is too broad, causing all
tables whose names start with FTS_ to
skip the optimization. */
if (strstr(space->name, "/FTS_")) {
goto do_read;
}
mtr.start();
mtr.set_log_mode(MTR_LOG_NONE);
buf_block_t* block = buf_page_create(
page_id, page_size_t(space->flags),
&mtr);
if (recv_addr->state == RECV_PROCESSED) {
/* The page happened to exist
in the buffer pool, or it was
just being read in. Before
buf_page_get_with_no_latch()
returned, all changes must have
been applied to the page already. */
mtr.commit();
} else {
i.created = true;
buf_block_dbg_add_level(
block, SYNC_NO_ORDER_CHECK);
mtr.x_latch_at_savepoint(0, block);
recv_recover_page(block, mtr,
recv_addr, i.lsn);
ut_ad(mtr.has_committed());
}
fil_space_release(space);
} }
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment