Commit ab8199f3 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-12103 Reduce the time of looking for MLOG_CHECKPOINT during crash recovery

This fixes MySQL Bug#80788 in MariaDB 10.2.5.

When I made the InnoDB crash recovery more robust by implementing
WL#7142, I also introduced an extra redo log scan pass that can be
shortened.

This fix will slightly extend the InnoDB redo log format that I
introduced in MySQL 5.7.9 by writing the start LSN of the MLOG_CHECKPOINT
mini-transaction to the end of the log checkpoint page, so that recovery
can jump straight to it without scanning all the preceding redo log.

LOG_CHECKPOINT_END_LSN: At the end of the checkpoint page, the start LSN
of the MLOG_CHECKPOINT mini-transaction. Previously, these bytes were
written as 0.

log_write_checkpoint_info(), log_group_checkpoint(): Add the parameter
end_lsn for writing LOG_CHECKPOINT_END_LSN.

log_checkpoint(): Remember the LSN at which the MLOG_CHECKPOINT
mini-transaction is starting (or at which the redo log ends on
shutdown).

recv_init_crash_recovery(): Remove.

recv_group_scan_log_recs(): Add the parameter checkpoint_lsn.

recv_recovery_from_checkpoint_start(): Read LOG_CHECKPOINT_END_LSN
and if it is set, start the first scan from it instead of the
checkpoint LSN. Improve some messages and remove bogus assertions.

recv_parse_log_recs(): Do not skip DBUG_PRINT("ib_log") for some
file-level redo log records.

recv_parse_or_apply_log_rec_body(): If we have not parsed all redo
log between the checkpoint and the corresponding MLOG_CHECKPOINT
record, defer the check for MLOG_FILE_DELETE or MLOG_FILE_NAME records
to recv_init_crash_recovery_spaces().

recv_init_crash_recovery_spaces(): Refuse recovery if
MLOG_FILE_NAME or MLOG_FILE_DELETE records are missing.
parent e4a2e80a
......@@ -84,6 +84,12 @@ WHERE engine = 'innodb'
AND support IN ('YES', 'DEFAULT', 'ENABLED');
ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS
FOUND /len 22; hex 38000000000012860cb7809781e800066269676f7400; asc 8 bigot ;/ in mysqld.1.err
# missing MLOG_FILE_NAME or MLOG_FILE_DELETE before MLOG_CHECKPOINT
SELECT * FROM INFORMATION_SCHEMA.ENGINES
WHERE engine = 'innodb'
AND support IN ('YES', 'DEFAULT', 'ENABLED');
ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS
NOT FOUND /InnoDB: Missing MLOG_FILE_NAME or MLOG_FILE_DELETE before MLOG_CHECKPOINT for tablespace 42$/ in mysqld.1.err
# Minimal MariaDB 10.1.21 encrypted redo log
SELECT COUNT(*) `1` FROM INFORMATION_SCHEMA.ENGINES WHERE engine='innodb'
AND support IN ('YES', 'DEFAULT', 'ENABLED');
......
......@@ -84,6 +84,12 @@ WHERE engine = 'innodb'
AND support IN ('YES', 'DEFAULT', 'ENABLED');
ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS
FOUND /len 22; hex 38000000000012860cb7809781e800066269676f7400; asc 8 bigot ;/ in mysqld.1.err
# missing MLOG_FILE_NAME or MLOG_FILE_DELETE before MLOG_CHECKPOINT
SELECT * FROM INFORMATION_SCHEMA.ENGINES
WHERE engine = 'innodb'
AND support IN ('YES', 'DEFAULT', 'ENABLED');
ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS
NOT FOUND /InnoDB: Missing MLOG_FILE_NAME or MLOG_FILE_DELETE before MLOG_CHECKPOINT for tablespace 42$/ in mysqld.1.err
# Minimal MariaDB 10.1.21 encrypted redo log
SELECT * FROM INFORMATION_SCHEMA.ENGINES
WHERE engine = 'innodb'
......
......@@ -13,6 +13,7 @@ call mtr.add_suppression("InnoDB: Missing MLOG_CHECKPOINT");
call mtr.add_suppression("InnoDB: MLOG_FILE_NAME incorrect");
call mtr.add_suppression("InnoDB: ############### CORRUPT LOG RECORD FOUND");
call mtr.add_suppression("InnoDB: Log scan aborted at LSN");
call mtr.add_suppression("InnoDB: Missing MLOG_FILE_NAME or MLOG_FILE_DELETE before MLOG_CHECKPOINT for tablespace 42\\r?$");
call mtr.add_suppression("InnoDB: Obtaining redo log encryption key version 1 failed");
call mtr.add_suppression("InnoDB: Decrypting checkpoint failed");
--enable_query_log
......@@ -316,6 +317,37 @@ eval $check_no_innodb;
--let SEARCH_PATTERN= len 22; hex 38000000000012860cb7809781e800066269676f7400; asc 8 bigot ;
--source include/search_pattern_in_file.inc
--echo # missing MLOG_FILE_NAME or MLOG_FILE_DELETE before MLOG_CHECKPOINT
perl;
die unless open OUT, "+<", "$ENV{bugdir}/ib_logfile0";
binmode OUT;
# header block
print OUT pack("Nx[5]nx[5]", 1, 0x1286);
print OUT "ibbackup was here!!!1!";
print OUT pack("x[470]N", 0x52b54540);
# invalid (all-zero) checkpoint page 1 and an empty log page
print OUT chr(0) x 1024;
# valid checkpoint block 2
print OUT pack("x[12]NNNx[264]", 0x12860c, 0, 0x80c);
# pointer to the MLOG_CHECKPOINT record, and checkpoint page checksum
print OUT pack("H*x[204]NNN", "590DBAACFE922582", 0x128612, 0, 0x101741b);
# log page
print OUT pack("H*", "800009440022000c00000001");
# MLOG_1BYTE record for missing tablespace 42 page 123
print OUT pack("CCCnC", 128 | 1, 42, 123, 4567, 89);
# MLOG_CHECKPOINT record
print OUT pack("CNN", 56, 0, 0x12860c);
# padding (MLOG_DUMMY_RECORD) and block checksum
print OUT " " x 481, pack("N", 0x644e6db8);
close OUT or die;
EOF
--source include/start_mysqld.inc
eval $check_no_innodb;
--source include/shutdown_mysqld.inc
--let SEARCH_PATTERN= InnoDB: Missing MLOG_FILE_NAME or MLOG_FILE_DELETE before MLOG_CHECKPOINT for tablespace 42\$
--source include/search_pattern_in_file.inc
--echo # Minimal MariaDB 10.1.21 encrypted redo log
perl;
die unless open OUT, "+<", "$ENV{bugdir}/ib_logfile0";
......
......@@ -252,10 +252,10 @@ log_group_header_read(
const log_group_t* group,
ulint header);
/** Write checkpoint info to the log header and invoke log_mutex_exit().
@param[in] sync whether to wait for the write to complete */
@param[in] sync whether to wait for the write to complete
@param[in] end_lsn start LSN of the MLOG_CHECKPOINT mini-transaction */
void
log_write_checkpoint_info(
bool sync);
log_write_checkpoint_info(bool sync, lsn_t end_lsn);
/** Set extra data to be written to the redo log during checkpoint.
@param[in] buf data to be appended on checkpoint, or NULL
......@@ -487,10 +487,14 @@ extern my_bool innodb_log_checksums;
.._HDR_NO */
#define LOG_BLOCK_TRL_SIZE 4 /* trailer size in bytes */
/* Offsets inside the checkpoint pages (redo log format version 1) */
/** Offsets inside the checkpoint pages (redo log format version 1) @{ */
/** Checkpoint number */
#define LOG_CHECKPOINT_NO 0
/** Log sequence number up to which all changes have been flushed */
#define LOG_CHECKPOINT_LSN 8
/** Byte offset of the log record corresponding to LOG_CHECKPOINT_LSN */
#define LOG_CHECKPOINT_OFFSET 16
/** log_sys_t::buf_size at the time of the checkpoint (not used) */
#define LOG_CHECKPOINT_LOG_BUF_SIZE 24
/** MariaDB 10.2.5 encrypted redo log encryption key version (32 bits)*/
#define LOG_CHECKPOINT_CRYPT_KEY 32
......@@ -498,6 +502,11 @@ extern my_bool innodb_log_checksums;
#define LOG_CHECKPOINT_CRYPT_NONCE 36
/** MariaDB 10.2.5 encrypted redo log random message (MY_AES_BLOCK_SIZE) */
#define LOG_CHECKPOINT_CRYPT_MESSAGE 40
/** start LSN of the MLOG_CHECKPOINT mini-transaction corresponding
to this checkpoint, or 0 if the information has not been written */
#define LOG_CHECKPOINT_END_LSN OS_FILE_LOG_BLOCK_SIZE - 16
/* @} */
/** Offsets of a log file header */
/* @{ */
......
......@@ -1612,19 +1612,22 @@ log_io_complete_checkpoint(void)
log_mutex_exit();
}
/******************************************************//**
Writes the checkpoint info to a log group header. */
/** Write checkpoint info to the log header.
@param[in,out] group redo log
@param[in] end_lsn start LSN of the MLOG_CHECKPOINT mini-transaction */
static
void
log_group_checkpoint(
/*=================*/
log_group_t* group) /*!< in: log group */
log_group_checkpoint(log_group_t* group, lsn_t end_lsn)
{
lsn_t lsn_offset;
byte* buf;
ut_ad(!srv_read_only_mode);
ut_ad(log_mutex_own());
ut_ad(end_lsn == 0 || end_lsn >= log_sys->next_checkpoint_lsn);
ut_ad(end_lsn <= log_sys->lsn);
ut_ad(end_lsn + SIZE_OF_MLOG_CHECKPOINT <= log_sys->lsn
|| srv_shutdown_state != SRV_SHUTDOWN_NONE);
DBUG_PRINT("ib_log", ("checkpoint " UINT64PF " at " LSN_PF
" written to group " ULINTPF,
......@@ -1646,6 +1649,7 @@ log_group_checkpoint(
group);
mach_write_to_8(buf + LOG_CHECKPOINT_OFFSET, lsn_offset);
mach_write_to_8(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, log_sys->buf_size);
mach_write_to_8(buf + LOG_CHECKPOINT_END_LSN, end_lsn);
log_block_set_checksum(buf, log_block_calc_checksum_crc32(buf));
......@@ -1702,9 +1706,10 @@ log_group_header_read(
}
/** Write checkpoint info to the log header and invoke log_mutex_exit().
@param[in] sync whether to wait for the write to complete */
@param[in] sync whether to wait for the write to complete
@param[in] end_lsn start LSN of the MLOG_CHECKPOINT mini-transaction */
void
log_write_checkpoint_info(bool sync)
log_write_checkpoint_info(bool sync, lsn_t end_lsn)
{
ut_ad(log_mutex_own());
ut_ad(!srv_read_only_mode);
......@@ -1713,7 +1718,7 @@ log_write_checkpoint_info(bool sync)
group;
group = UT_LIST_GET_NEXT(log_groups, group)) {
log_group_checkpoint(group);
log_group_checkpoint(group, end_lsn);
}
log_mutex_exit();
......@@ -1827,12 +1832,13 @@ log_checkpoint(
threads will be blocked, and no pages can be added to the
flush lists. */
lsn_t flush_lsn = oldest_lsn;
const lsn_t end_lsn = log_sys->lsn;
const bool do_write
= srv_shutdown_state == SRV_SHUTDOWN_NONE
|| flush_lsn != log_sys->lsn;
|| flush_lsn != end_lsn;
if (fil_names_clear(flush_lsn, do_write)) {
ut_ad(log_sys->lsn >= flush_lsn + SIZE_OF_MLOG_CHECKPOINT);
ut_ad(log_sys->lsn >= end_lsn + SIZE_OF_MLOG_CHECKPOINT);
flush_lsn = log_sys->lsn;
}
......@@ -1878,7 +1884,7 @@ log_checkpoint(
}
log_sys->next_checkpoint_lsn = oldest_lsn;
log_write_checkpoint_info(sync);
log_write_checkpoint_info(sync, end_lsn);
ut_ad(!log_mutex_own());
return(true);
......
......@@ -138,16 +138,6 @@ const char*
get_mlog_string(mlog_id_t type);
#endif /* !DBUG_OFF */
/* prototypes */
/*******************************************************//**
Initialize crash recovery environment. Can be called iff
recv_needed_recovery == false. */
static
void
recv_init_crash_recovery(void);
/*===========================*/
/** Tablespace item during recovery */
struct file_name_t {
/** Tablespace file name (MLOG_FILE_NAME) */
......@@ -818,7 +808,7 @@ recv_synchronize_groups()
checkpoint info on disk certain */
if (!srv_read_only_mode) {
log_write_checkpoint_info(true);
log_write_checkpoint_info(true, 0);
log_mutex_enter();
}
}
......@@ -1042,14 +1032,17 @@ recv_find_max_checkpoint(
log_group_header_read(group, field);
if (!recv_check_log_header_checksum(buf)) {
const ulint crc32 = log_block_calc_checksum_crc32(buf);
const ulint cksum = log_block_get_checksum(buf);
if (crc32 != cksum) {
DBUG_PRINT("ib_log",
("invalid checkpoint,"
" group " ULINTPF " at " ULINTPF
", checksum %x",
", checksum %x expected %x",
group->id, field,
(unsigned) log_block_get_checksum(
buf)));
(unsigned) cksum,
(unsigned) crc32));
continue;
}
......@@ -1167,12 +1160,24 @@ recv_parse_or_apply_log_rec_body(
} else if (apply
&& !is_predefined_tablespace(space_id)
&& recv_spaces.find(space_id) == recv_spaces.end()) {
ib::fatal() << "Missing MLOG_FILE_NAME or MLOG_FILE_DELETE"
if (recv_sys->recovered_lsn < recv_sys->mlog_checkpoint_lsn) {
/* We have not seen all records between the
checkpoint and MLOG_CHECKPOINT. There should be
a MLOG_FILE_DELETE for this tablespace later. */
recv_spaces.insert(
std::make_pair(space_id,
file_name_t("", false)));
goto parse_log;
}
ib::error() << "Missing MLOG_FILE_NAME or MLOG_FILE_DELETE"
" for redo log record " << type << " (page "
<< space_id << ":" << page_no << ") at "
<< recv_sys->recovered_lsn << ".";
<< space_id << ":" << page_no << ") at "
<< recv_sys->recovered_lsn << ".";
recv_sys->found_corrupt_log = true;
return(NULL);
} else {
parse_log:
/* Parsing a page log record. */
page = NULL;
page_zip = NULL;
......@@ -2485,15 +2490,6 @@ recv_parse_log_recs(
return(true);
}
break;
case MLOG_FILE_NAME:
case MLOG_FILE_DELETE:
case MLOG_FILE_CREATE2:
case MLOG_FILE_RENAME2:
case MLOG_TRUNCATE:
/* These were already handled by
recv_parse_log_rec() and
recv_parse_or_apply_log_rec_body(). */
break;
#ifdef UNIV_LOG_LSN_DEBUG
case MLOG_LSN:
/* Do not add these records to the hash table.
......@@ -2518,6 +2514,14 @@ recv_parse_log_recs(
recv_sys->recovered_lsn);
}
/* fall through */
case MLOG_FILE_NAME:
case MLOG_FILE_DELETE:
case MLOG_FILE_CREATE2:
case MLOG_FILE_RENAME2:
case MLOG_TRUNCATE:
/* These were already handled by
recv_parse_log_rec() and
recv_parse_or_apply_log_rec_body(). */
case MLOG_INDEX_LOAD:
DBUG_PRINT("ib_log",
("scan " LSN_PF ": log rec %s"
......@@ -2851,19 +2855,17 @@ recv_scan_log_recs(
environment before parsing these log records. */
if (!recv_needed_recovery) {
recv_needed_recovery = true;
if (!srv_read_only_mode) {
ib::info() << "Log scan progressed"
" past the checkpoint lsn "
<< recv_sys->scanned_lsn;
recv_init_crash_recovery();
} else {
if (srv_read_only_mode) {
ib::warn() << "innodb_read_only"
" prevents crash recovery";
recv_needed_recovery = true;
return(true);
}
ib::info() << "Log scan progressed "
"past the checkpoint lsn "
<< recv_sys->scanned_lsn;
}
/* We were able to find more log data: add it to the
......@@ -2944,6 +2946,7 @@ recv_scan_log_recs(
/** Scans log from a buffer and stores new log data to the parsing buffer.
Parses and hashes the log records if new data found.
@param[in,out] group log group
@param[in] checkpoint_lsn latest checkpoint log sequence number
@param[in,out] contiguous_lsn log sequence number
until which all redo log has been scanned
@param[in] last_phase whether changes
......@@ -2953,6 +2956,7 @@ static
bool
recv_group_scan_log_recs(
log_group_t* group,
lsn_t checkpoint_lsn,
lsn_t* contiguous_lsn,
bool last_phase)
{
......@@ -2976,7 +2980,6 @@ recv_group_scan_log_recs(
ut_ad(last_phase || !recv_writer_thread_active);
mutex_exit(&recv_sys->mutex);
lsn_t checkpoint_lsn = *contiguous_lsn;
lsn_t start_lsn;
lsn_t end_lsn;
store_t store_to_hash = recv_sys->mlog_checkpoint_lsn == 0
......@@ -3023,19 +3026,6 @@ recv_group_scan_log_recs(
DBUG_RETURN(store_to_hash == STORE_NO);
}
/*******************************************************//**
Initialize crash recovery environment. Can be called iff
recv_needed_recovery == false. */
static
void
recv_init_crash_recovery(void)
{
ut_ad(!srv_read_only_mode);
ut_a(!recv_needed_recovery);
recv_needed_recovery = true;
}
/** Report a missing tablespace for which page-redo log exists.
@param[in] err previous error code
@param[in] i tablespace descriptor
......@@ -3083,6 +3073,8 @@ recv_init_crash_recovery_spaces(void)
for (recv_spaces_t::iterator i = recv_spaces.begin();
i != recv_spaces.end(); i++) {
ut_ad(!is_predefined_tablespace(i->first));
ut_ad(i->second.deleted || i->second.name != "");
ut_ad(!i->second.deleted || !i->second.space);
if (i->second.deleted) {
/* The tablespace was deleted,
......@@ -3092,6 +3084,13 @@ recv_init_crash_recovery_spaces(void)
/* The tablespace was found, and there
are some redo log records for it. */
fil_names_dirty(i->second.space);
} else if (i->second.name == "") {
ib::error() << "Missing MLOG_FILE_NAME"
" or MLOG_FILE_DELETE"
" before MLOG_CHECKPOINT for tablespace "
<< i->first;
recv_sys->found_corrupt_log = true;
return(DB_CORRUPTION);
} else {
missing_spaces.insert(i->first);
flag_deleted = true;
......@@ -3208,9 +3207,7 @@ recv_recovery_from_checkpoint_start(
err = recv_find_max_checkpoint(&max_cp_group, &max_cp_field);
if (err != DB_SUCCESS) {
log_mutex_exit();
return(err);
}
......@@ -3231,6 +3228,8 @@ recv_recovery_from_checkpoint_start(
ut_ad(UT_LIST_GET_LEN(log_sys->log_groups) == 1);
group = UT_LIST_GET_FIRST(log_sys->log_groups);
const lsn_t end_lsn = mach_read_from_8(
buf + LOG_CHECKPOINT_END_LSN);
ut_ad(recv_sys->n_addrs == 0);
contiguous_lsn = checkpoint_lsn;
......@@ -3240,16 +3239,23 @@ recv_recovery_from_checkpoint_start(
return(recv_log_format_0_recover(checkpoint_lsn));
case LOG_HEADER_FORMAT_CURRENT:
case LOG_HEADER_FORMAT_CURRENT | LOG_HEADER_FORMAT_ENCRYPTED:
break;
if (end_lsn == 0) {
break;
}
if (end_lsn >= checkpoint_lsn) {
contiguous_lsn = end_lsn;
break;
}
/* fall through */
default:
ut_ad(0);
recv_sys->found_corrupt_log = true;
log_mutex_exit();
return(DB_ERROR);
}
/* Look for MLOG_CHECKPOINT. */
recv_group_scan_log_recs(group, &contiguous_lsn, false);
recv_group_scan_log_recs(group, checkpoint_lsn, &contiguous_lsn,
false);
/* The first scan should not have stored or applied any records. */
ut_ad(recv_sys->n_addrs == 0);
ut_ad(!recv_sys->found_corrupt_fs);
......@@ -3268,11 +3274,15 @@ recv_recovery_from_checkpoint_start(
if (recv_sys->mlog_checkpoint_lsn == 0) {
if (!srv_read_only_mode
&& group->scanned_lsn != checkpoint_lsn) {
ib::error() << "Missing"
" MLOG_CHECKPOINT between the checkpoint "
<< checkpoint_lsn << " and the end "
<< group->scanned_lsn << ".";
lsn_t scan_lsn = group->scanned_lsn;
log_mutex_exit();
ib::error err;
err << "Missing MLOG_CHECKPOINT";
if (end_lsn) {
err << " at " << end_lsn;
}
err << " between the checkpoint " << checkpoint_lsn
<< " and the end " << scan_lsn << ".";
return(DB_ERROR);
}
......@@ -3281,7 +3291,7 @@ recv_recovery_from_checkpoint_start(
} else {
contiguous_lsn = checkpoint_lsn;
rescan = recv_group_scan_log_recs(
group, &contiguous_lsn, false);
group, checkpoint_lsn, &contiguous_lsn, false);
if ((recv_sys->found_corrupt_log && !srv_force_recovery)
|| recv_sys->found_corrupt_fs) {
......@@ -3313,13 +3323,13 @@ recv_recovery_from_checkpoint_start(
<< " in the ib_logfiles!";
if (srv_read_only_mode) {
ib::error() << "Can't initiate database"
" recovery, running in read-only-mode.";
ib::error() << "innodb_read_only"
" prevents crash recovery";
log_mutex_exit();
return(DB_READ_ONLY);
}
recv_init_crash_recovery();
recv_needed_recovery = true;
}
}
......@@ -3336,7 +3346,8 @@ recv_recovery_from_checkpoint_start(
if (rescan) {
contiguous_lsn = checkpoint_lsn;
recv_group_scan_log_recs(group, &contiguous_lsn, true);
recv_group_scan_log_recs(group, checkpoint_lsn,
&contiguous_lsn, true);
if ((recv_sys->found_corrupt_log
&& !srv_force_recovery)
......@@ -3367,11 +3378,6 @@ recv_recovery_from_checkpoint_start(
ib::error() << "Recovered only to lsn:"
<< recv_sys->recovered_lsn << " checkpoint_lsn: " << checkpoint_lsn;
/* No harm in trying to do RO access. */
if (!srv_read_only_mode) {
ut_error;
}
return(DB_ERROR);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment