Commit a9d00db1 authored by Marko Mäkelä

MDEV-11799 InnoDB can abort if the doublewrite buffer contains a bad and a good copy

Clean up the InnoDB doublewrite buffer code.

buf_dblwr_init_or_load_pages(): Do not add empty pages to the buffer.

buf_dblwr_process(): Do consider changes to pages that are all zero.
Do not abort when finding a corrupted copy of a page in the doublewrite
buffer, because there could be multiple copies in the doublewrite buffer,
and only one of them needs to be good.
parent 9b99d9be
call mtr.add_suppression("InnoDB: Warning: database page corruption or a failed");
CREATE TABLE t1(a CHAR(255),
b CHAR(255),
c CHAR(255),
......
call mtr.add_suppression("InnoDB: Warning: database page corruption or a failed");
CREATE TABLE t1(a CHAR(255),
b CHAR(255),
c CHAR(255),
......
......@@ -4,10 +4,6 @@
# Temporary tablename will be unique. This makes sure that future
# in-place ALTERs of the same table will not be blocked due to
# temporary tablename.
call mtr.add_suppression("InnoDB: Warning: database page corruption or a failed
");
call mtr.add_suppression("InnoDB: file read of space .* page .*");
call mtr.add_suppression("InnoDB: Trying to recover it from the doublewrite buffer.");
# Crash the server in ha_innobase::commit_inplace_alter_table()
CREATE TABLE t1 (f1 INT NOT NULL, f2 INT NOT NULL) ENGINE=innodb;
SET debug='d,innodb_alter_commit_crash_before_commit';
......
call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed.*");
CALL mtr.add_suppression("InnoDB: Error: Unable to read tablespace .* page no .* into the buffer pool after 100 attempts");
CALL mtr.add_suppression("InnoDB: Warning: database page corruption or a failed");
CALL mtr.add_suppression("InnoDB: Database page corruption on disk or a failed");
CALL mtr.add_suppression("InnoDB: Space .* file test/t1 read of page .*");
CALL mtr.add_suppression("InnoDB: You may have to recover from a backup.");
......
call mtr.add_suppression("InnoDB: Warning: database page corruption or a failed");
CREATE TABLE t1 (a INT) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1);
XA START 'x';
......@@ -6,6 +5,7 @@ UPDATE t1 set a=2;
XA END 'x';
XA PREPARE 'x';
call mtr.add_suppression("Found 1 prepared XA transactions");
# Kill the server
SELECT * FROM t1 LOCK IN SHARE MODE;
SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;
SELECT * FROM t1;
......
......@@ -9,8 +9,6 @@
--source include/have_debug.inc
--source include/have_log_bin.inc
call mtr.add_suppression("InnoDB: Warning: database page corruption or a failed");
let $file_format_max=`SELECT @@innodb_file_format_max`;
CREATE TABLE t1(a CHAR(255),
b CHAR(255),
......
......@@ -9,8 +9,6 @@
--source include/have_debug.inc
--source include/have_log_bin.inc
call mtr.add_suppression("InnoDB: Warning: database page corruption or a failed");
let $file_format_max=`SELECT @@innodb_file_format_max`;
CREATE TABLE t1(a CHAR(255),
b CHAR(255),
......
......@@ -20,13 +20,6 @@
--echo # in-place ALTERs of the same table will not be blocked due to
--echo # temporary tablename.
# As we intentionally crash below, there could be partially written
# pages that are then recovered from the doublewrite buffer
call mtr.add_suppression("InnoDB: Warning: database page corruption or a failed
");
call mtr.add_suppression("InnoDB: file read of space .* page .*");
call mtr.add_suppression("InnoDB: Trying to recover it from the doublewrite buffer.");
let datadir= `select @@datadir`;
--let $_server_id= `SELECT @@server_id`
......
......@@ -21,7 +21,6 @@ source include/have_debug.inc;
source include/not_windows.inc;
CALL mtr.add_suppression("InnoDB: Error: Unable to read tablespace .* page no .* into the buffer pool after 100 attempts");
CALL mtr.add_suppression("InnoDB: Warning: database page corruption or a failed");
CALL mtr.add_suppression("InnoDB: Database page corruption on disk or a failed");
CALL mtr.add_suppression("InnoDB: Space .* file test/t1 read of page .*");
CALL mtr.add_suppression("InnoDB: You may have to recover from a backup.");
......
......@@ -12,11 +12,6 @@ if (`select plugin_auth_version <= "5.6.24" from information_schema.plugins wher
FLUSH TABLES;
--enable_query_log
#
# We kill server belown with timeout 0 that is not fully safe
#
call mtr.add_suppression("InnoDB: Warning: database page corruption or a failed");
CREATE TABLE t1 (a INT) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1);
connect (con1,localhost,root);
......@@ -25,15 +20,8 @@ connection default;
call mtr.add_suppression("Found 1 prepared XA transactions");
# Kill and restart the server.
-- exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
-- shutdown_server 0
-- source include/wait_until_disconnected.inc
-- exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
-- enable_reconnect
-- source include/wait_until_connected_again.inc
-- disable_reconnect
--source include/kill_mysqld.inc
--source include/start_mysqld.inc
disconnect con1;
connect (con1,localhost,root);
......
......@@ -3,7 +3,6 @@ include/rpl_init.inc [topology=1->2]
call mtr.add_suppression("Checking table:");
call mtr.add_suppression("client is using or hasn't closed the table properly");
call mtr.add_suppression("Table .* is marked as crashed and should be repaired");
call mtr.add_suppression("InnoDB: Warning: database page corruption or a failed");
flush tables;
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
......
......@@ -12,11 +12,6 @@
call mtr.add_suppression("Checking table:");
call mtr.add_suppression("client is using or hasn't closed the table properly");
call mtr.add_suppression("Table .* is marked as crashed and should be repaired");
# We have seen this warning a couple of times in Buildbot. Since we crash the
# server deliberately, it seems possible that we could in rare cases crash in
# the middle of a page write. The page is recovered from the doublewrite
# buffer ("[Note] InnoDB: Recovered the page from the doublewrite buffer.").
call mtr.add_suppression("InnoDB: Warning: database page corruption or a failed");
flush tables;
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
......
......@@ -455,8 +455,11 @@ buf_dblwr_init_or_load_pages(
os_file_write(path, file, page,
source_page_no * UNIV_PAGE_SIZE,
UNIV_PAGE_SIZE);
} else if (load_corrupt_pages) {
} else if (load_corrupt_pages
&& !buf_page_is_zeroes(page, FIL_PAGE_DATA)) {
/* Each valid page header must contain some
nonzero bytes, such as FIL_PAGE_OFFSET
or FIL_PAGE_LSN. */
recv_dblwr.add(page);
}
......@@ -492,8 +495,6 @@ buf_dblwr_process()
for (std::list<byte*>::iterator i = recv_dblwr.pages.begin();
i != recv_dblwr.pages.end(); ++i, ++page_no_dblwr ) {
bool is_compressed = false;
page = *i;
page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
space_id = mach_read_from_4(page + FIL_PAGE_SPACE_ID);
......@@ -501,18 +502,20 @@ buf_dblwr_process()
if (!fil_tablespace_exists_in_mem(space_id)) {
/* Maybe we have dropped the single-table tablespace
and this page once belonged to it: do nothing */
continue;
}
} else if (!fil_check_adress_in_tablespace(space_id,
page_no)) {
if (!fil_check_adress_in_tablespace(space_id, page_no)) {
ib_logf(IB_LOG_LEVEL_WARN,
"A page in the doublewrite buffer is not "
"within space bounds; space id %lu "
"page number %lu, page %lu in "
"doublewrite buf.",
(ulong) space_id, (ulong) page_no,
page_no_dblwr);
} else {
"A copy of page " ULINTPF ":" ULINTPF
" in the doublewrite buffer slot " ULINTPF
" is not within space bounds",
space_id, page_no, page_no_dblwr);
continue;
}
ulint zip_size = fil_space_get_zip_size(space_id);
ut_ad(!buf_page_is_zeroes(page, zip_size));
/* Read in the actual page from the file */
fil_io(OS_FILE_READ,
......@@ -526,114 +529,77 @@ buf_dblwr_process()
NULL,
0);
/* Is page compressed ? */
is_compressed = fil_page_is_compressed_encrypted(read_buf) |
fil_page_is_compressed(read_buf);
const bool is_all_zero = buf_page_is_zeroes(
read_buf, zip_size);
/* If page was compressed, decompress it before we
check checksum. */
if (is_compressed) {
fil_decompress_page(NULL, read_buf, UNIV_PAGE_SIZE, NULL, true);
if (is_all_zero) {
/* We will check if the copy in the
doublewrite buffer is valid. If not, we will
ignore this page (there should be redo log
records to initialize it). */
} else {
if (fil_page_is_compressed_encrypted(read_buf) ||
fil_page_is_compressed(read_buf)) {
/* Decompress the page before
validating the checksum. */
fil_decompress_page(
NULL, read_buf, UNIV_PAGE_SIZE,
NULL, true);
}
if (fil_space_verify_crypt_checksum(read_buf, zip_size)) {
/* page is encrypted and checksum is OK */
} else if (buf_page_is_corrupted(true, read_buf, zip_size)) {
if (fil_space_verify_crypt_checksum(
read_buf, zip_size)
|| !buf_page_is_corrupted(
true, read_buf, zip_size)) {
/* The page is good; there is no need
to consult the doublewrite buffer. */
continue;
}
fprintf(stderr,
"InnoDB: Warning: database page"
" corruption or a failed\n"
"InnoDB: file read of"
" space %lu page %lu.\n"
"InnoDB: Trying to recover it from"
" the doublewrite buffer.\n",
(ulong) space_id, (ulong) page_no);
/* Is page compressed ? */
is_compressed = fil_page_is_compressed_encrypted(page) |
fil_page_is_compressed(page);
/* If page was compressed, decompress it before we
check checksum. */
if (is_compressed) {
fil_decompress_page(NULL, page, UNIV_PAGE_SIZE, NULL, true);
/* We intentionally skip this message for
is_all_zero pages. */
ib_logf(IB_LOG_LEVEL_INFO,
"Trying to recover page " ULINTPF ":" ULINTPF
" from the doublewrite buffer.",
space_id, page_no);
}
if (fil_space_verify_crypt_checksum(page, zip_size)) {
/* the doublewrite buffer page is encrypted and OK */
} else if (buf_page_is_corrupted(true,
page,
zip_size)) {
fprintf(stderr,
"InnoDB: Dump of the page:\n");
buf_page_print(
read_buf, zip_size,
BUF_PAGE_PRINT_NO_CRASH);
fprintf(stderr,
"InnoDB: Dump of"
" corresponding page"
" in doublewrite buffer:\n");
buf_page_print(
page, zip_size,
BUF_PAGE_PRINT_NO_CRASH);
/* Next, validate the doublewrite page. */
if (fil_page_is_compressed_encrypted(page) ||
fil_page_is_compressed(page)) {
/* Decompress the page before
validating the checksum. */
fil_decompress_page(
NULL, page, UNIV_PAGE_SIZE, NULL, true);
}
fprintf(stderr,
"InnoDB: Also the page in the"
" doublewrite buffer"
" is corrupt.\n"
"InnoDB: Cannot continue"
" operation.\n"
"InnoDB: You can try to"
" recover the database"
" with the my.cnf\n"
"InnoDB: option:\n"
"InnoDB:"
" innodb_force_recovery=6\n");
ut_error;
if (!fil_space_verify_crypt_checksum(page, zip_size)
&& buf_page_is_corrupted(true, page, zip_size)) {
if (!is_all_zero) {
ib_logf(IB_LOG_LEVEL_WARN,
"A doublewrite copy of page "
ULINTPF ":" ULINTPF " is corrupted.",
space_id, page_no);
}
/* Theoretically we could have another good
copy for this page in the doublewrite
buffer. If not, we will report a fatal error
for a corrupted page somewhere else if that
page was truly needed. */
continue;
}
/* Write the good page from the
doublewrite buffer to the intended
position */
/* Write the good page from the doublewrite buffer to
the intended position. */
fil_io(OS_FILE_WRITE,
true,
space_id,
zip_size,
page_no,
0,
fil_io(OS_FILE_WRITE, true, space_id, zip_size, page_no, 0,
zip_size ? zip_size : UNIV_PAGE_SIZE,
page,
NULL,
0);
page, NULL, 0);
ib_logf(IB_LOG_LEVEL_INFO,
"Recovered the page from"
" the doublewrite buffer.");
} else if (buf_page_is_zeroes(read_buf, zip_size)) {
if (!buf_page_is_zeroes(page, zip_size)
&& !buf_page_is_corrupted(true, page,
zip_size)) {
/* Database page contained only
zeroes, while a valid copy is
available in dblwr buffer. */
fil_io(OS_FILE_WRITE,
true,
space_id,
zip_size,
page_no,
0,
zip_size ? zip_size : UNIV_PAGE_SIZE,
page,
NULL,
0);
}
}
}
"Recovered page " ULINTPF ":" ULINTPF " from"
" the doublewrite buffer.",
space_id, page_no);
}
ut_free(unaligned_read_buf);
......
......@@ -455,8 +455,11 @@ buf_dblwr_init_or_load_pages(
os_file_write(path, file, page,
source_page_no * UNIV_PAGE_SIZE,
UNIV_PAGE_SIZE);
} else if (load_corrupt_pages) {
} else if (load_corrupt_pages
&& !buf_page_is_zeroes(page, FIL_PAGE_DATA)) {
/* Each valid page header must contain some
nonzero bytes, such as FIL_PAGE_OFFSET
or FIL_PAGE_LSN. */
recv_dblwr.add(page);
}
......@@ -492,8 +495,6 @@ buf_dblwr_process()
for (std::list<byte*>::iterator i = recv_dblwr.pages.begin();
i != recv_dblwr.pages.end(); ++i, ++page_no_dblwr ) {
bool is_compressed = false;
page = *i;
page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
space_id = mach_read_from_4(page + FIL_PAGE_SPACE_ID);
......@@ -501,18 +502,20 @@ buf_dblwr_process()
if (!fil_tablespace_exists_in_mem(space_id)) {
/* Maybe we have dropped the single-table tablespace
and this page once belonged to it: do nothing */
continue;
}
} else if (!fil_check_adress_in_tablespace(space_id,
page_no)) {
if (!fil_check_adress_in_tablespace(space_id, page_no)) {
ib_logf(IB_LOG_LEVEL_WARN,
"A page in the doublewrite buffer is not "
"within space bounds; space id %lu "
"page number %lu, page %lu in "
"doublewrite buf.",
(ulong) space_id, (ulong) page_no,
page_no_dblwr);
} else {
"A copy of page " ULINTPF ":" ULINTPF
" in the doublewrite buffer slot " ULINTPF
" is not within space bounds",
space_id, page_no, page_no_dblwr);
continue;
}
ulint zip_size = fil_space_get_zip_size(space_id);
ut_ad(!buf_page_is_zeroes(page, zip_size));
/* Read in the actual page from the file */
fil_io(OS_FILE_READ,
......@@ -526,113 +529,77 @@ buf_dblwr_process()
NULL,
0);
/* Is page compressed ? */
is_compressed = fil_page_is_compressed_encrypted(read_buf) |
fil_page_is_compressed(read_buf);
const bool is_all_zero = buf_page_is_zeroes(
read_buf, zip_size);
/* If page was compressed, decompress it before we
check checksum. */
if (is_compressed) {
fil_decompress_page(NULL, read_buf, UNIV_PAGE_SIZE, NULL, true);
if (is_all_zero) {
/* We will check if the copy in the
doublewrite buffer is valid. If not, we will
ignore this page (there should be redo log
records to initialize it). */
} else {
if (fil_page_is_compressed_encrypted(read_buf) ||
fil_page_is_compressed(read_buf)) {
/* Decompress the page before
validating the checksum. */
fil_decompress_page(
NULL, read_buf, UNIV_PAGE_SIZE,
NULL, true);
}
if (fil_space_verify_crypt_checksum(read_buf, zip_size)) {
/* page is encrypted and checksum is OK */
} else if (buf_page_is_corrupted(true, read_buf, zip_size)) {
if (fil_space_verify_crypt_checksum(
read_buf, zip_size)
|| !buf_page_is_corrupted(
true, read_buf, zip_size)) {
/* The page is good; there is no need
to consult the doublewrite buffer. */
continue;
}
fprintf(stderr,
"InnoDB: Database page"
" corruption or a failed\n"
"InnoDB: file read of"
" space %lu page %lu.\n"
"InnoDB: Trying to recover it from"
" the doublewrite buffer.\n",
(ulong) space_id, (ulong) page_no);
/* Is page compressed ? */
is_compressed = fil_page_is_compressed_encrypted(page) |
fil_page_is_compressed(page);
/* If page was compressed, decompress it before we
check checksum. */
if (is_compressed) {
fil_decompress_page(NULL, page, UNIV_PAGE_SIZE, NULL, true);
/* We intentionally skip this message for
is_all_zero pages. */
ib_logf(IB_LOG_LEVEL_INFO,
"Trying to recover page " ULINTPF ":" ULINTPF
" from the doublewrite buffer.",
space_id, page_no);
}
if (fil_space_verify_crypt_checksum(page, zip_size)) {
/* the doublewrite buffer page is encrypted and OK */
} else if (buf_page_is_corrupted(true,
page,
zip_size)) {
fprintf(stderr,
"InnoDB: Dump of the page:\n");
buf_page_print(
read_buf, zip_size,
BUF_PAGE_PRINT_NO_CRASH);
fprintf(stderr,
"InnoDB: Dump of"
" corresponding page"
" in doublewrite buffer:\n");
buf_page_print(
page, zip_size,
BUF_PAGE_PRINT_NO_CRASH);
/* Next, validate the doublewrite page. */
if (fil_page_is_compressed_encrypted(page) ||
fil_page_is_compressed(page)) {
/* Decompress the page before
validating the checksum. */
fil_decompress_page(
NULL, page, UNIV_PAGE_SIZE, NULL, true);
}
fprintf(stderr,
"InnoDB: Also the page in the"
" doublewrite buffer"
" is corrupt.\n"
"InnoDB: Cannot continue"
" operation.\n"
"InnoDB: You can try to"
" recover the database"
" with the my.cnf\n"
"InnoDB: option:\n"
"InnoDB:"
" innodb_force_recovery=6\n");
ut_error;
if (!fil_space_verify_crypt_checksum(page, zip_size)
&& buf_page_is_corrupted(true, page, zip_size)) {
if (!is_all_zero) {
ib_logf(IB_LOG_LEVEL_WARN,
"A doublewrite copy of page "
ULINTPF ":" ULINTPF " is corrupted.",
space_id, page_no);
}
/* Theoretically we could have another good
copy for this page in the doublewrite
buffer. If not, we will report a fatal error
for a corrupted page somewhere else if that
page was truly needed. */
continue;
}
/* Write the good page from the
doublewrite buffer to the intended
position */
/* Write the good page from the doublewrite buffer to
the intended position. */
fil_io(OS_FILE_WRITE,
true,
space_id,
zip_size,
page_no,
0,
fil_io(OS_FILE_WRITE, true, space_id, zip_size, page_no, 0,
zip_size ? zip_size : UNIV_PAGE_SIZE,
page,
NULL,
0);
page, NULL, 0);
ib_logf(IB_LOG_LEVEL_INFO,
"Recovered the page from"
" the doublewrite buffer.");
} else if (buf_page_is_zeroes(read_buf, zip_size)) {
if (!buf_page_is_zeroes(page, zip_size)
&& !buf_page_is_corrupted(true, page,
zip_size)) {
/* Database page contained only
zeroes, while a valid copy is
available in dblwr buffer. */
fil_io(OS_FILE_WRITE,
true,
space_id,
zip_size,
page_no, 0,
zip_size ? zip_size : UNIV_PAGE_SIZE,
page,
NULL,
0);
}
}
}
"Recovered page " ULINTPF ":" ULINTPF " from"
" the doublewrite buffer.",
space_id, page_no);
}
ut_free(unaligned_read_buf);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment