Commit 91bc9cc9 authored by Guilhem Bichot's avatar Guilhem Bichot

Fix for BUG#41037 "Maria: recovery failure (pushbuild2)" (checkpoint bug).

mysql-test/suite/maria/r/maria-recovery3.result:
  result.
mysql-test/suite/maria/t/maria-recovery3-master.opt:
  usual recovery test options
mysql-test/suite/maria/t/maria-recovery3.test:
  Test for BUG#41037. Without the bugfix, the test would hang because Recovery would fail with "undo_key_insert got error 126": Recovery would believe INSERT of 2 is uncommitted, try to execute its UNDO_KEY_INSERT, fail to find the key to delete because DELETE deleted it. That failure of _ma_ck_real_delete() would mark table as corrupted (see
  in d_search()) which is error 126 (HA_ERR_CRASHED).
storage/maria/ma_blockrec.c:
  set trn->rec_lsn to LSN of commit record, when writing it.
storage/maria/ma_blockrec.h:
  new function
storage/maria/ma_commit.c:
  when committing or rolling back, rec_lsn should be LSN_IMPOSSIBLE: assertion is moved here
  from trnman_end_trn(), because now ma_commit() can set rec_lsn and so when we reach trnman_end_trn()
  it's not LSN_IMPOSSIBLE anymore.
  Adding debug possibility to pause between COMMIT record write and trnman_commit_trn(), to be
  able to fire checkpoint in between.
storage/maria/ma_loghandler.c:
  in-write hook for COMMIT records
storage/maria/ma_recovery.c:
  More debugging info in maria_recovery.trace, to see how the starting point of REDO phase is determined
storage/maria/trnman.c:
  assertion cannot be here anymore see ma_commit.c
parent 44d3917f
set global maria_log_file_size=4294967295;
drop database if exists mysqltest;
create database mysqltest;
use mysqltest;
* shut down mysqld, removed logs, restarted it
* TEST of Checkpoint between writing the commit log record and committing in trnman
create table t1(a int primary key) engine=maria;
insert into t1 values(1);
flush table t1;
* copied t1 for comparison
set session debug="+d,maria_sleep_in_commit";
insert into t1 values(2);
set global maria_checkpoint_interval=1000;
delete from t1 where a=2;
SET SESSION debug="+d,maria_flush_whole_log,maria_crash";
* crashing mysqld intentionally
set global maria_checkpoint_interval=1;
ERROR HY000: Lost connection to MySQL server during query
* recovery happens
check table t1 extended;
Table Op Msg_type Msg_text
mysqltest.t1 check status OK
* testing that checksum after recovery is as expected
Checksum-check
ok
use mysqltest;
drop table t1;
drop database mysqltest_for_comparison;
drop database mysqltest;
--skip-stack-trace --skip-core-file --loose-maria-log-dir-path=$MYSQLTEST_VARDIR/tmp
--source include/not_embedded.inc
# Don't test this under valgrind, memory leaks will occur as we crash
--source include/not_valgrind.inc
# Binary must be compiled with debug for crash to occur
--source include/have_debug.inc
--source include/have_maria.inc
set global maria_log_file_size=4294967295;
let $MARIA_LOG=../tmp;
--disable_warnings
drop database if exists mysqltest;
--enable_warnings
create database mysqltest;
let $mms_tname=t;
# Include scripts can perform SQL. For it to not influence the main test
# they use a separate connection. This way if they use a DDL it would
# not autocommit in the main test.
connect (admin, 127.0.0.1, root,,mysqltest,,);
--enable_reconnect
connection default;
use mysqltest;
--enable_reconnect
let $mms_tables=1;
let $mvr_restore_old_snapshot=0;
let $mms_compare_physically=0;
let $mvr_debug_option="+d,maria_flush_whole_log,maria_crash";
let $mvr_crash_statement= set global maria_checkpoint_interval=1;
-- source include/maria_empty_logs.inc
# Test for BUG#41037 (recovery failure)
--echo * TEST of Checkpoint between writing the commit log record and committing in trnman
# we want recovery to use the tables as they were at time of crash
let $mvr_restore_old_snapshot=0;
# UNDO phase prevents physical comparison, normally,
# so we'll only use checksums to compare.
let $mms_compare_physically=0;
let $mvr_crash_statement= set global maria_checkpoint_interval=1;
create table t1(a int primary key) engine=maria;
insert into t1 values(1);
-- source include/maria_make_snapshot_for_comparison.inc
set session debug="+d,maria_sleep_in_commit";
send insert into t1 values(2);
sleep 1;
# Now the INSERT of 2 has written a commit record
# but not yet called trnman_commit(), so for checkpoint it's not
# committed.
connection admin;
set global maria_checkpoint_interval=1000; # force a checkpoint
connection default;
reap; # end of INSERT
delete from t1 where a=2;
# Bug was that: Recovery starts REDO scanning from too far: from
# Checkpoint record which says INSERT is not committed, so
# Recovery executes the INSERT's UNDO and finds no key to delete
# (as DELETE already deleted it), fails.
-- source include/maria_verify_recovery.inc
drop table t1;
# Note that even if machine is loaded and thus INSERT is committed
# before checkpoint happens, test should still pass (though it won't
# reproduce the conditions of the bug).
# clean up everything
let $mms_purpose=comparison;
eval drop database mysqltest_for_$mms_purpose;
drop database mysqltest;
...@@ -5914,7 +5914,7 @@ my_bool write_hook_for_file_id(enum translog_record_type type ...@@ -5914,7 +5914,7 @@ my_bool write_hook_for_file_id(enum translog_record_type type
TRN *trn TRN *trn
__attribute__ ((unused)), __attribute__ ((unused)),
MARIA_HA *tbl_info, MARIA_HA *tbl_info,
LSN *lsn __attribute__ ((unused)), LSN *lsn,
void *hook_arg void *hook_arg
__attribute__ ((unused))) __attribute__ ((unused)))
{ {
...@@ -5923,6 +5923,44 @@ my_bool write_hook_for_file_id(enum translog_record_type type ...@@ -5923,6 +5923,44 @@ my_bool write_hook_for_file_id(enum translog_record_type type
return 0; return 0;
} }
/**
Updates transaction's rec_lsn when committing.
A transaction writes its commit record before being committed in trnman, so
if Checkpoint happens just between the COMMIT record log write and the
commit in trnman, it will record that transaction is not committed. Assume
the transaction (trn1) did an INSERT; after the checkpoint, a second
transaction (trn2) does a DELETE of what trn1 has inserted. Then crash,
Checkpoint record says that trn1 was not committed, and REDO phase starts
from Checkpoint record's LSN. So it will not find the COMMIT record of
trn1, will want to roll back trn1, which will fail because the row/key
which it wants to delete does not exist anymore.
To avoid this, Checkpoint needs to know that the REDO phase must start
before this COMMIT record, so transaction sets its rec_lsn to the COMMIT's
record LSN, and as Checkpoint reads the transaction's rec_lsn, Checkpoint
will know.
@note so after commit trn->rec_lsn is a "commit LSN", which could be of
use later.
@return Operation status, always 0 (success)
*/
my_bool write_hook_for_commit(enum translog_record_type type
__attribute__ ((unused)),
TRN *trn,
MARIA_HA *tbl_info
__attribute__ ((unused)),
LSN *lsn,
void *hook_arg
__attribute__ ((unused)))
{
trn->rec_lsn= *lsn;
return 0;
}
/*************************************************************************** /***************************************************************************
Applying of REDO log records Applying of REDO log records
***************************************************************************/ ***************************************************************************/
......
...@@ -271,6 +271,9 @@ my_bool write_hook_for_undo_bulk_insert(enum translog_record_type type, ...@@ -271,6 +271,9 @@ my_bool write_hook_for_undo_bulk_insert(enum translog_record_type type,
my_bool write_hook_for_file_id(enum translog_record_type type, my_bool write_hook_for_file_id(enum translog_record_type type,
TRN *trn, MARIA_HA *tbl_info, LSN *lsn, TRN *trn, MARIA_HA *tbl_info, LSN *lsn,
void *hook_arg); void *hook_arg);
my_bool write_hook_for_commit(enum translog_record_type type,
TRN *trn, MARIA_HA *tbl_info, LSN *lsn,
void *hook_arg);
void _ma_block_get_status(void* param, my_bool concurrent_insert); void _ma_block_get_status(void* param, my_bool concurrent_insert);
void _ma_block_update_status(void *param); void _ma_block_update_status(void *param);
void _ma_block_restore_status(void *param); void _ma_block_restore_status(void *param);
......
...@@ -33,6 +33,7 @@ int ma_commit(TRN *trn) ...@@ -33,6 +33,7 @@ int ma_commit(TRN *trn)
LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS]; LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS];
DBUG_ENTER("ma_commit"); DBUG_ENTER("ma_commit");
DBUG_ASSERT(trn->rec_lsn == LSN_IMPOSSIBLE);
if (trn->undo_lsn == 0) /* no work done, rollback (cheaper than commit) */ if (trn->undo_lsn == 0) /* no work done, rollback (cheaper than commit) */
DBUG_RETURN(trnman_rollback_trn(trn)); DBUG_RETURN(trnman_rollback_trn(trn));
/* /*
...@@ -61,8 +62,14 @@ int ma_commit(TRN *trn) ...@@ -61,8 +62,14 @@ int ma_commit(TRN *trn)
trn, NULL, 0, trn, NULL, 0,
sizeof(log_array)/sizeof(log_array[0]), sizeof(log_array)/sizeof(log_array[0]),
log_array, NULL, NULL) | log_array, NULL, NULL) |
translog_flush(commit_lsn) | translog_flush(commit_lsn));
trnman_commit_trn(trn));
DBUG_EXECUTE_IF("maria_sleep_in_commit",
{
DBUG_PRINT("info", ("maria_sleep_in_commit"));
sleep(3);
});
res|= trnman_commit_trn(trn);
/* /*
......
...@@ -618,11 +618,11 @@ static LOG_DESC INIT_LOGREC_PREPARE_WITH_UNDO_PURGE= ...@@ -618,11 +618,11 @@ static LOG_DESC INIT_LOGREC_PREPARE_WITH_UNDO_PURGE=
static LOG_DESC INIT_LOGREC_COMMIT= static LOG_DESC INIT_LOGREC_COMMIT=
{LOGRECTYPE_FIXEDLENGTH, 0, 0, NULL, {LOGRECTYPE_FIXEDLENGTH, 0, 0, NULL,
NULL, NULL, 0, "commit", LOGREC_IS_GROUP_ITSELF, NULL, write_hook_for_commit, NULL, 0, "commit", LOGREC_IS_GROUP_ITSELF, NULL,
NULL}; NULL};
static LOG_DESC INIT_LOGREC_COMMIT_WITH_UNDO_PURGE= static LOG_DESC INIT_LOGREC_COMMIT_WITH_UNDO_PURGE=
{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1, {LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, write_hook_for_commit, NULL, 1,
"commit_with_undo_purge", LOGREC_IS_GROUP_ITSELF, NULL, NULL}; "commit_with_undo_purge", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
static LOG_DESC INIT_LOGREC_CHECKPOINT= static LOG_DESC INIT_LOGREC_CHECKPOINT=
......
...@@ -555,8 +555,9 @@ static void new_transaction(uint16 sid, TrID long_id, LSN undo_lsn, ...@@ -555,8 +555,9 @@ static void new_transaction(uint16 sid, TrID long_id, LSN undo_lsn,
char llbuf[22]; char llbuf[22];
all_active_trans[sid].long_trid= long_id; all_active_trans[sid].long_trid= long_id;
llstr(long_id, llbuf); llstr(long_id, llbuf);
tprint(tracef, "Transaction long_trid %s short_trid %u starts\n", tprint(tracef, "Transaction long_trid %s short_trid %u starts,"
llbuf, sid); " undo_lsn (%lu,0x%lx) first_undo_lsn (%lu,0x%lx)\n",
llbuf, sid, LSN_IN_PARTS(undo_lsn), LSN_IN_PARTS(first_undo_lsn));
all_active_trans[sid].undo_lsn= undo_lsn; all_active_trans[sid].undo_lsn= undo_lsn;
all_active_trans[sid].first_undo_lsn= first_undo_lsn; all_active_trans[sid].first_undo_lsn= first_undo_lsn;
set_if_bigger(max_long_trid, long_id); set_if_bigger(max_long_trid, long_id);
...@@ -2968,6 +2969,8 @@ static LSN parse_checkpoint_record(LSN lsn) ...@@ -2968,6 +2969,8 @@ static LSN parse_checkpoint_record(LSN lsn)
ptr= log_record_buffer.str; ptr= log_record_buffer.str;
start_address= lsn_korr(ptr); start_address= lsn_korr(ptr);
ptr+= LSN_STORE_SIZE; ptr+= LSN_STORE_SIZE;
tprint(tracef, "Checkpoint record has start_horizon at (%lu,0x%lx)\n",
LSN_IN_PARTS(start_address));
/* transactions */ /* transactions */
nb_active_transactions= uint2korr(ptr); nb_active_transactions= uint2korr(ptr);
...@@ -2983,6 +2986,9 @@ static LSN parse_checkpoint_record(LSN lsn) ...@@ -2983,6 +2986,9 @@ static LSN parse_checkpoint_record(LSN lsn)
line. It may make start_address slightly decrease (only by the time it line. It may make start_address slightly decrease (only by the time it
takes to write one or a few rows, roughly). takes to write one or a few rows, roughly).
*/ */
tprint(tracef, "Checkpoint record has min_rec_lsn of active transactions"
" at (%lu,0x%lx)\n",
LSN_IN_PARTS(minimum_rec_lsn_of_active_transactions));
set_if_smaller(start_address, minimum_rec_lsn_of_active_transactions); set_if_smaller(start_address, minimum_rec_lsn_of_active_transactions);
for (i= 0; i < nb_active_transactions; i++) for (i= 0; i < nb_active_transactions; i++)
...@@ -3086,6 +3092,8 @@ static LSN parse_checkpoint_record(LSN lsn) ...@@ -3086,6 +3092,8 @@ static LSN parse_checkpoint_record(LSN lsn)
*/ */
start_address= checkpoint_start= start_address= checkpoint_start=
translog_next_LSN(start_address, LSN_IMPOSSIBLE); translog_next_LSN(start_address, LSN_IMPOSSIBLE);
tprint(tracef, "Checkpoint record start_horizon now adjusted to"
" LSN (%lu,0x%lx)\n", LSN_IN_PARTS(start_address));
if (checkpoint_start == LSN_IMPOSSIBLE) if (checkpoint_start == LSN_IMPOSSIBLE)
{ {
/* /*
...@@ -3095,6 +3103,8 @@ static LSN parse_checkpoint_record(LSN lsn) ...@@ -3095,6 +3103,8 @@ static LSN parse_checkpoint_record(LSN lsn)
return LSN_ERROR; return LSN_ERROR;
} }
/* now, where the REDO phase should start reading log: */ /* now, where the REDO phase should start reading log: */
tprint(tracef, "Checkpoint has min_rec_lsn of dirty pages at"
" LSN (%lu,0x%lx)\n", LSN_IN_PARTS(minimum_rec_lsn_of_dirty_pages));
set_if_smaller(start_address, minimum_rec_lsn_of_dirty_pages); set_if_smaller(start_address, minimum_rec_lsn_of_dirty_pages);
DBUG_PRINT("info", DBUG_PRINT("info",
("checkpoint_start: (%lu,0x%lx) start_address: (%lu,0x%lx)", ("checkpoint_start: (%lu,0x%lx) start_address: (%lu,0x%lx)",
......
...@@ -389,7 +389,6 @@ my_bool trnman_end_trn(TRN *trn, my_bool commit) ...@@ -389,7 +389,6 @@ my_bool trnman_end_trn(TRN *trn, my_bool commit)
LF_PINS *pins= trn->pins; LF_PINS *pins= trn->pins;
DBUG_ENTER("trnman_end_trn"); DBUG_ENTER("trnman_end_trn");
DBUG_ASSERT(trn->rec_lsn == 0);
/* if a rollback, all UNDO records should have been executed */ /* if a rollback, all UNDO records should have been executed */
DBUG_ASSERT(commit || trn->undo_lsn == 0); DBUG_ASSERT(commit || trn->undo_lsn == 0);
DBUG_PRINT("info", ("pthread_mutex_lock LOCK_trn_list")); DBUG_PRINT("info", ("pthread_mutex_lock LOCK_trn_list"));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment