Commit a1245a77 authored by Guilhem Bichot

Fix for BUG#42180 "Maria: maria-recovery-bitmap.test fails repeatedly"

storage/maria/ma_loghandler.c:
  Normally, when we log LOGREC_LONG_TRANSACTION_ID, undo_lsn should be 0: assert this.
storage/maria/ma_test_force_start.pl:
  This script does not work with mtr2; make it use mtr1.
storage/maria/trnman.c:
  Cause of the bug: a transaction would log LOGREC_LONG_TRANSACTION_ID; then Checkpoint would run
  and skip that transaction (because its undo_lsn was still 0); then the transaction would log
  REDO+UNDO records; then a crash would occur. At Recovery, the REDO phase would start from the
  Checkpoint record's LSN, so it would not see LOGREC_LONG_TRANSACTION_ID, and since the Checkpoint
  record did not mention the transaction either, the transaction would be unknown. Its REDO+UNDO
  records would therefore be thrown away (records of an unknown transaction are assumed to belong
  to a transaction committed long ago), so the transaction would not be rolled back, which is wrong.
  Fix: it was wrong to skip a transaction merely because its undo_lsn is 0. As soon as
  LOGREC_LONG_TRANSACTION_ID has been logged, that record becomes potentially invisible to the REDO
  phase, so we must include the transaction in the checkpoint record.
parent efa0a324
Branches unavailable
Tags unavailable
No related merge requests found
......@@ -6147,6 +6147,7 @@ my_bool translog_write_record(LSN *lsn,
LSN dummy_lsn;
LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
uchar log_data[6];
DBUG_ASSERT(trn->undo_lsn == LSN_IMPOSSIBLE);
int6store(log_data, trn->trid);
log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
......
......@@ -42,7 +42,8 @@ my $sql_name= "./var/tmp/create_table.sql";
my $error_log_name= "./var/log/master.err";
my @cmd_output;
my $whatever; # garbage data
my $base_server_cmd= "perl mysql-test-run.pl --mysqld=--maria-force-start-after-recovery-failures=$force_after maria-recover ";
$ENV{MTR_VERSION} = 1; # MTR2 does not have --start-and-exit
my $base_server_cmd= "perl mysql-test-run.pl --mysqld=--maria-force-start-after-recovery-failures=$force_after --suite=maria maria.maria-recover ";
if ($^O =~ /^mswin/i)
{
print <<EOF;
......
......@@ -712,11 +712,6 @@ my_bool trnman_collect_transactions(LEX_STRING *str_act, LEX_STRING *str_com,
ptr+= TRANSID_SIZE;
for (trn= active_list_min.next; trn != &active_list_max; trn= trn->next)
{
/*
trns with a short trid of 0 are not even initialized, we can ignore
them. trns with undo_lsn==0 have done no writes, we can ignore them
too. XID not needed now.
*/
uint sid;
LSN rec_lsn, undo_lsn, first_undo_lsn;
pthread_mutex_lock(&trn->state_lock);
......@@ -732,16 +727,24 @@ my_bool trnman_collect_transactions(LEX_STRING *str_act, LEX_STRING *str_com,
*/
continue;
}
/* needed for low-water mark calculation */
/* needed for low-water mark calculation */
if (((rec_lsn= lsn_read_non_atomic(trn->rec_lsn)) > 0) &&
(cmp_translog_addr(rec_lsn, minimum_rec_lsn) < 0))
minimum_rec_lsn= rec_lsn;
/*
trn may have logged REDOs but not yet UNDO, that's why we read rec_lsn
before deciding to ignore if undo_lsn==0.
If trn has not logged LOGREC_LONG_TRANSACTION_ID, this trn will be
discovered when seeing that log record which is for sure located after
checkpoint_start_log_horizon.
*/
if ((undo_lsn= trn->undo_lsn) == 0) /* trn can be forgotten */
if ((LSN_WITH_FLAGS_TO_FLAGS(trn->first_undo_lsn) &
TRANSACTION_LOGGED_LONG_ID) == 0)
continue;
/*
On the other hand, if undo_lsn is LSN_IMPOSSIBLE, trn may later log
records; so we must include trn in the checkpoint now, because we cannot
count on LOGREC_LONG_TRANSACTION_ID (as we are already past it).
*/
undo_lsn= trn->undo_lsn;
stored_transactions++;
int2store(ptr, sid);
ptr+= 2;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment