Commit 81258f14 authored by Monty's avatar Monty

MDEV-17913 Encrypted transactional Aria tables remain corrupt after crash...

MDEV-17913 Encrypted transactional Aria tables remain corrupt after crash recovery, automatic repairment does not work

This was because of a wrong test in encryption code that wrote random
numbers over the LSN for pages for transactional Aria tables during repair.
The effect was that after an ALTER TABLE ENABLE KEYS of a encrypted
recovery of the tables would not work.

Fixed by changing testing of !share->now_transactional to
!share->base.born_transactional.

Other things:
- Extended Aria check_table() to check for wrong (= too big) LSN numbers.
- If check_table() failed just because of wrong LSN or TRN numbers,
  a following repair table will just do a zerofill which is much faster.
- Limit number of LSN errors in one check table to MAX_LSN_ERROR (10).
- Removed old obsolete test of 'if (error_count & 2)'. Changed error_count
  and warning_count from bits to numbers of errors/warnings as this is
  more useful.
parent 7f75acc0
......@@ -70,7 +70,7 @@ typedef struct st_handler_check_param
*/
ulonglong unique_count[HA_MAX_KEY_SEG + 1];
ulonglong notnull_count[HA_MAX_KEY_SEG + 1];
ulonglong max_allowed_lsn;
my_off_t search_after_block;
my_off_t new_file_pos, key_file_blocks;
my_off_t keydata, totaldata, key_blocks, start_check_pos;
......@@ -96,6 +96,7 @@ typedef struct st_handler_check_param
uint out_flag, error_printed, verbose;
uint opt_sort_key, total_files, max_level;
uint key_cache_block_size, pagecache_block_size;
uint skip_lsn_error_count;
int tmpfile_createflag, err_count;
myf myf_rw;
uint16 language;
......
......@@ -2,4 +2,12 @@ set global aria_encrypt_tables = 1;
create table t1 (i int, key(i)) engine=aria;
insert into t1 values (1);
drop table t1;
create table t1 (a int primary key, b int, c int, key(b),key(c)) engine=aria;
alter table t1 disable keys;
insert into t1 select seq,seq,seq from seq_1_to_100;
alter table t1 enable keys;
check table t1;
Table Op Msg_type Msg_text
test.t1 check status OK
drop table t1;
set global aria_encrypt_tables = 0;
--source include/have_file_key_management_plugin.inc
--source include/have_sequence.inc
#
# MDEV-8022 Assertion `rc == 0' failed in ma_encrypt on dropping an encrypted Aria table
#
--source include/have_file_key_management_plugin.inc
set global aria_encrypt_tables = 1;
create table t1 (i int, key(i)) engine=aria;
insert into t1 values (1);
drop table t1;
set global aria_encrypt_tables = 0;
#
# MDEV-17913 Encrypted transactional Aria tables remain corrupt after crash
# recovery, automatic repair does not work
#
# We are using a simplifed version of the test here. This works thanks to
# the extended check table code that also checks if LSN's are reasonable.
#
create table t1 (a int primary key, b int, c int, key(b),key(c)) engine=aria;
alter table t1 disable keys;
insert into t1 select seq,seq,seq from seq_1_to_100;
alter table t1 enable keys;
check table t1;
drop table t1;
#
# Cleanup
#
set global aria_encrypt_tables = 0;
......@@ -38,14 +38,15 @@ create_rename_lsn has non-magic value
#
check table t2;
Table Op Msg_type Msg_text
mysqltest.t2 check error Table is from another system and must be zerofilled or repaired to be usable on this system
mysqltest.t2 check error Table is probably from another system and must be zerofilled or repaired ('REPAIR TABLE table_name') to be usable on this system
mysqltest.t2 check error Corrupt
check table t2;
Table Op Msg_type Msg_text
mysqltest.t2 check error Table is from another system and must be zerofilled or repaired to be usable on this system
mysqltest.t2 check error Table is probably from another system and must be zerofilled or repaired ('REPAIR TABLE table_name') to be usable on this system
mysqltest.t2 check error Corrupt
repair table t2;
Table Op Msg_type Msg_text
mysqltest.t2 repair info Running zerofill on moved table
mysqltest.t2 repair status OK
check table t2;
Table Op Msg_type Msg_text
......@@ -61,6 +62,7 @@ mysqltest.t4 analyze Note Zerofilling moved table ./mysqltest/t4
mysqltest.t4 analyze status OK
repair table t5;
Table Op Msg_type Msg_text
mysqltest.t5 repair info Running zerofill on moved table
mysqltest.t5 repair status OK
check table t5;
Table Op Msg_type Msg_text
......
......@@ -71,7 +71,7 @@ perl;
my $fname= "$ENV{'MYSQLTEST_VARDIR'}/tmp/autozerofill.txt";
open(FILE, "<", $fname) or die;
my @content= <FILE>;
print grep(/Status:.*zerofilled/, @content);
print grep(/Status:.*/, @content);
print "create_rename_lsn has magic value\n" if grep(/create_rename \(0,0x2\)/, @content);
close FILE;
EOF
......
......@@ -37,6 +37,7 @@ static MY_TMPDIR maria_chk_tmpdir;
static my_bool opt_transaction_logging, opt_debug;
static my_bool opt_ignore_control_file, opt_require_control_file;
static my_bool opt_warning_for_wrong_transid, opt_update_state;
static my_bool have_control_file= 0;
static const char *type_names[]=
{
......@@ -144,15 +145,19 @@ int main(int argc, char **argv)
{
if ((ma_control_file_open(FALSE, opt_require_control_file ||
!(check_param.testflag & T_SILENT),
TRUE) &&
(opt_require_control_file ||
(opt_transaction_logging && (check_param.testflag & T_REP_ANY)))))
TRUE)))
{
error= 1;
goto end;
if (opt_require_control_file ||
(opt_transaction_logging && (check_param.testflag & T_REP_ANY)))
{
error= 1;
goto end;
}
}
else
have_control_file= 1;
}
else
if (!have_control_file)
opt_warning_for_wrong_transid= 0;
/*
......@@ -456,7 +461,7 @@ static struct my_option my_long_options[] =
(char**) &maria_stats_method_str, (char**) &maria_stats_method_str, 0,
GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
{ "zerofill", 'z',
"Fill empty space in data and index files with zeroes. This makes the data file movable between different servers.",
"Fill empty space in data and index files with zeroes. This makes the data file movable between different servers. It also fixes any wrong transaction or LSN numbers in the table after a crash or if someone removed the Aria log files.",
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
{ "zerofill-keep-lsn", OPT_ZEROFILL_KEEP_LSN,
"Like --zerofill but does not zero out LSN of data/index pages;"
......@@ -468,7 +473,7 @@ static struct my_option my_long_options[] =
static void print_version(void)
{
printf("%s Ver 1.2 for %s on %s\n", my_progname, SYSTEM_TYPE,
printf("%s Ver 1.3 for %s on %s\n", my_progname, SYSTEM_TYPE,
MACHINE_TYPE);
}
......@@ -614,7 +619,9 @@ Recover (repair)/ options (When using '--recover' or '--safe-recover'):\n\
Find a record, a block at given offset belongs to.\n\
-z, --zerofill Fill empty space in data and index files with zeroes.\n\
This makes the data file movable between different \n\
servers.\n\
servers. It also fixes any wrong transaction or LSN\n\
numbers in the table after a crash or if someone\n\
removed the Aria log files.\n\
--zerofill-keep-lsn Like --zerofill but does not zero out LSN of\n\
data/index pages.");
......@@ -1034,7 +1041,7 @@ static int maria_chk(HA_CHECK *param, char *filename)
0)))
{
/* Avoid twice printing of isam file name */
param->error_printed=1;
param->error_printed++;
switch (my_errno) {
case HA_ERR_CRASHED:
_ma_check_print_error(param,"'%s' doesn't have a correct index definition. You need to recreate it before you can do a repair",filename);
......@@ -1506,8 +1513,7 @@ static int maria_chk(HA_CHECK *param, char *filename)
"the --force (-f) option or by not using the --quick (-q) "
"flag\n");
}
else if (!(param->error_printed & 2) &&
!(param->testflag & T_FORCE_CREATE))
else if (!(param->testflag & T_FORCE_CREATE))
fprintf(stderr, "Aria table '%s' is corrupted\nFix it using switch "
"\"-r\" or \"-o\"\n", filename);
}
......@@ -1592,6 +1598,10 @@ static void descript(HA_CHECK *param, register MARIA_HA *info, char *name)
if (share->state.changed & STATE_CRASHED)
strmov(buff, share->state.changed & STATE_CRASHED_ON_REPAIR ?
"crashed on repair" : "crashed");
else if (have_control_file &&
(share->state.changed & (STATE_MOVED | STATE_NOT_ZEROFILLED)) ==
(STATE_MOVED | STATE_NOT_ZEROFILLED))
strmov(buff, "moved from another system. Use --zerofill to fix it");
else
{
if (share->state.open_count)
......@@ -1610,6 +1620,8 @@ static void descript(HA_CHECK *param, register MARIA_HA *info, char *name)
pos=strmov(pos,"zerofilled,");
if (!(share->state.changed & STATE_NOT_MOVABLE))
pos=strmov(pos,"movable,");
if (have_control_file && (share->state.changed & STATE_MOVED))
pos=strmov(pos,"moved,");
pos[-1]=0; /* Remove extra ',' */
}
printf("Status: %s\n",buff);
......
......@@ -61,8 +61,7 @@ C_MODE_END
ulong pagecache_division_limit, pagecache_age_threshold, pagecache_file_hash_size;
ulonglong pagecache_buffer_size;
const char *zerofill_error_msg=
"Table is from another system and must be zerofilled or repaired to be "
"usable on this system";
"Table is probably from another system and must be zerofilled or repaired ('REPAIR TABLE table_name') to be usable on this system";
/**
As the auto-repair is initiated when opened from the SQL layer
......@@ -906,7 +905,7 @@ void _ma_check_print_error(HA_CHECK *param, const char *fmt, ...)
{
va_list args;
DBUG_ENTER("_ma_check_print_error");
param->error_printed |= 1;
param->error_printed++;
param->out_flag |= O_DATA_LOST;
if (param->testflag & T_SUPPRESS_ERR_HANDLING)
DBUG_VOID_RETURN;
......@@ -932,7 +931,7 @@ void _ma_check_print_warning(HA_CHECK *param, const char *fmt, ...)
{
va_list args;
DBUG_ENTER("_ma_check_print_warning");
param->warning_printed= 1;
param->warning_printed++;
param->out_flag |= O_DATA_LOST;
va_start(args, fmt);
_ma_check_print_msg(param, MA_CHECK_WARNING, fmt, args);
......@@ -1262,7 +1261,7 @@ int ha_maria::write_row(const uchar * buf)
int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt)
{
int error;
int error, fatal_error;
HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
MARIA_SHARE *share= file->s;
const char *old_proc_info;
......@@ -1294,6 +1293,7 @@ int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt)
return HA_ADMIN_ALREADY_DONE;
maria_chk_init_for_check(param, file);
param->max_allowed_lsn= translog_get_horizon();
if ((file->s->state.changed & (STATE_CRASHED_FLAGS | STATE_MOVED)) ==
STATE_MOVED)
......@@ -1336,9 +1336,21 @@ int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt)
param->testflag= old_testflag;
}
}
if (!error)
fatal_error= error;
if (param->error_printed &&
param->error_printed == (param->skip_lsn_error_count +
param->not_visible_rows_found) &&
!(share->state.changed & (STATE_CRASHED_FLAGS | STATE_IN_REPAIR)))
{
_ma_check_print_error(param, "%s", zerofill_error_msg);
/* This ensures that a future REPAIR TABLE will only do a zerofill */
file->update|= STATE_MOVED;
share->state.changed|= STATE_MOVED;
fatal_error= 0;
}
if (!fatal_error)
{
if ((share->state.changed & (STATE_CHANGED |
if ((share->state.changed & (STATE_CHANGED | STATE_MOVED |
STATE_CRASHED_FLAGS |
STATE_IN_REPAIR | STATE_NOT_ANALYZED)) ||
(param->testflag & T_STATISTICS) || maria_is_crashed(file))
......@@ -1349,9 +1361,13 @@ int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt)
share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED_FLAGS |
STATE_IN_REPAIR);
if (!(table->db_stat & HA_READ_ONLY))
error= maria_update_state_info(param, file,
UPDATE_TIME | UPDATE_OPEN_COUNT |
UPDATE_STAT);
{
int tmp;
if ((tmp= maria_update_state_info(param, file,
UPDATE_TIME | UPDATE_OPEN_COUNT |
UPDATE_STAT)))
error= tmp;
}
mysql_mutex_unlock(&share->intern_lock);
info(HA_STATUS_NO_LOCK | HA_STATUS_TIME | HA_STATUS_VARIABLE |
HA_STATUS_CONST);
......@@ -1443,6 +1459,20 @@ int ha_maria::repair(THD * thd, HA_CHECK_OPT *check_opt)
maria_chk_init(param);
param->thd= thd;
param->op_name= "repair";
/*
The following can only be true if the table was marked as STATE_MOVED
during a CHECK TABLE and the table has not been used since then
*/
if ((file->s->state.changed & STATE_MOVED) &&
!(file->s->state.changed & STATE_CRASHED_FLAGS))
{
param->db_name= table->s->db.str;
param->table_name= table->alias.c_ptr();
_ma_check_print_info(param, "Running zerofill on moved table");
return zerofill(thd, check_opt);
}
param->testflag= ((check_opt->flags & ~(T_EXTEND)) |
T_SILENT | T_FORCE_CREATE | T_CALC_CHECKSUM |
(check_opt->flags & T_EXTEND ? T_REP : T_REP_BY_SORT));
......@@ -1518,6 +1548,9 @@ int ha_maria::zerofill(THD * thd, HA_CHECK_OPT *check_opt)
param->op_name= "zerofill";
param->testflag= check_opt->flags | T_SILENT | T_ZEROFILL;
param->sort_buffer_length= THDVAR(thd, sort_buffer_size);
param->db_name= table->s->db.str;
param->table_name= table->alias.c_ptr();
error=maria_zerofill(param, file, share->open_file_name.str);
/* Reset trn, that may have been set by repair */
......
......@@ -124,6 +124,7 @@ void maria_chk_init(HA_CHECK *param)
param->stats_method= MI_STATS_METHOD_NULLS_NOT_EQUAL;
param->max_stage= 1;
param->stack_end_ptr= &my_thread_var->stack_ends_here;
param->max_allowed_lsn= (LSN) ~0ULL;
}
......@@ -903,15 +904,33 @@ static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo,
_ma_check_print_error(param, "Page at %s is not marked for index %u",
llstr(anc_page->pos, llbuff),
(uint) keyinfo->key_nr);
if ((page_flag & KEYPAGE_FLAG_HAS_TRANSID) &&
!share->base.born_transactional)
if (page_flag & KEYPAGE_FLAG_HAS_TRANSID)
{
_ma_check_print_error(param,
"Page at %s is marked with HAS_TRANSID even if "
"table is not transactional",
llstr(anc_page->pos, llbuff));
if (!share->base.born_transactional)
{
_ma_check_print_error(param,
"Page at %s is marked with HAS_TRANSID even if "
"table is not transactional",
llstr(anc_page->pos, llbuff));
}
}
if (share->base.born_transactional)
{
LSN lsn= lsn_korr(anc_page->buff);
if ((ulonglong) lsn > param->max_allowed_lsn)
{
/* Avoid flooding of errors */
if (param->skip_lsn_error_count++ < MAX_LSN_ERRORS)
{
_ma_check_print_error(param,
"Page at %s as wrong LSN " LSN_FMT ". Current "
"LSN is " LSN_FMT,
llstr(anc_page->pos, llbuff),
LSN_IN_PARTS(lsn),
LSN_IN_PARTS(param->max_allowed_lsn));
}
}
}
if (anc_page->size > share->max_index_block_size)
{
_ma_check_print_error(param,
......@@ -1850,6 +1869,7 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend,
ha_rows full_page_count, tail_count;
my_bool UNINIT_VAR(full_dir), now_transactional;
uint offset_page, offset, free_count;
LSN lsn;
if (_ma_scan_init_block_record(info))
{
......@@ -1994,6 +2014,23 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend,
if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
goto err;
}
if (share->base.born_transactional)
{
lsn= lsn_korr(page_buff);
if ((ulonglong) lsn > param->max_allowed_lsn)
{
/* Avoid flooding of errors */
if (param->skip_lsn_error_count++ < MAX_LSN_ERRORS)
{
_ma_check_print_error(param,
"Page %9s: Wrong LSN " LSN_FMT ". Current "
"LSN is " LSN_FMT,
llstr(page, llbuff),
LSN_IN_PARTS(lsn),
LSN_IN_PARTS(param->max_allowed_lsn));
}
}
}
if ((enum en_page_type) page_type == BLOB_PAGE)
continue;
param->empty+= empty_space;
......@@ -2778,7 +2815,7 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info,
if ((param->testflag & (T_FORCE_UNIQUENESS|T_QUICK)) == T_QUICK)
{
param->testflag|=T_RETRY_WITHOUT_QUICK;
param->error_printed=1;
param->error_printed++;
goto err;
}
/* purecov: begin tested */
......@@ -5053,7 +5090,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
}
if (searching && ! sort_param->fix_datafile)
{
param->error_printed=1;
param->error_printed++;
param->retry_repair=1;
param->testflag|=T_RETRY_WITHOUT_QUICK;
my_errno= HA_ERR_WRONG_IN_RECORD;
......@@ -5327,7 +5364,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
DBUG_RETURN(-1);
if (searching && ! sort_param->fix_datafile)
{
param->error_printed=1;
param->error_printed++;
param->retry_repair=1;
param->testflag|=T_RETRY_WITHOUT_QUICK;
my_errno= HA_ERR_WRONG_IN_RECORD;
......
......@@ -124,7 +124,7 @@ void _ma_check_print_warning(HA_CHECK *param, const char *fmt,...)
param->isam_file_name);
param->out_flag|= O_DATA_LOST;
}
param->warning_printed=1;
param->warning_printed++;
va_start(args,fmt);
fprintf(stderr,"%s: warning: ",my_progname_short);
vfprintf(stderr, fmt, args);
......@@ -149,7 +149,7 @@ void _ma_check_print_error(HA_CHECK *param, const char *fmt,...)
fprintf(stderr,"%s: Aria file %s\n",my_progname_short,param->isam_file_name);
param->out_flag|= O_DATA_LOST;
}
param->error_printed|=1;
param->error_printed++;
va_start(args,fmt);
fprintf(stderr,"%s: error: ",my_progname_short);
vfprintf(stderr, fmt, args);
......
......@@ -268,7 +268,7 @@ static my_bool ma_crypt_data_pre_write_hook(PAGECACHE_IO_HOOK_ARGS *args)
return 1;
}
if (!share->now_transactional)
if (!share->base.born_transactional)
{
/* store a random number instead of LSN (for counter block) */
store_rand_lsn(args->page);
......@@ -392,7 +392,7 @@ static my_bool ma_crypt_index_pre_write_hook(PAGECACHE_IO_HOOK_ARGS *args)
return 1;
}
if (!share->now_transactional)
if (!share->base.born_transactional)
{
/* store a random number instead of LSN (for counter block) */
store_rand_lsn(args->page);
......
......@@ -7926,7 +7926,8 @@ void check_skipped_lsn(MARIA_HA *info, LSN lsn, my_bool index_file,
else
{
/* Give error, but don't flood the log */
if (skipped_lsn_err_count++ < 10 && ! info->s->redo_error_given++)
if (skipped_lsn_err_count++ < MAX_LSN_ERRORS &&
! info->s->redo_error_given++)
{
eprint(tracef, "Table %s has wrong LSN: " LSN_FMT " on page: %llu",
(index_file ? info->s->data_file_name.str :
......
......@@ -109,4 +109,7 @@ typedef LSN LSN_WITH_FLAGS;
*/
#define LSN_MAX (LSN)0x00FFFFFFFFFFFFFFULL
/* Max LSN error to print on check or recovery */
#define MAX_LSN_ERRORS 10
#endif
......@@ -565,13 +565,16 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags,
!maria_in_recovery) ||
((share->state.changed & STATE_NOT_MOVABLE) &&
((!(open_flags & HA_OPEN_IGNORE_MOVED_STATE) &&
memcmp(share->base.uuid, maria_uuid, MY_UUID_SIZE))))))
memcmp(share->base.uuid, maria_uuid, MY_UUID_SIZE)))) ||
((share->state.changed & (STATE_MOVED | STATE_NOT_ZEROFILLED)) ==
(STATE_MOVED | STATE_NOT_ZEROFILLED))))
{
DBUG_PRINT("warning", ("table is moved from another system. uuid_diff: %d create_trid: %lu max_trid: %lu",
DBUG_PRINT("warning", ("table is moved from another system. uuid_diff: %d create_trid: %lu max_trid: %lu moved: &d",
memcmp(share->base.uuid, maria_uuid,
MY_UUID_SIZE) != 0,
(ulong) share->state.create_trid,
(ulong) trnman_get_max_trid()));
(ulong) trnman_get_max_trid(),
MY_TEST((share->state.changed & STATE_MOVED))));
if (open_flags & HA_OPEN_FOR_REPAIR)
share->state.changed|= STATE_MOVED;
else
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment