Commit a1c23807 authored by Monty's avatar Monty

MENT-328 Retry BACKUP STAGE BLOCK DDL in case of deadlocks

MENT-328 wrongly assumed that the backup failed because of warnings from
mariabackup about files that were not found. These warnings are normal (and
the error message should be deleted).

randgen failed because mariabackup didn't retry BACKUP STAGE BLOCK DDL
if it failed with a deadlock.

To simplify things, I implemented the retry loop in the server as
this particular deadlock should be quickly resolved.
parent 0ec27d7b
...@@ -39,6 +39,28 @@ MDL_INTENTION_EXCLUSIVE Schema metadata lock test ...@@ -39,6 +39,28 @@ MDL_INTENTION_EXCLUSIVE Schema metadata lock test
select * from t1; select * from t1;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
backup unlock; backup unlock;
connection con1;
connection default;
#
# Check that BACKUP LOCK blocks some operations
#
create sequence seq1;
create sequence seq2;
backup lock seq1;
connection con1;
CREATE OR REPLACE SEQUENCE seq1 START -28;
ERROR HY000: Sequence 'test.seq1' values are conflicting
SET STATEMENT max_statement_time=10 FOR CREATE OR REPLACE SEQUENCE seq1 START 50;
ERROR 70100: Query execution was interrupted (max_statement_time exceeded)
SET STATEMENT max_statement_time=10 FOR ALTER SEQUENCE IF EXISTS seq1 NOMAXVALUE;
ERROR 70100: Query execution was interrupted (max_statement_time exceeded)
SET STATEMENT max_statement_time=10 FOR ALTER SEQUENCE IF EXISTS seq1 MAXVALUE 1000;
ERROR 70100: Query execution was interrupted (max_statement_time exceeded)
SET STATEMENT max_statement_time=10 for rename table seq2 to seq3, seq3 to seq1;
ERROR 70100: Query execution was interrupted (max_statement_time exceeded)
connection default;
backup unlock;
drop table seq1,seq2;
# #
# BACKUP LOCK and BACKUP UNLOCK are not allowed in procedures. # BACKUP LOCK and BACKUP UNLOCK are not allowed in procedures.
# #
...@@ -141,7 +163,6 @@ ERROR HY000: Can't execute the given command because you have active locked tabl ...@@ -141,7 +163,6 @@ ERROR HY000: Can't execute the given command because you have active locked tabl
SET STATEMENT max_statement_time=180 FOR BACKUP LOCK test.u; SET STATEMENT max_statement_time=180 FOR BACKUP LOCK test.u;
# restart # restart
# #
connection con1;
connection default; connection default;
disconnect con1; disconnect con1;
show tables; show tables;
......
...@@ -43,10 +43,39 @@ SELECT LOCK_MODE, LOCK_TYPE, TABLE_SCHEMA, TABLE_NAME FROM information_schema.me ...@@ -43,10 +43,39 @@ SELECT LOCK_MODE, LOCK_TYPE, TABLE_SCHEMA, TABLE_NAME FROM information_schema.me
--error ER_LOCK_DEADLOCK --error ER_LOCK_DEADLOCK
select * from t1; select * from t1;
backup unlock; backup unlock;
connection con1;
--reap
connection default;
--echo #
--echo # Check that BACKUP LOCK blocks some operations
--echo #
# These tests have to be done with timeouts as we want to ensure that the tables
# don't change
create sequence seq1;
create sequence seq2;
backup lock seq1;
connection con1;
--error ER_SEQUENCE_INVALID_DATA
CREATE OR REPLACE SEQUENCE seq1 START -28;
--error ER_STATEMENT_TIMEOUT
SET STATEMENT max_statement_time=10 FOR CREATE OR REPLACE SEQUENCE seq1 START 50;
--error ER_STATEMENT_TIMEOUT
SET STATEMENT max_statement_time=10 FOR ALTER SEQUENCE IF EXISTS seq1 NOMAXVALUE;
--error ER_STATEMENT_TIMEOUT
SET STATEMENT max_statement_time=10 FOR ALTER SEQUENCE IF EXISTS seq1 MAXVALUE 1000;
--error ER_STATEMENT_TIMEOUT
SET STATEMENT max_statement_time=10 for rename table seq2 to seq3, seq3 to seq1;
connection default;
backup unlock;
drop table seq1,seq2;
--echo # --echo #
--echo # BACKUP LOCK and BACKUP UNLOCK are not allowed in procedures. --echo # BACKUP LOCK and BACKUP UNLOCK are not allowed in procedures.
--echo # --echo #
delimiter |; delimiter |;
--error ER_SP_BADSTATEMENT --error ER_SP_BADSTATEMENT
CREATE PROCEDURE p_BACKUP_LOCK() CREATE PROCEDURE p_BACKUP_LOCK()
...@@ -162,8 +191,6 @@ SET STATEMENT max_statement_time=180 FOR BACKUP LOCK test.u; ...@@ -162,8 +191,6 @@ SET STATEMENT max_statement_time=180 FOR BACKUP LOCK test.u;
--echo # --echo #
connection con1;
--reap
connection default; connection default;
disconnect con1; disconnect con1;
show tables; show tables;
......
...@@ -233,8 +233,12 @@ static bool backup_flush(THD *thd) ...@@ -233,8 +233,12 @@ static bool backup_flush(THD *thd)
This will probably require a callback from the InnoDB code. This will probably require a callback from the InnoDB code.
*/ */
/* Retry to get initial lock for 0.1 + 0.5 + 2.5 + 12.5 + 62.5 = 78.1 sec */
#define MAX_RETRY_COUNT 5
static bool backup_block_ddl(THD *thd) static bool backup_block_ddl(THD *thd)
{ {
uint sleep_time;
DBUG_ENTER("backup_block_ddl"); DBUG_ENTER("backup_block_ddl");
kill_delayed_threads(); kill_delayed_threads();
...@@ -275,10 +279,21 @@ static bool backup_block_ddl(THD *thd) ...@@ -275,10 +279,21 @@ static bool backup_block_ddl(THD *thd)
block new DDL's, in addition to all previous blocks block new DDL's, in addition to all previous blocks
We didn't do this lock above, as we wanted DDL's to be executed while We didn't do this lock above, as we wanted DDL's to be executed while
we wait for non transactional tables (which may take a while). we wait for non transactional tables (which may take a while).
We do this lock in a loop as we can get a deadlock if there are multi-object
DDL statements like
RENAME TABLE t1 TO t2, t3 TO t4
and the MDL lock upgrade happens in the middle of one of them.
*/ */
if (thd->mdl_context.upgrade_shared_lock(backup_flush_ticket, sleep_time= 100; // Start with 0.1 seconds
for (uint i= 0 ; i <= MAX_RETRY_COUNT ; i++)
{
if (!thd->mdl_context.upgrade_shared_lock(backup_flush_ticket,
MDL_BACKUP_WAIT_DDL, MDL_BACKUP_WAIT_DDL,
thd->variables.lock_wait_timeout)) thd->variables.lock_wait_timeout))
break;
if (thd->get_stmt_da()->sql_errno() != ER_LOCK_DEADLOCK || thd->killed ||
i == MAX_RETRY_COUNT)
{ {
/* /*
Could be a timeout. Downgrade lock to what it was before this function Could be a timeout. Downgrade lock to what it was before this function
...@@ -287,6 +302,10 @@ static bool backup_block_ddl(THD *thd) ...@@ -287,6 +302,10 @@ static bool backup_block_ddl(THD *thd)
backup_flush_ticket->downgrade_lock(MDL_BACKUP_FLUSH); backup_flush_ticket->downgrade_lock(MDL_BACKUP_FLUSH);
DBUG_RETURN(1); DBUG_RETURN(1);
} }
thd->clear_error(); // Forget the DEADLOCK error
my_sleep(sleep_time);
sleep_time*= 5; // Wait a bit longer next time
}
DBUG_RETURN(0); DBUG_RETURN(0);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment