Commit 27c24808 authored by Vladislav Vaintroub's avatar Vladislav Vaintroub

MDEV-15636 mariabackup --lock-ddl-per-table hangs if ALTER table is running

concurrently.

There is a deadlock between

C1 mariabackup's connection that holds MDL locks
C2 Online ALTER TABLE that wants to have MDL exclusively
   and tries to upgrade its mdl lock.
C3 another mariabackup's connection that does FLUSH TABLES (or FTWRL)

C3 waits for C2, which waits for C1, which waits for C3,
thus the deadlock.


MDL locks cannot be released until FLUSH  succeeds, because
otherwise it would allow ALTER to sneak in, causing backup to abort and
breaking lock-ddl-per-table's promise.

The fix here works around the deadlock by killing connections that are in
the "Waiting for table metadata lock" state (i.e. the ALTER). The killing
continues until FTWRL succeeds.

Killing connections is skipped if the --no-lock parameter
was passed to the backup, because there won't be a FLUSH.

For reference, in Percona's xtrabackup --lock-ddl-per-connection
silently implies --no-lock, i.e. FLUSH is always skipped there.

A rather large part of the fix is introducing a DBUG capability to start
a query in a new connection at the right moment of the backup,
compensating somewhat for mariabackup's lack of send_query or DBUG_SYNC.
parent a1d68faa
......@@ -1428,6 +1428,10 @@ void backup_release()
history_lock_time = time(NULL) - history_lock_time;
}
if (opt_lock_ddl_per_table) {
mdl_unlock_all();
}
if (opt_safe_slave_backup && sql_thread_started) {
msg("Starting slave SQL thread\n");
xb_mysql_query(mysql_connection,
......
......@@ -868,6 +868,76 @@ stop_query_killer()
os_event_wait_time(kill_query_thread_stopped, 60000);
}
/*
  Killing connections that wait for an MDL lock.

  If lock-ddl-per-table is used, there can be DDL statements waiting for an
  MDL lock which mariabackup currently holds, and FLUSH TABLES would hang
  indefinitely behind them. Therefore we kill those statements from a
  dedicated thread, until FLUSH TABLES WITH READ LOCK succeeds.
*/

/* Set by stop_mdl_waiters_killer() to ask the killer thread to exit. */
static os_event_t	mdl_killer_stop_event;
/* Set by the killer thread right before it exits. */
static os_event_t	mdl_killer_finished_event;

/*
  Thread procedure of the MDL waiters killer.

  Opens its own connection, then repeatedly lists the connections whose
  State is 'Waiting for table metadata lock' and issues KILL QUERY for each
  of them. The loop ends once mdl_killer_stop_event is set. On exit (also
  when the connection could not be established) mdl_killer_finished_event is
  set so that stop_mdl_waiters_killer() can return.
*/
static
os_thread_ret_t
DECLARE_THREAD(kill_mdl_waiters_thread(void *))
{
	MYSQL	*mysql;

	if ((mysql = xb_mysql_connect()) == NULL) {
		msg("Error: kill mdl waiters thread failed to connect\n");
		goto stop_thread;
	}

	for (;;) {
		/* The timed wait doubles as the polling interval; a return
		value of 0 is treated as "stop requested". */
		if (os_event_wait_time(mdl_killer_stop_event, 1000) == 0)
			break;

		MYSQL_RES *result = xb_mysql_query(mysql,
			"SELECT ID, COMMAND FROM INFORMATION_SCHEMA.PROCESSLIST "
			" WHERE State='Waiting for table metadata lock'",
			true, true);

		while (MYSQL_ROW row = mysql_fetch_row(result))
		{
			char query[64];

			msg_ts("Killing MDL waiting query '%s' on connection '%s'\n",
				row[1], row[0]);
			snprintf(query, sizeof(query), "KILL QUERY %s", row[0]);
			/* NOTE(review): the waiter may already be gone when
			KILL runs; how xb_mysql_query() reacts to that error
			is not visible here - confirm. */
			xb_mysql_query(mysql, query, true);
		}

		/* The result set belongs to us; release it on every
		iteration, otherwise one result set leaks per poll. */
		mysql_free_result(result);
	}

	mysql_close(mysql);

stop_thread:
	msg_ts("Kill mdl waiters thread stopped\n");
	os_event_set(mdl_killer_finished_event);
	os_thread_exit();
	return os_thread_ret_t(0);
}
/*
  Create the stop/finished events and launch kill_mdl_waiters_thread.
  Must be paired with stop_mdl_waiters_killer(), which destroys the events.
*/
static void start_mdl_waiters_killer()
{
	/* Both events have to exist before the thread is started, because
	the thread waits on one of them and sets the other. */
	mdl_killer_finished_event = os_event_create(0);
	mdl_killer_stop_event = os_event_create(0);

	os_thread_create(kill_mdl_waiters_thread, 0, 0);
}
/*
  Tell the MDL killer thread to stop, wait for its completion and then
  release the two events created by start_mdl_waiters_killer().
*/
static void stop_mdl_waiters_killer()
{
	/* Request termination ... */
	os_event_set(mdl_killer_stop_event);

	/* ... and block until the thread has signalled that it is done.
	Only after that is it safe to destroy the events it was using. */
	os_event_wait(mdl_killer_finished_event);

	os_event_destroy(mdl_killer_finished_event);
	os_event_destroy(mdl_killer_stop_event);
}
/*********************************************************************//**
Function acquires either a backup tables lock, if supported
by the server, or a global read lock (FLUSH TABLES WITH READ LOCK)
......@@ -890,6 +960,10 @@ lock_tables(MYSQL *connection)
return(true);
}
if (opt_lock_ddl_per_table) {
start_mdl_waiters_killer();
}
if (!opt_lock_wait_timeout && !opt_kill_long_queries_timeout) {
/* We do first a FLUSH TABLES. If a long update is running, the
......@@ -930,6 +1004,10 @@ lock_tables(MYSQL *connection)
xb_mysql_query(connection, "FLUSH TABLES WITH READ LOCK", false);
if (opt_lock_ddl_per_table) {
stop_mdl_waiters_killer();
}
if (opt_kill_long_queries_timeout) {
stop_query_killer();
}
......@@ -1647,25 +1725,6 @@ mdl_lock_init()
}
}
#ifndef DBUG_OFF
/*
  Test that the table is really locked, if lock_ddl_per_table is set.
  The test is executed in a DBUG_EXECUTE_IF block inside mdl_lock_table().

  A separate connection tries to ALTER the table with a statement timeout;
  the ALTER is expected to fail with ER_STATEMENT_TIMEOUT, which shows that
  it could not get past the lock within that time.

  @param table_name  name of the table, inserted verbatim into the
                     ALTER TABLE statement
*/
static void check_mdl_lock_works(const char *table_name)
{
	MYSQL *test_con= xb_mysql_connect();
	/* xb_mysql_connect() can return NULL; do not pass a NULL handle on
	to mysql_query(). (Assumes xb_a() aborts on a false expression.) */
	xb_a(test_con != NULL);

	char *query;
	/* asprintf() returns -1 on failure, which is non-zero and would
	slip through a plain truthiness check. */
	xb_a(asprintf(&query,
		"SET STATEMENT max_statement_time=1 FOR ALTER TABLE %s"
		" ADD COLUMN mdl_lock_column int", table_name) >= 0);

	int err = mysql_query(test_con, query);
	DBUG_ASSERT(err);
	int err_no = mysql_errno(test_con);
	DBUG_ASSERT(err_no == ER_STATEMENT_TIMEOUT);

	mysql_close(test_con);
	free(query);
}
#endif
void
mdl_lock_table(ulint space_id)
{
......@@ -1681,13 +1740,10 @@ mdl_lock_table(ulint space_id)
while (MYSQL_ROW row = mysql_fetch_row(mysql_result)) {
std::string full_table_name = ut_get_name(0,row[0]);
std::ostringstream lock_query;
lock_query << "SELECT * FROM " << full_table_name << " LIMIT 0";
lock_query << "SELECT 1 FROM " << full_table_name << " LIMIT 0";
msg_ts("Locking MDL for %s\n", full_table_name.c_str());
xb_mysql_query(mdl_con, lock_query.str().c_str(), false, false);
DBUG_EXECUTE_IF("check_mdl_lock_works",
check_mdl_lock_works(full_table_name.c_str()););
}
pthread_mutex_unlock(&mdl_lock_con_mutex);
......
......@@ -434,6 +434,91 @@ datafiles_iter_free(datafiles_iter_t *it)
free(it);
}
#ifndef DBUG_OFF
/* Arguments handed to dbug_execute_in_new_connection(). The structure is
allocated by dbug_start_query_thread() and deleted by the thread. */
struct dbug_thread_param_t
{
/* Connection the query is executed on; closed by the thread. */
MYSQL *con;
/* Statement text passed to mysql_query(). */
const char *query;
/* Expected outcome of mysql_query(): 0 for success, otherwise error. */
int expect_err;
/* If not 0 and the query failed, the expected mysql_errno() value. */
int expect_errno;
/* Set by the thread after the query finished and 'con' was closed. */
os_event_t done_event;
};
/*
  Thread procedure used in dbug_start_query_thread.

  Runs par->query on par->con, asserts that the outcome matches the
  expectation stored in the parameter block, closes the connection, frees
  the parameter block and finally sets its done_event.

  @param arg  pointer to a heap-allocated dbug_thread_param_t; ownership is
              taken over by this thread
*/
extern "C"
os_thread_ret_t
DECLARE_THREAD(dbug_execute_in_new_connection)(void *arg)
{
	mysql_thread_init();
	dbug_thread_param_t *par= (dbug_thread_param_t *)arg;

	int err = mysql_query(par->con, par->query);
	int err_no = mysql_errno(par->con);

	/* mysql_query() only promises "non-zero on error", and expect_err
	is documented as "0 or not 0", so compare success/failure rather
	than the raw integer values. */
	DBUG_ASSERT((par->expect_err != 0) == (err != 0));
	if (err && par->expect_errno) {
		DBUG_ASSERT(err_no == par->expect_errno);
	}

	mysql_close(par->con);
	mysql_thread_end();

	/* Copy the event out before the parameter block is freed. */
	os_event_t done = par->done_event;
	delete par;
	os_event_set(done);

	os_thread_exit();
	return os_thread_ret_t(0);
}
/*
Execute query from a new connection, in own thread.

@param query - query to be executed
@param wait_state - if not NULL, wait until query from new connection
  reaches this state (value of column State in I_S.PROCESSLIST)
@param expected_err - if 0, query is supposed to finish successfully,
  otherwise query should return error.
@param expected_errno - if not 0, and query finished with error,
  expected mysql_errno()

@return event that the new thread sets once the query has finished; the
  caller is expected to wait on it and destroy it.

Note: when wait_state is given, this function polls until the state is
observed; it does not return if the query never shows up in that state.
*/
static os_event_t dbug_start_query_thread(
	const char *query,
	const char *wait_state,
	int expected_err,
	int expected_errno)

{
	dbug_thread_param_t *par = new dbug_thread_param_t;
	par->query = query;
	par->expect_err = expected_err;
	par->expect_errno = expected_errno;
	par->done_event = os_event_create(0);
	par->con = xb_mysql_connect();

	/* The thread deletes 'par' and closes 'par->con' as soon as the
	query is finished, which can happen at any time after the thread is
	created. Take copies of everything needed later while the objects
	are still guaranteed to be alive. */
	os_event_t done_event = par->done_event;
	unsigned long thread_id = mysql_thread_id(par->con);

	os_thread_create(dbug_execute_in_new_connection, par, 0);

	if (!wait_state)
		return done_event;

	char q[256];
	snprintf(q, sizeof(q),
		"SELECT 1 FROM INFORMATION_SCHEMA.PROCESSLIST where ID=%lu"
		" AND Command='Query' AND State='%s'",
		thread_id, wait_state);

	for (;;) {
		MYSQL_RES *result = xb_mysql_query(mysql_connection, q, true, true);
		/* Any row means the connection is in the requested state. */
		bool reached = mysql_fetch_row(result) != NULL;
		/* Release the result set on every poll to avoid leaking it. */
		mysql_free_result(result);
		if (reached)
			break;

		msg_ts("Waiting for query '%s' on connection %lu to"
			" reach state '%s'\n", query, thread_id,
			wait_state);
		my_sleep(1000);
	}

	msg_ts("query '%s' on connection %lu reached state '%s'\n", query,
		thread_id, wait_state);
	return done_event;
}
/* Completion event of the ALTER TABLE test query that mdl_lock_all()
starts under the "check_mdl_lock_works" debug keyword; it is waited on and
destroyed in xtrabackup_backup_func() under the same keyword. */
os_event_t dbug_alter_thread_done;
#endif
void mdl_lock_all()
{
mdl_lock_init();
......@@ -449,6 +534,11 @@ void mdl_lock_all()
mdl_lock_table(node->space->id);
}
datafiles_iter_free(it);
DBUG_EXECUTE_IF("check_mdl_lock_works",
dbug_alter_thread_done =
dbug_start_query_thread("ALTER TABLE test.t ADD COLUMN mdl_lock_column int",
"Waiting for table metadata lock",1, ER_QUERY_INTERRUPTED););
}
/** Check if the space id belongs to the table which name should
......@@ -4078,6 +4168,11 @@ xtrabackup_backup_func()
backup_release();
DBUG_EXECUTE_IF("check_mdl_lock_works",
os_event_wait(dbug_alter_thread_done);
os_event_destroy(dbug_alter_thread_done);
);
if (ok) {
backup_finish();
}
......@@ -4087,10 +4182,6 @@ xtrabackup_backup_func()
goto fail;
}
if (opt_lock_ddl_per_table) {
mdl_unlock_all();
}
xtrabackup_destroy_datasinks();
msg("mariabackup: Redo log (from LSN " LSN_PF " to " LSN_PF
......
......@@ -110,6 +110,7 @@ extern my_bool opt_noversioncheck;
extern my_bool opt_no_backup_locks;
extern my_bool opt_decompress;
extern my_bool opt_remove_original;
extern my_bool opt_lock_ddl_per_table;
extern char *opt_incremental_history_name;
extern char *opt_incremental_history_uuid;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment