Commit 7dde504a authored by Andrei's avatar Andrei

q# This is a combination of 2 commits.

MDEV-31503 ALTER SEQUENCE ends up in optimistic parallel slave binlog out-of-order

The OOO error still was possible even after MDEV-31077. This time
it occured through open_table() when the sequence table was not in
the table cache *and* the table was created before the last server
restart.
In such context a internal (read-only) transaction is committed
and it was not blocked from doing a wakeup() call to subsequent
transactions.

Fixed with extending suspend_subsequent_commits() effect for the entirety
of Sql_cmd_alter_sequence::execute().
An elaborated MDEV-31077 test proves the fixes of both failure scenarios.

Also the bug condition suggests a workaround to pre-SELECT sequence
tables before START SLAVE.
Reviewed-by: default avatarBrandon Nesterenko <brandon.nesterenko@mariadb.com>
parent 3f5cee8f
......@@ -47,11 +47,21 @@ Warnings:
Note 1255 Slave already has been stopped
RESET MASTER;
SET @@global.gtid_slave_pos="";
SET @@global.gtid_strict_mode=1;
connection master;
RESET MASTER;
CREATE TABLE ti (a INT) ENGINE=innodb;
CREATE SEQUENCE s2 ENGINE=innodb;
include/save_master_gtid.inc
connection slave;
include/start_slave.inc
include/sync_with_master_gtid.inc
include/stop_slave.inc
include/rpl_restart_server.inc [server_number=2]
SET @@global.slave_parallel_threads=2;
SET @@global.slave_parallel_mode=optimistic;
SET @@global.debug_dbug="+d,hold_worker_on_schedule";
SET @@global.gtid_strict_mode=1;
connection master;
SET @@gtid_seq_no=100;
ALTER SEQUENCE s2 restart with 1;
INSERT INTO ti SET a=1;
......@@ -60,6 +70,7 @@ SELECT @@global.gtid_binlog_state "Master gtid state";
Master gtid state
0-1-101
connection slave;
SET STATEMENT sql_log_bin=0 FOR FLUSH TABLES;
include/start_slave.inc
SELECT @@global.gtid_binlog_state, @@global.gtid_slave_pos as "no 100,101 yet in both";
@@global.gtid_binlog_state no 100,101 yet in both
......
......@@ -77,15 +77,28 @@ SET DEBUG_SYNC = 'now SIGNAL continue_worker';
--source include/stop_slave.inc
RESET MASTER;
SET @@global.gtid_slave_pos="";
--let $slave_gtid_strict_mode=`select @@global.gtid_strict_mode`
SET @@global.gtid_strict_mode=1;
--connection master
RESET MASTER;
# Load from master
CREATE TABLE ti (a INT) ENGINE=innodb;
CREATE SEQUENCE s2 ENGINE=innodb;
--source include/save_master_gtid.inc
--connection slave
--source include/start_slave.inc
--source include/sync_with_master_gtid.inc
--source include/stop_slave.inc
--let $rpl_server_number= 2
--source include/rpl_restart_server.inc
# upon restart
SET @@global.slave_parallel_threads=2;
SET @@global.slave_parallel_mode=optimistic;
SET @@global.debug_dbug="+d,hold_worker_on_schedule";
--let $slave_gtid_strict_mode=`select @@global.gtid_strict_mode`
SET @@global.gtid_strict_mode=1;
--connection master
SET @@gtid_seq_no=100;
ALTER SEQUENCE s2 restart with 1;
INSERT INTO ti SET a=1;
......@@ -93,6 +106,10 @@ INSERT INTO ti SET a=1;
SELECT @@global.gtid_binlog_state "Master gtid state";
--connection slave
# The following FT complicates the opening table time with committing
# an internal transaction. The rest of the test also proves
# MDEV-31503 "branch" of the OOO error is fixed.
SET STATEMENT sql_log_bin=0 FOR FLUSH TABLES;
--source include/start_slave.inc
--let $wait_condition= SELECT count(*) = 1 FROM information_schema.processlist WHERE state LIKE "Waiting for prior transaction to commit"
......
......@@ -900,6 +900,20 @@ int SEQUENCE::set_value(TABLE *table, longlong next_val, ulonglong next_round,
DBUG_RETURN(error);
}
#if defined(HAVE_REPLICATION)
class wait_for_commit_raii
{
private:
THD *m_thd;
wait_for_commit *m_wfc;
public:
wait_for_commit_raii(THD* thd) :
m_thd(thd), m_wfc(thd->suspend_subsequent_commits())
{}
~wait_for_commit_raii() { m_thd->resume_subsequent_commits(m_wfc); }
};
#endif
bool Sql_cmd_alter_sequence::execute(THD *thd)
{
......@@ -912,7 +926,10 @@ bool Sql_cmd_alter_sequence::execute(THD *thd)
SEQUENCE *seq;
No_such_table_error_handler no_such_table_handler;
DBUG_ENTER("Sql_cmd_alter_sequence::execute");
#if defined(HAVE_REPLICATION)
/* No wakeup():s of subsequent commits is allowed in this function. */
wait_for_commit_raii suspend_wfc(thd);
#endif
if (check_access(thd, ALTER_ACL, first_table->db.str,
&first_table->grant.privilege,
......@@ -1010,19 +1027,15 @@ bool Sql_cmd_alter_sequence::execute(THD *thd)
else
table->file->print_error(error, MYF(0));
table->s->sequence->write_unlock(table);
{
wait_for_commit* suspended_wfc= thd->suspend_subsequent_commits();
if (trans_commit_stmt(thd))
error= 1;
if (trans_commit_implicit(thd))
error= 1;
thd->resume_subsequent_commits(suspended_wfc);
DBUG_EXECUTE_IF("hold_worker_on_schedule",
{
/* delay binlogging of a parent trx in rpl_parallel_seq */
my_sleep(100000);
});
}
if (trans_commit_stmt(thd))
error= 1;
if (trans_commit_implicit(thd))
error= 1;
DBUG_EXECUTE_IF("hold_worker_on_schedule",
{
/* delay binlogging of a parent trx in rpl_parallel_seq */
my_sleep(100000);
});
if (likely(!error))
error= write_bin_log(thd, 1, thd->query(), thd->query_length());
if (likely(!error))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment