Commit cdf19cd6 authored by Brandon Nesterenko

MDEV-16091: Seconds_Behind_Master spikes to millions of seconds

The rpl.rpl_seconds_behind_master_spike test would sometimes
time out or take a very long time to complete. This happened
because an MTR DEBUG_SYNC signal would be lost due to a
subsequent call to RESET. I.e., the slave SQL thread would
be paused due to the WAIT_FOR signal being lost, resulting in
either a failed test if the `select master_pos_wait` timeout
occurs first, or a very long run-time if the DEBUG_SYNC timeout
occurs first.

The fix ensures that the MTR signal is processed by the slave
SQL thread before issuing the call to RESET.

Reviewed By:
============
Andrei Elkin <andrei.elkin@mariadb.com>
parent b557f263
...@@ -29,9 +29,10 @@ count(*)=1 ...@@ -29,9 +29,10 @@ count(*)=1
# not considered in Seconds_Behind_Master calculation # not considered in Seconds_Behind_Master calculation
connection slave1; connection slave1;
# Safely resume slave SQL thread # Safely resume slave SQL thread
SET @@global.debug_dbug=''; # Prove SQL thread is in state "debug sync point: now"
SET DEBUG_SYNC='pause_sql_thread_on_fde CLEAR'; SET @@global.debug_dbug="-d,pause_sql_thread_on_fde";
SET DEBUG_SYNC='now SIGNAL sql_thread_continue'; SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
# Wait for SQL thread to continue into normal execution
SET DEBUG_SYNC='RESET'; SET DEBUG_SYNC='RESET';
connection master; connection master;
DROP TABLE t1; DROP TABLE t1;
......
...@@ -71,10 +71,24 @@ if(`select $sbm > $t_now - $t_master_events_logged + 1`) ...@@ -71,10 +71,24 @@ if(`select $sbm > $t_now - $t_master_events_logged + 1`)
} }
--echo # Safely resume slave SQL thread --echo # Safely resume slave SQL thread
SET @@global.debug_dbug='';
SET DEBUG_SYNC='pause_sql_thread_on_fde CLEAR'; --let $dbug_wait_state="debug sync point: now"
--echo # Prove SQL thread is in state $dbug_wait_state
--let $wait_condition= SELECT STATE=$dbug_wait_state from information_schema.PROCESSLIST where COMMAND="Slave_SQL"
--source include/wait_condition.inc
SET @@global.debug_dbug="-d,pause_sql_thread_on_fde";
SET DEBUG_SYNC='now SIGNAL sql_thread_continue'; SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
# We have to wait for the SQL thread to acknowledge the sql_thread_continue
# signal. Otherwise the below RESET command can overwrite the signal before
# the SQL thread is notified to proceed, causing it to "permanently" become
# stuck awaiting the signal (until timeout is reached).
--echo # Wait for SQL thread to continue into normal execution
--let $wait_condition= SELECT STATE!= $dbug_wait_state from information_schema.PROCESSLIST where COMMAND="Slave_SQL"
--source include/wait_condition.inc
# Reset last sql_thread_continue signal # Reset last sql_thread_continue signal
SET DEBUG_SYNC='RESET'; SET DEBUG_SYNC='RESET';
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment