Commit 2a7dd644 authored by Andrei Elkin's avatar Andrei Elkin

MDEV-24526 binlog rotate via FLUSH LOGS may obsolate binlog file for recovery too eary

There was race between a committing transaction and the following in binlog
order FLUSH LOGS that could create a 2nd Binlog checkpoint (BCP) event
in the new file *before* the first logged-in-old-binlog transaction gets committed in
Innodb. That would cause the transaction loss at recovery, should
the server stop right after the BCP.

The race is tackled by enforcing the necessary set of mutexes to be acquired
by FLUSH-LOGS handler in the correct order (of the group commit leader
pattern).

Note, there remain two cases where a similar race is still possible:
  - the above race as it is when the server is run with ("unlikely")
    non-default `--binlog-optimize-thread-scheduling=0` (MDEV-24530), and
  - at unlikely event of bin-logging of Incident event (MDEV-24531) that
    also triggers binlog rotation,
    in both cases though with lesser chances after the current fixes.
parent ef2749c9
SET GLOBAL innodb_flush_log_at_trx_commit= 1;
RESET MASTER;
CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
*** Test that FLUSH LOGS waits if a transaction ordered commit is in progress.
connect con1,localhost,root,,;
SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL con1_ready WAIT_FOR con1_go";
INSERT INTO t1 VALUES (1, REPEAT("x", 1));
connection default;
SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
SET DEBUG_SYNC= "rotate_after_rotate SIGNAL con_flush_ready WAIT_FOR default_go";
FLUSH LOGS;
connect con2,localhost,root,,;
Trx_1 is not yet committed:
SELECT count(*) as 'ZERO' from t1;
ZERO
0
Wait for Trx_2 has rotated binlog:
SET DEBUG_SYNC= "now WAIT_FOR con_flush_ready";
SET DEBUG_SYNC= "now SIGNAL default_go";
connection default;
Must be tree logs in the list:
show binary logs;
Log_name File_size
master-bin.000001 #
master-bin.000002 #
master-bin.000003 #
include/show_binlog_events.inc
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.000001 # Gtid_list # # []
master-bin.000001 # Binlog_checkpoint # # master-bin.000001
master-bin.000001 # Gtid # # GTID #-#-#
master-bin.000001 # Query # # use `test`; CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb
master-bin.000001 # Gtid # # BEGIN GTID #-#-#
master-bin.000001 # Annotate_rows # # INSERT INTO t1 VALUES (1, REPEAT("x", 1))
master-bin.000001 # Table_map # # table_id: # (test.t1)
master-bin.000001 # Write_rows_v1 # # table_id: # flags: STMT_END_F
master-bin.000001 # Xid # # COMMIT /* XID */
master-bin.000001 # Rotate # # master-bin.000002;pos=POS
Only one Binlog checkpoint must exist and point to master-bin.000001
include/show_binlog_events.inc
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000002 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.000002 # Gtid_list # # [#-#-#]
master-bin.000002 # Binlog_checkpoint # # master-bin.000001
SELECT count(*) as 'ONE' from t1;
ONE
1
connection default;
DROP TABLE t1;
SET debug_sync = 'reset';
--source include/have_innodb.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc
--source include/have_binlog_format_row.inc
# References:
#
# MDEV-24526 binlog rotate via FLUSH LOGS may obsolate binlog file too eary
#
# The test for MDEV-24526 proves the fixes correct observed race condition
# between a commiting transaction and FLUSH-LOGS.
# The plot.
# Trx_1 (con1) transaction binlogs first
# to yield its turn acquiring LOCK_commit_ordered to Trx_2 and stand
# still waiting of a signal that will never arrive.
# Trx_2 can't acquire it in the fixed version even though
# Trx_3 makes sure Trx_2 has reached a post-rotation execution point
# to signal it to proceed.
# Then the server gets crashed and Trx_1 must recover unlike
# in the OLD buggy version.
#
SET GLOBAL innodb_flush_log_at_trx_commit= 1;
RESET MASTER;
CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
--echo *** Test that FLUSH LOGS waits if a transaction ordered commit is in progress.
connect(con1,localhost,root,,); # Trx_1
# hang before doing acquiring Commit Ordered mutex
SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL con1_ready WAIT_FOR con1_go";
--send INSERT INTO t1 VALUES (1, REPEAT("x", 1))
connection default; # Trx_2
SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
SET DEBUG_SYNC= "rotate_after_rotate SIGNAL con_flush_ready WAIT_FOR default_go";
--send FLUSH LOGS
connect(con2,localhost,root,,); # Trx_3
--echo Trx_1 is not yet committed:
SELECT count(*) as 'ZERO' from t1;
--echo Wait for Trx_2 has rotated binlog:
SET DEBUG_SYNC= "now WAIT_FOR con_flush_ready";
# Useless signal to prove Trx_2 cannot race Trx_1's commit
# even though Trx_1 never received the being waited 'con1_go'.
SET DEBUG_SYNC= "now SIGNAL default_go";
--let $shutdown_timeout=0
--source include/restart_mysqld.inc
connection default;
--enable_reconnect
--error 0,2013
--reap
--echo Must be tree logs in the list:
--source include/show_binary_logs.inc
--let $binlog_file= master-bin.000001
--let $binlog_start= 4
--source include/show_binlog_events.inc
--echo Only one Binlog checkpoint must exist and point to master-bin.000001
--let $binlog_file= master-bin.000002
--let $binlog_start= 4
--source include/show_binlog_events.inc
# In the buggy server version the following select may have
# resulted with ZERO:
SELECT count(*) as 'ONE' from t1;
# Clean up.
connection default;
DROP TABLE t1;
SET debug_sync = 'reset';
......@@ -6851,6 +6851,9 @@ int MYSQL_BIN_LOG::rotate_and_purge(bool force_rotate,
bool check_purge= false;
mysql_mutex_lock(&LOCK_log);
DEBUG_SYNC(current_thd, "rotate_after_acquire_LOCK_log");
prev_binlog_id= current_binlog_id;
if ((err_gtid= do_delete_gtid_domain(domain_drop_lex)))
......@@ -6861,11 +6864,22 @@ int MYSQL_BIN_LOG::rotate_and_purge(bool force_rotate,
}
else if ((error= rotate(force_rotate, &check_purge)))
check_purge= false;
DEBUG_SYNC(current_thd, "rotate_after_rotate");
/*
NOTE: Run purge_logs wo/ holding LOCK_log because it does not need
the mutex. Otherwise causes various deadlocks.
Explicit binlog rotation must be synchronized with a concurrent
binlog ordered commit, in particular not let binlog
checkpoint notification request until early binlogged
concurrent commits have has been completed.
*/
mysql_mutex_lock(&LOCK_after_binlog_sync);
mysql_mutex_unlock(&LOCK_log);
mysql_mutex_lock(&LOCK_commit_ordered);
mysql_mutex_unlock(&LOCK_after_binlog_sync);
mysql_mutex_unlock(&LOCK_commit_ordered);
if (check_purge)
checkpoint_and_purge(prev_binlog_id);
......@@ -8014,7 +8028,12 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
}
DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_commit_ordered");
mysql_mutex_lock(&LOCK_commit_ordered);
DBUG_EXECUTE_IF("crash_before_engine_commit",
{
DBUG_SUICIDE();
});
last_commit_pos_offset= commit_offset;
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment