Commit eb4458e9, authored and committed by Brandon Nesterenko

MDEV-33465: an option to enable semisync recovery

The current semi-sync binlog fail-over recovery process uses
rpl_semi_sync_slave_enabled==TRUE as its condition to truncate a
primary server’s binlog, as it is anticipating the server to re-join
a replication topology as a replica. However, for servers configured
with both rpl_semi_sync_master_enabled=1 and
rpl_semi_sync_slave_enabled=1, if a primary is just re-started (i.e.
retaining its role as master), it can truncate its binlog to drop
transactions which its replica(s) have already received and executed.
If this happens, when the replica reconnects, its gtid_slave_pos can
be ahead of the recovered primary’s gtid_binlog_pos, resulting in an
error state where the replica’s state is ahead of the primary’s.

This patch changes the condition under which semi-sync recovery
truncates the binlog: truncation is now performed only when the
configuration variable --init-rpl-role is set to SLAVE (instead of
when rpl_semi_sync_slave_enabled is set). This allows for both
rpl_semi_sync_master_enabled and rpl_semi_sync_slave_enabled to be
set for a primary that is restarted, and no transactions will be
lost, so long as --init-rpl-role is not set to SLAVE.

Reviewed By:
============
Sergei Golubchik <serg@mariadb.com>
parent e40d232a
...@@ -40,7 +40,7 @@ disconnect master1; ...@@ -40,7 +40,7 @@ disconnect master1;
disconnect master2; disconnect master2;
disconnect master3; disconnect master3;
disconnect master4; disconnect master4;
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --log-warnings=3 # restart: --init-rpl-role=SLAVE --sync-binlog=1 --log-warnings=3
FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-7/ in mysqld.1.err FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-7/ in mysqld.1.err
Pre-crash binlog file content: Pre-crash binlog file content:
include/show_binlog_events.inc include/show_binlog_events.inc
...@@ -104,7 +104,7 @@ disconnect master1; ...@@ -104,7 +104,7 @@ disconnect master1;
disconnect master2; disconnect master2;
disconnect master3; disconnect master3;
disconnect master4; disconnect master4;
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --log-warnings=3 # restart: --init-rpl-role=SLAVE --sync-binlog=1 --log-warnings=3
FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-11/ in mysqld.1.err FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-11/ in mysqld.1.err
Pre-crash binlog file content: Pre-crash binlog file content:
include/show_binlog_events.inc include/show_binlog_events.inc
...@@ -173,7 +173,7 @@ disconnect master1; ...@@ -173,7 +173,7 @@ disconnect master1;
disconnect master2; disconnect master2;
disconnect master3; disconnect master3;
disconnect master4; disconnect master4;
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --log-warnings=3 # restart: --init-rpl-role=SLAVE --sync-binlog=1 --log-warnings=3
FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-15/ in mysqld.1.err FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-15/ in mysqld.1.err
Pre-crash binlog file content: Pre-crash binlog file content:
include/show_binlog_events.inc include/show_binlog_events.inc
...@@ -248,7 +248,7 @@ disconnect master1; ...@@ -248,7 +248,7 @@ disconnect master1;
disconnect master2; disconnect master2;
disconnect master3; disconnect master3;
disconnect master4; disconnect master4;
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --log-warnings=3 # restart: --init-rpl-role=SLAVE --sync-binlog=1 --log-warnings=3
FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-21/ in mysqld.1.err FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-21/ in mysqld.1.err
Pre-crash binlog file content: Pre-crash binlog file content:
include/show_binlog_events.inc include/show_binlog_events.inc
......
...@@ -31,9 +31,9 @@ Log_name File_size ...@@ -31,9 +31,9 @@ Log_name File_size
master-bin.000001 # master-bin.000001 #
master-bin.000002 # master-bin.000002 #
master-bin.000003 # master-bin.000003 #
# restart the server with --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 # restart the server with --init-rpl-role=SLAVE --sync-binlog=1
# the server is restarted # the server is restarted
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 # restart: --init-rpl-role=SLAVE --sync-binlog=1
connection default; connection default;
# #
# *** Summary: 1 row should be present in both tables; binlog is truncated; number of binlogs at reconnect - 3: # *** Summary: 1 row should be present in both tables; binlog is truncated; number of binlogs at reconnect - 3:
...@@ -98,7 +98,7 @@ INSERT INTO t2 VALUES (2, REPEAT("x", 4100)); ...@@ -98,7 +98,7 @@ INSERT INTO t2 VALUES (2, REPEAT("x", 4100));
INSERT INTO t1 VALUES (2, REPEAT("x", 4100)); INSERT INTO t1 VALUES (2, REPEAT("x", 4100));
COMMIT; COMMIT;
connection default; connection default;
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 # restart: --init-rpl-role=SLAVE --sync-binlog=1
connection default; connection default;
# #
# *** Summary: 2 rows should be present in both tables; no binlog truncation; one extra binlog file compare with A; number of binlogs at reconnect - 4: # *** Summary: 2 rows should be present in both tables; no binlog truncation; one extra binlog file compare with A; number of binlogs at reconnect - 4:
...@@ -155,9 +155,9 @@ Log_name File_size ...@@ -155,9 +155,9 @@ Log_name File_size
master-bin.000001 # master-bin.000001 #
master-bin.000002 # master-bin.000002 #
master-bin.000003 # master-bin.000003 #
# restart the server with --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 # restart the server with --init-rpl-role=SLAVE --sync-binlog=1
# the server is restarted # the server is restarted
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 # restart: --init-rpl-role=SLAVE --sync-binlog=1
connection default; connection default;
# #
# *** Summary: 2 rows should be present in both tables; no binlog truncation; the same # of binlog files as in B; number of binlogs at reconnect - 4: # *** Summary: 2 rows should be present in both tables; no binlog truncation; the same # of binlog files as in B; number of binlogs at reconnect - 4:
......
...@@ -42,7 +42,7 @@ connection default; ...@@ -42,7 +42,7 @@ connection default;
disconnect master1; disconnect master1;
disconnect master2; disconnect master2;
disconnect master3; disconnect master3;
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --log-warnings=3 # restart: --init-rpl-role=SLAVE --sync-binlog=1 --log-warnings=3
FOUND 1 /truncated binlog file:.*master.*000002/ in mysqld.1.err FOUND 1 /truncated binlog file:.*master.*000002/ in mysqld.1.err
"One record should be present in table" "One record should be present in table"
SELECT * FROM ti; SELECT * FROM ti;
......
...@@ -42,7 +42,7 @@ SELECT @@global.gtid_binlog_pos as 'Before the crash and never logged trx'; ...@@ -42,7 +42,7 @@ SELECT @@global.gtid_binlog_pos as 'Before the crash and never logged trx';
# #
# Server restart # Server restart
# #
--let $restart_parameters= --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --log-warnings=3 --let $restart_parameters= --init-rpl-role=SLAVE --sync-binlog=1 --log-warnings=3
--source include/start_mysqld.inc --source include/start_mysqld.inc
# Check error log for a successful truncate message. # Check error log for a successful truncate message.
......
...@@ -36,7 +36,7 @@ CREATE TABLE tm (f INT) ENGINE=Aria; ...@@ -36,7 +36,7 @@ CREATE TABLE tm (f INT) ENGINE=Aria;
# Using 'debug_sync' hold 'query1' execution after 'query1' is flushed and # Using 'debug_sync' hold 'query1' execution after 'query1' is flushed and
# synced to binary log but not yet committed. In another connection hold # synced to binary log but not yet committed. In another connection hold
# 'query2' execution after 'query2' is flushed and synced to binlog. # 'query2' execution after 'query2' is flushed and synced to binlog.
# Crash and restart server with --rpl-semi-sync-slave-enabled=1 # Crash and restart server with --init-rpl-role=SLAVE
# #
# During recovery of binary log 'query1' status is checked with InnoDB engine, # During recovery of binary log 'query1' status is checked with InnoDB engine,
# it will be in prepared but not yet committed. All transactions starting from # it will be in prepared but not yet committed. All transactions starting from
......
...@@ -28,7 +28,7 @@ CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=rocksdb; ...@@ -28,7 +28,7 @@ CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=rocksdb;
# The transaction is killed along with the server after that. # The transaction is killed along with the server after that.
--let $shutdown_timeout=0 --let $shutdown_timeout=0
--let $debug_sync_action = "commit_after_release_LOCK_log SIGNAL con1_ready WAIT_FOR signal_no_signal" --let $debug_sync_action = "commit_after_release_LOCK_log SIGNAL con1_ready WAIT_FOR signal_no_signal"
--let $restart_parameters = --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --let $restart_parameters = --init-rpl-role=SLAVE --sync-binlog=1
--let $test_outcome= 1 row should be present in both tables; binlog is truncated; number of binlogs at reconnect - 3 --let $test_outcome= 1 row should be present in both tables; binlog is truncated; number of binlogs at reconnect - 3
--source binlog_truncate_multi_engine.inc --source binlog_truncate_multi_engine.inc
--echo Proof of the truncated binlog file is readable (two transactions must be seen): --echo Proof of the truncated binlog file is readable (two transactions must be seen):
...@@ -41,7 +41,7 @@ CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=rocksdb; ...@@ -41,7 +41,7 @@ CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=rocksdb;
--let $debug_sync_action = "" --let $debug_sync_action = ""
# Both debug_sync and debug-dbug are required to make sure Engines remember the commit state # Both debug_sync and debug-dbug are required to make sure Engines remember the commit state
# debug_sync alone will not help. # debug_sync alone will not help.
--let $restart_parameters = --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --let $restart_parameters = --init-rpl-role=SLAVE --sync-binlog=1
--let $test_outcome= 2 rows should be present in both tables; no binlog truncation; one extra binlog file compare with A; number of binlogs at reconnect - 4 --let $test_outcome= 2 rows should be present in both tables; no binlog truncation; one extra binlog file compare with A; number of binlogs at reconnect - 4
--source binlog_truncate_multi_engine.inc --source binlog_truncate_multi_engine.inc
...@@ -50,7 +50,7 @@ CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=rocksdb; ...@@ -50,7 +50,7 @@ CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=rocksdb;
--let $debug_sync_action = "commit_after_run_commit_ordered SIGNAL con1_ready" --let $debug_sync_action = "commit_after_run_commit_ordered SIGNAL con1_ready"
# Hold off after both engines have committed. The server is shut down. # Hold off after both engines have committed. The server is shut down.
--let $shutdown_timeout= --let $shutdown_timeout=
--let $restart_parameters = --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --let $restart_parameters = --init-rpl-role=SLAVE --sync-binlog=1
--let $test_outcome= 2 rows should be present in both tables; no binlog truncation; the same # of binlog files as in B; number of binlogs at reconnect - 4 --let $test_outcome= 2 rows should be present in both tables; no binlog truncation; the same # of binlog files as in B; number of binlogs at reconnect - 4
--source binlog_truncate_multi_engine.inc --source binlog_truncate_multi_engine.inc
......
...@@ -63,7 +63,7 @@ SELECT @@global.gtid_binlog_state; ...@@ -63,7 +63,7 @@ SELECT @@global.gtid_binlog_state;
# #
# Server restart # Server restart
# #
--let $restart_parameters= --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --log-warnings=3 --let $restart_parameters= --init-rpl-role=SLAVE --sync-binlog=1 --log-warnings=3
--source include/start_mysqld.inc --source include/start_mysqld.inc
# Check error log for a successful truncate message. # Check error log for a successful truncate message.
......
...@@ -92,7 +92,7 @@ SELECT @@global.gtid_binlog_state; ...@@ -92,7 +92,7 @@ SELECT @@global.gtid_binlog_state;
# #
--echo # Failed restart as the semisync slave --echo # Failed restart as the semisync slave
--error 1 --error 1
--exec $MYSQLD_LAST_CMD --rpl-semi-sync-slave-enabled=1 >> $MYSQLTEST_VARDIR/log/mysqld.1.err 2>&1 --exec $MYSQLD_LAST_CMD --init-rpl-role=SLAVE >> $MYSQLTEST_VARDIR/log/mysqld.1.err 2>&1
--echo # Normal restart --echo # Normal restart
--source include/start_mysqld.inc --source include/start_mysqld.inc
......
...@@ -50,7 +50,7 @@ on slave must be 2 ...@@ -50,7 +50,7 @@ on slave must be 2
SELECT @@GLOBAL.gtid_current_pos; SELECT @@GLOBAL.gtid_current_pos;
@@GLOBAL.gtid_current_pos @@GLOBAL.gtid_current_pos
0-1-4 0-1-4
# restart: --skip-slave-start=1 --rpl-semi-sync-slave-enabled=1 # restart: --skip-slave-start=1 --rpl-semi-sync-slave-enabled=1 --init-rpl-role=SLAVE
connection server_1; connection server_1;
# Ensuring variable rpl_semi_sync_slave_enabled is ON.. # Ensuring variable rpl_semi_sync_slave_enabled is ON..
# Ensuring status rpl_semi_sync_slave_status is OFF.. # Ensuring status rpl_semi_sync_slave_status is OFF..
...@@ -136,7 +136,7 @@ on slave must be 5 ...@@ -136,7 +136,7 @@ on slave must be 5
SELECT @@GLOBAL.gtid_current_pos; SELECT @@GLOBAL.gtid_current_pos;
@@GLOBAL.gtid_current_pos @@GLOBAL.gtid_current_pos
0-2-7 0-2-7
# restart: --skip-slave-start=1 --rpl-semi-sync-slave-enabled=1 # restart: --skip-slave-start=1 --rpl-semi-sync-slave-enabled=1 --init-rpl-role=SLAVE
connection server_2; connection server_2;
# Ensuring variable rpl_semi_sync_slave_enabled is ON.. # Ensuring variable rpl_semi_sync_slave_enabled is ON..
# Ensuring status rpl_semi_sync_slave_status is OFF.. # Ensuring status rpl_semi_sync_slave_status is OFF..
...@@ -221,7 +221,7 @@ on slave must be 7 ...@@ -221,7 +221,7 @@ on slave must be 7
SELECT @@GLOBAL.gtid_current_pos; SELECT @@GLOBAL.gtid_current_pos;
@@GLOBAL.gtid_current_pos @@GLOBAL.gtid_current_pos
0-1-9 0-1-9
# restart: --skip-slave-start=1 --rpl-semi-sync-slave-enabled=1 # restart: --skip-slave-start=1 --rpl-semi-sync-slave-enabled=1 --init-rpl-role=SLAVE
connection server_1; connection server_1;
# Ensuring variable rpl_semi_sync_slave_enabled is ON.. # Ensuring variable rpl_semi_sync_slave_enabled is ON..
# Ensuring status rpl_semi_sync_slave_status is OFF.. # Ensuring status rpl_semi_sync_slave_status is OFF..
......
...@@ -74,7 +74,7 @@ source include/wait_for_slave_param.inc; ...@@ -74,7 +74,7 @@ source include/wait_for_slave_param.inc;
SELECT @@GLOBAL.gtid_current_pos; SELECT @@GLOBAL.gtid_current_pos;
--let $restart_parameters=--skip-slave-start=1 --rpl-semi-sync-slave-enabled=1 --let $restart_parameters=--skip-slave-start=1 --rpl-semi-sync-slave-enabled=1 --init-rpl-role=SLAVE
--let $allow_rpl_inited=1 --let $allow_rpl_inited=1
--source include/start_mysqld.inc --source include/start_mysqld.inc
--connection server_$server_to_crash --connection server_$server_to_crash
......
...@@ -40,6 +40,7 @@ ...@@ -40,6 +40,7 @@
#include "sql_audit.h" #include "sql_audit.h"
#include "mysqld.h" #include "mysqld.h"
#include "ddl_log.h" #include "ddl_log.h"
#include "repl_failsafe.h"
#include <my_dir.h> #include <my_dir.h>
#include <m_ctype.h> // For test_if_number #include <m_ctype.h> // For test_if_number
...@@ -11025,7 +11026,7 @@ Recovery_context::Recovery_context() : ...@@ -11025,7 +11026,7 @@ Recovery_context::Recovery_context() :
prev_event_pos(0), prev_event_pos(0),
last_gtid_standalone(false), last_gtid_valid(false), last_gtid_no2pc(false), last_gtid_standalone(false), last_gtid_valid(false), last_gtid_no2pc(false),
last_gtid_engines(0), last_gtid_engines(0),
do_truncate(global_rpl_semi_sync_slave_enabled), do_truncate(rpl_status == RPL_IDLE_SLAVE),
truncate_validated(false), truncate_reset_done(false), truncate_validated(false), truncate_reset_done(false),
truncate_set_in_1st(false), id_binlog(MAX_binlog_id), truncate_set_in_1st(false), id_binlog(MAX_binlog_id),
checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF), gtid_maybe_to_truncate(NULL) checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF), gtid_maybe_to_truncate(NULL)
......
...@@ -6627,7 +6627,7 @@ struct my_option my_long_options[]= ...@@ -6627,7 +6627,7 @@ struct my_option my_long_options[]=
#ifdef HAVE_REPLICATION #ifdef HAVE_REPLICATION
{"init-rpl-role", 0, "Set the replication role", {"init-rpl-role", 0, "Set the replication role",
&rpl_status, &rpl_status, &rpl_role_typelib, &rpl_status, &rpl_status, &rpl_role_typelib,
GET_ENUM, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, GET_ENUM, REQUIRED_ARG, RPL_AUTH_MASTER, 0, 0, 0, 0, 0},
#endif /* HAVE_REPLICATION */ #endif /* HAVE_REPLICATION */
{"memlock", 0, "Lock mysqld in memory.", &locked_in_memory, {"memlock", 0, "Lock mysqld in memory.", &locked_in_memory,
&locked_in_memory, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, &locked_in_memory, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment