Commit 74bd3572 authored by Alexey Yurchenko's avatar Alexey Yurchenko Committed by Julius Goryavsky

MDEV-34998: master can stop responding after cluster vote to evict a node

After cluster vote to evict a node that failed a transaction,
current master can't commit anymore. Commit related to MENT-2081
for galera libray fixes this issue. This commit adds 3 MTR tests
to verify the fix for MENT-2081 works as designed.

Requires Galera commit 91f0090a05e96c3cc353b80d961ede45cefb9279
Signed-off-by: default avatarJulius Goryavsky <julius.goryavsky@mariadb.com>
parent 4b4cf8a3
connection node_4;
connection node_3;
connection node_2;
connection node_1;
connection node_1;
connection node_2;
connection node_3;
connection node_4;
connection node_1;
CREATE TABLE t1(pk INT AUTO_INCREMENT PRIMARY KEY);
CREATE PROCEDURE p1(IN max INT)
BEGIN
DECLARE i INT;
DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END;
SET i = 0;
WHILE i < max DO
INSERT IGNORE INTO t1 VALUES (DEFAULT);
SET i = i + 1;
END WHILE;
END|
CALL p1(130);
connection node_4;
Shutting down server 4...
connection node_1;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_2;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_3;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
Server 4 left the cluster
connection node_1;
CALL p1(130);
connection node_1;
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
connection node_2;
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
connection node_3;
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
INSERT INTO t2 VALUES (DEFAULT);
CALL p1(130);
connection node_1;
SET GLOBAL debug = "+d,sync.wsrep_sst_donor_after_donation";
Restarting server 4
Wait for server 1 to become a donor
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_sst_donor_after_donation_reached";
Server 1 got SST request from server 4
SET SESSION DEBUG_SYNC = "now SIGNAL signal.wsrep_sst_donor_after_donation_continue";
SET GLOBAL debug = "";
SET DEBUG_SYNC='RESET';
Waiting for server 4 to leave the cluster
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_2;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_3;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_4;
Server 4 left the cluster, killing it...
Killed server 4...
Restarting server 4...
connection node_1;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_1;
SELECT count(*) AS expect1_390 FROM t1;
expect1_390
390
SELECT count(*) AS expect1_1 FROM t2;
expect1_1
1
connection node_2;
SELECT count(*) AS expect2_390 FROM t1;
expect2_390
390
SELECT count(*) AS expect2_1 FROM t2;
expect2_1
1
connection node_3;
SELECT count(*) AS expect3_390 FROM t1;
expect3_390
390
SELECT count(*) AS expect3_1 FROM t2;
expect3_1
1
connection node_4;
SELECT count(*) AS expect4_390 FROM t1;
expect4_390
390
SELECT count(*) AS expect4_1 FROM t2;
expect4_1
1
DROP TABLE t1;
DROP TABLE t2;
DROP PROCEDURE p1;
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
CALL mtr.add_suppression("Inconsistency detected: Failed on preordered");
CALL mtr.add_suppression("Failed to apply write set");
connection node_4;
connection node_3;
connection node_2;
connection node_1;
connection node_1;
connection node_2;
connection node_3;
connection node_4;
connection node_1;
CREATE TABLE t1(pk INT AUTO_INCREMENT PRIMARY KEY);
CREATE PROCEDURE p1(IN max INT)
BEGIN
DECLARE i INT;
DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END;
SET i = 0;
WHILE i < max DO
INSERT IGNORE INTO t1 VALUES (DEFAULT);
SET i = i + 1;
END WHILE;
END|
CALL p1(130);
connection node_4;
Shutting down server 4...
connection node_1;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
SET GLOBAL debug = "+d,sync.wsrep_donor_state";
connection node_4;
Restarting server 4...
connection node_1;
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_donor_state_reached";
Tables on server 1 flushed and locked for SST to server 4
SET SESSION DEBUG_SYNC = "now SIGNAL signal.wsrep_donor_state";
SET GLOBAL debug = "";
SET DEBUG_SYNC='RESET';
Wait for the state snapshot to be copied to server 4
SST script unlocked server 1
connection node_1;
CALL p1(130);
connection node_1;
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
connection node_2;
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
connection node_3;
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
INSERT INTO t2 VALUES (DEFAULT);
CALL p1(130);
Waiting for server 4 to leave the cluster
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_2;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_1;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_4;
Server 4 left the cluster, killing it...
Killed server 4...
Restarting server 4...
DROP TABLE t2;
connection node_1;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_1;
SELECT count(*) AS expect1_390 FROM t1;
expect1_390
390
connection node_2;
SELECT count(*) AS expect2_390 FROM t1;
expect2_390
390
connection node_3;
SELECT count(*) AS expect3_390 FROM t1;
expect3_390
390
connection node_4;
SELECT count(*) AS expect4_390 FROM t1;
expect4_390
390
DROP TABLE t1;
DROP PROCEDURE p1;
connection node_4;
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
CALL mtr.add_suppression("Inconsistency detected: Inconsistent by consensus");
CALL mtr.add_suppression("Failed to apply write set: gtid:");
connection node_4;
connection node_3;
connection node_2;
connection node_1;
connection node_1;
connection node_2;
connection node_3;
connection node_4;
connection node_1;
CREATE TABLE t1(pk INT AUTO_INCREMENT PRIMARY KEY);
CREATE PROCEDURE p1(IN max INT)
BEGIN
DECLARE i INT;
DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END;
SET i = 0;
WHILE i < max DO
INSERT IGNORE INTO t1 VALUES (DEFAULT);
SET i = i + 1;
END WHILE;
END|
CALL p1(130);
connection node_4;
Shutting down server 4...
connection node_1;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
SET GLOBAL debug = "+d,sync.wsrep_donor_state";
connection node_4;
Restarting server 4...
connection node_1;
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_donor_state_reached";
Tables on server 1 flushed and locked for SST to server 4
SET SESSION DEBUG_SYNC = "now SIGNAL signal.wsrep_donor_state";
SET GLOBAL debug = "";
SET DEBUG_SYNC='RESET';
Wait for the state snapshot to be copied to server 4
SST script unlocked server 1
connection node_1;
CALL p1(130);
connection node_3;
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
INSERT INTO t2 VALUES (DEFAULT);
SET SESSION wsrep_on = OFF;
connection node_1;
CALL p1(130);
Waiting for server 3 to leave the cluster
connection node_1;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_2;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_4;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_3;
Server 3 left the cluster, killing it...
Killed server 3.
Restarting server 3...
Waiting for server 3 to rejoin the cluster
connection node_1;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_3;
sleeping for 20
Waiting ready
Server 3 restarted.
connection node_1;
SET SESSION wsrep_on = ON;
SET SESSION wsrep_sync_wait = 15;
connection node_1;
SELECT count(*) AS expect1_390 FROM t1;
expect1_390
390
connection node_2;
SELECT count(*) AS expect2_390 FROM t1;
expect2_390
390
connection node_3;
SELECT count(*) AS expect3_390 FROM t1;
expect3_390
390
connection node_4;
SELECT count(*) AS expect4_390 FROM t1;
expect4_390
390
DROP TABLE t1;
DROP PROCEDURE p1;
connection node_1;
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
connection node_2;
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
connection node_3;
CALL mtr.add_suppression("Vote 0 \\(success\\) on .* is inconsistent with group");
connection node_4;
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
!include ../galera_4nodes.cnf
[mysqld]
wsrep-ignore-apply-errors=0
[mysqld.1]
wsrep_node_name='node_1'
[mysqld.2]
wsrep_node_name='node_2'
[mysqld.3]
wsrep_node_name='node_3'
[mysqld.4]
wsrep_node_name='node_4'
wsrep_sst_donor='node_1'
[ENV]
galera_cluster_size=4
#
# Test a case where a joiner encounters an error during IST
# Instead of voting it should assume error and bail out.
#
--source include/galera_cluster.inc
--source include/big_test.inc
--source include/have_debug_sync.inc
--let $node_1=node_1
--let $node_2=node_2
--let $node_3=node_3
--let $node_4=node_4
--source ../include/auto_increment_offset_save.inc
# create table t1 and procedure p1 to generate wirtesets
--connection node_1
CREATE TABLE t1(pk INT AUTO_INCREMENT PRIMARY KEY);
DELIMITER |;
CREATE PROCEDURE p1(IN max INT)
BEGIN
DECLARE i INT;
DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END;
SET i = 0;
WHILE i < max DO
INSERT IGNORE INTO t1 VALUES (DEFAULT);
SET i = i + 1;
END WHILE;
END|
DELIMITER ;|
CALL p1(130);
--connection node_4
--echo Shutting down server 4...
--let $node_4_server_id= `SELECT @@server_id`
--let $node_4_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$node_4_server_id.expect
--let $node_4_pid_file= `SELECT @@pid_file`
--source include/shutdown_mysqld.inc
# Wait for node #4 to leave cluster
--let $members = 3
--connection node_1
--source include/wsrep_wait_membership.inc
--connection node_2
--source include/wsrep_wait_membership.inc
--connection node_3
--source include/wsrep_wait_membership.inc
--echo Server 4 left the cluster
# Create some writesets for IST
--connection node_1
CALL p1(130);
# Create a writeset that node 4 won't be able to apply by creating a table
# that won't be present in the replication stream
--connection node_1
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
--connection node_2
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
--connection node_3
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
# This should cause error during IST
INSERT INTO t2 VALUES (DEFAULT);
# make sure nodes 1,2,3 progress far enough for commit cut update
CALL p1(130);
--connection node_1
# prepare to stop SST donor thread when it receives a request from starting node #4
SET GLOBAL debug = "+d,sync.wsrep_sst_donor_after_donation";
--echo Restarting server 4
# Need to use this form instead of start_mysqld.inc because the latter is blocking
--exec echo "restart:$start_mysqld_params" > $node_4_expect_file_name
--echo Wait for server 1 to become a donor
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_sst_donor_after_donation_reached";
--echo Server 1 got SST request from server 4
SET SESSION DEBUG_SYNC = "now SIGNAL signal.wsrep_sst_donor_after_donation_continue";
SET GLOBAL debug = "";
SET DEBUG_SYNC='RESET';
#
# After this point node #4 shall proceed to IST and bail out
#
--echo Waiting for server 4 to leave the cluster
--let $members = 3
--source include/wsrep_wait_membership.inc
--connection node_2
--source include/wsrep_wait_membership.inc
--connection node_3
--source include/wsrep_wait_membership.inc
--connection node_4
--echo Server 4 left the cluster, killing it...
# Kill the connected server
--exec echo "wait" > $node_4_expect_file_name
--let KILL_NODE_PIDFILE = $node_4_pid_file
--perl
my $pid_filename = $ENV{'KILL_NODE_PIDFILE'};
my $mysqld_pid = `cat $pid_filename`;
chomp($mysqld_pid);
system("kill -9 $mysqld_pid");
exit(0);
EOF
--echo Killed server 4...
--source include/wait_until_disconnected.inc
--echo Restarting server 4...
--source include/start_mysqld.inc
--source include/galera_wait_ready.inc
# Confirm node #4 has rejoined
--connection node_1
--let $members = 4
--source include/wsrep_wait_membership.inc
# Confirm that all is good and all nodes have identical data
--connection node_1
SELECT count(*) AS expect1_390 FROM t1;
SELECT count(*) AS expect1_1 FROM t2;
--connection node_2
SELECT count(*) AS expect2_390 FROM t1;
SELECT count(*) AS expect2_1 FROM t2;
--connection node_3
SELECT count(*) AS expect3_390 FROM t1;
SELECT count(*) AS expect3_1 FROM t2;
--connection node_4
SELECT count(*) AS expect4_390 FROM t1;
SELECT count(*) AS expect4_1 FROM t2;
DROP TABLE t1;
DROP TABLE t2;
DROP PROCEDURE p1;
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
CALL mtr.add_suppression("Inconsistency detected: Failed on preordered");
CALL mtr.add_suppression("Failed to apply write set");
--source ../include/auto_increment_offset_restore.inc
!include ../galera_4nodes.cnf
[mysqld]
wsrep-ignore-apply-errors=0
[mysqld.1]
wsrep_node_name='node_1'
[mysqld.2]
wsrep_node_name='node_2'
[mysqld.3]
wsrep_node_name='node_3'
[mysqld.4]
wsrep_node_name='node_4'
wsrep_sst_donor='node_1'
[ENV]
galera_cluster_size=4
MTR_SST_JOINER_DELAY=20
#
# Test a case where a vote happens in JOINED state after SST on a writeset
# that should be applied.
#
--source galera_vote_joined_begin.inc
#
# At this point state snapshot has been copied, node 1 is operational and
# we have about 10 seconds while everything we do will go into the replication
# queue on node 4 which it will have to apply on top of the snapshot.
#
# Increase replication queue on node_4
--connection node_1
CALL p1(130);
# Create a writeset that node 4 won't be able to apply by creating a table
# that won't be present in the replication stream
--connection node_1
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
--connection node_2
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
--connection node_3
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
# This should cause node #4 to initiate a vote and leave the cluster
INSERT INTO t2 VALUES (DEFAULT);
# make sure nodes 1,2,3 progress far enough for commit cut update
CALL p1(130);
--echo Waiting for server 4 to leave the cluster
--let $members = 3
--source include/wsrep_wait_membership.inc
--connection node_2
--source include/wsrep_wait_membership.inc
--connection node_1
--source include/wsrep_wait_membership.inc
--connection node_4
--echo Server 4 left the cluster, killing it...
# Kill the connected server
--exec echo "wait" > $node_4_expect_file_name
--let KILL_NODE_PIDFILE = $node_4_pid_file
--perl
my $pid_filename = $ENV{'KILL_NODE_PIDFILE'};
my $mysqld_pid = `cat $pid_filename`;
chomp($mysqld_pid);
system("kill -9 $mysqld_pid");
exit(0);
EOF
--echo Killed server 4...
--source include/wait_until_disconnected.inc
--echo Restarting server 4...
--source include/start_mysqld.inc
--source include/galera_wait_ready.inc
DROP TABLE t2;
--source galera_vote_joined_end.inc
--connection node_4
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
CALL mtr.add_suppression("Inconsistency detected: Inconsistent by consensus");
CALL mtr.add_suppression("Failed to apply write set: gtid:");
# This file purpose is to set up node 4 to require SST which is artificaially
# prolonged and as a result accumulate sufficient relication queue.
# The contents of the qeuee are controlled in the sourcing test files.
--source include/galera_cluster.inc
--source include/big_test.inc
--source include/have_debug_sync.inc
--let $node_1=node_1
--let $node_2=node_2
--let $node_3=node_3
--let $node_4=node_4
--source ../include/auto_increment_offset_save.inc
# create table t1 and procedure p1 to generate wirtesets
--connection node_1
CREATE TABLE t1(pk INT AUTO_INCREMENT PRIMARY KEY);
DELIMITER |;
CREATE PROCEDURE p1(IN max INT)
BEGIN
DECLARE i INT;
DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END;
SET i = 0;
WHILE i < max DO
INSERT IGNORE INTO t1 VALUES (DEFAULT);
SET i = i + 1;
END WHILE;
END|
DELIMITER ;|
# 130 events move the commit cut, it is essential in voting
CALL p1(130);
--connection node_4
--echo Shutting down server 4...
--let $node_4_server_id= `SELECT @@server_id`
--let $node_4_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$node_4_server_id.expect
--let $node_4_pid_file= `SELECT @@pid_file`
--source include/shutdown_mysqld.inc
# enforce SST
--exec rm -rf $MYSQLTEST_VARDIR/mysqld.4/data/grastate.dat
# Wait for node #4 to leave cluster
--connection node_1
--let $members = 3
--source include/wsrep_wait_membership.inc
# prepare to stop SST donor thread when node is in donor state
SET GLOBAL debug = "+d,sync.wsrep_donor_state";
--connection node_4
--echo Restarting server 4...
# Need to use this form instead of start_mysqld.inc because the latter is blocking
--exec echo "restart:$start_mysqld_params" > $node_4_expect_file_name
# Wait for node #1 to become a donor
--connection node_1
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_donor_state_reached";
--echo Tables on server 1 flushed and locked for SST to server 4
SET SESSION DEBUG_SYNC = "now SIGNAL signal.wsrep_donor_state";
SET GLOBAL debug = "";
SET DEBUG_SYNC='RESET';
--echo Wait for the state snapshot to be copied to server 4
--source include/galera_wait_ready.inc
--echo SST script unlocked server 1
#
# At this point state snapshot has been copied, node 1 is operational and
# we have about 20 seconds while everything we do will go into the replication
# queue on node 4 which it will have to apply on top of the snapshot.
#
# Confirm node #4 has rejoined
--connection node_1
--let $members = 4
--source include/wsrep_wait_membership.inc
#DROP TABLE IF EXISTS t2;
# Confirm that all is good and all nodes have identical data
--connection node_1
SELECT count(*) AS expect1_390 FROM t1;
#CALL mtr.add_suppression("Replica SQL: Could not execute Delete_rows");
#CALL mtr.add_suppression("Event 3 Delete_rows apply failed: 120, seqno [0-9]*");
--connection node_2
SELECT count(*) AS expect2_390 FROM t1;
#CALL mtr.add_suppression("mysqld: Can't find record in 't1'");
#CALL mtr.add_suppression("Replica SQL: Could not execute Delete_rows");
#CALL mtr.add_suppression("Event 3 Delete_rows apply failed: 120, seqno seqno [0-9]*");
--connection node_3
SELECT count(*) AS expect3_390 FROM t1;
--connection node_4
SELECT count(*) AS expect4_390 FROM t1;
DROP TABLE t1;
DROP PROCEDURE p1;
#CALL mtr.add_suppression("inconsistent with group");
--source ../include/auto_increment_offset_restore.inc
!include ../galera_4nodes.cnf
[mysqld]
wsrep-ignore-apply-errors=0
[mysqld.1]
wsrep_node_name='node_1'
[mysqld.2]
wsrep_node_name='node_2'
[mysqld.3]
wsrep_node_name='node_3'
[mysqld.4]
wsrep_node_name='node_4'
wsrep_sst_donor='node_1'
[ENV]
galera_cluster_size=4
MTR_SST_JOINER_DELAY=20
#
# Test a case where a vote happens in JOINED state after SST on a writeset
# that should be skipped. I.e. JOINED node should continue operation.
#
--source galera_vote_joined_begin.inc
#
# At this point state snapshot has been copied, node 1 is operational and
# we have about 10 seconds while everything we do will go into the replication
# queue on node 4 which it will have to apply on top of the snapshot.
#
# Increase replication queue on node_4
--connection node_1
CALL p1(130);
#
# Create a writeset that node 4 won't be able to apply by making node 3
# inconsisitent
#
--connection node_3
--let $node_3_server_id= `SELECT @@server_id`
--let $node_3_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$node_3_server_id.expect
--let $node_3_pid_file= `SELECT @@pid_file`
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
# This should cause nodes #1 and #2 to initiate a vote and kick node #3
# out of the cluster, node #4 should recover the vote when fails to apply
# the event and continue
INSERT INTO t2 VALUES (DEFAULT);
SET SESSION wsrep_on = OFF;
# make sure nodes 1,2 progress far enough for commit cut update
--connection node_1
CALL p1(130);
--let $members = 3
--echo Waiting for server 3 to leave the cluster
--connection node_1
--source include/wsrep_wait_membership.inc
--connection node_2
--source include/wsrep_wait_membership.inc
--connection node_4
# need to wait for extra SST delay on joiner
--sleep $MTR_SST_JOINER_DELAY
--sleep $MTR_SST_JOINER_DELAY
--enable_reconnect
--let $wait_timeout = 60
--source include/wsrep_wait_membership.inc
--connection node_3
--echo Server 3 left the cluster, killing it...
# Kill the connected server
--exec echo "wait" > $node_3_expect_file_name
--let KILL_NODE_PIDFILE = $node_3_pid_file
--perl
my $pid_filename = $ENV{'KILL_NODE_PIDFILE'};
my $mysqld_pid = `cat $pid_filename`;
chomp($mysqld_pid);
system("kill -9 $mysqld_pid");
exit(0);
EOF
--echo Killed server 3.
--source include/wait_until_disconnected.inc
--echo Restarting server 3...
--exec echo "restart:$start_mysqld_params" > $node_3_expect_file_name
--echo Waiting for server 3 to rejoin the cluster
--connection node_1
--let $members = 3
--source include/wsrep_wait_membership.inc
--connection node_3
--echo sleeping for $MTR_SST_JOINER_DELAY
# need to wait for extra SST delay on joiner
--sleep $MTR_SST_JOINER_DELAY
--sleep $MTR_SST_JOINER_DELAY
--echo Waiting ready
--enable_reconnect
--source include/galera_wait_ready.inc
--echo Server 3 restarted.
--source galera_vote_joined_end.inc
--connection node_1
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
--connection node_2
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
--connection node_3
CALL mtr.add_suppression("Vote 0 \\(success\\) on .* is inconsistent with group");
--connection node_4
CALL mtr.add_suppression("BF applier failed to open_and_lock_tables: 1146");
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
......@@ -915,6 +915,13 @@ EOF
fi
fi
# Delay for MTR tests if needed to simulate long SST
if [ ${MTR_SST_JOINER_DELAY:=0} -gt 0 ]
then
wsrep_log_info "Sleeping $MTR_SST_JOINER_DELAY seconds for MTR test"
sleep $MTR_SST_JOINER_DELAY
fi
# Remove special tags from the magic file, and from the output:
coords=$(head -n1 "$MAGIC_FILE")
wsrep_log_info "Galera co-ords from recovery: $coords"
......
......@@ -1875,8 +1875,17 @@ static void* sst_donor_thread (void* a)
wsrep::seqno(err ? wsrep::seqno::undefined() :
wsrep::seqno(ret_seqno)));
Wsrep_server_state::instance().sst_sent(gtid, err);
#ifdef ENABLED_DEBUG_SYNC
DBUG_EXECUTE_IF("sync.wsrep_sst_donor_after_donation", {
const char act[] =
"now "
"SIGNAL sync.wsrep_sst_donor_after_donation_reached "
"WAIT_FOR signal.wsrep_sst_donor_after_donation_continue";
assert(!debug_sync_set_action(thd.ptr, STRING_WITH_LEN(act)));
});
#endif /* ENABLED_DEBUG_SYNC */
Wsrep_server_state::instance().sst_sent(gtid, err);
proc.wait();
wsrep_donor_monitor_end();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment