Commit d388e7eb authored by Julius Goryavsky's avatar Julius Goryavsky Committed by Sergei Golubchik

MDEV-28583: Galera: binlogs disappear after rsync IST

This commit sends a flag indicating the presence of the "--bypass"
option from the donor node to the joiner nodes during rsync IST,
because without such a flag it is impossible to distinguish IST
from the SST on the joiner nodes (in IST/SST scripts, because the
"--bypass" option is still not passed to scripts from server code).
Specifically, this fixes an issue with binary logs disappearing
after IST (via rsync). There are also changes to diagnostic messages
here that will make it easier to diagnose script-related problems
in the future when debugging and when checking the logs. This commit
also adds more robust signal handlers - to handle exceptions during
script execution. These handlers won't mask some crashes and it
also unifies exit codes between different scripts. These changes
have already been helpful to debugging "bypass" flag handling.
parent b081ad8c
--- r/galera_ist_MDEV-28583.result
+++ r/galera_ist_MDEV-28583,debug.reject
@@ -517,3 +517,187 @@
1
DROP TABLE t1;
COMMIT;
+Performing State Transfer on a server that has been killed and restarted
+while a DDL was in progress on it
+connection node_1;
+CREATE TABLE t1 (id int not null primary key,f1 CHAR(255)) ENGINE=InnoDB;
+SET AUTOCOMMIT=OFF;
+START TRANSACTION;
+INSERT INTO t1 VALUES (1,'node1_committed_before');
+INSERT INTO t1 VALUES (2,'node1_committed_before');
+INSERT INTO t1 VALUES (3,'node1_committed_before');
+INSERT INTO t1 VALUES (4,'node1_committed_before');
+INSERT INTO t1 VALUES (5,'node1_committed_before');
+connection node_2;
+START TRANSACTION;
+INSERT INTO t1 VALUES (6,'node2_committed_before');
+INSERT INTO t1 VALUES (7,'node2_committed_before');
+INSERT INTO t1 VALUES (8,'node2_committed_before');
+INSERT INTO t1 VALUES (9,'node2_committed_before');
+INSERT INTO t1 VALUES (10,'node2_committed_before');
+COMMIT;
+SET GLOBAL debug_dbug = 'd,sync.alter_opened_table';
+connection node_1;
+ALTER TABLE t1 ADD COLUMN f2 INTEGER;
+connection node_2;
+SET wsrep_sync_wait = 0;
+Killing server ...
+connection node_1;
+SET AUTOCOMMIT=OFF;
+START TRANSACTION;
+INSERT INTO t1 (id,f1) VALUES (11,'node1_committed_during');
+INSERT INTO t1 (id,f1) VALUES (12,'node1_committed_during');
+INSERT INTO t1 (id,f1) VALUES (13,'node1_committed_during');
+INSERT INTO t1 (id,f1) VALUES (14,'node1_committed_during');
+INSERT INTO t1 (id,f1) VALUES (15,'node1_committed_during');
+COMMIT;
+START TRANSACTION;
+INSERT INTO t1 (id,f1) VALUES (16,'node1_to_be_committed_after');
+INSERT INTO t1 (id,f1) VALUES (17,'node1_to_be_committed_after');
+INSERT INTO t1 (id,f1) VALUES (18,'node1_to_be_committed_after');
+INSERT INTO t1 (id,f1) VALUES (19,'node1_to_be_committed_after');
+INSERT INTO t1 (id,f1) VALUES (20,'node1_to_be_committed_after');
+connect node_1a_galera_st_kill_slave_ddl, 127.0.0.1, root, , test, $NODE_MYPORT_1;
+SET AUTOCOMMIT=OFF;
+START TRANSACTION;
+INSERT INTO t1 (id,f1) VALUES (21,'node1_to_be_rollbacked_after');
+INSERT INTO t1 (id,f1) VALUES (22,'node1_to_be_rollbacked_after');
+INSERT INTO t1 (id,f1) VALUES (23,'node1_to_be_rollbacked_after');
+INSERT INTO t1 (id,f1) VALUES (24,'node1_to_be_rollbacked_after');
+INSERT INTO t1 (id,f1) VALUES (25,'node1_to_be_rollbacked_after');
+connection node_2;
+Performing --wsrep-recover ...
+connection node_2;
+Starting server ...
+Using --wsrep-start-position when starting mysqld ...
+SET AUTOCOMMIT=OFF;
+START TRANSACTION;
+INSERT INTO t1 (id,f1) VALUES (26,'node2_committed_after');
+INSERT INTO t1 (id,f1) VALUES (27,'node2_committed_after');
+INSERT INTO t1 (id,f1) VALUES (28,'node2_committed_after');
+INSERT INTO t1 (id,f1) VALUES (29,'node2_committed_after');
+INSERT INTO t1 (id,f1) VALUES (30,'node2_committed_after');
+COMMIT;
+connection node_1;
+INSERT INTO t1 (id,f1) VALUES (31,'node1_to_be_committed_after');
+INSERT INTO t1 (id,f1) VALUES (32,'node1_to_be_committed_after');
+INSERT INTO t1 (id,f1) VALUES (33,'node1_to_be_committed_after');
+INSERT INTO t1 (id,f1) VALUES (34,'node1_to_be_committed_after');
+INSERT INTO t1 (id,f1) VALUES (35,'node1_to_be_committed_after');
+COMMIT;
+SET AUTOCOMMIT=OFF;
+START TRANSACTION;
+INSERT INTO t1 (id,f1) VALUES (36,'node1_committed_after');
+INSERT INTO t1 (id,f1) VALUES (37,'node1_committed_after');
+INSERT INTO t1 (id,f1) VALUES (38,'node1_committed_after');
+INSERT INTO t1 (id,f1) VALUES (39,'node1_committed_after');
+INSERT INTO t1 (id,f1) VALUES (40,'node1_committed_after');
+COMMIT;
+connection node_1a_galera_st_kill_slave_ddl;
+INSERT INTO t1 (id,f1) VALUES (41,'node1_to_be_rollbacked_after');
+INSERT INTO t1 (id,f1) VALUES (42,'node1_to_be_rollbacked_after');
+INSERT INTO t1 (id,f1) VALUES (43,'node1_to_be_rollbacked_after');
+INSERT INTO t1 (id,f1) VALUES (44,'node1_to_be_rollbacked_after');
+INSERT INTO t1 (id,f1) VALUES (45,'node1_to_be_rollbacked_after');
+ROLLBACK;
+SET AUTOCOMMIT=ON;
+SET SESSION wsrep_sync_wait=15;
+SELECT COUNT(*) AS EXPECT_3 FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 't1';
+EXPECT_3
+3
+SELECT COUNT(*) AS EXPECT_35 FROM t1;
+EXPECT_35
+35
+SELECT * FROM t1;
+id f1 f2
+1 node1_committed_before NULL
+2 node1_committed_before NULL
+3 node1_committed_before NULL
+4 node1_committed_before NULL
+5 node1_committed_before NULL
+6 node2_committed_before NULL
+7 node2_committed_before NULL
+8 node2_committed_before NULL
+9 node2_committed_before NULL
+10 node2_committed_before NULL
+11 node1_committed_during NULL
+12 node1_committed_during NULL
+13 node1_committed_during NULL
+14 node1_committed_during NULL
+15 node1_committed_during NULL
+16 node1_to_be_committed_after NULL
+17 node1_to_be_committed_after NULL
+18 node1_to_be_committed_after NULL
+19 node1_to_be_committed_after NULL
+20 node1_to_be_committed_after NULL
+26 node2_committed_after NULL
+27 node2_committed_after NULL
+28 node2_committed_after NULL
+29 node2_committed_after NULL
+30 node2_committed_after NULL
+31 node1_to_be_committed_after NULL
+32 node1_to_be_committed_after NULL
+33 node1_to_be_committed_after NULL
+34 node1_to_be_committed_after NULL
+35 node1_to_be_committed_after NULL
+36 node1_committed_after NULL
+37 node1_committed_after NULL
+38 node1_committed_after NULL
+39 node1_committed_after NULL
+40 node1_committed_after NULL
+SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1;
+COUNT(*) = 0
+1
+COMMIT;
+connection node_1;
+SET AUTOCOMMIT=ON;
+SET SESSION wsrep_sync_wait=15;
+SELECT COUNT(*) AS EXPECT_3 FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 't1';
+EXPECT_3
+3
+SELECT COUNT(*) AS EXPECT_35 FROM t1;
+EXPECT_35
+35
+SELECT * FROM t1;
+id f1 f2
+1 node1_committed_before NULL
+2 node1_committed_before NULL
+3 node1_committed_before NULL
+4 node1_committed_before NULL
+5 node1_committed_before NULL
+6 node2_committed_before NULL
+7 node2_committed_before NULL
+8 node2_committed_before NULL
+9 node2_committed_before NULL
+10 node2_committed_before NULL
+11 node1_committed_during NULL
+12 node1_committed_during NULL
+13 node1_committed_during NULL
+14 node1_committed_during NULL
+15 node1_committed_during NULL
+16 node1_to_be_committed_after NULL
+17 node1_to_be_committed_after NULL
+18 node1_to_be_committed_after NULL
+19 node1_to_be_committed_after NULL
+20 node1_to_be_committed_after NULL
+26 node2_committed_after NULL
+27 node2_committed_after NULL
+28 node2_committed_after NULL
+29 node2_committed_after NULL
+30 node2_committed_after NULL
+31 node1_to_be_committed_after NULL
+32 node1_to_be_committed_after NULL
+33 node1_to_be_committed_after NULL
+34 node1_to_be_committed_after NULL
+35 node1_to_be_committed_after NULL
+36 node1_committed_after NULL
+37 node1_committed_after NULL
+38 node1_committed_after NULL
+39 node1_committed_after NULL
+40 node1_committed_after NULL
+SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1;
+COUNT(*) = 0
+1
+DROP TABLE t1;
+COMMIT;
+SET GLOBAL debug_dbug = $debug_orig;
This diff is collapsed.
!include ../galera_2nodes.cnf
[mysqld.1]
# server-id=101
#wsrep-debug=1
innodb_file_per_table
innodb_autoinc_lock_mode=2
wsrep_sst_method=rsync
#wsrep_sst_method=mariabackup
wsrep_sst_auth=root:
binlog_format=ROW
core-file
log-output=none
wsrep_slave_threads=2
wsrep_on=1
gtid_strict_mode=1
log_slave_updates=ON
log_bin=binlog
[mysqld.2]
# server-id=102
#wsrep-debug=1
innodb_file_per_table
innodb_autoinc_lock_mode=2
wsrep_sst_method=rsync
#wsrep_sst_method=mariabackup
wsrep_sst_auth=root:
binlog_format=ROW
core-file
log-output=none
wsrep_slave_threads=2
wsrep_on=1
gtid_strict_mode=1
log_slave_updates=ON
log_bin=binlog
[mysqld.1]
wsrep_provider_options='base_port=@mysqld.1.#galera_port;pc.ignore_sb=true'
[mysqld.2]
wsrep_provider_options='base_port=@mysqld.2.#galera_port;pc.ignore_sb=true'
[sst]
transferfmt=@ENV.MTR_GALERA_TFMT
# MDEV-28583: Galera: binlogs disappear after rsync IST
--source include/big_test.inc
--source include/galera_cluster.inc
--source include/have_innodb.inc
--source include/have_mariabackup.inc
--let $node_1=node_1
--let $node_2=node_2
--source include/auto_increment_offset_save.inc
--source suite/galera/include/galera_st_disconnect_slave.inc
--source suite/galera/include/galera_st_shutdown_slave.inc
--source suite/galera/include/galera_st_kill_slave.inc
--source suite/galera/include/galera_st_kill_slave_ddl.inc
--source include/auto_increment_offset_restore.inc
......@@ -64,7 +64,7 @@ then
[ -f "$FLUSHED" ] && rm -f "$FLUSHED"
[ -f "$ERROR" ] && rm -f "$ERROR"
echo "flush tables"
echo "flush tables"
# Wait for :
# (a) Tables to be flushed, AND
......@@ -72,7 +72,7 @@ then
# (c) ERROR file, in case flush tables operation failed.
while [ ! -r "$FLUSHED" ] && \
! grep -q -F ':' '--' "$FLUSHED" >/dev/null 2>&1
! grep -q -F ':' -- "$FLUSHED" >/dev/null 2>&1
do
# Check whether ERROR file exists.
if [ -f "$ERROR" ]; then
......@@ -98,15 +98,11 @@ then
echo "done $STATE"
elif [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]
then
wsrep_log_error "Unrecognized role: '$WSREP_SST_OPT_ROLE'"
exit 22 # EINVAL
else # joiner
else
wsrep_log_error "Unrecognized role: '$WSREP_SST_OPT_ROLE'"
wsrep_log_error "Unsupported role: '$WSREP_SST_OPT_ROLE'"
exit 22 # EINVAL
fi
exit 0
......@@ -17,7 +17,8 @@
# This is a common command line parser to be sourced by other SST scripts
set -ue
trap 'exit 32' HUP PIPE
trap 'exit 3' INT QUIT TERM
# Setting the path for some utilities on CentOS
export PATH="$PATH:/usr/sbin:/usr/bin:/sbin:/bin"
......@@ -184,7 +185,7 @@ case "$1" in
shift
;;
'--bypass')
WSREP_SST_OPT_BYPASS=1
readonly WSREP_SST_OPT_BYPASS=1
;;
'--datadir')
# Let's remove the trailing slash:
......@@ -511,7 +512,24 @@ case "$1" in
esac
shift
done
readonly WSREP_SST_OPT_BYPASS
WSREP_TRANSFER_TYPE='SST'
[ $WSREP_SST_OPT_BYPASS -ne 0 ] && readonly WSREP_TRANSFER_TYPE='IST'
# Let's take the name of the current script as a base,
# removing the directory, extension and "wsrep_sst_" prefix:
WSREP_METHOD="${0##*/}"
WSREP_METHOD="${WSREP_METHOD%.*}"
readonly WSREP_METHOD="${WSREP_METHOD#wsrep_sst_}"
if [ -n "${WSREP_SST_OPT_ROLE+x}" ]; then
if [ "$WSREP_SST_OPT_ROLE" != 'donor' -a \
"$WSREP_SST_OPT_ROLE" != 'joiner' ]
then
wsrep_log_error "Unrecognized role: '$WSREP_SST_OPT_ROLE'"
exit 22 # EINVAL
fi
else
readonly WSREP_SST_OPT_ROLE='donor'
fi
# The same argument can be present on the command line several
# times, in this case we must take its last value:
......@@ -719,7 +737,7 @@ wsrep_log()
{
# echo everything to stderr so that it gets into common error log
# deliberately made to look different from the rest of the log
local readonly tst="$(date +%Y%m%d\ %H:%M:%S.%N | cut -b -21)"
local readonly tst=$(date "+%Y%m%d %H:%M:%S.%N" | cut -b -21)
echo "WSREP_SST: $* ($tst)" >&2
}
......@@ -1050,7 +1068,7 @@ is_local_ip()
[ "$1" = '127.0.0.1' -o \
"$1" = '127.0.0.2' -o \
"$1" = 'localhost' -o \
"$1" = '[::1]' ] && return 0
"$1" = '::1' ] && return 0
# If the address starts with "127." this is probably a local
# address, but we need to clarify what follows this prefix:
if [ "${1#127.}" != "$1" ]; then
......@@ -1067,21 +1085,25 @@ is_local_ip()
"$1" = "$(hostname -f)" -o \
"$1" = "$(hostname -d)" ] && return 0
fi
# If the address contains anything other than digits
# and separators, it is not a local address:
[ "${1#*[!0-9.]}" != "$1" ] && \
[ "${1#*[!0-9A-Fa-f:\[\]]}" != "$1" ] && return 1
# Now let's check if the given address is assigned to
# one of the network cards:
local ip_util=$(commandex 'ip')
if [ -n "$ip_util" ]; then
# ip address show ouput format is " inet[6] <address>/<mask>":
"$ip_util" address show \
| grep -E '^[[:space:]]*inet.? [^[:space:]]+/' -o \
| grep -F " $1/" >/dev/null && return 0
| grep -o -E '^[[:space:]]*inet.?[[:space:]]+[^[:space:]]+/' \
| grep -qw -F -- "$1/" && return 0
else
local ifconfig_util=$(commandex 'ifconfig')
if [ -n "$ifconfig_util" ]; then
# ifconfig output format is " inet[6] <address> ...":
"$ifconfig_util" \
| grep -E '^[[:space:]]*inet.? [^[:space:]]+ ' -o \
| grep -F " $1 " >/dev/null && return 0
| grep -o -E '^[[:space:]]*inet.?[[:space:]]+[^[:space:]]+' \
| grep -qw -F -- "$1" && return 0
fi
fi
return 1
......@@ -1403,7 +1425,7 @@ get_proc()
if [ -z "$nproc" ]; then
set +e
if [ "$OS" = 'Linux' ]; then
nproc=$(grep -c processor /proc/cpuinfo 2>/dev/null)
nproc=$(grep -cw -E '^processor' /proc/cpuinfo 2>/dev/null)
elif [ "$OS" = 'Darwin' -o "$OS" = 'FreeBSD' ]; then
nproc=$(sysctl -n hw.ncpu)
fi
......@@ -1452,3 +1474,19 @@ check_server_ssl_config()
fi
fi
}
simple_cleanup()
{
# Since this is invoked just after exit NNN
local estatus=$?
if [ $estatus -ne 0 ]; then
wsrep_log_error "Cleanup after exit with status: $estatus"
fi
if [ -n "${SST_PID:-}" ]; then
[ "$(pwd)" != "$OLD_PWD" ] && cd "$OLD_PWD"
[ -f "$SST_PID" ] && rm -f "$SST_PID" || :
fi
exit $estatus
}
wsrep_log_info "$WSREP_METHOD $WSREP_TRANSFER_TYPE started on $WSREP_SST_OPT_ROLE"
......@@ -86,7 +86,7 @@ encrypt_threads=""
encrypt_chunk=""
readonly SECRET_TAG='secret'
readonly TOTAL_TAG='total'
readonly TOTAL_TAG='secret /total'
# Required for backup locks
# For backup locks it is 1 sent by joiner
......@@ -166,12 +166,10 @@ get_keys()
exit 3
fi
if [ -z "$ekey" ]; then
if [ ! -r "$ekeyfile" ]; then
wsrep_log_error "FATAL: Either key must be specified" \
"or keyfile must be readable"
exit 3
fi
if [ -z "$ekey" -a ! -r "$ekeyfile" ]; then
wsrep_log_error "FATAL: Either key must be specified" \
"or keyfile must be readable"
exit 3
fi
if [ "$eformat" = 'openssl' ]; then
......@@ -218,9 +216,7 @@ get_keys()
exit 2
fi
if [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]; then
ecmd="$ecmd -d"
fi
[ "$WSREP_SST_OPT_ROLE" = 'joiner' ] && ecmd="$ecmd -d"
stagemsg="$stagemsg-XB-Encrypted"
}
......@@ -597,18 +593,6 @@ get_stream()
wsrep_log_info "Streaming with $sfmt"
}
sig_joiner_cleanup()
{
local estatus=$?
if [ $estatus -ne 0 ]; then
wsrep_log_error "Cleanup after exit with status: $estatus"
fi
wsrep_log_error "Removing $MAGIC_FILE file due to signal"
[ "$(pwd)" != "$OLD_PWD" ] && cd "$OLD_PWD"
[ -f "$MAGIC_FILE" ] && rm -f "$MAGIC_FILE"
exit $estatus
}
cleanup_at_exit()
{
# Since this is invoked just after exit NNN
......@@ -619,6 +603,11 @@ cleanup_at_exit()
[ "$(pwd)" != "$OLD_PWD" ] && cd "$OLD_PWD"
if [ $estatus -ne 0 ]; then
wsrep_log_error "Removing $MAGIC_FILE file due to signal"
[ -f "$MAGIC_FILE" ] && rm -f "$MAGIC_FILE" || :
fi
if [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]; then
wsrep_log_info "Removing the sst_in_progress file"
wsrep_cleanup_progress_file
......@@ -648,7 +637,7 @@ cleanup_at_exit()
fi
# Final cleanup
pgid=$(ps -o pgid= $$ 2>/dev/null | grep -o -E '[0-9]*' || :)
pgid=$(ps -o pgid= $$ 2>/dev/null | grep -o -E '[0-9]+' || :)
# This means no setsid done in mysqld.
# We don't want to kill mysqld here otherwise.
......@@ -744,17 +733,15 @@ recv_joiner()
fi
fi
pushd "$dir" 1>/dev/null
set +e
if [ $wait -ne 0 ]; then
wait_for_listen &
fi
cd "$dir"
set +e
timeit "$msg" "$ltcmd | $strmcmd; RC=( "\${PIPESTATUS[@]}" )"
set -e
popd 1>/dev/null
cd "$OLD_PWD"
if [ ${RC[0]} -eq 124 ]; then
wsrep_log_error "Possible timeout in receiving first data from" \
......@@ -779,20 +766,19 @@ recv_joiner()
wsrep_log_info $(ls -l "$dir/"*)
exit 32
fi
# check donor supplied secret
SECRET=$(grep -m1 -E "^$SECRET_TAG[[:space:]]" -- "$MAGIC_FILE" || :)
# Select the "secret" tag whose value does not start
# with a slash symbol. All new tags must to start with
# the space and the slash symbol after the word "secret" -
# to be removed by older versions of the SST scripts:
SECRET=$(grep -m1 -E "^$SECRET_TAG[[:space:]]+[^/]" \
-- "$MAGIC_FILE" || :)
# Check donor supplied secret:
SECRET=$(trim_string "${SECRET#$SECRET_TAG}")
if [ "$SECRET" != "$MY_SECRET" ]; then
wsrep_log_error "Donor does not know my secret!"
wsrep_log_info "Donor: '$SECRET', my: '$MY_SECRET'"
exit 32
fi
# remove secret and total from the magic file
grep -v -E "^($SECRET_TAG|$TOTAL_TAG)[[:space:]]" -- \
"$MAGIC_FILE" > "$MAGIC_FILE.new"
mv "$MAGIC_FILE.new" "$MAGIC_FILE"
fi
}
......@@ -801,11 +787,11 @@ send_donor()
local dir="$1"
local msg="$2"
pushd "$dir" 1>/dev/null
cd "$dir"
set +e
timeit "$msg" "$strmcmd | $tcmd; RC=( "\${PIPESTATUS[@]}" )"
set -e
popd 1>/dev/null
cd "$OLD_PWD"
for ecode in "${RC[@]}"; do
if [ $ecode -ne 0 ]; then
......@@ -820,7 +806,7 @@ monitor_process()
{
local sst_stream_pid=$1
while true ; do
while :; do
if ! ps -p "$WSREP_SST_OPT_PARENT" >/dev/null 2>&1; then
wsrep_log_error \
"Parent mysqld process (PID: $WSREP_SST_OPT_PARENT)" \
......@@ -837,13 +823,6 @@ monitor_process()
[ -f "$MAGIC_FILE" ] && rm -f "$MAGIC_FILE"
if [ "$WSREP_SST_OPT_ROLE" != 'joiner' -a \
"$WSREP_SST_OPT_ROLE" != 'donor' ]
then
wsrep_log_error "Invalid role '$WSREP_SST_OPT_ROLE'"
exit 22
fi
read_cnf
setup_ports
......@@ -960,8 +939,8 @@ setup_commands()
get_stream
get_transfer
if [ "$WSREP_SST_OPT_ROLE" = 'donor' ]
then
if [ "$WSREP_SST_OPT_ROLE" = 'donor' ]; then
trap cleanup_at_exit EXIT
if [ $WSREP_SST_OPT_BYPASS -eq 0 ]
......@@ -1082,6 +1061,7 @@ then
fi
setup_commands
set +e
timeit "$stagemsg-SST" "$INNOBACKUP | $tcmd; RC=( "\${PIPESTATUS[@]}" )"
set -e
......@@ -1125,10 +1105,9 @@ then
echo "done $WSREP_SST_OPT_GTID"
wsrep_log_info "Total time on donor: $totime seconds"
wsrep_log_info "mariabackup SST/IST completed on donor"
elif [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]
then
else # joiner
[ -e "$SST_PROGRESS_FILE" ] && \
wsrep_log_info "Stale sst_in_progress file: $SST_PROGRESS_FILE"
[ -n "$SST_PROGRESS_FILE" ] && touch "$SST_PROGRESS_FILE"
......@@ -1199,6 +1178,7 @@ then
sleep 1
done
trap simple_cleanup EXIT
echo $$ > "$SST_PID"
stagemsg='Joiner-Recv'
......@@ -1208,7 +1188,7 @@ then
[ -f "$DATA/$IST_FILE" ] && rm -f "$DATA/$IST_FILE"
# May need xtrabackup_checkpoints later on
[ -f "$DATA/xtrabackup_binary" ] && rm -f "$DATA/xtrabackup_binary"
[ -f "$DATA/xtrabackup_binary" ] && rm -f "$DATA/xtrabackup_binary"
[ -f "$DATA/xtrabackup_galera_info" ] && rm -f "$DATA/xtrabackup_galera_info"
ADDR="$WSREP_SST_OPT_HOST"
......@@ -1235,7 +1215,6 @@ then
MY_SECRET="" # for check down in recv_joiner()
fi
trap sig_joiner_cleanup HUP PIPE INT TERM
trap cleanup_at_exit EXIT
if [ -n "$progress" ]; then
......@@ -1287,13 +1266,13 @@ then
cd "$DATA"
wsrep_log_info "Cleaning the old binary logs"
# If there is a file with binlogs state, delete it:
[ -f "$binlog_base.state" ] && rm -fv "$binlog_base.state" 1>&2
[ -f "$binlog_base.state" ] && rm -f "$binlog_base.state" >&2
# Clean up the old binlog files and index:
if [ -f "$binlog_index" ]; then
while read bin_file || [ -n "$bin_file" ]; do
rm -fv "$bin_file" 1>&2 || :
rm -f "$bin_file" >&2 || :
done < "$binlog_index"
rm -fv "$binlog_index" 1>&2
rm -f "$binlog_index" >&2
fi
if [ -n "$binlog_dir" -a "$binlog_dir" != '.' -a \
-d "$binlog_dir" ]
......@@ -1304,7 +1283,7 @@ then
"Cleaning the binlog directory '$binlog_dir' as well"
fi
fi
rm -fv "$binlog_base".[0-9]* 1>&2 || :
rm -f "$binlog_base".[0-9]* >&2 || :
cd "$OLD_PWD"
fi
......@@ -1315,13 +1294,13 @@ then
${ib_undo_dir:+"$ib_undo_dir"} \
${ib_log_dir:+"$ib_log_dir"} \
"$DATA" -mindepth 1 -prune -regex "$cpat" \
-o -exec rm -rfv {} 1>&2 \+
-o -exec rm -rf {} >&2 \+
else
find ${ib_home_dir:+"$ib_home_dir"} \
${ib_undo_dir:+"$ib_undo_dir"} \
${ib_log_dir:+"$ib_log_dir"} \
"$DATA" -mindepth 1 -prune -regex "$cpat" \
-o -exec rm -rfv {} 1>&2 \+
-o -exec rm -rf {} >&2 \+
fi
TDATA="$DATA"
......@@ -1395,7 +1374,6 @@ then
wsrep_log_info "Preparing the backup at $DATA"
setup_commands
timeit 'mariabackup prepare stage' "$INNOAPPLY"
if [ $? -ne 0 ]; then
wsrep_log_error "mariabackup apply finished with errors." \
"Check syslog or '$INNOAPPLYLOG' for details."
......@@ -1452,6 +1430,10 @@ then
else
wsrep_log_info "'$IST_FILE' received from donor: Running IST"
if [ $WSREP_SST_OPT_BYPASS -eq 0 ]; then
readonly WSREP_SST_OPT_BYPASS=1
readonly WSREP_TRANSFER_TYPE='IST'
fi
fi
......@@ -1460,12 +1442,13 @@ then
exit 2
fi
coords=$(cat "$MAGIC_FILE")
# Remove special tags from the magic file, and from the output:
coords=$(grep -v -E "^$SECRET_TAG[[:space:]]" -- "$MAGIC_FILE")
wsrep_log_info "Galera co-ords from recovery: $coords"
cat "$MAGIC_FILE" # Output : UUID:seqno wsrep_gtid_domain_id
echo "$coords" # Output : UUID:seqno wsrep_gtid_domain_id
wsrep_log_info "Total time on joiner: $totime seconds"
wsrep_log_info "mariabackup SST/IST completed on joiner"
fi
wsrep_log_info "$WSREP_METHOD $WSREP_TRANSFER_TYPE completed on $WSREP_SST_OPT_ROLE"
exit 0
......@@ -163,10 +163,11 @@ then
echo "$STOP_WSREP" && $MYSQLDUMP && echo "$CSV_TABLES_FIX" && \
echo "$RESTORE_GENERAL_LOG" && echo "$RESTORE_SLOW_QUERY_LOG" && \
echo "$SET_START_POSITION" && echo "$SET_WSREP_GTID_DOMAIN_ID" \
|| echo "SST failed to complete;") | $MYSQL
|| echo "SST failed to complete;") | $MYSQL || exit $?
else
wsrep_log_info "Bypassing state dump."
echo "$SET_START_POSITION" | $MYSQL
echo "$SET_START_POSITION" | $MYSQL || exit $?
fi
#
wsrep_log_info "$WSREP_METHOD $WSREP_TRANSFER_TYPE completed on $WSREP_SST_OPT_ROLE"
exit 0
......@@ -34,6 +34,12 @@ wsrep_check_programs rsync
cleanup_joiner()
{
# Since this is invoked just after exit NNN
local estatus=$?
if [ $estatus -ne 0 ]; then
wsrep_log_error "Cleanup after exit with status: $estatus"
fi
local failure=0
[ "$(pwd)" != "$OLD_PWD" ] && cd "$OLD_PWD"
......@@ -72,7 +78,9 @@ cleanup_joiner()
wsrep_cleanup_progress_file
fi
[ -f "$SST_PID" ] && rm -f "$SST_PID"
[ -f "$SST_PID" ] && rm -f "$SST_PID" || :
exit $estatus
}
check_pid_and_port()
......@@ -310,6 +318,7 @@ if [ -n "$SSLMODE" -a "$SSLMODE" != 'DISABLED' ]; then
fi
readonly SECRET_TAG='secret'
readonly BYPASS_TAG='secret /bypass'
SST_PID="$WSREP_SST_OPT_DATA/wsrep_sst.pid"
......@@ -325,6 +334,7 @@ while check_pid "$SST_PID" 0; do
sleep 1
done
trap simple_cleanup EXIT
echo $$ > "$SST_PID"
# give some time for stunnel from the previous SST to complete:
......@@ -358,7 +368,7 @@ while check_pid "$RSYNC_PID" 1 "$RSYNC_CONF"; do
sleep 1
done
[ -f "$MAGIC_FILE" ] && rm -f "$MAGIC_FILE"
[ -f "$MAGIC_FILE" ] && rm -f "$MAGIC_FILE"
[ -f "$BINLOG_TAR_FILE" ] && rm -f "$BINLOG_TAR_FILE"
if [ "$WSREP_SST_OPT_ROLE" = 'donor' ]; then
......@@ -390,7 +400,7 @@ EOF
ERROR="$WSREP_SST_OPT_DATA/sst_error"
[ -f "$FLUSHED" ] && rm -f "$FLUSHED"
[ -f "$ERROR" ] && rm -f "$ERROR"
[ -f "$ERROR" ] && rm -f "$ERROR"
echo 'flush tables'
......@@ -400,7 +410,7 @@ EOF
# (c) ERROR file, in case flush tables operation failed.
while [ ! -r "$FLUSHED" ] && \
! grep -q -F ':' '--' "$FLUSHED" >/dev/null 2>&1
! grep -q -F ':' -- "$FLUSHED" 2>/dev/null
do
# Check whether ERROR file exists.
if [ -f "$ERROR" ]; then
......@@ -440,10 +450,10 @@ EOF
tar_type=0
if tar --help | grep -qw -F -- '--transform'; then
tar_type=1
elif tar --version | grep -q -E '^bsdtar\>'; then
elif tar --version | grep -qw -E '^bsdtar'; then
tar_type=2
fi
if [ $tar_type -ne 2 ]; then
if [ $tar_type -eq 2 ]; then
if [ -n "$BASH_VERSION" ]; then
printf '%s' "$binlog_files" >&2
else
......@@ -508,9 +518,8 @@ EOF
fi
# Use deltaxfer only for WAN:
inv=$(basename "$0")
WHOLE_FILE_OPT=""
if [ "${inv%wsrep_sst_rsync_wan*}" = "$inv" ]; then
if [ "${WSREP_METHOD%_wan}" = "$WSREP_METHOD" ]; then
WHOLE_FILE_OPT='--whole-file'
fi
......@@ -620,7 +629,6 @@ FILTER="-f '- /lost+found'
wsrep_log_info "Transfer of data done"
else # BYPASS
wsrep_log_info "Bypassing state dump."
......@@ -641,6 +649,10 @@ FILTER="-f '- /lost+found'
echo "$SECRET_TAG $WSREP_SST_OPT_REMOTE_PSWD" >> "$MAGIC_FILE"
fi
if [ $WSREP_SST_OPT_BYPASS -ne 0 ]; then
echo "$BYPASS_TAG" >> "$MAGIC_FILE"
fi
rsync ${STUNNEL:+--rsh="$STUNNEL"} \
--archive --quiet --checksum "$MAGIC_FILE" \
"rsync://$WSREP_SST_OPT_ADDR" >&2 || RC=$?
......@@ -654,15 +666,11 @@ FILTER="-f '- /lost+found'
if [ -n "$STUNNEL" ]; then
[ -f "$STUNNEL_CONF" ] && rm -f "$STUNNEL_CONF"
[ -f "$STUNNEL_PID" ] && rm -f "$STUNNEL_PID"
[ -f "$STUNNEL_PID" ] && rm -f "$STUNNEL_PID"
fi
[ -f "$SST_PID" ] && rm -f "$SST_PID"
wsrep_log_info "rsync SST/IST completed on donor"
else # joiner
elif [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]
then
check_sockets_utils
ADDR="$WSREP_SST_OPT_HOST"
......@@ -670,8 +678,6 @@ then
RSYNC_ADDR="$WSREP_SST_OPT_HOST"
RSYNC_ADDR_UNESCAPED="$WSREP_SST_OPT_HOST_UNESCAPED"
trap 'exit 32' HUP PIPE
trap 'exit 3' INT TERM ABRT
trap cleanup_joiner EXIT
touch "$SST_PROGRESS_FILE"
......@@ -820,8 +826,13 @@ EOF
fi
if [ -n "$MY_SECRET" ]; then
# Select the "secret" tag whose value does not start
# with a slash symbol. All new tags must to start with
# the space and the slash symbol after the word "secret" -
# to be removed by older versions of the SST scripts:
SECRET=$(grep -m1 -E "^$SECRET_TAG[[:space:]]+[^/]" \
-- "$MAGIC_FILE" || :)
# Check donor supplied secret:
SECRET=$(grep -m1 -E "^$SECRET_TAG[[:space:]]" -- "$MAGIC_FILE" || :)
SECRET=$(trim_string "${SECRET#$SECRET_TAG}")
if [ "$SECRET" != "$MY_SECRET" ]; then
wsrep_log_error "Donor does not know my secret!"
......@@ -830,34 +841,45 @@ EOF
fi
fi
if [ -n "$WSREP_SST_OPT_BINLOG" ]; then
binlog_tar_present=0
[ -f "$BINLOG_TAR_FILE" ] && binlog_tar_present=1
if [ $WSREP_SST_OPT_BYPASS -eq 0 ]; then
if grep -m1 -qE "^$BYPASS_TAG([[space]]+.*)?\$" -- "$MAGIC_FILE"; then
readonly WSREP_SST_OPT_BYPASS=1
readonly WSREP_TRANSFER_TYPE='IST'
fi
fi
binlog_tar_present=0
if [ -f "$BINLOG_TAR_FILE" ]; then
if [ $WSREP_SST_OPT_BYPASS -ne 0 ]; then
wsrep_log_warning "tar with binlogs transferred in the IST mode"
fi
binlog_tar_present=1
fi
if [ $WSREP_SST_OPT_BYPASS -eq 0 -a -n "$WSREP_SST_OPT_BINLOG" ]; then
# If it is SST (not an IST) or tar with binlogs is present
# among the transferred files, then we need to remove the
# old binlogs:
if [ $WSREP_SST_OPT_BYPASS -eq 0 -o $binlog_tar_present -ne 0 ]; then
cd "$DATA"
# Clean up the old binlog files and index:
binlog_index="$WSREP_SST_OPT_BINLOG_INDEX"
if [ -f "$binlog_index" ]; then
while read bin_file || [ -n "$bin_file" ]; do
rm -f "$bin_file" || :
done < "$binlog_index"
rm -f "$binlog_index"
fi
binlog_cd=0
# Change the directory to binlog base (if possible):
if [ -n "$binlog_dir" -a "$binlog_dir" != '.' -a \
-d "$binlog_dir" ]
then
binlog_cd=1
cd "$binlog_dir"
fi
# Clean up unindexed binlog files:
rm -f "$binlog_base".[0-9]* || :
[ $binlog_cd -ne 0 ] && cd "$DATA_DIR"
cd "$DATA"
# Clean up the old binlog files and index:
binlog_index="$WSREP_SST_OPT_BINLOG_INDEX"
if [ -f "$binlog_index" ]; then
while read bin_file || [ -n "$bin_file" ]; do
rm -f "$bin_file" || :
done < "$binlog_index"
rm -f "$binlog_index"
fi
binlog_cd=0
# Change the directory to binlog base (if possible):
if [ -n "$binlog_dir" -a "$binlog_dir" != '.' -a \
-d "$binlog_dir" ]
then
binlog_cd=1
cd "$binlog_dir"
fi
# Clean up unindexed binlog files:
rm -f "$binlog_base".[0-9]* || :
[ $binlog_cd -ne 0 ] && cd "$DATA_DIR"
if [ $binlog_tar_present -ne 0 ]; then
# Create a temporary file:
tmpdir=$(parse_cnf '--mysqld|sst' 'tmpdir')
......@@ -881,7 +903,7 @@ EOF
# Extracting binlog files:
wsrep_log_info "Extracting binlog files:"
RC=0
if tar --version | grep -q -E '^bsdtar\>'; then
if tar --version | grep -qw -E '^bsdtar'; then
tar -tf "$BINLOG_TAR_FILE" > "$tmpfile" && \
tar -xvf "$BINLOG_TAR_FILE" > /dev/null || RC=$?
else
......@@ -889,8 +911,8 @@ EOF
cat "$tmpfile" >&2 || RC=$?
fi
if [ $RC -ne 0 ]; then
rm -f "$tmpfile"
wsrep_log_error "Error unpacking tar file with binlog files"
rm -f "$tmpfile"
exit 32
fi
# Rebuild binlog index:
......@@ -903,24 +925,13 @@ EOF
fi
fi
if [ -n "$MY_SECRET" ]; then
# remove secret from the magic file, and output
# the UUID:seqno & wsrep_gtid_domain_id:
grep -v -E "^$SECRET_TAG[[:space:]]" -- "$MAGIC_FILE"
else
# Output the UUID:seqno and wsrep_gtid_domain_id:
cat "$MAGIC_FILE"
fi
wsrep_log_info "rsync SST/IST completed on joiner"
# wsrep_cleanup_progress_file
# cleanup_joiner
else
wsrep_log_error "Unrecognized role: '$WSREP_SST_OPT_ROLE'"
exit 22 # EINVAL
# Remove special tags from the magic file, and from the output:
coords=$(grep -v -E "^$SECRET_TAG[[:space:]]" -- "$MAGIC_FILE")
wsrep_log_info "Galera co-ords from recovery: $coords"
echo "$coords" # Output : UUID:seqno wsrep_gtid_domain_id
fi
[ -f "$BINLOG_TAR_FILE" ] && rm -f "$BINLOG_TAR_FILE"
wsrep_log_info "$WSREP_METHOD $WSREP_TRANSFER_TYPE completed on $WSREP_SST_OPT_ROLE"
exit 0
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment