Commit b3dd95e0 authored by Sujatha's avatar Sujatha

MDEV-14203: rpl.rpl_extra_col_master_myisam,...

MDEV-14203: rpl.rpl_extra_col_master_myisam, rpl.rpl_slave_load_tmpdir_not_exist failed in buildbot with a warning

Problem:
=======
rpl.rpl_slave_load_tmpdir_not_exist 'stmt' w3 [ fail ]  Found warnings/errors
in server log file!

Test ended at 2017-09-27 20:34:55
[Warning] Master is configured to log replication events with checksum, but
will not send such events to slaves that cannot process them
^ Found warnings in /mnt/buildbot/build/mariadb-10.2.10/mysql-test/var/3/log/mysqld.1.err
ok
Analysis:
========
When slave tries to connect to master 'get_master_version_and_clock' function
is invoked to perform elaborated slave-master handshake. During this process
slave server queries master server, to know if it is checksum aware and at the
same time master is notified about its CRC-awareness. The master's side
instant value of @@global.binlog_checksum is stored in the dump thread's
uservar area as well as cached locally to become known in consensus by master
and slave.

Post hand-shake slave requests master for binlog dump. It sends
'COM_BINLOG_DUMP'. This command is sent to master by 'cli_advanced_command'
call. If there is some temporary network failure during this request_dump
call, 'end_server' is invoked to close the current connection between master
and slave. Upon connection close the dump thread on the master gets terminated
and it clears the 'uservar' data it got through master-slave handshake.

The 'COM_BINLOG_DUMP' command is sent once again without master-slave
handshake. Since the checksum data is not available with new dump thread a
warning gets reported.

Fix:
===
Upon network write error donot attempt reconnect, proceed to master-slave
handshake. This ensures that master is aware of slave's capability to use
checksums.
parent 3a8943ae
include/master-slave.inc
[connection master]
CREATE TABLE t1(a int);
INSERT INTO t1 VALUES(1);
connection slave;
SET @saved_dbug = @@GLOBAL.debug_dbug;
SET @@global.debug_dbug= 'd,simulate_error_on_packet_write';
START SLAVE;
SET DEBUG_SYNC= 'now WAIT_FOR parked';
SET @@GLOBAL.debug_dbug = @saved_dbug;
SET DEBUG_SYNC= 'now SIGNAL continue';
SET DEBUG_SYNC= 'RESET';
include/wait_for_slave_io_to_start.inc
include/wait_for_slave_sql_to_start.inc
connection master;
include/sync_slave_sql_with_master.inc
SELECT * FROM t1;
a
1
connection master;
DROP TABLE t1;
include/rpl_end.inc
# ==== Purpose ====
#
# Test verifies that, due to a temporary network error, if request dump
# command specific packet write operation fails then the write error gets
# handled appropriately. Further retry will be initiated with appropriate
# slave registration on master. This will ensure that master has all the
# details of slave and no warnings are reported on the master side.
#
# ==== Implementation ====
#
# Steps:
# 0 - Skip the slave start.
# 1 - Enable debug simulation which will simulate packet write error during
# dump request command execution.
# 2 - Start the slave. Observe that slave is able to reconnect post
# temporary network write error.
#
# ==== References ====
#
# MDEV-14203: rpl.rpl_extra_col_master_myisam,
# rpl.rpl_slave_load_tmpdir_not_exist failed in buildbot with a
# warning
#
# MDEV-13258: rpl.rpl_skip_replication, rpl.rpl_set_statement_default_master
# failed in buildbot
#
--source include/have_debug.inc
--source include/have_debug_sync.inc
--let $rpl_skip_start_slave=1
--source include/master-slave.inc
# Do an insert on master
CREATE TABLE t1(a int);
INSERT INTO t1 VALUES(1);
# Add a debug point and start the slave so that dump request fails.
connection slave;
SET @saved_dbug = @@GLOBAL.debug_dbug;
SET @@global.debug_dbug= 'd,simulate_error_on_packet_write';
START SLAVE;
SET DEBUG_SYNC= 'now WAIT_FOR parked';
SET @@GLOBAL.debug_dbug = @saved_dbug;
SET DEBUG_SYNC= 'now SIGNAL continue';
SET DEBUG_SYNC= 'RESET';
--source include/wait_for_slave_io_to_start.inc
--source include/wait_for_slave_sql_to_start.inc
# Sync the slave and verify that slave has caught up with the master.
connection master;
--source include/sync_slave_sql_with_master.inc
SELECT * FROM t1;
# Cleanup
connection master;
DROP TABLE t1;
--source include/rpl_end.inc
......@@ -721,6 +721,8 @@ cli_advanced_command(MYSQL *mysql, enum enum_server_command command,
set_mysql_error(mysql, CR_NET_PACKET_TOO_LARGE, unknown_sqlstate);
goto end;
}
if (net->last_errno == ER_NET_ERROR_ON_WRITE && command == COM_BINLOG_DUMP)
goto end;
end_server(mysql);
if (mysql_reconnect(mysql) || stmt_skip)
goto end;
......
......@@ -45,6 +45,7 @@
#include <violite.h>
#include <signal.h>
#include "probes_mysql.h"
#include <debug_sync.h>
#ifdef EMBEDDED_LIBRARY
#undef MYSQL_SERVER
......@@ -486,6 +487,17 @@ net_write_command(NET *net,uchar command,
DBUG_ENTER("net_write_command");
DBUG_PRINT("enter",("length: %lu", (ulong) len));
DBUG_EXECUTE_IF("simulate_error_on_packet_write",
{
if (command == COM_BINLOG_DUMP)
{
net->last_errno = ER_NET_ERROR_ON_WRITE;
DBUG_ASSERT(!debug_sync_set_action(
(THD *)net->thd,
STRING_WITH_LEN("now SIGNAL parked WAIT_FOR continue")));
DBUG_RETURN(true);
}
};);
MYSQL_NET_WRITE_START(length);
buff[4]=command; /* For first packet */
......
......@@ -3297,7 +3297,8 @@ static int request_dump(THD *thd, MYSQL* mysql, Master_info* mi,
in the future, we should do a better error analysis, but for
now we just fill up the error log :-)
*/
if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED)
if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED ||
mysql_errno(mysql) == ER_NET_ERROR_ON_WRITE)
*suppress_warnings= TRUE; // Suppress reconnect warning
else
sql_print_error("Error on COM_BINLOG_DUMP: %d %s, will retry in %d secs",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment