Commit 9c433432 authored by Andrei's avatar Andrei

MDEV-32365 detailize the semisync replication magic number error

Semisync ack (master side) receiver thread is made to report
details of faced errors.
In case of 'magic byte' error, a hexdump of the received packet
is always (level) NOTEd into the error log.
In other cases an exact server level error is print out
as a warning (as it may not be critical) under log_warnings > 2.

An MTR test added for the magic byte error. For others existing mtr
tests cover that, provided log_warnings > 2 is set.
parent cb4c2713
...@@ -8,6 +8,7 @@ connection server_1; ...@@ -8,6 +8,7 @@ connection server_1;
# Slaves which simulate an error will produce a timeout on the primary # Slaves which simulate an error will produce a timeout on the primary
call mtr.add_suppression("Timeout waiting"); call mtr.add_suppression("Timeout waiting");
call mtr.add_suppression("did not exit"); call mtr.add_suppression("did not exit");
call mtr.add_suppression("Got an error reading communication packets");
# Suppress slave errors related to the simulated error # Suppress slave errors related to the simulated error
connection server_2; connection server_2;
call mtr.add_suppression("reply failed"); call mtr.add_suppression("reply failed");
......
...@@ -31,6 +31,13 @@ connection slave; ...@@ -31,6 +31,13 @@ connection slave;
# Compare the tables on master and slave. # Compare the tables on master and slave.
include/diff_tables.inc [master:t1, slave:t1] include/diff_tables.inc [master:t1, slave:t1]
connection master; connection master;
set statement sql_log_bin=0 for call mtr.add_suppression("Read semi-sync reply magic number error");
SET @save_debug_master= @@global.debug;
SET GLOBAL debug_dbug="+d,semisync_corrupt_magic";
insert into t1 values (11);
connection slave;
connection master;
SET GLOBAL debug_dbug= @save_debug_master;
drop table t1; drop table t1;
connection slave; connection slave;
set global rpl_semi_sync_slave_enabled = OFF; set global rpl_semi_sync_slave_enabled = OFF;
......
...@@ -67,6 +67,7 @@ ...@@ -67,6 +67,7 @@
--echo # Slaves which simulate an error will produce a timeout on the primary --echo # Slaves which simulate an error will produce a timeout on the primary
call mtr.add_suppression("Timeout waiting"); call mtr.add_suppression("Timeout waiting");
call mtr.add_suppression("did not exit"); call mtr.add_suppression("did not exit");
call mtr.add_suppression("Got an error reading communication packets");
--let $sav_master_timeout= `SELECT @@global.rpl_semi_sync_master_timeout` --let $sav_master_timeout= `SELECT @@global.rpl_semi_sync_master_timeout`
--let $sav_enabled_master= `SELECT @@GLOBAL.rpl_semi_sync_master_enabled` --let $sav_enabled_master= `SELECT @@GLOBAL.rpl_semi_sync_master_enabled`
......
...@@ -77,6 +77,16 @@ insert into t1 values (10); ...@@ -77,6 +77,16 @@ insert into t1 values (10);
--source include/diff_tables.inc --source include/diff_tables.inc
--connection master --connection master
set statement sql_log_bin=0 for call mtr.add_suppression("Read semi-sync reply magic number error");
SET @save_debug_master= @@global.debug;
SET GLOBAL debug_dbug="+d,semisync_corrupt_magic";
insert into t1 values (11);
--sync_slave_with_master
--connection master
SET GLOBAL debug_dbug= @save_debug_master;
drop table t1; drop table t1;
--sync_slave_with_master --sync_slave_with_master
set global rpl_semi_sync_slave_enabled = OFF; set global rpl_semi_sync_slave_enabled = OFF;
......
...@@ -18,6 +18,8 @@ ...@@ -18,6 +18,8 @@
#include <my_global.h> #include <my_global.h>
#include "semisync_master.h" #include "semisync_master.h"
#include <algorithm>
#include <mysql_com.h>
#define TIME_THOUSAND 1000 #define TIME_THOUSAND 1000
#define TIME_MILLION 1000000 #define TIME_MILLION 1000000
...@@ -562,6 +564,8 @@ int Repl_semi_sync_master::report_reply_packet(uint32 server_id, ...@@ -562,6 +564,8 @@ int Repl_semi_sync_master::report_reply_packet(uint32 server_id,
DBUG_ENTER("Repl_semi_sync_master::report_reply_packet"); DBUG_ENTER("Repl_semi_sync_master::report_reply_packet");
DBUG_EXECUTE_IF("semisync_corrupt_magic",
const_cast<uchar*>(packet)[REPLY_MAGIC_NUM_OFFSET]= 0;);
if (unlikely(packet[REPLY_MAGIC_NUM_OFFSET] != if (unlikely(packet[REPLY_MAGIC_NUM_OFFSET] !=
Repl_semi_sync_master::k_packet_magic_num)) Repl_semi_sync_master::k_packet_magic_num))
{ {
...@@ -593,9 +597,18 @@ int Repl_semi_sync_master::report_reply_packet(uint32 server_id, ...@@ -593,9 +597,18 @@ int Repl_semi_sync_master::report_reply_packet(uint32 server_id,
rpl_semi_sync_master_get_ack++; rpl_semi_sync_master_get_ack++;
report_reply_binlog(server_id, log_file_name, log_file_pos); report_reply_binlog(server_id, log_file_name, log_file_pos);
result= 0;
l_end: l_end:
if (result == -1)
{
char buf[256];
octet2hex(buf, (const char*) packet, std::min(static_cast<ulong>(sizeof(buf)-1),
packet_len));
sql_print_information("First bytes of the packet from semisync slave "
"server-id %d: %s", server_id, buf);
}
DBUG_RETURN(result); DBUG_RETURN(result);
} }
......
...@@ -277,8 +277,18 @@ void Ack_receiver::run() ...@@ -277,8 +277,18 @@ void Ack_receiver::run()
if (likely(len != packet_error)) if (likely(len != packet_error))
repl_semisync_master.report_reply_packet(slave->server_id(), repl_semisync_master.report_reply_packet(slave->server_id(),
net.read_pos, len); net.read_pos, len);
else if (net.last_errno == ER_NET_READ_ERROR) else
listener.clear_socket_info(slave); {
if (net.last_errno == ER_NET_READ_ERROR)
{
listener.clear_socket_info(slave);
}
if (net.last_errno > 0 && global_system_variables.log_warnings > 2)
sql_print_warning("Semisync ack receiver got error %d \"%s\" "
"from slave server-id %d",
net.last_errno, ER_DEFAULT(net.last_errno),
slave->server_id());
}
} }
} }
mysql_mutex_unlock(&m_mutex); mysql_mutex_unlock(&m_mutex);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment