Commit f0deff86 authored by unknown's avatar unknown

MDEV-4820: Empty master does not give error for slave GTID position that does...

MDEV-4820: Empty master does not give error for slave GTID position that does not exist in the binlog

The main bug here was the following situation:

Suppose we set up a completely new master2 as an extra multi-master to an
existing slave that already has a different master1 for domain_id=0. When the
slave tries to connect to master2, master2 will not have anything that slave
requests in domain_id=0, but that is fine as master2 is supposedly meant to
serve e.g. domain_id=1. (This is MDEV-4485).

But suppose that master2 then actually starts sending events from
domain_id=0. In this case, the fix for MDEV-4485 was incomplete, and the code
would fail to give the error that the position requested by the slave in
domain_id=0 was missing from the binlogs of master2. This could lead to lost
events or completely wrong replication.

The patch for this bug fixes this issue.

In addition, it cleans up the code a bit by getting rid of fake_gtid_hash.
The error message given when slave and master have diverged due to an
alternate future is also clarified, as requested in the bug description.
parent f08946c0
include/rpl_init.inc [topology=1->2]
include/stop_slave.inc
SET @slave_old_strict= @@GLOBAL.gtid_strict_mode;
SET GLOBAL gtid_strict_mode= 1;
CHANGE MASTER TO master_use_gtid=slave_pos;
include/start_slave.inc
SET @master_old_strict= @@GLOBAL.gtid_strict_mode;
SET GLOBAL gtid_strict_mode= 1;
CREATE TABLE t1 (a INT PRIMARY KEY);
INSERT INTO t1 VALUES (1);
SELECT * FROM t1 ORDER BY a;
a
1
include/stop_slave.inc
INSERT INTO t1 VALUES (2);
INSERT INTO t1 VALUES (3);
RESET MASTER;
SET GLOBAL gtid_slave_pos= 'OLD_GTID_POS';
include/start_slave.inc
INSERT INTO t1 VALUES (4);
SET sql_log_bin= 0;
CALL mtr.add_suppression("The binlog on the master is missing the GTID");
SET sql_log_bin= 1;
include/wait_for_slave_io_error.inc [errno=1236]
STOP SLAVE SQL_THREAD;
SET GLOBAL gtid_slave_pos= 'OLD_GTID_POS';
include/start_slave.inc
SELECT * FROM t1 ORDER BY a;
a
1
4
include/stop_slave.inc
RESET SLAVE ALL;
RESET MASTER;
SET GLOBAL gtid_slave_pos= '0-2-10';
CHANGE MASTER TO master_host = '127.0.0.1', master_port = SERVER_MYPORT_2,
master_user= 'root', master_use_gtid=CURRENT_POS;
START SLAVE;
INSERT INTO t1 VALUES (11);
SET sql_log_bin= 0;
CALL mtr.add_suppression("which is not in the master's binlog. Since the master's binlog contains GTIDs with higher sequence numbers, it probably means that the slave has diverged");
SET sql_log_bin= 1;
include/wait_for_slave_io_error.inc [errno=1236]
STOP SLAVE SQL_THREAD;
SET GLOBAL gtid_slave_pos= '0-2-10';
SET GLOBAL gtid_strict_mode= 0;
include/start_slave.inc
SELECT * FROM t1 ORDER BY a;
a
1
2
3
4
11
include/stop_slave.inc
RESET SLAVE ALL;
INSERT INTO t1 VALUES (12);
INSERT INTO t1 VALUES (22);
CHANGE MASTER TO master_host = '127.0.0.1', master_port = SERVER_MYPORT_1,
master_user= 'root', master_use_gtid=CURRENT_POS;
START SLAVE;
SET sql_log_bin= 0;
CALL mtr.add_suppression("which is not in the master's binlog. Since the master's binlog contains GTIDs with higher sequence numbers, it probably means that the slave has diverged");
SET sql_log_bin= 1;
include/wait_for_slave_io_error.inc [errno=1236]
STOP SLAVE SQL_THREAD;
SET GLOBAL gtid_strict_mode= 0;
CHANGE MASTER TO master_use_gtid=SLAVE_POS;
SET GLOBAL gtid_slave_pos= 'OLD_GTID_POS';
Warnings:
Warning 1947 Specified GTID OLD_GTID_POS conflicts with the binary log which contains a more recent GTID 0-2-12. If MASTER_GTID_POS=CURRENT_POS is used, the binlog position will override the new value of @@gtid_slave_pos.
include/start_slave.inc
SELECT * FROM t1 ORDER BY a;
a
1
4
11
12
22
SET GLOBAL gtid_strict_mode= @slave_old_strict;
DROP TABLE t1;
SET GLOBAL gtid_strict_mode= @master_old_strict;
include/rpl_end.inc
# Bring up a two-server replication setup with topology 1->2; in the steps
# below server_2 acts as the slave and server_1 as the master.
--let $rpl_topology=1->2
--source include/rpl_init.inc
# On the slave: save the current gtid_strict_mode in a user variable (it is
# restored in the clean-up at the end of the test), enable strict mode, and
# switch replication to GTID mode using gtid_slave_pos.
--connection server_2
--source include/stop_slave.inc
SET @slave_old_strict= @@GLOBAL.gtid_strict_mode;
SET GLOBAL gtid_strict_mode= 1;
CHANGE MASTER TO master_use_gtid=slave_pos;
--source include/start_slave.inc
# On the master: save and enable gtid_strict_mode as well, then create the
# test table and insert a first row.
--connection server_1
SET @master_old_strict= @@GLOBAL.gtid_strict_mode;
SET GLOBAL gtid_strict_mode= 1;
CREATE TABLE t1 (a INT PRIMARY KEY);
INSERT INTO t1 VALUES (1);
--save_master_pos
# Wait until the slave has caught up, check that row 1 arrived, and stop the
# slave so that the following master changes are not replicated yet.
--connection server_2
--sync_with_master
SELECT * FROM t1 ORDER BY a;
--source include/stop_slave.inc
# Scenario 1: the slave asks for a GTID that the master's binlog no longer
# contains.
#
# The slave was stopped just before this point, so rows 2 and 3 are written
# on the master only. Remember the master's GTID position after them.
--connection server_1
INSERT INTO t1 VALUES (2);
INSERT INTO t1 VALUES (3);
--let $old_gtid_pos= `SELECT @@GLOBAL.gtid_current_pos`
# RESET MASTER throws away the master's binlog. The saved position is then
# written back into gtid_slave_pos on the master.
# NOTE(review): presumably this makes the master continue its GTID sequence
# from the old position instead of starting over - confirm.
# --replace_result masks the run-dependent GTID as OLD_GTID_POS in the
# recorded output.
RESET MASTER;
--replace_result $old_gtid_pos OLD_GTID_POS
eval SET GLOBAL gtid_slave_pos= '$old_gtid_pos';
# Restart the slave; it still holds the position it reached after row 1.
--connection server_2
--source include/start_slave.inc
# Write one more row on the master, into the fresh binlog.
--connection server_1
INSERT INTO t1 VALUES (4);
--save_master_pos
# Back on the slave: register a suppression for the expected error-log
# message. sql_log_bin is switched off around the call so that the
# suppression is not written to this server's binlog.
--connection server_2
SET sql_log_bin= 0;
CALL mtr.add_suppression("The binlog on the master is missing the GTID");
SET sql_log_bin= 1;
# The slave I/O thread is expected to stop with error 1236.
--let $slave_io_errno=1236
--source include/wait_for_slave_io_error.inc
# Recover: stop the SQL thread, move the slave to the position the master had
# before RESET MASTER, and restart replication. The recorded result expects
# only rows 1 and 4 on the slave, i.e. rows 2 and 3 are skipped.
STOP SLAVE SQL_THREAD;
--replace_result $old_gtid_pos OLD_GTID_POS
eval SET GLOBAL gtid_slave_pos= '$old_gtid_pos';
--source include/start_slave.inc
--sync_with_master
SELECT * FROM t1 ORDER BY a;
--source include/stop_slave.inc
# Scenario 2: reverse the replication direction, so that server_1 becomes the
# slave of server_2.
#
# Still on server_2 (the active connection from the previous step): remove
# its slave configuration, reset its binlog, and set its gtid_slave_pos to
# the fixed value 0-2-10.
RESET SLAVE ALL;
RESET MASTER;
SET GLOBAL gtid_slave_pos= '0-2-10';
# Point server_1 at server_2, starting from server_1's current GTID position
# (CURRENT_POS). The port number is masked in the recorded output.
--connection server_1
--replace_result $SERVER_MYPORT_2 SERVER_MYPORT_2
eval CHANGE MASTER TO master_host = '127.0.0.1', master_port = $SERVER_MYPORT_2,
master_user= 'root', master_use_gtid=CURRENT_POS;
START SLAVE;
# Write a row on server_2, which is now the master.
--connection server_2
INSERT INTO t1 VALUES (11);
--save_master_pos
# On server_1: suppress the expected "slave has diverged" error-log message
# (with binlogging off so the suppression itself is not logged), then wait
# for the I/O thread to fail with error 1236.
--connection server_1
SET sql_log_bin= 0;
CALL mtr.add_suppression("which is not in the master's binlog. Since the master's binlog contains GTIDs with higher sequence numbers, it probably means that the slave has diverged");
SET sql_log_bin= 1;
--let $slave_io_errno=1236
--source include/wait_for_slave_io_error.inc
# Note: this switch is redundant, server_1 is already the active connection.
--connection server_1
# Recover: set server_1's slave position to 0-2-10, turn strict mode off, and
# restart replication. The recorded result expects rows 1, 2, 3, 4 and 11 on
# server_1.
STOP SLAVE SQL_THREAD;
SET GLOBAL gtid_slave_pos= '0-2-10';
SET GLOBAL gtid_strict_mode= 0;
--source include/start_slave.inc
--sync_with_master
SELECT * FROM t1 ORDER BY a;
--source include/stop_slave.inc
# Scenario 3: both servers write independently, then server_2 is attached to
# server_1 again.
#
# Still on server_1: remove its slave configuration, remember its current
# GTID position, and then add row 12 locally.
RESET SLAVE ALL;
--let $old_gtid_pos= `SELECT @@GLOBAL.gtid_current_pos`
INSERT INTO t1 VALUES (12);
--save_master_pos
# On server_2: add row 22 locally, then configure it as a slave of server_1
# starting from its own current GTID position (CURRENT_POS). The port number
# is masked in the recorded output.
--connection server_2
INSERT INTO t1 VALUES (22);
--replace_result $SERVER_MYPORT_1 SERVER_MYPORT_1
eval CHANGE MASTER TO master_host = '127.0.0.1', master_port = $SERVER_MYPORT_1,
master_user= 'root', master_use_gtid=CURRENT_POS;
START SLAVE;
# Suppress the expected "slave has diverged" error-log message (with
# binlogging off so the suppression itself is not logged), then wait for the
# I/O thread to fail with error 1236.
SET sql_log_bin= 0;
CALL mtr.add_suppression("which is not in the master's binlog. Since the master's binlog contains GTIDs with higher sequence numbers, it probably means that the slave has diverged");
SET sql_log_bin= 1;
--let $slave_io_errno=1236
--source include/wait_for_slave_io_error.inc
# Recover: turn strict mode off, switch to SLAVE_POS, and set gtid_slave_pos
# to the position server_1 had before row 12 was inserted. The recorded
# result shows warning 1947 for this SET, and expects rows 1, 4, 11, 12 and
# 22 on server_2 afterwards.
STOP SLAVE SQL_THREAD;
SET GLOBAL gtid_strict_mode= 0;
CHANGE MASTER TO master_use_gtid=SLAVE_POS;
--replace_result $old_gtid_pos OLD_GTID_POS
eval SET GLOBAL gtid_slave_pos= '$old_gtid_pos';
--source include/start_slave.inc
--sync_with_master
SELECT * FROM t1 ORDER BY a;
# Clean up: restore the gtid_strict_mode values that were saved in user
# variables at the start of the test, drop the test table on server_1, and
# shut the replication setup down.
--connection server_2
SET GLOBAL gtid_strict_mode= @slave_old_strict;
--connection server_1
DROP TABLE t1;
SET GLOBAL gtid_strict_mode= @master_old_strict;
--source include/rpl_end.inc
...@@ -1237,8 +1237,8 @@ rpl_binlog_state::append_pos(String *str) ...@@ -1237,8 +1237,8 @@ rpl_binlog_state::append_pos(String *str)
slave_connection_state::slave_connection_state() slave_connection_state::slave_connection_state()
{ {
my_hash_init(&hash, &my_charset_bin, 32, my_hash_init(&hash, &my_charset_bin, 32,
offsetof(rpl_gtid, domain_id), sizeof(uint32), NULL, my_free, offsetof(entry, gtid) + offsetof(rpl_gtid, domain_id),
HASH_UNIQUE); sizeof(uint32), NULL, my_free, HASH_UNIQUE);
} }
...@@ -1272,7 +1272,7 @@ slave_connection_state::load(char *slave_request, size_t len) ...@@ -1272,7 +1272,7 @@ slave_connection_state::load(char *slave_request, size_t len)
char *p, *end; char *p, *end;
uchar *rec; uchar *rec;
rpl_gtid *gtid; rpl_gtid *gtid;
const rpl_gtid *gtid2; const entry *e;
reset(); reset();
p= slave_request; p= slave_request;
...@@ -1281,27 +1281,28 @@ slave_connection_state::load(char *slave_request, size_t len) ...@@ -1281,27 +1281,28 @@ slave_connection_state::load(char *slave_request, size_t len)
return 0; return 0;
for (;;) for (;;)
{ {
if (!(rec= (uchar *)my_malloc(sizeof(*gtid), MYF(MY_WME)))) if (!(rec= (uchar *)my_malloc(sizeof(entry), MYF(MY_WME))))
{ {
my_error(ER_OUTOFMEMORY, MYF(0), sizeof(*gtid)); my_error(ER_OUTOFMEMORY, MYF(0), sizeof(*gtid));
return 1; return 1;
} }
gtid= (rpl_gtid *)rec; gtid= &((entry *)rec)->gtid;
if (gtid_parser_helper(&p, end, gtid)) if (gtid_parser_helper(&p, end, gtid))
{ {
my_free(rec); my_free(rec);
my_error(ER_INCORRECT_GTID_STATE, MYF(0)); my_error(ER_INCORRECT_GTID_STATE, MYF(0));
return 1; return 1;
} }
if ((gtid2= (const rpl_gtid *) if ((e= (const entry *)
my_hash_search(&hash, (const uchar *)(&gtid->domain_id), 0))) my_hash_search(&hash, (const uchar *)(&gtid->domain_id), 0)))
{ {
my_error(ER_DUPLICATE_GTID_DOMAIN, MYF(0), gtid->domain_id, my_error(ER_DUPLICATE_GTID_DOMAIN, MYF(0), gtid->domain_id,
gtid->server_id, (ulonglong)gtid->seq_no, gtid2->domain_id, gtid->server_id, (ulonglong)gtid->seq_no, e->gtid.domain_id,
gtid2->server_id, (ulonglong)gtid2->seq_no, gtid->domain_id); e->gtid.server_id, (ulonglong)e->gtid.seq_no, gtid->domain_id);
my_free(rec); my_free(rec);
return 1; return 1;
} }
((entry *)rec)->flags= 0;
if (my_hash_insert(&hash, rec)) if (my_hash_insert(&hash, rec))
{ {
my_free(rec); my_free(rec);
...@@ -1357,30 +1358,42 @@ slave_connection_state::load(rpl_slave_state *state, ...@@ -1357,30 +1358,42 @@ slave_connection_state::load(rpl_slave_state *state,
} }
slave_connection_state::entry *
slave_connection_state::find_entry(uint32 domain_id)
{
return (entry *) my_hash_search(&hash, (const uchar *)(&domain_id), 0);
}
rpl_gtid * rpl_gtid *
slave_connection_state::find(uint32 domain_id) slave_connection_state::find(uint32 domain_id)
{ {
return (rpl_gtid *) my_hash_search(&hash, (const uchar *)(&domain_id), 0); entry *e= find_entry(domain_id);
if (!e)
return NULL;
return &e->gtid;
} }
int int
slave_connection_state::update(const rpl_gtid *in_gtid) slave_connection_state::update(const rpl_gtid *in_gtid)
{ {
rpl_gtid *new_gtid; entry *e;
uchar *rec= my_hash_search(&hash, (const uchar *)(&in_gtid->domain_id), 0); uchar *rec= my_hash_search(&hash, (const uchar *)(&in_gtid->domain_id), 0);
if (rec) if (rec)
{ {
memcpy(rec, in_gtid, sizeof(*in_gtid)); e= (entry *)rec;
e->gtid= *in_gtid;
return 0; return 0;
} }
if (!(new_gtid= (rpl_gtid *)my_malloc(sizeof(*new_gtid), MYF(MY_WME)))) if (!(e= (entry *)my_malloc(sizeof(*e), MYF(MY_WME))))
return 1; return 1;
memcpy(new_gtid, in_gtid, sizeof(*new_gtid)); e->gtid= *in_gtid;
if (my_hash_insert(&hash, (uchar *)new_gtid)) e->flags= 0;
if (my_hash_insert(&hash, (uchar *)e))
{ {
my_free(new_gtid); my_free(e);
return 1; return 1;
} }
...@@ -1394,7 +1407,7 @@ slave_connection_state::remove(const rpl_gtid *in_gtid) ...@@ -1394,7 +1407,7 @@ slave_connection_state::remove(const rpl_gtid *in_gtid)
uchar *rec= my_hash_search(&hash, (const uchar *)(&in_gtid->domain_id), 0); uchar *rec= my_hash_search(&hash, (const uchar *)(&in_gtid->domain_id), 0);
#ifndef DBUG_OFF #ifndef DBUG_OFF
bool err; bool err;
rpl_gtid *slave_gtid= (rpl_gtid *)rec; rpl_gtid *slave_gtid= &((entry *)rec)->gtid;
DBUG_ASSERT(rec /* We should never try to remove not present domain_id. */); DBUG_ASSERT(rec /* We should never try to remove not present domain_id. */);
DBUG_ASSERT(slave_gtid->server_id == in_gtid->server_id); DBUG_ASSERT(slave_gtid->server_id == in_gtid->server_id);
DBUG_ASSERT(slave_gtid->seq_no == in_gtid->seq_no); DBUG_ASSERT(slave_gtid->seq_no == in_gtid->seq_no);
...@@ -1423,8 +1436,8 @@ slave_connection_state::append_to_string(String *out_str) ...@@ -1423,8 +1436,8 @@ slave_connection_state::append_to_string(String *out_str)
first= true; first= true;
for (i= 0; i < hash.records; ++i) for (i= 0; i < hash.records; ++i)
{ {
const rpl_gtid *gtid= (const rpl_gtid *)my_hash_element(&hash, i); const entry *e= (const entry *)my_hash_element(&hash, i);
if (rpl_slave_state_tostring_helper(out_str, gtid, &first)) if (rpl_slave_state_tostring_helper(out_str, &e->gtid, &first))
return 1; return 1;
} }
return 0; return 0;
......
...@@ -174,7 +174,14 @@ struct rpl_binlog_state ...@@ -174,7 +174,14 @@ struct rpl_binlog_state
*/ */
struct slave_connection_state struct slave_connection_state
{ {
/* Mapping from domain_id to the GTID requested for that domain. */ struct entry {
rpl_gtid gtid;
uint32 flags;
};
static const uint32 START_OWN_SLAVE_POS= 0x1;
static const uint32 START_ON_EMPTY_DOMAIN= 0x2;
/* Mapping from domain_id to the entry with GTID requested for that domain. */
HASH hash; HASH hash;
slave_connection_state(); slave_connection_state();
...@@ -185,6 +192,7 @@ struct slave_connection_state ...@@ -185,6 +192,7 @@ struct slave_connection_state
int load(const rpl_gtid *gtid_list, uint32 count); int load(const rpl_gtid *gtid_list, uint32 count);
int load(rpl_slave_state *state, rpl_gtid *extra_gtids, uint32 num_extra); int load(rpl_slave_state *state, rpl_gtid *extra_gtids, uint32 num_extra);
rpl_gtid *find(uint32 domain_id); rpl_gtid *find(uint32 domain_id);
entry *find_entry(uint32 domain_id);
int update(const rpl_gtid *in_gtid); int update(const rpl_gtid *in_gtid);
void remove(const rpl_gtid *gtid); void remove(const rpl_gtid *gtid);
ulong count() const { return hash.records; } ulong count() const { return hash.records; }
......
...@@ -6555,3 +6555,5 @@ ER_INSIDE_TRANSACTION_PREVENTS_SWITCH_GTID_DOMAIN_ID_SEQ_NO ...@@ -6555,3 +6555,5 @@ ER_INSIDE_TRANSACTION_PREVENTS_SWITCH_GTID_DOMAIN_ID_SEQ_NO
eng "Cannot modify @@session.gtid_domain_id or @@session.gtid_seq_no inside a transaction" eng "Cannot modify @@session.gtid_domain_id or @@session.gtid_seq_no inside a transaction"
ER_STORED_FUNCTION_PREVENTS_SWITCH_GTID_DOMAIN_ID_SEQ_NO ER_STORED_FUNCTION_PREVENTS_SWITCH_GTID_DOMAIN_ID_SEQ_NO
eng "Cannot modify @@session.gtid_domain_id or @@session.gtid_seq_no inside a stored function or trigger" eng "Cannot modify @@session.gtid_domain_id or @@session.gtid_seq_no inside a stored function or trigger"
ER_GTID_POSITION_NOT_FOUND_IN_BINLOG2
eng "Connecting slave requested to start from GTID %u-%u-%llu, which is not in the master's binlog. Since the master's binlog contains GTIDs with higher sequence numbers, it probably means that the slave has diverged due to executing extra errorneous transactions"
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment