Commit f0deff86 authored by unknown's avatar unknown

MDEV-4820: Empty master does not give error for slave GTID position that does...

MDEV-4820: Empty master does not give error for slave GTID position that does not exist in the binlog

The main bug here was the following situation:

Suppose we set up a completely new master2 as an extra master (multi-master)
for an existing slave that already has a different master1 for domain_id=0.
When the slave tries to connect to master2, master2 will not have anything
that the slave requests in domain_id=0, but that is fine, as master2 is
supposedly meant to serve e.g. domain_id=1. (This is MDEV-4485.)

But suppose that master2 then actually starts sending events from
domain_id=0. In this case, the fix for MDEV-4485 was incomplete, and the code
would fail to give the error that the position requested by the slave in
domain_id=0 was missing from the binlogs of master2. This could lead to lost
events or completely wrong replication.

The patch for this bug fixes this issue.

In addition, it cleans up the code a bit by getting rid of the fake_gtid_hash.
The error message given when slave and master have diverged (alternate
futures) is also clarified, as requested in the bug description.
parent f08946c0
include/rpl_init.inc [topology=1->2]
include/stop_slave.inc
SET @slave_old_strict= @@GLOBAL.gtid_strict_mode;
SET GLOBAL gtid_strict_mode= 1;
CHANGE MASTER TO master_use_gtid=slave_pos;
include/start_slave.inc
SET @master_old_strict= @@GLOBAL.gtid_strict_mode;
SET GLOBAL gtid_strict_mode= 1;
CREATE TABLE t1 (a INT PRIMARY KEY);
INSERT INTO t1 VALUES (1);
SELECT * FROM t1 ORDER BY a;
a
1
include/stop_slave.inc
INSERT INTO t1 VALUES (2);
INSERT INTO t1 VALUES (3);
RESET MASTER;
SET GLOBAL gtid_slave_pos= 'OLD_GTID_POS';
include/start_slave.inc
INSERT INTO t1 VALUES (4);
SET sql_log_bin= 0;
CALL mtr.add_suppression("The binlog on the master is missing the GTID");
SET sql_log_bin= 1;
include/wait_for_slave_io_error.inc [errno=1236]
STOP SLAVE SQL_THREAD;
SET GLOBAL gtid_slave_pos= 'OLD_GTID_POS';
include/start_slave.inc
SELECT * FROM t1 ORDER BY a;
a
1
4
include/stop_slave.inc
RESET SLAVE ALL;
RESET MASTER;
SET GLOBAL gtid_slave_pos= '0-2-10';
CHANGE MASTER TO master_host = '127.0.0.1', master_port = SERVER_MYPORT_2,
master_user= 'root', master_use_gtid=CURRENT_POS;
START SLAVE;
INSERT INTO t1 VALUES (11);
SET sql_log_bin= 0;
CALL mtr.add_suppression("which is not in the master's binlog. Since the master's binlog contains GTIDs with higher sequence numbers, it probably means that the slave has diverged");
SET sql_log_bin= 1;
include/wait_for_slave_io_error.inc [errno=1236]
STOP SLAVE SQL_THREAD;
SET GLOBAL gtid_slave_pos= '0-2-10';
SET GLOBAL gtid_strict_mode= 0;
include/start_slave.inc
SELECT * FROM t1 ORDER BY a;
a
1
2
3
4
11
include/stop_slave.inc
RESET SLAVE ALL;
INSERT INTO t1 VALUES (12);
INSERT INTO t1 VALUES (22);
CHANGE MASTER TO master_host = '127.0.0.1', master_port = SERVER_MYPORT_1,
master_user= 'root', master_use_gtid=CURRENT_POS;
START SLAVE;
SET sql_log_bin= 0;
CALL mtr.add_suppression("which is not in the master's binlog. Since the master's binlog contains GTIDs with higher sequence numbers, it probably means that the slave has diverged");
SET sql_log_bin= 1;
include/wait_for_slave_io_error.inc [errno=1236]
STOP SLAVE SQL_THREAD;
SET GLOBAL gtid_strict_mode= 0;
CHANGE MASTER TO master_use_gtid=SLAVE_POS;
SET GLOBAL gtid_slave_pos= 'OLD_GTID_POS';
Warnings:
Warning 1947 Specified GTID OLD_GTID_POS conflicts with the binary log which contains a more recent GTID 0-2-12. If MASTER_GTID_POS=CURRENT_POS is used, the binlog position will override the new value of @@gtid_slave_pos.
include/start_slave.inc
SELECT * FROM t1 ORDER BY a;
a
1
4
11
12
22
SET GLOBAL gtid_strict_mode= @slave_old_strict;
DROP TABLE t1;
SET GLOBAL gtid_strict_mode= @master_old_strict;
include/rpl_end.inc
# Test for MDEV-4820: a master must report an error (slave I/O errno 1236)
# when a connecting slave requests a GTID start position that is missing from
# the master's binlog.
#
# Set up replication with server_1 as master and server_2 as slave.
--let $rpl_topology=1->2
--source include/rpl_init.inc
# On the slave: remember the old gtid_strict_mode, enable strict mode and
# switch the connection to master_use_gtid=slave_pos.
--connection server_2
--source include/stop_slave.inc
SET @slave_old_strict= @@GLOBAL.gtid_strict_mode;
SET GLOBAL gtid_strict_mode= 1;
CHANGE MASTER TO master_use_gtid=slave_pos;
--source include/start_slave.inc
# On the master: remember the old gtid_strict_mode, enable strict mode, create
# the test table and replicate a first row to the slave.
--connection server_1
SET @master_old_strict= @@GLOBAL.gtid_strict_mode;
SET GLOBAL gtid_strict_mode= 1;
CREATE TABLE t1 (a INT PRIMARY KEY);
INSERT INTO t1 VALUES (1);
--save_master_pos
--connection server_2
--sync_with_master
SELECT * FROM t1 ORDER BY a;
# Scenario 1. With the slave stopped, add rows 2 and 3 on the master, remember
# its gtid_current_pos, then RESET MASTER and set gtid_slave_pos on the master
# to the remembered position.
# NOTE(review): presumably this leaves the master with a binlog that no longer
# contains the position the slave will request -- confirm against the server
# code.
--source include/stop_slave.inc
--connection server_1
INSERT INTO t1 VALUES (2);
INSERT INTO t1 VALUES (3);
--let $old_gtid_pos= `SELECT @@GLOBAL.gtid_current_pos`
RESET MASTER;
--replace_result $old_gtid_pos OLD_GTID_POS
eval SET GLOBAL gtid_slave_pos= '$old_gtid_pos';
# Restart the slave and log a new event (row 4) on the master.
--connection server_2
--source include/start_slave.inc
--connection server_1
INSERT INTO t1 VALUES (4);
--save_master_pos
# The slave I/O thread is expected to stop with error 1236. The matching
# error-log message is suppressed; sql_log_bin=0 keeps the suppression call
# itself out of the binlog.
--connection server_2
SET sql_log_bin= 0;
CALL mtr.add_suppression("The binlog on the master is missing the GTID");
SET sql_log_bin= 1;
--let $slave_io_errno=1236
--source include/wait_for_slave_io_error.inc
# Recover: move the slave position to the master's remembered position,
# restart the slave, wait for it to catch up and show the table contents.
STOP SLAVE SQL_THREAD;
--replace_result $old_gtid_pos OLD_GTID_POS
eval SET GLOBAL gtid_slave_pos= '$old_gtid_pos';
--source include/start_slave.inc
--sync_with_master
SELECT * FROM t1 ORDER BY a;
# Scenario 2. Reverse the roles: wipe the replication configuration and the
# binlog on server_2, give it gtid_slave_pos 0-2-10, and make server_1 a
# slave of server_2 using master_use_gtid=CURRENT_POS.
--source include/stop_slave.inc
RESET SLAVE ALL;
RESET MASTER;
SET GLOBAL gtid_slave_pos= '0-2-10';
--connection server_1
--replace_result $SERVER_MYPORT_2 SERVER_MYPORT_2
eval CHANGE MASTER TO master_host = '127.0.0.1', master_port = $SERVER_MYPORT_2,
master_user= 'root', master_use_gtid=CURRENT_POS;
START SLAVE;
# Log an event (row 11) on server_2, now acting as master.
--connection server_2
INSERT INTO t1 VALUES (11);
--save_master_pos
# server_1 is expected to stop with I/O error 1236; the suppressed text is the
# "slave has diverged" variant of the error message.
--connection server_1
SET sql_log_bin= 0;
CALL mtr.add_suppression("which is not in the master's binlog. Since the master's binlog contains GTIDs with higher sequence numbers, it probably means that the slave has diverged");
SET sql_log_bin= 1;
--let $slave_io_errno=1236
--source include/wait_for_slave_io_error.inc
# Recover on server_1: set its position to 0-2-10 (the same value given to
# server_2 above), turn strict mode off, restart and verify the table.
--connection server_1
STOP SLAVE SQL_THREAD;
SET GLOBAL gtid_slave_pos= '0-2-10';
SET GLOBAL gtid_strict_mode= 0;
--source include/start_slave.inc
--sync_with_master
SELECT * FROM t1 ORDER BY a;
# Scenario 3. Drop the slave configuration on server_1, remember its
# gtid_current_pos, then insert a different row on each server (12 on
# server_1, 22 on server_2).
--source include/stop_slave.inc
RESET SLAVE ALL;
--let $old_gtid_pos= `SELECT @@GLOBAL.gtid_current_pos`
INSERT INTO t1 VALUES (12);
--save_master_pos
--connection server_2
INSERT INTO t1 VALUES (22);
# Make server_2 a slave of server_1 again using CURRENT_POS; the I/O thread is
# expected to stop with error 1236 and the "slave has diverged" message.
--replace_result $SERVER_MYPORT_1 SERVER_MYPORT_1
eval CHANGE MASTER TO master_host = '127.0.0.1', master_port = $SERVER_MYPORT_1,
master_user= 'root', master_use_gtid=CURRENT_POS;
START SLAVE;
SET sql_log_bin= 0;
CALL mtr.add_suppression("which is not in the master's binlog. Since the master's binlog contains GTIDs with higher sequence numbers, it probably means that the slave has diverged");
SET sql_log_bin= 1;
--let $slave_io_errno=1236
--source include/wait_for_slave_io_error.inc
# Recover: turn strict mode off, switch to SLAVE_POS and restart from the
# position remembered on server_1 before row 12 was inserted.
STOP SLAVE SQL_THREAD;
SET GLOBAL gtid_strict_mode= 0;
CHANGE MASTER TO master_use_gtid=SLAVE_POS;
--replace_result $old_gtid_pos OLD_GTID_POS
eval SET GLOBAL gtid_slave_pos= '$old_gtid_pos';
--source include/start_slave.inc
--sync_with_master
SELECT * FROM t1 ORDER BY a;
# Clean up.
# Restore the saved gtid_strict_mode on both servers and drop the test table.
--connection server_2
SET GLOBAL gtid_strict_mode= @slave_old_strict;
--connection server_1
DROP TABLE t1;
SET GLOBAL gtid_strict_mode= @master_old_strict;
--source include/rpl_end.inc
...@@ -1237,8 +1237,8 @@ rpl_binlog_state::append_pos(String *str) ...@@ -1237,8 +1237,8 @@ rpl_binlog_state::append_pos(String *str)
slave_connection_state::slave_connection_state() slave_connection_state::slave_connection_state()
{ {
my_hash_init(&hash, &my_charset_bin, 32, my_hash_init(&hash, &my_charset_bin, 32,
offsetof(rpl_gtid, domain_id), sizeof(uint32), NULL, my_free, offsetof(entry, gtid) + offsetof(rpl_gtid, domain_id),
HASH_UNIQUE); sizeof(uint32), NULL, my_free, HASH_UNIQUE);
} }
...@@ -1272,7 +1272,7 @@ slave_connection_state::load(char *slave_request, size_t len) ...@@ -1272,7 +1272,7 @@ slave_connection_state::load(char *slave_request, size_t len)
char *p, *end; char *p, *end;
uchar *rec; uchar *rec;
rpl_gtid *gtid; rpl_gtid *gtid;
const rpl_gtid *gtid2; const entry *e;
reset(); reset();
p= slave_request; p= slave_request;
...@@ -1281,27 +1281,28 @@ slave_connection_state::load(char *slave_request, size_t len) ...@@ -1281,27 +1281,28 @@ slave_connection_state::load(char *slave_request, size_t len)
return 0; return 0;
for (;;) for (;;)
{ {
if (!(rec= (uchar *)my_malloc(sizeof(*gtid), MYF(MY_WME)))) if (!(rec= (uchar *)my_malloc(sizeof(entry), MYF(MY_WME))))
{ {
my_error(ER_OUTOFMEMORY, MYF(0), sizeof(*gtid)); my_error(ER_OUTOFMEMORY, MYF(0), sizeof(*gtid));
return 1; return 1;
} }
gtid= (rpl_gtid *)rec; gtid= &((entry *)rec)->gtid;
if (gtid_parser_helper(&p, end, gtid)) if (gtid_parser_helper(&p, end, gtid))
{ {
my_free(rec); my_free(rec);
my_error(ER_INCORRECT_GTID_STATE, MYF(0)); my_error(ER_INCORRECT_GTID_STATE, MYF(0));
return 1; return 1;
} }
if ((gtid2= (const rpl_gtid *) if ((e= (const entry *)
my_hash_search(&hash, (const uchar *)(&gtid->domain_id), 0))) my_hash_search(&hash, (const uchar *)(&gtid->domain_id), 0)))
{ {
my_error(ER_DUPLICATE_GTID_DOMAIN, MYF(0), gtid->domain_id, my_error(ER_DUPLICATE_GTID_DOMAIN, MYF(0), gtid->domain_id,
gtid->server_id, (ulonglong)gtid->seq_no, gtid2->domain_id, gtid->server_id, (ulonglong)gtid->seq_no, e->gtid.domain_id,
gtid2->server_id, (ulonglong)gtid2->seq_no, gtid->domain_id); e->gtid.server_id, (ulonglong)e->gtid.seq_no, gtid->domain_id);
my_free(rec); my_free(rec);
return 1; return 1;
} }
((entry *)rec)->flags= 0;
if (my_hash_insert(&hash, rec)) if (my_hash_insert(&hash, rec))
{ {
my_free(rec); my_free(rec);
...@@ -1357,30 +1358,42 @@ slave_connection_state::load(rpl_slave_state *state, ...@@ -1357,30 +1358,42 @@ slave_connection_state::load(rpl_slave_state *state,
} }
/*
  Look up the hash record stored for a replication domain.

  The hash is searched with domain_id as the key, and the record that
  my_hash_search() returns is handed back as an entry pointer.
  NOTE(review): callers test the result against NULL, so a missing domain
  presumably yields NULL -- confirm against my_hash_search().
*/
slave_connection_state::entry *
slave_connection_state::find_entry(uint32 domain_id)
{
  const uchar *key= (const uchar *)(&domain_id);
  uchar *rec= my_hash_search(&hash, key, 0);
  return (entry *) rec;
}
rpl_gtid * rpl_gtid *
slave_connection_state::find(uint32 domain_id) slave_connection_state::find(uint32 domain_id)
{ {
return (rpl_gtid *) my_hash_search(&hash, (const uchar *)(&domain_id), 0); entry *e= find_entry(domain_id);
if (!e)
return NULL;
return &e->gtid;
} }
int int
slave_connection_state::update(const rpl_gtid *in_gtid) slave_connection_state::update(const rpl_gtid *in_gtid)
{ {
rpl_gtid *new_gtid; entry *e;
uchar *rec= my_hash_search(&hash, (const uchar *)(&in_gtid->domain_id), 0); uchar *rec= my_hash_search(&hash, (const uchar *)(&in_gtid->domain_id), 0);
if (rec) if (rec)
{ {
memcpy(rec, in_gtid, sizeof(*in_gtid)); e= (entry *)rec;
e->gtid= *in_gtid;
return 0; return 0;
} }
if (!(new_gtid= (rpl_gtid *)my_malloc(sizeof(*new_gtid), MYF(MY_WME)))) if (!(e= (entry *)my_malloc(sizeof(*e), MYF(MY_WME))))
return 1; return 1;
memcpy(new_gtid, in_gtid, sizeof(*new_gtid)); e->gtid= *in_gtid;
if (my_hash_insert(&hash, (uchar *)new_gtid)) e->flags= 0;
if (my_hash_insert(&hash, (uchar *)e))
{ {
my_free(new_gtid); my_free(e);
return 1; return 1;
} }
...@@ -1394,7 +1407,7 @@ slave_connection_state::remove(const rpl_gtid *in_gtid) ...@@ -1394,7 +1407,7 @@ slave_connection_state::remove(const rpl_gtid *in_gtid)
uchar *rec= my_hash_search(&hash, (const uchar *)(&in_gtid->domain_id), 0); uchar *rec= my_hash_search(&hash, (const uchar *)(&in_gtid->domain_id), 0);
#ifndef DBUG_OFF #ifndef DBUG_OFF
bool err; bool err;
rpl_gtid *slave_gtid= (rpl_gtid *)rec; rpl_gtid *slave_gtid= &((entry *)rec)->gtid;
DBUG_ASSERT(rec /* We should never try to remove not present domain_id. */); DBUG_ASSERT(rec /* We should never try to remove not present domain_id. */);
DBUG_ASSERT(slave_gtid->server_id == in_gtid->server_id); DBUG_ASSERT(slave_gtid->server_id == in_gtid->server_id);
DBUG_ASSERT(slave_gtid->seq_no == in_gtid->seq_no); DBUG_ASSERT(slave_gtid->seq_no == in_gtid->seq_no);
...@@ -1423,8 +1436,8 @@ slave_connection_state::append_to_string(String *out_str) ...@@ -1423,8 +1436,8 @@ slave_connection_state::append_to_string(String *out_str)
first= true; first= true;
for (i= 0; i < hash.records; ++i) for (i= 0; i < hash.records; ++i)
{ {
const rpl_gtid *gtid= (const rpl_gtid *)my_hash_element(&hash, i); const entry *e= (const entry *)my_hash_element(&hash, i);
if (rpl_slave_state_tostring_helper(out_str, gtid, &first)) if (rpl_slave_state_tostring_helper(out_str, &e->gtid, &first))
return 1; return 1;
} }
return 0; return 0;
......
...@@ -174,7 +174,14 @@ struct rpl_binlog_state ...@@ -174,7 +174,14 @@ struct rpl_binlog_state
*/ */
struct slave_connection_state struct slave_connection_state
{ {
/* Mapping from domain_id to the GTID requested for that domain. */ struct entry {
rpl_gtid gtid;
uint32 flags;
};
static const uint32 START_OWN_SLAVE_POS= 0x1;
static const uint32 START_ON_EMPTY_DOMAIN= 0x2;
/* Mapping from domain_id to the entry with GTID requested for that domain. */
HASH hash; HASH hash;
slave_connection_state(); slave_connection_state();
...@@ -185,6 +192,7 @@ struct slave_connection_state ...@@ -185,6 +192,7 @@ struct slave_connection_state
int load(const rpl_gtid *gtid_list, uint32 count); int load(const rpl_gtid *gtid_list, uint32 count);
int load(rpl_slave_state *state, rpl_gtid *extra_gtids, uint32 num_extra); int load(rpl_slave_state *state, rpl_gtid *extra_gtids, uint32 num_extra);
rpl_gtid *find(uint32 domain_id); rpl_gtid *find(uint32 domain_id);
entry *find_entry(uint32 domain_id);
int update(const rpl_gtid *in_gtid); int update(const rpl_gtid *in_gtid);
void remove(const rpl_gtid *gtid); void remove(const rpl_gtid *gtid);
ulong count() const { return hash.records; } ulong count() const { return hash.records; }
......
...@@ -6555,3 +6555,5 @@ ER_INSIDE_TRANSACTION_PREVENTS_SWITCH_GTID_DOMAIN_ID_SEQ_NO ...@@ -6555,3 +6555,5 @@ ER_INSIDE_TRANSACTION_PREVENTS_SWITCH_GTID_DOMAIN_ID_SEQ_NO
eng "Cannot modify @@session.gtid_domain_id or @@session.gtid_seq_no inside a transaction" eng "Cannot modify @@session.gtid_domain_id or @@session.gtid_seq_no inside a transaction"
ER_STORED_FUNCTION_PREVENTS_SWITCH_GTID_DOMAIN_ID_SEQ_NO ER_STORED_FUNCTION_PREVENTS_SWITCH_GTID_DOMAIN_ID_SEQ_NO
eng "Cannot modify @@session.gtid_domain_id or @@session.gtid_seq_no inside a stored function or trigger" eng "Cannot modify @@session.gtid_domain_id or @@session.gtid_seq_no inside a stored function or trigger"
ER_GTID_POSITION_NOT_FOUND_IN_BINLOG2
eng "Connecting slave requested to start from GTID %u-%u-%llu, which is not in the master's binlog. Since the master's binlog contains GTIDs with higher sequence numbers, it probably means that the slave has diverged due to executing extra errorneous transactions"
...@@ -884,6 +884,28 @@ contains_all_slave_gtid(slave_connection_state *st, Gtid_list_log_event *glev) ...@@ -884,6 +884,28 @@ contains_all_slave_gtid(slave_connection_state *st, Gtid_list_log_event *glev)
} }
static void
give_error_start_pos_missing_in_binlog(int *err, const char **errormsg,
rpl_gtid *error_gtid)
{
rpl_gtid binlog_gtid;
if (mysql_bin_log.lookup_domain_in_binlog_state(error_gtid->domain_id,
&binlog_gtid) &&
binlog_gtid.seq_no >= error_gtid->seq_no)
{
*errormsg= "Requested slave GTID state not found in binlog. The slave has "
"probably diverged due to executing errorneous transactions";
*err= ER_GTID_POSITION_NOT_FOUND_IN_BINLOG2;
}
else
{
*errormsg= "Requested slave GTID state not found in binlog";
*err= ER_GTID_POSITION_NOT_FOUND_IN_BINLOG;
}
}
/* /*
Check the start GTID state requested by the slave against our binlog state. Check the start GTID state requested by the slave against our binlog state.
...@@ -894,43 +916,51 @@ contains_all_slave_gtid(slave_connection_state *st, Gtid_list_log_event *glev) ...@@ -894,43 +916,51 @@ contains_all_slave_gtid(slave_connection_state *st, Gtid_list_log_event *glev)
static int static int
check_slave_start_position(THD *thd, slave_connection_state *st, check_slave_start_position(THD *thd, slave_connection_state *st,
const char **errormsg, rpl_gtid *error_gtid, const char **errormsg, rpl_gtid *error_gtid,
slave_connection_state *until_gtid_state, slave_connection_state *until_gtid_state)
HASH *fake_gtid_hash)
{ {
uint32 i; uint32 i;
int err; int err;
rpl_gtid **delete_list= NULL; slave_connection_state::entry **delete_list= NULL;
uint32 delete_idx= 0; uint32 delete_idx= 0;
bool slave_state_loaded= false;
if (rpl_load_gtid_slave_state(thd))
{
*errormsg= "Failed to load replication slave GTID state";
err= ER_CANNOT_LOAD_SLAVE_GTID_STATE;
goto end;
}
for (i= 0; i < st->hash.records; ++i) for (i= 0; i < st->hash.records; ++i)
{ {
rpl_gtid *slave_gtid= (rpl_gtid *)my_hash_element(&st->hash, i); slave_connection_state::entry *slave_gtid_entry=
(slave_connection_state::entry *)my_hash_element(&st->hash, i);
rpl_gtid *slave_gtid= &slave_gtid_entry->gtid;
rpl_gtid master_gtid; rpl_gtid master_gtid;
rpl_gtid master_replication_gtid; rpl_gtid master_replication_gtid;
rpl_gtid start_gtid; rpl_gtid start_gtid;
bool start_at_own_slave_pos=
rpl_global_gtid_slave_state.domain_to_gtid(slave_gtid->domain_id,
&master_replication_gtid) &&
slave_gtid->server_id == master_replication_gtid.server_id &&
slave_gtid->seq_no == master_replication_gtid.seq_no;
if (mysql_bin_log.find_in_binlog_state(slave_gtid->domain_id, if (mysql_bin_log.find_in_binlog_state(slave_gtid->domain_id,
slave_gtid->server_id, slave_gtid->server_id,
&master_gtid) && &master_gtid) &&
master_gtid.seq_no >= slave_gtid->seq_no) master_gtid.seq_no >= slave_gtid->seq_no)
continue;
if (!slave_state_loaded)
{ {
if (rpl_load_gtid_slave_state(thd)) /*
{ If connecting slave requests to start at the GTID we last applied when
*errormsg= "Failed to load replication slave GTID state"; we were ourselves a slave, then this GTID may not exist in our binlog
err= ER_CANNOT_LOAD_SLAVE_GTID_STATE; (in case of --log-slave-updates=0). So set the flag to disable the
goto end; error about missing GTID in the binlog in this case.
} */
slave_state_loaded= true; if (start_at_own_slave_pos)
slave_gtid_entry->flags|= slave_connection_state::START_OWN_SLAVE_POS;
continue;
} }
if (!rpl_global_gtid_slave_state.domain_to_gtid(slave_gtid->domain_id, if (!start_at_own_slave_pos)
&master_replication_gtid) ||
slave_gtid->server_id != master_replication_gtid.server_id ||
slave_gtid->seq_no != master_replication_gtid.seq_no)
{ {
rpl_gtid domain_gtid; rpl_gtid domain_gtid;
rpl_gtid *until_gtid; rpl_gtid *until_gtid;
...@@ -942,7 +972,12 @@ check_slave_start_position(THD *thd, slave_connection_state *st, ...@@ -942,7 +972,12 @@ check_slave_start_position(THD *thd, slave_connection_state *st,
We do not have anything in this domain, neither in the binlog nor We do not have anything in this domain, neither in the binlog nor
in the slave state. So we are probably one master in a multi-master in the slave state. So we are probably one master in a multi-master
setup, and this domain is served by a different master. setup, and this domain is served by a different master.
But set a flag so that if we then ever _do_ happen to encounter
anything in this domain, then we will re-check that the requested
slave position exists, and give the error at that time if not.
*/ */
slave_gtid_entry->flags|= slave_connection_state::START_ON_EMPTY_DOMAIN;
continue; continue;
} }
...@@ -966,9 +1001,8 @@ check_slave_start_position(THD *thd, slave_connection_state *st, ...@@ -966,9 +1001,8 @@ check_slave_start_position(THD *thd, slave_connection_state *st,
continue; continue;
} }
*errormsg= "Requested slave GTID state not found in binlog";
*error_gtid= *slave_gtid; *error_gtid= *slave_gtid;
err= ER_GTID_POSITION_NOT_FOUND_IN_BINLOG; give_error_start_pos_missing_in_binlog(&err, errormsg, error_gtid);
goto end; goto end;
} }
...@@ -999,7 +1033,6 @@ check_slave_start_position(THD *thd, slave_connection_state *st, ...@@ -999,7 +1033,6 @@ check_slave_start_position(THD *thd, slave_connection_state *st,
&start_gtid) && &start_gtid) &&
start_gtid.seq_no > slave_gtid->seq_no) start_gtid.seq_no > slave_gtid->seq_no)
{ {
rpl_gtid *fake_gtid;
/* /*
Start replication within this domain at the first GTID that we logged Start replication within this domain at the first GTID that we logged
ourselves after becoming a master. ourselves after becoming a master.
...@@ -1009,20 +1042,7 @@ check_slave_start_position(THD *thd, slave_connection_state *st, ...@@ -1009,20 +1042,7 @@ check_slave_start_position(THD *thd, slave_connection_state *st,
--gtid-strict-mode. --gtid-strict-mode.
*/ */
slave_gtid->server_id= global_system_variables.server_id; slave_gtid->server_id= global_system_variables.server_id;
if (!(fake_gtid= (rpl_gtid *)my_malloc(sizeof(*fake_gtid), MYF(0)))) slave_gtid_entry->flags|= slave_connection_state::START_OWN_SLAVE_POS;
{
*errormsg= "Out of memory while checking slave start position";
err= ER_OUT_OF_RESOURCES;
goto end;
}
*fake_gtid= *slave_gtid;
if (my_hash_insert(fake_gtid_hash, (uchar *)fake_gtid))
{
my_free(fake_gtid);
*errormsg= "Out of memory while checking slave start position";
err= ER_OUT_OF_RESOURCES;
goto end;
}
} }
else if (mysql_bin_log.lookup_domain_in_binlog_state(slave_gtid->domain_id, else if (mysql_bin_log.lookup_domain_in_binlog_state(slave_gtid->domain_id,
&start_gtid)) &start_gtid))
...@@ -1042,7 +1062,7 @@ check_slave_start_position(THD *thd, slave_connection_state *st, ...@@ -1042,7 +1062,7 @@ check_slave_start_position(THD *thd, slave_connection_state *st,
*/ */
if (!delete_list) if (!delete_list)
{ {
if (!(delete_list= (rpl_gtid **) if (!(delete_list= (slave_connection_state::entry **)
my_malloc(sizeof(*delete_list) * st->hash.records, MYF(MY_WME)))) my_malloc(sizeof(*delete_list) * st->hash.records, MYF(MY_WME))))
{ {
*errormsg= "Out of memory while checking slave start position"; *errormsg= "Out of memory while checking slave start position";
...@@ -1050,7 +1070,7 @@ check_slave_start_position(THD *thd, slave_connection_state *st, ...@@ -1050,7 +1070,7 @@ check_slave_start_position(THD *thd, slave_connection_state *st,
goto end; goto end;
} }
} }
delete_list[delete_idx++]= slave_gtid; delete_list[delete_idx++]= slave_gtid_entry;
} }
} }
...@@ -1058,7 +1078,7 @@ check_slave_start_position(THD *thd, slave_connection_state *st, ...@@ -1058,7 +1078,7 @@ check_slave_start_position(THD *thd, slave_connection_state *st,
if (delete_list) if (delete_list)
{ {
for (i= 0; i < delete_idx; ++i) for (i= 0; i < delete_idx; ++i)
st->remove(delete_list[i]); st->remove(&(delete_list[i]->gtid));
} }
err= 0; err= 0;
...@@ -1482,7 +1502,7 @@ send_event_to_slave(THD *thd, NET *net, String* const packet, ushort flags, ...@@ -1482,7 +1502,7 @@ send_event_to_slave(THD *thd, NET *net, String* const packet, ushort flags,
enum_gtid_until_state *gtid_until_group, enum_gtid_until_state *gtid_until_group,
rpl_binlog_state *until_binlog_state, rpl_binlog_state *until_binlog_state,
bool slave_gtid_strict_mode, rpl_gtid *error_gtid, bool slave_gtid_strict_mode, rpl_gtid *error_gtid,
bool *send_fake_gtid_list, HASH *fake_gtid_hash) bool *send_fake_gtid_list)
{ {
my_off_t pos; my_off_t pos;
size_t len= packet->length(); size_t len= packet->length();
...@@ -1514,6 +1534,7 @@ send_event_to_slave(THD *thd, NET *net, String* const packet, ushort flags, ...@@ -1514,6 +1534,7 @@ send_event_to_slave(THD *thd, NET *net, String* const packet, ushort flags,
if (event_type == GTID_EVENT && using_gtid_state) if (event_type == GTID_EVENT && using_gtid_state)
{ {
uchar flags2; uchar flags2;
slave_connection_state::entry *gtid_entry;
rpl_gtid *gtid; rpl_gtid *gtid;
if (gtid_state->count() > 0 || until_gtid_state) if (gtid_state->count() > 0 || until_gtid_state)
...@@ -1551,9 +1572,28 @@ send_event_to_slave(THD *thd, NET *net, String* const packet, ushort flags, ...@@ -1551,9 +1572,28 @@ send_event_to_slave(THD *thd, NET *net, String* const packet, ushort flags,
if (gtid_state->count() > 0) if (gtid_state->count() > 0)
{ {
gtid= gtid_state->find(event_gtid.domain_id); gtid_entry= gtid_state->find_entry(event_gtid.domain_id);
if (gtid != NULL) if (gtid_entry != NULL)
{
gtid= &gtid_entry->gtid;
if (gtid_entry->flags & slave_connection_state::START_ON_EMPTY_DOMAIN)
{ {
rpl_gtid master_gtid;
if (!mysql_bin_log.find_in_binlog_state(gtid->domain_id,
gtid->server_id,
&master_gtid) ||
master_gtid.seq_no < gtid->seq_no)
{
int err;
const char *errormsg;
*error_gtid= *gtid;
give_error_start_pos_missing_in_binlog(&err, &errormsg, error_gtid);
my_errno= err;
return errormsg;
}
gtid_entry->flags&= ~(uint32)slave_connection_state::START_ON_EMPTY_DOMAIN;
}
/* Skip this event group if we have not yet reached slave start pos. */ /* Skip this event group if we have not yet reached slave start pos. */
if (event_gtid.server_id != gtid->server_id || if (event_gtid.server_id != gtid->server_id ||
event_gtid.seq_no <= gtid->seq_no) event_gtid.seq_no <= gtid->seq_no)
...@@ -1563,8 +1603,7 @@ send_event_to_slave(THD *thd, NET *net, String* const packet, ushort flags, ...@@ -1563,8 +1603,7 @@ send_event_to_slave(THD *thd, NET *net, String* const packet, ushort flags,
event_gtid.seq_no >= gtid->seq_no) event_gtid.seq_no >= gtid->seq_no)
{ {
if (slave_gtid_strict_mode && event_gtid.seq_no > gtid->seq_no && if (slave_gtid_strict_mode && event_gtid.seq_no > gtid->seq_no &&
!my_hash_search(fake_gtid_hash, !(gtid_entry->flags & slave_connection_state::START_OWN_SLAVE_POS))
(const uchar *)&event_gtid.domain_id, 0))
{ {
/* /*
In strict mode, it is an error if the slave requests to start In strict mode, it is an error if the slave requests to start
...@@ -1839,7 +1878,6 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos, ...@@ -1839,7 +1878,6 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos,
rpl_binlog_state until_binlog_state; rpl_binlog_state until_binlog_state;
bool slave_gtid_strict_mode= false; bool slave_gtid_strict_mode= false;
bool send_fake_gtid_list= false; bool send_fake_gtid_list= false;
HASH fake_gtid_hash;
uint8 current_checksum_alg= BINLOG_CHECKSUM_ALG_UNDEF; uint8 current_checksum_alg= BINLOG_CHECKSUM_ALG_UNDEF;
int old_max_allowed_packet= thd->variables.max_allowed_packet; int old_max_allowed_packet= thd->variables.max_allowed_packet;
...@@ -1853,9 +1891,6 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos, ...@@ -1853,9 +1891,6 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos,
bzero((char*) &log,sizeof(log)); bzero((char*) &log,sizeof(log));
bzero(&error_gtid, sizeof(error_gtid)); bzero(&error_gtid, sizeof(error_gtid));
my_hash_init(&fake_gtid_hash, &my_charset_bin, 32,
offsetof(rpl_gtid, domain_id), sizeof(uint32), NULL, my_free,
HASH_UNIQUE);
/* /*
heartbeat_period from @master_heartbeat_period user variable heartbeat_period from @master_heartbeat_period user variable
*/ */
...@@ -1955,8 +1990,7 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos, ...@@ -1955,8 +1990,7 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos,
goto err; goto err;
} }
if ((error= check_slave_start_position(thd, &gtid_state, &errmsg, if ((error= check_slave_start_position(thd, &gtid_state, &errmsg,
&error_gtid, until_gtid_state, &error_gtid, until_gtid_state)))
&fake_gtid_hash)))
{ {
my_errno= error; my_errno= error;
goto err; goto err;
...@@ -2261,7 +2295,7 @@ impossible position"; ...@@ -2261,7 +2295,7 @@ impossible position";
until_gtid_state, &gtid_until_group, until_gtid_state, &gtid_until_group,
&until_binlog_state, &until_binlog_state,
slave_gtid_strict_mode, &error_gtid, slave_gtid_strict_mode, &error_gtid,
&send_fake_gtid_list, &fake_gtid_hash))) &send_fake_gtid_list)))
{ {
errmsg= tmp_msg; errmsg= tmp_msg;
goto err; goto err;
...@@ -2467,8 +2501,7 @@ impossible position"; ...@@ -2467,8 +2501,7 @@ impossible position";
&gtid_skip_group, until_gtid_state, &gtid_skip_group, until_gtid_state,
&gtid_until_group, &until_binlog_state, &gtid_until_group, &until_binlog_state,
slave_gtid_strict_mode, &error_gtid, slave_gtid_strict_mode, &error_gtid,
&send_fake_gtid_list, &send_fake_gtid_list)))
&fake_gtid_hash)))
{ {
errmsg= tmp_msg; errmsg= tmp_msg;
goto err; goto err;
...@@ -2558,7 +2591,6 @@ impossible position"; ...@@ -2558,7 +2591,6 @@ impossible position";
end: end:
end_io_cache(&log); end_io_cache(&log);
mysql_file_close(file, MYF(MY_WME)); mysql_file_close(file, MYF(MY_WME));
my_hash_free(&fake_gtid_hash);
RUN_HOOK(binlog_transmit, transmit_stop, (thd, flags)); RUN_HOOK(binlog_transmit, transmit_stop, (thd, flags));
my_eof(thd); my_eof(thd);
...@@ -2595,6 +2627,18 @@ err: ...@@ -2595,6 +2627,18 @@ err:
/* Use this error code so slave will know not to try reconnect. */ /* Use this error code so slave will know not to try reconnect. */
my_errno = ER_MASTER_FATAL_ERROR_READING_BINLOG; my_errno = ER_MASTER_FATAL_ERROR_READING_BINLOG;
} }
else if (my_errno == ER_GTID_POSITION_NOT_FOUND_IN_BINLOG2)
{
my_snprintf(error_text, sizeof(error_text),
"Error: connecting slave requested to start from GTID "
"%u-%u-%llu, which is not in the master's binlog. Since the "
"master's binlog contains GTIDs with higher sequence numbers, "
"it probably means that the slave has diverged due to "
"executing extra errorneous transactions",
error_gtid.domain_id, error_gtid.server_id, error_gtid.seq_no);
/* Use this error code so slave will know not to try reconnect. */
my_errno = ER_MASTER_FATAL_ERROR_READING_BINLOG;
}
else if (my_errno == ER_GTID_START_FROM_BINLOG_HOLE) else if (my_errno == ER_GTID_START_FROM_BINLOG_HOLE)
{ {
my_snprintf(error_text, sizeof(error_text), my_snprintf(error_text, sizeof(error_text),
...@@ -2629,7 +2673,6 @@ err: ...@@ -2629,7 +2673,6 @@ err:
mysql_mutex_unlock(&LOCK_thread_count); mysql_mutex_unlock(&LOCK_thread_count);
if (file >= 0) if (file >= 0)
mysql_file_close(file, MYF(MY_WME)); mysql_file_close(file, MYF(MY_WME));
my_hash_free(&fake_gtid_hash);
thd->variables.max_allowed_packet= old_max_allowed_packet; thd->variables.max_allowed_packet= old_max_allowed_packet;
my_message(my_errno, error_text, MYF(0)); my_message(my_errno, error_text, MYF(0));
...@@ -3890,7 +3933,7 @@ rpl_append_gtid_state(String *dest, bool use_binlog) ...@@ -3890,7 +3933,7 @@ rpl_append_gtid_state(String *dest, bool use_binlog)
/* /*
Load the current GITD position into a slave_connection_state, for use when Load the current GTID position into a slave_connection_state, for use when
connecting to a master server with GTID. connecting to a master server with GTID.
If the flag use_binlog is true, then the contents of the binary log (if If the flag use_binlog is true, then the contents of the binary log (if
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment