Commit f573b65e authored by Kristian Nielsen's avatar Kristian Nielsen

Merge MDEV-7847 and MDEV-7882 into 10.0.

Conflicts:
	mysql-test/suite/rpl/r/rpl_parallel.result
	sql/rpl_parallel.cc
parents 59df1009 880f2273
...@@ -1264,11 +1264,297 @@ a ...@@ -1264,11 +1264,297 @@ a
include/stop_slave.inc include/stop_slave.inc
SET GLOBAL slave_parallel_mode='conservative'; SET GLOBAL slave_parallel_mode='conservative';
include/start_slave.inc include/start_slave.inc
*** MDEV-7847: "Slave worker thread retried transaction 10 time(s) in vain, giving up", followed by replication hanging ***
*** MDEV-7882: Excessive transaction retry in parallel replication ***
CREATE TABLE t7 (a int PRIMARY KEY, b INT) ENGINE=InnoDB;
CREATE TABLE t8 (a int PRIMARY KEY, b INT) ENGINE=InnoDB;
include/stop_slave.inc
SET GLOBAL slave_parallel_threads=40;
SELECT @old_retries:=@@GLOBAL.slave_transaction_retries;
@old_retries:=@@GLOBAL.slave_transaction_retries
10
SET GLOBAL slave_transaction_retries= 5;
INSERT INTO t7 VALUES (1,1), (2,2), (3,3), (4,4), (5,5);
SET @old_dbug= @@SESSION.debug_dbug;
SET @commit_id= 42;
SET SESSION debug_dbug="+d,binlog_force_commit_id";
INSERT INTO t8 VALUES (1,1);
INSERT INTO t8 VALUES (2,2);
INSERT INTO t8 VALUES (3,3);
INSERT INTO t8 VALUES (4,4);
INSERT INTO t8 VALUES (5,5);
INSERT INTO t8 VALUES (6,6);
INSERT INTO t8 VALUES (7,7);
INSERT INTO t8 VALUES (8,8);
UPDATE t7 SET b=9 WHERE a=3;
UPDATE t7 SET b=10 WHERE a=3;
UPDATE t7 SET b=11 WHERE a=3;
INSERT INTO t8 VALUES (12,12);
INSERT INTO t8 VALUES (13,13);
UPDATE t7 SET b=14 WHERE a=3;
UPDATE t7 SET b=15 WHERE a=3;
INSERT INTO t8 VALUES (16,16);
UPDATE t7 SET b=17 WHERE a=3;
INSERT INTO t8 VALUES (18,18);
INSERT INTO t8 VALUES (19,19);
UPDATE t7 SET b=20 WHERE a=3;
INSERT INTO t8 VALUES (21,21);
UPDATE t7 SET b=22 WHERE a=3;
INSERT INTO t8 VALUES (23,24);
INSERT INTO t8 VALUES (24,24);
UPDATE t7 SET b=25 WHERE a=3;
INSERT INTO t8 VALUES (26,26);
UPDATE t7 SET b=27 WHERE a=3;
BEGIN;
INSERT INTO t8 VALUES (28,28);
INSERT INTO t8 VALUES (29,28), (30,28);
INSERT INTO t8 VALUES (31,28);
INSERT INTO t8 VALUES (32,28);
INSERT INTO t8 VALUES (33,28);
INSERT INTO t8 VALUES (34,28);
INSERT INTO t8 VALUES (35,28);
INSERT INTO t8 VALUES (36,28);
INSERT INTO t8 VALUES (37,28);
INSERT INTO t8 VALUES (38,28);
INSERT INTO t8 VALUES (39,28);
INSERT INTO t8 VALUES (40,28);
INSERT INTO t8 VALUES (41,28);
INSERT INTO t8 VALUES (42,28);
COMMIT;
SET @commit_id=43;
INSERT INTO t8 VALUES (43,43);
INSERT INTO t8 VALUES (44,44);
UPDATE t7 SET b=45 WHERE a=3;
INSERT INTO t8 VALUES (46,46);
INSERT INTO t8 VALUES (47,47);
UPDATE t7 SET b=48 WHERE a=3;
INSERT INTO t8 VALUES (49,49);
INSERT INTO t8 VALUES (50,50);
SET @commit_id=44;
INSERT INTO t8 VALUES (51,51);
INSERT INTO t8 VALUES (52,52);
UPDATE t7 SET b=53 WHERE a=3;
INSERT INTO t8 VALUES (54,54);
INSERT INTO t8 VALUES (55,55);
UPDATE t7 SET b=56 WHERE a=3;
INSERT INTO t8 VALUES (57,57);
UPDATE t7 SET b=58 WHERE a=3;
INSERT INTO t8 VALUES (58,58);
INSERT INTO t8 VALUES (59,59);
INSERT INTO t8 VALUES (60,60);
INSERT INTO t8 VALUES (61,61);
UPDATE t7 SET b=62 WHERE a=3;
INSERT INTO t8 VALUES (63,63);
INSERT INTO t8 VALUES (64,64);
INSERT INTO t8 VALUES (65,65);
INSERT INTO t8 VALUES (66,66);
UPDATE t7 SET b=67 WHERE a=3;
INSERT INTO t8 VALUES (68,68);
UPDATE t7 SET b=69 WHERE a=3;
UPDATE t7 SET b=70 WHERE a=3;
UPDATE t7 SET b=71 WHERE a=3;
INSERT INTO t8 VALUES (72,72);
UPDATE t7 SET b=73 WHERE a=3;
UPDATE t7 SET b=74 WHERE a=3;
UPDATE t7 SET b=75 WHERE a=3;
UPDATE t7 SET b=76 WHERE a=3;
INSERT INTO t8 VALUES (77,77);
UPDATE t7 SET b=78 WHERE a=3;
INSERT INTO t8 VALUES (79,79);
UPDATE t7 SET b=80 WHERE a=3;
INSERT INTO t8 VALUES (81,81);
UPDATE t7 SET b=82 WHERE a=3;
INSERT INTO t8 VALUES (83,83);
UPDATE t7 SET b=84 WHERE a=3;
SET @commit_id=45;
INSERT INTO t8 VALUES (85,85);
UPDATE t7 SET b=86 WHERE a=3;
INSERT INTO t8 VALUES (87,87);
SET @commit_id=46;
INSERT INTO t8 VALUES (88,88);
INSERT INTO t8 VALUES (89,89);
INSERT INTO t8 VALUES (90,90);
SET SESSION debug_dbug=@old_dbug;
INSERT INTO t8 VALUES (91,91);
INSERT INTO t8 VALUES (92,92);
INSERT INTO t8 VALUES (93,93);
INSERT INTO t8 VALUES (94,94);
INSERT INTO t8 VALUES (95,95);
INSERT INTO t8 VALUES (96,96);
INSERT INTO t8 VALUES (97,97);
INSERT INTO t8 VALUES (98,98);
INSERT INTO t8 VALUES (99,99);
SELECT * FROM t7 ORDER BY a;
a b
1 1
2 2
3 86
4 4
5 5
SELECT * FROM t8 ORDER BY a;
a b
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
12 12
13 13
16 16
18 18
19 19
21 21
23 24
24 24
26 26
28 28
29 28
30 28
31 28
32 28
33 28
34 28
35 28
36 28
37 28
38 28
39 28
40 28
41 28
42 28
43 43
44 44
46 46
47 47
49 49
50 50
51 51
52 52
54 54
55 55
57 57
58 58
59 59
60 60
61 61
63 63
64 64
65 65
66 66
68 68
72 72
77 77
79 79
81 81
83 83
85 85
87 87
88 88
89 89
90 90
91 91
92 92
93 93
94 94
95 95
96 96
97 97
98 98
99 99
include/save_master_gtid.inc
include/start_slave.inc
include/sync_with_master_gtid.inc
SELECT * FROM t7 ORDER BY a;
a b
1 1
2 2
3 86
4 4
5 5
SELECT * FROM t8 ORDER BY a;
a b
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
12 12
13 13
16 16
18 18
19 19
21 21
23 24
24 24
26 26
28 28
29 28
30 28
31 28
32 28
33 28
34 28
35 28
36 28
37 28
38 28
39 28
40 28
41 28
42 28
43 43
44 44
46 46
47 47
49 49
50 50
51 51
52 52
54 54
55 55
57 57
58 58
59 59
60 60
61 61
63 63
64 64
65 65
66 66
68 68
72 72
77 77
79 79
81 81
83 83
85 85
87 87
88 88
89 89
90 90
91 91
92 92
93 93
94 94
95 95
96 96
97 97
98 98
99 99
include/stop_slave.inc
SET GLOBAL slave_transaction_retries= @old_retries;
include/start_slave.inc
include/stop_slave.inc include/stop_slave.inc
SET GLOBAL slave_parallel_threads=@old_parallel_threads; SET GLOBAL slave_parallel_threads=@old_parallel_threads;
include/start_slave.inc include/start_slave.inc
SET DEBUG_SYNC= 'RESET'; SET DEBUG_SYNC= 'RESET';
DROP function foo; DROP function foo;
DROP TABLE t1,t2,t3,t4,t5,t6; DROP TABLE t1,t2,t3,t4,t5,t6,t7,t8;
SET DEBUG_SYNC= 'RESET'; SET DEBUG_SYNC= 'RESET';
include/rpl_end.inc include/rpl_end.inc
...@@ -1959,6 +1959,208 @@ SET GLOBAL slave_parallel_mode='conservative'; ...@@ -1959,6 +1959,208 @@ SET GLOBAL slave_parallel_mode='conservative';
--source include/start_slave.inc --source include/start_slave.inc
--echo *** MDEV-7847: "Slave worker thread retried transaction 10 time(s) in vain, giving up", followed by replication hanging ***
--echo *** MDEV-7882: Excessive transaction retry in parallel replication ***
--connection server_1
CREATE TABLE t7 (a int PRIMARY KEY, b INT) ENGINE=InnoDB;
CREATE TABLE t8 (a int PRIMARY KEY, b INT) ENGINE=InnoDB;
--save_master_pos
--connection server_2
--sync_with_master
--source include/stop_slave.inc
SET GLOBAL slave_parallel_threads=40;
SELECT @old_retries:=@@GLOBAL.slave_transaction_retries;
SET GLOBAL slave_transaction_retries= 5;
# Using dbug error injection, we artificially create event groups with a lot of
# conflicting transactions in each event group. The bugs were originally seen
# "in the wild" with transactions that did not conflict on the master, and only
# conflicted very rarely on the slave (maybe some edge case with InnoDB btree
# page splits or something like that). The event groups here loosely reflect
# the structure of the original failure's group commits.
--connection server_1
INSERT INTO t7 VALUES (1,1), (2,2), (3,3), (4,4), (5,5);
SET @old_dbug= @@SESSION.debug_dbug;
SET @commit_id= 42;
SET SESSION debug_dbug="+d,binlog_force_commit_id";
INSERT INTO t8 VALUES (1,1);
INSERT INTO t8 VALUES (2,2);
INSERT INTO t8 VALUES (3,3);
INSERT INTO t8 VALUES (4,4);
INSERT INTO t8 VALUES (5,5);
INSERT INTO t8 VALUES (6,6);
INSERT INTO t8 VALUES (7,7);
INSERT INTO t8 VALUES (8,8);
UPDATE t7 SET b=9 WHERE a=3;
UPDATE t7 SET b=10 WHERE a=3;
UPDATE t7 SET b=11 WHERE a=3;
INSERT INTO t8 VALUES (12,12);
INSERT INTO t8 VALUES (13,13);
UPDATE t7 SET b=14 WHERE a=3;
UPDATE t7 SET b=15 WHERE a=3;
INSERT INTO t8 VALUES (16,16);
UPDATE t7 SET b=17 WHERE a=3;
INSERT INTO t8 VALUES (18,18);
INSERT INTO t8 VALUES (19,19);
UPDATE t7 SET b=20 WHERE a=3;
INSERT INTO t8 VALUES (21,21);
UPDATE t7 SET b=22 WHERE a=3;
INSERT INTO t8 VALUES (23,24);
INSERT INTO t8 VALUES (24,24);
UPDATE t7 SET b=25 WHERE a=3;
INSERT INTO t8 VALUES (26,26);
UPDATE t7 SET b=27 WHERE a=3;
BEGIN;
INSERT INTO t8 VALUES (28,28);
INSERT INTO t8 VALUES (29,28), (30,28);
INSERT INTO t8 VALUES (31,28);
INSERT INTO t8 VALUES (32,28);
INSERT INTO t8 VALUES (33,28);
INSERT INTO t8 VALUES (34,28);
INSERT INTO t8 VALUES (35,28);
INSERT INTO t8 VALUES (36,28);
INSERT INTO t8 VALUES (37,28);
INSERT INTO t8 VALUES (38,28);
INSERT INTO t8 VALUES (39,28);
INSERT INTO t8 VALUES (40,28);
INSERT INTO t8 VALUES (41,28);
INSERT INTO t8 VALUES (42,28);
COMMIT;
SET @commit_id=43;
INSERT INTO t8 VALUES (43,43);
INSERT INTO t8 VALUES (44,44);
UPDATE t7 SET b=45 WHERE a=3;
INSERT INTO t8 VALUES (46,46);
INSERT INTO t8 VALUES (47,47);
UPDATE t7 SET b=48 WHERE a=3;
INSERT INTO t8 VALUES (49,49);
INSERT INTO t8 VALUES (50,50);
SET @commit_id=44;
INSERT INTO t8 VALUES (51,51);
INSERT INTO t8 VALUES (52,52);
UPDATE t7 SET b=53 WHERE a=3;
INSERT INTO t8 VALUES (54,54);
INSERT INTO t8 VALUES (55,55);
UPDATE t7 SET b=56 WHERE a=3;
INSERT INTO t8 VALUES (57,57);
UPDATE t7 SET b=58 WHERE a=3;
INSERT INTO t8 VALUES (58,58);
INSERT INTO t8 VALUES (59,59);
INSERT INTO t8 VALUES (60,60);
INSERT INTO t8 VALUES (61,61);
UPDATE t7 SET b=62 WHERE a=3;
INSERT INTO t8 VALUES (63,63);
INSERT INTO t8 VALUES (64,64);
INSERT INTO t8 VALUES (65,65);
INSERT INTO t8 VALUES (66,66);
UPDATE t7 SET b=67 WHERE a=3;
INSERT INTO t8 VALUES (68,68);
UPDATE t7 SET b=69 WHERE a=3;
UPDATE t7 SET b=70 WHERE a=3;
UPDATE t7 SET b=71 WHERE a=3;
INSERT INTO t8 VALUES (72,72);
UPDATE t7 SET b=73 WHERE a=3;
UPDATE t7 SET b=74 WHERE a=3;
UPDATE t7 SET b=75 WHERE a=3;
UPDATE t7 SET b=76 WHERE a=3;
INSERT INTO t8 VALUES (77,77);
UPDATE t7 SET b=78 WHERE a=3;
INSERT INTO t8 VALUES (79,79);
UPDATE t7 SET b=80 WHERE a=3;
INSERT INTO t8 VALUES (81,81);
UPDATE t7 SET b=82 WHERE a=3;
INSERT INTO t8 VALUES (83,83);
UPDATE t7 SET b=84 WHERE a=3;
SET @commit_id=45;
INSERT INTO t8 VALUES (85,85);
UPDATE t7 SET b=86 WHERE a=3;
INSERT INTO t8 VALUES (87,87);
SET @commit_id=46;
INSERT INTO t8 VALUES (88,88);
INSERT INTO t8 VALUES (89,89);
INSERT INTO t8 VALUES (90,90);
SET SESSION debug_dbug=@old_dbug;
INSERT INTO t8 VALUES (91,91);
INSERT INTO t8 VALUES (92,92);
INSERT INTO t8 VALUES (93,93);
INSERT INTO t8 VALUES (94,94);
INSERT INTO t8 VALUES (95,95);
INSERT INTO t8 VALUES (96,96);
INSERT INTO t8 VALUES (97,97);
INSERT INTO t8 VALUES (98,98);
INSERT INTO t8 VALUES (99,99);
SELECT * FROM t7 ORDER BY a;
SELECT * FROM t8 ORDER BY a;
--source include/save_master_gtid.inc
--connection server_2
--source include/start_slave.inc
--source include/sync_with_master_gtid.inc
SELECT * FROM t7 ORDER BY a;
SELECT * FROM t8 ORDER BY a;
--source include/stop_slave.inc
SET GLOBAL slave_transaction_retries= @old_retries;
--source include/start_slave.inc
# Clean up. # Clean up.
--connection server_2 --connection server_2
--source include/stop_slave.inc --source include/stop_slave.inc
...@@ -1968,7 +2170,7 @@ SET DEBUG_SYNC= 'RESET'; ...@@ -1968,7 +2170,7 @@ SET DEBUG_SYNC= 'RESET';
--connection server_1 --connection server_1
DROP function foo; DROP function foo;
DROP TABLE t1,t2,t3,t4,t5,t6; DROP TABLE t1,t2,t3,t4,t5,t6,t7,t8;
SET DEBUG_SYNC= 'RESET'; SET DEBUG_SYNC= 'RESET';
--source include/rpl_end.inc --source include/rpl_end.inc
...@@ -7360,6 +7360,15 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader) ...@@ -7360,6 +7360,15 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
if (likely(is_open())) // Should always be true if (likely(is_open())) // Should always be true
{ {
commit_id= (last_in_queue == leader ? 0 : (uint64)leader->thd->query_id); commit_id= (last_in_queue == leader ? 0 : (uint64)leader->thd->query_id);
DBUG_EXECUTE_IF("binlog_force_commit_id",
{
const LEX_STRING name= { C_STRING_WITH_LEN("commit_id") };
bool null_value;
user_var_entry *entry=
(user_var_entry*) my_hash_search(&leader->thd->user_vars,
(uchar*) name.str, name.length);
commit_id= entry->val_int(&null_value);
});
/* /*
Commit every transaction in the queue. Commit every transaction in the queue.
......
...@@ -112,6 +112,7 @@ finish_event_group(rpl_parallel_thread *rpt, uint64 sub_id, ...@@ -112,6 +112,7 @@ finish_event_group(rpl_parallel_thread *rpt, uint64 sub_id,
wait_for_commit *wfc= &rgi->commit_orderer; wait_for_commit *wfc= &rgi->commit_orderer;
int err; int err;
thd->get_stmt_da()->set_overwrite_status(true);
/* /*
Remove any left-over registration to wait for a prior commit to Remove any left-over registration to wait for a prior commit to
complete. Normally, such wait would already have been removed at complete. Normally, such wait would already have been removed at
...@@ -128,14 +129,14 @@ finish_event_group(rpl_parallel_thread *rpt, uint64 sub_id, ...@@ -128,14 +129,14 @@ finish_event_group(rpl_parallel_thread *rpt, uint64 sub_id,
for us to complete and rely on this also ensuring that any other for us to complete and rely on this also ensuring that any other
event in the group has completed. event in the group has completed.
But in the error case, we have to abort anyway, and it seems best And in the error case, correct GCO lifetime relies on the fact that once
to just complete as quickly as possible with unregister. Anyone the last event group in the GCO has executed wait_for_prior_commit(),
waiting for us will in any case receive the error back from their all earlier event groups have also committed; this way no more
wait_for_prior_commit() call. mark_start_commit() calls can be made and it is safe to de-allocate
the GCO.
*/ */
if (rgi->worker_error) err= wfc->wait_for_prior_commit(thd);
wfc->unregister_wait_for_prior_commit(); if (unlikely(err) && !rgi->worker_error)
else if ((err= wfc->wait_for_prior_commit(thd)))
signal_error_to_sql_driver_thread(thd, rgi, err); signal_error_to_sql_driver_thread(thd, rgi, err);
thd->wait_for_commit_ptr= NULL; thd->wait_for_commit_ptr= NULL;
...@@ -192,6 +193,10 @@ finish_event_group(rpl_parallel_thread *rpt, uint64 sub_id, ...@@ -192,6 +193,10 @@ finish_event_group(rpl_parallel_thread *rpt, uint64 sub_id,
thd->clear_error(); thd->clear_error();
thd->reset_killed(); thd->reset_killed();
/*
Would do thd->get_stmt_da()->set_overwrite_status(false) here, but
reset_diagnostics_area() already does that.
*/
thd->get_stmt_da()->reset_diagnostics_area(); thd->get_stmt_da()->reset_diagnostics_area();
wfc->wakeup_subsequent_commits(rgi->worker_error); wfc->wakeup_subsequent_commits(rgi->worker_error);
} }
...@@ -385,16 +390,10 @@ retry_event_group(rpl_group_info *rgi, rpl_parallel_thread *rpt, ...@@ -385,16 +390,10 @@ retry_event_group(rpl_group_info *rgi, rpl_parallel_thread *rpt,
register_wait_for_prior_event_group_commit(rgi, entry); register_wait_for_prior_event_group_commit(rgi, entry);
mysql_mutex_unlock(&entry->LOCK_parallel_entry); mysql_mutex_unlock(&entry->LOCK_parallel_entry);
if (rgi->speculation != rpl_group_info::SPECULATE_OPTIMISTIC)
break;
/* /*
We speculatively tried to run this in parallel with prior event groups, Let us wait for all prior transactions to complete before trying again.
but it did not work for some reason. This way, we avoid repeatedly conflicting with and getting deadlock
killed by the same earlier transaction.
So let us wait for all prior transactions to complete before trying
again. This way, we avoid repeatedly retrying and failing a small
transaction that conflicts with a prior long-running one.
*/ */
if (!(err= thd->wait_for_prior_commit())) if (!(err= thd->wait_for_prior_commit()))
{ {
...@@ -800,8 +799,7 @@ handle_rpl_parallel_thread(void *arg) ...@@ -800,8 +799,7 @@ handle_rpl_parallel_thread(void *arg)
if (unlikely(entry->stop_on_error_sub_id <= rgi->wait_commit_sub_id)) if (unlikely(entry->stop_on_error_sub_id <= rgi->wait_commit_sub_id))
skip_event_group= true; skip_event_group= true;
else register_wait_for_prior_event_group_commit(rgi, entry);
register_wait_for_prior_event_group_commit(rgi, entry);
unlock_or_exit_cond(thd, &entry->LOCK_parallel_entry, unlock_or_exit_cond(thd, &entry->LOCK_parallel_entry,
&did_enter_cond, &old_stage); &did_enter_cond, &old_stage);
...@@ -888,7 +886,9 @@ handle_rpl_parallel_thread(void *arg) ...@@ -888,7 +886,9 @@ handle_rpl_parallel_thread(void *arg)
else else
{ {
delete qev->ev; delete qev->ev;
thd->get_stmt_da()->set_overwrite_status(true);
err= thd->wait_for_prior_commit(); err= thd->wait_for_prior_commit();
thd->get_stmt_da()->set_overwrite_status(false);
} }
end_of_group= end_of_group=
......
...@@ -53,7 +53,7 @@ struct group_commit_orderer { ...@@ -53,7 +53,7 @@ struct group_commit_orderer {
group_commit_orderer *prev_gco; group_commit_orderer *prev_gco;
group_commit_orderer *next_gco; group_commit_orderer *next_gco;
/* /*
The sub_id of last event group in this the previous GCO. The sub_id of last event group in the previous GCO.
Only valid if prev_gco != NULL. Only valid if prev_gco != NULL.
*/ */
uint64 prior_sub_id; uint64 prior_sub_id;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment