Commit 7bffe468 authored by sjaakola's avatar sjaakola Committed by Jan Lindström

MDEV-21910 Deadlock between BF abort and manual KILL command

When high priority replication slave applier encounters lock conflict in innodb,
it will force the conflicting lock holder transaction (victim) to rollback.
This is a must in multi-master sychronous replication model to avoid cluster lock-up.
This high priority victim abort (aka "brute force" (BF) abort), is started
from innodb lock manager while holding the victim's transaction's (trx) mutex.
Depending on the execution state of the victim transaction, it may happen that the
BF abort will call for THD::awake() to wake up the victim transaction for the rollback.
Now, if BF abort requires THD::awake() to be called, then the applier thread executed
locking protocol of: victim trx mutex -> victim THD::LOCK_thd_data

If, at the same time another DBMS super user issues KILL command to abort the same victim,
it will execute locking protocol of: victim THD::LOCK_thd_data  -> victim trx mutex.
These two locking protocol acquire mutexes in opposite order, hence unresolvable mutex locking
deadlock may occur.

The fix in this commit adds THD::wsrep_aborter flag to synchronize who can kill the victim
This flag is set both when BF is called for from innodb and by KILL command.
Either path of victim killing will bail out if victim's wsrep_killed is already
set to avoid mutex conflicts with the other aborter execution. THD::wsrep_aborter
records the aborter THD's ID. This is needed to preserve the right to kill
the victim from different locations for the same aborter thread.
It is also good error logging, to see who is reponsible for the abort.

A new test case was added in galera.galera_bf_kill_debug.test for scenario where
wsrep applier thread and manual KILL command try to kill same idle victim
parent 4ec032b4
......@@ -87,6 +87,7 @@ extern struct wsrep_service_st {
ulong (*wsrep_OSU_method_get_func)(const MYSQL_THD thd);
my_bool (*wsrep_thd_has_ignored_error_func)(const MYSQL_THD thd);
void (*wsrep_thd_set_ignored_error_func)(MYSQL_THD thd, my_bool val);
bool (*wsrep_thd_set_wsrep_aborter_func)(MYSQL_THD bf_thd, MYSQL_THD thd);
} *wsrep_service;
#define MYSQL_SERVICE_WSREP_INCLUDED
......@@ -130,6 +131,7 @@ extern struct wsrep_service_st {
#define wsrep_OSU_method_get(T) wsrep_service->wsrep_OSU_method_get_func(T)
#define wsrep_thd_has_ignored_error(T) wsrep_service->wsrep_thd_has_ignored_error_func(T)
#define wsrep_thd_set_ignored_error(T,V) wsrep_service->wsrep_thd_set_ignored_error_func(T,V)
#define wsrep_thd_set_wsrep_aborter(T) wsrep_service->wsrep_thd_set_wsrep_aborter_func(T1, T2)
#else
#define MYSQL_SERVICE_WSREP_STATIC_INCLUDED
......@@ -181,6 +183,8 @@ extern "C" my_bool wsrep_thd_is_local(const MYSQL_THD thd);
/* Return true if thd is in high priority mode */
/* todo: rename to is_high_priority() */
extern "C" my_bool wsrep_thd_is_applying(const MYSQL_THD thd);
/* set wsrep_aborter for the target THD */
extern "C" bool wsrep_thd_set_wsrep_aborter(MYSQL_THD bf_thd, MYSQL_THD victim_thd);
/* Return true if thd is in TOI mode */
extern "C" my_bool wsrep_thd_is_toi(const MYSQL_THD thd);
/* Return true if thd is in replicating TOI mode */
......@@ -224,5 +228,6 @@ extern "C" my_bool wsrep_thd_is_applying(const MYSQL_THD thd);
extern "C" ulong wsrep_OSU_method_get(const MYSQL_THD thd);
extern "C" my_bool wsrep_thd_has_ignored_error(const MYSQL_THD thd);
extern "C" void wsrep_thd_set_ignored_error(MYSQL_THD thd, my_bool val);
extern "C" bool wsrep_thd_set_wsrep_aborter(MYSQL_THD bf_thd, MYSQL_THD victim_thd);
#endif
#endif /* MYSQL_SERVICE_WSREP_INCLUDED */
......@@ -69,21 +69,5 @@ select * from t1;
a b
2 1
disconnect node_2a;
drop table t1;
connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2;
connection node_2a;
CREATE TABLE t1 (i int primary key);
SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue";
INSERT INTO t1 VALUES (1);
connection node_2;
SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached";
SET DEBUG_SYNC = "now SIGNAL bwoc_continue";
SET DEBUG_SYNC='RESET';
connection node_2a;
connection node_2;
select * from t1;
i
1
disconnect node_2a;
connection node_2;
connection node_1;
drop table t1;
connection node_2;
connection node_1;
connection node_2;
CREATE TABLE t1(a int not null primary key auto_increment,b int) engine=InnoDB;
insert into t1 values (NULL,1);
connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2;
connection node_2a;
truncate t1;
insert into t1 values (1,0);
begin;
update t1 set b=2 where a=1;
connection node_2;
set session wsrep_sync_wait=0;
connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2;
connection node_2b;
SET GLOBAL debug_dbug = "d,sync.before_wsrep_thd_abort";
connection node_1;
select * from t1;
a b
1 0
update t1 set b= 1 where a=1;
connection node_2b;
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.before_wsrep_thd_abort_reached";
connection node_2;
SET DEBUG_SYNC= 'before_awake_no_mutex SIGNAL awake_reached WAIT_FOR continue_kill';
connection node_2b;
SET DEBUG_SYNC='now WAIT_FOR awake_reached';
SET GLOBAL debug_dbug = "";
SET DEBUG_SYNC = "now SIGNAL signal.before_wsrep_thd_abort";
SET DEBUG_SYNC = "now SIGNAL continue_kill";
connection node_2;
connection node_2a;
select * from t1;
connection node_2;
SET DEBUG_SYNC = "RESET";
drop table t1;
disconnect node_2a;
connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2;
connection node_2a;
CREATE TABLE t1 (i int primary key);
SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue";
INSERT INTO t1 VALUES (1);
connection node_2;
SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached";
SET DEBUG_SYNC = "now SIGNAL bwoc_continue";
SET DEBUG_SYNC='RESET';
connection node_2a;
connection node_2;
select * from t1;
i
1
disconnect node_2a;
connection node_1;
drop table t1;
connection node_2;
connection node_1;
connection node_2;
call mtr.add_suppression("WSREP: Trying to continue unpaused monitor");
connection node_1;
call mtr.add_suppression("WSREP: Trying to continue unpaused monitor");
CREATE TABLE t1 ENGINE=InnoDB select 1 as a, 1 as b union select 2, 2;
......
--source include/galera_cluster.inc
--source include/have_innodb.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc
#
# Test case 1: Start a transaction on node_2a and kill it
......@@ -135,56 +133,9 @@ update t1 set a =5, b=2;
--eval KILL $k_thread
--enable_query_log
select * from t1;
--disconnect node_2a
--connection node_1
drop table t1;
#
# Test case 7:
# run a transaction in node 2, and set a sync point to pause the transaction
# in commit phase.
# Through another connection to node 2, kill the committing transaction by
# KILL QUERY command
#
--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2
--connection node_2a
--let $connection_id = `SELECT CONNECTION_ID()`
CREATE TABLE t1 (i int primary key);
# Set up sync point
SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue";
# Send insert which will block in the sync point above
--send INSERT INTO t1 VALUES (1)
--connection node_2
SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached";
--disable_query_log
--disable_result_log
# victim has passed the point of no return, kill is not possible anymore
--eval KILL QUERY $connection_id
--enable_result_log
--enable_query_log
SET DEBUG_SYNC = "now SIGNAL bwoc_continue";
SET DEBUG_SYNC='RESET';
--connection node_2a
--error 0,1213
--reap
--connection node_2
# victim was able to complete the INSERT
select * from t1;
--disconnect node_2a
--connection node_2
drop table t1;
!include ../galera_2nodes.cnf
[mysqld.1]
wsrep-debug=SERVER
[mysqld.2]
wsrep-debug=SERVER
--source include/galera_cluster.inc
--source include/have_innodb.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc
#
# Test case 7:
# 1. Start a transaction on node_2,
# and leave it pending while holding a row locked
# 2. set sync point pause applier
# 3. send a conflicting write on node_1, it will pause
# at the sync point
# 4. though another connection to node_2, kill the local
# transaction
#
--connection node_2
CREATE TABLE t1(a int not null primary key auto_increment,b int) engine=InnoDB;
insert into t1 values (NULL,1);
#
# connection node_2a runs a local transaction, that is victim of BF abort
# and victim of KILL command by connection node_2
#
--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2
--connection node_2a
truncate t1;
insert into t1 values (1,0);
# start a transaction that will conflict with later applier
begin;
update t1 set b=2 where a=1;
--connection node_2
set session wsrep_sync_wait=0;
--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1
--source include/wait_condition.inc
--let $k_thread = `SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1`
# connection node_2b is for controlling debug syn points
# first set a sync point for applier, to pause during BF aborting
# and before THD::awake would be called
#
--connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2
--connection node_2b
SET GLOBAL debug_dbug = "d,sync.before_wsrep_thd_abort";
#
# replicate an update, which will BF abort the victim node_2a
# however, while applier in node 2 is handling the abort,
# it will pause in sync point set by node_2b
#
--connection node_1
select * from t1;
update t1 set b= 1 where a=1;
#
# wait until the applying of above update has reached the sync point
# in node 2
#
--connection node_2b
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.before_wsrep_thd_abort_reached";
--connection node_2
#
# pause KILL execution before awake
#
SET DEBUG_SYNC= 'before_awake_no_mutex SIGNAL awake_reached WAIT_FOR continue_kill';
--disable_query_log
--send_eval KILL $k_thread
--enable_query_log
--connection node_2b
SET DEBUG_SYNC='now WAIT_FOR awake_reached';
# release applier and KILL operator
SET GLOBAL debug_dbug = "";
SET DEBUG_SYNC = "now SIGNAL signal.before_wsrep_thd_abort";
SET DEBUG_SYNC = "now SIGNAL continue_kill";
--connection node_2
--reap
--connection node_2a
--error 0,1213
select * from t1;
--connection node_2
SET DEBUG_SYNC = "RESET";
drop table t1;
--disconnect node_2a
#
# Test case 7:
# run a transaction in node 2, and set a sync point to pause the transaction
# in commit phase.
# Through another connection to node 2, kill the committing transaction by
# KILL QUERY command
#
--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2
--connection node_2a
--let $connection_id = `SELECT CONNECTION_ID()`
CREATE TABLE t1 (i int primary key);
# Set up sync point
SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue";
# Send insert which will block in the sync point above
--send INSERT INTO t1 VALUES (1)
--connection node_2
SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached";
--disable_query_log
--disable_result_log
# victim has passed the point of no return, kill is not possible anymore
--eval KILL QUERY $connection_id
--enable_result_log
--enable_query_log
SET DEBUG_SYNC = "now SIGNAL bwoc_continue";
SET DEBUG_SYNC='RESET';
--connection node_2a
--error 0,1213
--reap
--connection node_2
# victim was able to complete the INSERT
select * from t1;
--disconnect node_2a
--connection node_1
drop table t1;
......@@ -2,8 +2,10 @@
--source include/have_innodb.inc
--source include/big_test.inc
--connection node_1
--connection node_2
call mtr.add_suppression("WSREP: Trying to continue unpaused monitor");
--connection node_1
call mtr.add_suppression("WSREP: Trying to continue unpaused monitor");
CREATE TABLE t1 ENGINE=InnoDB select 1 as a, 1 as b union select 2, 2;
......
......@@ -114,152 +114,161 @@ and (end_event_id <= @marker_end)
) all_events
order by relative_event_id asc;
relative_event_id relative_end_event_id event_name comment nesting_event_type relative_nesting_event_id
0 20 statement/sql/select select "MARKER_BEGIN" as marker NULL NULL
1 4 stage/sql/starting (stage) STATEMENT 0
0 21 statement/sql/select select "MARKER_BEGIN" as marker NULL NULL
1 5 stage/sql/starting (stage) STATEMENT 0
2 2 wait/io/socket/sql/client_connection recv STAGE 1
3 3 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 1
4 4 wait/io/file/sql/query_log write STAGE 1
5 5 stage/sql/checking permissions (stage) STATEMENT 0
6 6 stage/sql/Opening tables (stage) STATEMENT 0
7 7 stage/sql/After opening tables (stage) STATEMENT 0
8 8 stage/sql/init (stage) STATEMENT 0
9 9 stage/sql/Optimizing (stage) STATEMENT 0
10 10 stage/sql/Executing (stage) STATEMENT 0
11 11 stage/sql/End of update loop (stage) STATEMENT 0
12 12 stage/sql/Query end (stage) STATEMENT 0
13 13 stage/sql/Commit (stage) STATEMENT 0
14 14 stage/sql/closing tables (stage) STATEMENT 0
15 15 stage/sql/Starting cleanup (stage) STATEMENT 0
16 16 stage/sql/Freeing items (stage) STATEMENT 0
17 17 wait/io/socket/sql/client_connection send STATEMENT 0
18 18 wait/synch/mutex/sql/THD::LOCK_thd_kill lock STATEMENT 0
19 20 stage/sql/Reset for next command (stage) STATEMENT 0
20 20 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 19
21 21 idle idle NULL NULL
22 42 statement/sql/select select "This is simple statement one" as payload NULL NULL
23 26 stage/sql/starting (stage) STATEMENT 22
24 24 wait/io/socket/sql/client_connection recv STAGE 23
25 25 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 23
26 26 wait/io/file/sql/query_log write STAGE 23
27 27 stage/sql/checking permissions (stage) STATEMENT 22
28 28 stage/sql/Opening tables (stage) STATEMENT 22
29 29 stage/sql/After opening tables (stage) STATEMENT 22
30 30 stage/sql/init (stage) STATEMENT 22
31 31 stage/sql/Optimizing (stage) STATEMENT 22
32 32 stage/sql/Executing (stage) STATEMENT 22
33 33 stage/sql/End of update loop (stage) STATEMENT 22
34 34 stage/sql/Query end (stage) STATEMENT 22
35 35 stage/sql/Commit (stage) STATEMENT 22
36 36 stage/sql/closing tables (stage) STATEMENT 22
37 37 stage/sql/Starting cleanup (stage) STATEMENT 22
38 38 stage/sql/Freeing items (stage) STATEMENT 22
39 39 wait/io/socket/sql/client_connection send STATEMENT 22
40 40 wait/synch/mutex/sql/THD::LOCK_thd_kill lock STATEMENT 22
41 42 stage/sql/Reset for next command (stage) STATEMENT 22
42 42 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 41
43 43 idle idle NULL NULL
44 64 statement/sql/select select "This is simple statement two" as payload NULL NULL
45 48 stage/sql/starting (stage) STATEMENT 44
46 46 wait/io/socket/sql/client_connection recv STAGE 45
47 47 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 45
48 48 wait/io/file/sql/query_log write STAGE 45
49 49 stage/sql/checking permissions (stage) STATEMENT 44
50 50 stage/sql/Opening tables (stage) STATEMENT 44
51 51 stage/sql/After opening tables (stage) STATEMENT 44
52 52 stage/sql/init (stage) STATEMENT 44
53 53 stage/sql/Optimizing (stage) STATEMENT 44
54 54 stage/sql/Executing (stage) STATEMENT 44
55 55 stage/sql/End of update loop (stage) STATEMENT 44
56 56 stage/sql/Query end (stage) STATEMENT 44
57 57 stage/sql/Commit (stage) STATEMENT 44
58 58 stage/sql/closing tables (stage) STATEMENT 44
59 59 stage/sql/Starting cleanup (stage) STATEMENT 44
60 60 stage/sql/Freeing items (stage) STATEMENT 44
61 61 wait/io/socket/sql/client_connection send STATEMENT 44
62 62 wait/synch/mutex/sql/THD::LOCK_thd_kill lock STATEMENT 44
63 64 stage/sql/Reset for next command (stage) STATEMENT 44
64 64 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 63
65 65 idle idle NULL NULL
66 85 statement/sql/select select "This is the first part of a multi query" as payload;
4 4 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 1
5 5 wait/io/file/sql/query_log write STAGE 1
6 6 stage/sql/checking permissions (stage) STATEMENT 0
7 7 stage/sql/Opening tables (stage) STATEMENT 0
8 8 stage/sql/After opening tables (stage) STATEMENT 0
9 9 stage/sql/init (stage) STATEMENT 0
10 10 stage/sql/Optimizing (stage) STATEMENT 0
11 11 stage/sql/Executing (stage) STATEMENT 0
12 12 stage/sql/End of update loop (stage) STATEMENT 0
13 13 stage/sql/Query end (stage) STATEMENT 0
14 14 stage/sql/Commit (stage) STATEMENT 0
15 15 stage/sql/closing tables (stage) STATEMENT 0
16 16 stage/sql/Starting cleanup (stage) STATEMENT 0
17 17 stage/sql/Freeing items (stage) STATEMENT 0
18 18 wait/io/socket/sql/client_connection send STATEMENT 0
19 19 wait/synch/mutex/sql/THD::LOCK_thd_kill lock STATEMENT 0
20 21 stage/sql/Reset for next command (stage) STATEMENT 0
21 21 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 20
22 22 wait/synch/mutex/sql/THD::LOCK_thd_data lock STATEMENT 0
23 23 idle idle NULL NULL
24 45 statement/sql/select select "This is simple statement one" as payload NULL NULL
25 29 stage/sql/starting (stage) STATEMENT 24
26 26 wait/io/socket/sql/client_connection recv STAGE 25
27 27 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 25
28 28 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 25
29 29 wait/io/file/sql/query_log write STAGE 25
30 30 stage/sql/checking permissions (stage) STATEMENT 24
31 31 stage/sql/Opening tables (stage) STATEMENT 24
32 32 stage/sql/After opening tables (stage) STATEMENT 24
33 33 stage/sql/init (stage) STATEMENT 24
34 34 stage/sql/Optimizing (stage) STATEMENT 24
35 35 stage/sql/Executing (stage) STATEMENT 24
36 36 stage/sql/End of update loop (stage) STATEMENT 24
37 37 stage/sql/Query end (stage) STATEMENT 24
38 38 stage/sql/Commit (stage) STATEMENT 24
39 39 stage/sql/closing tables (stage) STATEMENT 24
40 40 stage/sql/Starting cleanup (stage) STATEMENT 24
41 41 stage/sql/Freeing items (stage) STATEMENT 24
42 42 wait/io/socket/sql/client_connection send STATEMENT 24
43 43 wait/synch/mutex/sql/THD::LOCK_thd_kill lock STATEMENT 24
44 45 stage/sql/Reset for next command (stage) STATEMENT 24
45 45 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 44
46 46 wait/synch/mutex/sql/THD::LOCK_thd_data lock STATEMENT 24
47 47 idle idle NULL NULL
48 69 statement/sql/select select "This is simple statement two" as payload NULL NULL
49 53 stage/sql/starting (stage) STATEMENT 48
50 50 wait/io/socket/sql/client_connection recv STAGE 49
51 51 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 49
52 52 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 49
53 53 wait/io/file/sql/query_log write STAGE 49
54 54 stage/sql/checking permissions (stage) STATEMENT 48
55 55 stage/sql/Opening tables (stage) STATEMENT 48
56 56 stage/sql/After opening tables (stage) STATEMENT 48
57 57 stage/sql/init (stage) STATEMENT 48
58 58 stage/sql/Optimizing (stage) STATEMENT 48
59 59 stage/sql/Executing (stage) STATEMENT 48
60 60 stage/sql/End of update loop (stage) STATEMENT 48
61 61 stage/sql/Query end (stage) STATEMENT 48
62 62 stage/sql/Commit (stage) STATEMENT 48
63 63 stage/sql/closing tables (stage) STATEMENT 48
64 64 stage/sql/Starting cleanup (stage) STATEMENT 48
65 65 stage/sql/Freeing items (stage) STATEMENT 48
66 66 wait/io/socket/sql/client_connection send STATEMENT 48
67 67 wait/synch/mutex/sql/THD::LOCK_thd_kill lock STATEMENT 48
68 69 stage/sql/Reset for next command (stage) STATEMENT 48
69 69 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 68
70 70 wait/synch/mutex/sql/THD::LOCK_thd_data lock STATEMENT 48
71 71 idle idle NULL NULL
72 92 statement/sql/select select "This is the first part of a multi query" as payload;
select "And this is the second part of a multi query" as payload;
select "With a third part to make things complete" as payload NULL NULL
67 71 stage/sql/starting (stage) STATEMENT 66
68 68 wait/io/socket/sql/client_connection recv STAGE 67
69 69 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 67
70 70 wait/io/file/sql/query_log write STAGE 67
71 71 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 67
72 72 stage/sql/checking permissions (stage) STATEMENT 66
73 73 stage/sql/Opening tables (stage) STATEMENT 66
74 74 stage/sql/After opening tables (stage) STATEMENT 66
75 75 stage/sql/init (stage) STATEMENT 66
76 76 stage/sql/Optimizing (stage) STATEMENT 66
77 77 stage/sql/Executing (stage) STATEMENT 66
78 78 stage/sql/End of update loop (stage) STATEMENT 66
79 79 stage/sql/Query end (stage) STATEMENT 66
80 80 stage/sql/Commit (stage) STATEMENT 66
81 81 stage/sql/closing tables (stage) STATEMENT 66
82 82 stage/sql/Starting cleanup (stage) STATEMENT 66
83 85 stage/sql/Freeing items (stage) STATEMENT 66
84 84 wait/io/socket/sql/client_connection send STAGE 83
85 85 wait/synch/mutex/sql/THD::LOCK_thd_kill lock STAGE 83
86 103 statement/sql/select select "And this is the second part of a multi query" as payload;
73 78 stage/sql/starting (stage) STATEMENT 72
74 74 wait/io/socket/sql/client_connection recv STAGE 73
75 75 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 73
76 76 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 73
77 77 wait/io/file/sql/query_log write STAGE 73
78 78 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 73
79 79 stage/sql/checking permissions (stage) STATEMENT 72
80 80 stage/sql/Opening tables (stage) STATEMENT 72
81 81 stage/sql/After opening tables (stage) STATEMENT 72
82 82 stage/sql/init (stage) STATEMENT 72
83 83 stage/sql/Optimizing (stage) STATEMENT 72
84 84 stage/sql/Executing (stage) STATEMENT 72
85 85 stage/sql/End of update loop (stage) STATEMENT 72
86 86 stage/sql/Query end (stage) STATEMENT 72
87 87 stage/sql/Commit (stage) STATEMENT 72
88 88 stage/sql/closing tables (stage) STATEMENT 72
89 89 stage/sql/Starting cleanup (stage) STATEMENT 72
90 92 stage/sql/Freeing items (stage) STATEMENT 72
91 91 wait/io/socket/sql/client_connection send STAGE 90
92 92 wait/synch/mutex/sql/THD::LOCK_thd_kill lock STAGE 90
93 110 statement/sql/select select "And this is the second part of a multi query" as payload;
select "With a third part to make things complete" as payload NULL NULL
87 89 stage/sql/starting (stage) STATEMENT 86
88 88 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 87
89 89 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 87
90 90 stage/sql/checking permissions (stage) STATEMENT 86
91 91 stage/sql/Opening tables (stage) STATEMENT 86
92 92 stage/sql/After opening tables (stage) STATEMENT 86
93 93 stage/sql/init (stage) STATEMENT 86
94 94 stage/sql/Optimizing (stage) STATEMENT 86
95 95 stage/sql/Executing (stage) STATEMENT 86
96 96 stage/sql/End of update loop (stage) STATEMENT 86
97 97 stage/sql/Query end (stage) STATEMENT 86
98 98 stage/sql/Commit (stage) STATEMENT 86
99 99 stage/sql/closing tables (stage) STATEMENT 86
100 100 stage/sql/Starting cleanup (stage) STATEMENT 86
101 103 stage/sql/Freeing items (stage) STATEMENT 86
102 102 wait/io/socket/sql/client_connection send STAGE 101
103 103 wait/synch/mutex/sql/THD::LOCK_thd_kill lock STAGE 101
104 122 statement/sql/select select "With a third part to make things complete" as payload NULL NULL
105 106 stage/sql/starting (stage) STATEMENT 104
106 106 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 105
107 107 stage/sql/checking permissions (stage) STATEMENT 104
108 108 stage/sql/Opening tables (stage) STATEMENT 104
109 109 stage/sql/After opening tables (stage) STATEMENT 104
110 110 stage/sql/init (stage) STATEMENT 104
111 111 stage/sql/Optimizing (stage) STATEMENT 104
112 112 stage/sql/Executing (stage) STATEMENT 104
113 113 stage/sql/End of update loop (stage) STATEMENT 104
114 114 stage/sql/Query end (stage) STATEMENT 104
115 115 stage/sql/Commit (stage) STATEMENT 104
116 116 stage/sql/closing tables (stage) STATEMENT 104
117 117 stage/sql/Starting cleanup (stage) STATEMENT 104
118 118 stage/sql/Freeing items (stage) STATEMENT 104
119 119 wait/io/socket/sql/client_connection send STATEMENT 104
120 120 wait/synch/mutex/sql/THD::LOCK_thd_kill lock STATEMENT 104
121 122 stage/sql/Reset for next command (stage) STATEMENT 104
122 122 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 121
123 123 idle idle NULL NULL
124 144 statement/sql/select select "MARKER_END" as marker NULL NULL
125 128 stage/sql/starting (stage) STATEMENT 124
126 126 wait/io/socket/sql/client_connection recv STAGE 125
127 127 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 125
128 128 wait/io/file/sql/query_log write STAGE 125
129 129 stage/sql/checking permissions (stage) STATEMENT 124
130 130 stage/sql/Opening tables (stage) STATEMENT 124
131 131 stage/sql/After opening tables (stage) STATEMENT 124
132 132 stage/sql/init (stage) STATEMENT 124
133 133 stage/sql/Optimizing (stage) STATEMENT 124
134 134 stage/sql/Executing (stage) STATEMENT 124
135 135 stage/sql/End of update loop (stage) STATEMENT 124
136 136 stage/sql/Query end (stage) STATEMENT 124
137 137 stage/sql/Commit (stage) STATEMENT 124
138 138 stage/sql/closing tables (stage) STATEMENT 124
139 139 stage/sql/Starting cleanup (stage) STATEMENT 124
140 140 stage/sql/Freeing items (stage) STATEMENT 124
141 141 wait/io/socket/sql/client_connection send STATEMENT 124
142 142 wait/synch/mutex/sql/THD::LOCK_thd_kill lock STATEMENT 124
143 144 stage/sql/Reset for next command (stage) STATEMENT 124
144 144 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 143
94 96 stage/sql/starting (stage) STATEMENT 93
95 95 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 94
96 96 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 94
97 97 stage/sql/checking permissions (stage) STATEMENT 93
98 98 stage/sql/Opening tables (stage) STATEMENT 93
99 99 stage/sql/After opening tables (stage) STATEMENT 93
100 100 stage/sql/init (stage) STATEMENT 93
101 101 stage/sql/Optimizing (stage) STATEMENT 93
102 102 stage/sql/Executing (stage) STATEMENT 93
103 103 stage/sql/End of update loop (stage) STATEMENT 93
104 104 stage/sql/Query end (stage) STATEMENT 93
105 105 stage/sql/Commit (stage) STATEMENT 93
106 106 stage/sql/closing tables (stage) STATEMENT 93
107 107 stage/sql/Starting cleanup (stage) STATEMENT 93
108 110 stage/sql/Freeing items (stage) STATEMENT 93
109 109 wait/io/socket/sql/client_connection send STAGE 108
110 110 wait/synch/mutex/sql/THD::LOCK_thd_kill lock STAGE 108
111 129 statement/sql/select select "With a third part to make things complete" as payload NULL NULL
112 113 stage/sql/starting (stage) STATEMENT 111
113 113 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 112
114 114 stage/sql/checking permissions (stage) STATEMENT 111
115 115 stage/sql/Opening tables (stage) STATEMENT 111
116 116 stage/sql/After opening tables (stage) STATEMENT 111
117 117 stage/sql/init (stage) STATEMENT 111
118 118 stage/sql/Optimizing (stage) STATEMENT 111
119 119 stage/sql/Executing (stage) STATEMENT 111
120 120 stage/sql/End of update loop (stage) STATEMENT 111
121 121 stage/sql/Query end (stage) STATEMENT 111
122 122 stage/sql/Commit (stage) STATEMENT 111
123 123 stage/sql/closing tables (stage) STATEMENT 111
124 124 stage/sql/Starting cleanup (stage) STATEMENT 111
125 125 stage/sql/Freeing items (stage) STATEMENT 111
126 126 wait/io/socket/sql/client_connection send STATEMENT 111
127 127 wait/synch/mutex/sql/THD::LOCK_thd_kill lock STATEMENT 111
128 129 stage/sql/Reset for next command (stage) STATEMENT 111
129 129 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 128
130 130 wait/synch/mutex/sql/THD::LOCK_thd_data lock STATEMENT 111
131 131 idle idle NULL NULL
132 153 statement/sql/select select "MARKER_END" as marker NULL NULL
133 137 stage/sql/starting (stage) STATEMENT 132
134 134 wait/io/socket/sql/client_connection recv STAGE 133
135 135 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 133
136 136 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 133
137 137 wait/io/file/sql/query_log write STAGE 133
138 138 stage/sql/checking permissions (stage) STATEMENT 132
139 139 stage/sql/Opening tables (stage) STATEMENT 132
140 140 stage/sql/After opening tables (stage) STATEMENT 132
141 141 stage/sql/init (stage) STATEMENT 132
142 142 stage/sql/Optimizing (stage) STATEMENT 132
143 143 stage/sql/Executing (stage) STATEMENT 132
144 144 stage/sql/End of update loop (stage) STATEMENT 132
145 145 stage/sql/Query end (stage) STATEMENT 132
146 146 stage/sql/Commit (stage) STATEMENT 132
147 147 stage/sql/closing tables (stage) STATEMENT 132
148 148 stage/sql/Starting cleanup (stage) STATEMENT 132
149 149 stage/sql/Freeing items (stage) STATEMENT 132
150 150 wait/io/socket/sql/client_connection send STATEMENT 132
151 151 wait/synch/mutex/sql/THD::LOCK_thd_kill lock STATEMENT 132
152 153 stage/sql/Reset for next command (stage) STATEMENT 132
153 153 wait/synch/mutex/sql/THD::LOCK_thd_data lock STAGE 152
disconnect con1;
......@@ -199,6 +199,16 @@ extern "C" void wsrep_handle_SR_rollback(THD *bf_thd,
extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd,
my_bool signal)
{
DBUG_EXECUTE_IF("sync.before_wsrep_thd_abort",
{
const char act[]=
"now "
"SIGNAL sync.before_wsrep_thd_abort_reached "
"WAIT_FOR signal.before_wsrep_thd_abort";
DBUG_ASSERT(!debug_sync_set_action(bf_thd,
STRING_WITH_LEN(act)));
};);
my_bool ret= wsrep_bf_abort(bf_thd, victim_thd);
/*
Send awake signal if victim was BF aborted or does not
......@@ -210,10 +220,22 @@ extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd,
mysql_mutex_assert_not_owner(&victim_thd->LOCK_thd_data);
mysql_mutex_assert_not_owner(&victim_thd->LOCK_thd_kill);
mysql_mutex_lock(&victim_thd->LOCK_thd_data);
if (victim_thd->wsrep_aborter && victim_thd->wsrep_aborter != bf_thd->thread_id)
{
WSREP_DEBUG("victim is killed already by %llu, skipping awake",
victim_thd->wsrep_aborter);
mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
return false;
}
mysql_mutex_lock(&victim_thd->LOCK_thd_kill);
victim_thd->wsrep_aborter= bf_thd->thread_id;
victim_thd->awake_no_mutex(KILL_QUERY);
mysql_mutex_unlock(&victim_thd->LOCK_thd_kill);
mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
} else {
WSREP_DEBUG("wsrep_thd_bf_abort skipped awake");
}
return ret;
}
......@@ -339,3 +361,15 @@ extern "C" ulong wsrep_OSU_method_get(const MYSQL_THD thd)
else
return(global_system_variables.wsrep_OSU_method);
}
extern "C" bool wsrep_thd_set_wsrep_aborter(THD *bf_thd, THD *victim_thd)
{
WSREP_DEBUG("wsrep_thd_set_wsrep_aborter called");
mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data);
if (victim_thd->wsrep_aborter && victim_thd->wsrep_aborter != bf_thd->thread_id)
{
return true;
}
victim_thd->wsrep_aborter = bf_thd->thread_id;
return false;
}
\ No newline at end of file
......@@ -706,6 +706,7 @@ THD::THD(my_thread_id id, bool is_wsrep_applier)
wsrep_affected_rows(0),
wsrep_has_ignored_error(false),
wsrep_ignore_table(false),
wsrep_aborter(0),
/* wsrep-lib */
m_wsrep_next_trx_id(WSREP_UNDEFINED_TRX_ID),
......@@ -1308,6 +1309,7 @@ void THD::init()
wsrep_rbr_buf = NULL;
wsrep_affected_rows = 0;
m_wsrep_next_trx_id = WSREP_UNDEFINED_TRX_ID;
wsrep_aborter = 0;
#endif /* WITH_WSREP */
if (variables.sql_log_bin)
......@@ -2139,11 +2141,19 @@ void THD::reset_killed()
DBUG_ENTER("reset_killed");
if (killed != NOT_KILLED)
{
mysql_mutex_assert_not_owner(&LOCK_thd_kill);
mysql_mutex_lock(&LOCK_thd_kill);
killed= NOT_KILLED;
killed_err= 0;
mysql_mutex_unlock(&LOCK_thd_kill);
}
#ifdef WITH_WSREP
mysql_mutex_assert_not_owner(&LOCK_thd_data);
mysql_mutex_lock(&LOCK_thd_data);
wsrep_aborter= 0;
mysql_mutex_unlock(&LOCK_thd_data);
#endif /* WITH_WSREP */
DBUG_VOID_RETURN;
}
......
......@@ -5013,7 +5013,8 @@ class THD: public THD_count, /* this must be first */
table updates from being replicated to other nodes via galera replication.
*/
bool wsrep_ignore_table;
/* thread who has started kill for this THD protected by LOCK_thd_data*/
my_thread_id wsrep_aborter;
/*
Transaction id:
......
......@@ -9149,7 +9149,6 @@ kill_one_thread(THD *thd, longlong id, killed_state kill_signal, killed_type typ
uint error= (type == KILL_TYPE_QUERY ? ER_NO_SUCH_QUERY : ER_NO_SUCH_THREAD);
DBUG_ENTER("kill_one_thread");
DBUG_PRINT("enter", ("id: %lld signal: %u", id, (uint) kill_signal));
WSREP_DEBUG("kill_one_thread %llu", thd->thread_id);
if (id && (tmp= find_thread_by_id(id, type == KILL_TYPE_QUERY)))
{
/*
......@@ -9182,12 +9181,29 @@ kill_one_thread(THD *thd, longlong id, killed_state kill_signal, killed_type typ
thd->security_ctx->user_matches(tmp->security_ctx))
#endif /* WITH_WSREP */
{
#ifdef WITH_WSREP
DEBUG_SYNC(thd, "before_awake_no_mutex");
if (tmp->wsrep_aborter && tmp->wsrep_aborter != thd->thread_id)
{
/* victim is in hit list already, bail out */
WSREP_DEBUG("victim has wsrep aborter: %lu, skipping awake()",
tmp->wsrep_aborter);
error= 0;
}
else
#endif /* WITH_WSREP */
{
WSREP_DEBUG("kill_one_thread %llu, victim: %llu wsrep_aborter %llu by signal %d",
thd->thread_id, id, tmp->wsrep_aborter, kill_signal);
tmp->awake_no_mutex(kill_signal);
error=0;
WSREP_DEBUG("victim: %llu taken care of", id);
error= 0;
}
}
else
error= (type == KILL_TYPE_QUERY ? ER_KILL_QUERY_DENIED_ERROR :
ER_KILL_DENIED_ERROR);
if (WSREP(tmp)) mysql_mutex_unlock(&tmp->LOCK_thd_data);
mysql_mutex_unlock(&tmp->LOCK_thd_kill);
}
......
......@@ -177,7 +177,8 @@ static struct wsrep_service_st wsrep_handler = {
wsrep_thd_is_applying,
wsrep_OSU_method_get,
wsrep_thd_has_ignored_error,
wsrep_thd_set_ignored_error
wsrep_thd_set_ignored_error,
wsrep_thd_set_wsrep_aborter
};
static struct thd_specifics_service_st thd_specifics_handler=
......
......@@ -150,3 +150,5 @@ void wsrep_thd_set_ignored_error(THD*, my_bool)
{ }
ulong wsrep_OSU_method_get(const THD*)
{ return 0;}
bool wsrep_thd_set_wsrep_aborter(THD*, THD*)
{ return 0;}
\ No newline at end of file
......@@ -19020,10 +19020,17 @@ wsrep_innobase_kill_one_trx(THD* bf_thd, trx_t *victim_trx,
/* Mark transaction as a victim for Galera abort */
victim_trx->lock.was_chosen_as_wsrep_victim= true;
if (wsrep_thd_set_wsrep_aborter(bf_thd, thd))
{
WSREP_DEBUG("innodb kill transaction skipped due to wsrep_aborter set");
wsrep_thd_UNLOCK(thd);
DBUG_RETURN(0);
}
/* Note that we need to release this as it will be acquired
below in wsrep-lib */
wsrep_thd_UNLOCK(thd);
DEBUG_SYNC(bf_thd, "before_wsrep_thd_abort");
if (wsrep_thd_bf_abort(bf_thd, thd, signal))
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment