Commit f229ac07 authored by Dmitry Lenev's avatar Dmitry Lenev

Fix for bug #50998 "Deadlock in MDL code during test

rqg_mdl_stability".

When the start of a statement's wait on a metadata lock
created more than one loop in the waiters graph, the server
might have entered a deadlock condition.

The problem was that, in the case described above, the MDL deadlock
detector had to perform several searches for a deadlock but
forgot to reset the Deadlock_detection_context before performing
each new search.
Failure to do so broke the assumption, made by the code responsible
for choosing a victim, that if Deadlock_detection_context::victim
is set we also hold a read lock on m_waiting_for_lock for this
context. As a result, this lock could have been unlocked more
times than it was acquired, which corrupted the rwlock's state
and led to a server deadlock.

This fix ensures that such reset is done before each attempt
to find a deadlock.
parent 73ef80f4
...@@ -2322,3 +2322,56 @@ set debug_sync= 'now SIGNAL go'; ...@@ -2322,3 +2322,56 @@ set debug_sync= 'now SIGNAL go';
# Reaping TRUNCATE TABLE. # Reaping TRUNCATE TABLE.
set debug_sync= 'RESET'; set debug_sync= 'RESET';
drop table t1; drop table t1;
#
# Test for bug #50998 "Deadlock in MDL code during test
# rqg_mdl_stability".
# Also provides coverage for the case when addition of
# waiting statement adds several loops in the waiters
# graph and therefore several searches for deadlock
# should be performed.
drop table if exists t1;
set debug_sync= 'RESET';
create table t1 (i int);
# Switching to connection 'con1'.
begin;
select * from t1;
i
# Switching to connection 'con2'.
begin;
select * from t1;
i
# Switching to connection 'default'.
# Start ALTER TABLE which will acquire SNW lock and
# table lock and get blocked on sync point.
set debug_sync= 'thr_multi_lock_after_thr_lock SIGNAL parked WAIT_FOR go';
# Sending:
alter table t1 add column j int;
# Switching to connection 'con1'.
# Wait until ALTER TABLE gets blocked on a sync point.
set debug_sync= 'now WAIT_FOR parked';
# Sending:
insert into t1 values (1);
# Switching to connection 'con2'.
# Sending:
insert into t1 values (1);
# Switching to connection 'con3'.
# Wait until both 'con1' and 'con2' are blocked trying to acquire
# SW lock on the table.
# Unblock ALTER TABLE. Since it will try to upgrade SNW to X lock
# deadlock with two loops in waiting graph will occur. Both loops
# should be found and DML statements in both 'con1' and 'con2'
# should be aborted with ER_LOCK_DEADLOCK errors.
set debug_sync= 'now SIGNAL go';
# Switching to connection 'con1'.
# Reaping INSERT. It should end with ER_LOCK_DEADLOCK error and
# not wait indefinitely (as it happened before the bugfix).
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
commit;
# Switching to connection 'con2'.
# Reaping INSERT.
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
commit;
# Switching to connection 'default'.
# Reap ALTER TABLE.
set debug_sync= 'RESET';
drop table t1;
...@@ -3374,6 +3374,95 @@ set debug_sync= 'RESET'; ...@@ -3374,6 +3374,95 @@ set debug_sync= 'RESET';
drop table t1; drop table t1;
--echo #
--echo # Test for bug #50998 "Deadlock in MDL code during test
--echo # rqg_mdl_stability".
--echo # Also provides coverage for the case when addition of
--echo # waiting statement adds several loops in the waiters
--echo # graph and therefore several searches for deadlock
--echo # should be performed.
# Hide any warning/note produced by DROP TABLE IF EXISTS so that it does
# not appear in the recorded output.
--disable_warnings
drop table if exists t1;
--enable_warnings
set debug_sync= 'RESET';
# con1 and con2 are the two sessions that will end up in the deadlock;
# con3 is used only to observe the processlist and to send the 'go' signal.
connect (con1,localhost,root);
connect (con2,localhost,root);
connect (con3,localhost,root);
connection default;
create table t1 (i int);
--echo # Switching to connection 'con1'.
connection con1;
# The transaction is left open on purpose; it is committed only after the
# deadlock error has been reaped below. Presumably this keeps the lock taken
# by the SELECT held for the duration of the test - confirm against MDL docs.
begin;
select * from t1;
--echo # Switching to connection 'con2'.
connection con2;
# Same setup as con1, giving a second open transaction that has read t1.
begin;
select * from t1;
--echo # Switching to connection 'default'.
connection default;
--echo # Start ALTER TABLE which will acquire SNW lock and
--echo # table lock and get blocked on sync point.
set debug_sync= 'thr_multi_lock_after_thr_lock SIGNAL parked WAIT_FOR go';
--echo # Sending:
# Sent asynchronously: the statement stays parked on the sync point until
# 'go' is signalled, so its result is collected later with --reap.
--send alter table t1 add column j int
--echo # Switching to connection 'con1'.
connection con1;
--echo # Wait until ALTER TABLE gets blocked on a sync point.
set debug_sync= 'now WAIT_FOR parked';
--echo # Sending:
# Sent asynchronously because the INSERT is expected to block.
--send insert into t1 values (1)
--echo # Switching to connection 'con2'.
connection con2;
--echo # Sending:
# Second blocked INSERT, identical to the one sent from con1.
--send insert into t1 values (1)
--echo # Switching to connection 'con3'.
connection con3;
--echo # Wait until both 'con1' and 'con2' are blocked trying to acquire
--echo # SW lock on the table.
# Poll the processlist until exactly two sessions running the INSERT are in
# the "Waiting for table" state; only then is ALTER TABLE released.
let $wait_condition=
select count(*) = 2 from information_schema.processlist
where state = "Waiting for table" and info = "insert into t1 values (1)";
--source include/wait_condition.inc
--echo # Unblock ALTER TABLE. Since it will try to upgrade SNW to X lock
--echo # deadlock with two loops in waiting graph will occur. Both loops
--echo # should be found and DML statements in both 'con1' and 'con2'
--echo # should be aborted with ER_LOCK_DEADLOCK errors.
set debug_sync= 'now SIGNAL go';
--echo # Switching to connection 'con1'.
connection con1;
--echo # Reaping INSERT. It should end with ER_LOCK_DEADLOCK error and
--echo # not wait indefinitely (as it happened before the bugfix).
--error ER_LOCK_DEADLOCK
--reap
# End the transaction opened at the start of the test.
commit;
--echo # Switching to connection 'con2'.
connection con2;
--echo # Reaping INSERT.
--error ER_LOCK_DEADLOCK
--reap
# End the transaction opened at the start of the test.
commit;
--echo # Switching to connection 'default'.
connection default;
--echo # Reap ALTER TABLE.
# No --error directive precedes this --reap, so ALTER TABLE is expected to
# complete successfully.
--reap
# Cleanup: close the auxiliary connections, clear debug sync state and
# remove the test table.
disconnect con1;
disconnect con2;
disconnect con3;
connection default;
set debug_sync= 'RESET';
drop table t1;
# Check that all connections opened by test cases in this file are really # Check that all connections opened by test cases in this file are really
# gone so execution of other tests won't be affected by their presence. # gone so execution of other tests won't be affected by their presence.
--source include/wait_until_count_sessions.inc --source include/wait_until_count_sessions.inc
...@@ -1755,10 +1755,15 @@ bool MDL_context::find_deadlock(Deadlock_detection_context *deadlock_ctx) ...@@ -1755,10 +1755,15 @@ bool MDL_context::find_deadlock(Deadlock_detection_context *deadlock_ctx)
bool MDL_context::find_deadlock() bool MDL_context::find_deadlock()
{ {
Deadlock_detection_context deadlock_ctx(this);
while (1) while (1)
{ {
/*
It is important that a fresh instance of deadlock_ctx is used for
each search performed by find_deadlock() below: the code
responsible for victim selection relies on this.
*/
Deadlock_detection_context deadlock_ctx(this);
if (! find_deadlock(&deadlock_ctx)) if (! find_deadlock(&deadlock_ctx))
{ {
/* No deadlocks are found! */ /* No deadlocks are found! */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment