Commit 86c23fa7 authored by Dmitry Lenev's avatar Dmitry Lenev

Fix for bug #45143 "All connections hang on concurrent ALTER TABLE".

Concurrent execution of statements which require non-table-level
write locks on several instances of the same table (such as
SELECT ... FOR UPDATE which uses same InnoDB table twice or a DML
statement which invokes trigger which tries to update same InnoDB
table directly and through stored function) and statements which
required table-level locks on this table (e.g. LOCK TABLE ... WRITE,
ALTER TABLE, ...) might have resulted in a deadlock.

The problem occured when a thread tried to acquire write lock
(TL_WRITE_ALLOW_WRITE) on the table but had to wait since there was
a pending write lock (TL_WRITE, TL_WRITE_ALLOW_READ) on this table
and we failed to detect that this thread already had another instance
of write lock on it (so in fact we were trying to acquire recursive
lock) because there was also another thread holding write lock on the
table (also TL_WRITE_ALLOW_WRITE). When the latter thread released
its lock neither the first thread nor the thread trying to acquire
TL_WRITE/TL_WRITE_ALLOW_READ were woken up (as table was still write
locked by the first thread) so we ended up with a deadlock.

This patch solves this problem by ensuring that thread which
already has write lock on the table won't wait when it tries
to acquire second write lock on the same table.

mysql-test/r/lock_sync.result:
  Added test case for bug #45143 "All connections hang on concurrent
  ALTER TABLE".
mysql-test/t/lock_sync.test:
  Added test case for bug #45143 "All connections hang on concurrent
  ALTER TABLE".
mysys/thr_lock.c:
  Ensured that thread can acquire write lock on the table without
  waiting if it already has write lock on it even if there are other
  threads holding write locks on this table (this is normal situation
  for, e.g., TL_WRITE_ALLOW_WRITE type of lock).
  
  Adjusted comments to better explain why it is OK to do so and added
  asserts to prevent introduction of scenarios in which this can cause
  problems.
parent 90e2ad95
#
# Test for bug #45143 "All connections hang on concurrent ALTER TABLE".
#
# Concurrent execution of statements which required weak write lock
# (TL_WRITE_ALLOW_WRITE) on several instances of the same table and
# statements which tried to acquire stronger write lock (TL_WRITE,
# TL_WRITE_ALLOW_READ) on this table might have led to deadlock.
drop table if exists t1;
# Create auxiliary connections used through the test.
# Reset DEBUG_SYNC facility before using it.
set debug_sync= 'RESET';
# Turn off logging so calls to locking subsystem performed
# for general_log table won't interfere with our test.
set @old_general_log = @@global.general_log;
set @@global.general_log= OFF;
create table t1 (i int) engine=InnoDB;
insert into t1 values (1);
# Prepare user lock which will be used for resuming execution of
# the first statement after it acquires TL_WRITE_ALLOW_WRITE lock.
select get_lock("lock_bug45143_wait", 0);
get_lock("lock_bug45143_wait", 0)
1
# Switch to connection 'con_bug45143_1'.
# Sending:
insert into t1 values (get_lock("lock_bug45143_wait", 100));;
# Switch to connection 'con_bug45143_2'.
# Wait until the above INSERT takes TL_WRITE_ALLOW_WRITE lock on 't1'
# and then gets blocked on user lock 'lock_bug45143_wait'.
# Ensure that upcoming SELECT waits after acquiring TL_WRITE_ALLOW_WRITE
# lock for the first instance of 't1'.
set debug_sync='thr_multi_lock_after_thr_lock SIGNAL parked WAIT_FOR go';
# Sending:
select count(*) > 0 from t1 as a, t1 as b for update;;
# Switch to connection 'con_bug45143_3'.
# Wait until the above SELECT ... FOR UPDATE is blocked after
# acquiring lock for the the first instance of 't1'.
set debug_sync= 'now WAIT_FOR parked';
# Send LOCK TABLE statement which will try to get TL_WRITE lock on 't1':
lock table t1 write;;
# Switch to connection 'default'.
# Wait until this LOCK TABLES statement starts waiting for table lock.
# Allow SELECT ... FOR UPDATE to resume.
# Since it already has TL_WRITE_ALLOW_WRITE lock on the first instance
# of 't1' it should be able to get lock on the second instance without
# waiting, even although there is another thread which has such lock
# on this table and also there is a thread waiting for a TL_WRITE on it.
set debug_sync= 'now SIGNAL go';
# Switch to connection 'con_bug45143_2'.
# Reap SELECT ... FOR UPDATE
count(*) > 0
1
# Switch to connection 'default'.
# Resume execution of the INSERT statement.
select release_lock("lock_bug45143_wait");
release_lock("lock_bug45143_wait")
1
# Switch to connection 'con_bug45143_1'.
# Reap INSERT statement.
# Switch to connection 'con_bug45143_3'.
# Reap LOCK TABLES statement.
unlock tables;
# Switch to connection 'default'.
# Do clean-up.
set debug_sync= 'RESET';
set @@global.general_log= @old_general_log;
drop table t1;
#
# Locking related tests which use DEBUG_SYNC facility.
#
--source include/have_debug_sync.inc
# We need InnoDB to be able use TL_WRITE_ALLOW_WRITE type of locks in our tests.
--source include/have_innodb.inc
# Save the initial number of concurrent sessions.
--source include/count_sessions.inc
--echo #
--echo # Test for bug #45143 "All connections hang on concurrent ALTER TABLE".
--echo #
--echo # Concurrent execution of statements which required weak write lock
--echo # (TL_WRITE_ALLOW_WRITE) on several instances of the same table and
--echo # statements which tried to acquire stronger write lock (TL_WRITE,
--echo # TL_WRITE_ALLOW_READ) on this table might have led to deadlock.
--disable_warnings
drop table if exists t1;
--enable_warnings
--echo # Create auxiliary connections used through the test.
connect (con_bug45143_1,localhost,root,,test,,);
connect (con_bug45143_3,localhost,root,,test,,);
connect (con_bug45143_2,localhost,root,,test,,);
connection default;
--echo # Reset DEBUG_SYNC facility before using it.
set debug_sync= 'RESET';
--echo # Turn off logging so calls to locking subsystem performed
--echo # for general_log table won't interfere with our test.
set @old_general_log = @@global.general_log;
set @@global.general_log= OFF;
create table t1 (i int) engine=InnoDB;
insert into t1 values (1);
--echo # Prepare user lock which will be used for resuming execution of
--echo # the first statement after it acquires TL_WRITE_ALLOW_WRITE lock.
select get_lock("lock_bug45143_wait", 0);
--echo # Switch to connection 'con_bug45143_1'.
connection con_bug45143_1;
--echo # Sending:
--send insert into t1 values (get_lock("lock_bug45143_wait", 100));
--echo # Switch to connection 'con_bug45143_2'.
connection con_bug45143_2;
--echo # Wait until the above INSERT takes TL_WRITE_ALLOW_WRITE lock on 't1'
--echo # and then gets blocked on user lock 'lock_bug45143_wait'.
let $wait_condition= select count(*)= 1 from information_schema.processlist
where state= 'User lock' and
info='insert into t1 values (get_lock("lock_bug45143_wait", 100))';
--source include/wait_condition.inc
--echo # Ensure that upcoming SELECT waits after acquiring TL_WRITE_ALLOW_WRITE
--echo # lock for the first instance of 't1'.
set debug_sync='thr_multi_lock_after_thr_lock SIGNAL parked WAIT_FOR go';
--echo # Sending:
--send select count(*) > 0 from t1 as a, t1 as b for update;
--echo # Switch to connection 'con_bug45143_3'.
connection con_bug45143_3;
--echo # Wait until the above SELECT ... FOR UPDATE is blocked after
--echo # acquiring lock for the the first instance of 't1'.
set debug_sync= 'now WAIT_FOR parked';
--echo # Send LOCK TABLE statement which will try to get TL_WRITE lock on 't1':
--send lock table t1 write;
--echo # Switch to connection 'default'.
connection default;
--echo # Wait until this LOCK TABLES statement starts waiting for table lock.
let $wait_condition= select count(*)= 1 from information_schema.processlist
where state= 'Locked' and
info='lock table t1 write';
--source include/wait_condition.inc
--echo # Allow SELECT ... FOR UPDATE to resume.
--echo # Since it already has TL_WRITE_ALLOW_WRITE lock on the first instance
--echo # of 't1' it should be able to get lock on the second instance without
--echo # waiting, even although there is another thread which has such lock
--echo # on this table and also there is a thread waiting for a TL_WRITE on it.
set debug_sync= 'now SIGNAL go';
--echo # Switch to connection 'con_bug45143_2'.
connection con_bug45143_2;
--echo # Reap SELECT ... FOR UPDATE
--reap
--echo # Switch to connection 'default'.
connection default;
--echo # Resume execution of the INSERT statement.
select release_lock("lock_bug45143_wait");
--echo # Switch to connection 'con_bug45143_1'.
connection con_bug45143_1;
--echo # Reap INSERT statement.
--reap
--echo # Switch to connection 'con_bug45143_3'.
connection con_bug45143_3;
--echo # Reap LOCK TABLES statement.
--reap
unlock tables;
--echo # Switch to connection 'default'.
connection default;
--echo # Do clean-up.
disconnect con_bug45143_1;
disconnect con_bug45143_2;
disconnect con_bug45143_3;
set debug_sync= 'RESET';
set @@global.general_log= @old_general_log;
drop table t1;
# Check that all connections opened by test cases in this file are really
# gone so execution of other tests won't be affected by their presence.
--source include/wait_until_count_sessions.inc
...@@ -362,7 +362,7 @@ void thr_lock_data_init(THR_LOCK *lock,THR_LOCK_DATA *data, void *param) ...@@ -362,7 +362,7 @@ void thr_lock_data_init(THR_LOCK *lock,THR_LOCK_DATA *data, void *param)
static inline my_bool static inline my_bool
have_old_read_lock(THR_LOCK_DATA *data, THR_LOCK_OWNER *owner) has_old_lock(THR_LOCK_DATA *data, THR_LOCK_OWNER *owner)
{ {
for ( ; data ; data=data->next) for ( ; data ; data=data->next)
{ {
...@@ -572,7 +572,7 @@ thr_lock(THR_LOCK_DATA *data, THR_LOCK_OWNER *owner, ...@@ -572,7 +572,7 @@ thr_lock(THR_LOCK_DATA *data, THR_LOCK_OWNER *owner,
else if (!lock->write_wait.data || else if (!lock->write_wait.data ||
lock->write_wait.data->type <= TL_WRITE_LOW_PRIORITY || lock->write_wait.data->type <= TL_WRITE_LOW_PRIORITY ||
lock_type == TL_READ_HIGH_PRIORITY || lock_type == TL_READ_HIGH_PRIORITY ||
have_old_read_lock(lock->read.data, data->owner)) has_old_lock(lock->read.data, data->owner)) /* Has old read lock */
{ /* No important write-locks */ { /* No important write-locks */
(*lock->read.last)=data; /* Add to running FIFO */ (*lock->read.last)=data; /* Add to running FIFO */
data->prev=lock->read.last; data->prev=lock->read.last;
...@@ -642,14 +642,36 @@ thr_lock(THR_LOCK_DATA *data, THR_LOCK_OWNER *owner, ...@@ -642,14 +642,36 @@ thr_lock(THR_LOCK_DATA *data, THR_LOCK_OWNER *owner,
} }
/* /*
The following test will not work if the old lock was a The idea is to allow us to get a lock at once if we already have
TL_WRITE_ALLOW_WRITE, TL_WRITE_ALLOW_READ or TL_WRITE_DELAYED in a write lock or if there is no pending write locks and if all
the same thread, but this will never happen within MySQL. write locks are of TL_WRITE_ALLOW_WRITE type.
Note that, since lock requests for the same table are sorted in
such way that requests with higher thr_lock_type value come first,
lock being requested usually has equal or "weaker" type than one
which thread might have already acquired.
The exceptions are situations when:
- old lock type is TL_WRITE_ALLOW_READ and new lock type is
TL_WRITE_ALLOW_WRITE
- when old lock type is TL_WRITE_DELAYED
But these should never happen within MySQL.
Therefore it is OK to allow acquiring write lock on the table if
this thread already holds some write lock on it.
(INSERT INTO t1 VALUES (f1()), where f1() is stored function which
tries to update t1, is an example of statement which requests two
different types of write lock on the same table).
*/ */
if (thr_lock_owner_equal(data->owner, lock->write.data->owner) || DBUG_ASSERT(! has_old_lock(lock->write.data, data->owner) ||
(lock_type == TL_WRITE_ALLOW_WRITE && (lock_type <= lock->write.data->type &&
!lock->write_wait.data && ! ((lock_type < TL_WRITE_ALLOW_READ &&
lock->write.data->type == TL_WRITE_ALLOW_WRITE)) lock->write.data->type == TL_WRITE_ALLOW_READ) ||
lock->write.data->type == TL_WRITE_DELAYED)));
if ((lock_type == TL_WRITE_ALLOW_WRITE &&
! lock->write_wait.data &&
lock->write.data->type == TL_WRITE_ALLOW_WRITE) ||
has_old_lock(lock->write.data, data->owner))
{ {
/* /*
We have already got a write lock or all locks are We have already got a write lock or all locks are
...@@ -998,6 +1020,7 @@ thr_multi_lock(THR_LOCK_DATA **data, uint count, THR_LOCK_OWNER *owner) ...@@ -998,6 +1020,7 @@ thr_multi_lock(THR_LOCK_DATA **data, uint count, THR_LOCK_OWNER *owner)
thr_multi_unlock(data,(uint) (pos-data)); thr_multi_unlock(data,(uint) (pos-data));
DBUG_RETURN(result); DBUG_RETURN(result);
} }
DEBUG_SYNC_C("thr_multi_lock_after_thr_lock");
#ifdef MAIN #ifdef MAIN
printf("Thread: %s Got lock: 0x%lx type: %d\n",my_thread_name(), printf("Thread: %s Got lock: 0x%lx type: %d\n",my_thread_name(),
(long) pos[0]->lock, pos[0]->type); fflush(stdout); (long) pos[0]->lock, pos[0]->type); fflush(stdout);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment