Commit 1c7d4f8d authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-25016 Race condition between lock_sys_t::cancel() and page split or merge

In commit 8d16da14 (MDEV-24789)
we accidentally introduced a race condition. During the time a
waiting lock request is being removed, the request might be
moved to another page due to a concurrent page split or merge.
To prevent this, we must hold exclusive lock_sys.latch when releasing
a record lock.

lock_release_autoinc_locks(): Avoid a potential hang.
No dict_table_t::lock_mutex must be waited for while already holding
lock_sys.wait_mutex or trx_t::mutex.

lock_cancel_waiting_and_release(): Correctly handle AUTO_INCREMENT locks.
parent 80ac9ec1
......@@ -801,9 +801,13 @@ class lock_sys_t
void deadlock_check();
/** Cancel a waiting lock request.
@param lock waiting lock request
@param trx active transaction */
static void cancel(trx_t *trx, lock_t *lock);
@param lock waiting lock request
@param trx active transaction
@param check_victim whether to check trx->lock.was_chosen_as_deadlock_victim
@retval DB_SUCCESS if no lock existed
@retval DB_DEADLOCK if trx->lock.was_chosen_as_deadlock_victim was set
@retval DB_LOCK_WAIT if the lock was canceled */
static dberr_t cancel(trx_t *trx, lock_t *lock, bool check_victim);
/** Cancel a waiting lock request (if any) when killing a transaction */
static void cancel(trx_t *trx);
......
......@@ -1815,19 +1815,7 @@ dberr_t lock_wait(que_thr_t *thr)
end_wait:
if (lock_t *lock= trx->lock.wait_lock)
{
if (lock_sys.rd_lock_try())
cancel:
lock_sys_t::cancel(trx, lock);
else
{
mysql_mutex_unlock(&lock_sys.wait_mutex);
lock_sys.rd_lock(SRW_LOCK_CALL);
mysql_mutex_lock(&lock_sys.wait_mutex);
lock= trx->lock.wait_lock;
if (lock)
goto cancel;
}
lock_sys.rd_unlock();
lock_sys_t::cancel(trx, lock, false);
lock_sys.deadlock_check();
}
......@@ -3569,10 +3557,12 @@ void lock_table_x_unlock(dict_table_t *table, trx_t *trx)
continue;
lock_sys.rd_lock(SRW_LOCK_CALL);
table->lock_mutex_lock();
mysql_mutex_lock(&lock_sys.wait_mutex);
trx->mutex_lock();
/* There is no need to check for new deadlocks, because only one
exclusive lock may exist on an object at a time. */
lock_table_dequeue(lock, false);
lock_table_dequeue(lock, true);
mysql_mutex_unlock(&lock_sys.wait_mutex);
trx->mutex_unlock();
table->lock_mutex_unlock();
lock_sys.rd_unlock();
......@@ -5356,33 +5346,29 @@ lock_trx_holds_autoinc_locks(
}
/** Release all AUTO_INCREMENT locks of the transaction. */
static void lock_release_autoinc_locks(trx_t *trx, bool owns_wait_mutex)
static void lock_release_autoinc_locks(trx_t *trx)
{
#ifdef SAFE_MUTEX
ut_ad(owns_wait_mutex == mysql_mutex_is_owner(&lock_sys.wait_mutex));
#endif /* SAFE_MUTEX */
ut_ad(trx->mutex_is_owner());
/* Because this is invoked for a running transaction by the thread
that is serving the transaction, it is not necessary to hold trx->mutex. */
auto autoinc_locks= trx->autoinc_locks;
ut_a(autoinc_locks);
/* We release the locks in the reverse order. This is to avoid
searching the vector for the element to delete at the lower level.
See (lock_table_remove_low()) for details. */
while (ulint size= ib_vector_size(autoinc_locks))
{
lock_t *lock= *static_cast<lock_t**>
(ib_vector_get(autoinc_locks, size - 1));
ut_ad(lock->type_mode == (LOCK_AUTO_INC | LOCK_TABLE));
dict_table_t *table= lock->un_member.tab_lock.table;
if (!owns_wait_mutex)
table->lock_mutex_lock();
lock_table_dequeue(lock, owns_wait_mutex);
lock_trx_table_locks_remove(lock);
if (!owns_wait_mutex)
table->lock_mutex_unlock();
LockMutexGuard g{SRW_LOCK_CALL};
mysql_mutex_lock(&lock_sys.wait_mutex);
trx->mutex_lock();
auto autoinc_locks= trx->autoinc_locks;
ut_a(autoinc_locks);
/* We release the locks in the reverse order. This is to avoid
searching the vector for the element to delete at the lower level.
See (lock_table_remove_low()) for details. */
while (ulint size= ib_vector_size(autoinc_locks))
{
lock_t *lock= *static_cast<lock_t**>
(ib_vector_get(autoinc_locks, size - 1));
ut_ad(lock->type_mode == (LOCK_AUTO_INC | LOCK_TABLE));
lock_table_dequeue(lock, true);
lock_trx_table_locks_remove(lock);
}
}
mysql_mutex_unlock(&lock_sys.wait_mutex);
trx->mutex_unlock();
}
/** Cancel a waiting lock request and release possibly waiting transactions */
......@@ -5398,8 +5384,11 @@ static void lock_cancel_waiting_and_release(lock_t *lock)
lock_rec_dequeue_from_page(lock, true);
else
{
if (trx->autoinc_locks)
lock_release_autoinc_locks(trx, true);
if (lock->type_mode == (LOCK_AUTO_INC | LOCK_TABLE))
{
ut_ad(trx->autoinc_locks);
ib_vector_remove(trx->autoinc_locks, lock);
}
lock_table_dequeue(lock, true);
/* Remove the lock from table lock vector too. */
lock_trx_table_locks_remove(lock);
......@@ -5414,68 +5403,94 @@ static void lock_cancel_waiting_and_release(lock_t *lock)
/** Cancel a waiting lock request.
@param lock waiting lock request
@param trx active transaction */
void lock_sys_t::cancel(trx_t *trx, lock_t *lock)
@param trx active transaction
@param check_victim whether to check trx->lock.was_chosen_as_deadlock_victim
@retval DB_SUCCESS if no lock existed
@retval DB_DEADLOCK if trx->lock.was_chosen_as_deadlock_victim was set
@retval DB_LOCK_WAIT if the lock was canceled */
dberr_t lock_sys_t::cancel(trx_t *trx, lock_t *lock, bool check_victim)
{
mysql_mutex_assert_owner(&lock_sys.wait_mutex);
ut_ad(trx->lock.wait_lock == lock);
ut_ad(trx->state == TRX_STATE_ACTIVE);
dberr_t err= DB_SUCCESS;
if (lock->is_table())
{
dict_table_t *table= lock->un_member.tab_lock.table;
table->lock_mutex_lock();
if (lock->is_waiting())
lock_cancel_waiting_and_release(lock);
table->lock_mutex_unlock();
if (!lock_sys.rd_lock_try())
{
mysql_mutex_unlock(&lock_sys.wait_mutex);
lock_sys.rd_lock(SRW_LOCK_CALL);
mysql_mutex_lock(&lock_sys.wait_mutex);
lock= trx->lock.wait_lock;
if (!lock);
else if (check_victim && trx->lock.was_chosen_as_deadlock_victim)
err= DB_DEADLOCK;
else
goto resolve_table_lock;
}
else
{
resolve_table_lock:
dict_table_t *table= lock->un_member.tab_lock.table;
table->lock_mutex_lock();
if (lock->is_waiting())
lock_cancel_waiting_and_release(lock);
table->lock_mutex_unlock();
/* Even if lock->is_waiting() did not hold above, we must return
DB_LOCK_WAIT, or otherwise optimistic parallel replication could
occasionally hang. Potentially affected tests:
rpl.rpl_parallel_optimistic
rpl.rpl_parallel_optimistic_nobinlog
rpl.rpl_parallel_optimistic_xa_lsu_off */
err= DB_LOCK_WAIT;
}
lock_sys.rd_unlock();
}
else
{
auto latch= lock_sys_t::hash_table::latch
(lock_sys.hash_get(lock->type_mode).cell_get
(lock->un_member.rec_lock.page_id.fold()));
latch->acquire();
if (lock->is_waiting())
lock_cancel_waiting_and_release(lock);
latch->release();
/* To prevent the record lock from being moved between pages
during a page split or merge, we must hold exclusive lock_sys.latch. */
if (!lock_sys.wr_lock_try())
{
mysql_mutex_unlock(&lock_sys.wait_mutex);
lock_sys.wr_lock(SRW_LOCK_CALL);
mysql_mutex_lock(&lock_sys.wait_mutex);
lock= trx->lock.wait_lock;
if (!lock);
else if (check_victim && trx->lock.was_chosen_as_deadlock_victim)
err= DB_DEADLOCK;
else
goto resolve_record_lock;
}
else
{
resolve_record_lock:
if (lock->is_waiting())
lock_cancel_waiting_and_release(lock);
/* Even if lock->is_waiting() did not hold above, we must return
DB_LOCK_WAIT, or otherwise optimistic parallel replication could
occasionally hang. Potentially affected tests:
rpl.rpl_parallel_optimistic
rpl.rpl_parallel_optimistic_nobinlog
rpl.rpl_parallel_optimistic_xa_lsu_off */
err= DB_LOCK_WAIT;
}
lock_sys.wr_unlock();
}
return err;
}
/** Cancel a waiting lock request (if any) when killing a transaction */
void lock_sys_t::cancel(trx_t *trx)
{
#ifdef HAVE_REPLICATION /* Work around MDEV-25016 (FIXME: remove this!) */
/* Parallel replication tests would occasionally hang if we did not
acquire exclusive lock_sys.latch here. This is not a real fix, but a
work-around!
It would be nice if thd_need_wait_reports() did not hold when no
parallel replication is in use, and only the binlog is enabled. */
if (innodb_deadlock_detect && thd_need_wait_reports(trx->mysql_thd))
{
lock_sys.wr_lock(SRW_LOCK_CALL);
mysql_mutex_lock(&lock_sys.wait_mutex);
if (lock_t *lock= trx->lock.wait_lock)
{
trx->error_state= DB_INTERRUPTED;
cancel(trx, lock);
}
lock_sys.wr_unlock();
goto func_exit;
}
#endif
lock_sys.rd_lock(SRW_LOCK_CALL);
mysql_mutex_lock(&lock_sys.wait_mutex);
if (lock_t *lock= trx->lock.wait_lock)
{
trx->error_state= DB_INTERRUPTED;
cancel(trx, lock);
cancel(trx, lock, false);
}
lock_sys.rd_unlock();
#ifdef HAVE_REPLICATION
func_exit:
#endif
lock_sys.deadlock_check();
mysql_mutex_unlock(&lock_sys.wait_mutex);
}
......@@ -5504,11 +5519,7 @@ lock_unlock_table_autoinc(
necessary to hold trx->mutex here. */
if (lock_trx_holds_autoinc_locks(trx)) {
lock_sys.rd_lock(SRW_LOCK_CALL);
trx->mutex_lock();
lock_release_autoinc_locks(trx, false);
trx->mutex_unlock();
lock_sys.rd_unlock();
lock_release_autoinc_locks(trx);
}
}
......@@ -5530,28 +5541,7 @@ dberr_t lock_trx_handle_wait(trx_t *trx)
if (trx->lock.was_chosen_as_deadlock_victim)
err= DB_DEADLOCK;
else if (lock_t *wait_lock= trx->lock.wait_lock)
{
if (!lock_sys.rd_lock_try())
{
mysql_mutex_unlock(&lock_sys.wait_mutex);
lock_sys.rd_lock(SRW_LOCK_CALL);
mysql_mutex_lock(&lock_sys.wait_mutex);
if (trx->lock.was_chosen_as_deadlock_victim)
{
err= DB_DEADLOCK;
goto release;
}
wait_lock= trx->lock.wait_lock;
if (!wait_lock)
goto release;
}
err= DB_LOCK_WAIT;
lock_sys_t::cancel(trx, wait_lock);
release:
lock_sys.rd_unlock();
}
err= lock_sys_t::cancel(trx, wait_lock, true);
lock_sys.deadlock_check();
mysql_mutex_unlock(&lock_sys.wait_mutex);
return err;
......@@ -5954,20 +5944,7 @@ static bool Deadlock::check_and_resolve(trx_t *trx)
return false;
if (lock_t *wait_lock= trx->lock.wait_lock)
{
if (!lock_sys.rd_lock_try())
{
mysql_mutex_unlock(&lock_sys.wait_mutex);
lock_sys.rd_lock(SRW_LOCK_CALL);
mysql_mutex_lock(&lock_sys.wait_mutex);
wait_lock= trx->lock.wait_lock;
if (!wait_lock)
goto resolved;
}
lock_sys_t::cancel(trx, wait_lock);
resolved:
lock_sys.rd_unlock();
}
lock_sys_t::cancel(trx, wait_lock, false);
lock_sys.deadlock_check();
return true;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment