Commit be811386 authored by Vlad Lesin's avatar Vlad Lesin

MDEV-27025 insert-intention lock conflicts with waiting ORDINARY lock

The code was backported from 10.6 bd03c0e5
commit. See that commit message for details.

Apart from the above commit trx_lock_t::wait_trx was also backported from
MDEV-24738. trx_lock_t::wait_trx is protected with lock_sys.wait_mutex
in 10.6, but that mutex was implemented only in MDEV-24789. As there is no
need to backport MDEV-24789 for MDEV-27025,
trx_lock_t::wait_trx is protected with the same mutexes as
trx_lock_t::wait_lock.

This fix should not break innodb-lock-schedule-algorithm=VATS. This
algorithm uses an Eldest-Transaction-First (ETF) heuristic, which prefers
older transactions over new ones. In this fix we just insert granted lock
just before the last granted lock of the same transaction, what does not
change transactions execution order.

The changes in lock_rec_create_low() should not break Galera Cluster,
there is a big "if" branch for WSREP. This branch is necessary to provide
the correct transactions execution order, and should not be changed for
the current bug fix.
parent e44439ab
#
# MDEV-27025 insert-intention lock conflicts with waiting ORDINARY lock
#
CREATE TABLE t (a INT PRIMARY KEY, b INT NOT NULL UNIQUE) ENGINE=InnoDB;
connect prevent_purge,localhost,root,,;
start transaction with consistent snapshot;
connection default;
INSERT INTO t VALUES (20,20);
DELETE FROM t WHERE b = 20;
connect con_ins,localhost,root,,;
SET DEBUG_SYNC = 'row_ins_sec_index_entry_dup_locks_created SIGNAL ins_set_locks WAIT_FOR ins_cont';
INSERT INTO t VALUES(10, 20);
connect con_del,localhost,root,,;
SET DEBUG_SYNC = 'now WAIT_FOR ins_set_locks';
SET DEBUG_SYNC = 'lock_wait_suspend_thread_enter SIGNAL del_locked';
DELETE FROM t WHERE b = 20;
connection default;
SET DEBUG_SYNC = 'now WAIT_FOR del_locked';
SET DEBUG_SYNC = 'now SIGNAL ins_cont';
connection con_ins;
disconnect con_ins;
connection con_del;
disconnect con_del;
disconnect prevent_purge;
connection default;
SET DEBUG_SYNC = 'RESET';
DROP TABLE t;
--source include/have_innodb.inc
--source include/count_sessions.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc
--echo #
--echo # MDEV-27025 insert-intention lock conflicts with waiting ORDINARY lock
--echo #
# The test checks the ability to acquire exclusive record lock if the acquiring
# transaction already holds a shared lock on the record and another transaction
# is waiting for a lock.
CREATE TABLE t (a INT PRIMARY KEY, b INT NOT NULL UNIQUE) ENGINE=InnoDB;
--connect(prevent_purge,localhost,root,,)
start transaction with consistent snapshot;
--connection default
INSERT INTO t VALUES (20,20);
DELETE FROM t WHERE b = 20;
--connect(con_ins,localhost,root,,)
SET DEBUG_SYNC = 'row_ins_sec_index_entry_dup_locks_created SIGNAL ins_set_locks WAIT_FOR ins_cont';
send
INSERT INTO t VALUES(10, 20);
--connect(con_del,localhost,root,,)
SET DEBUG_SYNC = 'now WAIT_FOR ins_set_locks';
SET DEBUG_SYNC = 'lock_wait_suspend_thread_enter SIGNAL del_locked';
###############################################################################
# This DELETE creates waiting ORDINARY X-lock for heap_no 2 as the record is
# delete-marked, this lock conflicts with ORDINARY S-lock set by the the last
# INSERT. After the last INSERT creates insert-intention lock on
# heap_no 2, this lock will conflict with waiting ORDINARY X-lock of this
# DELETE, what causes DEADLOCK error for this DELETE.
###############################################################################
send
DELETE FROM t WHERE b = 20;
--connection default
SET DEBUG_SYNC = 'now WAIT_FOR del_locked';
SET DEBUG_SYNC = 'now SIGNAL ins_cont';
--connection con_ins
--reap
--disconnect con_ins
--connection con_del
# Without the fix, ER_LOCK_DEADLOCK would be reported here.
--reap
--disconnect con_del
--disconnect prevent_purge
--connection default
SET DEBUG_SYNC = 'RESET';
DROP TABLE t;
--source include/wait_until_count_sessions.inc
......@@ -283,7 +283,6 @@ connection default;
update t1 set b = 'foo';
connection con1;
update t1 set a = 'bar';
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
disconnect con1;
connection default;
drop table t1;
......
......@@ -186,7 +186,9 @@ send update t1 set b = 'foo';
connection con1;
let $wait_condition= select count(*) from information_schema.innodb_lock_waits;
source include/wait_condition.inc;
error ER_LOCK_DEADLOCK;
# There must no be DEADLOCK here as con1 transaction already holds locks, and
# default's transaction lock is waiting, so the locks of the following "UPDATE"
# must not conflict with waiting lock.
update t1 set a = 'bar';
disconnect con1;
connection default;
......
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2018, 2020, MariaDB Corporation.
Copyright (c) 2018, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -28,8 +28,29 @@ Created 5/20/1997 Heikki Tuuri
#include "ut0rnd.h"
struct hash_table_t;
struct hash_cell_t{
void* node; /*!< hash chain node, NULL if none */
struct hash_cell_t
{
/** singly-linked, nullptr terminated list of hash buckets */
void *node;
/** Insert an element after another.
@tparam T type of the element
@param after the element after which to insert
@param insert the being-inserted element
@param next the next-element pointer in T */
template<typename T>
void insert_after(T &after, T &insert, T *T::*next)
{
#ifdef UNIV_DEBUG
for (const T *c= static_cast<const T*>(node); c; c= c->*next)
if (c == &after)
goto found;
ut_error;
found:
#endif
insert.*next= after.*next;
after.*next= &insert;
}
};
typedef void* hash_node_t;
......
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2021, MariaDB Corporation.
Copyright (c) 2017, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -832,26 +832,29 @@ class lock_sys_t
/*********************************************************************//**
Creates a new record lock and inserts it to the lock queue. Does NOT check
for deadlocks or lock compatibility!
@param[in] c_lock conflicting lock
@param[in] thr thread owning trx
@param[in] type_mode lock mode and wait flag, type is ignored and replaced by
LOCK_REC
@param[in] block buffer block containing the record
@param[in] heap_no heap number of the record
@param[in] index index of record
@param[in,out] trx transaction
@param[in] caller_owns_trx_mutex TRUE if caller owns trx mutex
@param[in] insert_before_waiting if true, inserts new B-tree record lock
just after the last non-waiting lock of the current transaction which is
located before the first waiting for the current transaction lock, otherwise
the lock is inserted at the end of the queue
@return created lock */
UNIV_INLINE
lock_t*
lock_rec_create(
/*============*/
lock_t *lock_rec_create(lock_t *c_lock,
#ifdef WITH_WSREP
lock_t* c_lock, /*!< conflicting lock */
que_thr_t* thr, /*!< thread owning trx */
que_thr_t *thr,
#endif
unsigned type_mode,/*!< in: lock mode and wait
flag, type is ignored and
replaced by LOCK_REC */
const buf_block_t* block, /*!< in: buffer block containing
the record */
ulint heap_no,/*!< in: heap number of the record */
dict_index_t* index, /*!< in: index of record */
trx_t* trx, /*!< in,out: transaction */
bool caller_owns_trx_mutex);
/*!< in: true if caller owns
trx mutex */
unsigned type_mode, const buf_block_t *block,
ulint heap_no, dict_index_t *index, trx_t *trx,
bool caller_owns_trx_mutex,
bool insert_before_waiting= false);
/*************************************************************//**
Removes a record lock request, waiting or granted, from the queue. */
......@@ -864,6 +867,7 @@ lock_rec_discard(
/** Create a new record lock and inserts it to the lock queue,
without checking for deadlocks or conflicts.
@param[in] c_lock conflicting lock
@param[in] type_mode lock mode and wait flag; type will be replaced
with LOCK_REC
@param[in] page_id index page number
......@@ -872,11 +876,15 @@ without checking for deadlocks or conflicts.
@param[in] index the index tree
@param[in,out] trx transaction
@param[in] holds_trx_mutex whether the caller holds trx->mutex
@param[in] insert_before_waiting if true, inserts new B-tree record lock
just after the last non-waiting lock of the current transaction which is
located before the first waiting for the current transaction lock, otherwise
the lock is inserted at the end of the queue
@return created lock */
lock_t*
lock_rec_create_low(
lock_t* c_lock,
#ifdef WITH_WSREP
lock_t* c_lock, /*!< conflicting lock */
que_thr_t* thr, /*!< thread owning trx */
#endif
unsigned type_mode,
......@@ -885,7 +893,9 @@ lock_rec_create_low(
ulint heap_no,
dict_index_t* index,
trx_t* trx,
bool holds_trx_mutex);
bool holds_trx_mutex,
bool insert_before_waiting = false);
/** Enqueue a waiting request for a lock which cannot be granted immediately.
Check for deadlocks.
@param[in] type_mode the requested lock mode (LOCK_S or LOCK_X)
......@@ -906,9 +916,7 @@ Check for deadlocks.
(or it happened to commit) */
dberr_t
lock_rec_enqueue_waiting(
#ifdef WITH_WSREP
lock_t* c_lock, /*!< conflicting lock */
#endif
unsigned type_mode,
const buf_block_t* block,
ulint heap_no,
......
......@@ -72,32 +72,35 @@ lock_hash_get(
/*********************************************************************//**
Creates a new record lock and inserts it to the lock queue. Does NOT check
for deadlocks or lock compatibility!
@param[in] c_lock conflicting lock
@param[in] thr thread owning trx
@param[in] type_mode lock mode and wait flag, type is ignored and replaced by
LOCK_REC
@param[in] block buffer block containing the record
@param[in] heap_no heap number of the record
@param[in] index index of record
@param[in,out] trx transaction
@param[in] caller_owns_trx_mutex TRUE if caller owns trx mutex
@param[in] insert_before_waiting if true, inserts new B-tree record lock
just after the last non-waiting lock of the current transaction which is
located before the first waiting for the current transaction lock, otherwise
the lock is inserted at the end of the queue
@return created lock */
UNIV_INLINE
lock_t*
lock_rec_create(
/*============*/
lock_t *lock_rec_create(lock_t *c_lock,
#ifdef WITH_WSREP
lock_t* c_lock, /*!< conflicting lock */
que_thr_t* thr, /*!< thread owning trx */
que_thr_t *thr,
#endif
unsigned type_mode,/*!< in: lock mode and wait
flag, type is ignored and
replaced by LOCK_REC */
const buf_block_t* block, /*!< in: buffer block containing
the record */
ulint heap_no,/*!< in: heap number of the record */
dict_index_t* index, /*!< in: index of record */
trx_t* trx, /*!< in,out: transaction */
bool caller_owns_trx_mutex)
/*!< in: TRUE if caller owns
trx mutex */
unsigned type_mode, const buf_block_t *block,
ulint heap_no, dict_index_t *index, trx_t *trx,
bool caller_owns_trx_mutex,
bool insert_before_waiting)
{
btr_assert_not_corrupted(block, index);
return lock_rec_create_low(
return lock_rec_create_low(c_lock,
#ifdef WITH_WSREP
c_lock, thr,
thr,
#endif
type_mode, block->page.id(), block->frame, heap_no,
index, trx, caller_owns_trx_mutex);
index, trx, caller_owns_trx_mutex, insert_before_waiting);
}
......@@ -562,14 +562,13 @@ lock_rec_get_next_const(
/*********************************************************************//**
Gets the first explicit lock request on a record.
@param[in] hash hash chain the lock on
@param[in] page_id page id
@param[in] heap_no heap number of the record
@return first lock, NULL if none exists */
UNIV_INLINE
lock_t*
lock_rec_get_first(
/*===============*/
hash_table_t* hash, /*!< in: hash chain the lock on */
const buf_block_t* block, /*!< in: block containing the record */
ulint heap_no);/*!< in: heap number of the record */
lock_t *lock_rec_get_first(hash_table_t *hash, page_id_t page_id,
ulint heap_no);
/*********************************************************************//**
Gets the mode of a lock.
......@@ -623,15 +622,26 @@ lock_table_has(
/** Set the wait status of a lock.
@param[in,out] lock lock that will be waited for
@param[in,out] trx transaction that will wait for the lock */
inline void lock_set_lock_and_trx_wait(lock_t* lock, trx_t* trx)
@param[in,out] trx transaction that will wait for the lock
@param[in] c_lock conflicting lock */
inline void lock_set_lock_and_trx_wait(lock_t* lock, trx_t* trx,
const lock_t *c_lock)
{
ut_ad(lock);
ut_ad(lock->trx == trx);
ut_ad(trx->lock.wait_lock == NULL);
ut_ad(lock_mutex_own());
ut_ad(trx_mutex_own(trx));
if (trx->lock.wait_trx) {
ut_ad(!c_lock || trx->lock.wait_trx == c_lock->trx);
ut_ad(trx->lock.wait_lock);
ut_ad((*trx->lock.wait_lock).trx == trx);
} else {
ut_ad(c_lock);
trx->lock.wait_trx = c_lock->trx;
ut_ad(!trx->lock.wait_lock);
}
trx->lock.wait_lock = lock;
lock->type_mode |= LOCK_WAIT;
}
......@@ -644,6 +654,7 @@ inline void lock_reset_lock_and_trx_wait(lock_t* lock)
ut_ad(lock_mutex_own());
ut_ad(lock->trx->lock.wait_lock == NULL
|| lock->trx->lock.wait_lock == lock);
lock->trx->lock.wait_trx= nullptr;
lock->trx->lock.wait_lock = NULL;
lock->type_mode &= ~LOCK_WAIT;
}
......
......@@ -156,16 +156,15 @@ lock_rec_get_next_const(
/*********************************************************************//**
Gets the first explicit lock request on a record.
@return first lock, NULL if none exists */
@param[in] hash hash chain the lock on
@param[in] page_id page id
@param[in] heap_no heap number of the record
@return first lock, NULL if none exists */
UNIV_INLINE
lock_t*
lock_rec_get_first(
/*===============*/
hash_table_t* hash, /*!< in: hash chain the lock on */
const buf_block_t* block, /*!< in: block containing the record */
ulint heap_no)/*!< in: heap number of the record */
lock_t *lock_rec_get_first(hash_table_t *hash, page_id_t page_id,
ulint heap_no)
{
for (lock_t *lock= lock_sys.get_first(*hash, block->page.id());
for (lock_t *lock= lock_sys.get_first(*hash, page_id);
lock; lock= lock_rec_get_next_on_page(lock))
if (lock_rec_get_nth_bit(lock, heap_no))
return lock;
......
......@@ -426,7 +426,9 @@ struct trx_lock_t {
trx_que_t que_state; /*!< valid when trx->state
== TRX_STATE_ACTIVE: TRX_QUE_RUNNING,
TRX_QUE_LOCK_WAIT, ... */
/** Transaction being waited for; protected by the same mutexes as
wait_lock */
trx_t* wait_trx;
lock_t* wait_lock; /*!< if trx execution state is
TRX_QUE_LOCK_WAIT, this points to
the lock request, otherwise this is
......
This diff is collapsed.
......@@ -243,7 +243,7 @@ lock_prdt_has_lock(
ut_ad(!(precise_mode & LOCK_INSERT_INTENTION));
for (lock = lock_rec_get_first(
lock_hash_get(type_mode), block, PRDT_HEAPNO);
lock_hash_get(type_mode), block->page.id(), PRDT_HEAPNO);
lock != NULL;
lock = lock_rec_get_next(PRDT_HEAPNO, lock)) {
ut_ad(lock->type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE));
......@@ -298,7 +298,7 @@ lock_prdt_other_has_conflicting(
ut_ad(lock_mutex_own());
for (lock_t* lock = lock_rec_get_first(
lock_hash_get(mode), block, PRDT_HEAPNO);
lock_hash_get(mode), block->page.id(), PRDT_HEAPNO);
lock != NULL;
lock = lock_rec_get_next(PRDT_HEAPNO, lock)) {
......@@ -489,9 +489,13 @@ lock_prdt_add_to_queue(
}
}
lock = lock_rec_create(
/* Note: We will not pass any conflicting lock to lock_rec_create(),
because we should be moving an existing waiting lock request. */
ut_ad(!(type_mode & LOCK_WAIT) || trx->lock.wait_trx);
lock = lock_rec_create(NULL,
#ifdef WITH_WSREP
NULL, NULL, /* FIXME: replicate SPATIAL INDEX locks */
NULL, /* FIXME: replicate SPATIAL INDEX locks */
#endif
type_mode, block, PRDT_HEAPNO, index, trx,
caller_owns_trx_mutex);
......@@ -543,7 +547,8 @@ lock_prdt_insert_check_and_lock(
lock_t* lock;
/* Only need to check locks on prdt_hash */
lock = lock_rec_get_first(&lock_sys.prdt_hash, block, PRDT_HEAPNO);
lock = lock_rec_get_first(&lock_sys.prdt_hash, block->page.id(),
PRDT_HEAPNO);
if (lock == NULL) {
lock_mutex_exit();
......@@ -581,9 +586,7 @@ lock_prdt_insert_check_and_lock(
trx_mutex_enter(trx);
err = lock_rec_enqueue_waiting(
#ifdef WITH_WSREP
NULL, /* FIXME: replicate SPATIAL INDEX locks */
#endif
LOCK_X | LOCK_PREDICATE | LOCK_INSERT_INTENTION,
block, PRDT_HEAPNO, index, thr, prdt);
......@@ -822,9 +825,9 @@ lock_prdt_lock(
lock_t* lock = lock_sys.get_first(hash, block->page.id());
if (lock == NULL) {
lock = lock_rec_create(
lock = lock_rec_create(NULL,
#ifdef WITH_WSREP
NULL, NULL, /* FIXME: replicate SPATIAL INDEX locks */
NULL, /* FIXME: replicate SPATIAL INDEX locks */
#endif
prdt_mode, block, PRDT_HEAPNO,
index, trx, FALSE);
......@@ -854,10 +857,8 @@ lock_prdt_lock(
if (wait_for != NULL) {
err = lock_rec_enqueue_waiting(
#ifdef WITH_WSREP
NULL, /* FIXME: replicate
SPATIAL INDEX locks */
#endif
prdt_mode,
block, PRDT_HEAPNO,
index, thr, prdt);
......@@ -937,9 +938,9 @@ lock_place_prdt_page_lock(
}
if (lock == NULL) {
lock = lock_rec_create_low(
lock = lock_rec_create_low(NULL,
#ifdef WITH_WSREP
NULL, NULL, /* FIXME: replicate SPATIAL INDEX locks */
NULL, /* FIXME: replicate SPATIAL INDEX locks */
#endif
mode, page_id, NULL, PRDT_HEAPNO,
index, trx, FALSE);
......@@ -985,7 +986,7 @@ lock_prdt_rec_move(
lock_mutex_enter();
for (lock_t *lock = lock_rec_get_first(&lock_sys.prdt_hash,
donator, PRDT_HEAPNO);
donator->page.id(), PRDT_HEAPNO);
lock != NULL;
lock = lock_rec_get_next(PRDT_HEAPNO, lock)) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment