Commit 43b239a0 authored by Marko Mäkelä

MDEV-24915 Galera conflict resolution is unnecessarily complex

The fix of MDEV-23328 introduced a background thread for
killing conflicting transactions.
Thanks to the refactoring that was conducted in MDEV-24671,
the high-priority ("brute-force") applier thread can kill the
conflicting transactions itself, before waiting for the
locks to be finally released (after the conflicting transactions
have been rolled back).

This also allows us to remove the hack LockGGuard that had to
be added in MDEV-20612, and remove Galera-related function
parameters from lock creation.
parent 18dc5b01
...@@ -17991,162 +17991,66 @@ static struct st_mysql_storage_engine innobase_storage_engine= ...@@ -17991,162 +17991,66 @@ static struct st_mysql_storage_engine innobase_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION }; { MYSQL_HANDLERTON_INTERFACE_VERSION };
#ifdef WITH_WSREP #ifdef WITH_WSREP
/** Request a transaction to be killed that holds a conflicting lock.
struct bg_wsrep_kill_trx_arg { @param bf_trx brute force applier transaction
my_thread_id thd_id, bf_thd_id; @param thd_id thd_get_thread_id(victim_trx->mysql_thd)
trx_id_t trx_id, bf_trx_id; @param trx_id victim_trx->id */
bool signal; void lock_wait_wsrep_kill(trx_t *bf_trx, ulong thd_id, trx_id_t trx_id)
};
/** Kill one transaction from a background manager thread
wsrep_innobase_kill_one_trx() is invoked while lock_sys.mutex and the trx mutex
are held, so wsrep_thd_bf_abort() cannot be used there: it takes THD mutexes
that must be acquired before lock_sys.mutex and the trx mutex. That is why
wsrep_innobase_kill_one_trx() only posts the killing task to the manager thread,
and the actual killing happens asynchronously here.
As no mutexes were held, we do not know whether the THD or trx pointers are still
valid, so we need to pass thread/trx ids and perform a lookup.
*/
static void bg_wsrep_kill_trx(void *void_arg)
{ {
bg_wsrep_kill_trx_arg *arg= (bg_wsrep_kill_trx_arg *)void_arg; THD *bf_thd= bf_trx->mysql_thd;
THD *thd, *bf_thd;
trx_t *victim_trx;
bool aborting= false;
if ((bf_thd= find_thread_by_id(arg->bf_thd_id)))
wsrep_thd_LOCK(bf_thd);
if ((thd= find_thread_by_id(arg->thd_id)))
wsrep_thd_LOCK(thd);
if (!thd || !bf_thd || !(victim_trx= thd_to_trx(thd)))
goto ret0;
if (THD *vthd= find_thread_by_id(thd_id))
{
bool aborting= false;
wsrep_thd_LOCK(vthd);
if (trx_t *vtrx= thd_to_trx(vthd))
{
lock_sys.wr_lock(SRW_LOCK_CALL); lock_sys.wr_lock(SRW_LOCK_CALL);
mysql_mutex_lock(&lock_sys.wait_mutex); mysql_mutex_lock(&lock_sys.wait_mutex);
victim_trx->mutex_lock(); vtrx->mutex_lock();
if (victim_trx->id != arg->trx_id || if (vtrx->id == trx_id && vtrx->state == TRX_STATE_ACTIVE)
victim_trx->state == TRX_STATE_COMMITTED_IN_MEMORY)
{ {
/* apparently victim trx was meanwhile rolled back. */ WSREP_LOG_CONFLICT(bf_thd, vthd, TRUE);
goto ret1; WSREP_DEBUG("Aborter BF trx_id: " TRX_ID_FMT " thread: %ld "
} "seqno: %lld client_state: %s "
"client_mode: %s transaction_mode: %s query: %s",
DBUG_ASSERT(wsrep_on(bf_thd)); bf_trx->id,
WSREP_LOG_CONFLICT(bf_thd, thd, TRUE);
WSREP_DEBUG("Aborter %s trx_id: " TRX_ID_FMT " thread: %ld "
"seqno: %lld client_state: %s client_mode: %s transaction_mode: %s "
"query: %s",
wsrep_thd_is_BF(bf_thd, false) ? "BF" : "normal",
arg->bf_trx_id,
thd_get_thread_id(bf_thd), thd_get_thread_id(bf_thd),
wsrep_thd_trx_seqno(bf_thd), wsrep_thd_trx_seqno(bf_thd),
wsrep_thd_client_state_str(bf_thd), wsrep_thd_client_state_str(bf_thd),
wsrep_thd_client_mode_str(bf_thd), wsrep_thd_client_mode_str(bf_thd),
wsrep_thd_transaction_state_str(bf_thd), wsrep_thd_transaction_state_str(bf_thd),
wsrep_thd_query(bf_thd)); wsrep_thd_query(bf_thd));
WSREP_DEBUG("Victim %s trx_id: " TRX_ID_FMT " thread: %ld " WSREP_DEBUG("Victim %s trx_id: " TRX_ID_FMT " thread: %ld "
"seqno: %lld client_state: %s client_mode: %s transaction_mode: %s " "seqno: %lld client_state: %s "
"query: %s", "client_mode: %s transaction_mode: %s query: %s",
wsrep_thd_is_BF(thd, false) ? "BF" : "normal", wsrep_thd_is_BF(vthd, false) ? "BF" : "normal",
victim_trx->id, vtrx->id,
thd_get_thread_id(thd), thd_get_thread_id(vthd),
wsrep_thd_trx_seqno(thd), wsrep_thd_trx_seqno(vthd),
wsrep_thd_client_state_str(thd), wsrep_thd_client_state_str(vthd),
wsrep_thd_client_mode_str(thd), wsrep_thd_client_mode_str(vthd),
wsrep_thd_transaction_state_str(thd), wsrep_thd_transaction_state_str(vthd),
wsrep_thd_query(thd)); wsrep_thd_query(vthd));
/* Mark transaction as a victim for Galera abort */ /* Mark transaction as a victim for Galera abort */
victim_trx->lock.was_chosen_as_deadlock_victim.fetch_or(2); vtrx->lock.was_chosen_as_deadlock_victim.fetch_or(2);
if (wsrep_thd_set_wsrep_aborter(bf_thd, thd)) if (!wsrep_thd_set_wsrep_aborter(bf_thd, vthd))
{
WSREP_DEBUG("innodb kill transaction skipped due to wsrep_aborter set");
goto ret1;
}
aborting= true; aborting= true;
else
ret1: WSREP_DEBUG("kill transaction skipped due to wsrep_aborter set");
victim_trx->mutex_unlock(); }
lock_sys.wr_unlock(); lock_sys.wr_unlock();
mysql_mutex_unlock(&lock_sys.wait_mutex); mysql_mutex_unlock(&lock_sys.wait_mutex);
ret0: vtrx->mutex_unlock();
if (thd) {
wsrep_thd_UNLOCK(thd);
if (aborting) {
DEBUG_SYNC(bf_thd, "before_wsrep_thd_abort");
wsrep_thd_bf_abort(bf_thd, thd, arg->signal);
} }
wsrep_thd_kill_UNLOCK(thd); wsrep_thd_UNLOCK(vthd);
if (aborting)
{
DEBUG_SYNC(bf_thd, "before_wsrep_thd_abort");
wsrep_thd_bf_abort(bf_thd, vthd, true);
} }
if (bf_thd) { wsrep_thd_kill_UNLOCK(vthd);
wsrep_thd_UNLOCK(bf_thd);
wsrep_thd_kill_UNLOCK(bf_thd);
} }
free(arg);
}
/** This function is used to kill one transaction.
This transaction was open on this node (not-yet-committed), and a
conflicting writeset from some other node that was being applied
caused a locking conflict. First committed (from other node)
wins, thus open transaction is rolled back. BF stands for
brute-force: any transaction can get aborted by galera any time
it is necessary.
This conflict can happen only when the replicated writeset (from
other node) is being applied, not when it’s waiting in the queue.
If our local transaction reached its COMMIT and this conflicting
writeset was in the queue, then it should fail the local
certification test instead.
A brute force abort is only triggered by a locking conflict
between a writeset being applied by an applier thread (slave thread)
and an open transaction on the node, not by a Galera writeset
comparison as in the local certification failure.
@param[in] bf_thd Brute force (BF) thread
@param[in,out] victim_trx Victim trx to be killed
@param[in] signal Should victim be signaled */
void
wsrep_innobase_kill_one_trx(
THD* bf_thd,
trx_t *victim_trx,
bool signal)
{
ut_ad(bf_thd);
ut_ad(victim_trx);
ut_ad(victim_trx->mutex_is_owner());
DBUG_ENTER("wsrep_innobase_kill_one_trx");
DBUG_EXECUTE_IF("sync.before_wsrep_thd_abort",
{
const char act[]=
"now "
"SIGNAL sync.before_wsrep_thd_abort_reached "
"WAIT_FOR signal.before_wsrep_thd_abort";
DBUG_ASSERT(!debug_sync_set_action(bf_thd,
STRING_WITH_LEN(act)));
};);
trx_t* bf_trx= thd_to_trx(bf_thd);
bg_wsrep_kill_trx_arg *arg = (bg_wsrep_kill_trx_arg*)malloc(sizeof(*arg));
arg->thd_id = thd_get_thread_id(victim_trx->mysql_thd);
arg->trx_id = victim_trx->id;
arg->bf_thd_id = thd_get_thread_id(bf_thd);
arg->bf_trx_id = bf_trx ? bf_trx->id : TRX_ID_MAX;
arg->signal = signal;
mysql_manager_submit(bg_wsrep_kill_trx, arg);
DBUG_VOID_RETURN;
} }
/** This function forces the victim transaction to abort. Aborting the /** This function forces the victim transaction to abort. Aborting the
......
...@@ -209,7 +209,6 @@ innobase_casedn_str( ...@@ -209,7 +209,6 @@ innobase_casedn_str(
char* a); /*!< in/out: string to put in lower case */ char* a); /*!< in/out: string to put in lower case */
#ifdef WITH_WSREP #ifdef WITH_WSREP
void wsrep_innobase_kill_one_trx(THD *bf_thd, trx_t *victim_trx, bool signal);
ulint wsrep_innobase_mysql_sort(int mysql_type, uint charset_number, ulint wsrep_innobase_mysql_sort(int mysql_type, uint charset_number,
unsigned char* str, ulint str_length, unsigned char* str, ulint str_length,
unsigned int buf_length); unsigned int buf_length);
......
...@@ -580,7 +580,6 @@ class lock_sys_t ...@@ -580,7 +580,6 @@ class lock_sys_t
{ {
friend struct LockGuard; friend struct LockGuard;
friend struct LockMultiGuard; friend struct LockMultiGuard;
friend struct LockGGuard;
/** Hash table latch */ /** Hash table latch */
struct hash_latch struct hash_latch
...@@ -920,18 +919,6 @@ struct LockGuard ...@@ -920,18 +919,6 @@ struct LockGuard
lock_sys_t::hash_latch *latch; lock_sys_t::hash_latch *latch;
}; };
#ifdef WITH_WSREP
/** lock_sys.latch guard for a page_id_t shard */
struct LockGGuard
{
LockGGuard(lock_sys_t::hash_table &hash, const page_id_t id, bool all);
~LockGGuard();
private:
/** The hash bucket (nullptr if all of them) */
lock_sys_t::hash_latch *latch;
};
#endif
/** lock_sys.latch guard for 2 page_id_t shards */ /** lock_sys.latch guard for 2 page_id_t shards */
struct LockMultiGuard struct LockMultiGuard
{ {
...@@ -952,9 +939,6 @@ lock_t* ...@@ -952,9 +939,6 @@ lock_t*
lock_rec_create( lock_rec_create(
/*============*/ /*============*/
lock_t* c_lock, /*!< conflicting lock */ lock_t* c_lock, /*!< conflicting lock */
#ifdef WITH_WSREP
que_thr_t* thr, /*!< thread owning trx */
#endif
unsigned type_mode,/*!< in: lock mode and wait flag */ unsigned type_mode,/*!< in: lock mode and wait flag */
const buf_block_t* block, /*!< in: buffer block containing const buf_block_t* block, /*!< in: buffer block containing
the record */ the record */
...@@ -984,9 +968,6 @@ without checking for deadlocks or conflicts. ...@@ -984,9 +968,6 @@ without checking for deadlocks or conflicts.
lock_t* lock_t*
lock_rec_create_low( lock_rec_create_low(
lock_t* c_lock, lock_t* c_lock,
#ifdef WITH_WSREP
que_thr_t* thr, /*!< thread owning trx */
#endif
unsigned type_mode, unsigned type_mode,
const page_id_t page_id, const page_id_t page_id,
const page_t* page, const page_t* page,
......
...@@ -61,9 +61,6 @@ lock_t* ...@@ -61,9 +61,6 @@ lock_t*
lock_rec_create( lock_rec_create(
/*============*/ /*============*/
lock_t* c_lock, /*!< conflicting lock */ lock_t* c_lock, /*!< conflicting lock */
#ifdef WITH_WSREP
que_thr_t* thr, /*!< thread owning trx */
#endif
unsigned type_mode,/*!< in: lock mode and wait flag */ unsigned type_mode,/*!< in: lock mode and wait flag */
const buf_block_t* block, /*!< in: buffer block containing const buf_block_t* block, /*!< in: buffer block containing
the record */ the record */
...@@ -77,9 +74,6 @@ lock_rec_create( ...@@ -77,9 +74,6 @@ lock_rec_create(
btr_assert_not_corrupted(block, index); btr_assert_not_corrupted(block, index);
return lock_rec_create_low( return lock_rec_create_low(
c_lock, c_lock,
#ifdef WITH_WSREP
thr,
#endif
type_mode, block->page.id(), block->frame, heap_no, type_mode, block->page.id(), block->frame, heap_no,
index, trx, caller_owns_trx_mutex); index, trx, caller_owns_trx_mutex);
} }
This diff is collapsed.
...@@ -468,9 +468,6 @@ lock_prdt_add_to_queue( ...@@ -468,9 +468,6 @@ lock_prdt_add_to_queue(
ut_ad(!(type_mode & LOCK_WAIT) || trx->lock.wait_trx); ut_ad(!(type_mode & LOCK_WAIT) || trx->lock.wait_trx);
lock_t* lock = lock_rec_create(nullptr, lock_t* lock = lock_rec_create(nullptr,
#ifdef WITH_WSREP
nullptr,
#endif
type_mode, block, PRDT_HEAPNO, index, type_mode, block, PRDT_HEAPNO, index,
trx, caller_owns_trx_mutex); trx, caller_owns_trx_mutex);
...@@ -734,9 +731,6 @@ lock_prdt_lock( ...@@ -734,9 +731,6 @@ lock_prdt_lock(
if (lock == NULL) { if (lock == NULL) {
lock = lock_rec_create( lock = lock_rec_create(
NULL, NULL,
#ifdef WITH_WSREP
NULL, /* FIXME: replicate SPATIAL INDEX locks */
#endif
prdt_mode, block, PRDT_HEAPNO, prdt_mode, block, PRDT_HEAPNO,
index, trx, FALSE); index, trx, FALSE);
...@@ -835,9 +829,6 @@ lock_place_prdt_page_lock( ...@@ -835,9 +829,6 @@ lock_place_prdt_page_lock(
if (lock == NULL) { if (lock == NULL) {
lock = lock_rec_create_low( lock = lock_rec_create_low(
NULL, NULL,
#ifdef WITH_WSREP
NULL, /* FIXME: replicate SPATIAL INDEX locks */
#endif
mode, page_id, NULL, PRDT_HEAPNO, mode, page_id, NULL, PRDT_HEAPNO,
index, trx, FALSE); index, trx, FALSE);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment