Commit 179c2833 authored by Jan Lindström's avatar Jan Lindström

Merge branch 10.4 into 10.5

parents 981a6b70 a44d896f
connection node_2;
connection node_1;
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 int, f3 varchar(2000));
INSERT INTO t1 VALUES (1, 0, REPEAT('1234567890', 200));
INSERT INTO t1 VALUES (3, 3, REPEAT('1234567890', 200));
SET SESSION wsrep_sync_wait=0;
SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_apply_cb";
connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1;
connection node_1a;
SET SESSION wsrep_sync_wait=0;
connection node_1;
begin;
select f1,f2 from t1;
f1 f2
1 0
3 3
connection node_2;
UPDATE t1 SET f2=2 WHERE f1=3;
connection node_1a;
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_apply_cb_reached";
connection node_1;
UPDATE t1 SET f2=1 WHERE f1=3;
SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_master_enter_sync';
COMMIT;
connection node_1a;
SET SESSION wsrep_on = 0;
SET SESSION wsrep_on = 1;
SET GLOBAL wsrep_provider_options = 'dbug=';
SET GLOBAL wsrep_provider_options = 'signal=commit_monitor_master_enter_sync';
SET GLOBAL DEBUG_DBUG = "";
SET DEBUG_SYNC = "now SIGNAL signal.wsrep_apply_cb";
SET GLOBAL debug_dbug = NULL;
SET debug_sync='RESET';
connection node_1;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
select f1,f2 from t1;
f1 f2
1 0
3 2
DROP TABLE t1;
connection node_2;
connection node_1;
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 CHAR(1));
INSERT INTO t1 VALUES (1, 'a');
INSERT INTO t1 VALUES (2, 'a');
connection node_1;
SET AUTOCOMMIT=ON;
START TRANSACTION;
UPDATE t1 SET f2 = 'b' WHERE f1 = 1;
SELECT * FROM t1 WHERE f1 = 2 FOR UPDATE;
f1 f2
2 a
SAVEPOINT my_sp;
connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1;
SET SESSION wsrep_sync_wait=0;
SET GLOBAL wsrep_provider_options = 'dbug=d,apply_monitor_slave_enter_sync';
connection node_2;
UPDATE t1 SET f2 = 'c' WHERE f1 = 2;
connection node_1a;
SET SESSION wsrep_on = 0;
SET SESSION wsrep_on = 1;
SET GLOBAL wsrep_provider_options = 'dbug=';
SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_master_enter_sync';
connection node_1;
COMMIT;
connection node_1a;
SET SESSION wsrep_on = 0;
SET SESSION wsrep_on = 1;
SET GLOBAL wsrep_provider_options = 'dbug=';
SET GLOBAL wsrep_provider_options = 'dbug=d,abort_trx_end';
SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_slave_enter_sync';
SET SESSION wsrep_on = 0;
SET SESSION wsrep_on = 1;
SET GLOBAL wsrep_provider_options = 'dbug=';
SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end';
SET GLOBAL wsrep_provider_options = 'signal=commit_monitor_master_enter_sync';
connection node_1;
SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'b';
COUNT(*) = 1
1
SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'c';
COUNT(*) = 1
1
wsrep_local_replays
1
connection node_2;
SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'b';
COUNT(*) = 1
1
SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'c';
COUNT(*) = 1
1
DROP TABLE t1;
!include ../galera_2nodes.cnf
[mysqld]
log-bin
log-slave-updates
[mysqld.1]
log_bin
log_slave_updates
max-binlog-size=4096
expire-logs-days=1
[mysqld.2]
#
# This test is for reproducing the issue in:
# https://jira.mariadb.org/browse/MDEV-29512
#
# The hanging in MDEV-29512 happens when binlog purging is attempted, and there is
# one local BF aborted transaction waiting for commit monitor.
#
# The test will launch two node cluster and enable binlogging with expire log days,
# to force binlog purging to happen.
# A local transaction is executed so that will become BF abort victim, and has advanced
# to replication stage waiting for commit monitor for final cleanup (to mark position in innodb)
# after that, applier is released to complete the BF abort and due to binlog configuration,
# starting the binlog purging. This is where the hanging would occur, if code is buggy
#
--source include/galera_cluster.inc
--source include/have_innodb.inc
--source include/have_debug_sync.inc
--source include/galera_have_debug_sync.inc
#
# binlog size is limited to 4096 bytes, we will create enough events to
# cause binlog rotation
#
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 int, f3 varchar(2000));
INSERT INTO t1 VALUES (1, 0, REPEAT('1234567890', 200));
INSERT INTO t1 VALUES (3, 3, REPEAT('1234567890', 200));
SET SESSION wsrep_sync_wait=0;
# set sync point for replication applier
SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_apply_cb";
# Control connection to manage sync points for appliers
--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1
--connection node_1a
SET SESSION wsrep_sync_wait=0;
# starting local transaction, only select so far,
# write will happen later and this will be ordered after the transaction in node_2
--connection node_1
begin;
select f1,f2 from t1;
# send from node 2 an UPDATE transaction, which will BF abort the transaction in node_1
--connection node_2
--let $wait_condition=select count(*)=2 from t1
--source include/wait_condition.inc
UPDATE t1 SET f2=2 WHERE f1=3;
--connection node_1a
# wait to see the UPDATE from node_2 in apply_cb sync point
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_apply_cb_reached";
--connection node_1
# now issuing conflicting update
UPDATE t1 SET f2=1 WHERE f1=3;
# Block the local commit, send final COMMIT and wait until it gets blocked
--let $galera_sync_point = commit_monitor_master_enter_sync
--source include/galera_set_sync_point.inc
--send COMMIT
--connection node_1a
# wait for the local commit to enter in commit monitor wait state
--let $galera_sync_point = commit_monitor_master_enter_sync
--source include/galera_wait_sync_point.inc
--source include/galera_clear_sync_point.inc
# release the local transaction to continue with commit
--let $galera_sync_point = commit_monitor_master_enter_sync
--source include/galera_signal_sync_point.inc
# and now release the applier, it should force local trx to abort
SET GLOBAL DEBUG_DBUG = "";
SET DEBUG_SYNC = "now SIGNAL signal.wsrep_apply_cb";
SET GLOBAL debug_dbug = NULL;
SET debug_sync='RESET';
--connection node_1
--error ER_LOCK_DEADLOCK
--reap
# wait until applying is complete
--let $wait_condition = SELECT COUNT(*)=1 FROM t1 WHERE f2=2
--source include/wait_condition.inc
# final read to verify what we got
select f1,f2 from t1;
DROP TABLE t1;
#
# This test tests replaying a transaction with savepoint
#
--source include/galera_cluster.inc
--source include/have_innodb.inc
--source include/have_debug_sync.inc
--source include/galera_have_debug_sync.inc
--let $wsrep_local_replays_old = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_replays'`
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 CHAR(1));
INSERT INTO t1 VALUES (1, 'a');
INSERT INTO t1 VALUES (2, 'a');
--connection node_1
SET AUTOCOMMIT=ON;
START TRANSACTION;
UPDATE t1 SET f2 = 'b' WHERE f1 = 1;
SELECT * FROM t1 WHERE f1 = 2 FOR UPDATE;
SAVEPOINT my_sp;
# Block the applier on node #1 and issue a conflicting update on node #2
--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1
SET SESSION wsrep_sync_wait=0;
--let $galera_sync_point = apply_monitor_slave_enter_sync
--source include/galera_set_sync_point.inc
--connection node_2
UPDATE t1 SET f2 = 'c' WHERE f1 = 2;
--connection node_1a
--source include/galera_wait_sync_point.inc
--source include/galera_clear_sync_point.inc
# Block the commit, send the COMMIT and wait until it gets blocked
--let $galera_sync_point = commit_monitor_master_enter_sync
--source include/galera_set_sync_point.inc
--connection node_1
--send COMMIT
--connection node_1a
--let $galera_sync_point = apply_monitor_slave_enter_sync commit_monitor_master_enter_sync
--source include/galera_wait_sync_point.inc
--source include/galera_clear_sync_point.inc
# Let the conflicting UPDATE proceed and wait until it hits abort_trx_end.
# The victim transaction still sits in commit_monitor_master_sync_point.
--let $galera_sync_point = abort_trx_end
--source include/galera_set_sync_point.inc
--let $galera_sync_point = apply_monitor_slave_enter_sync
--source include/galera_signal_sync_point.inc
--let $galera_sync_point = abort_trx_end commit_monitor_master_enter_sync
--source include/galera_wait_sync_point.inc
# Let the transactions proceed
--source include/galera_clear_sync_point.inc
--let $galera_sync_point = abort_trx_end
--source include/galera_signal_sync_point.inc
--let $galera_sync_point = commit_monitor_master_enter_sync
--source include/galera_signal_sync_point.inc
# Commit succeeds
--connection node_1
--reap
SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'b';
SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'c';
# wsrep_local_replays has increased by 1
--let $wsrep_local_replays_new = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_replays'`
--disable_query_log
--eval SELECT $wsrep_local_replays_new - $wsrep_local_replays_old = 1 AS wsrep_local_replays;
--enable_query_log
--connection node_2
SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'b';
SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'c';
DROP TABLE t1;
/* Copyright 2018-2021 Codership Oy <info@codership.com>
/* Copyright 2018-2023 Codership Oy <info@codership.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
......@@ -273,7 +273,9 @@ extern "C" my_bool wsrep_thd_skip_locking(const THD *thd)
extern "C" my_bool wsrep_thd_order_before(const THD *left, const THD *right)
{
if (wsrep_thd_trx_seqno(left) < wsrep_thd_trx_seqno(right)) {
if (wsrep_thd_is_BF(left, false) &&
wsrep_thd_is_BF(right, false) &&
wsrep_thd_trx_seqno(left) < wsrep_thd_trx_seqno(right)) {
WSREP_DEBUG("BF conflict, order: %lld %lld\n",
(long long)wsrep_thd_trx_seqno(left),
(long long)wsrep_thd_trx_seqno(right));
......@@ -385,13 +387,20 @@ extern "C" ulong wsrep_OSU_method_get(const MYSQL_THD thd)
extern "C" bool wsrep_thd_set_wsrep_aborter(THD *bf_thd, THD *victim_thd)
{
WSREP_DEBUG("wsrep_thd_set_wsrep_aborter called");
mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data);
if (!bf_thd)
{
victim_thd->wsrep_aborter= 0;
WSREP_DEBUG("wsrep_thd_set_wsrep_aborter resetting wsrep_aborter");
return false;
}
if (victim_thd->wsrep_aborter && victim_thd->wsrep_aborter != bf_thd->thread_id)
{
return true;
}
victim_thd->wsrep_aborter = bf_thd->thread_id;
victim_thd->wsrep_aborter= bf_thd->thread_id;
WSREP_DEBUG("wsrep_thd_set_wsrep_aborter setting wsrep_aborter %u",
victim_thd->wsrep_aborter);
return false;
}
......
......@@ -5414,8 +5414,7 @@ thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd)
(e.g. InnoDB does it by keeping lock_sys.mutex locked)
*/
if (WSREP_ON &&
wsrep_thd_is_BF(const_cast<THD *>(thd), false) &&
wsrep_thd_is_BF(const_cast<THD *>(other_thd), false))
wsrep_thd_order_before(thd, other_thd))
return 0;
#endif /* WITH_WSREP */
rgi= thd->rgi_slave;
......
/* Copyright 2018-2021 Codership Oy <info@codership.com>
/* Copyright 2018-2023 Codership Oy <info@codership.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
......@@ -639,6 +639,9 @@ Wsrep_replayer_service::Wsrep_replayer_service(THD* replayer_thd, THD* orig_thd)
transactional locks */
DBUG_ASSERT(!orig_thd->mdl_context.has_transactional_locks());
replayer_thd->system_thread_info.rpl_sql_info=
new rpl_sql_thread_info(replayer_thd->wsrep_rgi->rli->mi->rpl_filter);
/* Make a shadow copy of diagnostics area and reset */
m_da_shadow.status= orig_thd->get_stmt_da()->status();
if (m_da_shadow.status == Diagnostics_area::DA_OK)
......@@ -677,35 +680,35 @@ Wsrep_replayer_service::Wsrep_replayer_service(THD* replayer_thd, THD* orig_thd)
Wsrep_replayer_service::~Wsrep_replayer_service()
{
THD* replayer_thd= m_thd;
THD* orig_thd= m_orig_thd;
/* Switch execution context back to original. */
wsrep_after_apply(replayer_thd);
wsrep_after_command_ignore_result(replayer_thd);
wsrep_close(replayer_thd);
wsrep_reset_threadvars(replayer_thd);
wsrep_store_threadvars(orig_thd);
wsrep_after_apply(m_thd);
wsrep_after_command_ignore_result(m_thd);
wsrep_close(m_thd);
wsrep_reset_threadvars(m_thd);
wsrep_store_threadvars(m_orig_thd);
DBUG_ASSERT(!orig_thd->get_stmt_da()->is_sent());
DBUG_ASSERT(!orig_thd->get_stmt_da()->is_set());
DBUG_ASSERT(!m_orig_thd->get_stmt_da()->is_sent());
DBUG_ASSERT(!m_orig_thd->get_stmt_da()->is_set());
delete m_thd->system_thread_info.rpl_sql_info;
m_thd->system_thread_info.rpl_sql_info= nullptr;
if (m_replay_status == wsrep::provider::success)
{
DBUG_ASSERT(replayer_thd->wsrep_cs().current_error() == wsrep::e_success);
orig_thd->reset_kill_query();
my_ok(orig_thd, m_da_shadow.affected_rows, m_da_shadow.last_insert_id);
DBUG_ASSERT(m_thd->wsrep_cs().current_error() == wsrep::e_success);
m_orig_thd->reset_kill_query();
my_ok(m_orig_thd, m_da_shadow.affected_rows, m_da_shadow.last_insert_id);
}
else if (m_replay_status == wsrep::provider::error_certification_failed)
{
wsrep_override_error(orig_thd, ER_LOCK_DEADLOCK);
wsrep_override_error(m_orig_thd, ER_LOCK_DEADLOCK);
}
else
{
DBUG_ASSERT(0);
WSREP_ERROR("trx_replay failed for: %d, schema: %s, query: %s",
m_replay_status,
orig_thd->db.str, wsrep_thd_query(orig_thd));
m_orig_thd->db.str, wsrep_thd_query(m_orig_thd));
unireg_abort(1);
}
}
......
/* Copyright (C) 2013-2022 Codership Oy <info@codership.com>
/* Copyright (C) 2013-2023 Codership Oy <info@codership.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
......@@ -308,11 +308,11 @@ void wsrep_fire_rollbacker(THD *thd)
}
int wsrep_abort_thd(THD *bf_thd_ptr, THD *victim_thd_ptr, my_bool signal)
int wsrep_abort_thd(THD *bf_thd,
THD *victim_thd,
my_bool signal)
{
DBUG_ENTER("wsrep_abort_thd");
THD *victim_thd= (THD *) victim_thd_ptr;
THD *bf_thd= (THD *) bf_thd_ptr;
mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data);
mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill);
......@@ -323,16 +323,21 @@ int wsrep_abort_thd(THD *bf_thd_ptr, THD *victim_thd_ptr, my_bool signal)
if ((WSREP(bf_thd) ||
((WSREP_ON || bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU) &&
wsrep_thd_is_toi(bf_thd))) &&
victim_thd &&
!wsrep_thd_is_aborting(victim_thd))
{
WSREP_DEBUG("wsrep_abort_thd, by: %llu, victim: %llu", (bf_thd) ?
(long long)bf_thd->real_id : 0, (long long)victim_thd->real_id);
WSREP_DEBUG("wsrep_abort_thd, by: %llu, victim: %llu",
(long long)bf_thd->real_id, (long long)victim_thd->real_id);
ha_abort_transaction(bf_thd, victim_thd, signal);
}
else
{
WSREP_DEBUG("wsrep_abort_thd not effective: %p %p", bf_thd, victim_thd);
WSREP_DEBUG("wsrep_abort_thd not effective: bf %llu victim %llu "
"wsrep %d wsrep_on %d RSU %d TOI %d aborting %d",
(long long)bf_thd->real_id, (long long)victim_thd->real_id,
WSREP_NNULL(bf_thd), WSREP_ON,
bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU,
wsrep_thd_is_toi(bf_thd),
wsrep_thd_is_aborting(victim_thd));
wsrep_thd_UNLOCK(victim_thd);
}
......
/* Copyright (C) 2013-2022 Codership Oy <info@codership.com>
/* Copyright (C) 2013-2023 Codership Oy <info@codership.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
......@@ -88,7 +88,9 @@ bool wsrep_create_appliers(long threads, bool mutex_protected=false);
void wsrep_create_rollbacker();
bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd);
int wsrep_abort_thd(THD *bf_thd_ptr, THD *victim_thd_ptr, my_bool signal);
int wsrep_abort_thd(THD *bf_thd,
THD *victim_thd,
my_bool signal) __attribute__((nonnull(1,2)));
/*
Helper methods to deal with thread local storage.
......
......@@ -18778,6 +18778,16 @@ wsrep_kill_victim(
lock_cancel_waiting_and_release(wait_lock);
}
}
else
{
wsrep_thd_LOCK(thd);
victim_trx->lock.was_chosen_as_wsrep_victim= false;
wsrep_thd_set_wsrep_aborter(NULL, thd);
wsrep_thd_UNLOCK(thd);
WSREP_DEBUG("wsrep_thd_bf_abort has failed, victim %lu will survive",
thd_get_thread_id(thd));
}
DBUG_VOID_RETURN;
}
......
......@@ -787,25 +787,34 @@ lock_rec_has_to_wait(
}
#ifdef WITH_WSREP
/* New lock request from a transaction is using unique key
scan and this transaction is a wsrep high priority transaction
(brute force). If conflicting transaction is also wsrep high
priority transaction we should avoid lock conflict because
ordering of these transactions is already decided and
conflicting transaction will be later replayed. Note
that thread holding conflicting lock can't be
committed or rolled back while we hold
lock_sys->mutex. */
if (trx->is_wsrep_UK_scan()
&& wsrep_thd_is_BF(lock2->trx->mysql_thd, false)) {
return false;
}
/* New lock request from a transaction is using unique key
scan and this transaction is a wsrep high priority transaction
(brute force). If conflicting transaction is also wsrep high
priority transaction we should avoid lock conflict because
ordering of these transactions is already decided and
conflicting transaction will be later replayed. Note
that thread holding conflicting lock can't be
committed or rolled back while we hold
lock_sys->mutex. */
if (trx->is_wsrep_UK_scan()
&& wsrep_thd_is_BF(lock2->trx->mysql_thd, false)) {
return false;
}
/* We very well can let bf to wait normally as other
BF will be replayed in case of conflict. For debug
builds we will do additional sanity checks to catch
unsupported bf wait if any. */
ut_d(wsrep_assert_no_bf_bf_wait(lock2, trx));
/* If BF-BF conflict, we have to look at write set order */
if (trx->is_wsrep()
&& (type_mode & LOCK_MODE_MASK) == LOCK_X
&& (lock2->type_mode & LOCK_MODE_MASK) == LOCK_X
&& wsrep_thd_order_before(trx->mysql_thd,
lock2->trx->mysql_thd)) {
return false;
}
/* We very well can let bf to wait normally as other
BF will be replayed in case of conflict. For debug
builds we will do additional sanity checks to catch
unsupported bf wait if any. */
ut_d(wsrep_assert_no_bf_bf_wait(lock2, trx));
#endif /* WITH_WSREP */
return true;
......@@ -1987,6 +1996,14 @@ lock_rec_has_to_wait_in_queue(
if (heap_no < lock_rec_get_n_bits(lock)
&& (p[bit_offset] & bit_mask)
&& lock_has_to_wait(wait_lock, lock)) {
#ifdef WITH_WSREP
if (lock->trx->is_wsrep()
&& wsrep_thd_order_before(wait_lock->trx->mysql_thd,
lock->trx->mysql_thd)) {
/* don't wait for another BF lock */
continue;
}
#endif
return(lock);
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment