Commit 2c9e75cc authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-15326 after-merge fixes

trx_t::is_recovered: Revert most of the changes that were made by the
merge of MDEV-15326 from 10.2. The trx_sys.rw_trx_hash and the recovery
of transactions at startup is quite different in 10.3.

trx_free_at_shutdown(): Avoid excessive mutex protection. Reading fields
that can only be modified by the current thread (owning the transaction)
can be done outside mutex.

trx_t::commit_state(): Restore a tighter assertion.

trx_rollback_recovered(): Clarify why there is no potential race condition
with other transactions.

lock_trx_release_locks(): Merge with trx_t::release_locks(),
and avoid holding lock_sys.mutex unnecessarily long.

rw_trx_hash_t::find(): Remove redundant code, and avoid starving the
committer by checking trx_t::state before trx_t::reference().
parent 537f8594
...@@ -485,7 +485,7 @@ lock_rec_unlock( ...@@ -485,7 +485,7 @@ lock_rec_unlock(
/** Release the explicit locks of a committing transaction, /** Release the explicit locks of a committing transaction,
and release possible other transactions waiting because of these locks. */ and release possible other transactions waiting because of these locks. */
void lock_trx_release_locks(trx_t* trx); void lock_release(trx_t* trx);
/*********************************************************************//** /*********************************************************************//**
Calculates the fold value of a page file address: used in inserting or Calculates the fold value of a page file address: used in inserting or
......
...@@ -628,12 +628,7 @@ class rw_trx_hash_t ...@@ -628,12 +628,7 @@ class rw_trx_hash_t
The caller should already have handled trx_id==0 specially. The caller should already have handled trx_id==0 specially.
*/ */
ut_ad(trx_id); ut_ad(trx_id);
if (caller_trx && caller_trx->id == trx_id) ut_ad(!caller_trx || caller_trx->id != trx_id || !do_ref_count);
{
if (do_ref_count)
caller_trx->reference();
return caller_trx;
}
trx_t *trx= 0; trx_t *trx= 0;
LF_PINS *pins= caller_trx ? get_pins(caller_trx) : lf_hash_get_pins(&hash); LF_PINS *pins= caller_trx ? get_pins(caller_trx) : lf_hash_get_pins(&hash);
...@@ -648,9 +643,25 @@ class rw_trx_hash_t ...@@ -648,9 +643,25 @@ class rw_trx_hash_t
lf_hash_search_unpin(pins); lf_hash_search_unpin(pins);
if ((trx= element->trx)) { if ((trx= element->trx)) {
DBUG_ASSERT(trx_id == trx->id); DBUG_ASSERT(trx_id == trx->id);
ut_d(validate_element(trx));
if (do_ref_count) if (do_ref_count)
{
/*
We have an early state check here to avoid committer
starvation in a wait loop for transaction references,
when there's a stream of trx_sys.find() calls from other
threads. The trx->state may change to COMMITTED after
trx->mutex is released, and it will have to be rechecked
by the caller after reacquiring the mutex.
*/
trx_mutex_enter(trx);
const trx_state_t state= trx->state;
trx_mutex_exit(trx);
if (state == TRX_STATE_COMMITTED_IN_MEMORY)
trx= NULL;
else
trx->reference(); trx->reference();
ut_d(validate_element(trx)); }
} }
mutex_exit(&element->mutex); mutex_exit(&element->mutex);
} }
......
...@@ -699,10 +699,9 @@ Normally, only the thread that is currently associated with a running ...@@ -699,10 +699,9 @@ Normally, only the thread that is currently associated with a running
transaction may access (read and modify) the trx object, and it may do transaction may access (read and modify) the trx object, and it may do
so without holding any mutex. The following are exceptions to this: so without holding any mutex. The following are exceptions to this:
* trx_rollback_resurrected() may access resurrected (connectionless) * trx_rollback_recovered() may access resurrected (connectionless)
transactions while the system is already processing new user transactions (state == TRX_STATE_ACTIVE && is_recovered)
transactions. The trx_sys.mutex and trx->is_recovered prevent while the system is already processing new user transactions (!is_recovered).
a race condition between it and trx_commit().
* trx_print_low() may access transactions not associated with the current * trx_print_low() may access transactions not associated with the current
thread. The caller must be holding lock_sys.mutex. thread. The caller must be holding lock_sys.mutex.
...@@ -839,10 +838,6 @@ struct trx_t { ...@@ -839,10 +838,6 @@ struct trx_t {
Transitions to COMMITTED are protected by trx_t::mutex. */ Transitions to COMMITTED are protected by trx_t::mutex. */
trx_state_t state; trx_state_t state;
/** whether this is a recovered transaction that should be
rolled back by trx_rollback_or_clean_recovered().
Protected by trx_t::mutex for transactions that are in trx_sys. */
bool is_recovered;
ReadView read_view; /*!< consistent read view used in the ReadView read_view; /*!< consistent read view used in the
transaction, or NULL if not yet set */ transaction, or NULL if not yet set */
...@@ -852,6 +847,15 @@ struct trx_t { ...@@ -852,6 +847,15 @@ struct trx_t {
by trx_t::mutex). */ by trx_t::mutex). */
/* These fields are not protected by any mutex. */ /* These fields are not protected by any mutex. */
/** false=normal transaction, true=recovered (must be rolled back)
or disconnected transaction in XA PREPARE STATE.
This field is accessed by the thread that owns the transaction,
without holding any mutex.
There is only one foreign-thread access in trx_print_low()
and a possible race condition with trx_disconnect_prepared(). */
bool is_recovered;
const char* op_info; /*!< English text describing the const char* op_info; /*!< English text describing the
current operation, or an empty current operation, or an empty
string */ string */
......
...@@ -4274,23 +4274,17 @@ lock_check_dict_lock( ...@@ -4274,23 +4274,17 @@ lock_check_dict_lock(
} }
#endif /* UNIV_DEBUG */ #endif /* UNIV_DEBUG */
/*********************************************************************//** /** Release the explicit locks of a committing transaction,
Releases transaction locks, and releases possible other transactions waiting and release possible other transactions waiting because of these locks. */
because of these locks. */ void lock_release(trx_t* trx)
static
void
lock_release(
/*=========*/
trx_t* trx) /*!< in/out: transaction */
{ {
lock_t* lock;
ulint count = 0; ulint count = 0;
trx_id_t max_trx_id = trx_sys.get_max_trx_id(); trx_id_t max_trx_id = trx_sys.get_max_trx_id();
ut_ad(lock_mutex_own()); lock_mutex_enter();
ut_ad(!trx_mutex_own(trx)); ut_ad(!trx_mutex_own(trx));
for (lock = UT_LIST_GET_LAST(trx->lock.trx_locks); for (lock_t* lock = UT_LIST_GET_LAST(trx->lock.trx_locks);
lock != NULL; lock != NULL;
lock = UT_LIST_GET_LAST(trx->lock.trx_locks)) { lock = UT_LIST_GET_LAST(trx->lock.trx_locks)) {
...@@ -4330,6 +4324,8 @@ lock_release( ...@@ -4330,6 +4324,8 @@ lock_release(
++count; ++count;
} }
lock_mutex_exit();
} }
/* True if a lock mode is S or X */ /* True if a lock mode is S or X */
...@@ -4784,7 +4780,7 @@ lock_table_queue_validate( ...@@ -4784,7 +4780,7 @@ lock_table_queue_validate(
lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) { lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) {
/* lock->trx->state cannot change from or to NOT_STARTED /* lock->trx->state cannot change from or to NOT_STARTED
while we are holding the trx_sys.mutex. It may change while we are holding the lock_sys.mutex. It may change
from ACTIVE or PREPARED to PREPARED or COMMITTED. */ from ACTIVE or PREPARED to PREPARED or COMMITTED. */
trx_mutex_enter(lock->trx); trx_mutex_enter(lock->trx);
check_trx_state(lock->trx); check_trx_state(lock->trx);
...@@ -5390,13 +5386,13 @@ lock_rec_convert_impl_to_expl_for_trx( ...@@ -5390,13 +5386,13 @@ lock_rec_convert_impl_to_expl_for_trx(
trx_t* trx, /*!< in/out: active transaction */ trx_t* trx, /*!< in/out: active transaction */
ulint heap_no)/*!< in: rec heap number to lock */ ulint heap_no)/*!< in: rec heap number to lock */
{ {
ut_ad(trx->is_referenced());
ut_ad(page_rec_is_leaf(rec)); ut_ad(page_rec_is_leaf(rec));
ut_ad(!rec_is_metadata(rec, index)); ut_ad(!rec_is_metadata(rec, index));
DEBUG_SYNC_C("before_lock_rec_convert_impl_to_expl_for_trx"); DEBUG_SYNC_C("before_lock_rec_convert_impl_to_expl_for_trx");
lock_mutex_enter(); lock_mutex_enter();
trx_mutex_enter(trx); trx_mutex_enter(trx);
ut_ad(trx->is_referenced());
ut_ad(!trx_state_eq(trx, TRX_STATE_NOT_STARTED)); ut_ad(!trx_state_eq(trx, TRX_STATE_NOT_STARTED));
if (!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY) if (!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)
...@@ -6235,27 +6231,6 @@ lock_unlock_table_autoinc( ...@@ -6235,27 +6231,6 @@ lock_unlock_table_autoinc(
} }
} }
/** Release the explicit locks of a committing transaction,
and release possible other transactions waiting because of these locks. */
void lock_trx_release_locks(trx_t* trx)
{
ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks));
lock_mutex_enter();
lock_release(trx);
trx->lock.n_rec_locks = 0;
/* We don't remove the locks one by one from the vector for
efficiency reasons. We simply reset it because we would have
released all the locks anyway. */
trx->lock.table_locks.clear();
ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
ut_ad(ib_vector_is_empty(trx->autoinc_locks));
lock_mutex_exit();
mem_heap_empty(trx->lock.lock_heap);
}
static inline dberr_t lock_trx_handle_wait_low(trx_t* trx) static inline dberr_t lock_trx_handle_wait_low(trx_t* trx)
{ {
ut_ad(lock_mutex_own()); ut_ad(lock_mutex_own());
......
...@@ -252,7 +252,7 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr) ...@@ -252,7 +252,7 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr)
/* After the purge thread has been given permission to exit, /* After the purge thread has been given permission to exit,
we may roll back transactions (trx->undo_no==0) we may roll back transactions (trx->undo_no==0)
in THD::cleanup() invoked from unlink_thd() in fast shutdown, in THD::cleanup() invoked from unlink_thd() in fast shutdown,
or in trx_rollback_resurrected() in slow shutdown. or in trx_rollback_recovered() in slow shutdown.
Before any transaction-generating background threads or the Before any transaction-generating background threads or the
purge have been started, recv_recovery_rollback_active() can purge have been started, recv_recovery_rollback_active() can
......
...@@ -764,12 +764,8 @@ static my_bool trx_rollback_recovered_callback(rw_trx_hash_element_t *element, ...@@ -764,12 +764,8 @@ static my_bool trx_rollback_recovered_callback(rw_trx_hash_element_t *element,
mutex_enter(&element->mutex); mutex_enter(&element->mutex);
if (trx_t *trx= element->trx) if (trx_t *trx= element->trx)
{ {
/* The trx->is_recovered flag and trx->state are set
atomically under the protection of the trx->mutex in
trx_t::commit_state(). We do not want to accidentally clean up
a non-recovered transaction here. */
mutex_enter(&trx->mutex); mutex_enter(&trx->mutex);
if (trx->is_recovered && trx_state_eq(trx, TRX_STATE_ACTIVE)) if (trx_state_eq(trx, TRX_STATE_ACTIVE) && trx->is_recovered)
trx_list->push_back(trx); trx_list->push_back(trx);
mutex_exit(&trx->mutex); mutex_exit(&trx->mutex);
} }
...@@ -815,7 +811,8 @@ void trx_rollback_recovered(bool all) ...@@ -815,7 +811,8 @@ void trx_rollback_recovered(bool all)
ut_ad(trx); ut_ad(trx);
ut_d(trx_mutex_enter(trx)); ut_d(trx_mutex_enter(trx));
ut_ad(trx->is_recovered && trx_state_eq(trx, TRX_STATE_ACTIVE)); ut_ad(trx->is_recovered);
ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
ut_d(trx_mutex_exit(trx)); ut_d(trx_mutex_exit(trx));
if (!srv_is_being_started && !srv_undo_sources && srv_fast_shutdown) if (!srv_is_being_started && !srv_undo_sources && srv_fast_shutdown)
...@@ -831,6 +828,22 @@ void trx_rollback_recovered(bool all) ...@@ -831,6 +828,22 @@ void trx_rollback_recovered(bool all)
ut_ad(!srv_undo_sources); ut_ad(!srv_undo_sources);
ut_ad(srv_fast_shutdown); ut_ad(srv_fast_shutdown);
discard: discard:
/* Note: before kill_server() invoked innobase_end() via
unireg_end(), it invoked close_connections(), which should initiate
the rollback of any user transactions via THD::cleanup() in the
connection threads, and wait for all THD::cleanup() to complete.
So, no active user transactions should exist at this point.
srv_undo_sources=false was cleared early in innobase_end().
Generally, the server guarantees that all connections using
InnoDB must be disconnected by the time we are reaching this code,
be it during shutdown or UNINSTALL PLUGIN.
Because there is no possible race condition with any
concurrent user transaction, we do not have to invoke
trx->commit_state() or wait for !trx->is_referenced()
before trx_sys.deregister_rw(trx). */
trx_sys.deregister_rw(trx); trx_sys.deregister_rw(trx);
trx_free_at_shutdown(trx); trx_free_at_shutdown(trx);
} }
......
...@@ -463,6 +463,9 @@ void trx_free(trx_t*& trx) ...@@ -463,6 +463,9 @@ void trx_free(trx_t*& trx)
/** Transition to committed state, to release implicit locks. */ /** Transition to committed state, to release implicit locks. */
inline void trx_t::commit_state() inline void trx_t::commit_state()
{ {
ut_ad(state == TRX_STATE_PREPARED
|| state == TRX_STATE_PREPARED_RECOVERED
|| state == TRX_STATE_ACTIVE);
/* This makes the transaction committed in memory and makes its /* This makes the transaction committed in memory and makes its
changes to data visible to other transactions. NOTE that there is a changes to data visible to other transactions. NOTE that there is a
small discrepancy from the strict formal visibility rules here: a small discrepancy from the strict formal visibility rules here: a
...@@ -475,23 +478,9 @@ inline void trx_t::commit_state() ...@@ -475,23 +478,9 @@ inline void trx_t::commit_state()
makes modifications to the database, will get an lsn larger than the makes modifications to the database, will get an lsn larger than the
committing transaction T. In the case where the log flush fails, and committing transaction T. In the case where the log flush fails, and
T never gets committed, also T2 will never get committed. */ T never gets committed, also T2 will never get committed. */
ut_ad(trx_mutex_own(this)); trx_mutex_enter(this);
ut_ad(state != TRX_STATE_NOT_STARTED);
ut_ad(state != TRX_STATE_COMMITTED_IN_MEMORY
|| (is_recovered && !UT_LIST_GET_LEN(lock.trx_locks)));
state= TRX_STATE_COMMITTED_IN_MEMORY; state= TRX_STATE_COMMITTED_IN_MEMORY;
trx_mutex_exit(this);
/* If the background thread trx_rollback_or_clean_recovered()
is still active then there is a chance that the rollback
thread may see this trx as COMMITTED_IN_MEMORY and goes ahead
to clean it up calling trx_cleanup_at_db_startup(). This can
happen in the case we are committing a trx here that is left
in PREPARED state during the crash. Note that commit of the
rollback of a PREPARED trx happens in the recovery thread
while the rollback of other transactions happen in the
background thread. To avoid this race we unconditionally unset
the is_recovered flag. */
is_recovered= false;
ut_ad(id || !is_referenced()); ut_ad(id || !is_referenced());
} }
...@@ -499,18 +488,24 @@ inline void trx_t::commit_state() ...@@ -499,18 +488,24 @@ inline void trx_t::commit_state()
inline void trx_t::release_locks() inline void trx_t::release_locks()
{ {
DBUG_ASSERT(state == TRX_STATE_COMMITTED_IN_MEMORY); DBUG_ASSERT(state == TRX_STATE_COMMITTED_IN_MEMORY);
DBUG_ASSERT(!is_referenced());
if (UT_LIST_GET_LEN(lock.trx_locks)) if (UT_LIST_GET_LEN(lock.trx_locks))
lock_trx_release_locks(this); {
else lock_release(this);
lock.table_locks.clear(); // Work around a bug lock.n_rec_locks = 0;
ut_ad(UT_LIST_GET_LEN(lock.trx_locks) == 0);
ut_ad(ib_vector_is_empty(autoinc_locks));
mem_heap_empty(lock.lock_heap);
}
lock.table_locks.clear(); /* outside "if" to work around MDEV-20483 */
} }
/** At shutdown, frees a transaction object. */ /** At shutdown, frees a transaction object. */
void void
trx_free_at_shutdown(trx_t *trx) trx_free_at_shutdown(trx_t *trx)
{ {
trx_mutex_enter(trx);
ut_ad(trx->is_recovered); ut_ad(trx->is_recovered);
ut_a(trx_state_eq(trx, TRX_STATE_PREPARED) ut_a(trx_state_eq(trx, TRX_STATE_PREPARED)
|| trx_state_eq(trx, TRX_STATE_PREPARED_RECOVERED) || trx_state_eq(trx, TRX_STATE_PREPARED_RECOVERED)
...@@ -525,7 +520,6 @@ trx_free_at_shutdown(trx_t *trx) ...@@ -525,7 +520,6 @@ trx_free_at_shutdown(trx_t *trx)
ut_a(trx->magic_n == TRX_MAGIC_N); ut_a(trx->magic_n == TRX_MAGIC_N);
trx->commit_state(); trx->commit_state();
trx_mutex_exit(trx);
trx->release_locks(); trx->release_locks();
trx_undo_free_at_shutdown(trx); trx_undo_free_at_shutdown(trx);
...@@ -1369,9 +1363,7 @@ trx_commit_in_memory( ...@@ -1369,9 +1363,7 @@ trx_commit_in_memory(
DBUG_LOG("trx", "Autocommit in memory: " << trx); DBUG_LOG("trx", "Autocommit in memory: " << trx);
trx->state = TRX_STATE_NOT_STARTED; trx->state = TRX_STATE_NOT_STARTED;
} else { } else {
trx_mutex_enter(trx);
trx->commit_state(); trx->commit_state();
trx_mutex_exit(trx);
if (trx->id) { if (trx->id) {
trx_sys.deregister_rw(trx); trx_sys.deregister_rw(trx);
...@@ -1397,6 +1389,7 @@ trx_commit_in_memory( ...@@ -1397,6 +1389,7 @@ trx_commit_in_memory(
} else { } else {
trx_update_mod_tables_timestamp(trx); trx_update_mod_tables_timestamp(trx);
MONITOR_INC(MONITOR_TRX_RW_COMMIT); MONITOR_INC(MONITOR_TRX_RW_COMMIT);
trx->is_recovered = false;
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment