Commit a7dd7c89 authored by Marko Mäkelä

MDEV-23651 InnoDB: Failing assertion: !space->referenced()

commit de942c9f (MDEV-15983)
introduced a race condition that we inadequately fixed in
commit 93b69825 (MDEV-16169).

Because neither fil_space_t::release() nor fil_space_t::acquire() is
protected by fil_system.mutex like their predecessors were,
it is possible that stop_new_ops was set between the time
a thread checked fil_space_t::is_stopping() and the time it
invoked fil_space_t::acquire().

In an execution trace, this happened in fil_system_t::keyrotate_next(),
causing an assertion failure in fil_delete_tablespace()
in the other thread, which sought to stop new operations.

We fix this bug by merging the flag fil_space_t::stop_new_ops
and the reference count fil_space_t::n_pending_ops into a
single word that is only accessed by atomic memory operations.

fil_space_t::set_stopping(): Accessor for changing the state of
the former stop_new_ops flag.

fil_space_t::acquire(): Return whether the acquisition succeeded.
It would fail between set_stopping(true) and set_stopping(false).
parent 33ae1616
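For illustration, the merged flag-and-counter word can be modelled with C++11 std::atomic as follows. This is a minimal, self-contained sketch, not the actual InnoDB code: the real fil_space_t uses the my_atomic wrappers shown in the diff below, and the class name space_ref as well as the simple fetch_or/fetch_and form of set_stopping() are illustrative choices.

// Sketch only: models the packed n_pending_ops word of this patch with
// std::atomic; space_ref is a hypothetical stand-in for fil_space_t.
#include <atomic>
#include <cassert>
#include <cstdint>

class space_ref
{
  /* Reference count in the low 31 bits; the STOP_NEW_OPS flag in the MSB. */
  std::atomic<uint32_t> n_pending_ops{0};
  static constexpr uint32_t STOP_NEW_OPS= 1U << 31;
public:
  /** @return whether a reference was acquired
  (fails between set_stopping(true) and set_stopping(false)) */
  bool acquire()
  {
    uint32_t n= n_pending_ops.load(std::memory_order_relaxed);
    do
      if (n & STOP_NEW_OPS)
        return false;                   /* the tablespace is being dropped */
    while (!n_pending_ops.compare_exchange_weak(
              n, n + 1,
              std::memory_order_acquire, std::memory_order_relaxed));
    return true;
  }
  /** Release a reference. @return whether this was the last one */
  bool release()
  {
    uint32_t n= n_pending_ops.fetch_sub(1, std::memory_order_release);
    assert(n & ~STOP_NEW_OPS);          /* a reference must have been held */
    return (n & ~STOP_NEW_OPS) == 1;
  }
  /** Set or clear the "stopping" flag without touching the count. */
  void set_stopping(bool stopping)
  {
    if (stopping)
      n_pending_ops.fetch_or(STOP_NEW_OPS, std::memory_order_relaxed);
    else
      n_pending_ops.fetch_and(~STOP_NEW_OPS, std::memory_order_relaxed);
  }
  /** @return whether the tablespace is about to be dropped */
  bool is_stopping() const
  { return n_pending_ops.load(std::memory_order_relaxed) & STOP_NEW_OPS; }
  /** @return number of references being held */
  uint32_t referenced() const
  { return n_pending_ops.load(std::memory_order_relaxed) & ~STOP_NEW_OPS; }
};

Because the flag and the counter live in the same word, acquire() observes STOP_NEW_OPS and increments the count in one atomic compare-and-swap, which closes the window between the old is_stopping() check and the old unconditional acquire().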
......@@ -1418,12 +1418,15 @@ inline fil_space_t *fil_system_t::keyrotate_next(fil_space_t *space,
}
}
if (it == end)
return NULL;
while (it != end)
{
space= &*it;
if (space->acquire())
return space;
while (++it != end && (!UT_LIST_GET_LEN(it->chain) || it->is_stopping()));
}
space= &*it;
space->acquire();
return space;
return NULL;
}
/** Return the next tablespace.
......@@ -1445,12 +1448,14 @@ static fil_space_t *fil_space_next(fil_space_t *space, bool recheck,
space= UT_LIST_GET_FIRST(fil_system.space_list);
/* We can trust that space is not NULL because at least the
system tablespace is always present and loaded first. */
space->acquire();
if (!space->acquire())
goto next;
}
else
{
/* Move on to the next fil_space_t */
space->release();
next:
space= UT_LIST_GET_NEXT(space_list, space);
/* Skip abnormal tablespaces or those that are being created by
......@@ -1460,8 +1465,8 @@ static fil_space_t *fil_space_next(fil_space_t *space, bool recheck,
space->is_stopping() || space->purpose != FIL_TYPE_TABLESPACE))
space= UT_LIST_GET_NEXT(space_list, space);
if (space)
space->acquire();
if (space && !space->acquire())
goto next;
}
mutex_exit(&fil_system.mutex);
......@@ -2330,31 +2335,27 @@ static void fil_crypt_rotation_list_fill()
space = UT_LIST_GET_NEXT(space_list, space)) {
if (space->purpose != FIL_TYPE_TABLESPACE
|| space->is_in_rotation_list
|| space->is_stopping()
|| UT_LIST_GET_LEN(space->chain) == 0) {
|| UT_LIST_GET_LEN(space->chain) == 0
|| !space->acquire()) {
continue;
}
/* Ensure that crypt_data has been initialized. */
if (!space->size) {
/* Protect the tablespace while we may
release fil_system.mutex. */
ut_d(space->acquire());
ut_d(const fil_space_t* s=)
fil_system.read_page0(space->id);
ut_ad(!s || s == space);
ut_d(space->release());
if (!space->size) {
/* Page 0 was not loaded.
Skip this tablespace. */
continue;
goto next;
}
}
/* Skip ENCRYPTION!=DEFAULT tablespaces. */
if (space->crypt_data
&& !space->crypt_data->is_default_encryption()) {
continue;
goto next;
}
if (srv_encrypt_tables) {
......@@ -2362,19 +2363,21 @@ static void fil_crypt_rotation_list_fill()
innodb_encrypt_tables!=OFF */
if (space->crypt_data
&& space->crypt_data->min_key_version) {
continue;
goto next;
}
} else {
/* Skip unencrypted tablespaces if
innodb_encrypt_tables=OFF */
if (!space->crypt_data
|| !space->crypt_data->min_key_version) {
continue;
goto next;
}
}
fil_system.rotation_list.push_back(*space);
space->is_in_rotation_list = true;
next:
space->release();
}
}
......
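The fil_system_t::keyrotate_next(), fil_space_next() and fil_crypt_rotation_list_fill() hunks above all follow the same caller-side contract that the new acquire() imposes: a candidate tablespace is only used if acquire() succeeds, every successful acquire() is paired with a release(), and a failed acquire() simply means "skip this space". A hedged sketch of that pattern in isolation; Space is any type with the acquire()/release() interface (for example the space_ref sketch after the commit message), and process_all()/do_work are hypothetical names, not functions from the patch.

#include <vector>

/* Illustrative only: the skip-on-failed-acquire loop used throughout
this patch, written against a generic Space type. */
template <typename Space, typename Work>
void process_all(std::vector<Space>& spaces, Work do_work)
{
  for (Space& space : spaces)
  {
    if (!space.acquire())
      continue;              /* being dropped: skip, like the "goto next" above */
    do_work(space);          /* the reference pins the tablespace meanwhile */
    space.release();         /* always pair a successful acquire() */
  }
}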
......@@ -763,8 +763,7 @@ fil_try_to_close_file_in_LRU(
static void fil_flush_low(fil_space_t* space, bool metadata = false)
{
ut_ad(mutex_own(&fil_system.mutex));
ut_ad(space);
ut_ad(!space->stop_new_ops);
ut_ad(!space->is_stopping());
if (fil_buffering_disabled(space)) {
......@@ -1934,10 +1933,8 @@ fil_space_acquire_low(ulint id, bool silent)
ib::warn() << "Trying to access missing"
" tablespace " << id;
}
} else if (space->is_stopping()) {
} else if (!space->acquire()) {
space = NULL;
} else {
space->acquire();
}
mutex_exit(&fil_system.mutex);
......@@ -2254,14 +2251,14 @@ fil_check_pending_ops(const fil_space_t* space, ulint count)
{
ut_ad(mutex_own(&fil_system.mutex));
if (space == NULL) {
if (!space) {
return 0;
}
if (ulint n_pending_ops = my_atomic_loadlint(&space->n_pending_ops)) {
if (uint32_t n_pending_ops = space->referenced()) {
/* Give a warning every 10 second, starting after 1 second */
if ((count % 500) == 50) {
/* Give a warning every 10 second, starting after 1 second */
if ((count % 500) == 50) {
ib::warn() << "Trying to close/delete/truncate"
" tablespace '" << space->name
<< "' but there are " << n_pending_ops
......@@ -2347,14 +2344,13 @@ fil_check_pending_operations(
fil_space_t* sp = fil_space_get_by_id(id);
if (sp) {
sp->stop_new_ops = true;
if (sp->crypt_data) {
sp->acquire();
if (sp->crypt_data && sp->acquire()) {
mutex_exit(&fil_system.mutex);
fil_space_crypt_close_tablespace(sp);
mutex_enter(&fil_system.mutex);
sp->release();
}
sp->set_stopping(true);
}
/* Check for pending operations. */
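The fil_check_pending_ops() and fil_check_pending_operations() hunks above show the other side of the protocol: the thread that wants to delete or truncate the tablespace first sets the stopping flag, after which no new acquire() can succeed, and then waits for the existing references to drain. A hedged sketch of that drain loop; Space is again a generic type with set_stopping()/referenced(), wait_until_unreferenced is a hypothetical name, and the 20 ms polling interval is an assumption (it would make the count % 500 == 50 check above fire roughly every 10 seconds).

#include <chrono>
#include <thread>

/* Illustrative only: stop new references, then wait for old ones to go away.
Once this returns, the tablespace can be deleted or truncated safely. */
template <typename Space>
void wait_until_unreferenced(Space& space)
{
  space.set_stopping(true);            /* from now on, acquire() fails */
  while (space.referenced())           /* wait for users to call release() */
    std::this_thread::sleep_for(std::chrono::milliseconds(20));
}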
......@@ -2875,7 +2871,7 @@ fil_rename_tablespace(
multiple datafiles per tablespace. */
ut_a(UT_LIST_GET_LEN(space->chain) == 1);
node = UT_LIST_GET_FIRST(space->chain);
space->n_pending_ops++;
ut_a(space->acquire());
mutex_exit(&fil_system.mutex);
......@@ -2897,8 +2893,7 @@ fil_rename_tablespace(
/* log_sys.mutex is above fil_system.mutex in the latching order */
ut_ad(log_mutex_own());
mutex_enter(&fil_system.mutex);
ut_ad(space->n_pending_ops);
space->n_pending_ops--;
space->release();
ut_ad(space->name == old_space_name);
ut_ad(node->name == old_file_name);
bool success;
......@@ -4204,7 +4199,7 @@ fil_io(
if (space == NULL
|| (req_type.is_read()
&& !sync
&& space->stop_new_ops
&& space->is_stopping()
&& !space->is_being_truncated)) {
mutex_exit(&fil_system.mutex);
......@@ -4844,7 +4839,7 @@ fil_space_validate_for_mtr_commit(
fil_space_acquire() before mtr_start() and
fil_space_t::release() after mtr_commit(). This is why
n_pending_ops should not be zero if stop_new_ops is set. */
ut_ad(!space->stop_new_ops
ut_ad(!space->is_stopping()
|| space->is_being_truncated /* fil_truncate_prepare() */
|| space->referenced());
}
......@@ -5070,7 +5065,7 @@ truncate_t::truncate(
err = DB_ERROR;
}
space->stop_new_ops = false;
space->set_stopping(false);
/* If we opened the file in this function, close it. */
if (!already_open) {
......@@ -5171,26 +5166,6 @@ fil_space_get_block_size(const fil_space_t* space, unsigned offset)
return block_size;
}
/*******************************************************************//**
Returns the table space by a given id, NULL if not found. */
fil_space_t*
fil_space_found_by_id(
/*==================*/
ulint id) /*!< in: space id */
{
fil_space_t* space = NULL;
mutex_enter(&fil_system.mutex);
space = fil_space_get_by_id(id);
/* Not found if space is being deleted */
if (space && space->stop_new_ops) {
space = NULL;
}
mutex_exit(&fil_system.mutex);
return space;
}
/**
Get should we punch hole to tablespace.
@param[in] node File node
......
......@@ -8579,8 +8579,7 @@ i_s_tablespaces_encryption_fill_table(
for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system.space_list);
space; space = UT_LIST_GET_NEXT(space_list, space)) {
if (space->purpose == FIL_TYPE_TABLESPACE
&& !space->is_stopping()) {
space->acquire();
&& space->acquire()) {
mutex_exit(&fil_system.mutex);
if (int err = i_s_dict_fill_tablespaces_encryption(
thd, space, tables->table)) {
......@@ -8842,8 +8841,7 @@ i_s_tablespaces_scrubbing_fill_table(
for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system.space_list);
space; space = UT_LIST_GET_NEXT(space_list, space)) {
if (space->purpose == FIL_TYPE_TABLESPACE
&& !space->is_stopping()) {
space->acquire();
&& space->acquire()) {
mutex_exit(&fil_system.mutex);
if (int err = i_s_dict_fill_tablespaces_scrubbing(
thd, space, tables->table)) {
......
......@@ -93,18 +93,6 @@ struct fil_space_t : ilist_node<unflushed_spaces_tag_t>,
/** Log sequence number of the latest MLOG_INDEX_LOAD record
that was found while parsing the redo log */
lsn_t enable_lsn;
bool stop_new_ops;
/*!< we set this true when we start
deleting a single-table tablespace.
When this is set following new ops
are not allowed:
* read IO request
* ibuf merge
* file flush
Note that we can still possibly have
new write operations because we don't
check this flag when doing flush
batches. */
/** whether undo tablespace truncation is in progress */
bool is_being_truncated;
#ifdef UNIV_DEBUG
......@@ -142,14 +130,24 @@ struct fil_space_t : ilist_node<unflushed_spaces_tag_t>,
ulint n_pending_flushes; /*!< this is positive when flushing
the tablespace to disk; dropping of the
tablespace is forbidden if this is positive */
/** Number of pending buffer pool operations accessing the tablespace
without holding a table lock or dict_operation_lock S-latch
that would prevent the table (and tablespace) from being
dropped. An example is change buffer merge.
The tablespace cannot be dropped while this is nonzero,
or while fil_node_t::n_pending is nonzero.
Protected by fil_system.mutex and my_atomic_loadlint() and friends. */
ulint n_pending_ops;
private:
/** Number of pending buffer pool operations accessing the
tablespace without holding a table lock or dict_operation_lock
S-latch that would prevent the table (and tablespace) from being
dropped. An example is change buffer merge.
The tablespace cannot be dropped while this is nonzero, or while
fil_node_t::n_pending is nonzero.
The most significant bit contains the STOP_NEW_OPS flag.
Protected by my_atomic. */
uint32_t n_pending_ops;
/** Flag in n_pending_ops that indicates that the tablespace is being
deleted, and no further operations should be performed */
static const uint32_t STOP_NEW_OPS= 1U << 31;
public:
/** Number of pending block read or write operations
(when a write is imminent or a read has recently completed).
The tablespace object cannot be freed while this is nonzero,
......@@ -183,9 +181,6 @@ struct fil_space_t : ilist_node<unflushed_spaces_tag_t>,
ulint magic_n;/*!< FIL_SPACE_MAGIC_N */
/** @return whether the tablespace is about to be dropped */
bool is_stopping() const { return stop_new_ops; }
/** Clamp a page number for batched I/O, such as read-ahead.
@param offset page number limit
@return offset clamped to the tablespace size */
......@@ -267,39 +262,78 @@ struct fil_space_t : ilist_node<unflushed_spaces_tag_t>,
/** Close each file. Only invoked on fil_system.temp_space. */
void close();
/** Acquire a tablespace reference. */
void acquire() { my_atomic_addlint(&n_pending_ops, 1); }
/** Release a tablespace reference.
@return whether this was the last reference */
bool release()
{
ulint n = my_atomic_addlint(&n_pending_ops, ulint(-1));
ut_ad(n);
return n == 1;
}
/** @return whether references are being held */
bool referenced() { return my_atomic_loadlint(&n_pending_ops); }
/** @return whether references are being held */
bool referenced() const
{
return const_cast<fil_space_t*>(this)->referenced();
}
/** @return whether the tablespace is about to be dropped or is referenced */
uint32_t is_stopping_or_referenced()
{
return my_atomic_load32(&n_pending_ops);
}
/** Acquire a tablespace reference for I/O. */
void acquire_for_io() { my_atomic_addlint(&n_pending_ios, 1); }
/** Release a tablespace reference for I/O. */
void release_for_io()
{
ut_ad(pending_io());
my_atomic_addlint(&n_pending_ios, ulint(-1));
}
/** @return whether I/O is pending */
bool pending_io() { return my_atomic_loadlint(&n_pending_ios); }
/** @return whether I/O is pending */
bool pending_io() const
{
return const_cast<fil_space_t*>(this)->pending_io();
}
/** @return whether the tablespace is about to be dropped or is referenced */
uint32_t is_stopping_or_referenced() const
{
return const_cast<fil_space_t*>(this)->is_stopping_or_referenced();
}
/** @return whether the tablespace is about to be dropped */
bool is_stopping() const
{
return is_stopping_or_referenced() & STOP_NEW_OPS;
}
/** @return number of references being held */
uint32_t referenced() const
{
return is_stopping_or_referenced() & ~STOP_NEW_OPS;
}
/** Note that operations on the tablespace must stop or can resume */
void set_stopping(bool stopping)
{
/* Note: starting with 10.4 this should be std::atomic::fetch_xor() */
uint32_t n= stopping ? 0 : STOP_NEW_OPS;
while (!my_atomic_cas32_strong_explicit(&n_pending_ops, &n,
n ^ STOP_NEW_OPS,
MY_MEMORY_ORDER_ACQUIRE,
MY_MEMORY_ORDER_RELAXED))
ut_ad(!(n & STOP_NEW_OPS) == stopping);
}
MY_ATTRIBUTE((warn_unused_result))
/** @return whether a tablespace reference was successfully acquired */
bool acquire()
{
uint32_t n= 0;
while (!my_atomic_cas32_strong_explicit(&n_pending_ops, &n, n + 1,
MY_MEMORY_ORDER_ACQUIRE,
MY_MEMORY_ORDER_RELAXED))
if (UNIV_UNLIKELY(n & STOP_NEW_OPS))
return false;
return true;
}
/** Release a tablespace reference.
@return whether this was the last reference */
bool release()
{
uint32_t n= my_atomic_add32(&n_pending_ops, uint32_t(-1));
ut_ad(n & ~STOP_NEW_OPS);
return (n & ~STOP_NEW_OPS) == 1;
}
/** Acquire a tablespace reference for I/O. */
void acquire_for_io() { my_atomic_addlint(&n_pending_ios, 1); }
/** Release a tablespace reference for I/O. */
void release_for_io()
{
ut_d(ulint n=) my_atomic_addlint(&n_pending_ios, ulint(-1));
ut_ad(n);
}
/** @return whether I/O is pending */
bool pending_io() { return my_atomic_loadlint(&n_pending_ios); }
/** @return whether I/O is pending */
bool pending_io() const
{
return const_cast<fil_space_t*>(this)->pending_io();
}
};
/** Value of fil_space_t::magic_n */
......
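The comment inside set_stopping() above notes that, once the code base can rely on C++11 atomics (MariaDB 10.4 and later), the compare-and-swap loop could become a single std::atomic::fetch_xor(). A hedged sketch of that variant, assuming the same STOP_NEW_OPS layout as in the header hunk; space_state is an illustrative stand-in, and unlike a plain fetch_or/fetch_and the XOR form must never be called twice in a row with the same argument, which the assertion makes explicit.

#include <atomic>
#include <cassert>
#include <cstdint>

/* Sketch of the fetch_xor() variant hinted at in the patch comment. */
struct space_state
{
  static constexpr uint32_t STOP_NEW_OPS= 1U << 31;
  std::atomic<uint32_t> n_pending_ops{0};

  void set_stopping(bool stopping)
  {
    const uint32_t n= n_pending_ops.fetch_xor(STOP_NEW_OPS,
                                              std::memory_order_relaxed);
    /* The previous state must be the opposite of the requested one;
    a repeated call would silently undo the earlier transition,
    which the patch's CAS loop likewise asserts against. */
    assert(!(n & STOP_NEW_OPS) == stopping);
    (void) stopping;  /* keep release builds warning-free */
  }
};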
......@@ -1030,13 +1030,13 @@ trx_purge_initiate_truncate(
/* This is only executed by the srv_purge_coordinator_thread. */
export_vars.innodb_undo_truncations++;
/* TODO: PUNCH_HOLE the garbage (with write-ahead logging) */
/* In MDEV-8319 (10.5) we will PUNCH_HOLE the garbage
(with write-ahead logging). */
mutex_enter(&fil_system.mutex);
ut_ad(space->stop_new_ops);
ut_ad(space->is_being_truncated);
space->stop_new_ops = false;
space->is_being_truncated = false;
space->set_stopping(false);
mutex_exit(&fil_system.mutex);
if (purge_sys.rseg != NULL
......