Commit 8677c14e authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-24391 heap-use-after-free in fil_space_t::flush_low()

We observed a race condition that involved two threads
executing fil_flush_file_spaces() and one thread
executing fil_delete_tablespace(). After one of the
fil_flush_file_spaces() observed that
space.needs_flush_not_stopping() is set and was
releasing the fil_system.mutex, the other fil_flush_file_spaces()
would complete the execution of fil_space_t::flush_low() on
the same tablespace. Then, fil_delete_tablespace() would
destroy the object, because the value of fil_space_t::n_pending
did not prevent that. Finally, the fil_flush_file_spaces() would
resume execution and invoke fil_space_t::flush_low() on the freed
object.

This race condition was introduced in
commit 118e258a of MDEV-23855.

fil_space_t::flush(): Add a template parameter that indicates
whether the caller is holding a reference to prevent the
tablespace from being freed.

buf_dblwr_t::flush_buffered_writes_completed(),
row_quiesce_table_start(): Acquire a reference for the duration
of the fil_space_t::flush_low() operation. It should be impossible
for the object to be freed in these code paths, but we want to
satisfy the debug assertions.

fil_space_t::flush_low(): Do not increment or decrement the
reference count, but instead assert that the caller is holding
a reference.

fil_space_extend_must_retry(), fil_flush_file_spaces():
Acquire a reference before releasing fil_system.mutex.
This is what will fix the race condition.
parent 0c7c4492
......@@ -552,7 +552,7 @@ void CorruptedPages::zero_out_free_pages()
*page_it, space_name.c_str());
}
}
space->flush();
space->flush<true>();
space->release();
}
m_spaces.swap(non_free_pages);
......
......@@ -648,7 +648,7 @@ void buf_dblwr_t::flush_buffered_writes_completed(const IORequest &request)
mysql_mutex_unlock(&mutex);
/* Now flush the doublewrite buffer data to disk */
fil_system.sys_space->flush();
fil_system.sys_space->flush<false>();
/* The writes have been flushed to disk now and in recovery we will
find them in the doublewrite buffer blocks. Next, write the data pages. */
......
......@@ -496,17 +496,15 @@ void fil_space_t::flush_low()
{
ut_ad(!mutex_own(&fil_system.mutex));
uint32_t n= 0;
while (!n_pending.compare_exchange_strong(n, (n + 1) | NEEDS_FSYNC,
uint32_t n= 1;
while (!n_pending.compare_exchange_strong(n, n | NEEDS_FSYNC,
std::memory_order_acquire,
std::memory_order_relaxed))
{
ut_ad(n & PENDING);
if (n & STOPPING)
return;
if (!(n & NEEDS_FSYNC))
continue;
if (acquire_low() & STOPPING)
return;
if (n & NEEDS_FSYNC)
break;
}
......@@ -535,7 +533,6 @@ void fil_space_t::flush_low()
}
clear_flush();
release();
fil_n_pending_tablespace_flushes--;
}
......@@ -632,8 +629,10 @@ fil_space_extend_must_retry(
case TRX_SYS_SPACE:
srv_sys_space.set_last_file_size(pages_in_MiB);
do_flush:
space->reacquire();
mutex_exit(&fil_system.mutex);
space->flush_low();
space->release();
mutex_enter(&fil_system.mutex);
break;
default:
......@@ -3516,8 +3515,10 @@ void fil_flush_file_spaces()
{
if (space.needs_flush_not_stopping())
{
space.reacquire();
mutex_exit(&fil_system.mutex);
space.flush_low();
space.release();
goto rescan;
}
}
......
......@@ -972,7 +972,7 @@ struct fil_space_t final
fil_io_t io(const IORequest &type, os_offset_t offset, size_t len,
void *buf, buf_page_t *bpage= nullptr);
/** Flush pending writes from the file system cache to the file. */
inline void flush();
template<bool have_reference> inline void flush();
/** Flush pending writes from the file system cache to the file. */
void flush_low();
......@@ -1452,18 +1452,23 @@ inline void fil_space_t::set_stopping(bool stopping)
}
/** Flush pending writes from the file system cache to the file. */
inline void fil_space_t::flush()
template<bool have_reference> inline void fil_space_t::flush()
{
ut_ad(!mutex_own(&fil_system.mutex));
ut_ad(!have_reference || (pending() & PENDING));
ut_ad(purpose == FIL_TYPE_TABLESPACE || purpose == FIL_TYPE_IMPORT);
if (srv_file_flush_method == SRV_O_DIRECT_NO_FSYNC)
{
ut_ad(!is_in_unflushed_spaces);
ut_ad(!needs_flush());
}
else
else if (have_reference)
flush_low();
else if (!(acquire_low() & STOPPING))
{
flush_low();
release();
}
}
/** @return the size in pages (0 if unreadable) */
......
......@@ -545,7 +545,7 @@ row_quiesce_table_start(
if (!trx_is_interrupted(trx)) {
/* Ensure that all asynchronous IO is completed. */
os_aio_wait_until_no_pending_writes();
table->space->flush();
table->space->flush<false>();
if (row_quiesce_write_cfg(table, trx->mysql_thd)
!= DB_SUCCESS) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment