Commit 8677c14e authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-24391 heap-use-after-free in fil_space_t::flush_low()

We observed a race condition that involved two threads
executing fil_flush_file_spaces() and one thread
executing fil_delete_tablespace(). After one of the
fil_flush_file_spaces() observed that
space.needs_flush_not_stopping() is set and was
releasing the fil_system.mutex, the other fil_flush_file_spaces()
would complete the execution of fil_space_t::flush_low() on
the same tablespace. Then, fil_delete_tablespace() would
destroy the object, because the value of fil_space_t::n_pending
did not prevent that. Finally, the fil_flush_file_spaces() would
resume execution and invoke fil_space_t::flush_low() on the freed
object.

This race condition was introduced in
commit 118e258a of MDEV-23855.

fil_space_t::flush(): Add a template parameter that indicates
whether the caller is holding a reference to prevent the
tablespace from being freed.

buf_dblwr_t::flush_buffered_writes_completed(),
row_quiesce_table_start(): Acquire a reference for the duration
of the fil_space_t::flush_low() operation. It should be impossible
for the object to be freed in these code paths, but we want to
satisfy the debug assertions.

fil_space_t::flush_low(): Do not increment or decrement the
reference count, but instead assert that the caller is holding
a reference.

fil_space_extend_must_retry(), fil_flush_file_spaces():
Acquire a reference before releasing fil_system.mutex.
This is what will fix the race condition.
parent 0c7c4492
...@@ -552,7 +552,7 @@ void CorruptedPages::zero_out_free_pages() ...@@ -552,7 +552,7 @@ void CorruptedPages::zero_out_free_pages()
*page_it, space_name.c_str()); *page_it, space_name.c_str());
} }
} }
space->flush(); space->flush<true>();
space->release(); space->release();
} }
m_spaces.swap(non_free_pages); m_spaces.swap(non_free_pages);
......
...@@ -648,7 +648,7 @@ void buf_dblwr_t::flush_buffered_writes_completed(const IORequest &request) ...@@ -648,7 +648,7 @@ void buf_dblwr_t::flush_buffered_writes_completed(const IORequest &request)
mysql_mutex_unlock(&mutex); mysql_mutex_unlock(&mutex);
/* Now flush the doublewrite buffer data to disk */ /* Now flush the doublewrite buffer data to disk */
fil_system.sys_space->flush(); fil_system.sys_space->flush<false>();
/* The writes have been flushed to disk now and in recovery we will /* The writes have been flushed to disk now and in recovery we will
find them in the doublewrite buffer blocks. Next, write the data pages. */ find them in the doublewrite buffer blocks. Next, write the data pages. */
......
...@@ -496,18 +496,16 @@ void fil_space_t::flush_low() ...@@ -496,18 +496,16 @@ void fil_space_t::flush_low()
{ {
ut_ad(!mutex_own(&fil_system.mutex)); ut_ad(!mutex_own(&fil_system.mutex));
uint32_t n= 0; uint32_t n= 1;
while (!n_pending.compare_exchange_strong(n, (n + 1) | NEEDS_FSYNC, while (!n_pending.compare_exchange_strong(n, n | NEEDS_FSYNC,
std::memory_order_acquire, std::memory_order_acquire,
std::memory_order_relaxed)) std::memory_order_relaxed))
{ {
ut_ad(n & PENDING);
if (n & STOPPING) if (n & STOPPING)
return; return;
if (!(n & NEEDS_FSYNC)) if (n & NEEDS_FSYNC)
continue; break;
if (acquire_low() & STOPPING)
return;
break;
} }
fil_n_pending_tablespace_flushes++; fil_n_pending_tablespace_flushes++;
...@@ -535,7 +533,6 @@ void fil_space_t::flush_low() ...@@ -535,7 +533,6 @@ void fil_space_t::flush_low()
} }
clear_flush(); clear_flush();
release();
fil_n_pending_tablespace_flushes--; fil_n_pending_tablespace_flushes--;
} }
...@@ -632,8 +629,10 @@ fil_space_extend_must_retry( ...@@ -632,8 +629,10 @@ fil_space_extend_must_retry(
case TRX_SYS_SPACE: case TRX_SYS_SPACE:
srv_sys_space.set_last_file_size(pages_in_MiB); srv_sys_space.set_last_file_size(pages_in_MiB);
do_flush: do_flush:
space->reacquire();
mutex_exit(&fil_system.mutex); mutex_exit(&fil_system.mutex);
space->flush_low(); space->flush_low();
space->release();
mutex_enter(&fil_system.mutex); mutex_enter(&fil_system.mutex);
break; break;
default: default:
...@@ -3516,8 +3515,10 @@ void fil_flush_file_spaces() ...@@ -3516,8 +3515,10 @@ void fil_flush_file_spaces()
{ {
if (space.needs_flush_not_stopping()) if (space.needs_flush_not_stopping())
{ {
space.reacquire();
mutex_exit(&fil_system.mutex); mutex_exit(&fil_system.mutex);
space.flush_low(); space.flush_low();
space.release();
goto rescan; goto rescan;
} }
} }
......
...@@ -972,7 +972,7 @@ struct fil_space_t final ...@@ -972,7 +972,7 @@ struct fil_space_t final
fil_io_t io(const IORequest &type, os_offset_t offset, size_t len, fil_io_t io(const IORequest &type, os_offset_t offset, size_t len,
void *buf, buf_page_t *bpage= nullptr); void *buf, buf_page_t *bpage= nullptr);
/** Flush pending writes from the file system cache to the file. */ /** Flush pending writes from the file system cache to the file. */
inline void flush(); template<bool have_reference> inline void flush();
/** Flush pending writes from the file system cache to the file. */ /** Flush pending writes from the file system cache to the file. */
void flush_low(); void flush_low();
...@@ -1452,18 +1452,23 @@ inline void fil_space_t::set_stopping(bool stopping) ...@@ -1452,18 +1452,23 @@ inline void fil_space_t::set_stopping(bool stopping)
} }
/** Flush pending writes from the file system cache to the file. */ /** Flush pending writes from the file system cache to the file. */
inline void fil_space_t::flush() template<bool have_reference> inline void fil_space_t::flush()
{ {
ut_ad(!mutex_own(&fil_system.mutex)); ut_ad(!mutex_own(&fil_system.mutex));
ut_ad(!have_reference || (pending() & PENDING));
ut_ad(purpose == FIL_TYPE_TABLESPACE || purpose == FIL_TYPE_IMPORT); ut_ad(purpose == FIL_TYPE_TABLESPACE || purpose == FIL_TYPE_IMPORT);
if (srv_file_flush_method == SRV_O_DIRECT_NO_FSYNC) if (srv_file_flush_method == SRV_O_DIRECT_NO_FSYNC)
{ {
ut_ad(!is_in_unflushed_spaces); ut_ad(!is_in_unflushed_spaces);
ut_ad(!needs_flush()); ut_ad(!needs_flush());
} }
else else if (have_reference)
flush_low();
else if (!(acquire_low() & STOPPING))
{
flush_low(); flush_low();
release();
}
} }
/** @return the size in pages (0 if unreadable) */ /** @return the size in pages (0 if unreadable) */
......
...@@ -545,7 +545,7 @@ row_quiesce_table_start( ...@@ -545,7 +545,7 @@ row_quiesce_table_start(
if (!trx_is_interrupted(trx)) { if (!trx_is_interrupted(trx)) {
/* Ensure that all asynchronous IO is completed. */ /* Ensure that all asynchronous IO is completed. */
os_aio_wait_until_no_pending_writes(); os_aio_wait_until_no_pending_writes();
table->space->flush(); table->space->flush<false>();
if (row_quiesce_write_cfg(table, trx->mysql_thd) if (row_quiesce_write_cfg(table, trx->mysql_thd)
!= DB_SUCCESS) { != DB_SUCCESS) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment