Commit 73fee39e authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-27985 buf_flush_freed_pages() causes InnoDB to hang

buf_flush_freed_pages(): Assert that neither buf_pool.mutex
nor buf_pool.flush_list_mutex are held. Simplify the loops.
Return the tablespace and the number of pages written or punched.

buf_flush_LRU_list_batch(), buf_do_flush_list_batch():
Release buf_pool.mutex before invoking buf_flush_space().

buf_flush_list_space(): Acquire the mutexes only after invoking
buf_flush_freed_pages().

Reviewed by: Thirunarayanan Balathandayuthapani
parent 00896db1
......@@ -1046,42 +1046,52 @@ static page_id_t buf_flush_check_neighbors(const fil_space_t &space,
return i;
}
MY_ATTRIBUTE((nonnull))
MY_ATTRIBUTE((nonnull, warn_unused_result))
/** Write punch-hole or zeroes of the freed ranges when
innodb_immediate_scrub_data_uncompressed from the freed ranges.
@param space tablespace which may contain ranges of freed pages */
static void buf_flush_freed_pages(fil_space_t *space)
@param space tablespace which may contain ranges of freed pages
@param writable whether the tablespace is writable
@return number of pages written or hole-punched */
static uint32_t buf_flush_freed_pages(fil_space_t *space, bool writable)
{
const bool punch_hole= space->punch_hole;
if (!srv_immediate_scrub_data_uncompressed && !punch_hole)
return;
lsn_t flush_to_disk_lsn= log_sys.get_flushed_lsn();
if (!punch_hole && !srv_immediate_scrub_data_uncompressed)
return 0;
mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex);
mysql_mutex_assert_not_owner(&buf_pool.mutex);
std::unique_lock<std::mutex> freed_lock(space->freed_range_mutex);
if (space->freed_ranges.empty()
|| flush_to_disk_lsn < space->get_last_freed_lsn())
space->freed_range_mutex.lock();
if (space->freed_ranges.empty() ||
log_sys.get_flushed_lsn() < space->get_last_freed_lsn())
{
freed_lock.unlock();
return;
space->freed_range_mutex.unlock();
return 0;
}
const unsigned physical_size{space->physical_size()};
range_set freed_ranges= std::move(space->freed_ranges);
freed_lock.unlock();
uint32_t written= 0;
for (const auto &range : freed_ranges)
if (!writable);
else if (punch_hole)
{
const ulint physical_size= space->physical_size();
if (punch_hole)
for (const auto &range : freed_ranges)
{
written+= range.last - range.first + 1;
space->reacquire();
space->io(IORequest(IORequest::PUNCH_RANGE),
os_offset_t{range.first} * physical_size,
(range.last - range.first + 1) * physical_size,
nullptr);
}
else if (srv_immediate_scrub_data_uncompressed)
}
else
{
for (const auto &range : freed_ranges)
{
written+= range.last - range.first + 1;
for (os_offset_t i= range.first; i <= range.last; i++)
{
space->reacquire();
......@@ -1090,8 +1100,10 @@ static void buf_flush_freed_pages(fil_space_t *space)
const_cast<byte*>(field_ref_zero));
}
}
buf_pool.stat.n_pages_written+= (range.last - range.first + 1);
}
space->freed_range_mutex.unlock();
return written;
}
/** Flushes to disk all flushable pages within the flush area
......@@ -1213,14 +1225,12 @@ static ulint buf_free_from_unzip_LRU_list_batch(ulint max)
/** Start writing out pages for a tablespace.
@param id tablespace identifier
@return tablespace
@retval nullptr if the pages for this tablespace should be discarded */
static fil_space_t *buf_flush_space(const uint32_t id)
@return tablespace and number of pages written */
static std::pair<fil_space_t*, uint32_t> buf_flush_space(const uint32_t id)
{
fil_space_t *space= fil_space_t::get(id);
if (space)
buf_flush_freed_pages(space);
return space;
if (fil_space_t *space= fil_space_t::get(id))
return {space, buf_flush_freed_pages(space, true)};
return {nullptr, 0};
}
struct flush_counters_t
......@@ -1288,6 +1298,7 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n)
n->flushed + n->evicted < max) ||
recv_recovery_is_on()); ++scanned)
{
retry:
buf_page_t *prev= UT_LIST_GET_PREV(LRU, bpage);
const lsn_t oldest_modification= bpage->oldest_modification();
buf_pool.lru_hp.set(prev);
......@@ -1309,10 +1320,18 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n)
{
if (last_space_id != space_id)
{
buf_pool.lru_hp.set(bpage);
mysql_mutex_unlock(&buf_pool.mutex);
if (space)
space->release();
space= buf_flush_space(space_id);
auto p= buf_flush_space(space_id);
space= p.first;
last_space_id= space_id;
mysql_mutex_lock(&buf_pool.mutex);
if (p.second)
buf_pool.stat.n_pages_written+= p.second;
bpage= buf_pool.lru_hp.get();
goto retry;
}
else
ut_ad(!space);
......@@ -1455,10 +1474,28 @@ static ulint buf_do_flush_list_batch(ulint max_n, lsn_t lsn)
{
if (last_space_id != space_id)
{
mysql_mutex_lock(&buf_pool.flush_list_mutex);
buf_pool.flush_hp.set(bpage);
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
mysql_mutex_unlock(&buf_pool.mutex);
if (space)
space->release();
space= buf_flush_space(space_id);
auto p= buf_flush_space(space_id);
space= p.first;
last_space_id= space_id;
mysql_mutex_lock(&buf_pool.mutex);
if (p.second)
buf_pool.stat.n_pages_written+= p.second;
mysql_mutex_lock(&buf_pool.flush_list_mutex);
bpage= buf_pool.flush_hp.get();
if (!bpage)
break;
if (bpage->id() != page_id)
continue;
buf_pool.flush_hp.set(UT_LIST_GET_PREV(list, bpage));
if (bpage->oldest_modification() <= 1 || !bpage->ready_for_flush())
goto next;
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
}
else
ut_ad(!space);
......@@ -1486,6 +1523,7 @@ static ulint buf_do_flush_list_batch(ulint max_n, lsn_t lsn)
}
mysql_mutex_lock(&buf_pool.flush_list_mutex);
next:
bpage= buf_pool.flush_hp.get();
}
......@@ -1582,11 +1620,14 @@ bool buf_flush_list_space(fil_space_t *space, ulint *n_flushed)
bool may_have_skipped= false;
ulint max_n_flush= srv_io_capacity;
mysql_mutex_lock(&buf_pool.mutex);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
bool acquired= space->acquire();
buf_flush_freed_pages(space);
{
const uint32_t written{buf_flush_freed_pages(space, acquired)};
mysql_mutex_lock(&buf_pool.mutex);
if (written)
buf_pool.stat.n_pages_written+= written;
}
mysql_mutex_lock(&buf_pool.flush_list_mutex);
for (buf_page_t *bpage= UT_LIST_GET_LAST(buf_pool.flush_list); bpage; )
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment