Commit 0d6d63e1 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-22027 Assertion oldest_lsn >= log_sys.last_checkpoint_lsn failed

log_buf_pool_get_oldest_modification(): Acquire
log_sys_t::flush_order_mutex in order to prevent a race condition
that was introduced in
commit 1a6f708e (MDEV-15058).

Before that change, log_buf_pool_get_oldest_modification()
was protected by both log_sys.mutex and log_sys.flush_order_mutex
like it was supposed to be ever since
commit a52c4820 (MySQL 5.5.10).

buf_pool_t::get_oldest_modification(): Replaces
buf_pool_get_oldest_modification(), to emphasize that
log_sys.flush_order_mutex must be acquired by the caller if needed.

log_close(): Invoke log_buf_pool_get_oldest_modification()
in order to ensure a clean shutdown.

The scenario of the race condition is as follows:

1. The buffer pool is clean (no writes are pending).
2. mtr_add_dirtied_pages_to_flush_list() releases log_sys.mutex.
3. log_buf_pool_get_oldest_modification() observes that the
buffer pool is clean and returns log_sys.lsn.
4. log_checkpoint() completes, writing a wrong checkpoint header
according to which everything up to log_sys.lsn was clean.
5. mtr_add_dirtied_pages_to_flush_list() completes the execution
of mtr_memo_note_modifications(), releases the page latches and
the flush_order_mutex.
6. On a subsequent log_checkpoint(), the assertion could fail
if the page modifications had not been flushed yet.

The failing assertion (which is valid) was added in MySQL 5.7
mysql/mysql-server@5c6c6ec69336369487dfc080a6980089b4e1a3c2
and merged to MariaDB Server 10.2.2 in
commit fec844ac.
parent 661ebd46
......@@ -483,31 +483,27 @@ static bool buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space)
/**
@return the smallest oldest_modification lsn for any page.
@retval 0 if all modified persistent pages have been flushed */
lsn_t
buf_pool_get_oldest_modification()
@retval 0 if all modified persistent pages have been flushed */
lsn_t buf_pool_t::get_oldest_modification()
{
mutex_enter(&buf_pool.flush_list_mutex);
buf_page_t* bpage;
/* FIXME: Keep temporary tablespace pages in a separate flush
list. We would only need to write out temporary pages if the
page is about to be evicted from the buffer pool, and the page
contents is still needed (the page has not been freed). */
for (bpage = UT_LIST_GET_LAST(buf_pool.flush_list);
bpage != NULL && fsp_is_system_temporary(bpage->id.space());
bpage = UT_LIST_GET_PREV(list, bpage)) {
ut_ad(bpage->in_flush_list);
}
lsn_t oldest_lsn = bpage ? bpage->oldest_modification : 0;
mutex_exit(&buf_pool.flush_list_mutex);
/* The returned answer may be out of date: the flush_list can
change after the mutex has been released. */
return(oldest_lsn);
mutex_enter(&flush_list_mutex);
/* FIXME: Keep temporary tablespace pages in a separate flush
list. We would only need to write out temporary pages if the
page is about to be evicted from the buffer pool, and the page
contents is still needed (the page has not been freed). */
const buf_page_t *bpage;
for (bpage= UT_LIST_GET_LAST(flush_list);
bpage && fsp_is_system_temporary(bpage->id.space());
bpage= UT_LIST_GET_PREV(list, bpage))
ut_ad(bpage->in_flush_list);
lsn_t oldest_lsn= bpage ? bpage->oldest_modification : 0;
mutex_exit(&flush_list_mutex);
/* The result may become stale as soon as we released the mutex.
On log checkpoint, also log_sys.flush_order_mutex will be needed. */
return oldest_lsn;
}
/** Allocate a buffer block.
......
......@@ -2441,7 +2441,7 @@ page_cleaner_flush_pages_recommendation(ulint last_pages_in)
sum_pages = 0;
}
oldest_lsn = buf_pool_get_oldest_modification();
oldest_lsn = buf_pool.get_oldest_modification();
ut_ad(oldest_lsn <= log_get_lsn());
......
......@@ -198,11 +198,6 @@ UNIV_INLINE
ulint
buf_pool_get_curr_size(void);
/*========================*/
/**
@return the smallest oldest_modification lsn for any page.
@retval 0 if all modified persistent pages have been flushed */
lsn_t
buf_pool_get_oldest_modification();
/********************************************************************//**
Allocates a buf_page_t descriptor. This function must succeed. In case
......@@ -1868,6 +1863,11 @@ class buf_pool_t
bool is_block_lock(const BPageLock *l) const
{ return is_block_field(reinterpret_cast<const void*>(l)); }
/**
@return the smallest oldest_modification lsn for any page
@retval 0 if all modified persistent pages have been flushed */
lsn_t get_oldest_modification();
/** Determine if a buffer block was created by chunk_t::create().
@param block block descriptor (not dereferenced)
@return whether block has been created by chunk_t::create() */
......
......@@ -139,7 +139,7 @@ log_get_max_modified_age_async(void);
/*================================*/
/** Calculate the recommended highest values for lsn - last_checkpoint_lsn
and lsn - buf_get_oldest_modification().
and lsn - buf_pool.get_oldest_modification().
@param[in] file_size requested innodb_log_file_size
@retval true on success
@retval false if the smallest log is too small to
......@@ -667,13 +667,13 @@ struct log_t{
lsn_t max_modified_age_async;
/*!< when this recommended
value for lsn -
buf_pool_get_oldest_modification()
buf_pool.get_oldest_modification()
is exceeded, we start an
asynchronous preflush of pool pages */
lsn_t max_modified_age_sync;
/*!< when this recommended
value for lsn -
buf_pool_get_oldest_modification()
buf_pool.get_oldest_modification()
is exceeded, we start a
synchronous preflush of pool pages */
lsn_t max_checkpoint_age_async;
......
......@@ -91,27 +91,17 @@ should be bigger than LOG_POOL_PREFLUSH_RATIO_SYNC */
the previous */
#define LOG_POOL_PREFLUSH_RATIO_ASYNC 8
/****************************************************************//**
Returns the oldest modified block lsn in the pool, or log_sys.lsn if none
exists.
/** Return the oldest modified LSN in buf_pool.flush_list,
or the latest LSN if all pages are clean.
@return LSN of oldest modification */
static
lsn_t
log_buf_pool_get_oldest_modification(void)
/*======================================*/
static lsn_t log_buf_pool_get_oldest_modification()
{
lsn_t lsn;
ut_ad(log_mutex_own());
lsn = buf_pool_get_oldest_modification();
ut_ad(log_mutex_own());
log_flush_order_mutex_enter();
lsn_t lsn= buf_pool.get_oldest_modification();
log_flush_order_mutex_exit();
if (!lsn) {
lsn = log_sys.get_lsn();
}
return(lsn);
return lsn ? lsn : log_sys.get_lsn();
}
/** Extends the log buffer.
......@@ -419,7 +409,7 @@ log_close(void)
goto function_exit;
}
oldest_lsn = buf_pool_get_oldest_modification();
oldest_lsn = log_buf_pool_get_oldest_modification();
if (!oldest_lsn
|| lsn - oldest_lsn > log_sys.max_modified_age_sync
......@@ -432,7 +422,7 @@ log_close(void)
}
/** Calculate the recommended highest values for lsn - last_checkpoint_lsn
and lsn - buf_get_oldest_modification().
and lsn - buf_pool.get_oldest_modification().
@param[in] file_size requested innodb_log_file_size
@retval true on success
@retval false if the smallest log group is too small to
......
......@@ -2002,7 +2002,7 @@ srv_mon_process_existing_counter(
break;
case MONITOR_OVLD_BUF_OLDEST_LSN:
value = (mon_type_t) buf_pool_get_oldest_modification();
value = (mon_type_t) buf_pool.get_oldest_modification();
break;
case MONITOR_OVLD_LSN_CHECKPOINT:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment