Commit 4c3ad244 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-27416 InnoDB hang in buf_flush_wait_flushed(), on log checkpoint

InnoDB could sometimes hang when triggering a log checkpoint. This is
due to commit 7b1252c0 (MDEV-24278),
which introduced an untimed wait to buf_flush_page_cleaner().

The hang was noticed by occasional failures of IMPORT TABLESPACE tests,
such as innodb.innodb-wl5522, which would (unnecessarily) invoke
log_make_checkpoint() from row_import_cleanup().

The reason of the hang was that buf_flush_page_cleaner() would enter
untimed sleep despite buf_flush_sync_lsn being set. The exact failure
scenario is unclear, because buf_flush_sync_lsn should actually be
protected by buf_pool.flush_list_mutex. We prevent the hang by
invoking buf_pool.page_cleaner_set_idle(false) whenever we are
setting buf_flush_sync_lsn and signaling buf_pool.do_flush_list.

The bulk of these changes was originally developed as a preparation
for MDEV-26827, to invoke buf_flush_list() from fewer threads,
and tested on 10.6 by Matthias Leich.

This fix was tested by running 100 repetitions of 100 concurrent instances
of the test innodb.innodb-wl5522 on a RelWithDebInfo build, using ext4fs
and innodb_flush_method=O_DIRECT on a SATA SSD with 4096-byte block size.
During the test, the call to log_make_checkpoint() in row_import_cleanup()
was present.

buf_flush_list(): Make static.

buf_flush_wait(): Wait for buf_pool.get_oldest_modification()
to reach a target, by work done in the buf_flush_page_cleaner.
If buf_flush_sync_lsn is going to be set, we will invoke
buf_pool.page_cleaner_set_idle(false).

buf_flush_ahead(): If buf_flush_sync_lsn or buf_flush_async_lsn
is going to be set and the page cleaner woken up, we will invoke
buf_pool.page_cleaner_set_idle(false).

buf_flush_wait_flushed(): Invoke buf_flush_wait().

buf_flush_sync(): Invoke recv_sys.apply() at the start in case
crash recovery is active. Invoke buf_flush_wait().

buf_flush_sync_batch(): A lower-level variant of buf_flush_sync()
that is only called by recv_sys_t::apply().

buf_flush_sync_for_checkpoint(): Do not trigger log apply
or checkpoint during recovery.

buf_dblwr_t::create(): Only initiate a buffer pool flush, not
a checkpoint.

row_import_cleanup(): Do not unnecessarily invoke log_make_checkpoint().
Invoking buf_flush_list_space() before starting to generate redo log
for the imported tablespace should suffice.

srv_prepare_to_delete_redo_log_file():
Set recv_sys.recovery_on in order to prevent
buf_flush_sync_for_checkpoint() from initiating a checkpoint
while the log is inaccessible. Remove a wait loop that is already
part of buf_flush_sync().
Do not invoke fil_names_clear() if the log is being upgraded,
because the FILE_MODIFY record is specific to the latest format.

create_log_file(): Clear recv_sys.recovery_on only after calling
log_make_checkpoint(), to prevent buf_flush_page_cleaner from
invoking a checkpoint.

innodb_shutdown(): Simplify the logic in mariadb-backup --prepare.

os_aio_wait_until_no_pending_writes(): Update the function comment.
Apart from row_quiesce_table_start() during FLUSH TABLES...FOR EXPORT,
this is being called by buf_flush_list_space(), which is invoked
by ALTER TABLE...IMPORT TABLESPACE as well as some encryption operations.
parent eab89f14
...@@ -3300,10 +3300,7 @@ buf_page_get_low( ...@@ -3300,10 +3300,7 @@ buf_page_get_low(
fix_block->fix(); fix_block->fix();
mysql_mutex_unlock(&buf_pool.mutex); mysql_mutex_unlock(&buf_pool.mutex);
buf_flush_list(); buf_flush_sync();
buf_flush_wait_batch_end_acquiring_mutex(false);
while (buf_flush_list_space(space));
os_aio_wait_until_no_pending_writes();
if (fix_block->page.buf_fix_count() == 1 if (fix_block->page.buf_fix_count() == 1
&& !fix_block->page.oldest_modification()) { && !fix_block->page.oldest_modification()) {
......
...@@ -219,8 +219,7 @@ bool buf_dblwr_t::create() ...@@ -219,8 +219,7 @@ bool buf_dblwr_t::create()
trx_sys_block->frame, TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N); trx_sys_block->frame, TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N);
mtr.commit(); mtr.commit();
/* Flush the modified pages to disk and make a checkpoint */ buf_flush_wait_flushed(mtr.commit_lsn());
log_make_checkpoint();
/* Remove doublewrite pages from LRU */ /* Remove doublewrite pages from LRU */
buf_pool_invalidate(); buf_pool_invalidate();
......
...@@ -1538,7 +1538,7 @@ void buf_flush_wait_batch_end(bool lru) ...@@ -1538,7 +1538,7 @@ void buf_flush_wait_batch_end(bool lru)
@param lsn buf_pool.get_oldest_modification(LSN_MAX) target @param lsn buf_pool.get_oldest_modification(LSN_MAX) target
@return the number of processed pages @return the number of processed pages
@retval 0 if a buf_pool.flush_list batch is already running */ @retval 0 if a buf_pool.flush_list batch is already running */
ulint buf_flush_list(ulint max_n, lsn_t lsn) static ulint buf_flush_list(ulint max_n= ULINT_UNDEFINED, lsn_t lsn= LSN_MAX)
{ {
ut_ad(lsn); ut_ad(lsn);
...@@ -1833,6 +1833,30 @@ ATTRIBUTE_COLD void log_make_checkpoint() ...@@ -1833,6 +1833,30 @@ ATTRIBUTE_COLD void log_make_checkpoint()
while (!log_checkpoint()); while (!log_checkpoint());
} }
/** Wait for all dirty pages up to an LSN to be written out.
NOTE: The calling thread is not allowed to hold any buffer page latches! */
static void buf_flush_wait(lsn_t lsn)
{
ut_ad(lsn <= log_sys.get_lsn());
while (buf_pool.get_oldest_modification(lsn) < lsn)
{
if (buf_flush_sync_lsn < lsn)
{
buf_flush_sync_lsn= lsn;
buf_pool.page_cleaner_set_idle(false);
pthread_cond_signal(&buf_pool.do_flush_list);
}
my_cond_wait(&buf_pool.done_flush_list,
&buf_pool.flush_list_mutex.m_mutex);
}
/* Wait for the checkpoint. */
while (buf_flush_sync_lsn)
my_cond_wait(&buf_pool.done_flush_list,
&buf_pool.flush_list_mutex.m_mutex);
}
/** Wait until all persistent pages are flushed up to a limit. /** Wait until all persistent pages are flushed up to a limit.
@param sync_lsn buf_pool.get_oldest_modification(LSN_MAX) to wait for */ @param sync_lsn buf_pool.get_oldest_modification(LSN_MAX) to wait for */
ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn) ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn)
...@@ -1849,9 +1873,9 @@ ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn) ...@@ -1849,9 +1873,9 @@ ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn)
if (buf_pool.get_oldest_modification(sync_lsn) < sync_lsn) if (buf_pool.get_oldest_modification(sync_lsn) < sync_lsn)
{ {
MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
#if 1 /* FIXME: remove this, and guarantee that the page cleaner serves us */ #if 1 /* FIXME: remove this, and guarantee that the page cleaner serves us */
if (UNIV_UNLIKELY(!buf_page_cleaner_is_active) if (UNIV_UNLIKELY(!buf_page_cleaner_is_active))
ut_d(|| innodb_page_cleaner_disabled_debug))
{ {
do do
{ {
...@@ -1864,35 +1888,21 @@ ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn) ...@@ -1864,35 +1888,21 @@ ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn)
MONITOR_FLUSH_SYNC_COUNT, MONITOR_FLUSH_SYNC_COUNT,
MONITOR_FLUSH_SYNC_PAGES, n_pages); MONITOR_FLUSH_SYNC_PAGES, n_pages);
} }
MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
mysql_mutex_lock(&buf_pool.flush_list_mutex); mysql_mutex_lock(&buf_pool.flush_list_mutex);
} }
while (buf_pool.get_oldest_modification(sync_lsn) < sync_lsn); while (buf_pool.get_oldest_modification(sync_lsn) < sync_lsn);
goto try_checkpoint;
} }
else
#endif #endif
if (buf_flush_sync_lsn < sync_lsn)
{ {
buf_flush_sync_lsn= sync_lsn;
pthread_cond_signal(&buf_pool.do_flush_list);
}
do
{
tpool::tpool_wait_begin();
thd_wait_begin(nullptr, THD_WAIT_DISKIO); thd_wait_begin(nullptr, THD_WAIT_DISKIO);
my_cond_wait(&buf_pool.done_flush_list, tpool::tpool_wait_begin();
&buf_pool.flush_list_mutex.m_mutex); buf_flush_wait(sync_lsn);
thd_wait_end(nullptr);
tpool::tpool_wait_end(); tpool::tpool_wait_end();
thd_wait_end(nullptr);
MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
} }
while (buf_pool.get_oldest_modification(sync_lsn) < sync_lsn);
} }
try_checkpoint:
mysql_mutex_unlock(&buf_pool.flush_list_mutex); mysql_mutex_unlock(&buf_pool.flush_list_mutex);
if (UNIV_UNLIKELY(log_sys.last_checkpoint_lsn < sync_lsn)) if (UNIV_UNLIKELY(log_sys.last_checkpoint_lsn < sync_lsn))
...@@ -1925,8 +1935,11 @@ ATTRIBUTE_COLD void buf_flush_ahead(lsn_t lsn, bool furious) ...@@ -1925,8 +1935,11 @@ ATTRIBUTE_COLD void buf_flush_ahead(lsn_t lsn, bool furious)
{ {
mysql_mutex_lock(&buf_pool.flush_list_mutex); mysql_mutex_lock(&buf_pool.flush_list_mutex);
if (limit < lsn) if (limit < lsn)
{
limit= lsn; limit= lsn;
buf_pool.page_cleaner_set_idle(false);
pthread_cond_signal(&buf_pool.do_flush_list); pthread_cond_signal(&buf_pool.do_flush_list);
}
mysql_mutex_unlock(&buf_pool.flush_list_mutex); mysql_mutex_unlock(&buf_pool.flush_list_mutex);
} }
} }
...@@ -1960,10 +1973,6 @@ ATTRIBUTE_COLD static void buf_flush_sync_for_checkpoint(lsn_t lsn) ...@@ -1960,10 +1973,6 @@ ATTRIBUTE_COLD static void buf_flush_sync_for_checkpoint(lsn_t lsn)
MONITOR_FLUSH_SYNC_PAGES, n_flushed); MONITOR_FLUSH_SYNC_PAGES, n_flushed);
} }
/* Attempt to perform a log checkpoint upon completing each batch. */
if (recv_recovery_is_on())
recv_sys.apply(true);
switch (srv_file_flush_method) { switch (srv_file_flush_method) {
case SRV_NOSYNC: case SRV_NOSYNC:
case SRV_O_DIRECT_NO_FSYNC: case SRV_O_DIRECT_NO_FSYNC:
...@@ -1980,7 +1989,8 @@ ATTRIBUTE_COLD static void buf_flush_sync_for_checkpoint(lsn_t lsn) ...@@ -1980,7 +1989,8 @@ ATTRIBUTE_COLD static void buf_flush_sync_for_checkpoint(lsn_t lsn)
mysql_mutex_unlock(&log_sys.flush_order_mutex); mysql_mutex_unlock(&log_sys.flush_order_mutex);
const lsn_t checkpoint_lsn= measure ? measure : newest_lsn; const lsn_t checkpoint_lsn= measure ? measure : newest_lsn;
if (checkpoint_lsn > log_sys.last_checkpoint_lsn + SIZE_OF_FILE_CHECKPOINT) if (!recv_recovery_is_on() &&
checkpoint_lsn > log_sys.last_checkpoint_lsn + SIZE_OF_FILE_CHECKPOINT)
{ {
mysql_mutex_unlock(&buf_pool.flush_list_mutex); mysql_mutex_unlock(&buf_pool.flush_list_mutex);
log_checkpoint_low(checkpoint_lsn, newest_lsn); log_checkpoint_low(checkpoint_lsn, newest_lsn);
...@@ -2004,7 +2014,7 @@ ATTRIBUTE_COLD static void buf_flush_sync_for_checkpoint(lsn_t lsn) ...@@ -2004,7 +2014,7 @@ ATTRIBUTE_COLD static void buf_flush_sync_for_checkpoint(lsn_t lsn)
else if (measure >= buf_flush_async_lsn) else if (measure >= buf_flush_async_lsn)
buf_flush_async_lsn= 0; buf_flush_async_lsn= 0;
/* wake up buf_flush_wait_flushed() */ /* wake up buf_flush_wait() */
pthread_cond_broadcast(&buf_pool.done_flush_list); pthread_cond_broadcast(&buf_pool.done_flush_list);
lsn= std::max(lsn, target); lsn= std::max(lsn, target);
...@@ -2264,7 +2274,7 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*) ...@@ -2264,7 +2274,7 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
if (UNIV_UNLIKELY(lsn_limit != 0)) if (UNIV_UNLIKELY(lsn_limit != 0))
{ {
buf_flush_sync_lsn= 0; buf_flush_sync_lsn= 0;
/* wake up buf_flush_wait_flushed() */ /* wake up buf_flush_wait() */
pthread_cond_broadcast(&buf_pool.done_flush_list); pthread_cond_broadcast(&buf_pool.done_flush_list);
} }
unemployed: unemployed:
...@@ -2334,7 +2344,7 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*) ...@@ -2334,7 +2344,7 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
if (UNIV_UNLIKELY(lsn_limit != 0)) if (UNIV_UNLIKELY(lsn_limit != 0))
{ {
n_flushed= buf_flush_list(srv_max_io_capacity, lsn_limit); n_flushed= buf_flush_list(srv_max_io_capacity, lsn_limit);
/* wake up buf_flush_wait_flushed() */ /* wake up buf_flush_wait() */
pthread_cond_broadcast(&buf_pool.done_flush_list); pthread_cond_broadcast(&buf_pool.done_flush_list);
goto try_checkpoint; goto try_checkpoint;
} }
...@@ -2440,6 +2450,7 @@ ATTRIBUTE_COLD void buf_flush_page_cleaner_init() ...@@ -2440,6 +2450,7 @@ ATTRIBUTE_COLD void buf_flush_page_cleaner_init()
os_thread_create(buf_flush_page_cleaner); os_thread_create(buf_flush_page_cleaner);
} }
#if defined(HAVE_SYSTEMD) && !defined(EMBEDDED_LIBRARY)
/** @return the number of dirty pages in the buffer pool */ /** @return the number of dirty pages in the buffer pool */
static ulint buf_flush_list_length() static ulint buf_flush_list_length()
{ {
...@@ -2448,6 +2459,7 @@ static ulint buf_flush_list_length() ...@@ -2448,6 +2459,7 @@ static ulint buf_flush_list_length()
mysql_mutex_unlock(&buf_pool.flush_list_mutex); mysql_mutex_unlock(&buf_pool.flush_list_mutex);
return len; return len;
} }
#endif
/** Flush the buffer pool on shutdown. */ /** Flush the buffer pool on shutdown. */
ATTRIBUTE_COLD void buf_flush_buffer_pool() ATTRIBUTE_COLD void buf_flush_buffer_pool()
...@@ -2458,13 +2470,15 @@ ATTRIBUTE_COLD void buf_flush_buffer_pool() ...@@ -2458,13 +2470,15 @@ ATTRIBUTE_COLD void buf_flush_buffer_pool()
service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL, service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
"Waiting to flush the buffer pool"); "Waiting to flush the buffer pool");
while (buf_pool.n_flush_list() || buf_flush_list_length()) mysql_mutex_lock(&buf_pool.flush_list_mutex);
while (buf_pool.get_oldest_modification(0))
{ {
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
buf_flush_list(srv_max_io_capacity); buf_flush_list(srv_max_io_capacity);
timespec abstime;
if (buf_pool.n_flush_list()) if (buf_pool.n_flush_list())
{ {
timespec abstime;
service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL, service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
"Waiting to flush " ULINTPF " pages", "Waiting to flush " ULINTPF " pages",
buf_flush_list_length()); buf_flush_list_length());
...@@ -2475,24 +2489,48 @@ ATTRIBUTE_COLD void buf_flush_buffer_pool() ...@@ -2475,24 +2489,48 @@ ATTRIBUTE_COLD void buf_flush_buffer_pool()
&abstime); &abstime);
mysql_mutex_unlock(&buf_pool.mutex); mysql_mutex_unlock(&buf_pool.mutex);
} }
mysql_mutex_lock(&buf_pool.flush_list_mutex);
} }
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
ut_ad(!buf_pool.any_io_pending()); ut_ad(!buf_pool.any_io_pending());
} }
/** Synchronously flush dirty blocks during recv_sys_t::apply().
NOTE: The calling thread is not allowed to hold any buffer page latches! */
void buf_flush_sync_batch(lsn_t lsn)
{
thd_wait_begin(nullptr, THD_WAIT_DISKIO);
tpool::tpool_wait_begin();
mysql_mutex_lock(&buf_pool.flush_list_mutex);
buf_flush_wait(lsn);
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
tpool::tpool_wait_end();
thd_wait_end(nullptr);
}
/** Synchronously flush dirty blocks. /** Synchronously flush dirty blocks.
NOTE: The calling thread is not allowed to hold any buffer page latches! */ NOTE: The calling thread is not allowed to hold any buffer page latches! */
void buf_flush_sync() void buf_flush_sync()
{ {
ut_ad(!sync_check_iterate(dict_sync_check())); ut_ad(!sync_check_iterate(dict_sync_check()));
if (recv_recovery_is_on())
recv_sys.apply(true);
thd_wait_begin(nullptr, THD_WAIT_DISKIO);
tpool::tpool_wait_begin();
mysql_mutex_lock(&buf_pool.flush_list_mutex);
for (;;) for (;;)
{ {
const ulint n_flushed= buf_flush_list(srv_max_io_capacity); const lsn_t lsn= log_sys.get_lsn();
buf_flush_wait_batch_end_acquiring_mutex(false); buf_flush_wait(lsn);
if (!n_flushed && !buf_flush_list_length()) if (lsn == log_sys.get_lsn())
return; break;
} }
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
tpool::tpool_wait_end();
thd_wait_end(nullptr);
} }
#ifdef UNIV_DEBUG #ifdef UNIV_DEBUG
......
...@@ -85,13 +85,6 @@ buf_flush_init_for_writing( ...@@ -85,13 +85,6 @@ buf_flush_init_for_writing(
void* page_zip_, void* page_zip_,
bool use_full_checksum); bool use_full_checksum);
/** Write out dirty blocks from buf_pool.flush_list.
@param max_n wished maximum mumber of blocks flushed
@param lsn buf_pool.get_oldest_modification(LSN_MAX) target
@return the number of processed pages
@retval 0 if a buf_pool.flush_list batch is already running */
ulint buf_flush_list(ulint max_n= ULINT_UNDEFINED, lsn_t lsn= LSN_MAX);
/** Try to flush dirty pages that belong to a given tablespace. /** Try to flush dirty pages that belong to a given tablespace.
@param space tablespace @param space tablespace
@param n_flushed number of pages written @param n_flushed number of pages written
...@@ -144,6 +137,10 @@ ATTRIBUTE_COLD void buf_flush_buffer_pool(); ...@@ -144,6 +137,10 @@ ATTRIBUTE_COLD void buf_flush_buffer_pool();
void buf_flush_validate(); void buf_flush_validate();
#endif /* UNIV_DEBUG */ #endif /* UNIV_DEBUG */
/** Synchronously flush dirty blocks during recv_sys_t::apply().
NOTE: The calling thread is not allowed to hold any buffer page latches! */
void buf_flush_sync_batch(lsn_t lsn);
/** Synchronously flush dirty blocks. /** Synchronously flush dirty blocks.
NOTE: The calling thread is not allowed to hold any buffer page latches! */ NOTE: The calling thread is not allowed to hold any buffer page latches! */
void buf_flush_sync(); void buf_flush_sync();
......
...@@ -1111,8 +1111,7 @@ void os_aio_free(); ...@@ -1111,8 +1111,7 @@ void os_aio_free();
@retval DB_IO_ERROR on I/O error */ @retval DB_IO_ERROR on I/O error */
dberr_t os_aio(const IORequest &type, void *buf, os_offset_t offset, size_t n); dberr_t os_aio(const IORequest &type, void *buf, os_offset_t offset, size_t n);
/** Wait until there are no pending asynchronous writes. /** Wait until there are no pending asynchronous writes. */
Only used on FLUSH TABLES...FOR EXPORT. */
void os_aio_wait_until_no_pending_writes(); void os_aio_wait_until_no_pending_writes();
......
...@@ -511,7 +511,7 @@ do { \ ...@@ -511,7 +511,7 @@ do { \
#ifdef HAVE_PSI_STAGE_INTERFACE #ifdef HAVE_PSI_STAGE_INTERFACE
/** Performance schema stage event for monitoring ALTER TABLE progress /** Performance schema stage event for monitoring ALTER TABLE progress
everything after flush log_make_checkpoint(). */ in ha_innobase::commit_inplace_alter_table(). */
extern PSI_stage_info srv_stage_alter_table_end; extern PSI_stage_info srv_stage_alter_table_end;
/** Performance schema stage event for monitoring ALTER TABLE progress /** Performance schema stage event for monitoring ALTER TABLE progress
......
...@@ -2752,7 +2752,7 @@ void recv_sys_t::apply(bool last_batch) ...@@ -2752,7 +2752,7 @@ void recv_sys_t::apply(bool last_batch)
/* Instead of flushing, last_batch could sort the buf_pool.flush_list /* Instead of flushing, last_batch could sort the buf_pool.flush_list
in ascending order of buf_page_t::oldest_modification. */ in ascending order of buf_page_t::oldest_modification. */
buf_flush_sync(); buf_flush_sync_batch(recovered_lsn);
if (!last_batch) if (!last_batch)
{ {
......
...@@ -3821,8 +3821,7 @@ static void os_aio_wait_until_no_pending_writes_low() ...@@ -3821,8 +3821,7 @@ static void os_aio_wait_until_no_pending_writes_low()
tpool::tpool_wait_end(); tpool::tpool_wait_end();
} }
/** Wait until there are no pending asynchronous writes. /** Wait until there are no pending asynchronous writes. */
Only used on FLUSH TABLES...FOR EXPORT. */
void os_aio_wait_until_no_pending_writes() void os_aio_wait_until_no_pending_writes()
{ {
os_aio_wait_until_no_pending_writes_low(); os_aio_wait_until_no_pending_writes_low();
......
...@@ -2211,8 +2211,6 @@ row_import_cleanup( ...@@ -2211,8 +2211,6 @@ row_import_cleanup(
DBUG_EXECUTE_IF("ib_import_before_checkpoint_crash", DBUG_SUICIDE();); DBUG_EXECUTE_IF("ib_import_before_checkpoint_crash", DBUG_SUICIDE(););
log_make_checkpoint();
return(err); return(err);
} }
......
...@@ -557,7 +557,7 @@ char srv_buffer_pool_load_at_startup = TRUE; ...@@ -557,7 +557,7 @@ char srv_buffer_pool_load_at_startup = TRUE;
#ifdef HAVE_PSI_STAGE_INTERFACE #ifdef HAVE_PSI_STAGE_INTERFACE
/** Performance schema stage event for monitoring ALTER TABLE progress /** Performance schema stage event for monitoring ALTER TABLE progress
everything after flush log_make_checkpoint(). */ in ha_innobase::commit_inplace_alter_table(). */
PSI_stage_info srv_stage_alter_table_end PSI_stage_info srv_stage_alter_table_end
= {0, "alter table (end)", PSI_FLAG_STAGE_PROGRESS}; = {0, "alter table (end)", PSI_FLAG_STAGE_PROGRESS};
......
...@@ -329,6 +329,13 @@ static dberr_t create_log_file(bool create_new_db, lsn_t lsn, ...@@ -329,6 +329,13 @@ static dberr_t create_log_file(bool create_new_db, lsn_t lsn,
log_sys.log.write_header_durable(lsn); log_sys.log.write_header_durable(lsn);
ut_ad(srv_startup_is_before_trx_rollback_phase);
if (create_new_db) {
srv_startup_is_before_trx_rollback_phase = false;
}
/* Enable checkpoints in buf_flush_page_cleaner(). */
recv_sys.recovery_on = false;
mysql_mutex_unlock(&log_sys.mutex); mysql_mutex_unlock(&log_sys.mutex);
log_make_checkpoint(); log_make_checkpoint();
...@@ -897,87 +904,70 @@ static lsn_t srv_prepare_to_delete_redo_log_file(bool old_exists) ...@@ -897,87 +904,70 @@ static lsn_t srv_prepare_to_delete_redo_log_file(bool old_exists)
{ {
DBUG_ENTER("srv_prepare_to_delete_redo_log_file"); DBUG_ENTER("srv_prepare_to_delete_redo_log_file");
lsn_t flushed_lsn; /* Disable checkpoints in the page cleaner. */
ulint count = 0; ut_ad(!recv_sys.recovery_on);
recv_sys.recovery_on= true;
if (log_sys.log.subformat != 2) {
srv_log_file_size = 0;
}
for (;;) {
/* Clean the buffer pool. */ /* Clean the buffer pool. */
buf_flush_sync(); buf_flush_sync();
if (log_sys.log.subformat != 2)
srv_log_file_size= 0;
DBUG_EXECUTE_IF("innodb_log_abort_1", DBUG_RETURN(0);); DBUG_EXECUTE_IF("innodb_log_abort_1", DBUG_RETURN(0););
DBUG_PRINT("ib_log", ("After innodb_log_abort_1")); DBUG_PRINT("ib_log", ("After innodb_log_abort_1"));
mysql_mutex_lock(&log_sys.mutex); mysql_mutex_lock(&log_sys.mutex);
const bool latest_format= (log_sys.log.format & ~log_t::FORMAT_ENCRYPTED) ==
log_t::FORMAT_10_5;
lsn_t flushed_lsn= log_sys.get_lsn();
fil_names_clear(log_sys.get_lsn(), false); if (latest_format)
flushed_lsn = log_sys.get_lsn();
{ {
ib::info info; fil_names_clear(flushed_lsn, false);
if (srv_log_file_size == 0 flushed_lsn= log_sys.get_lsn();
|| (log_sys.log.format & ~log_t::FORMAT_ENCRYPTED)
!= log_t::FORMAT_10_5) {
info << "Upgrading redo log: ";
} else if (!old_exists
|| srv_log_file_size
!= srv_log_file_size_requested) {
if (srv_encrypt_log
== (my_bool)log_sys.is_encrypted()) {
info << (srv_encrypt_log
? "Resizing encrypted"
: "Resizing");
} else if (srv_encrypt_log) {
info << "Encrypting and resizing";
} else {
info << "Removing encryption"
" and resizing";
} }
info << " redo log from " << srv_log_file_size {
<< " to "; const char *msg;
} else if (srv_encrypt_log) { if (!latest_format || srv_log_file_size == 0)
info << "Encrypting redo log: "; {
} else { msg= "Upgrading redo log: ";
info << "Removing redo log encryption: "; same_size:
ib::info() << msg << srv_log_file_size_requested << " bytes; LSN="
<< flushed_lsn;
} }
else if (old_exists && srv_log_file_size == srv_log_file_size_requested)
info << srv_log_file_size_requested {
msg= srv_encrypt_log
? "Encrypting redo log: " : "Removing redo log encryption: ";
goto same_size;
}
else
{
if (srv_encrypt_log == (my_bool)log_sys.is_encrypted())
msg= srv_encrypt_log ? "Resizing encrypted" : "Resizing";
else
msg= srv_encrypt_log
? "Encrypting and resizing"
: "Removing encryption and resizing";
ib::info() << msg << " redo log from " << srv_log_file_size << " to "
<< srv_log_file_size_requested
<< " bytes; LSN=" << flushed_lsn; << " bytes; LSN=" << flushed_lsn;
} }
}
mysql_mutex_unlock(&log_sys.mutex); mysql_mutex_unlock(&log_sys.mutex);
if (flushed_lsn != log_sys.get_flushed_lsn()) { if (flushed_lsn != log_sys.get_flushed_lsn())
{
log_write_up_to(flushed_lsn, false); log_write_up_to(flushed_lsn, false);
log_sys.log.flush(); log_sys.log.flush();
} }
ut_ad(flushed_lsn == log_sys.get_lsn()); ut_ad(flushed_lsn == log_sys.get_lsn());
ut_ad(!buf_pool.any_io_pending());
/* Check if the buffer pools are clean. If not
retry till it is clean. */
if (ulint pending_io = buf_pool.io_pending()) {
count++;
/* Print a message every 60 seconds if we
are waiting to clean the buffer pools */
if (srv_print_verbose_log && count > 600) {
ib::info() << "Waiting for "
<< pending_io << " buffer "
<< "page I/Os to complete";
count = 0;
}
os_thread_sleep(100000);
continue;
}
break;
}
DBUG_RETURN(flushed_lsn); DBUG_RETURN(flushed_lsn);
} }
...@@ -1259,7 +1249,7 @@ dberr_t srv_start(bool create_new_db) ...@@ -1259,7 +1249,7 @@ dberr_t srv_start(bool create_new_db)
ut_ad(buf_page_cleaner_is_active); ut_ad(buf_page_cleaner_is_active);
} }
srv_startup_is_before_trx_rollback_phase = !create_new_db; srv_startup_is_before_trx_rollback_phase = true;
/* Check if undo tablespaces and redo log files exist before creating /* Check if undo tablespaces and redo log files exist before creating
a new system tablespace */ a new system tablespace */
...@@ -1308,7 +1298,6 @@ dberr_t srv_start(bool create_new_db) ...@@ -1308,7 +1298,6 @@ dberr_t srv_start(bool create_new_db)
if (create_new_db) { if (create_new_db) {
flushed_lsn = log_sys.get_lsn(); flushed_lsn = log_sys.get_lsn();
log_sys.set_flushed_lsn(flushed_lsn); log_sys.set_flushed_lsn(flushed_lsn);
buf_flush_sync();
err = create_log_file(true, flushed_lsn, logfile0); err = create_log_file(true, flushed_lsn, logfile0);
...@@ -1371,6 +1360,9 @@ dberr_t srv_start(bool create_new_db) ...@@ -1371,6 +1360,9 @@ dberr_t srv_start(bool create_new_db)
if (!log_set_capacity(srv_log_file_size_requested)) { if (!log_set_capacity(srv_log_file_size_requested)) {
return(srv_init_abort(DB_ERROR)); return(srv_init_abort(DB_ERROR));
} }
/* Enable checkpoints in the page cleaner. */
recv_sys.recovery_on = false;
} }
file_checked: file_checked:
...@@ -2024,11 +2016,8 @@ void innodb_shutdown() ...@@ -2024,11 +2016,8 @@ void innodb_shutdown()
break; break;
case SRV_OPERATION_RESTORE: case SRV_OPERATION_RESTORE:
case SRV_OPERATION_RESTORE_EXPORT: case SRV_OPERATION_RESTORE_EXPORT:
srv_shutdown_state = SRV_SHUTDOWN_CLEANUP;
if (!buf_page_cleaner_is_active) {
break;
}
mysql_mutex_lock(&buf_pool.flush_list_mutex); mysql_mutex_lock(&buf_pool.flush_list_mutex);
srv_shutdown_state = SRV_SHUTDOWN_CLEANUP;
while (buf_page_cleaner_is_active) { while (buf_page_cleaner_is_active) {
pthread_cond_signal(&buf_pool.do_flush_list); pthread_cond_signal(&buf_pool.do_flush_list);
my_cond_wait(&buf_pool.done_flush_list, my_cond_wait(&buf_pool.done_flush_list,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment