Commit 5530926e authored by Marko Mäkelä

Bug 12323643 - CLEAN UP THE INNODB THREAD SHUTDOWN AND ASSERTIONS (WL#5136)

On shutdown, do not exit threads in os_event_wait(). This method of
exiting was only used by the I/O handler threads. Exit them on a
higher level.

os_event_wait_low(), os_event_wait_time_low(): Do not exit on shutdown.

os_thread_exit(), ut_dbg_assertion_failed(), ut_print_timestamp(): Add
attribute cold, so that GCC knows that these functions are rarely
invoked and can be optimized for size.

os_aio_linux_collect(): Return on shutdown.

os_aio_linux_handle(), os_aio_simulated_handle(), os_aio_windows_handle():
Set *message1 = *message2 = NULL and return TRUE on shutdown.

fil_aio_wait(): Return on shutdown.

logs_empty_and_mark_files_at_shutdown(): Even in very fast shutdown
(innodb_fast_shutdown=2), allow the background threads to exit, but
skip the flushing and log checkpointing.

innobase_shutdown_for_mysql(): Always wait for all the threads to exit.

rb:633 approved by Sunny Bains
parent 272fa443
......@@ -4527,8 +4527,8 @@ fil_aio_wait(
ret = os_aio_linux_handle(segment, &fil_node,
&message, &type);
#else
ret = 0; /* Eliminate compiler warning */
ut_error;
ret = 0; /* Eliminate compiler warning */
#endif
} else {
srv_set_io_thread_op_info(segment, "simulated aio handle");
......@@ -4538,6 +4538,10 @@ fil_aio_wait(
}
ut_a(ret);
if (UNIV_UNLIKELY(fil_node == NULL)) {
ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
return;
}
srv_set_io_thread_op_info(segment, "complete io for fil node");
......
......@@ -150,10 +150,7 @@ os_event_free(
os_event_t event); /*!< in: event to free */
/**********************************************************//**
Waits for an event object until it is in the signaled state. If
srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the
waiting thread when the event becomes signaled (or immediately if the
event is already in the signaled state).
Waits for an event object until it is in the signaled state.
Typically, if the event has been signalled after the os_event_reset()
we'll return immediately because event->is_set == TRUE.
......
......@@ -107,8 +107,9 @@ UNIV_INTERN
void
os_thread_exit(
/*===========*/
void* exit_value); /*!< in: exit value; in Windows this void*
void* exit_value) /*!< in: exit value; in Windows this void*
is cast as a DWORD */
__attribute__((cold, noreturn));
/*****************************************************************//**
Returns the thread identifier of current thread.
@return current thread identifier */
......
......@@ -50,9 +50,10 @@ UNIV_INTERN
void
ut_dbg_assertion_failed(
/*====================*/
const char* expr, /*!< in: the failed assertion */
const char* file, /*!< in: source file containing the assertion */
ulint line); /*!< in: line number of the assertion */
const char* expr, /*!< in: the failed assertion */
const char* file, /*!< in: source file containing the assertion */
ulint line) /*!< in: line number of the assertion */
__attribute__((nonnull(2), cold));
#if defined(__WIN__) || defined(__INTEL_COMPILER)
# undef UT_DBG_USE_ABORT
......
......@@ -275,7 +275,8 @@ UNIV_INTERN
void
ut_print_timestamp(
/*===============*/
FILE* file); /*!< in: file where to print */
FILE* file) /*!< in: file where to print */
__attribute__((nonnull, cold));
/**********************************************************//**
Sprintfs a timestamp to a buffer, 13..14 chars plus terminating NUL. */
UNIV_INTERN
......
......@@ -3078,6 +3078,7 @@ logs_empty_and_mark_files_at_shutdown(void)
{
ib_uint64_t lsn;
ulint arch_log_no;
ibool server_busy;
if (srv_print_verbose_log) {
ut_print_timestamp(stderr);
......@@ -3092,14 +3093,12 @@ loop:
mutex_enter(&kernel_mutex);
/* We need the monitor threads to stop before we proceed with a
normal shutdown. In case of very fast shutdown, however, we can
proceed without waiting for monitor threads. */
/* We need the monitor threads to stop before we proceed with
a shutdown. */
if (srv_fast_shutdown < 2
&& (srv_error_monitor_active
|| srv_lock_timeout_active
|| srv_monitor_active)) {
if (srv_error_monitor_active
|| srv_lock_timeout_active
|| srv_monitor_active) {
mutex_exit(&kernel_mutex);
......@@ -3114,65 +3113,57 @@ loop:
for the 'very fast' shutdown, because the InnoDB layer may have
committed or prepared transactions and we don't want to lose them. */
if (trx_n_mysql_transactions > 0
|| UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
mutex_exit(&kernel_mutex);
goto loop;
}
if (srv_fast_shutdown == 2) {
/* In this fastest shutdown we do not flush the buffer pool:
it is essentially a 'crash' of the InnoDB server. Make sure
that the log is all flushed to disk, so that we can recover
all committed transactions in a crash recovery. We must not
write the lsn stamps to the data files, since at a startup
InnoDB deduces from the stamps if the previous shutdown was
clean. */
log_buffer_flush_to_disk();
mutex_exit(&kernel_mutex);
return; /* We SKIP ALL THE REST !! */
}
server_busy = trx_n_mysql_transactions > 0
|| UT_LIST_GET_LEN(trx_sys->trx_list) > 0;
mutex_exit(&kernel_mutex);
/* Check that the background threads are suspended */
if (srv_is_any_background_thread_active()) {
if (server_busy || srv_is_any_background_thread_active()) {
goto loop;
}
mutex_enter(&(log_sys->mutex));
if (log_sys->n_pending_checkpoint_writes
mutex_enter(&log_sys->mutex);
server_busy = log_sys->n_pending_checkpoint_writes
#ifdef UNIV_LOG_ARCHIVE
|| log_sys->n_pending_archive_ios
|| log_sys->n_pending_archive_ios
#endif /* UNIV_LOG_ARCHIVE */
|| log_sys->n_pending_writes) {
mutex_exit(&(log_sys->mutex));
goto loop;
}
mutex_exit(&(log_sys->mutex));
if (!buf_pool_check_no_pending_io()) {
|| log_sys->n_pending_writes;
mutex_exit(&log_sys->mutex);
if (server_busy || !buf_pool_check_no_pending_io()) {
goto loop;
}
#ifdef UNIV_LOG_ARCHIVE
log_archive_all();
#endif /* UNIV_LOG_ARCHIVE */
if (srv_fast_shutdown == 2) {
/* In this fastest shutdown we do not flush the buffer
pool: it is essentially a 'crash' of the InnoDB
server. Make sure that the log is all flushed to disk,
so that we can recover all committed transactions in a
crash recovery. We must not write the lsn stamps to
the data files, since at a startup InnoDB deduces from
the stamps if the previous shutdown was clean. */
log_buffer_flush_to_disk();
/* Check that the background threads stay suspended */
if (srv_is_any_background_thread_active()) {
fprintf(stderr,
"InnoDB: Warning: some background thread"
" woke up during shutdown\n");
goto loop;
}
srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
fil_close_all_files();
ut_a(!srv_is_any_background_thread_active());
return;
}
log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
mutex_enter(&(log_sys->mutex));
mutex_enter(&log_sys->mutex);
lsn = log_sys->lsn;
......@@ -3183,7 +3174,7 @@ loop:
#endif /* UNIV_LOG_ARCHIVE */
) {
mutex_exit(&(log_sys->mutex));
mutex_exit(&log_sys->mutex);
goto loop;
}
......@@ -3201,7 +3192,7 @@ loop:
log_archive_close_groups(TRUE);
#endif /* UNIV_LOG_ARCHIVE */
mutex_exit(&(log_sys->mutex));
mutex_exit(&log_sys->mutex);
/* Check that the background threads stay suspended */
if (srv_is_any_background_thread_active()) {
......
......@@ -4064,13 +4064,13 @@ os_aio_func(
}
try_again:
if (mode == OS_AIO_NORMAL) {
if (type == OS_FILE_READ) {
array = os_aio_read_array;
} else {
array = os_aio_write_array;
}
} else if (mode == OS_AIO_IBUF) {
switch (mode) {
case OS_AIO_NORMAL:
array = (type == OS_FILE_READ)
? os_aio_read_array
: os_aio_write_array;
break;
case OS_AIO_IBUF:
ut_ad(type == OS_FILE_READ);
/* Reduce probability of deadlock bugs in connection with ibuf:
do not let the ibuf i/o handler sleep */
......@@ -4078,19 +4078,21 @@ try_again:
wake_later = FALSE;
array = os_aio_ibuf_array;
} else if (mode == OS_AIO_LOG) {
break;
case OS_AIO_LOG:
array = os_aio_log_array;
} else if (mode == OS_AIO_SYNC) {
break;
case OS_AIO_SYNC:
array = os_aio_sync_array;
#if defined(LINUX_NATIVE_AIO)
/* In Linux native AIO we don't use sync IO array. */
ut_a(!srv_use_native_aio);
#endif /* LINUX_NATIVE_AIO */
} else {
array = NULL; /* Eliminate compiler warning */
break;
default:
ut_error;
array = NULL; /* Eliminate compiler warning */
}
slot = os_aio_array_reserve_slot(type, array, message1, message2, file,
......@@ -4253,11 +4255,17 @@ os_aio_windows_handle(
INFINITE);
}
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_thread_exit(NULL);
os_mutex_enter(array->mutex);
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS
&& array->n_reserved == 0) {
*message1 = NULL;
*message2 = NULL;
os_mutex_exit(array->mutex);
return(TRUE);
}
os_mutex_enter(array->mutex);
ut_a(i >= WAIT_OBJECT_0 && i <= WAIT_OBJECT_0 + n);
slot = os_aio_array_get_nth_slot(array, i + segment * n);
......@@ -4403,14 +4411,6 @@ os_aio_linux_collect(
retry:
/* Go down if we are in shutdown mode.
In case of srv_fast_shutdown == 2, there may be pending
IO requests but that should be OK as we essentially treat
that as a crash of InnoDB. */
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_thread_exit(NULL);
}
/* Initialize the events. The timeout value is arbitrary.
We probably need to experiment with it a little. */
memset(events, 0, sizeof(*events) * seg_size);
......@@ -4419,76 +4419,72 @@ retry:
ret = io_getevents(io_ctx, 1, seg_size, events, &timeout);
/* This error handling is for any error in collecting the
IO requests. The errors, if any, for any particular IO
request are simply passed on to the calling routine. */
/* Not enough resources! Try again. */
if (ret == -EAGAIN) {
goto retry;
}
/* Interrupted! I have tested the behaviour in case of an
interrupt. If we have some completed IOs available then
the return code will be the number of IOs. We get EINTR only
if there are no completed IOs and we have been interrupted. */
if (ret == -EINTR) {
goto retry;
}
/* No pending request! Go back and check again. */
if (ret == 0) {
goto retry;
}
if (ret > 0) {
for (i = 0; i < ret; i++) {
os_aio_slot_t* slot;
struct iocb* control;
/* All other errors! should cause a trap for now. */
if (UNIV_UNLIKELY(ret < 0)) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: unexpected ret_code[%d] from"
" io_getevents()!\n", ret);
ut_error;
}
control = (struct iocb *)events[i].obj;
ut_a(control != NULL);
ut_a(ret > 0);
slot = (os_aio_slot_t *) control->data;
for (i = 0; i < ret; i++) {
os_aio_slot_t* slot;
struct iocb* control;
/* Some sanity checks. */
ut_a(slot != NULL);
ut_a(slot->reserved);
control = (struct iocb *)events[i].obj;
ut_a(control != NULL);
#if defined(UNIV_AIO_DEBUG)
fprintf(stderr,
"io_getevents[%c]: slot[%p] ctx[%p]"
" seg[%lu]\n",
(slot->type == OS_FILE_WRITE) ? 'w' : 'r',
slot, io_ctx, segment);
#endif
slot = (os_aio_slot_t *) control->data;
/* We are not scribbling previous segment. */
ut_a(slot->pos >= start_pos);
/* Some sanity checks. */
ut_a(slot != NULL);
ut_a(slot->reserved);
/* We have not overstepped to next segment. */
ut_a(slot->pos < end_pos);
#if defined(UNIV_AIO_DEBUG)
fprintf(stderr,
"io_getevents[%c]: slot[%p] ctx[%p]"
" seg[%lu]\n",
(slot->type == OS_FILE_WRITE) ? 'w' : 'r',
slot, io_ctx, segment);
#endif
/* Mark this request as completed. The error handling
will be done in the calling function. */
os_mutex_enter(array->mutex);
slot->n_bytes = events[i].res;
slot->ret = events[i].res2;
slot->io_already_done = TRUE;
os_mutex_exit(array->mutex);
}
return;
}
/* We are not scribbling previous segment. */
ut_a(slot->pos >= start_pos);
if (UNIV_UNLIKELY(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS)) {
return;
}
/* We have not overstepped to next segment. */
ut_a(slot->pos < end_pos);
/* This error handling is for any error in collecting the
IO requests. The errors, if any, for any particular IO
request are simply passed on to the calling routine. */
/* Mark this request as completed. The error handling
will be done in the calling function. */
os_mutex_enter(array->mutex);
slot->n_bytes = events[i].res;
slot->ret = events[i].res2;
slot->io_already_done = TRUE;
os_mutex_exit(array->mutex);
switch (ret) {
case -EAGAIN:
/* Not enough resources! Try again. */
case -EINTR:
/* Interrupted! I have tested the behaviour in case of an
interrupt. If we have some completed IOs available then
the return code will be the number of IOs. We get EINTR only
if there are no completed IOs and we have been interrupted. */
case 0:
/* No pending request! Go back and check again. */
goto retry;
}
return;
/* All other errors should cause a trap for now. */
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: unexpected ret_code[%d] from io_getevents()!\n",
ret);
ut_error;
}
/**********************************************************************//**
......@@ -4532,20 +4528,35 @@ os_aio_linux_handle(
/* Loop until we have found a completed request. */
for (;;) {
ibool any_reserved = FALSE;
os_mutex_enter(array->mutex);
for (i = 0; i < n; ++i) {
slot = os_aio_array_get_nth_slot(
array, i + segment * n);
if (slot->reserved && slot->io_already_done) {
array, i + segment * n);
if (!slot->reserved) {
continue;
} else if (slot->io_already_done) {
/* Something for us to work on. */
goto found;
} else {
any_reserved = TRUE;
}
}
os_mutex_exit(array->mutex);
/* We don't have any completed request.
Wait for some request. Note that we return
/* There is no completed request.
If there is no pending request at all,
and the system is being shut down, exit. */
if (UNIV_UNLIKELY
(!any_reserved
&& srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS)) {
*message1 = NULL;
*message2 = NULL;
return(TRUE);
}
/* Wait for some request. Note that we return
from wait iff we have found a request. */
srv_set_io_thread_op_info(global_seg,
......@@ -4641,6 +4652,7 @@ os_aio_simulated_handle(
byte* combined_buf;
byte* combined_buf2;
ibool ret;
ibool any_reserved;
ulint n;
ulint i;
......@@ -4671,18 +4683,21 @@ restart:
goto recommended_sleep;
}
os_mutex_enter(array->mutex);
srv_set_io_thread_op_info(global_segment,
"looking for i/o requests (b)");
/* Check if there is a slot for which the i/o has already been
done */
any_reserved = FALSE;
os_mutex_enter(array->mutex);
for (i = 0; i < n; i++) {
slot = os_aio_array_get_nth_slot(array, i + segment * n);
if (slot->reserved && slot->io_already_done) {
if (!slot->reserved) {
continue;
} else if (slot->io_already_done) {
if (os_aio_print_debug) {
fprintf(stderr,
......@@ -4694,9 +4709,23 @@ restart:
ret = TRUE;
goto slot_io_done;
} else {
any_reserved = TRUE;
}
}
/* There is no completed request.
If there is no pending request at all,
and the system is being shut down, exit. */
if (UNIV_UNLIKELY
(!any_reserved
&& srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS)) {
os_mutex_exit(array->mutex);
*message1 = NULL;
*message2 = NULL;
return(TRUE);
}
n_consecutive = 0;
/* If there are at least 2 seconds old requests, then pick the oldest
......
......@@ -558,10 +558,7 @@ os_event_free(
}
/**********************************************************//**
Waits for an event object until it is in the signaled state. If
srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the
waiting thread when the event becomes signaled (or immediately if the
event is already in the signaled state).
Waits for an event object until it is in the signaled state.
Typically, if the event has been signalled after the os_event_reset()
we'll return immediately because event->is_set == TRUE.
......@@ -586,8 +583,6 @@ os_event_wait_low(
returned by previous call of
os_event_reset(). */
{
ib_int64_t old_signal_count;
#ifdef __WIN__
if(!srv_use_native_conditions) {
DWORD err;
......@@ -600,43 +595,25 @@ os_event_wait_low(
err = WaitForSingleObject(event->handle, INFINITE);
ut_a(err == WAIT_OBJECT_0);
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_thread_exit(NULL);
}
return;
}
#endif
os_fast_mutex_lock(&(event->os_mutex));
os_fast_mutex_lock(&event->os_mutex);
if (reset_sig_count) {
old_signal_count = reset_sig_count;
} else {
old_signal_count = event->signal_count;
if (!reset_sig_count) {
reset_sig_count = event->signal_count;
}
for (;;) {
if (event->is_set == TRUE
|| event->signal_count != old_signal_count) {
os_fast_mutex_unlock(&(event->os_mutex));
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_thread_exit(NULL);
}
/* Ok, we may return */
return;
}
while (!event->is_set && event->signal_count == reset_sig_count) {
os_cond_wait(&(event->cond_var), &(event->os_mutex));
/* Solaris manual said that spurious wakeups may occur: we
have to check if the event really has been signaled after
we came here to wait */
}
os_fast_mutex_unlock(&event->os_mutex);
}
/**********************************************************//**
......@@ -657,7 +634,6 @@ os_event_wait_time_low(
{
ibool timed_out = FALSE;
ib_int64_t old_signal_count;
#ifdef __WIN__
DWORD time_in_ms;
......@@ -727,15 +703,12 @@ os_event_wait_time_low(
os_fast_mutex_lock(&event->os_mutex);
if (reset_sig_count) {
old_signal_count = reset_sig_count;
} else {
old_signal_count = event->signal_count;
if (!reset_sig_count) {
reset_sig_count = event->signal_count;
}
do {
if (event->is_set == TRUE
|| event->signal_count != old_signal_count) {
if (event->is_set || event->signal_count != reset_sig_count) {
break;
}
......@@ -753,11 +726,6 @@ os_event_wait_time_low(
os_fast_mutex_unlock(&event->os_mutex);
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_thread_exit(NULL);
}
return(timed_out ? OS_SYNC_TIME_EXCEEDED : 0);
}
......
......@@ -3082,11 +3082,7 @@ suspend_thread:
os_event_wait(slot->event);
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
/* This is only extra safety, the thread should exit
already when the event wait ends */
os_thread_exit(NULL);
}
/* When there is user activity, InnoDB will set the event and the
......
......@@ -2122,17 +2122,9 @@ innobase_shutdown_for_mysql(void)
srv_shutdown_state = SRV_SHUTDOWN_EXIT_THREADS;
/* In a 'very fast' shutdown, we do not need to wait for these threads
to die; all which counts is that we flushed the log; a 'very fast'
shutdown is essentially a crash. */
if (srv_fast_shutdown == 2) {
return(DB_SUCCESS);
}
/* All threads end up waiting for certain events. Put those events
to the signaled state. Then the threads will exit themselves in
os_thread_event_wait(). */
to the signaled state. Then the threads will exit themselves after
os_event_wait(). */
for (i = 0; i < 1000; i++) {
/* NOTE: IF YOU CREATE THREADS IN INNODB, YOU MUST EXIT THEM
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment