Commit 9a961662 authored by Marko Mäkelä's avatar Marko Mäkelä

Bug#11877216 InnoDB too eager to commit suicide on a busy server

sync_array_print_long_waits(): Return the longest waiting thread ID
and the longest waited-for lock. Only if those remain unchanged
between calls in srv_error_monitor_thread(), increment
fatal_cnt. Otherwise, reset fatal_cnt.

Background: There is a built-in watchdog in InnoDB whose purpose is to
kill the server when some thread is stuck waiting for a mutex or
rw-lock. Before this fix, the logic was flawed.

The function sync_array_print_long_waits() returns TRUE if it finds a
lock wait that exceeds 10 minutes (srv_fatal_semaphore_wait_threshold).
The function srv_error_monitor_thread() will kill the server if this
happens 10 times in a row (fatal_cnt reaches 10), checked every 30
seconds. This is wrong, because this situation does not mean that the
server is hung. If the server is very busy for a little over 15
minutes, it will be killed.

Consider this example. Thread T1 is waiting for mutex M. Some time
later, threads T2..Tn start waiting for the same mutex M. If T1 keeps
waiting for 600 seconds, fatal_cnt will be incremented to 1. So far,
so good. Now, if M is granted to T1, the server was obviously not
stuck. But, T2..Tn keeps waiting, and their wait time will be longer
than 600 seconds. If 5 minutes later, some Tn has still been waiting
for more than 10 minutes for the mutex M, the server can be killed,
even though it is not stuck.

rb:622 approved by Jimmy Yang
parent 58683967
...@@ -93,10 +93,13 @@ sync_arr_wake_threads_if_sema_free(void); ...@@ -93,10 +93,13 @@ sync_arr_wake_threads_if_sema_free(void);
Prints warnings of long semaphore waits to stderr. */ Prints warnings of long semaphore waits to stderr. */
ibool ibool
sync_array_print_long_waits(void); sync_array_print_long_waits(
/*=============================*/ /*========================*/
/* out: TRUE if fatal semaphore wait threshold /* out: TRUE if fatal semaphore wait threshold
was exceeded */ was exceeded */
os_thread_id_t* waiter, /* out: longest waiting thread */
const void** sema) /* out: longest-waited-for semaphore */
__attribute__((nonnull));
/************************************************************************ /************************************************************************
Validates the integrity of the wait array. Checks Validates the integrity of the wait array. Checks
that the number of reserved cells equals the count variable. */ that the number of reserved cells equals the count variable. */
......
...@@ -2180,9 +2180,15 @@ srv_error_monitor_thread( ...@@ -2180,9 +2180,15 @@ srv_error_monitor_thread(
os_thread_create */ os_thread_create */
{ {
/* number of successive fatal timeouts observed */ /* number of successive fatal timeouts observed */
ulint fatal_cnt = 0; ulint fatal_cnt = 0;
dulint old_lsn; dulint old_lsn;
dulint new_lsn; dulint new_lsn;
/* longest waiting thread for a semaphore */
os_thread_id_t waiter = os_thread_get_curr_id();
os_thread_id_t old_waiter = waiter;
/* the semaphore that is being waited for */
const void* sema = NULL;
const void* old_sema = NULL;
old_lsn = srv_start_lsn; old_lsn = srv_start_lsn;
...@@ -2224,10 +2230,11 @@ srv_error_monitor_thread( ...@@ -2224,10 +2230,11 @@ srv_error_monitor_thread(
/* In case mutex_exit is not a memory barrier, it is /* In case mutex_exit is not a memory barrier, it is
theoretically possible some threads are left waiting though theoretically possible some threads are left waiting though
the semaphore is already released. Wake up those threads: */ the semaphore is already released. Wake up those threads: */
sync_arr_wake_threads_if_sema_free(); sync_arr_wake_threads_if_sema_free();
if (sync_array_print_long_waits()) { if (sync_array_print_long_waits(&waiter, &sema)
&& sema == old_sema && os_thread_eq(waiter, old_waiter)) {
fatal_cnt++; fatal_cnt++;
if (fatal_cnt > 10) { if (fatal_cnt > 10) {
...@@ -2242,6 +2249,8 @@ srv_error_monitor_thread( ...@@ -2242,6 +2249,8 @@ srv_error_monitor_thread(
} }
} else { } else {
fatal_cnt = 0; fatal_cnt = 0;
old_waiter = waiter;
old_sema = sema;
} }
/* Flush stderr so that a database user gets the output /* Flush stderr so that a database user gets the output
......
...@@ -916,10 +916,12 @@ sync_arr_wake_threads_if_sema_free(void) ...@@ -916,10 +916,12 @@ sync_arr_wake_threads_if_sema_free(void)
Prints warnings of long semaphore waits to stderr. */ Prints warnings of long semaphore waits to stderr. */
ibool ibool
sync_array_print_long_waits(void) sync_array_print_long_waits(
/*=============================*/ /*========================*/
/* out: TRUE if fatal semaphore wait threshold /* out: TRUE if fatal semaphore wait threshold
was exceeded */ was exceeded */
os_thread_id_t* waiter, /* out: longest waiting thread */
const void** sema) /* out: longest-waited-for semaphore */
{ {
sync_cell_t* cell; sync_cell_t* cell;
ibool old_val; ibool old_val;
...@@ -927,24 +929,40 @@ sync_array_print_long_waits(void) ...@@ -927,24 +929,40 @@ sync_array_print_long_waits(void)
ulint i; ulint i;
ulint fatal_timeout = srv_fatal_semaphore_wait_threshold; ulint fatal_timeout = srv_fatal_semaphore_wait_threshold;
ibool fatal = FALSE; ibool fatal = FALSE;
double longest_diff = 0;
for (i = 0; i < sync_primary_wait_array->n_cells; i++) { for (i = 0; i < sync_primary_wait_array->n_cells; i++) {
double diff;
void* wait_object;
cell = sync_array_get_nth_cell(sync_primary_wait_array, i); cell = sync_array_get_nth_cell(sync_primary_wait_array, i);
if (cell->wait_object != NULL && cell->waiting wait_object = cell->wait_object;
&& difftime(time(NULL), cell->reservation_time) > 240) {
if (wait_object == NULL || !cell->waiting) {
continue;
}
diff = difftime(time(NULL), cell->reservation_time);
if (diff > 240) {
fputs("InnoDB: Warning: a long semaphore wait:\n", fputs("InnoDB: Warning: a long semaphore wait:\n",
stderr); stderr);
sync_array_cell_print(stderr, cell); sync_array_cell_print(stderr, cell);
noticed = TRUE; noticed = TRUE;
} }
if (cell->wait_object != NULL && cell->waiting if (diff > fatal_timeout) {
&& difftime(time(NULL), cell->reservation_time)
> fatal_timeout) {
fatal = TRUE; fatal = TRUE;
} }
if (diff > longest_diff) {
longest_diff = diff;
*sema = wait_object;
*waiter = cell->thread;
}
} }
if (noticed) { if (noticed) {
......
2011-03-30 The InnoDB Team
* srv/srv0srv.c, sync/sync0arr.h, sync/sync0arr.c:
Fix Bug#11877216 InnoDB too eager to commit suicide on a busy server
2011-03-15 The InnoDB Team 2011-03-15 The InnoDB Team
* btr/btr0cur.c, page/page0zip.c: * btr/btr0cur.c, page/page0zip.c:
......
...@@ -115,8 +115,11 @@ Prints warnings of long semaphore waits to stderr. ...@@ -115,8 +115,11 @@ Prints warnings of long semaphore waits to stderr.
@return TRUE if fatal semaphore wait threshold was exceeded */ @return TRUE if fatal semaphore wait threshold was exceeded */
UNIV_INTERN UNIV_INTERN
ibool ibool
sync_array_print_long_waits(void); sync_array_print_long_waits(
/*=============================*/ /*========================*/
os_thread_id_t* waiter, /*!< out: longest waiting thread */
const void** sema) /*!< out: longest-waited-for semaphore */
__attribute__((nonnull));
/********************************************************************//** /********************************************************************//**
Validates the integrity of the wait array. Checks Validates the integrity of the wait array. Checks
that the number of reserved cells equals the count variable. */ that the number of reserved cells equals the count variable. */
......
...@@ -2236,6 +2236,12 @@ srv_error_monitor_thread( ...@@ -2236,6 +2236,12 @@ srv_error_monitor_thread(
ulint fatal_cnt = 0; ulint fatal_cnt = 0;
ib_uint64_t old_lsn; ib_uint64_t old_lsn;
ib_uint64_t new_lsn; ib_uint64_t new_lsn;
/* longest waiting thread for a semaphore */
os_thread_id_t waiter = os_thread_get_curr_id();
os_thread_id_t old_waiter = waiter;
/* the semaphore that is being waited for */
const void* sema = NULL;
const void* old_sema = NULL;
old_lsn = srv_start_lsn; old_lsn = srv_start_lsn;
...@@ -2284,7 +2290,8 @@ srv_error_monitor_thread( ...@@ -2284,7 +2290,8 @@ srv_error_monitor_thread(
sync_arr_wake_threads_if_sema_free(); sync_arr_wake_threads_if_sema_free();
if (sync_array_print_long_waits()) { if (sync_array_print_long_waits(&waiter, &sema)
&& sema == old_sema && os_thread_eq(waiter, old_waiter)) {
fatal_cnt++; fatal_cnt++;
if (fatal_cnt > 10) { if (fatal_cnt > 10) {
...@@ -2299,6 +2306,8 @@ srv_error_monitor_thread( ...@@ -2299,6 +2306,8 @@ srv_error_monitor_thread(
} }
} else { } else {
fatal_cnt = 0; fatal_cnt = 0;
old_waiter = waiter;
old_sema = sema;
} }
/* Flush stderr so that a database user gets the output /* Flush stderr so that a database user gets the output
......
...@@ -914,8 +914,10 @@ Prints warnings of long semaphore waits to stderr. ...@@ -914,8 +914,10 @@ Prints warnings of long semaphore waits to stderr.
@return TRUE if fatal semaphore wait threshold was exceeded */ @return TRUE if fatal semaphore wait threshold was exceeded */
UNIV_INTERN UNIV_INTERN
ibool ibool
sync_array_print_long_waits(void) sync_array_print_long_waits(
/*=============================*/ /*========================*/
os_thread_id_t* waiter, /*!< out: longest waiting thread */
const void** sema) /*!< out: longest-waited-for semaphore */
{ {
sync_cell_t* cell; sync_cell_t* cell;
ibool old_val; ibool old_val;
...@@ -923,24 +925,40 @@ sync_array_print_long_waits(void) ...@@ -923,24 +925,40 @@ sync_array_print_long_waits(void)
ulint i; ulint i;
ulint fatal_timeout = srv_fatal_semaphore_wait_threshold; ulint fatal_timeout = srv_fatal_semaphore_wait_threshold;
ibool fatal = FALSE; ibool fatal = FALSE;
double longest_diff = 0;
for (i = 0; i < sync_primary_wait_array->n_cells; i++) { for (i = 0; i < sync_primary_wait_array->n_cells; i++) {
double diff;
void* wait_object;
cell = sync_array_get_nth_cell(sync_primary_wait_array, i); cell = sync_array_get_nth_cell(sync_primary_wait_array, i);
if (cell->wait_object != NULL && cell->waiting wait_object = cell->wait_object;
&& difftime(time(NULL), cell->reservation_time) > 240) {
if (wait_object == NULL || !cell->waiting) {
continue;
}
diff = difftime(time(NULL), cell->reservation_time);
if (diff > 240) {
fputs("InnoDB: Warning: a long semaphore wait:\n", fputs("InnoDB: Warning: a long semaphore wait:\n",
stderr); stderr);
sync_array_cell_print(stderr, cell); sync_array_cell_print(stderr, cell);
noticed = TRUE; noticed = TRUE;
} }
if (cell->wait_object != NULL && cell->waiting if (diff > fatal_timeout) {
&& difftime(time(NULL), cell->reservation_time)
> fatal_timeout) {
fatal = TRUE; fatal = TRUE;
} }
if (diff > longest_diff) {
longest_diff = diff;
*sema = wait_object;
*waiter = cell->thread;
}
} }
if (noticed) { if (noticed) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment