Commit 657fcdf4 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-24280 InnoDB triggers too many independent periodic tasks

A side effect of MDEV-16264 is that a large number of threads will
be created at server startup, only to be destroyed after a minute or two.

One source of such thread creation is srv_start_periodic_timer().
InnoDB is creating 3 periodic tasks: srv_master_callback (1Hz),
srv_error_monitor_task (1Hz), and srv_monitor_task (0.2Hz).

It appears that we can merge srv_error_monitor_task and srv_monitor_task
and have them invoked 4 times per minute (every 15 seconds). This will
affect our ability to enforce innodb_fatal_semaphore_wait_threshold and
some computations around BUF_LRU_STAT_N_INTERVAL.

We could remove srv_master_callback along with the DROP TABLE queue
at some point of time in the future. We must keep it independent
of the innodb_fatal_semaphore_wait_threshold detection, because
the background DROP TABLE queue could get stuck due to dict_sys
being locked by another thread. For now, srv_master_callback
must be invoked once per second, so that
innodb_flush_log_at_timeout=1 can work.

BUF_LRU_STAT_N_INTERVAL: Reduce the precision and extend the time
from 50*1 second to 4*15 seconds.

srv_error_monitor_timer: Remove.

MAX_MUTEX_NOWAIT: Increase from 20*1 second to 2*15 seconds.

srv_refresh_innodb_monitor_stats(): Avoid a repeated call to time(NULL).
Change the interval to less than 60 seconds.

srv_monitor(): Renamed from srv_monitor_task.

srv_monitor_task(): Renamed from srv_error_monitor_task().
Invoked only once in 15 seconds. Invoke also srv_monitor().
Increase the fatal_cnt threshold from 10*1 second to 1*15 seconds.

sync_array_print_long_waits_low(): Invoke time(NULL) only once.
Remove a bogus message about printouts for 30 seconds. Those
printouts were effectively already disabled in MDEV-16264
(commit 5e62b6a5).
parent 7b1252c0
......@@ -76,13 +76,12 @@ uncompressed and compressed data), which must be clean. */
/* @{ */
/** Number of intervals for which we keep the history of these stats.
Each interval is 1 second, defined by the rate at which
srv_error_monitor_thread() calls buf_LRU_stat_update(). */
static const ulint BUF_LRU_STAT_N_INTERVAL = 50;
Updated at SRV_MONITOR_INTERVAL (the buf_LRU_stat_update() call rate). */
static constexpr ulint BUF_LRU_STAT_N_INTERVAL= 4;
/** Co-efficient with which we multiply I/O operations to equate them
with page_zip_decompress() operations. */
static const ulint BUF_LRU_IO_TO_UNZIP_FACTOR = 50;
static constexpr ulint BUF_LRU_IO_TO_UNZIP_FACTOR= 50;
/** Sampled values buf_LRU_stat_cur.
Not protected by any mutex. Updated by buf_LRU_stat_update(). */
......
......@@ -702,13 +702,6 @@ Complete the shutdown tasks such as background DROP TABLE,
and optionally change buffer merge (on innodb_fast_shutdown=0). */
void srv_shutdown(bool ibuf_merge);
/*************************************************************************
A task which prints warnings about semaphore waits which have lasted
too long. These can be used to track bugs which cause hangs.
*/
void srv_error_monitor_task(void*);
} /* extern "C" */
#ifdef UNIV_DEBUG
......@@ -900,12 +893,14 @@ struct srv_slot_t{
extern tpool::thread_pool *srv_thread_pool;
extern std::unique_ptr<tpool::timer> srv_master_timer;
extern std::unique_ptr<tpool::timer> srv_error_monitor_timer;
extern std::unique_ptr<tpool::timer> srv_monitor_timer;
/** The interval at which srv_monitor_task is invoked, in milliseconds */
constexpr unsigned SRV_MONITOR_INTERVAL= 15000; /* 4 times per minute */
static inline void srv_monitor_timer_schedule_now()
{
srv_monitor_timer->set_time(0, 5000);
srv_monitor_timer->set_time(0, SRV_MONITOR_INTERVAL);
}
static inline void srv_start_periodic_timer(std::unique_ptr<tpool::timer>& t,
void (*func)(void*), int period)
......
......@@ -1011,7 +1011,6 @@ ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown()
!srv_read_only_mode && srv_fast_shutdown < 2) {
buf_dump_start();
}
srv_error_monitor_timer.reset();
srv_monitor_timer.reset();
lock_sys.timeout_timer.reset();
if (do_srv_shutdown) {
......
......@@ -186,7 +186,7 @@ tpool::thread_pool* srv_thread_pool;
/** Maximum number of times allowed to conditionally acquire
mutex before switching to blocking wait on the mutex */
#define MAX_MUTEX_NOWAIT 20
#define MAX_MUTEX_NOWAIT 2
/** Check whether the number of failed nonblocking mutex
acquisition attempts exceeds maximum allowed value. If so,
......@@ -555,8 +555,7 @@ struct purge_coordinator_state
static purge_coordinator_state purge_state;
/** threadpool timer for srv_error_monitor_task(). */
std::unique_ptr<tpool::timer> srv_error_monitor_timer;
/** threadpool timer for srv_monitor_task() */
std::unique_ptr<tpool::timer> srv_monitor_timer;
......@@ -769,16 +768,11 @@ srv_boot(void)
/******************************************************************//**
Refreshes the values used to calculate per-second averages. */
static
void
srv_refresh_innodb_monitor_stats(void)
/*==================================*/
static void srv_refresh_innodb_monitor_stats(time_t current_time)
{
mutex_enter(&srv_innodb_monitor_mutex);
time_t current_time = time(NULL);
if (difftime(current_time, srv_last_monitor_time) <= 60) {
if (difftime(current_time, srv_last_monitor_time) < 60) {
/* We referesh InnoDB Monitor values so that averages are
printed from at most 60 last seconds */
mutex_exit(&srv_innodb_monitor_mutex);
......@@ -1309,26 +1303,18 @@ struct srv_monitor_state_t
static srv_monitor_state_t monitor_state;
/** A task which prints the info output by various InnoDB monitors.*/
void srv_monitor_task(void*)
static void srv_monitor()
{
double time_elapsed;
time_t current_time;
ut_ad(!srv_read_only_mode);
current_time = time(NULL);
time_elapsed = difftime(current_time, monitor_state.last_monitor_time);
time_t current_time = time(NULL);
if (time_elapsed > 15) {
if (difftime(current_time, monitor_state.last_monitor_time) >= 15) {
monitor_state.last_monitor_time = current_time;
if (srv_print_innodb_monitor) {
/* Reset mutex_skipped counter everytime
srv_print_innodb_monitor changes. This is to
ensure we will not be blocked by lock_sys.mutex
for short duration information printing,
such as requested by sync_array_print_long_waits() */
for short duration information printing */
if (!monitor_state.last_srv_print_monitor) {
monitor_state.mutex_skipped = 0;
monitor_state.last_srv_print_monitor = true;
......@@ -1366,14 +1352,14 @@ void srv_monitor_task(void*)
}
}
srv_refresh_innodb_monitor_stats();
srv_refresh_innodb_monitor_stats(current_time);
}
/*********************************************************************//**
A task which prints warnings about semaphore waits which have lasted
too long. These can be used to track bugs which cause hangs.
*/
void srv_error_monitor_task(void*)
void srv_monitor_task(void*)
{
/* number of successive fatal timeouts observed */
static ulint fatal_cnt;
......@@ -1408,20 +1394,17 @@ void srv_error_monitor_task(void*)
if (sync_array_print_long_waits(&waiter, &sema)
&& sema == old_sema && os_thread_eq(waiter, old_waiter)) {
#if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES)
if (os_event_is_set(srv_allow_writes_event)) {
#endif /* WITH_WSREP */
fatal_cnt++;
#if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES)
} else {
fprintf(stderr,
"WSREP: avoiding InnoDB self crash due to long "
"semaphore wait of > %lu seconds\n"
"Server is processing SST donor operation, "
"fatal_cnt now: " ULINTPF,
srv_fatal_semaphore_wait_threshold, fatal_cnt);
}
if (!os_event_is_set(srv_allow_writes_event)) {
fprintf(stderr,
"WSREP: avoiding InnoDB self crash due to "
"long semaphore wait of > %lu seconds\n"
"Server is processing SST donor operation, "
"fatal_cnt now: " ULINTPF,
srv_fatal_semaphore_wait_threshold, fatal_cnt);
return;
}
#endif /* WITH_WSREP */
if (fatal_cnt > 10) {
if (fatal_cnt++) {
ib::fatal() << "Semaphore wait has lasted > "
<< srv_fatal_semaphore_wait_threshold
<< " seconds. We intentionally crash the"
......@@ -1432,6 +1415,8 @@ void srv_error_monitor_task(void*)
old_waiter = waiter;
old_sema = sema;
}
srv_monitor();
}
/******************************************************************//**
......
......@@ -1826,8 +1826,8 @@ dberr_t srv_start(bool create_new_db)
DBUG_EXECUTE_IF("innodb_skip_monitors", goto skip_monitors;);
/* Create the task which warns of long semaphore waits */
srv_start_periodic_timer(srv_error_monitor_timer, srv_error_monitor_task, 1000);
srv_start_periodic_timer(srv_monitor_timer, srv_monitor_task, 5000);
srv_start_periodic_timer(srv_monitor_timer, srv_monitor_task,
SRV_MONITOR_INTERVAL);
#ifndef DBUG_OFF
skip_monitors:
......
......@@ -893,6 +893,7 @@ sync_array_print_long_waits_low(
#else
# define SYNC_ARRAY_TIMEOUT 240
#endif
const time_t now = time(NULL);
for (ulint i = 0; i < arr->n_cells; i++) {
......@@ -908,7 +909,7 @@ sync_array_print_long_waits_low(
continue;
}
double diff = difftime(time(NULL), cell->reservation_time);
double diff = difftime(now, cell->reservation_time);
if (diff > SYNC_ARRAY_TIMEOUT) {
ib::warn() << "A long semaphore wait:";
......@@ -982,12 +983,6 @@ sync_array_print_long_waits(
}
if (noticed) {
fprintf(stderr,
"InnoDB: ###### Starts InnoDB Monitor"
" for 30 secs to print diagnostic info:\n");
my_bool old_val = srv_print_innodb_monitor;
/* If some crucial semaphore is reserved, then also the InnoDB
Monitor can hang, and we do not get diagnostics. Since in
many cases an InnoDB hang is caused by a pwrite() or a pread()
......@@ -1000,14 +995,7 @@ sync_array_print_long_waits(
MONITOR_VALUE(MONITOR_OS_PENDING_READS),
MONITOR_VALUE(MONITOR_OS_PENDING_WRITES));
srv_print_innodb_monitor = TRUE;
lock_wait_timeout_task(nullptr);
srv_print_innodb_monitor = static_cast<my_bool>(old_val);
fprintf(stderr,
"InnoDB: ###### Diagnostic info printed"
" to the standard error stream\n");
}
return(fatal);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment