Commit ebf1ac2b authored by Sunny Bains's avatar Sunny Bains

Bug# 55681 - MTR slowdown after default storage engine was changed to InnoDB

Add a new function, os_cond_wait_timed(). Change the os_thread_sleep() calls
to timed conditional waits. Signal the background threads during the shutdown
phase so that we do not wait for the sleep to time out, which saves some time.

rb://439 -- Approved by Jimmy Yang
parent 6628aa47
...@@ -76,6 +76,12 @@ struct os_event_struct { ...@@ -76,6 +76,12 @@ struct os_event_struct {
/*!< list of all created events */ /*!< list of all created events */
}; };
/** Denotes an infinite delay for os_event_wait_time(); pass it as the
time_in_usec argument to request a wait that never times out. */
#define OS_SYNC_INFINITE_TIME ULINT_UNDEFINED
/** Return value of os_event_wait_time() when the timeout was exceeded
before the event was signaled; 0 is returned on success. */
#define OS_SYNC_TIME_EXCEEDED 1
/** Operating system mutex */ /** Operating system mutex */
typedef struct os_mutex_struct os_mutex_str_t; typedef struct os_mutex_struct os_mutex_str_t;
/** Operating system mutex handle */ /** Operating system mutex handle */
...@@ -173,7 +179,23 @@ os_event_wait_low( ...@@ -173,7 +179,23 @@ os_event_wait_low(
os_event_reset(). */ os_event_reset(). */
#define os_event_wait(event) os_event_wait_low(event, 0) #define os_event_wait(event) os_event_wait_low(event, 0)
/** Timed wait on event `e` for at most `t` microseconds, without a
reset signal count. The macro arguments are forwarded as given; the
previous expansion referenced an identifier named `event` instead of the
macro parameter `e`. */
#define os_event_wait_time(e, t)	os_event_wait_time_low((e), (t), 0)
/**********************************************************//**
Waits for an event object until it is in the signaled state or
a timeout is exceeded. The timeout is applied on every platform: the
implementation uses a timed condition-variable wait (or a timed
WaitForSingleObject() on Windows builds without native conditions).
@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
UNIV_INTERN
ulint
os_event_wait_time_low(
/*===================*/
os_event_t event, /*!< in: event to wait */
ulint time_in_usec, /*!< in: timeout in
microseconds, or
OS_SYNC_INFINITE_TIME */
ib_int64_t reset_sig_count); /*!< in: zero or the value
returned by previous call of
os_event_reset(). */
/*********************************************************//** /*********************************************************//**
Creates an operating system mutex semaphore. Because these are slow, the Creates an operating system mutex semaphore. Because these are slow, the
mutex semaphore of InnoDB itself (mutex_t) should be used where possible. mutex semaphore of InnoDB itself (mutex_t) should be used where possible.
......
...@@ -57,6 +57,15 @@ extern const char srv_mysql50_table_name_prefix[9]; ...@@ -57,6 +57,15 @@ extern const char srv_mysql50_table_name_prefix[9];
thread starts running */ thread starts running */
extern os_event_t srv_lock_timeout_thread_event; extern os_event_t srv_lock_timeout_thread_event;
/* The monitor thread waits on this event. */
extern os_event_t srv_monitor_event;
/* The lock timeout thread waits on this event. */
extern os_event_t srv_timeout_event;
/* The error monitor thread waits on this event. */
extern os_event_t srv_error_event;
/* If the last data file is auto-extended, we add this many pages to it /* If the last data file is auto-extended, we add this many pages to it
at a time */ at a time */
#define SRV_AUTO_EXTEND_INCREMENT \ #define SRV_AUTO_EXTEND_INCREMENT \
......
...@@ -3098,10 +3098,15 @@ logs_empty_and_mark_files_at_shutdown(void) ...@@ -3098,10 +3098,15 @@ logs_empty_and_mark_files_at_shutdown(void)
if (srv_fast_shutdown < 2 if (srv_fast_shutdown < 2
&& (srv_error_monitor_active && (srv_error_monitor_active
|| srv_lock_timeout_active || srv_monitor_active)) { || srv_lock_timeout_active
|| srv_monitor_active)) {
mutex_exit(&kernel_mutex); mutex_exit(&kernel_mutex);
os_event_set(srv_error_event);
os_event_set(srv_monitor_event);
os_event_set(srv_timeout_event);
goto loop; goto loop;
} }
...@@ -3128,6 +3133,8 @@ logs_empty_and_mark_files_at_shutdown(void) ...@@ -3128,6 +3133,8 @@ logs_empty_and_mark_files_at_shutdown(void)
log_buffer_flush_to_disk(); log_buffer_flush_to_disk();
mutex_exit(&kernel_mutex);
return; /* We SKIP ALL THE REST !! */ return; /* We SKIP ALL THE REST !! */
} }
......
...@@ -72,6 +72,9 @@ UNIV_INTERN ulint os_event_count = 0; ...@@ -72,6 +72,9 @@ UNIV_INTERN ulint os_event_count = 0;
UNIV_INTERN ulint os_mutex_count = 0; UNIV_INTERN ulint os_mutex_count = 0;
UNIV_INTERN ulint os_fast_mutex_count = 0; UNIV_INTERN ulint os_fast_mutex_count = 0;
/* The number of microseconds in a second. */
static const ulint MICROSECS_IN_A_SECOND = 1000000;
/* Because a mutex is embedded inside an event and there is an /* Because a mutex is embedded inside an event and there is an
event embedded inside a mutex, on free, this generates a recursive call. event embedded inside a mutex, on free, this generates a recursive call.
This version of the free event function doesn't acquire the global lock */ This version of the free event function doesn't acquire the global lock */
...@@ -121,6 +124,47 @@ os_cond_init( ...@@ -121,6 +124,47 @@ os_cond_init(
#endif #endif
} }
/*********************************************************//**
Do a timed wait on condition variable. The wait may end without the
condition having been signalled, so callers are expected to re-check
their predicate after this function returns.
@return TRUE if timed out, FALSE otherwise */
UNIV_INLINE
ibool
os_cond_wait_timed(
/*===============*/
	os_cond_t*		cond,		/*!< in: condition variable. */
	os_fast_mutex_t*	mutex,		/*!< in: fast mutex */
#ifndef __WIN__
	const struct timespec*	abstime		/*!< in: timeout, as an
						absolute point in time */
#else
	ulint			time_in_ms	/*!< in: timeout in
						milliseconds */
#endif /* !__WIN__ */
)
{
#ifdef __WIN__
	BOOL	ret;
	DWORD	err;

	ut_a(sleep_condition_variable != NULL);

	ret = sleep_condition_variable(cond, mutex, time_in_ms);

	if (!ret) {
		err = GetLastError();

		/* NOTE(review): sleep_condition_variable presumably wraps
		SleepConditionVariableCS(), which is documented to report
		an elapsed timeout as ERROR_TIMEOUT. WAIT_TIMEOUT is still
		accepted so that the previous behaviour is kept. */
		if (err == WAIT_TIMEOUT || err == ERROR_TIMEOUT) {
			return(TRUE);
		}
	}

	/* Any other failure is a programming error. */
	ut_a(ret);

	return(FALSE);
#else
	int	ret;

	ret = pthread_cond_timedwait(cond, mutex, abstime);

	/* Only success and an elapsed deadline are legal outcomes. */
	ut_a(ret == 0 || ret == ETIMEDOUT);

	return(ret == ETIMEDOUT);
#endif
}
/*********************************************************//** /*********************************************************//**
Wait on condition variable */ Wait on condition variable */
UNIV_INLINE UNIV_INLINE
...@@ -572,6 +616,113 @@ os_event_wait_low( ...@@ -572,6 +616,113 @@ os_event_wait_low(
} }
} }
/**********************************************************//**
Waits for an event object until it is in the signaled state or
a timeout is exceeded. When time_in_usec is OS_SYNC_INFINITE_TIME the
call is forwarded to os_event_wait_low(), which waits without a deadline.
@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
UNIV_INTERN
ulint
os_event_wait_time_low(
/*===================*/
	os_event_t	event,		/*!< in: event to wait */
	ulint		time_in_usec,	/*!< in: timeout in
					microseconds, or
					OS_SYNC_INFINITE_TIME */
	ib_int64_t	reset_sig_count)/*!< in: zero or the value
					returned by previous call of
					os_event_reset(). */
{
	/* Must start out FALSE: if the event is already signalled the
	wait loop below is left before os_cond_wait_timed() ever assigns
	a value, and the flag is still read for the return value. */
	ibool		timed_out = FALSE;
	ib_int64_t	old_signal_count;
#ifdef __WIN__
	DWORD		time_in_ms;
#else
	ulint		sec;
	ulint		usec;
	int		ret;
	struct timespec	abstime;
#endif /* __WIN__ */

	if (time_in_usec == OS_SYNC_INFINITE_TIME) {
		/* No deadline requested. The sentinel must not be fed into
		the millisecond conversion or the deadline arithmetic
		below (it would be truncated or overflow), so use the
		untimed wait instead. */
		os_event_wait_low(event, reset_sig_count);

		return(0);
	}

#ifdef __WIN__
	time_in_ms = (DWORD) (time_in_usec / 1000);

	if (!srv_use_native_conditions) {
		DWORD	err;

		ut_a(event);

		err = WaitForSingleObject(event->handle, time_in_ms);

		if (err == WAIT_OBJECT_0) {

			return(0);
		} else if (err == WAIT_TIMEOUT) {

			return(OS_SYNC_TIME_EXCEEDED);
		}

		ut_error;
		/* Dummy value to eliminate compiler warning. */
		return(42);
	} else {
		ut_a(sleep_condition_variable != NULL);
	}
#else
	ret = ut_usectime(&sec, &usec);
	ut_a(ret == 0);

	/* Build the absolute deadline. The timeout is split into whole
	seconds and a sub-second remainder before it is added, so that the
	microsecond sum stays below 2 * MICROSECS_IN_A_SECOND and a single
	carry is enough. */
	sec += time_in_usec / MICROSECS_IN_A_SECOND;
	usec += time_in_usec % MICROSECS_IN_A_SECOND;

	if (usec >= MICROSECS_IN_A_SECOND) {
		sec++;
		usec -= MICROSECS_IN_A_SECOND;
	}

	/* Convert to nano seconds; usec < MICROSECS_IN_A_SECOND here, so
	tv_nsec stays within the range required for a timespec. */
	abstime.tv_sec = (time_t) sec;
	abstime.tv_nsec = (long) (usec * 1000);
#endif /* __WIN__ */

	os_fast_mutex_lock(&event->os_mutex);

	if (reset_sig_count) {
		old_signal_count = reset_sig_count;
	} else {
		old_signal_count = event->signal_count;
	}

	do {
		/* Re-check the state after every wake-up: the wait may
		return although the event has not been set. */
		if (event->is_set == TRUE
		    || event->signal_count != old_signal_count) {

			break;
		}

		timed_out = os_cond_wait_timed(
			&event->cond_var, &event->os_mutex,
#ifndef __WIN__
			&abstime
#else
			time_in_ms
#endif /* !__WIN__ */
		);

	} while (!timed_out);

	os_fast_mutex_unlock(&event->os_mutex);

	if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {

		os_thread_exit(NULL);
	}

	return(timed_out ? OS_SYNC_TIME_EXCEEDED : 0);
}
/*********************************************************//** /*********************************************************//**
Creates an operating system mutex semaphore. Because these are slow, the Creates an operating system mutex semaphore. Because these are slow, the
mutex semaphore of InnoDB itself (mutex_t) should be used where possible. mutex semaphore of InnoDB itself (mutex_t) should be used where possible.
......
...@@ -695,6 +695,12 @@ struct srv_slot_struct{ ...@@ -695,6 +695,12 @@ struct srv_slot_struct{
/* Table for MySQL threads where they will be suspended to wait for locks */ /* Table for MySQL threads where they will be suspended to wait for locks */
UNIV_INTERN srv_slot_t* srv_mysql_table = NULL; UNIV_INTERN srv_slot_t* srv_mysql_table = NULL;
/* The lock timeout thread waits on this event. */
UNIV_INTERN os_event_t srv_timeout_event;
/* The monitor thread waits on this event. */
UNIV_INTERN os_event_t srv_monitor_event;
/* The error monitor thread waits on this event. */
UNIV_INTERN os_event_t srv_error_event;
UNIV_INTERN os_event_t srv_lock_timeout_thread_event; UNIV_INTERN os_event_t srv_lock_timeout_thread_event;
UNIV_INTERN srv_sys_t* srv_sys = NULL; UNIV_INTERN srv_sys_t* srv_sys = NULL;
...@@ -1012,6 +1018,12 @@ srv_init(void) ...@@ -1012,6 +1018,12 @@ srv_init(void)
ut_a(slot->event); ut_a(slot->event);
} }
srv_error_event = os_event_create(NULL);
srv_timeout_event = os_event_create(NULL);
srv_monitor_event = os_event_create(NULL);
srv_lock_timeout_thread_event = os_event_create(NULL); srv_lock_timeout_thread_event = os_event_create(NULL);
for (i = 0; i < SRV_MASTER + 1; i++) { for (i = 0; i < SRV_MASTER + 1; i++) {
...@@ -2049,6 +2061,7 @@ srv_monitor_thread( ...@@ -2049,6 +2061,7 @@ srv_monitor_thread(
/*!< in: a dummy parameter required by /*!< in: a dummy parameter required by
os_thread_create */ os_thread_create */
{ {
ib_int64_t sig_count;
double time_elapsed; double time_elapsed;
time_t current_time; time_t current_time;
time_t last_table_monitor_time; time_t last_table_monitor_time;
...@@ -2067,26 +2080,28 @@ srv_monitor_thread( ...@@ -2067,26 +2080,28 @@ srv_monitor_thread(
#endif #endif
UT_NOT_USED(arg); UT_NOT_USED(arg);
srv_last_monitor_time = time(NULL); srv_last_monitor_time = ut_time();
last_table_monitor_time = time(NULL); last_table_monitor_time = ut_time();
last_tablespace_monitor_time = time(NULL); last_tablespace_monitor_time = ut_time();
last_monitor_time = time(NULL); last_monitor_time = ut_time();
mutex_skipped = 0; mutex_skipped = 0;
last_srv_print_monitor = srv_print_innodb_monitor; last_srv_print_monitor = srv_print_innodb_monitor;
loop: loop:
srv_monitor_active = TRUE; srv_monitor_active = TRUE;
/* Wake up every 5 seconds to see if we need to print /* Wake up every 5 seconds to see if we need to print
monitor information. */ monitor information or if signalled at shutdown. */
os_thread_sleep(5000000); sig_count = os_event_reset(srv_monitor_event);
current_time = time(NULL); os_event_wait_time_low(srv_monitor_event, 5000000, sig_count);
current_time = ut_time();
time_elapsed = difftime(current_time, last_monitor_time); time_elapsed = difftime(current_time, last_monitor_time);
if (time_elapsed > 15) { if (time_elapsed > 15) {
last_monitor_time = time(NULL); last_monitor_time = ut_time();
if (srv_print_innodb_monitor) { if (srv_print_innodb_monitor) {
/* Reset mutex_skipped counter everytime /* Reset mutex_skipped counter everytime
...@@ -2130,7 +2145,7 @@ srv_monitor_thread( ...@@ -2130,7 +2145,7 @@ srv_monitor_thread(
if (srv_print_innodb_tablespace_monitor if (srv_print_innodb_tablespace_monitor
&& difftime(current_time, && difftime(current_time,
last_tablespace_monitor_time) > 60) { last_tablespace_monitor_time) > 60) {
last_tablespace_monitor_time = time(NULL); last_tablespace_monitor_time = ut_time();
fputs("========================" fputs("========================"
"========================\n", "========================\n",
...@@ -2156,7 +2171,7 @@ srv_monitor_thread( ...@@ -2156,7 +2171,7 @@ srv_monitor_thread(
if (srv_print_innodb_table_monitor if (srv_print_innodb_table_monitor
&& difftime(current_time, last_table_monitor_time) > 60) { && difftime(current_time, last_table_monitor_time) > 60) {
last_table_monitor_time = time(NULL); last_table_monitor_time = ut_time();
fputs("===========================================\n", fputs("===========================================\n",
stderr); stderr);
...@@ -2216,16 +2231,20 @@ srv_lock_timeout_thread( ...@@ -2216,16 +2231,20 @@ srv_lock_timeout_thread(
ibool some_waits; ibool some_waits;
double wait_time; double wait_time;
ulint i; ulint i;
ib_int64_t sig_count;
#ifdef UNIV_PFS_THREAD #ifdef UNIV_PFS_THREAD
pfs_register_thread(srv_lock_timeout_thread_key); pfs_register_thread(srv_lock_timeout_thread_key);
#endif #endif
loop: loop:
/* When someone is waiting for a lock, we wake up every second /* When someone is waiting for a lock, we wake up every second
and check if a timeout has passed for a lock wait */ and check if a timeout has passed for a lock wait */
os_thread_sleep(1000000); sig_count = os_event_reset(srv_timeout_event);
os_event_wait_time_low(srv_timeout_event, 1000000, sig_count);
srv_lock_timeout_active = TRUE; srv_lock_timeout_active = TRUE;
...@@ -2320,6 +2339,7 @@ srv_error_monitor_thread( ...@@ -2320,6 +2339,7 @@ srv_error_monitor_thread(
ulint fatal_cnt = 0; ulint fatal_cnt = 0;
ib_uint64_t old_lsn; ib_uint64_t old_lsn;
ib_uint64_t new_lsn; ib_uint64_t new_lsn;
ib_int64_t sig_count;
old_lsn = srv_start_lsn; old_lsn = srv_start_lsn;
...@@ -2395,7 +2415,9 @@ srv_error_monitor_thread( ...@@ -2395,7 +2415,9 @@ srv_error_monitor_thread(
fflush(stderr); fflush(stderr);
os_thread_sleep(1000000); sig_count = os_event_reset(srv_error_event);
os_event_wait_time_low(srv_error_event, 1000000, sig_count);
if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) { if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) {
...@@ -2646,6 +2668,21 @@ srv_master_thread( ...@@ -2646,6 +2668,21 @@ srv_master_thread(
for (i = 0; i < 10; i++) { for (i = 0; i < 10; i++) {
ulint cur_time = ut_time_ms(); ulint cur_time = ut_time_ms();
/* ALTER TABLE in MySQL requires on Unix that the table handler
can drop tables lazily after there no longer are SELECT
queries to them. */
srv_main_thread_op_info = "doing background drop tables";
row_drop_tables_for_mysql_in_background();
srv_main_thread_op_info = "";
if (srv_fast_shutdown && srv_shutdown_state > 0) {
goto background_loop;
}
buf_get_total_stat(&buf_stat); buf_get_total_stat(&buf_stat);
n_ios_old = log_sys->n_log_ios + buf_stat.n_pages_read n_ios_old = log_sys->n_log_ios + buf_stat.n_pages_read
...@@ -2654,7 +2691,8 @@ srv_master_thread( ...@@ -2654,7 +2691,8 @@ srv_master_thread(
srv_main_thread_op_info = "sleeping"; srv_main_thread_op_info = "sleeping";
srv_main_1_second_loops++; srv_main_1_second_loops++;
if (next_itr_time > cur_time) { if (next_itr_time > cur_time
&& srv_shutdown_state == SRV_SHUTDOWN_NONE) {
/* Get sleep interval in micro seconds. We use /* Get sleep interval in micro seconds. We use
ut_min() to avoid long sleep in case of ut_min() to avoid long sleep in case of
...@@ -2668,21 +2706,6 @@ srv_master_thread( ...@@ -2668,21 +2706,6 @@ srv_master_thread(
/* Each iteration should happen at 1 second interval. */ /* Each iteration should happen at 1 second interval. */
next_itr_time = ut_time_ms() + 1000; next_itr_time = ut_time_ms() + 1000;
/* ALTER TABLE in MySQL requires on Unix that the table handler
can drop tables lazily after there no longer are SELECT
queries to them. */
srv_main_thread_op_info = "doing background drop tables";
row_drop_tables_for_mysql_in_background();
srv_main_thread_op_info = "";
if (srv_fast_shutdown && srv_shutdown_state > 0) {
goto background_loop;
}
/* Flush logs if needed */ /* Flush logs if needed */
srv_sync_log_buffer_in_background(); srv_sync_log_buffer_in_background();
...@@ -2860,7 +2883,9 @@ srv_master_thread( ...@@ -2860,7 +2883,9 @@ srv_master_thread(
MySQL tries to drop a table while there are still open handles MySQL tries to drop a table while there are still open handles
to it and we had to put it to the background drop queue.) */ to it and we had to put it to the background drop queue.) */
os_thread_sleep(100000); if (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
os_thread_sleep(100000);
}
} }
if (srv_n_purge_threads == 0) { if (srv_n_purge_threads == 0) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment