Commit b35c5912 authored by Jan Lindström

MDEV-6376: InnoDB: Assertion failure in thread 139995225970432

in file buf0mtflu.cc line 570.

Analysis: This is a real timing bug. We should take the mutex before we
try to send the shutdown messages; that makes sure
that threads doing an unfinished flush (they have acquired
this mutex) have time to do their work before we add shutdown
messages to the work queue. Currently, we just add the shutdown
messages to the work queue, while the code assumes that during a flush
there is a constant number of items to be processed, thus
leading to the assertion failure.
parent 36e86bac
...@@ -378,6 +378,20 @@ buf_mtflu_io_thread_exit(void) ...@@ -378,6 +378,20 @@ buf_mtflu_io_thread_exit(void)
fprintf(stderr, "InnoDB: [Note]: Signal mtflush_io_threads to exit [%lu]\n", fprintf(stderr, "InnoDB: [Note]: Signal mtflush_io_threads to exit [%lu]\n",
srv_mtflush_threads); srv_mtflush_threads);
/* This lock is to safeguard against a timing bug: a flush request takes
this mutex before sending work items to be processed by flush
threads. Inside a flush thread we assume that the work queue contains only
a constant number of items. Thus, we may not install new work items
below before all previous ones are processed. This mutex is released
by the flush request after all work items sent to flush threads have
been processed. Thus, we can get this mutex if and only if the work
queue is empty. */
os_fast_mutex_lock(&mtflush_mtx);
/* Make sure the work queue is empty */
ut_a(ib_wqueue_is_empty(mtflush_io->wq));
/* Send one exit work item/thread */ /* Send one exit work item/thread */
for (i=0; i < srv_mtflush_threads; i++) { for (i=0; i < srv_mtflush_threads; i++) {
work_item[i].tsk = MT_WRK_NONE; work_item[i].tsk = MT_WRK_NONE;
...@@ -399,6 +413,9 @@ buf_mtflu_io_thread_exit(void) ...@@ -399,6 +413,9 @@ buf_mtflu_io_thread_exit(void)
ut_a(ib_wqueue_is_empty(mtflush_io->wq)); ut_a(ib_wqueue_is_empty(mtflush_io->wq));
/* Requests sent */
os_fast_mutex_unlock(&mtflush_mtx);
/* Collect all work done items */ /* Collect all work done items */
for (i=0; i < srv_mtflush_threads;) { for (i=0; i < srv_mtflush_threads;) {
wrk_t* work_item = NULL; wrk_t* work_item = NULL;
......
...@@ -385,6 +385,17 @@ buf_mtflu_io_thread_exit(void) ...@@ -385,6 +385,17 @@ buf_mtflu_io_thread_exit(void)
fprintf(stderr, "InnoDB: [Note]: Signal mtflush_io_threads to exit [%lu]\n", fprintf(stderr, "InnoDB: [Note]: Signal mtflush_io_threads to exit [%lu]\n",
srv_mtflush_threads); srv_mtflush_threads);
/* This lock is to safeguard against a timing bug: a flush request takes
this mutex before sending work items to be processed by flush
threads. Inside a flush thread we assume that the work queue contains only
a constant number of items. Thus, we may not install new work items
below before all previous ones are processed. This mutex is released
by the flush request after all work items sent to flush threads have
been processed. Thus, we can get this mutex if and only if the work
queue is empty. */
os_fast_mutex_lock(&mtflush_mtx);
/* Send one exit work item/thread */ /* Send one exit work item/thread */
for (i=0; i < srv_mtflush_threads; i++) { for (i=0; i < srv_mtflush_threads; i++) {
work_item[i].tsk = MT_WRK_NONE; work_item[i].tsk = MT_WRK_NONE;
...@@ -406,6 +417,9 @@ buf_mtflu_io_thread_exit(void) ...@@ -406,6 +417,9 @@ buf_mtflu_io_thread_exit(void)
ut_a(ib_wqueue_is_empty(mtflush_io->wq)); ut_a(ib_wqueue_is_empty(mtflush_io->wq));
/* Requests sent */
os_fast_mutex_unlock(&mtflush_mtx);
/* Collect all work done items */ /* Collect all work done items */
for (i=0; i < srv_mtflush_threads;) { for (i=0; i < srv_mtflush_threads;) {
wrk_t* work_item = NULL; wrk_t* work_item = NULL;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment