Commit 92064b81 authored by Luis Soares

BUG#53657: Slave crashed with error 22 when trying to lock mutex at mf_iocache.c, line 1722

The slave crashed when two threads, the IO thread and a user
thread, raced for the same mutex (the append_buffer_lock
protecting the relay log's IO_CACHE). The IO thread was trying to
flush the cache, and to do so it was acquiring the
append_buffer_lock.

However, the user thread was closing and reopening the relay log
at the moment the IO thread tried to take the lock. Closing and
reopening the log includes destroying and reinitialising the
IO_CACHE mutex. Therefore, the IO thread tried to lock a
destroyed mutex.

We fix this by backporting the patch for BUG#50364, which fixed
this bug in MySQL Server 5.5+. The patch adds the missing
synchronization between calls to flush_master_info and the
flushing of the relay log by the IO thread. In detail, the patch
backports the following revision (from mysql-trunk):
- luis.soares@sun.com-20100203165617-b1yydr0ee24ycpjm

This patch also includes the post-push fix made for BUG#50364:
- luis.soares@sun.com-20100222002629-0cijwqk6baxhj7gr
parent ca0aa95c
...@@ -976,7 +976,7 @@ bool load_master_data(THD* thd) ...@@ -976,7 +976,7 @@ bool load_master_data(THD* thd)
host was specified; there could have been a problem when replication host was specified; there could have been a problem when replication
started, which led to relay log's IO_CACHE to not be inited. started, which led to relay log's IO_CACHE to not be inited.
*/ */
if (flush_master_info(active_mi, 0)) if (flush_master_info(active_mi, FALSE, FALSE))
sql_print_error("Failed to flush master info file"); sql_print_error("Failed to flush master info file");
} }
mysql_free_result(master_status_res); mysql_free_result(master_status_res);
......
...@@ -312,7 +312,7 @@ file '%s')", fname); ...@@ -312,7 +312,7 @@ file '%s')", fname);
mi->inited = 1; mi->inited = 1;
// now change cache READ -> WRITE - must do this before flush_master_info // now change cache READ -> WRITE - must do this before flush_master_info
reinit_io_cache(&mi->file, WRITE_CACHE, 0L, 0, 1); reinit_io_cache(&mi->file, WRITE_CACHE, 0L, 0, 1);
if ((error=test(flush_master_info(mi, 1)))) if ((error=test(flush_master_info(mi, TRUE, TRUE))))
sql_print_error("Failed to flush master info file"); sql_print_error("Failed to flush master info file");
pthread_mutex_unlock(&mi->data_lock); pthread_mutex_unlock(&mi->data_lock);
DBUG_RETURN(error); DBUG_RETURN(error);
...@@ -338,10 +338,13 @@ file '%s')", fname); ...@@ -338,10 +338,13 @@ file '%s')", fname);
1 - flush master info failed 1 - flush master info failed
0 - all ok 0 - all ok
*/ */
int flush_master_info(Master_info* mi, bool flush_relay_log_cache) int flush_master_info(Master_info* mi,
bool flush_relay_log_cache,
bool need_lock_relay_log)
{ {
IO_CACHE* file = &mi->file; IO_CACHE* file = &mi->file;
char lbuf[22]; char lbuf[22];
int err= 0;
DBUG_ENTER("flush_master_info"); DBUG_ENTER("flush_master_info");
DBUG_PRINT("enter",("master_pos: %ld", (long) mi->master_log_pos)); DBUG_PRINT("enter",("master_pos: %ld", (long) mi->master_log_pos));
...@@ -358,9 +361,23 @@ int flush_master_info(Master_info* mi, bool flush_relay_log_cache) ...@@ -358,9 +361,23 @@ int flush_master_info(Master_info* mi, bool flush_relay_log_cache)
When we come to this place in code, relay log may or not be initialized; When we come to this place in code, relay log may or not be initialized;
the caller is responsible for setting 'flush_relay_log_cache' accordingly. the caller is responsible for setting 'flush_relay_log_cache' accordingly.
*/ */
if (flush_relay_log_cache && if (flush_relay_log_cache)
flush_io_cache(mi->rli.relay_log.get_log_file())) {
DBUG_RETURN(2); pthread_mutex_t *log_lock= mi->rli.relay_log.get_log_lock();
IO_CACHE *log_file= mi->rli.relay_log.get_log_file();
if (need_lock_relay_log)
pthread_mutex_lock(log_lock);
safe_mutex_assert_owner(log_lock);
err= flush_io_cache(log_file);
if (need_lock_relay_log)
pthread_mutex_unlock(log_lock);
if (err)
DBUG_RETURN(2);
}
/* /*
We flushed the relay log BEFORE the master.info file, because if we crash We flushed the relay log BEFORE the master.info file, because if we crash
......
...@@ -108,7 +108,8 @@ int init_master_info(Master_info* mi, const char* master_info_fname, ...@@ -108,7 +108,8 @@ int init_master_info(Master_info* mi, const char* master_info_fname,
bool abort_if_no_master_info_file, bool abort_if_no_master_info_file,
int thread_mask); int thread_mask);
void end_master_info(Master_info* mi); void end_master_info(Master_info* mi);
int flush_master_info(Master_info* mi, bool flush_relay_log_cache); int flush_master_info(Master_info* mi,
bool flush_relay_log_cache,
bool need_lock_relay_log);
#endif /* HAVE_REPLICATION */ #endif /* HAVE_REPLICATION */
#endif /* RPL_MI_H */ #endif /* RPL_MI_H */
...@@ -120,7 +120,7 @@ int init_relay_log_info(Relay_log_info* rli, ...@@ -120,7 +120,7 @@ int init_relay_log_info(Relay_log_info* rli,
/* /*
The relay log will now be opened, as a SEQ_READ_APPEND IO_CACHE. The relay log will now be opened, as a SEQ_READ_APPEND IO_CACHE.
Note that the I/O thread flushes it to disk after writing every Note that the I/O thread flushes it to disk after writing every
event, in flush_master_info(mi, 1). event, in flush_master_info(mi, 1, ?).
*/ */
/* /*
......
...@@ -1480,7 +1480,7 @@ static void write_ignored_events_info_to_relay_log(THD *thd, Master_info *mi) ...@@ -1480,7 +1480,7 @@ static void write_ignored_events_info_to_relay_log(THD *thd, Master_info *mi)
" to the relay log, SHOW SLAVE STATUS may be" " to the relay log, SHOW SLAVE STATUS may be"
" inaccurate"); " inaccurate");
rli->relay_log.harvest_bytes_written(&rli->log_space_total); rli->relay_log.harvest_bytes_written(&rli->log_space_total);
if (flush_master_info(mi, 1)) if (flush_master_info(mi, TRUE, TRUE))
sql_print_error("Failed to flush master info file"); sql_print_error("Failed to flush master info file");
delete ev; delete ev;
} }
...@@ -2731,7 +2731,7 @@ Stopping slave I/O thread due to out-of-memory error from master"); ...@@ -2731,7 +2731,7 @@ Stopping slave I/O thread due to out-of-memory error from master");
"could not queue event from master"); "could not queue event from master");
goto err; goto err;
} }
if (flush_master_info(mi, 1)) if (flush_master_info(mi, TRUE, TRUE))
{ {
sql_print_error("Failed to flush master info file"); sql_print_error("Failed to flush master info file");
goto err; goto err;
......
...@@ -1282,7 +1282,7 @@ bool change_master(THD* thd, Master_info* mi) ...@@ -1282,7 +1282,7 @@ bool change_master(THD* thd, Master_info* mi)
Relay log's IO_CACHE may not be inited, if rli->inited==0 (server was never Relay log's IO_CACHE may not be inited, if rli->inited==0 (server was never
a slave before). a slave before).
*/ */
if (flush_master_info(mi, 0)) if (flush_master_info(mi, FALSE, FALSE))
{ {
my_error(ER_RELAY_LOG_INIT, MYF(0), "Failed to flush master info file"); my_error(ER_RELAY_LOG_INIT, MYF(0), "Failed to flush master info file");
unlock_slave_threads(mi); unlock_slave_threads(mi);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment