Commit 0c9aaecc authored by Luis Soares's avatar Luis Soares

BUG#38826 Race in MYSQL_LOG::purge_logs is impossible to debug in production

BUG#39325 Server crash inside MYSQL_LOG::purge_first_log halts replicaiton

The patch reverses the order of the purging and updating events for log and
relay-log.info/index files respectively.

This solves the problem of having holes caused by crashes happening between updating
info/index files and purging logs.

This patch also contains an aditional test case for testing the crashing before purge logs.
      
NOTE1: This is a combined patch for BUG#38826 and BUG#39325. This patch is based on
bugteam tree and takes into account reviewers suggestions.

NOTE2: Merge from 5.0-bugteam
parents 3059c95c 5726574b
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
let $counter= 500; let $counter= 500;
while ($mysql_errno) while ($mysql_errno)
{ {
--error 0,2002,2006 --error 0,2002,2006,2003
show status; show status;
dec $counter; dec $counter;
......
...@@ -21,6 +21,7 @@ flush logs; ...@@ -21,6 +21,7 @@ flush logs;
*** must be a warning master-bin.000001 was not found *** *** must be a warning master-bin.000001 was not found ***
Warnings: Warnings:
Warning 1612 Being purged log MYSQLTEST_VARDIR/log/master-bin.000001 was not found Warning 1612 Being purged log MYSQLTEST_VARDIR/log/master-bin.000001 was not found
Warning 1612 Being purged log MYSQLTEST_VARDIR/log/master-bin.000001 was not found
*** must show one record, of the active binlog, left in the index file after PURGE *** *** must show one record, of the active binlog, left in the index file after PURGE ***
show binary logs; show binary logs;
Log_name File_size Log_name File_size
......
stop slave;
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
reset master;
reset slave;
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
start slave;
stop slave;
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (id INT);
start slave;
SET GLOBAL debug= "+d,crash_before_purge_logs";
FLUSH LOGS;
ERROR HY000: Lost connection to MySQL server during query
start slave;
--source include/master-slave.inc
--source include/have_debug.inc
--disable_reconnect
# We have to sync with master, to ensure slave had time to start properly
# # before we stop it. If not, we get errors about UNIX_TIMESTAMP() in the
# log.
sync_slave_with_master;
stop slave;
--source include/wait_for_slave_to_stop.inc
# ON MASTER
connection master;
--disable_warnings
DROP TABLE IF EXISTS t1;
--enable_warnings
CREATE TABLE t1 (id INT);
let $1=100;
disable_query_log;
begin;
while ($1)
{
eval INSERT INTO t1 VALUES( $1 );
dec $1;
}
DROP TABLE t1;
save_master_pos;
enable_query_log;
## ON SLAVE
connection slave;
start slave;
--source include/wait_for_slave_to_start.inc
sync_with_master 0;
connection master;
save_master_pos;
connection slave;
--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/slave0.expect
SET GLOBAL debug= "+d,crash_before_purge_logs";
--error 2013
# try to rotate logs
FLUSH LOGS;
--enable_reconnect
--source include/wait_until_connected_again.inc
start slave;
--source include/wait_for_slave_to_start.inc
sync_with_master 0;
...@@ -2359,6 +2359,7 @@ MYSQL_BIN_LOG::MYSQL_BIN_LOG() ...@@ -2359,6 +2359,7 @@ MYSQL_BIN_LOG::MYSQL_BIN_LOG()
*/ */
index_file_name[0] = 0; index_file_name[0] = 0;
bzero((char*) &index_file, sizeof(index_file)); bzero((char*) &index_file, sizeof(index_file));
bzero((char*) &purge_temp, sizeof(purge_temp));
} }
/* this is called only once */ /* this is called only once */
...@@ -2953,6 +2954,7 @@ err: ...@@ -2953,6 +2954,7 @@ err:
int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included) int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
{ {
int error; int error;
char *to_purge_if_included= NULL;
DBUG_ENTER("purge_first_log"); DBUG_ENTER("purge_first_log");
DBUG_ASSERT(is_open()); DBUG_ASSERT(is_open());
...@@ -2960,36 +2962,20 @@ int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included) ...@@ -2960,36 +2962,20 @@ int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
DBUG_ASSERT(!strcmp(rli->linfo.log_file_name,rli->event_relay_log_name)); DBUG_ASSERT(!strcmp(rli->linfo.log_file_name,rli->event_relay_log_name));
pthread_mutex_lock(&LOCK_index); pthread_mutex_lock(&LOCK_index);
pthread_mutex_lock(&rli->log_space_lock); to_purge_if_included= my_strdup(rli->group_relay_log_name, MYF(0));
rli->relay_log.purge_logs(rli->group_relay_log_name, included,
0, 0, &rli->log_space_total);
// Tell the I/O thread to take the relay_log_space_limit into account
rli->ignore_log_space_limit= 0;
pthread_mutex_unlock(&rli->log_space_lock);
/*
Ok to broadcast after the critical region as there is no risk of
the mutex being destroyed by this thread later - this helps save
context switches
*/
pthread_cond_broadcast(&rli->log_space_cond);
/* /*
Read the next log file name from the index file and pass it back to Read the next log file name from the index file and pass it back to
the caller the caller.
If included is true, we want the first relay log;
otherwise we want the one after event_relay_log_name.
*/ */
if ((included && (error=find_log_pos(&rli->linfo, NullS, 0))) || if((error=find_log_pos(&rli->linfo, rli->event_relay_log_name, 0)) ||
(!included && (error=find_next_log(&rli->linfo, 0)))
((error=find_log_pos(&rli->linfo, rli->event_relay_log_name, 0)) ||
(error=find_next_log(&rli->linfo, 0)))))
{ {
char buff[22]; char buff[22];
sql_print_error("next log error: %d offset: %s log: %s included: %d", sql_print_error("next log error: %d offset: %s log: %s included: %d",
error, error,
llstr(rli->linfo.index_file_offset,buff), llstr(rli->linfo.index_file_offset,buff),
rli->group_relay_log_name, rli->event_relay_log_name,
included); included);
goto err; goto err;
} }
...@@ -3017,7 +3003,42 @@ int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included) ...@@ -3017,7 +3003,42 @@ int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
/* Store where we are in the new file for the execution thread */ /* Store where we are in the new file for the execution thread */
flush_relay_log_info(rli); flush_relay_log_info(rli);
DBUG_EXECUTE_IF("crash_before_purge_logs", abort(););
pthread_mutex_lock(&rli->log_space_lock);
rli->relay_log.purge_logs(to_purge_if_included, included,
0, 0, &rli->log_space_total);
// Tell the I/O thread to take the relay_log_space_limit into account
rli->ignore_log_space_limit= 0;
pthread_mutex_unlock(&rli->log_space_lock);
/*
Ok to broadcast after the critical region as there is no risk of
the mutex being destroyed by this thread later - this helps save
context switches
*/
pthread_cond_broadcast(&rli->log_space_cond);
/*
* Need to update the log pos because purge logs has been called
* after fetching initially the log pos at the begining of the method.
*/
if(error=find_log_pos(&rli->linfo, rli->event_relay_log_name, 0))
{
char buff[22];
sql_print_error("next log error: %d offset: %s log: %s included: %d",
error,
llstr(rli->linfo.index_file_offset,buff),
rli->group_relay_log_name,
included);
goto err;
}
/* If included was passed, rli->linfo should be the first entry. */
DBUG_ASSERT(!included || rli->linfo.index_file_start_offset == 0);
err: err:
my_free(to_purge_if_included, MYF(0));
pthread_mutex_unlock(&LOCK_index); pthread_mutex_unlock(&LOCK_index);
DBUG_RETURN(error); DBUG_RETURN(error);
} }
...@@ -3078,7 +3099,35 @@ int MYSQL_BIN_LOG::purge_logs(const char *to_log, ...@@ -3078,7 +3099,35 @@ int MYSQL_BIN_LOG::purge_logs(const char *to_log,
if (need_mutex) if (need_mutex)
pthread_mutex_lock(&LOCK_index); pthread_mutex_lock(&LOCK_index);
if ((error=find_log_pos(&log_info, to_log, 0 /*no mutex*/))) if ((error=find_log_pos(&log_info, to_log, 0 /*no mutex*/)))
{
sql_print_error("MYSQL_LOG::purge_logs was called with file %s not "
"listed in the index.", to_log);
goto err;
}
/*
For crash recovery reasons the index needs to be updated before
any files are deleted. Move files to be deleted into a temp file
to be processed after the index is updated.
*/
if (!my_b_inited(&purge_temp))
{
if (error=open_cached_file(&purge_temp, mysql_tmpdir, TEMP_PREFIX,
DISK_BUFFER_SIZE, MYF(MY_WME)))
{
sql_print_error("MYSQL_LOG::purge_logs failed to open purge_temp");
goto err;
}
}
else
{
if (error=reinit_io_cache(&purge_temp, WRITE_CACHE, 0, 0, 1))
{
sql_print_error("MYSQL_LOG::purge_logs failed to reinit purge_temp "
"for write");
goto err; goto err;
}
}
/* /*
File name exists in index file; delete until we find this file File name exists in index file; delete until we find this file
...@@ -3089,6 +3138,61 @@ int MYSQL_BIN_LOG::purge_logs(const char *to_log, ...@@ -3089,6 +3138,61 @@ int MYSQL_BIN_LOG::purge_logs(const char *to_log,
while ((strcmp(to_log,log_info.log_file_name) || (exit_loop=included)) && while ((strcmp(to_log,log_info.log_file_name) || (exit_loop=included)) &&
!log_in_use(log_info.log_file_name)) !log_in_use(log_info.log_file_name))
{ {
if ((error=my_b_write(&purge_temp, (const uchar*)log_info.log_file_name,
strlen(log_info.log_file_name))) ||
(error=my_b_write(&purge_temp, (const uchar*)"\n", 1)))
{
sql_print_error("MYSQL_LOG::purge_logs failed to copy %s to purge_temp",
log_info.log_file_name);
goto err;
}
if (find_next_log(&log_info, 0) || exit_loop)
break;
}
/* We know how many files to delete. Update index file. */
if (error=update_log_index(&log_info, need_update_threads))
{
sql_print_error("MSYQL_LOG::purge_logs failed to update the index file");
goto err;
}
DBUG_EXECUTE_IF("crash_after_update_index", abort(););
/* Switch purge_temp for read. */
if (error=reinit_io_cache(&purge_temp, READ_CACHE, 0, 0, 0))
{
sql_print_error("MSYQL_LOG::purge_logs failed to reinit purge_temp "
"for read");
goto err;
}
/* Read each entry from purge_temp and delete the file. */
for (;;)
{
uint length;
if ((length=my_b_gets(&purge_temp, log_info.log_file_name,
FN_REFLEN)) <= 1)
{
if (purge_temp.error)
{
error= purge_temp.error;
sql_print_error("MSYQL_LOG::purge_logs error %d reading from "
"purge_temp", error);
goto err;
}
/* Reached EOF */
break;
}
/* Get rid of the trailing '\n' */
log_info.log_file_name[length-1]= 0;
ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
MY_STAT s; MY_STAT s;
if (!my_stat(log_info.log_file_name, &s, MYF(0))) if (!my_stat(log_info.log_file_name, &s, MYF(0)))
{ {
...@@ -3189,20 +3293,6 @@ int MYSQL_BIN_LOG::purge_logs(const char *to_log, ...@@ -3189,20 +3293,6 @@ int MYSQL_BIN_LOG::purge_logs(const char *to_log,
} }
} }
} }
ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
if (find_next_log(&log_info, 0) || exit_loop)
break;
}
/*
If we get killed -9 here, the sysadmin would have to edit
the log index file after restart - otherwise, this should be safe
*/
error= update_log_index(&log_info, need_update_threads);
if (error == 0) {
error = ret;
} }
err: err:
...@@ -3216,7 +3306,7 @@ err: ...@@ -3216,7 +3306,7 @@ err:
index file. index file.
@param thd Thread pointer @param thd Thread pointer
@param before_date Delete all log files before given date. @param purge_time Delete all log files before given date.
@note @note
If any of the logs before the deleted one is in use, If any of the logs before the deleted one is in use,
...@@ -3233,6 +3323,7 @@ err: ...@@ -3233,6 +3323,7 @@ err:
int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time) int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time)
{ {
int error; int error;
char to_log[FN_REFLEN];
LOG_INFO log_info; LOG_INFO log_info;
MY_STAT stat_area; MY_STAT stat_area;
THD *thd= current_thd; THD *thd= current_thd;
...@@ -3240,12 +3331,8 @@ int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time) ...@@ -3240,12 +3331,8 @@ int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time)
DBUG_ENTER("purge_logs_before_date"); DBUG_ENTER("purge_logs_before_date");
pthread_mutex_lock(&LOCK_index); pthread_mutex_lock(&LOCK_index);
to_log[0]= 0;
/*
Delete until we find curren file
or a file that is used or a file
that is older than purge_time.
*/
if ((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/))) if ((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/)))
goto err; goto err;
...@@ -3295,55 +3382,18 @@ int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time) ...@@ -3295,55 +3382,18 @@ int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time)
} }
else else
{ {
if (stat_area.st_mtime >= purge_time) if (stat_area.st_mtime < purge_time)
break; strmake(to_log,
if (my_delete(log_info.log_file_name, MYF(0))) log_info.log_file_name,
{ sizeof(log_info.log_file_name));
if (my_errno == ENOENT)
{
/* It's not fatal even if we can't delete a log file */
if (thd)
{
push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
log_info.log_file_name);
}
sql_print_information("Failed to delete file '%s'",
log_info.log_file_name);
my_errno= 0;
}
else
{
if (thd)
{
push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
ER_BINLOG_PURGE_FATAL_ERR,
"a problem with deleting %s; "
"consider examining correspondence "
"of your binlog index file "
"to the actual binlog files",
log_info.log_file_name);
}
else else
{ break;
sql_print_information("Failed to delete log file '%s'",
log_info.log_file_name);
}
error= LOG_INFO_FATAL;
goto err;
}
}
ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
} }
if (find_next_log(&log_info, 0)) if (find_next_log(&log_info, 0))
break; break;
} }
/* error= (to_log[0] ? purge_logs(to_log, 1, 0, 1, (ulonglong *) 0) : 0);
If we get killed -9 here, the sysadmin would have to edit
the log index file after restart - otherwise, this should be safe
*/
error= update_log_index(&log_info, 1);
err: err:
pthread_mutex_unlock(&LOCK_index); pthread_mutex_unlock(&LOCK_index);
......
...@@ -233,6 +233,13 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG ...@@ -233,6 +233,13 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
pthread_cond_t update_cond; pthread_cond_t update_cond;
ulonglong bytes_written; ulonglong bytes_written;
IO_CACHE index_file; IO_CACHE index_file;
/*
purge_temp is a temp file used in purge_logs so that the index file
can be updated before deleting files from disk, yielding better crash
recovery. It is created on demand the first time purge_logs is called
and then reused for subsequent calls. It is cleaned up in cleanup().
*/
IO_CACHE purge_temp;
char index_file_name[FN_REFLEN]; char index_file_name[FN_REFLEN];
/* /*
The max size before rotation (usable only if log_type == LOG_BIN: binary The max size before rotation (usable only if log_type == LOG_BIN: binary
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment