Commit da1bf248 authored by Luis Soares's avatar Luis Soares

BUG#38826 Race in MYSQL_LOG::purge_logs is impossible to debug in production

BUG#39325 Server crash inside MYSQL_LOG::purge_first_log halts replicaiton

The patch reverses the order of the purging and updating events for log and relay-log.info/index files respectively.
This solves the problem of having holes caused by crashes happening between updating info/index files and purging logs.

NOTE: This is a combined patch for BUG#38826 and BUG#39325. This patch is based on bugteam tree and takes into account reviewers suggestions.
parent fef07511
...@@ -21,6 +21,7 @@ flush logs; ...@@ -21,6 +21,7 @@ flush logs;
*** must be a warning master-bin.000001 was not found *** *** must be a warning master-bin.000001 was not found ***
Warnings: Warnings:
Warning 1477 Being purged log MYSQLTEST_VARDIR/log/master-bin.000001 was not found Warning 1477 Being purged log MYSQLTEST_VARDIR/log/master-bin.000001 was not found
Warning 1477 Being purged log MYSQLTEST_VARDIR/log/master-bin.000001 was not found
*** must show one record, of the active binlog, left in the index file after PURGE *** *** must show one record, of the active binlog, left in the index file after PURGE ***
show binary logs; show binary logs;
Log_name File_size Log_name File_size
......
...@@ -417,6 +417,7 @@ MYSQL_LOG::MYSQL_LOG() ...@@ -417,6 +417,7 @@ MYSQL_LOG::MYSQL_LOG()
index_file_name[0] = 0; index_file_name[0] = 0;
bzero((char*) &log_file,sizeof(log_file)); bzero((char*) &log_file,sizeof(log_file));
bzero((char*) &index_file, sizeof(index_file)); bzero((char*) &index_file, sizeof(index_file));
bzero((char*) &purge_temp, sizeof(purge_temp));
} }
/* this is called only once */ /* this is called only once */
...@@ -1059,10 +1060,10 @@ err: ...@@ -1059,10 +1060,10 @@ err:
IMPLEMENTATION IMPLEMENTATION
- Protects index file with LOCK_index - Protects index file with LOCK_index
- Read the next file name from the index file and store in rli->linfo
- Delete relevant relay log files - Delete relevant relay log files
- Copy all file names after these ones to the front of the index file - Copy all file names after these ones to the front of the index file
- If the OS has truncate, truncate the file, else fill it with \n' - If the OS has truncate, truncate the file, else fill it with \n'
- Read the next file name from the index file and store in rli->linfo
RETURN VALUES RETURN VALUES
0 ok 0 ok
...@@ -1076,6 +1077,7 @@ err: ...@@ -1076,6 +1077,7 @@ err:
int MYSQL_LOG::purge_first_log(struct st_relay_log_info* rli, bool included) int MYSQL_LOG::purge_first_log(struct st_relay_log_info* rli, bool included)
{ {
int error; int error;
char *to_purge_if_included= NULL;
DBUG_ENTER("purge_first_log"); DBUG_ENTER("purge_first_log");
DBUG_ASSERT(is_open()); DBUG_ASSERT(is_open());
...@@ -1083,36 +1085,20 @@ int MYSQL_LOG::purge_first_log(struct st_relay_log_info* rli, bool included) ...@@ -1083,36 +1085,20 @@ int MYSQL_LOG::purge_first_log(struct st_relay_log_info* rli, bool included)
DBUG_ASSERT(!strcmp(rli->linfo.log_file_name,rli->event_relay_log_name)); DBUG_ASSERT(!strcmp(rli->linfo.log_file_name,rli->event_relay_log_name));
pthread_mutex_lock(&LOCK_index); pthread_mutex_lock(&LOCK_index);
pthread_mutex_lock(&rli->log_space_lock); to_purge_if_included= my_strdup(rli->group_relay_log_name, MYF(0));
rli->relay_log.purge_logs(rli->group_relay_log_name, included,
0, 0, &rli->log_space_total);
// Tell the I/O thread to take the relay_log_space_limit into account
rli->ignore_log_space_limit= 0;
pthread_mutex_unlock(&rli->log_space_lock);
/*
Ok to broadcast after the critical region as there is no risk of
the mutex being destroyed by this thread later - this helps save
context switches
*/
pthread_cond_broadcast(&rli->log_space_cond);
/* /*
Read the next log file name from the index file and pass it back to Read the next log file name from the index file and pass it back to
the caller the caller.
If included is true, we want the first relay log;
otherwise we want the one after event_relay_log_name.
*/ */
if ((included && (error=find_log_pos(&rli->linfo, NullS, 0))) || if((error=find_log_pos(&rli->linfo, rli->event_relay_log_name, 0)) ||
(!included && (error=find_next_log(&rli->linfo, 0)))
((error=find_log_pos(&rli->linfo, rli->event_relay_log_name, 0)) ||
(error=find_next_log(&rli->linfo, 0)))))
{ {
char buff[22]; char buff[22];
sql_print_error("next log error: %d offset: %s log: %s included: %d", sql_print_error("next log error: %d offset: %s log: %s included: %d",
error, error,
llstr(rli->linfo.index_file_offset,buff), llstr(rli->linfo.index_file_offset,buff),
rli->group_relay_log_name, rli->event_relay_log_name,
included); included);
goto err; goto err;
} }
...@@ -1140,7 +1126,42 @@ int MYSQL_LOG::purge_first_log(struct st_relay_log_info* rli, bool included) ...@@ -1140,7 +1126,42 @@ int MYSQL_LOG::purge_first_log(struct st_relay_log_info* rli, bool included)
/* Store where we are in the new file for the execution thread */ /* Store where we are in the new file for the execution thread */
flush_relay_log_info(rli); flush_relay_log_info(rli);
DBUG_EXECUTE_IF("crash_before_purge_logs", abort(););
pthread_mutex_lock(&rli->log_space_lock);
rli->relay_log.purge_logs(to_purge_if_included, included,
0, 0, &rli->log_space_total);
// Tell the I/O thread to take the relay_log_space_limit into account
rli->ignore_log_space_limit= 0;
pthread_mutex_unlock(&rli->log_space_lock);
/*
Ok to broadcast after the critical region as there is no risk of
the mutex being destroyed by this thread later - this helps save
context switches
*/
pthread_cond_broadcast(&rli->log_space_cond);
/*
* Need to update the log pos because purge logs has been called
* after fetching initially the log pos at the begining of the method.
*/
if(error=find_log_pos(&rli->linfo, rli->event_relay_log_name, 0))
{
char buff[22];
sql_print_error("next log error: %d offset: %s log: %s included: %d",
error,
llstr(rli->linfo.index_file_offset,buff),
rli->group_relay_log_name,
included);
goto err;
}
/* If included was passed, rli->linfo should be the first entry. */
DBUG_ASSERT(!included || rli->linfo.index_file_start_offset == 0);
err: err:
my_free(to_purge_if_included, MYF(0));
pthread_mutex_unlock(&LOCK_index); pthread_mutex_unlock(&LOCK_index);
DBUG_RETURN(error); DBUG_RETURN(error);
} }
...@@ -1199,8 +1220,36 @@ int MYSQL_LOG::purge_logs(const char *to_log, ...@@ -1199,8 +1220,36 @@ int MYSQL_LOG::purge_logs(const char *to_log,
if (need_mutex) if (need_mutex)
pthread_mutex_lock(&LOCK_index); pthread_mutex_lock(&LOCK_index);
if ((error=find_log_pos(&log_info, to_log, 0 /*no mutex*/))) if ((error=find_log_pos(&log_info, to_log, 0 /*no mutex*/)))
{
sql_print_error("MYSQL_LOG::purge_logs was called with file %s not "
"listed in the index.", to_log);
goto err; goto err;
}
/*
For crash recovery reasons the index needs to be updated before
any files are deleted. Move files to be deleted into a temp file
to be processed after the index is updated.
*/
if (!my_b_inited(&purge_temp))
{
if (error=open_cached_file(&purge_temp, mysql_tmpdir, TEMP_PREFIX,
DISK_BUFFER_SIZE, MYF(MY_WME)))
{
sql_print_error("MYSQL_LOG::purge_logs failed to open purge_temp");
goto err;
}
}
else
{
if (error=reinit_io_cache(&purge_temp, WRITE_CACHE, 0, 0, 1))
{
sql_print_error("MYSQL_LOG::purge_logs failed to reinit purge_temp "
"for write");
goto err;
}
}
/* /*
File name exists in index file; delete until we find this file File name exists in index file; delete until we find this file
...@@ -1211,6 +1260,59 @@ int MYSQL_LOG::purge_logs(const char *to_log, ...@@ -1211,6 +1260,59 @@ int MYSQL_LOG::purge_logs(const char *to_log,
while ((strcmp(to_log,log_info.log_file_name) || (exit_loop=included)) && while ((strcmp(to_log,log_info.log_file_name) || (exit_loop=included)) &&
!log_in_use(log_info.log_file_name)) !log_in_use(log_info.log_file_name))
{ {
if ((error=my_b_write(&purge_temp, (byte*)log_info.log_file_name,
strlen(log_info.log_file_name))) ||
(error=my_b_write(&purge_temp, (byte*)"\n", 1)))
{
sql_print_error("MYSQL_LOG::purge_logs failed to copy %s to purge_temp",
log_info.log_file_name);
goto err;
}
if (find_next_log(&log_info, 0) || exit_loop)
break;
}
/* We know how many files to delete. Update index file. */
if (error=update_log_index(&log_info, need_update_threads))
{
sql_print_error("MSYQL_LOG::purge_logs failed to update the index file");
goto err;
}
DBUG_EXECUTE_IF("crash_after_update_index", abort(););
/* Switch purge_temp for read. */
if (error=reinit_io_cache(&purge_temp, READ_CACHE, 0, 0, 0))
{
sql_print_error("MSYQL_LOG::purge_logs failed to reinit purge_temp "
"for read");
goto err;
}
/* Read each entry from purge_temp and delete the file. */
for (;;)
{
uint length;
if ((length=my_b_gets(&purge_temp, log_info.log_file_name,
FN_REFLEN)) <= 1)
{
if (purge_temp.error)
{
error= purge_temp.error;
sql_print_error("MSYQL_LOG::purge_logs error %d reading from "
"purge_temp", error);
goto err;
}
/* Reached EOF */
break;
}
/* Get rid of the trailing '\n' */
log_info.log_file_name[length-1]= 0;
MY_STAT s; MY_STAT s;
if (!my_stat(log_info.log_file_name, &s, MYF(0))) if (!my_stat(log_info.log_file_name, &s, MYF(0)))
{ {
...@@ -1304,15 +1406,7 @@ int MYSQL_LOG::purge_logs(const char *to_log, ...@@ -1304,15 +1406,7 @@ int MYSQL_LOG::purge_logs(const char *to_log,
} }
} }
} }
if (find_next_log(&log_info, 0) || exit_loop)
break;
} }
/*
If we get killed -9 here, the sysadmin would have to edit
the log index file after restart - otherwise, this should be safe
*/
error= update_log_index(&log_info, need_update_threads);
err: err:
if (need_mutex) if (need_mutex)
...@@ -1326,7 +1420,6 @@ err: ...@@ -1326,7 +1420,6 @@ err:
SYNOPSIS SYNOPSIS
purge_logs_before_date() purge_logs_before_date()
thd Thread pointer
before_date Delete all log files before given date. before_date Delete all log files before given date.
NOTES NOTES
...@@ -1343,6 +1436,7 @@ err: ...@@ -1343,6 +1436,7 @@ err:
int MYSQL_LOG::purge_logs_before_date(time_t purge_time) int MYSQL_LOG::purge_logs_before_date(time_t purge_time)
{ {
int error; int error;
char to_log[FN_REFLEN];
LOG_INFO log_info; LOG_INFO log_info;
MY_STAT stat_area; MY_STAT stat_area;
THD *thd= current_thd; THD *thd= current_thd;
...@@ -1350,12 +1444,8 @@ int MYSQL_LOG::purge_logs_before_date(time_t purge_time) ...@@ -1350,12 +1444,8 @@ int MYSQL_LOG::purge_logs_before_date(time_t purge_time)
DBUG_ENTER("purge_logs_before_date"); DBUG_ENTER("purge_logs_before_date");
pthread_mutex_lock(&LOCK_index); pthread_mutex_lock(&LOCK_index);
to_log[0]= 0;
/*
Delete until we find curren file
or a file that is used or a file
that is older than purge_time.
*/
if ((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/))) if ((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/)))
goto err; goto err;
...@@ -1405,54 +1495,18 @@ int MYSQL_LOG::purge_logs_before_date(time_t purge_time) ...@@ -1405,54 +1495,18 @@ int MYSQL_LOG::purge_logs_before_date(time_t purge_time)
} }
else else
{ {
if (stat_area.st_mtime >= purge_time) if (stat_area.st_mtime < purge_time)
strmake(to_log,
log_info.log_file_name,
sizeof(log_info.log_file_name));
else
break; break;
if (my_delete(log_info.log_file_name, MYF(0)))
{
if (my_errno == ENOENT)
{
/* It's not fatal even if we can't delete a log file */
if (thd)
{
push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
log_info.log_file_name);
}
sql_print_information("Failed to delete file '%s'",
log_info.log_file_name);
my_errno= 0;
}
else
{
if (thd)
{
push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
ER_BINLOG_PURGE_FATAL_ERR,
"a problem with deleting %s; "
"consider examining correspondence "
"of your binlog index file "
"to the actual binlog files",
log_info.log_file_name);
}
else
{
sql_print_information("Failed to delete log file '%s'",
log_info.log_file_name);
}
error= LOG_INFO_FATAL;
goto err;
}
}
} }
if (find_next_log(&log_info, 0)) if (find_next_log(&log_info, 0))
break; break;
} }
/* error= (to_log[0] ? purge_logs(to_log, 1, 0, 1, (ulonglong *) 0) : 0);
If we get killed -9 here, the sysadmin would have to edit
the log index file after restart - otherwise, this should be safe
*/
error= update_log_index(&log_info, 1);
err: err:
pthread_mutex_unlock(&LOCK_index); pthread_mutex_unlock(&LOCK_index);
......
...@@ -205,6 +205,13 @@ class MYSQL_LOG: public TC_LOG ...@@ -205,6 +205,13 @@ class MYSQL_LOG: public TC_LOG
time_t last_time,query_start; time_t last_time,query_start;
IO_CACHE log_file; IO_CACHE log_file;
IO_CACHE index_file; IO_CACHE index_file;
/*
purge_temp is a temp file used in purge_logs so that the index file
can be updated before deleting files from disk, yielding better crash
recovery. It is created on demand the first time purge_logs is called
and then reused for subsequent calls. It is cleaned up in cleanup().
*/
IO_CACHE purge_temp;
char *name; char *name;
char time_buff[20],db[NAME_LEN+1]; char time_buff[20],db[NAME_LEN+1];
char log_file_name[FN_REFLEN],index_file_name[FN_REFLEN]; char log_file_name[FN_REFLEN],index_file_name[FN_REFLEN];
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment