Commit adb70162 authored by Monty's avatar Monty

MDEV-19117 Don't keep binary log index file locked during show binary logs

On some systems with 10,000+ binlogs, show binary logs could block
log rotation for more than 10 seconds.

This patch fixes this by first caching all binary log names and
releases all mutexes while calculating the sizes of the binary logs.

Other things:
- Ensure that reinit_io_cache() sets end_of_file when moving to read_cache.
  This ensures that external changes of the underlying file is known to
  the cache.
- get_binlog_list() is made more efficent and show_binlogs() is changed
  to call get_binlog_list()

Reviewed by Andrei Elkin
parent e3bffd57
This diff is collapsed.
connect con1,localhost,root,,;
FLUSH LOGS;
FLUSH LOGS;
FLUSH LOGS;
SET DEBUG_SYNC= "at_after_lock_index WAIT_FOR con1_go";
SHOW BINARY LOGS;
connect con2,localhost,root,,;
RESET MASTER;
FLUSH LOGS;
SET DEBUG_SYNC= "now SIGNAL con1_go";
connection con1;
# The correct result must consists of two records
Log_name File_size
master-bin.000001 #
master-bin.000002 #
SET debug_sync = 'reset';
# ==== Purpose ====
#
# functional test for open_binlog call using when
#
# flush log reset master/slave called
#
# ==== Related ====
#
# TXSQL feature 67 binlog rotate perf optimization
let $engine= myisam;
--source include/have_binlog_format_row.inc
--connect(conn1,localhost,root,,test)
reset master;
create database test_rotate_db;
use test_rotate_db;
--echo #currrent engine=$engine
# Create a new table
--replace_column 2 #
show binary logs;
--eval CREATE TABLE t1_$engine (c1 INT) ENGINE=$engine
--eval insert into t1_$engine values(0),(1)
# do batch flush and show
let $loop_times= 100;
--source include/show_master_status.inc
while ($loop_times) {
flush logs;
flush logs;
if ($loop_times < 5)
{
--source include/show_master_status.inc
if ($loop_times == 4)
{
--source include/show_binary_logs.inc
}
reset master;
}
if ($loop_times >= 5)
{
flush logs;
}
#
dec $loop_times;
--echo # left times= $loop_times
}
--echo # [engine=$engine] after first loop_times=$loop_times, show master logs results
--source include/show_binary_logs.inc
# do batch flush and show with restart mysql
--source include/show_master_status.inc
let $loop_times= 10;
while ($loop_times) {
flush logs;
flush logs;
--echo #begin to restart mysqld current loop_times=$loop_times
--source include/restart_mysqld.inc
#
dec $loop_times;
--echo # left restart times= $loop_times
}
--echo # [engine=$engine] after second loop_times=$loop_times, show master logs results
--source include/show_binary_logs.inc
# try to change the log-bin configs and restart
--echo # ======= now try to change the log-bin config for mysqld =======
--let $restart_parameters="--log-bin=new_log_bin"
--echo #begin to restart mysqld
--source include/restart_mysqld.inc
--let $restart_parameters= ""
--source include/show_binary_logs.inc
let $loop_times= 10;
while ($loop_times) {
flush logs;
flush logs;
if ($loop_times < 5)
{
if ($loop_times == 4)
{
--source include/show_binary_logs.inc
}
reset master;
}
if ($loop_times >= 5)
{
flush logs;
}
#
dec $loop_times;
--echo # left times= $loop_times
}
--echo # [engine=$engine] after third loop_times=$loop_times, show master logs results
--source include/show_binary_logs.inc
##cleanup
drop database test_rotate_db;
--source include/have_debug.inc
--source include/have_debug_sync.inc
# mere to limit it run rate
--source include/have_binlog_format_row.inc
connect(con1,localhost,root,,);
FLUSH LOGS;
FLUSH LOGS;
FLUSH LOGS;
SET DEBUG_SYNC= "at_after_lock_index WAIT_FOR con1_go";
--send SHOW BINARY LOGS
connect(con2,localhost,root,,);
RESET MASTER;
FLUSH LOGS;
SET DEBUG_SYNC= "now SIGNAL con1_go";
--connection con1
--echo # The correct result must consists of two records
--replace_column 2 #
--reap
SET debug_sync = 'reset';
...@@ -496,10 +496,17 @@ my_bool reinit_io_cache(IO_CACHE *info, enum cache_type type, ...@@ -496,10 +496,17 @@ my_bool reinit_io_cache(IO_CACHE *info, enum cache_type type,
{ {
/* /*
If we change from WRITE_CACHE to READ_CACHE, assume that everything If we change from WRITE_CACHE to READ_CACHE, assume that everything
after the current positions should be ignored after the current positions should be ignored. In other cases we
update end_of_file as it may have changed since last init.
*/ */
if (info->type == WRITE_CACHE && type == READ_CACHE) if (type == READ_CACHE)
info->end_of_file=my_b_tell(info); {
if (info->type == WRITE_CACHE)
info->end_of_file= my_b_tell(info);
else
info->end_of_file= mysql_file_seek(info->file, 0L, MY_SEEK_END,
MYF(0));
}
/* flush cache if we want to reuse it */ /* flush cache if we want to reuse it */
if (!clear_cache && my_b_flush_io_cache(info,1)) if (!clear_cache && my_b_flush_io_cache(info,1))
DBUG_RETURN(1); DBUG_RETURN(1);
......
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#include "debug_sync.h" #include "debug_sync.h"
#include "semisync_master.h" #include "semisync_master.h"
#include "semisync_slave.h" #include "semisync_slave.h"
#include "mysys_err.h"
enum enum_gtid_until_state { enum enum_gtid_until_state {
GTID_UNTIL_NOT_DONE, GTID_UNTIL_NOT_DONE,
...@@ -866,44 +867,88 @@ static int send_heartbeat_event(binlog_send_info *info, ...@@ -866,44 +867,88 @@ static int send_heartbeat_event(binlog_send_info *info,
struct binlog_file_entry struct binlog_file_entry
{ {
binlog_file_entry *next; binlog_file_entry *next;
char *name; LEX_CSTRING name;
my_off_t size;
}; };
/**
Read all binary logs and return as a list
@param memroot Use this for mem_root calls
@param reverse If set filenames returned in latest first order (reverse
order than in the index file)
@param already_locked If set, index file is already locked.
@return 0 error
# pointer to list
@notes
index_file is always unlocked at return
*/
static binlog_file_entry * static binlog_file_entry *
get_binlog_list(MEM_ROOT *memroot) get_binlog_list(MEM_ROOT *memroot, bool reverse= true,
bool already_locked= false)
{ {
IO_CACHE *index_file; IO_CACHE *index_file;
char fname[FN_REFLEN]; char *fname, *buff, *end_pos;
size_t length; binlog_file_entry *current_list= NULL, *current_link= NULL, *e;
binlog_file_entry *current_list= NULL, *e;
DBUG_ENTER("get_binlog_list"); DBUG_ENTER("get_binlog_list");
if (!mysql_bin_log.is_open()) if (!mysql_bin_log.is_open())
{ {
if (already_locked)
mysql_bin_log.unlock_index();
my_error(ER_NO_BINARY_LOGGING, MYF(0)); my_error(ER_NO_BINARY_LOGGING, MYF(0));
DBUG_RETURN(NULL); DBUG_RETURN(NULL);
} }
if (!already_locked)
mysql_bin_log.lock_index(); mysql_bin_log.lock_index();
index_file=mysql_bin_log.get_index_file(); index_file=mysql_bin_log.get_index_file();
reinit_io_cache(index_file, READ_CACHE, (my_off_t) 0, 0, 0); reinit_io_cache(index_file, READ_CACHE, (my_off_t) 0, 0, 0);
/* The file ends with EOF or empty line */ if (!(buff= (char*) alloc_root(memroot,
while ((length=my_b_gets(index_file, fname, sizeof(fname))) > 1) (size_t) (index_file->end_of_file+1))))
{ goto err;
--length; /* Remove the newline */ if (my_b_read(index_file, (uchar*) buff, (size_t) index_file->end_of_file))
if (!(e= (binlog_file_entry *)alloc_root(memroot, sizeof(*e))) ||
!(e->name= strmake_root(memroot, fname, length)))
{ {
my_error(EE_READ, MYF(ME_ERROR_LOG), my_filename(index_file->file),
my_errno);
goto err;
}
buff[index_file->end_of_file]= 0; // For strchr
mysql_bin_log.unlock_index(); mysql_bin_log.unlock_index();
/* The file ends with EOF or empty line */
for (fname= buff;
(end_pos= strchr(fname, '\n')) && (end_pos - fname) > 1;
fname= end_pos+1)
{
end_pos[0]= '\0'; // remove the newline
if (!(e= (binlog_file_entry *) alloc_root(memroot, sizeof(*e))))
DBUG_RETURN(NULL); DBUG_RETURN(NULL);
} if (reverse)
{
e->next= current_list; e->next= current_list;
current_list= e; current_list= e;
} }
mysql_bin_log.unlock_index(); else
{
e->next= NULL;
if (!current_link)
current_list= e;
else
current_link->next= e;
current_link= e;
}
e->name.str= fname;
e->name.length= (size_t) (end_pos - fname);
}
DBUG_RETURN(current_list); DBUG_RETURN(current_list);
err:
mysql_bin_log.unlock_index();
DBUG_RETURN(0);
} }
...@@ -1228,8 +1273,7 @@ gtid_find_binlog_file(slave_connection_state *state, char *out_name, ...@@ -1228,8 +1273,7 @@ gtid_find_binlog_file(slave_connection_state *state, char *out_name,
char buf[FN_REFLEN]; char buf[FN_REFLEN];
init_alloc_root(&memroot, "gtid_find_binlog_file", init_alloc_root(&memroot, "gtid_find_binlog_file",
10*(FN_REFLEN+sizeof(binlog_file_entry)), 8192, 0, MYF(MY_THREAD_SPECIFIC));
0, MYF(MY_THREAD_SPECIFIC));
if (!(list= get_binlog_list(&memroot))) if (!(list= get_binlog_list(&memroot)))
{ {
errormsg= "Out of memory while looking for GTID position in binlog"; errormsg= "Out of memory while looking for GTID position in binlog";
...@@ -1255,7 +1299,7 @@ gtid_find_binlog_file(slave_connection_state *state, char *out_name, ...@@ -1255,7 +1299,7 @@ gtid_find_binlog_file(slave_connection_state *state, char *out_name,
Read the Gtid_list_log_event at the start of the binlog file to Read the Gtid_list_log_event at the start of the binlog file to
get the binlog state. get the binlog state.
*/ */
if (normalize_binlog_name(buf, list->name, false)) if (normalize_binlog_name(buf, list->name.str, false))
{ {
errormsg= "Failed to determine binlog file name while looking for " errormsg= "Failed to determine binlog file name while looking for "
"GTID position in binlog"; "GTID position in binlog";
...@@ -4193,17 +4237,25 @@ void show_binlogs_get_fields(THD *thd, List<Item> *field_list) ...@@ -4193,17 +4237,25 @@ void show_binlogs_get_fields(THD *thd, List<Item> *field_list)
@retval FALSE success @retval FALSE success
@retval TRUE failure @retval TRUE failure
@notes
We only keep the index locked while reading all file names as
if there are 1000+ binary logs, there can be a serious impact
as getting the file sizes can take some notable time (up to 20 seconds
has been reported) and we don't want to block log rotations for that long.
*/ */
#define BINLOG_INDEX_RETRY_COUNT 5
bool show_binlogs(THD* thd) bool show_binlogs(THD* thd)
{ {
IO_CACHE *index_file;
LOG_INFO cur; LOG_INFO cur;
File file; MEM_ROOT mem_root;
char fname[FN_REFLEN]; binlog_file_entry *list;
List<Item> field_list; List<Item> field_list;
size_t length;
size_t cur_dir_len;
Protocol *protocol= thd->protocol; Protocol *protocol= thd->protocol;
uint retry_count= 0;
size_t cur_dir_len;
DBUG_ENTER("show_binlogs"); DBUG_ENTER("show_binlogs");
if (!mysql_bin_log.is_open()) if (!mysql_bin_log.is_open())
...@@ -4218,54 +4270,70 @@ bool show_binlogs(THD* thd) ...@@ -4218,54 +4270,70 @@ bool show_binlogs(THD* thd)
Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
DBUG_RETURN(TRUE); DBUG_RETURN(TRUE);
init_alloc_root(&mem_root, "binlog_file_list", 8192, 0,
MYF(MY_THREAD_SPECIFIC));
retry:
/*
The current mutex handling here is to ensure we get the current log position
and all the log files from the index in sync without any index rotation
in between.
*/
mysql_mutex_lock(mysql_bin_log.get_log_lock()); mysql_mutex_lock(mysql_bin_log.get_log_lock());
mysql_bin_log.lock_index(); mysql_bin_log.lock_index();
index_file=mysql_bin_log.get_index_file(); mysql_bin_log.raw_get_current_log(&cur);
mysql_mutex_unlock(mysql_bin_log.get_log_lock());
mysql_bin_log.raw_get_current_log(&cur); // dont take mutex
mysql_mutex_unlock(mysql_bin_log.get_log_lock()); // lockdep, OK
cur_dir_len= dirname_length(cur.log_file_name); /* The following call unlocks lock_index */
if ((!(list= get_binlog_list(&mem_root, false, true))))
goto err;
reinit_io_cache(index_file, READ_CACHE, (my_off_t) 0, 0, 0); DEBUG_SYNC(thd, "at_after_lock_index");
/* The file ends with EOF or empty line */ // the 1st loop computes the sizes; If stat() fails, then retry
while ((length=my_b_gets(index_file, fname, sizeof(fname))) > 1) cur_dir_len= dirname_length(cur.log_file_name);
for (binlog_file_entry *cur_link= list; cur_link; cur_link= cur_link->next)
{ {
size_t dir_len; const char *fname= cur_link->name.str;
ulonglong file_length= 0; // Length if open fails size_t dir_len= dirname_length(fname);
fname[--length] = '\0'; // remove the newline size_t length= cur_link->name.length- dir_len;
protocol->prepare_for_resend(); /* Skip directory name as we shouldn't include this in the result */
dir_len= dirname_length(fname); cur_link->name.str+= dir_len;
length-= dir_len; cur_link->name.length-= dir_len;
protocol->store(fname + dir_len, length, &my_charset_bin);
if (!(strncmp(fname+dir_len, cur.log_file_name+cur_dir_len, length))) if (!(strncmp(fname+dir_len, cur.log_file_name+cur_dir_len, length)))
file_length= cur.pos; /* The active log, use the active position */ cur_link->size= cur.pos; /* The active log, use the active position */
else
{
MY_STAT stat_info;
if (mysql_file_stat(key_file_binlog, fname, &stat_info, MYF(0)))
cur_link->size= stat_info.st_size;
else else
{ {
/* this is an old log, open it and find the size */ if (retry_count++ < BINLOG_INDEX_RETRY_COUNT)
if ((file= mysql_file_open(key_file_binlog,
fname, O_RDONLY | O_SHARE | O_BINARY,
MYF(0))) >= 0)
{ {
file_length= (ulonglong) mysql_file_seek(file, 0L, MY_SEEK_END, MYF(0)); free_root(&mem_root, MYF(MY_MARK_BLOCKS_FREE));
mysql_file_close(file, MYF(0)); goto retry;
}
cur_link->size= 0;
} }
} }
protocol->store(file_length); }
for (binlog_file_entry *cur_link= list; cur_link; cur_link= cur_link->next)
{
protocol->prepare_for_resend();
protocol->store(cur_link->name.str, cur_link->name.length, &my_charset_bin);
protocol->store((ulonglong) cur_link->size);
if (protocol->write()) if (protocol->write())
goto err; goto err;
} }
if (unlikely(index_file->error == -1)) free_root(&mem_root, MYF(0));
goto err;
mysql_bin_log.unlock_index();
my_eof(thd); my_eof(thd);
DBUG_RETURN(FALSE); DBUG_RETURN(FALSE);
err: err:
mysql_bin_log.unlock_index(); free_root(&mem_root, MYF(0));
DBUG_RETURN(TRUE); DBUG_RETURN(TRUE);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment