Commit 34f11b06 authored by Kristian Nielsen's avatar Kristian Nielsen

Move deletion of old GTID rows to slave background thread

This patch changes how old rows in mysql.gtid_slave_pos* tables are deleted.
Instead of doing it as part of every replicated transaction in
record_gtid(), it is done periodically (every @@gtid_cleanup_batch_size
transactions) in the slave background thread.

This removes the deletion step from the replication process in SQL or worker
threads, which could speed up replication with many small transactions. It
also decreases contention on the global mutex LOCK_slave_state. And it
simplifies the logic, e.g. when a replicated transaction fails after having
deleted old rows.

With this patch, the deletion of old GTID rows happens asynchronously and
slightly non-deterministically. Thus the number of old rows in
mysql.gtid_slave_pos can temporarily exceed @@gtid_cleanup_batch_size. But
all old rows will be deleted eventually after sufficiently many new GTIDs
have been replicated.
parent 24a45d3b
......@@ -294,6 +294,15 @@ The following specify which files/extra groups are read (specified before remain
--group-concat-max-len=#
The maximum length of the result of function
GROUP_CONCAT()
--gtid-cleanup-batch-size=#
Normally does not need tuning. How many old rows must
accumulate in the mysql.gtid_slave_pos table before a
background job will be run to delete them. Can be
increased to reduce number of commits if using many
different engines with --gtid_pos_auto_engines, or to
reduce CPU overhead if using a huge number of different
gtid_domain_ids. Can be decreased to reduce number of old
rows in the table.
--gtid-domain-id=# Used with global transaction ID to identify logically
independent replication streams. When events can
propagate through multiple parallel paths (for example
......@@ -1434,6 +1443,7 @@ gdb FALSE
general-log FALSE
getopt-prefix-matching FALSE
group-concat-max-len 1048576
gtid-cleanup-batch-size 64
gtid-domain-id 0
gtid-ignore-duplicates FALSE
gtid-pos-auto-engines
......
......@@ -16,36 +16,32 @@ INSERT INTO t1 VALUES (1);
connection slave;
connection slave;
include/stop_slave.inc
SET @old_gtid_cleanup_batch_size= @@GLOBAL.gtid_cleanup_batch_size;
SET GLOBAL gtid_cleanup_batch_size= 2;
SET @old_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug="+d,gtid_slave_pos_simulate_failed_delete";
SET sql_log_bin= 0;
CALL mtr.add_suppression("Can't find file");
CALL mtr.add_suppression("<DEBUG> Error deleting old GTID row");
SET sql_log_bin= 1;
include/start_slave.inc
connection master;
INSERT INTO t1 VALUES (2);
connection slave;
include/wait_for_slave_sql_error.inc [errno=1942]
STOP SLAVE IO_THREAD;
SELECT domain_id, server_id, seq_no FROM mysql.gtid_slave_pos
ORDER BY domain_id, sub_id DESC LIMIT 1;
domain_id server_id seq_no
0 1 3
connection slave;
SELECT COUNT(*), MAX(seq_no) INTO @pre_count, @pre_max_seq_no
FROM mysql.gtid_slave_pos;
SELECT IF(@pre_count >= 20, "OK", CONCAT("Error: too few rows seen while errors injected: ", @pre_count));
IF(@pre_count >= 20, "OK", CONCAT("Error: too few rows seen while errors injected: ", @pre_count))
OK
SET GLOBAL debug_dbug= @old_dbug;
include/start_slave.inc
connection master;
INSERT INTO t1 VALUES (3);
connection slave;
connection slave;
SELECT domain_id, server_id, seq_no FROM mysql.gtid_slave_pos
ORDER BY domain_id, sub_id DESC LIMIT 1;
domain_id server_id seq_no
0 1 4
SELECT * FROM t1 ORDER BY i;
i
1
2
3
connection slave;
connection slave;
SELECT IF(COUNT(*) >= 1, "OK", CONCAT("Error: too few rows seen after errors no longer injected: ", COUNT(*)))
FROM mysql.gtid_slave_pos
WHERE seq_no <= @pre_max_seq_no;
IF(COUNT(*) >= 1, "OK", CONCAT("Error: too few rows seen after errors no longer injected: ", COUNT(*)))
OK
connection master;
DROP TABLE t1;
connection slave;
SET GLOBAL gtid_cleanup_batch_size= @old_gtid_cleanup_batch_size;
include/rpl_end.inc
......@@ -171,7 +171,7 @@ include/start_slave.inc
*** MDEV-4692: mysql.gtid_slave_pos accumulates values for a domain ***
SELECT domain_id, COUNT(*) FROM mysql.gtid_slave_pos GROUP BY domain_id;
domain_id COUNT(*)
0 2
0 3
1 2
connection server_1;
INSERT INTO t1 VALUES (11);
......@@ -179,7 +179,7 @@ connection server_2;
FLUSH NO_WRITE_TO_BINLOG TABLES;
SELECT domain_id, COUNT(*) FROM mysql.gtid_slave_pos GROUP BY domain_id;
domain_id COUNT(*)
0 2
0 4
1 2
include/start_slave.inc
connection server_1;
......@@ -189,8 +189,8 @@ connection server_2;
FLUSH NO_WRITE_TO_BINLOG TABLES;
SELECT domain_id, COUNT(*) FROM mysql.gtid_slave_pos GROUP BY domain_id;
domain_id COUNT(*)
0 2
1 2
0 3
1 1
*** MDEV-4650: show variables; ERROR 1946 (HY000): Failed to load replication slave GTID position ***
connection server_2;
SET sql_log_bin=0;
......
......@@ -12,6 +12,8 @@ SET GLOBAL slave_parallel_threads=10;
CHANGE MASTER TO master_use_gtid=slave_pos;
SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode;
SET GLOBAL slave_parallel_mode='optimistic';
SET @old_gtid_cleanup_batch_size= @@GLOBAL.gtid_cleanup_batch_size;
SET GLOBAL gtid_cleanup_batch_size= 1000000;
connection server_1;
INSERT INTO t1 VALUES(1,1);
BEGIN;
......@@ -131,6 +133,11 @@ c
204
205
206
SELECT IF(COUNT(*) >= 30, "OK", CONCAT("Error: too few old rows found: ", COUNT(*)))
FROM mysql.gtid_slave_pos;
IF(COUNT(*) >= 30, "OK", CONCAT("Error: too few old rows found: ", COUNT(*)))
OK
SET GLOBAL gtid_cleanup_batch_size=1;
*** Test @@skip_parallel_replication. ***
connection server_2;
include/stop_slave.inc
......@@ -651,9 +658,10 @@ DROP TABLE t1, t2, t3;
include/save_master_gtid.inc
connection server_2;
include/sync_with_master_gtid.inc
Check that no more than the expected last four GTIDs are in mysql.gtid_slave_pos
select count(4) <= 4 from mysql.gtid_slave_pos order by domain_id, sub_id;
count(4) <= 4
SELECT COUNT(*) <= 5*@@GLOBAL.gtid_cleanup_batch_size
FROM mysql.gtid_slave_pos;
COUNT(*) <= 5*@@GLOBAL.gtid_cleanup_batch_size
1
SET GLOBAL gtid_cleanup_batch_size= @old_gtid_cleanup_batch_size;
connection server_1;
include/rpl_end.inc
......@@ -28,37 +28,79 @@ INSERT INTO t1 VALUES (1);
# Inject an artificial error deleting entries, and check that the error handling code works.
--connection slave
--source include/stop_slave.inc
SET @old_gtid_cleanup_batch_size= @@GLOBAL.gtid_cleanup_batch_size;
SET GLOBAL gtid_cleanup_batch_size= 2;
SET @old_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug="+d,gtid_slave_pos_simulate_failed_delete";
SET sql_log_bin= 0;
CALL mtr.add_suppression("Can't find file");
CALL mtr.add_suppression("<DEBUG> Error deleting old GTID row");
SET sql_log_bin= 1;
--source include/start_slave.inc
--connection master
INSERT INTO t1 VALUES (2);
--disable_query_log
let $i = 20;
while ($i) {
eval INSERT INTO t1 VALUES ($i+10);
dec $i;
}
--enable_query_log
--save_master_pos
--connection slave
--let $slave_sql_errno= 1942
--source include/wait_for_slave_sql_error.inc
STOP SLAVE IO_THREAD;
SELECT domain_id, server_id, seq_no FROM mysql.gtid_slave_pos
ORDER BY domain_id, sub_id DESC LIMIT 1;
--sync_with_master
# Now wait for the slave background thread to try to delete old rows and
# hit the error injection.
--let _TEST_MYSQLD_ERROR_LOG=$MYSQLTEST_VARDIR/log/mysqld.2.err
--perl
# Block until the slave error log contains the message logged when the
# injected failure makes the deletion of an old GTID row fail.
open F, '<', $ENV{'_TEST_MYSQLD_ERROR_LOG'}
  or die "Cannot open error log '$ENV{'_TEST_MYSQLD_ERROR_LOG'}': $!";
outer: while (1) {
  inner: while (<F>) {
    last outer if /<DEBUG> Error deleting old GTID row/;
  }
  # Easy way to do sub-second sleep without extra modules.
  select(undef, undef, undef, 0.1);
  # Seeking to the current position does not move the file pointer, but it
  # clears the end-of-file condition on the handle, so that the next read
  # picks up lines appended to the log in the meantime.
  seek(F, 0, 1);
}
close F;
EOF
# Since we injected error in the cleanup code, the rows should remain in
# mysql.gtid_slave_pos. Check that we have at least 20 (more robust against
# non-deterministic cleanup and future changes than checking for exact number).
SELECT COUNT(*), MAX(seq_no) INTO @pre_count, @pre_max_seq_no
FROM mysql.gtid_slave_pos;
SELECT IF(@pre_count >= 20, "OK", CONCAT("Error: too few rows seen while errors injected: ", @pre_count));
SET GLOBAL debug_dbug= @old_dbug;
--source include/start_slave.inc
--connection master
INSERT INTO t1 VALUES (3);
--disable_query_log
let $i = 20;
while ($i) {
eval INSERT INTO t1 VALUES ($i+40);
dec $i;
}
--enable_query_log
--sync_slave_with_master
--connection slave
SELECT domain_id, server_id, seq_no FROM mysql.gtid_slave_pos
ORDER BY domain_id, sub_id DESC LIMIT 1;
SELECT * FROM t1 ORDER BY i;
# Now check that 1) rows are being deleted again after removing error
# injection, and 2) old rows are left that failed their delete while errors
# were injected (again compensating for non-deterministic deletion).
# Deletion is async and slightly non-deterministic, so we wait for at
# least 10 of the 20 new rows to be deleted.
let $wait_condition=
SELECT COUNT(*) <= 20-10
FROM mysql.gtid_slave_pos
WHERE seq_no > @pre_max_seq_no;
--source include/wait_condition.inc
SELECT IF(COUNT(*) >= 1, "OK", CONCAT("Error: too few rows seen after errors no longer injected: ", COUNT(*)))
FROM mysql.gtid_slave_pos
WHERE seq_no <= @pre_max_seq_no;
# Clean up
--connection master
DROP TABLE t1;
--connection slave
SET GLOBAL gtid_cleanup_batch_size= @old_gtid_cleanup_batch_size;
--source include/rpl_end.inc
......@@ -21,6 +21,10 @@ SET GLOBAL slave_parallel_threads=10;
CHANGE MASTER TO master_use_gtid=slave_pos;
SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode;
SET GLOBAL slave_parallel_mode='optimistic';
# Run the first part of the test with high batch size and see that
# old rows remain in the table.
SET @old_gtid_cleanup_batch_size= @@GLOBAL.gtid_cleanup_batch_size;
SET GLOBAL gtid_cleanup_batch_size= 1000000;
--connection server_1
......@@ -108,7 +112,12 @@ SELECT * FROM t3 ORDER BY c;
SELECT * FROM t1 ORDER BY a;
SELECT * FROM t2 ORDER BY a;
SELECT * FROM t3 ORDER BY c;
#SHOW STATUS LIKE 'Slave_retried_transactions';
# Check that we have a bunch of old rows left-over - they were not deleted
# due to high @@gtid_cleanup_batch_size. Then set a low
# @@gtid_cleanup_batch_size so we can test that rows start being deleted.
SELECT IF(COUNT(*) >= 30, "OK", CONCAT("Error: too few old rows found: ", COUNT(*)))
FROM mysql.gtid_slave_pos;
SET GLOBAL gtid_cleanup_batch_size=1;
--echo *** Test @@skip_parallel_replication. ***
......@@ -557,25 +566,18 @@ DROP TABLE t1, t2, t3;
--connection server_2
--source include/sync_with_master_gtid.inc
# Check for left-over rows in table mysql.gtid_slave_pos (MDEV-12147).
#
# There was a bug when a transaction got a conflict and was rolled back. It
# might have also handled deletion of some old rows, and these deletions would
# then also be rolled back. And since the deletes were never re-tried, old no
# longer needed rows would accumulate in the table without limit.
#
# The earlier part of this test file have plenty of transactions being rolled
# back. But the last DROP TABLE statement runs on its own and should never
# conflict, thus at this point the mysql.gtid_slave_pos table should be clean.
#
# To support @@gtid_pos_auto_engines, when a row is inserted in the table, it
# is associated with the engine of the table at insertion time, and it will
# only be deleted during record_gtid from a table of the same engine. Since we
# alter the table from MyISAM to InnoDB at the start of this test, we should
# end up with 4 rows: two left-over from when the table was MyISAM, and two
# left-over from the InnoDB part.
--echo Check that no more than the expected last four GTIDs are in mysql.gtid_slave_pos
select count(4) <= 4 from mysql.gtid_slave_pos order by domain_id, sub_id;
# Check that old rows are deleted from mysql.gtid_slave_pos.
# Deletion is asynchronous, so use wait_condition.inc.
# Also, there is a small amount of non-determinism in the deletion of old
# rows, so it is not guaranteed that there can never be more than
# @@gtid_cleanup_batch_size rows in the table; so allow a bit of slack
# here.
let $wait_condition=
SELECT COUNT(*) <= 5*@@GLOBAL.gtid_cleanup_batch_size
FROM mysql.gtid_slave_pos;
--source include/wait_condition.inc
eval $wait_condition;
SET GLOBAL gtid_cleanup_batch_size= @old_gtid_cleanup_batch_size;
--connection server_1
--source include/rpl_end.inc
......@@ -1202,6 +1202,20 @@ NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT NULL
VARIABLE_NAME GTID_CLEANUP_BATCH_SIZE
SESSION_VALUE NULL
GLOBAL_VALUE 64
GLOBAL_VALUE_ORIGIN COMPILE-TIME
DEFAULT_VALUE 64
VARIABLE_SCOPE GLOBAL
VARIABLE_TYPE INT UNSIGNED
VARIABLE_COMMENT Normally does not need tuning. How many old rows must accumulate in the mysql.gtid_slave_pos table before a background job will be run to delete them. Can be increased to reduce number of commits if using many different engines with --gtid_pos_auto_engines, or to reduce CPU overhead if using a huge number of different gtid_domain_ids. Can be decreased to reduce number of old rows in the table.
NUMERIC_MIN_VALUE 0
NUMERIC_MAX_VALUE 2147483647
NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME GTID_CURRENT_POS
SESSION_VALUE NULL
GLOBAL_VALUE
......
......@@ -5601,7 +5601,7 @@ int Query_log_event::do_apply_event(rpl_group_info *rgi,
gtid= rgi->current_gtid;
if (unlikely(rpl_global_gtid_slave_state->record_gtid(thd, &gtid,
sub_id,
rgi, false,
true, false,
&hton)))
{
int errcode= thd->get_stmt_da()->sql_errno();
......@@ -8396,7 +8396,7 @@ Gtid_list_log_event::do_apply_event(rpl_group_info *rgi)
{
if ((ret= rpl_global_gtid_slave_state->record_gtid(thd, &list[i],
sub_id_list[i],
NULL, false, &hton)))
false, false, &hton)))
return ret;
rpl_global_gtid_slave_state->update_state_hash(sub_id_list[i], &list[i],
hton, NULL);
......@@ -8933,7 +8933,7 @@ int Xid_log_event::do_apply_event(rpl_group_info *rgi)
rgi->gtid_pending= false;
gtid= rgi->current_gtid;
err= rpl_global_gtid_slave_state->record_gtid(thd, &gtid, sub_id, rgi,
err= rpl_global_gtid_slave_state->record_gtid(thd, &gtid, sub_id, true,
false, &hton);
if (unlikely(err))
{
......
......@@ -568,6 +568,7 @@ ulong opt_binlog_commit_wait_count= 0;
ulong opt_binlog_commit_wait_usec= 0;
ulong opt_slave_parallel_max_queued= 131072;
my_bool opt_gtid_ignore_duplicates= FALSE;
uint opt_gtid_cleanup_batch_size= 64;
const double log_10[] = {
1e000, 1e001, 1e002, 1e003, 1e004, 1e005, 1e006, 1e007, 1e008, 1e009,
......
......@@ -262,6 +262,7 @@ extern ulong opt_slave_parallel_mode;
extern ulong opt_binlog_commit_wait_count;
extern ulong opt_binlog_commit_wait_usec;
extern my_bool opt_gtid_ignore_duplicates;
extern uint opt_gtid_cleanup_batch_size;
extern ulong back_log;
extern ulong executed_events;
extern char language[FN_REFLEN];
......
This diff is collapsed.
......@@ -118,8 +118,9 @@ struct rpl_slave_state
{
struct list_element *next;
uint64 sub_id;
uint64 seq_no;
uint32 domain_id;
uint32 server_id;
uint64 seq_no;
/*
hton of mysql.gtid_slave_pos* table used to record this GTID.
Can be NULL if the gtid table failed to load (eg. missing
......@@ -191,6 +192,8 @@ struct rpl_slave_state
/* Mapping from domain_id to its element. */
HASH hash;
/* GTIDs added since last purge of old mysql.gtid_slave_pos rows. */
uint32 pending_gtid_count;
/* Mutex protecting access to the state. */
mysql_mutex_t LOCK_slave_state;
/* Auxiliary buffer to sort gtid list. */
......@@ -233,7 +236,10 @@ struct rpl_slave_state
int truncate_state_table(THD *thd);
void select_gtid_pos_table(THD *thd, LEX_CSTRING *out_tablename);
int record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id,
rpl_group_info *rgi, bool in_statement, void **out_hton);
bool in_transaction, bool in_statement, void **out_hton);
list_element *gtid_grab_pending_delete_list();
LEX_CSTRING *select_gtid_pos_table(void *hton);
void gtid_delete_pending(THD *thd, rpl_slave_state::list_element **list_ptr);
uint64 next_sub_id(uint32 domain_id);
int iterate(int (*cb)(rpl_gtid *, void *), void *data,
rpl_gtid *extra_gtids, uint32 num_extra,
......@@ -245,7 +251,7 @@ struct rpl_slave_state
bool is_empty();
element *get_element(uint32 domain_id);
int put_back_list(uint32 domain_id, list_element *list);
int put_back_list(list_element *list);
void update_state_hash(uint64 sub_id, rpl_gtid *gtid, void *hton,
rpl_group_info *rgi);
......
......@@ -1820,6 +1820,7 @@ rpl_load_gtid_slave_state(THD *thd)
int err= 0;
uint32 i;
load_gtid_state_cb_data cb_data;
rpl_slave_state::list_element *old_gtids_list;
DBUG_ENTER("rpl_load_gtid_slave_state");
mysql_mutex_lock(&rpl_global_gtid_slave_state->LOCK_slave_state);
......@@ -1905,6 +1906,13 @@ rpl_load_gtid_slave_state(THD *thd)
rpl_global_gtid_slave_state->loaded= true;
mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state);
/* Clear out no longer needed elements now. */
old_gtids_list=
rpl_global_gtid_slave_state->gtid_grab_pending_delete_list();
rpl_global_gtid_slave_state->gtid_delete_pending(thd, &old_gtids_list);
if (old_gtids_list)
rpl_global_gtid_slave_state->put_back_list(old_gtids_list);
end:
if (array_inited)
delete_dynamic(&array);
......@@ -2086,7 +2094,6 @@ rpl_group_info::reinit(Relay_log_info *rli)
long_find_row_note_printed= false;
did_mark_start_commit= false;
gtid_ev_flags2= 0;
pending_gtid_delete_list= NULL;
last_master_timestamp = 0;
gtid_ignore_duplicate_state= GTID_DUPLICATE_NULL;
speculation= SPECULATE_NO;
......@@ -2217,12 +2224,6 @@ void rpl_group_info::cleanup_context(THD *thd, bool error)
erroneously update the GTID position.
*/
gtid_pending= false;
/*
Rollback will have undone any deletions of old rows we might have made
in mysql.gtid_slave_pos. Put those rows back on the list to be deleted.
*/
pending_gtid_deletes_put_back();
}
m_table_map.clear_tables();
slave_close_thread_tables(thd);
......@@ -2448,78 +2449,6 @@ rpl_group_info::unmark_start_commit()
}
/*
When record_gtid() has deleted any old rows from the table
mysql.gtid_slave_pos as part of a replicated transaction, save the list of
rows deleted here.
If later the transaction fails (eg. optimistic parallel replication), the
deletes will be undone when the transaction is rolled back. Then we can
put back the list of rows into the rpl_global_gtid_slave_state, so that
we can re-do the deletes and avoid accumulating old rows in the table.
*/
void
rpl_group_info::pending_gtid_deletes_save(uint32 domain_id,
rpl_slave_state::list_element *list)
{
/*
We should never get to a state where we try to save a new pending list of
gtid deletes while we still have an old one. But make sure we handle it
anyway just in case, so we avoid leaving stray entries in the
mysql.gtid_slave_pos table.
*/
DBUG_ASSERT(!pending_gtid_delete_list);
if (unlikely(pending_gtid_delete_list))
pending_gtid_deletes_put_back();
pending_gtid_delete_list= list;
pending_gtid_delete_list_domain= domain_id;
}
/*
Take the list recorded by pending_gtid_deletes_save() and put it back into
rpl_global_gtid_slave_state. This is needed if deletion of the rows was
rolled back due to transaction failure.
*/
void
rpl_group_info::pending_gtid_deletes_put_back()
{
if (pending_gtid_delete_list)
{
rpl_global_gtid_slave_state->put_back_list(pending_gtid_delete_list_domain,
pending_gtid_delete_list);
pending_gtid_delete_list= NULL;
}
}
/*
Free the list recorded by pending_gtid_deletes_save(). Done when the deletes
in the list have been permanently committed.
*/
void
rpl_group_info::pending_gtid_deletes_clear()
{
pending_gtid_deletes_free(pending_gtid_delete_list);
pending_gtid_delete_list= NULL;
}
void
rpl_group_info::pending_gtid_deletes_free(rpl_slave_state::list_element *list)
{
rpl_slave_state::list_element *next;
while (list)
{
next= list->next;
my_free(list);
list= next;
}
}
rpl_sql_thread_info::rpl_sql_thread_info(Rpl_filter *filter)
: rpl_filter(filter)
{
......
......@@ -757,11 +757,6 @@ struct rpl_group_info
/* Needs room for "Gtid D-S-N\x00". */
char gtid_info_buf[5+10+1+10+1+20+1];
/* List of not yet committed deletions in mysql.gtid_slave_pos. */
rpl_slave_state::list_element *pending_gtid_delete_list;
/* Domain associated with pending_gtid_delete_list. */
uint32 pending_gtid_delete_list_domain;
/*
The timestamp, from the master, of the commit event.
Used to do delayed update of rli->last_master_timestamp, for getting
......@@ -903,12 +898,6 @@ struct rpl_group_info
char *gtid_info();
void unmark_start_commit();
static void pending_gtid_deletes_free(rpl_slave_state::list_element *list);
void pending_gtid_deletes_save(uint32 domain_id,
rpl_slave_state::list_element *list);
void pending_gtid_deletes_put_back();
void pending_gtid_deletes_clear();
longlong get_row_stmt_start_timestamp()
{
return row_stmt_start_timestamp;
......
......@@ -465,6 +465,8 @@ static struct slave_background_gtid_pos_create_t {
void *hton;
} *slave_background_gtid_pos_create_list;
static volatile bool slave_background_gtid_pending_delete_flag;
pthread_handler_t
handle_slave_background(void *arg __attribute__((unused)))
......@@ -499,6 +501,7 @@ handle_slave_background(void *arg __attribute__((unused)))
{
slave_background_kill_t *kill_list;
slave_background_gtid_pos_create_t *create_list;
bool pending_deletes;
thd->ENTER_COND(&COND_slave_background, &LOCK_slave_background,
&stage_slave_background_wait_request,
......@@ -508,13 +511,15 @@ handle_slave_background(void *arg __attribute__((unused)))
stop= abort_loop || thd->killed || slave_background_thread_stop;
kill_list= slave_background_kill_list;
create_list= slave_background_gtid_pos_create_list;
if (stop || kill_list || create_list)
pending_deletes= slave_background_gtid_pending_delete_flag;
if (stop || kill_list || create_list || pending_deletes)
break;
mysql_cond_wait(&COND_slave_background, &LOCK_slave_background);
}
slave_background_kill_list= NULL;
slave_background_gtid_pos_create_list= NULL;
slave_background_gtid_pending_delete_flag= false;
thd->EXIT_COND(&old_stage);
while (kill_list)
......@@ -541,6 +546,17 @@ handle_slave_background(void *arg __attribute__((unused)))
create_list= next;
}
if (pending_deletes)
{
rpl_slave_state::list_element *list;
slave_background_gtid_pending_delete_flag= false;
list= rpl_global_gtid_slave_state->gtid_grab_pending_delete_list();
rpl_global_gtid_slave_state->gtid_delete_pending(thd, &list);
if (list)
rpl_global_gtid_slave_state->put_back_list(list);
}
mysql_mutex_lock(&LOCK_slave_background);
} while (!stop);
......@@ -614,6 +630,23 @@ slave_background_gtid_pos_create_request(
}
/*
Request the slave background thread to delete no longer used rows from the
mysql.gtid_slave_pos* tables.
This is called from time-critical rpl_slave_state::update(), so we avoid
taking any locks here. This means we may race with the background thread
to occasionally lose a signal. This is not a problem; any pending rows to
be deleted will just be deleted a bit later as part of the next batch.
*/
void
slave_background_gtid_pending_delete_request(void)
{
/*
  The flag is set here without taking LOCK_slave_background. It is read and
  reset to false by the loop in handle_slave_background().
*/
slave_background_gtid_pending_delete_flag= true;
/*
  Wake up the background thread, which waits on COND_slave_background in
  handle_slave_background(), so that it notices the flag.
*/
mysql_cond_signal(&COND_slave_background);
}
/*
Start the slave background thread.
......
......@@ -276,6 +276,7 @@ bool net_request_file(NET* net, const char* fname);
void slave_background_kill_request(THD *to_kill);
void slave_background_gtid_pos_create_request
(rpl_slave_state::gtid_pos_table *table_entry);
void slave_background_gtid_pending_delete_request(void);
extern bool volatile abort_loop;
extern Master_info *active_mi; /* active_mi for multi-master */
......
......@@ -1942,6 +1942,19 @@ Sys_var_last_gtid::session_value_ptr(THD *thd, const LEX_CSTRING *base)
}
static Sys_var_uint Sys_gtid_cleanup_batch_size(
"gtid_cleanup_batch_size",
"Normally does not need tuning. How many old rows must accumulate in "
"the mysql.gtid_slave_pos table before a background job will be run to "
"delete them. Can be increased to reduce number of commits if "
"using many different engines with --gtid_pos_auto_engines, or to "
"reduce CPU overhead if using a huge number of different "
"gtid_domain_ids. Can be decreased to reduce number of old rows in the "
"table.",
GLOBAL_VAR(opt_gtid_cleanup_batch_size), CMD_LINE(REQUIRED_ARG),
VALID_RANGE(0,2147483647), DEFAULT(64), BLOCK_SIZE(1));
static bool
check_slave_parallel_threads(sys_var *self, THD *thd, set_var *var)
{
......
......@@ -2,6 +2,7 @@ include/master-slave.inc
[connection master]
connection server_2;
include/stop_slave.inc
SET GLOBAL gtid_cleanup_batch_size = 999999999;
CHANGE MASTER TO master_use_gtid=slave_pos;
SET sql_log_bin=0;
CREATE TABLE mysql.gtid_slave_pos_innodb LIKE mysql.gtid_slave_pos;
......@@ -41,6 +42,8 @@ a
1
SELECT * FROM mysql.gtid_slave_pos ORDER BY sub_id;
domain_id sub_id server_id seq_no
0 1 1 1
0 2 1 2
0 3 1 3
0 4 1 4
SELECT * FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
......@@ -121,6 +124,21 @@ Transactions_multi_engine 6
DELETE FROM t1 WHERE a >= 100;
DELETE FROM t2 WHERE a >= 100;
DELETE FROM t3 WHERE a >= 100;
connection server_1;
include/save_master_gtid.inc
connection server_2;
include/sync_with_master_gtid.inc
SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos;
COUNT(*)>=10
1
SELECT COUNT(*)>=10 FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select;
COUNT(*)>=10
1
SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos_rocksdb;
COUNT(*)>=10
1
SET GLOBAL gtid_cleanup_batch_size = 3;
connection server_2;
include/stop_slave.inc
SET sql_log_bin=0;
......
......@@ -4,6 +4,12 @@
--connection server_2
--source include/stop_slave.inc
# Set GTID cleanup limit high enough that cleanup will not run and we
# can rely on consistent table output in .result.
--let $old_gtid_cleanup_batch_size=`SELECT @@GLOBAL.gtid_cleanup_batch_size`
SET GLOBAL gtid_cleanup_batch_size = 999999999;
CHANGE MASTER TO master_use_gtid=slave_pos;
SET sql_log_bin=0;
CREATE TABLE mysql.gtid_slave_pos_innodb LIKE mysql.gtid_slave_pos;
......@@ -89,6 +95,82 @@ DELETE FROM t2 WHERE a >= 100;
DELETE FROM t3 WHERE a >= 100;
# Create a bunch more GTIDs in mysql.gtid_slave_pos* tables to test with.
--connection server_1
--disable_query_log
let $i=10;
while ($i) {
eval INSERT INTO t1 VALUES (300+$i);
eval INSERT INTO t2 VALUES (300+$i);
eval INSERT INTO t3 VALUES (300+$i);
dec $i;
}
--enable_query_log
--source include/save_master_gtid.inc
--connection server_2
--source include/sync_with_master_gtid.inc
# Check that we have many rows in mysql.gtid_slave_pos now (since
# @@gtid_cleanup_batch_size was set to a huge value). No need to check
# for an exact number, since that will require changing .result if
# anything changes prior to this point, and we just need to know that
# we still have some data in the tables to make the following
# test effective.
SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos;
SELECT COUNT(*)>=10 FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select;
SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos_rocksdb;
# Check that old GTID rows will be deleted when batch delete size is
# set reasonably. Old row deletion is not 100% deterministic (by design), so
# we must wait for it to occur, but it should occur eventually.
SET GLOBAL gtid_cleanup_batch_size = 3;
let $i=40;
--disable_query_log
--let $keep_include_silent=1
while ($i) {
let N=`SELECT 1+($i MOD 3)`;
--connection server_1
eval UPDATE t$N SET a=a+1 WHERE a=(SELECT MAX(a) FROM t$N);
--source include/save_master_gtid.inc
--connection server_2
--source include/sync_with_master_gtid.inc
let $j=50;
while ($j) {
let $is_done=`SELECT SUM(a)=1 FROM (
SELECT COUNT(*) AS a FROM mysql.gtid_slave_pos
UNION ALL
SELECT COUNT(*) AS a FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select
UNION ALL
SELECT COUNT(*) AS a FROM mysql.gtid_slave_pos_rocksdb) outer_select`;
if ($is_done) {
let $j=0;
}
if (!$is_done) {
real_sleep 0.1;
dec $j;
}
}
dec $i;
if ($is_done) {
let $i=0;
}
}
--enable_query_log
--let $keep_include_silent=0
if (!$is_done) {
--echo Timed out waiting for mysql.gtid_slave_pos* tables to be cleaned up
}
--disable_query_log
DELETE FROM t1 WHERE a >= 100;
DELETE FROM t2 WHERE a >= 100;
DELETE FROM t3 WHERE a >= 100;
--enable_query_log
# Test status variables Rpl_transactions_multi_engine and Transactions_gtid_foreign_engine.
# Have mysql.gtid_slave_pos* for myisam and innodb but not rocksdb.
--connection server_2
......@@ -223,6 +305,9 @@ SHOW STATUS LIKE "%transactions%engine";
SET sql_log_bin=0;
DROP TABLE mysql.gtid_slave_pos_innodb;
SET sql_log_bin=1;
--disable_query_log
eval SET GLOBAL gtid_cleanup_batch_size = $old_gtid_cleanup_batch_size;
--enable_query_log
--connection server_1
DROP TABLE t1;
......
......@@ -2,6 +2,7 @@ include/master-slave.inc
[connection master]
connection server_2;
include/stop_slave.inc
SET GLOBAL gtid_cleanup_batch_size = 999999999;
CHANGE MASTER TO master_use_gtid=slave_pos;
SET sql_log_bin=0;
CREATE TABLE mysql.gtid_slave_pos_innodb LIKE mysql.gtid_slave_pos;
......@@ -41,6 +42,8 @@ a
1
SELECT * FROM mysql.gtid_slave_pos ORDER BY sub_id;
domain_id sub_id server_id seq_no
0 1 1 1
0 2 1 2
0 3 1 3
0 4 1 4
SELECT * FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
......@@ -121,6 +124,21 @@ Transactions_multi_engine 6
DELETE FROM t1 WHERE a >= 100;
DELETE FROM t2 WHERE a >= 100;
DELETE FROM t3 WHERE a >= 100;
connection server_1;
include/save_master_gtid.inc
connection server_2;
include/sync_with_master_gtid.inc
SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos;
COUNT(*)>=10
1
SELECT COUNT(*)>=10 FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select;
COUNT(*)>=10
1
SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos_tokudb;
COUNT(*)>=10
1
SET GLOBAL gtid_cleanup_batch_size = 3;
connection server_2;
include/stop_slave.inc
SET sql_log_bin=0;
......
......@@ -4,6 +4,12 @@
--connection server_2
--source include/stop_slave.inc
# Set GTID cleanup limit high enough that cleanup will not run and we
# can rely on consistent table output in .result.
--let $old_gtid_cleanup_batch_size=`SELECT @@GLOBAL.gtid_cleanup_batch_size`
SET GLOBAL gtid_cleanup_batch_size = 999999999;
CHANGE MASTER TO master_use_gtid=slave_pos;
SET sql_log_bin=0;
CREATE TABLE mysql.gtid_slave_pos_innodb LIKE mysql.gtid_slave_pos;
......@@ -89,6 +95,82 @@ DELETE FROM t2 WHERE a >= 100;
DELETE FROM t3 WHERE a >= 100;
# Create a bunch more GTIDs in mysql.gtid_slave_pos* tables to test with.
--connection server_1
--disable_query_log
let $i=10;
while ($i) {
eval INSERT INTO t1 VALUES (300+$i);
eval INSERT INTO t2 VALUES (300+$i);
eval INSERT INTO t3 VALUES (300+$i);
dec $i;
}
--enable_query_log
--source include/save_master_gtid.inc
--connection server_2
--source include/sync_with_master_gtid.inc
# Check that we have many rows in mysql.gtid_slave_pos now (since
# @@gtid_cleanup_batch_size was set to a huge value). No need to check
# for an exact number, since that will require changing .result if
# anything changes prior to this point, and we just need to know that
# we still have some data in the tables to make the following
# test effective.
SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos;
SELECT COUNT(*)>=10 FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select;
SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos_tokudb;
# Check that old GTID rows will be deleted when batch delete size is
# set reasonably. Old row deletion is not 100% deterministic (by design), so
# we must wait for it to occur, but it should occur eventually.
SET GLOBAL gtid_cleanup_batch_size = 3;
let $i=40;
--disable_query_log
--let $keep_include_silent=1
while ($i) {
let N=`SELECT 1+($i MOD 3)`;
--connection server_1
eval UPDATE t$N SET a=a+1 WHERE a=(SELECT MAX(a) FROM t$N);
--source include/save_master_gtid.inc
--connection server_2
--source include/sync_with_master_gtid.inc
let $j=50;
while ($j) {
let $is_done=`SELECT SUM(a)=1 FROM (
SELECT COUNT(*) AS a FROM mysql.gtid_slave_pos
UNION ALL
SELECT COUNT(*) AS a FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select
UNION ALL
SELECT COUNT(*) AS a FROM mysql.gtid_slave_pos_tokudb) outer_select`;
if ($is_done) {
let $j=0;
}
if (!$is_done) {
real_sleep 0.1;
dec $j;
}
}
dec $i;
if ($is_done) {
let $i=0;
}
}
--enable_query_log
--let $keep_include_silent=0
if (!$is_done) {
--echo Timed out waiting for mysql.gtid_slave_pos* tables to be cleaned up
}
--disable_query_log
DELETE FROM t1 WHERE a >= 100;
DELETE FROM t2 WHERE a >= 100;
DELETE FROM t3 WHERE a >= 100;
--enable_query_log
# Test status variables Rpl_transactions_multi_engine and Transactions_gtid_foreign_engine.
# Have mysql.gtid_slave_pos* for myisam and innodb but not tokudb.
--connection server_2
......@@ -223,6 +305,9 @@ SHOW STATUS LIKE "%transactions%engine";
SET sql_log_bin=0;
DROP TABLE mysql.gtid_slave_pos_innodb;
SET sql_log_bin=1;
--disable_query_log
eval SET GLOBAL gtid_cleanup_batch_size = $old_gtid_cleanup_batch_size;
--enable_query_log
--connection server_1
DROP TABLE t1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment