Commit b5110969 authored by Monty's avatar Monty

MDEV-10630 rpl.rpl_mdev6020 fails in buildbot with timeout

The issue was that when running under valgrind, the timeout used for
master_pos_wait() was not long enough.

This patch also fixes two other failures that could affect rpl_mdev6020:
- check_if_conflicting_replication_locks() didn't properly check domains
- 'did_mark_start_commit' was set after the signals to other threads were sent,
  which could cause the variable to be read too early.
parent 5932fa78
......@@ -181,6 +181,7 @@ static uint my_end_arg= 0;
static uint opt_tail_lines= 0;
static uint opt_connect_timeout= 0;
static uint opt_wait_for_pos_timeout= 0;
static char delimiter[MAX_DELIMITER_LENGTH]= ";";
static uint delimiter_length= 1;
......@@ -4659,7 +4660,7 @@ void do_sync_with_master2(struct st_command *command, long offset,
MYSQL_ROW row;
MYSQL *mysql= cur_con->mysql;
char query_buf[FN_REFLEN+128];
int timeout= 300; /* seconds */
int timeout= opt_wait_for_pos_timeout;
if (!master_pos.file[0])
die("Calling 'sync_with_master' without calling 'save_master_pos'");
......@@ -7098,6 +7099,10 @@ static struct my_option my_long_options[] =
"Number of seconds before connection timeout.",
&opt_connect_timeout, &opt_connect_timeout, 0, GET_UINT, REQUIRED_ARG,
120, 0, 3600 * 12, 0, 0, 0},
{"wait_for_pos_timeout", 0,
"Number of seconds to wait for master_pos_wait",
&opt_wait_for_pos_timeout, &opt_wait_for_pos_timeout, 0, GET_UINT,
REQUIRED_ARG, 300, 0, 3600 * 12, 0, 0, 0},
{"plugin_dir", 0, "Directory for client-side plugins.",
&opt_plugin_dir, &opt_plugin_dir, 0,
GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
......
......@@ -5899,7 +5899,7 @@ sub start_mysqltest ($) {
{
# We are running server under valgrind, which causes some replication
# test to be much slower, notable rpl_mdev6020. Increase timeout.
mtr_add_arg($args, "--wait-for-pos-timeout=1500");
mtr_add_arg($args, "--wait-for-pos-timeout=0");
}
if ( $opt_ssl )
......
# Running this with valgrind can take > 5000 seconds with xtradb
--source include/not_valgrind.inc
--source include/have_innodb.inc
--source include/have_partition.inc
--source include/have_binlog_format_mixed_or_row.inc
--source include/master-slave.inc
--connection slave
--source include/stop_slave.inc
......
......@@ -443,7 +443,9 @@ public:
virtual void notify_conflicting_locks(MDL_context *ctx) = 0;
virtual bitmap_t hog_lock_types_bitmap() const = 0;
#ifndef DBUG_OFF
bool check_if_conflicting_replication_locks(MDL_context *ctx);
#endif
/** List of granted tickets for this lock. */
Ticket_list m_granted;
......@@ -2303,16 +2305,23 @@ void MDL_scoped_lock::notify_conflicting_locks(MDL_context *ctx)
and trying to get an exclusive lock for the table.
*/
#ifndef DBUG_OFF
bool MDL_lock::check_if_conflicting_replication_locks(MDL_context *ctx)
{
Ticket_iterator it(m_granted);
MDL_ticket *conflicting_ticket;
rpl_group_info *rgi_slave= ctx->get_thd()->rgi_slave;
if (!rgi_slave->gtid_sub_id)
return 0;
while ((conflicting_ticket= it++))
{
if (conflicting_ticket->get_ctx() != ctx)
{
MDL_context *conflicting_ctx= conflicting_ticket->get_ctx();
rpl_group_info *conflicting_rgi_slave;
conflicting_rgi_slave= conflicting_ctx->get_thd()->rgi_slave;
/*
If the conflicting thread is another parallel replication
......@@ -2320,15 +2329,18 @@ bool MDL_lock::check_if_conflicting_replication_locks(MDL_context *ctx)
the current transaction has started too early and something is
seriously wrong.
*/
if (conflicting_ctx->get_thd()->rgi_slave &&
conflicting_ctx->get_thd()->rgi_slave->rli ==
ctx->get_thd()->rgi_slave->rli &&
!conflicting_ctx->get_thd()->rgi_slave->did_mark_start_commit)
if (conflicting_rgi_slave &&
conflicting_rgi_slave->gtid_sub_id &&
conflicting_rgi_slave->rli == rgi_slave->rli &&
conflicting_rgi_slave->current_gtid.domain_id ==
rgi_slave->current_gtid.domain_id &&
!conflicting_rgi_slave->did_mark_start_commit)
return 1; // Fatal error
}
}
return 0;
}
#endif
/**
......
......@@ -1921,8 +1921,8 @@ rpl_group_info::mark_start_commit_no_lock()
{
if (did_mark_start_commit)
return;
mark_start_commit_inner(parallel_entry, gco, this);
did_mark_start_commit= true;
mark_start_commit_inner(parallel_entry, gco, this);
}
......@@ -1933,12 +1933,12 @@ rpl_group_info::mark_start_commit()
if (did_mark_start_commit)
return;
did_mark_start_commit= true;
e= this->parallel_entry;
mysql_mutex_lock(&e->LOCK_parallel_entry);
mark_start_commit_inner(e, gco, this);
mysql_mutex_unlock(&e->LOCK_parallel_entry);
did_mark_start_commit= true;
}
......@@ -1981,12 +1981,12 @@ rpl_group_info::unmark_start_commit()
if (!did_mark_start_commit)
return;
did_mark_start_commit= false;
e= this->parallel_entry;
mysql_mutex_lock(&e->LOCK_parallel_entry);
--e->count_committing_event_groups;
mysql_mutex_unlock(&e->LOCK_parallel_entry);
did_mark_start_commit= false;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment