Commit f8320210 authored by Sergei Golubchik's avatar Sergei Golubchik

MDEV-7126 replication slave - deadlock in terminate_slave_thread with stop...

MDEV-7126 replication slave - deadlock in terminate_slave_thread with stop slave and show variables of replication filters and show global status

Three-way deadlock:

  T1: SHOW GLOBAL STATUS
      -> acquire LOCK_status
  T2: STOP SLAVE
      -> acquire LOCK_active_mi
      -> terminate_slave_thread()
      -> -> cond_timedwait for handle_slave_sql to stop
  T3: sql slave thread (same applies to io thread)
      -> handle_slave_sql(), when exiting
      -> -> THD::add_status_to_global()
      -> -> -> wait for LOCK_status...
  T1: SHOW GLOBAL STATUS
      -> for "Slave_heartbeat_period" status variable
      -> -> show_heartbeat_period()
      -> -> -> wait for LOCK_active_mi

cherry-pick from 5.6:

  commit fc8b395898f40387b3468122bd0dae31e29a6fde
  Author: Venkatesh Duggirala <venkatesh.duggirala@oracle.com>
  Date:   Wed Jun 12 21:41:05 2013 +0530

    BUG#16904035-SHOW STATUS - EXCESSIVE LOCKING ON LOCK_ACTIVE_MI AND
    ACTIVE_MI->RLI->DATA_LOCK

    Problem: Excessive locking on lock_active_mi and rli->data_lock
    while executing any `show status like 'X'` command.

    Analysis: SHOW_FUNCs for Slave_running, Slave_retried_transactions,
    Slave_heartbeat_period, Slave_received_heartbeats,
    Slave_last_heartbeat are acquiring lock_active_mi and rli->data_lock
    to show their variable value. It is ok to show stale data while showing
    the status variables i.e., even if they miss one update, it will
    not cause any great trouble.

    Fix: Remove the locks from the above mentioned SHOW_FUNC functions.

Add a test case
parent 2d6c0a54
include/master-slave.inc
[connection master]
#
# MDEV-7126 replication slave - deadlock in terminate_slave_thread with stop slave and show variables of replication filters and show global status
#
--source include/master-slave.inc
--connection slave
# If everything is okay, the test will end in several seconds; maybe a minute.
# If the problem shows up, it will hang until testcase timeout is exceeded.
--exec $MYSQL_SLAP --silent --socket=$SLAVE_MYSOCK -q "START SLAVE; STOP SLAVE; SHOW GLOBAL STATUS" -c 2 --number-of-queries=100 --create-schema=test
# All done.
......@@ -6625,12 +6625,10 @@ static int show_rpl_status(THD *thd, SHOW_VAR *var, char *buff)
static int show_slave_running(THD *thd, SHOW_VAR *var, char *buff)
{
var->type= SHOW_MY_BOOL;
mysql_mutex_lock(&LOCK_active_mi);
var->value= buff;
*((my_bool *)buff)= (my_bool) (active_mi &&
active_mi->slave_running == MYSQL_SLAVE_RUN_CONNECT &&
active_mi->rli.slave_running);
mysql_mutex_unlock(&LOCK_active_mi);
return 0;
}
......@@ -6640,41 +6638,32 @@ static int show_slave_retried_trans(THD *thd, SHOW_VAR *var, char *buff)
TODO: with multimaster, have one such counter per line in
SHOW SLAVE STATUS, and have the sum over all lines here.
*/
mysql_mutex_lock(&LOCK_active_mi);
if (active_mi)
{
var->type= SHOW_LONG;
var->value= buff;
mysql_mutex_lock(&active_mi->rli.data_lock);
*((long *)buff)= (long)active_mi->rli.retried_trans;
mysql_mutex_unlock(&active_mi->rli.data_lock);
}
else
var->type= SHOW_UNDEF;
mysql_mutex_unlock(&LOCK_active_mi);
return 0;
}
static int show_slave_received_heartbeats(THD *thd, SHOW_VAR *var, char *buff)
{
mysql_mutex_lock(&LOCK_active_mi);
if (active_mi)
{
var->type= SHOW_LONGLONG;
var->value= buff;
mysql_mutex_lock(&active_mi->rli.data_lock);
*((longlong *)buff)= active_mi->received_heartbeats;
mysql_mutex_unlock(&active_mi->rli.data_lock);
}
else
var->type= SHOW_UNDEF;
mysql_mutex_unlock(&LOCK_active_mi);
return 0;
}
static int show_heartbeat_period(THD *thd, SHOW_VAR *var, char *buff)
{
mysql_mutex_lock(&LOCK_active_mi);
if (active_mi)
{
var->type= SHOW_CHAR;
......@@ -6683,7 +6672,6 @@ static int show_heartbeat_period(THD *thd, SHOW_VAR *var, char *buff)
}
else
var->type= SHOW_UNDEF;
mysql_mutex_unlock(&LOCK_active_mi);
return 0;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment