Commit eab219d5 authored by Jan Lindström

MDEV-22543 : Galera SST donation fails, FLUSH TABLES WITH READ LOCK times out

During SST we need to let FTWRL use the normal timeout method
even when the client is disconnected.
parent cf87f3e0
connection node_2;
connection node_1;
connection node_1;
connection node_2;
connection node_1;
CREATE TABLE t1 (f1 INT PRIMARY KEY, f2 INT);
INSERT INTO t1 VALUES (1, 1);
SET DEBUG_SYNC = "before_lock_tables_takes_lock SIGNAL sync_point_reached WAIT_FOR sync_point_continue";
UPDATE t1 SET f2 = 2 WHERE f1 = 1;
connection node_1_ctrl;
SET DEBUG_SYNC = "now WAIT_FOR sync_point_reached";
connection node_2;
connection node_1_ctrl;
SET DEBUG_SYNC = "now SIGNAL sync_point_continue";
connection node_1;
SET DEBUG_SYNC = "RESET";
connection node_2;
connection node_1;
DROP TABLE t1;
# The test verifies that FLUSH TABLES WITH READ LOCK (FTWRL) does not
# time out if it needs to wait for another MDL lock for a short duration
# during SST donation.
--source include/galera_cluster.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc
# Save the auto_increment offset state of both nodes; it is restored by
# auto_increment_offset_restore.inc at the end of the test.
--let $node_1 = node_1
--let $node_2 = node_2
--source include/auto_increment_offset_save.inc
# Open a second connection to server 1 (node_1_ctrl). It is used to drive
# the DEBUG_SYNC handshake while the node_1 connection is blocked.
--let $galera_connection_name = node_1_ctrl
--let $galera_server_number = 1
--source include/galera_connect.inc
#
# Run UPDATE on node_1 and make it block before table locks are taken.
# This should block FTWRL.
#
--connection node_1
CREATE TABLE t1 (f1 INT PRIMARY KEY, f2 INT);
INSERT INTO t1 VALUES (1, 1);
SET DEBUG_SYNC = "before_lock_tables_takes_lock SIGNAL sync_point_reached WAIT_FOR sync_point_continue";
# Sent asynchronously: the statement parks at the sync point above and its
# result is collected later with --reap.
--send UPDATE t1 SET f2 = 2 WHERE f1 = 1
--connection node_1_ctrl
# Wait until the UPDATE has actually reached the sync point.
SET DEBUG_SYNC = "now WAIT_FOR sync_point_reached";
#
# Restart node_2, force SST.
#
--connection node_2
--source include/shutdown_mysqld.inc
--remove_file $MYSQLTEST_VARDIR/mysqld.2/data/grastate.dat
# Restart without waiting. The UPDATE should block FTWRL on node_1,
# so the SST cannot be completed and node_2 cannot join before
# the UPDATE connection is signalled to continue.
--exec echo "restart:$start_mysqld_params" > $_expect_file_name
# If the bug is present, FTWRL times out on node_1 in a couple of
# seconds and node_2 fails to join. Keep the UPDATE blocked for longer
# than that before releasing it.
--sleep 10
--connection node_1_ctrl
# Release the blocked UPDATE.
SET DEBUG_SYNC = "now SIGNAL sync_point_continue";
--connection node_1
# Collect the result of the UPDATE sent earlier, then clear sync points.
--reap
SET DEBUG_SYNC = "RESET";
--connection node_2
# Wait for the restarted node_2 to accept connections again.
--enable_reconnect
--source include/wait_until_connected_again.inc
--connection node_1
# Cleanup.
DROP TABLE t1;
--source include/auto_increment_offset_restore.inc
...@@ -25,6 +25,9 @@ ...@@ -25,6 +25,9 @@
#include <mysql/plugin.h> #include <mysql/plugin.h>
#include <mysql/service_thd_wait.h> #include <mysql/service_thd_wait.h>
#include <mysql/psi/mysql_stage.h> #include <mysql/psi/mysql_stage.h>
#ifdef WITH_WSREP
#include "wsrep_sst.h"
#endif
#include <tpool.h> #include <tpool.h>
#include <pfs_metadata_provider.h> #include <pfs_metadata_provider.h>
#include <mysql/psi/mysql_mdl.h> #include <mysql/psi/mysql_mdl.h>
...@@ -2332,18 +2335,26 @@ MDL_context::acquire_lock(MDL_request *mdl_request, double lock_wait_timeout) ...@@ -2332,18 +2335,26 @@ MDL_context::acquire_lock(MDL_request *mdl_request, double lock_wait_timeout)
wait_status= m_wait.timed_wait(m_owner, &abs_shortwait, FALSE, wait_status= m_wait.timed_wait(m_owner, &abs_shortwait, FALSE,
mdl_request->key.get_wait_state_name()); mdl_request->key.get_wait_state_name());
THD* thd= m_owner->get_thd();
if (wait_status != MDL_wait::EMPTY) if (wait_status != MDL_wait::EMPTY)
break; break;
/* Check if the client is gone while we were waiting. */ /* Check if the client is gone while we were waiting. */
if (! thd_is_connected(m_owner->get_thd())) if (! thd_is_connected(thd))
{ {
/* #if defined(WITH_WSREP) && !defined(EMBEDDED_LIBRARY)
* The client is disconnected. Don't wait forever: // During SST client might not be connected
* assume it's the same as a wait timeout, this if (!wsrep_is_sst_progress())
* ensures all error handling is correct. #endif
*/ {
wait_status= MDL_wait::TIMEOUT; /*
break; * The client is disconnected. Don't wait forever:
* assume it's the same as a wait timeout, this
* ensures all error handling is correct.
*/
wait_status= MDL_wait::TIMEOUT;
break;
}
} }
mysql_prlock_wrlock(&lock->m_rwlock); mysql_prlock_wrlock(&lock->m_rwlock);
......
...@@ -54,6 +54,7 @@ my_bool wsrep_sst_donor_rejects_queries= FALSE; ...@@ -54,6 +54,7 @@ my_bool wsrep_sst_donor_rejects_queries= FALSE;
bool sst_joiner_completed = false; bool sst_joiner_completed = false;
bool sst_donor_completed = false; bool sst_donor_completed = false;
bool sst_needed = false;
struct sst_thread_arg struct sst_thread_arg
{ {
...@@ -307,6 +308,7 @@ bool wsrep_before_SE() ...@@ -307,6 +308,7 @@ bool wsrep_before_SE()
&& strcmp (wsrep_sst_method, WSREP_SST_MYSQLDUMP)); && strcmp (wsrep_sst_method, WSREP_SST_MYSQLDUMP));
} }
static bool sst_in_progress = false;
// Signal end of SST // Signal end of SST
static void wsrep_sst_complete (THD* thd, static void wsrep_sst_complete (THD* thd,
int const rcode) int const rcode)
...@@ -1625,7 +1627,10 @@ static void* sst_donor_thread (void* a) ...@@ -1625,7 +1627,10 @@ static void* sst_donor_thread (void* a)
char out_buf[out_len]; char out_buf[out_len];
wsrep_uuid_t ret_uuid= WSREP_UUID_UNDEFINED; wsrep_uuid_t ret_uuid= WSREP_UUID_UNDEFINED;
wsrep_seqno_t ret_seqno= WSREP_SEQNO_UNDEFINED; // seqno of complete SST // seqno of complete SST
wsrep_seqno_t ret_seqno= WSREP_SEQNO_UNDEFINED;
// SST is now in progress
sst_in_progress= true;
wsp::thd thd(FALSE); // we turn off wsrep_on for this THD so that it can wsp::thd thd(FALSE); // we turn off wsrep_on for this THD so that it can
// operate with wsrep_ready == OFF // operate with wsrep_ready == OFF
...@@ -1731,6 +1736,8 @@ static void* sst_donor_thread (void* a) ...@@ -1731,6 +1736,8 @@ static void* sst_donor_thread (void* a)
proc.wait(); proc.wait();
wsrep_donor_monitor_end(); wsrep_donor_monitor_end();
sst_in_progress= false;
return NULL; return NULL;
} }
...@@ -1884,3 +1891,8 @@ int wsrep_sst_donate(const std::string& msg, ...@@ -1884,3 +1891,8 @@ int wsrep_sst_donate(const std::string& msg,
return (ret >= 0 ? 0 : 1); return (ret >= 0 ? 0 : 1);
} }
/**
  Report whether an SST donation is currently running on this node.

  @return the current value of the file-local sst_in_progress flag, which
          sst_donor_thread() sets to true when it starts and resets to
          false just before it returns.

  NOTE(review): the flag is declared as a plain bool and is read here
  without any synchronization visible in this file - confirm whether
  callers on other threads can tolerate a stale value.
*/
bool wsrep_is_sst_progress()
{
  return sst_in_progress;
}
...@@ -77,6 +77,7 @@ extern void wsrep_SE_init_grab(); /*! grab init critical section */ ...@@ -77,6 +77,7 @@ extern void wsrep_SE_init_grab(); /*! grab init critical section */
extern void wsrep_SE_init_wait(); /*! wait for SE init to complete */ extern void wsrep_SE_init_wait(); /*! wait for SE init to complete */
extern void wsrep_SE_init_done(); /*! signal that SE init is complte */ extern void wsrep_SE_init_done(); /*! signal that SE init is complte */
extern void wsrep_SE_initialized(); /*! mark SE initialization complete */ extern void wsrep_SE_initialized(); /*! mark SE initialization complete */
extern bool wsrep_is_sst_progress();
/** /**
Return a string containing the state transfer request string. Return a string containing the state transfer request string.
...@@ -102,5 +103,6 @@ int wsrep_sst_donate(const std::string& request, ...@@ -102,5 +103,6 @@ int wsrep_sst_donate(const std::string& request,
#define wsrep_SE_init_grab() do { } while(0) #define wsrep_SE_init_grab() do { } while(0)
#define wsrep_SE_init_done() do { } while(0) #define wsrep_SE_init_done() do { } while(0)
#define wsrep_sst_continue() (0) #define wsrep_sst_continue() (0)
#define wsrep_is_sst_progress() (0)
#endif /* WSREP_SST_H */ #endif /* WSREP_SST_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment