Commit 40b7307e authored by stewart@mysql.com

BUG#18966 Change in stop/shutdown behaviour

fix behaviour of ALL STOP and SHUTDOWN in relation to MGM nodes
parent 70a0f686
......@@ -162,6 +162,7 @@ private:
NdbMgmHandle m_mgmsrv;
NdbMgmHandle m_mgmsrv2;
const char *m_constr;
bool m_connected;
int m_verbose;
int try_reconnect;
......@@ -390,22 +391,7 @@ convert(const char* s, int& val) {
CommandInterpreter::CommandInterpreter(const char *_host,int verbose)
: m_verbose(verbose)
{
m_mgmsrv = ndb_mgm_create_handle();
if(m_mgmsrv == NULL) {
ndbout_c("Cannot create handle to management server.");
exit(-1);
}
m_mgmsrv2 = ndb_mgm_create_handle();
if(m_mgmsrv2 == NULL) {
ndbout_c("Cannot create 2:nd handle to management server.");
exit(-1);
}
if (ndb_mgm_set_connectstring(m_mgmsrv, _host))
{
printError();
exit(-1);
}
m_constr= _host;
m_connected= false;
m_event_thread= 0;
try_reconnect = 0;
......@@ -422,8 +408,6 @@ CommandInterpreter::CommandInterpreter(const char *_host,int verbose)
CommandInterpreter::~CommandInterpreter()
{
disconnect();
ndb_mgm_destroy_handle(&m_mgmsrv);
ndb_mgm_destroy_handle(&m_mgmsrv2);
}
static bool
......@@ -447,7 +431,6 @@ CommandInterpreter::printError()
{
if (ndb_mgm_check_connection(m_mgmsrv))
{
m_connected= false;
disconnect();
}
ndbout_c("* %5d: %s",
......@@ -497,78 +480,97 @@ event_thread_run(void* m)
}
bool
CommandInterpreter::connect()
CommandInterpreter::connect()
{
DBUG_ENTER("CommandInterpreter::connect");
if(!m_connected)
if(m_connected)
DBUG_RETURN(m_connected);
m_mgmsrv = ndb_mgm_create_handle();
if(m_mgmsrv == NULL) {
ndbout_c("Cannot create handle to management server.");
exit(-1);
}
m_mgmsrv2 = ndb_mgm_create_handle();
if(m_mgmsrv2 == NULL) {
ndbout_c("Cannot create 2:nd handle to management server.");
exit(-1);
}
if (ndb_mgm_set_connectstring(m_mgmsrv, m_constr))
{
if(!ndb_mgm_connect(m_mgmsrv, try_reconnect-1, 5, 1))
printError();
exit(-1);
}
if(ndb_mgm_connect(m_mgmsrv, try_reconnect-1, 5, 1))
DBUG_RETURN(m_connected); // couldn't connect, always false
const char *host= ndb_mgm_get_connected_host(m_mgmsrv);
unsigned port= ndb_mgm_get_connected_port(m_mgmsrv);
BaseString constr;
constr.assfmt("%s:%d",host,port);
if(!ndb_mgm_set_connectstring(m_mgmsrv2, constr.c_str()) &&
!ndb_mgm_connect(m_mgmsrv2, try_reconnect-1, 5, 1))
{
DBUG_PRINT("info",("2:ndb connected to Management Server ok at: %s:%d",
host, port));
assert(m_event_thread == 0);
assert(do_event_thread == 0);
do_event_thread= 0;
m_event_thread = NdbThread_Create(event_thread_run,
(void**)&m_mgmsrv2,
32768,
"CommandInterpreted_event_thread",
NDB_THREAD_PRIO_LOW);
if (m_event_thread != 0)
{
const char *host= ndb_mgm_get_connected_host(m_mgmsrv);
unsigned port= ndb_mgm_get_connected_port(m_mgmsrv);
BaseString constr;
constr.assfmt("%s:%d",host,port);
if(!ndb_mgm_set_connectstring(m_mgmsrv2, constr.c_str()) &&
!ndb_mgm_connect(m_mgmsrv2, try_reconnect-1, 5, 1))
{
DBUG_PRINT("info",("2:ndb connected to Management Server ok at: %s:%d",
host, port));
assert(m_event_thread == 0);
assert(do_event_thread == 0);
do_event_thread= 0;
m_event_thread = NdbThread_Create(event_thread_run,
(void**)&m_mgmsrv2,
32768,
"CommandInterpreted_event_thread",
NDB_THREAD_PRIO_LOW);
if (m_event_thread != 0)
{
DBUG_PRINT("info",("Thread created ok, waiting for started..."));
int iter= 1000; // try for 30 seconds
while(do_event_thread == 0 &&
iter-- > 0)
NdbSleep_MilliSleep(30);
}
if (m_event_thread == 0 ||
do_event_thread == 0 ||
do_event_thread == -1)
{
DBUG_PRINT("info",("Warning, event thread startup failed, "
"degraded printouts as result, errno=%d",
errno));
printf("Warning, event thread startup failed, "
"degraded printouts as result, errno=%d\n", errno);
do_event_thread= 0;
if (m_event_thread)
{
void *res;
NdbThread_WaitFor(m_event_thread, &res);
NdbThread_Destroy(&m_event_thread);
}
ndb_mgm_disconnect(m_mgmsrv2);
}
}
else
{
DBUG_PRINT("warning",
("Could not do 2:nd connect to mgmtserver for event listening"));
DBUG_PRINT("info", ("code: %d, msg: %s",
ndb_mgm_get_latest_error(m_mgmsrv2),
ndb_mgm_get_latest_error_msg(m_mgmsrv2)));
printf("Warning, event connect failed, degraded printouts as result\n");
printf("code: %d, msg: %s\n",
ndb_mgm_get_latest_error(m_mgmsrv2),
ndb_mgm_get_latest_error_msg(m_mgmsrv2));
}
m_connected= true;
DBUG_PRINT("info",("Connected to Management Server at: %s:%d", host, port));
if (m_verbose)
DBUG_PRINT("info",("Thread created ok, waiting for started..."));
int iter= 1000; // try for 30 seconds
while(do_event_thread == 0 &&
iter-- > 0)
NdbSleep_MilliSleep(30);
}
if (m_event_thread == 0 ||
do_event_thread == 0 ||
do_event_thread == -1)
{
DBUG_PRINT("info",("Warning, event thread startup failed, "
"degraded printouts as result, errno=%d",
errno));
printf("Warning, event thread startup failed, "
"degraded printouts as result, errno=%d\n", errno);
do_event_thread= 0;
if (m_event_thread)
{
printf("Connected to Management Server at: %s:%d\n",
host, port);
void *res;
NdbThread_WaitFor(m_event_thread, &res);
NdbThread_Destroy(&m_event_thread);
}
ndb_mgm_disconnect(m_mgmsrv2);
}
}
else
{
DBUG_PRINT("warning",
("Could not do 2:nd connect to mgmtserver for event listening"));
DBUG_PRINT("info", ("code: %d, msg: %s",
ndb_mgm_get_latest_error(m_mgmsrv2),
ndb_mgm_get_latest_error_msg(m_mgmsrv2)));
printf("Warning, event connect failed, degraded printouts as result\n");
printf("code: %d, msg: %s\n",
ndb_mgm_get_latest_error(m_mgmsrv2),
ndb_mgm_get_latest_error_msg(m_mgmsrv2));
}
m_connected= true;
DBUG_PRINT("info",("Connected to Management Server at: %s:%d", host, port));
if (m_verbose)
{
printf("Connected to Management Server at: %s:%d\n",
host, port);
}
DBUG_RETURN(m_connected);
}
......@@ -576,20 +578,18 @@ bool
CommandInterpreter::disconnect()
{
DBUG_ENTER("CommandInterpreter::disconnect");
if (m_event_thread) {
void *res;
do_event_thread= 0;
NdbThread_WaitFor(m_event_thread, &res);
NdbThread_Destroy(&m_event_thread);
m_event_thread= 0;
ndb_mgm_disconnect(m_mgmsrv2);
ndb_mgm_destroy_handle(&m_mgmsrv2);
}
if (m_connected)
{
if (ndb_mgm_disconnect(m_mgmsrv) == -1) {
ndbout_c("Could not disconnect from management server");
printError();
}
ndb_mgm_destroy_handle(&m_mgmsrv);
m_connected= false;
}
DBUG_RETURN(true);
......@@ -1066,28 +1066,39 @@ CommandInterpreter::executeShutdown(char* parameters)
ndbout << result << " NDB Cluster node(s) have shutdown." << endl;
int mgm_id= 0;
mgm_id= ndb_mgm_get_mgmd_nodeid(m_mgmsrv);
if (mgm_id == 0)
int nodeId= 0;
int this_mgmd= 0;
this_mgmd= ndb_mgm_get_mgmd_nodeid(m_mgmsrv);
while(get_next_nodeid(state, &nodeId, NDB_MGM_NODE_TYPE_MGM))
{
ndbout << "Unable to locate management server, "
<< "shutdown manually with <id> STOP"
<< endl;
return 1;
if(nodeId==this_mgmd)
continue;
ndbout << "Shutting down NDB Cluster management server nodeId="
<< nodeId << "...";
result = ndb_mgm_stop(m_mgmsrv, 1, &nodeId);
if (result <= 0) {
ndbout << " failed." << endl;
printError();
}
else
ndbout << "Done." << endl;
}
result = ndb_mgm_stop(m_mgmsrv, 1, &mgm_id);
ndbout << "Shutting down NDB Cluster management server nodeId="
<< this_mgmd << "...";
result= ndb_mgm_stop(m_mgmsrv, 1, &this_mgmd);
if (result <= 0) {
ndbout << "Shutdown of NDB Cluster management server failed." << endl;
ndbout << " failed." << endl;
printError();
if (result == 0)
return 1;
return result;
}
m_connected= false;
disconnect();
ndbout << "NDB Cluster management server shutdown." << endl;
else
{
ndbout << "Done." << endl;
ndbout << "Disconnecting to allow management server to shutdown."
<< endl;
disconnect();
}
ndbout << "NDB Cluster management servers shutdown." << endl;
return 0;
}
......@@ -1311,12 +1322,7 @@ CommandInterpreter::executeConnect(char* parameters)
{
disconnect();
if (!emptyString(parameters)) {
if (ndb_mgm_set_connectstring(m_mgmsrv,
BaseString(parameters).trim().c_str()))
{
printError();
return;
}
m_constr= BaseString(parameters).trim().c_str();
}
connect();
}
......@@ -1507,10 +1513,25 @@ CommandInterpreter::executeStop(Vector<BaseString> &command_list,
ndbout_c("NDB Cluster has shutdown.");
else
{
int mgm_id= 0;
int need_reconnect= 0;
mgm_id= ndb_mgm_get_mgmd_nodeid(m_mgmsrv);
ndbout << "Node";
for (int i= 0; i < no_of_nodes; i++)
ndbout << " " << node_ids[i];
{
if(node_ids[i] == mgm_id)
need_reconnect= 1;
else
ndbout << " " << node_ids[i];
}
ndbout_c(" has shutdown.");
if(need_reconnect)
{
ndbout << "You are connected to node " << mgm_id
<< ", disconnecting to allow it to shutdown"
<< endl;
disconnect();
}
}
}
}
......@@ -1640,9 +1661,16 @@ CommandInterpreter::executeRestart(Vector<BaseString> &command_list,
ndbout_c("NDB Cluster is being restarted.");
else
{
int mgm_id= 0;
mgm_id= ndb_mgm_get_mgmd_nodeid(m_mgmsrv);
ndbout << "Node";
for (int i= 0; i < no_of_nodes; i++)
{
if(node_ids[i] == mgm_id)
disconnect();
ndbout << " " << node_ids[i];
}
ndbout_c(" is being restarted");
}
}
......
......@@ -60,9 +60,6 @@
#include <SignalSender.hpp>
extern bool g_StopServer;
extern bool g_RestartServer;
//#define MGM_SRV_DEBUG
#ifdef MGM_SRV_DEBUG
#define DEBUG(x) do ndbout << x << endl; while(0)
......@@ -932,6 +929,13 @@ int MgmtSrvr::sendStopMgmd(NodeId nodeId,
* client connection to that mgmd and stop it that way.
* This allows us to stop mgm servers when there isn't any real
* distributed communication up.
*
* node_ids.size()==0 means to stop all DB nodes.
* MGM nodes will *NOT* be stopped.
*
* If we work out that we should be stopping or restarting ourselves,
* we set *stopSelf to <0 for restart, >0 for stop,
* and 0 for do nothing.
*/
int MgmtSrvr::sendSTOP_REQ(const Vector<NodeId> &node_ids,
......@@ -941,7 +945,8 @@ int MgmtSrvr::sendSTOP_REQ(const Vector<NodeId> &node_ids,
bool stop,
bool restart,
bool nostart,
bool initialStart)
bool initialStart,
int* stopSelf)
{
int error = 0;
DBUG_ENTER("MgmtSrvr::sendSTOP_REQ");
......@@ -990,12 +995,13 @@ int MgmtSrvr::sendSTOP_REQ(const Vector<NodeId> &node_ids,
NodeId nodeId= 0;
int use_master_node= 0;
int do_send= 0;
int do_stop_self= 0;
*stopSelf= 0;
NdbNodeBitmask nodes_to_stop;
{
for (unsigned i= 0; i < node_ids.size(); i++)
{
nodeId= node_ids[i];
ndbout << "asked to stop " << nodeId << endl;
if (getNodeType(nodeId) != NDB_MGM_NODE_TYPE_MGM)
nodes_to_stop.set(nodeId);
else if (nodeId != getOwnNodeId())
......@@ -1006,7 +1012,11 @@ int MgmtSrvr::sendSTOP_REQ(const Vector<NodeId> &node_ids,
stoppedNodes.set(nodeId);
}
else
do_stop_self= 1;;
{
ndbout << "which is me" << endl;
*stopSelf= (restart)? -1 : 1;
stoppedNodes.set(nodeId);
}
}
}
int no_of_nodes_to_stop= nodes_to_stop.count();
......@@ -1039,14 +1049,6 @@ int MgmtSrvr::sendSTOP_REQ(const Vector<NodeId> &node_ids,
nodes.set(nodeId);
}
}
nodeId= 0;
while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_MGM))
{
if(nodeId==getOwnNodeId())
continue;
if(sendStopMgmd(nodeId, abort, stop, restart, nostart, initialStart)==0)
stoppedNodes.set(nodeId);
}
}
// now wait for the replies
......@@ -1153,11 +1155,9 @@ int MgmtSrvr::sendSTOP_REQ(const Vector<NodeId> &node_ids,
DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
}
}
if (!error && do_stop_self)
if (error && *stopSelf)
{
if (restart)
g_RestartServer= true;
g_StopServer= true;
*stopSelf= 0;
}
DBUG_RETURN(error);
}
......@@ -1167,7 +1167,7 @@ int MgmtSrvr::sendSTOP_REQ(const Vector<NodeId> &node_ids,
*/
int MgmtSrvr::stopNodes(const Vector<NodeId> &node_ids,
int *stopCount, bool abort)
int *stopCount, bool abort, int* stopSelf)
{
if (!abort)
{
......@@ -1189,20 +1189,25 @@ int MgmtSrvr::stopNodes(const Vector<NodeId> &node_ids,
false,
false,
false,
false);
false,
stopSelf);
if (stopCount)
*stopCount= nodes.count();
return ret;
}
/*
* Perform system shutdown
* Perform DB nodes shutdown.
* MGM servers are left in their current state
*/
int MgmtSrvr::stop(int * stopCount, bool abort)
int MgmtSrvr::shutdownDB(int * stopCount, bool abort)
{
NodeBitmask nodes;
Vector<NodeId> node_ids;
int tmp;
int ret = sendSTOP_REQ(node_ids,
nodes,
0,
......@@ -1210,7 +1215,8 @@ int MgmtSrvr::stop(int * stopCount, bool abort)
true,
false,
false,
false);
false,
&tmp);
if (stopCount)
*stopCount = nodes.count();
return ret;
......@@ -1235,6 +1241,7 @@ int MgmtSrvr::enterSingleUser(int * stopCount, Uint32 singleUserNodeId)
}
NodeBitmask nodes;
Vector<NodeId> node_ids;
int stopSelf;
int ret = sendSTOP_REQ(node_ids,
nodes,
singleUserNodeId,
......@@ -1242,7 +1249,8 @@ int MgmtSrvr::enterSingleUser(int * stopCount, Uint32 singleUserNodeId)
false,
false,
false,
false);
false,
&stopSelf);
if (stopCount)
*stopCount = nodes.count();
return ret;
......@@ -1254,7 +1262,8 @@ int MgmtSrvr::enterSingleUser(int * stopCount, Uint32 singleUserNodeId)
int MgmtSrvr::restartNodes(const Vector<NodeId> &node_ids,
int * stopCount, bool nostart,
bool initialStart, bool abort)
bool initialStart, bool abort,
int *stopSelf)
{
NodeBitmask nodes;
int ret= sendSTOP_REQ(node_ids,
......@@ -1264,21 +1273,24 @@ int MgmtSrvr::restartNodes(const Vector<NodeId> &node_ids,
false,
true,
nostart,
initialStart);
initialStart,
stopSelf);
if (stopCount)
*stopCount = nodes.count();
return ret;
}
/*
* Perform system restart
* Perform restart of all DB nodes
*/
int MgmtSrvr::restart(bool nostart, bool initialStart,
bool abort, int * stopCount )
int MgmtSrvr::restartDB(bool nostart, bool initialStart,
bool abort, int * stopCount)
{
NodeBitmask nodes;
Vector<NodeId> node_ids;
int tmp;
int ret = sendSTOP_REQ(node_ids,
nodes,
0,
......@@ -1286,7 +1298,8 @@ int MgmtSrvr::restart(bool nostart, bool initialStart,
true,
true,
true,
initialStart);
initialStart,
&tmp);
if (ret)
return ret;
......
......@@ -253,12 +253,13 @@ public:
* @param processId: Id of the DB process to stop
* @return 0 if succeeded, otherwise: as stated above, plus:
*/
int stopNodes(const Vector<NodeId> &node_ids, int *stopCount, bool abort);
int stopNodes(const Vector<NodeId> &node_ids, int *stopCount, bool abort,
int *stopSelf);
/**
* Stop the system
* shutdown the DB nodes
*/
int stop(int * cnt = 0, bool abort = false);
int shutdownDB(int * cnt = 0, bool abort = false);
/**
* print version info about a node
......@@ -292,14 +293,14 @@ public:
*/
int restartNodes(const Vector<NodeId> &node_ids,
int *stopCount, bool nostart,
bool initialStart, bool abort);
bool initialStart, bool abort, int *stopSelf);
/**
* Restart the system
* Restart all DB nodes
*/
int restart(bool nostart, bool initialStart,
bool abort = false,
int * stopCount = 0);
int restartDB(bool nostart, bool initialStart,
bool abort = false,
int * stopCount = 0);
struct BackupEvent {
enum Event {
......@@ -503,7 +504,8 @@ private:
bool stop,
bool restart,
bool nostart,
bool initialStart);
bool initialStart,
int *stopSelf);
/**
* Check if it is possible to send a signal to a (DB) process
......
......@@ -35,6 +35,7 @@
#include <base64.h>
extern bool g_StopServer;
extern bool g_RestartServer;
extern EventLogger g_eventLogger;
static const unsigned int MAX_READ_TIMEOUT = 1000 ;
......@@ -267,6 +268,7 @@ MgmApiSession::MgmApiSession(class MgmtSrvr & mgm, NDB_SOCKET_TYPE sock)
m_output = new SocketOutputStream(sock);
m_parser = new Parser_t(commands, *m_input, true, true, true);
m_allocated_resources= new MgmtSrvr::Allocated_resources(m_mgmsrv);
m_stopSelf= 0;
DBUG_VOID_RETURN;
}
......@@ -286,6 +288,10 @@ MgmApiSession::~MgmApiSession()
NDB_CLOSE_SOCKET(m_socket);
m_socket= NDB_INVALID_SOCKET;
}
if(m_stopSelf < 0)
g_RestartServer= true;
if(m_stopSelf)
g_StopServer= true;
DBUG_VOID_RETURN;
}
......@@ -870,7 +876,8 @@ MgmApiSession::restart(Parser<MgmApiSession>::Context &,
&restarted,
nostart != 0,
initialstart != 0,
abort != 0);
abort != 0,
&m_stopSelf);
m_output->println("restart reply");
if(result != 0){
......@@ -894,7 +901,7 @@ MgmApiSession::restartAll(Parser<MgmApiSession>::Context &,
args.get("nostart", &nostart);
int count = 0;
int result = m_mgmsrv.restart(nostart, initialstart, abort, &count);
int result = m_mgmsrv.restartDB(nostart, initialstart, abort, &count);
m_output->println("restart reply");
if(result != 0)
......@@ -1013,7 +1020,7 @@ MgmApiSession::stop(Parser<MgmApiSession>::Context &,
int stopped= 0;
int result= 0;
if (nodes.size())
result= m_mgmsrv.stopNodes(nodes, &stopped, abort != 0);
result= m_mgmsrv.stopNodes(nodes, &stopped, abort != 0, &m_stopSelf);
m_output->println("stop reply");
if(result != 0)
......@@ -1032,7 +1039,7 @@ MgmApiSession::stopAll(Parser<MgmApiSession>::Context &,
Uint32 abort;
args.get("abort", &abort);
int result = m_mgmsrv.stop(&stopped, abort != 0);
int result = m_mgmsrv.shutdownDB(&stopped, abort != 0);
m_output->println("stop reply");
if(result != 0)
......
......@@ -40,6 +40,7 @@ private:
Parser_t *m_parser;
MgmtSrvr::Allocated_resources *m_allocated_resources;
char m_err_str[1024];
int m_stopSelf; // -1 is restart, 0 do nothing, 1 stop
void getConfig_common(Parser_t::Context &ctx,
const class Properties &args,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment