Commit 80abad58 authored by unknown

bug#10358 - ndb

  Cluster failure with non-started nodes can result in timed-out transactions


ndb/src/mgmapi/mgmapi.cpp:
  Increase timeout for restarts
ndb/src/ndbapi/ClusterMgr.cpp:
  Report NFCOMPLETEREP when no alive node exists
    (instead of when no connected node exists)
ndb/src/ndbapi/ClusterMgr.hpp:
  Report NFCOMPLETEREP when no alive node exists
    (instead of when no connected node exists)
parent 55c9c4d7
...@@ -857,7 +857,10 @@ ndb_mgm_restart2(NdbMgmHandle handle, int no_of_nodes, const int * node_list, ...@@ -857,7 +857,10 @@ ndb_mgm_restart2(NdbMgmHandle handle, int no_of_nodes, const int * node_list,
args.put("initialstart", initial); args.put("initialstart", initial);
args.put("nostart", nostart); args.put("nostart", nostart);
const Properties *reply; const Properties *reply;
const int timeout = handle->read_timeout;
handle->read_timeout= 5*60*1000; // 5 minutes
reply = ndb_mgm_call(handle, restart_reply, "restart all", &args); reply = ndb_mgm_call(handle, restart_reply, "restart all", &args);
handle->read_timeout= timeout;
CHECK_REPLY(reply, -1); CHECK_REPLY(reply, -1);
BaseString result; BaseString result;
...@@ -890,7 +893,10 @@ ndb_mgm_restart2(NdbMgmHandle handle, int no_of_nodes, const int * node_list, ...@@ -890,7 +893,10 @@ ndb_mgm_restart2(NdbMgmHandle handle, int no_of_nodes, const int * node_list,
args.put("nostart", nostart); args.put("nostart", nostart);
const Properties *reply; const Properties *reply;
const int timeout = handle->read_timeout;
handle->read_timeout= 5*60*1000; // 5 minutes
reply = ndb_mgm_call(handle, restart_reply, "restart node", &args); reply = ndb_mgm_call(handle, restart_reply, "restart node", &args);
handle->read_timeout= timeout;
if(reply != NULL) { if(reply != NULL) {
BaseString result; BaseString result;
reply->get("result", result); reply->get("result", result);
......
...@@ -66,6 +66,7 @@ ClusterMgr::ClusterMgr(TransporterFacade & _facade): ...@@ -66,6 +66,7 @@ ClusterMgr::ClusterMgr(TransporterFacade & _facade):
{ {
ndbSetOwnVersion(); ndbSetOwnVersion();
clusterMgrThreadMutex = NdbMutex_Create(); clusterMgrThreadMutex = NdbMutex_Create();
noOfAliveNodes= 0;
noOfConnectedNodes= 0; noOfConnectedNodes= 0;
theClusterMgrThread= 0; theClusterMgrThread= 0;
} }
...@@ -335,9 +336,9 @@ ClusterMgr::execAPI_REGCONF(const Uint32 * theData){ ...@@ -335,9 +336,9 @@ ClusterMgr::execAPI_REGCONF(const Uint32 * theData){
node.m_state = apiRegConf->nodeState; node.m_state = apiRegConf->nodeState;
if (node.compatible && (node.m_state.startLevel == NodeState::SL_STARTED || if (node.compatible && (node.m_state.startLevel == NodeState::SL_STARTED ||
node.m_state.startLevel == NodeState::SL_SINGLEUSER)){ node.m_state.startLevel == NodeState::SL_SINGLEUSER)){
node.m_alive = true; set_node_alive(node, true);
} else { } else {
node.m_alive = false; set_node_alive(node, false);
}//if }//if
node.hbSent = 0; node.hbSent = 0;
node.hbCounter = 0; node.hbCounter = 0;
...@@ -360,7 +361,7 @@ ClusterMgr::execAPI_REGREF(const Uint32 * theData){ ...@@ -360,7 +361,7 @@ ClusterMgr::execAPI_REGREF(const Uint32 * theData){
assert(node.defined == true); assert(node.defined == true);
node.compatible = false; node.compatible = false;
node.m_alive = false; set_node_alive(node, false);
node.m_state = NodeState::SL_NOTHING; node.m_state = NodeState::SL_NOTHING;
node.m_info.m_version = ref->version; node.m_info.m_version = ref->version;
...@@ -437,7 +438,7 @@ ClusterMgr::reportNodeFailed(NodeId nodeId){ ...@@ -437,7 +438,7 @@ ClusterMgr::reportNodeFailed(NodeId nodeId){
Node & theNode = theNodes[nodeId]; Node & theNode = theNodes[nodeId];
theNode.m_alive = false; set_node_alive(theNode, false);
if(theNode.connected) if(theNode.connected)
theFacade.doDisconnect(nodeId); theFacade.doDisconnect(nodeId);
...@@ -449,8 +450,8 @@ ClusterMgr::reportNodeFailed(NodeId nodeId){ ...@@ -449,8 +450,8 @@ ClusterMgr::reportNodeFailed(NodeId nodeId){
} }
theNode.nfCompleteRep = false; theNode.nfCompleteRep = false;
if(noOfConnectedNodes == 0){ if(noOfAliveNodes == 0){
NFCompleteRep rep; NFCompleteRep rep;
for(Uint32 i = 1; i<MAX_NODES; i++){ for(Uint32 i = 1; i<MAX_NODES; i++){
if(theNodes[i].defined && theNodes[i].nfCompleteRep == false){ if(theNodes[i].defined && theNodes[i].nfCompleteRep == false){
......
...@@ -80,6 +80,7 @@ public: ...@@ -80,6 +80,7 @@ public:
Uint32 getNoOfConnectedNodes() const; Uint32 getNoOfConnectedNodes() const;
private: private:
Uint32 noOfAliveNodes;
Uint32 noOfConnectedNodes; Uint32 noOfConnectedNodes;
Node theNodes[MAX_NODES]; Node theNodes[MAX_NODES];
NdbThread* theClusterMgrThread; NdbThread* theClusterMgrThread;
...@@ -100,6 +101,19 @@ private: ...@@ -100,6 +101,19 @@ private:
void execAPI_REGREF (const Uint32 * theData); void execAPI_REGREF (const Uint32 * theData);
void execNODE_FAILREP (const Uint32 * theData); void execNODE_FAILREP (const Uint32 * theData);
void execNF_COMPLETEREP(const Uint32 * theData); void execNF_COMPLETEREP(const Uint32 * theData);
/**
 * Set a node's m_alive flag and keep noOfAliveNodes consistent with it.
 *
 * The counter is adjusted only when the flag actually flips:
 * dead -> alive increments it, alive -> dead decrements it.
 * Re-applying the current state leaves the counter untouched.
 */
inline void set_node_alive(Node& node, bool alive){
  const bool was_alive = node.m_alive;
  if(was_alive != alive)
  {
    if(alive)
    {
      noOfAliveNodes++;
    }
    else
    {
      // An alive node is going away, so the counter must be non-zero here.
      assert(noOfAliveNodes);
      noOfAliveNodes--;
    }
  }
  node.m_alive = alive;
}
}; };
inline inline
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment