Commit 0c7ed64d authored by unknown's avatar unknown

Bug #13461 Slave Cluster crashed on restart of two data nodes in separate groups

- ensure in ndb_mgmd that the stop command is not issued if a node is restarting
- added some new error messages
- in ndbcntr on master, check so that node does not shut down if shutdown in progress
parent 62941621
...@@ -104,6 +104,7 @@ typedef ndbd_exit_classification_enum ndbd_exit_classification; ...@@ -104,6 +104,7 @@ typedef ndbd_exit_classification_enum ndbd_exit_classification;
/* NDBCNTR 6100-> */ /* NDBCNTR 6100-> */
#define NDBD_EXIT_RESTART_TIMEOUT 6100 #define NDBD_EXIT_RESTART_TIMEOUT 6100
#define NDBD_EXIT_RESTART_DURING_SHUTDOWN 6101
/* TC 6200-> */ /* TC 6200-> */
/* DIH 6300-> */ /* DIH 6300-> */
......
...@@ -525,6 +525,9 @@ Ndbcntr::execCNTR_START_REF(Signal * signal){ ...@@ -525,6 +525,9 @@ Ndbcntr::execCNTR_START_REF(Signal * signal){
cmasterNodeId = ref->masterNodeId; cmasterNodeId = ref->masterNodeId;
sendCntrStartReq(signal); sendCntrStartReq(signal);
return; return;
case CntrStartRef::StopInProgress:
jam();
progError(__LINE__, NDBD_EXIT_RESTART_DURING_SHUTDOWN);
} }
ndbrequire(false); ndbrequire(false);
} }
...@@ -2022,7 +2025,9 @@ Ndbcntr::execSTOP_REQ(Signal* signal){ ...@@ -2022,7 +2025,9 @@ Ndbcntr::execSTOP_REQ(Signal* signal){
return; return;
} }
if(c_stopRec.stopReq.senderRef != 0){ if(c_stopRec.stopReq.senderRef != 0 ||
(cmasterNodeId == getOwnNodeId() && !c_start.m_starting.isclear()))
{
/** /**
* Requested a system shutdown * Requested a system shutdown
*/ */
...@@ -2036,7 +2041,8 @@ Ndbcntr::execSTOP_REQ(Signal* signal){ ...@@ -2036,7 +2041,8 @@ Ndbcntr::execSTOP_REQ(Signal* signal){
/** /**
* Requested a node shutdown * Requested a node shutdown
*/ */
if(StopReq::getSystemStop(c_stopRec.stopReq.requestInfo)) if(c_stopRec.stopReq.senderRef &&
StopReq::getSystemStop(c_stopRec.stopReq.requestInfo))
ref->errorCode = StopRef::SystemShutdownInProgress; ref->errorCode = StopRef::SystemShutdownInProgress;
else else
ref->errorCode = StopRef::NodeShutdownInProgress; ref->errorCode = StopRef::NodeShutdownInProgress;
......
...@@ -101,6 +101,9 @@ static const ErrStruct errArray[] = ...@@ -101,6 +101,9 @@ static const ErrStruct errArray[] =
{NDBD_EXIT_RESTART_TIMEOUT, XCE, {NDBD_EXIT_RESTART_TIMEOUT, XCE,
"Total restart time too long, consider increasing StartFailureTimeout " "Total restart time too long, consider increasing StartFailureTimeout "
"or investigate error(s) on other node(s)"}, "or investigate error(s) on other node(s)"},
{NDBD_EXIT_RESTART_DURING_SHUTDOWN, XRE,
"Node started while node shutdown in progress. "
"Please wait until shutdown complete before starting node"},
/* DIH */ /* DIH */
{NDBD_EXIT_MAX_CRASHED_REPLICAS, XFL, {NDBD_EXIT_MAX_CRASHED_REPLICAS, XFL,
......
...@@ -277,15 +277,13 @@ static ErrorItem errorTable[] = ...@@ -277,15 +277,13 @@ static ErrorItem errorTable[] =
{MgmtSrvr::NOT_POSSIBLE_TO_SEND_CONFIG_UPDATE_TO_PROCESS_TYPE, {MgmtSrvr::NOT_POSSIBLE_TO_SEND_CONFIG_UPDATE_TO_PROCESS_TYPE,
"It is not possible to send an update of a configuration variable " "It is not possible to send an update of a configuration variable "
"to this kind of process."}, "to this kind of process."},
{5026, "Node shutdown in progress" }, {MgmtSrvr::NODE_SHUTDOWN_IN_PROGESS, "Node shutdown in progress" },
{5027, "System shutdown in progress" }, {MgmtSrvr::SYSTEM_SHUTDOWN_IN_PROGRESS, "System shutdown in progress" },
{5028, "Node shutdown would cause system crash" }, {MgmtSrvr::NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH,
{5029, "Only one shutdown at a time is possible via mgm server" }, "Node shutdown would cause system crash" },
{5060, "Operation not allowed in single user mode." }, {MgmtSrvr::NODE_NOT_API_NODE, "The specified node is not an API node." },
{5061, "DB is not in single user mode." }, {MgmtSrvr::OPERATION_NOT_ALLOWED_START_STOP,
{5062, "The specified node is not an API node." }, "Operation not allowed while nodes are starting or stopping."},
{5063,
"Cannot enter single user mode. DB nodes in inconsistent startlevel."},
{MgmtSrvr::NO_CONTACT_WITH_DB_NODES, "No contact with database nodes" } {MgmtSrvr::NO_CONTACT_WITH_DB_NODES, "No contact with database nodes" }
}; };
...@@ -293,13 +291,13 @@ int MgmtSrvr::translateStopRef(Uint32 errCode) ...@@ -293,13 +291,13 @@ int MgmtSrvr::translateStopRef(Uint32 errCode)
{ {
switch(errCode){ switch(errCode){
case StopRef::NodeShutdownInProgress: case StopRef::NodeShutdownInProgress:
return 5026; return NODE_SHUTDOWN_IN_PROGESS;
break; break;
case StopRef::SystemShutdownInProgress: case StopRef::SystemShutdownInProgress:
return 5027; return SYSTEM_SHUTDOWN_IN_PROGRESS;
break; break;
case StopRef::NodeShutdownWouldCauseSystemCrash: case StopRef::NodeShutdownWouldCauseSystemCrash:
return 5028; return NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH;
break; break;
} }
return 4999; return 4999;
...@@ -989,6 +987,18 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, ...@@ -989,6 +987,18 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId,
int MgmtSrvr::stopNode(int nodeId, bool abort) int MgmtSrvr::stopNode(int nodeId, bool abort)
{ {
if (!abort)
{
NodeId nodeId = 0;
ClusterMgr::Node node;
while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
{
node = theFacade->theClusterMgr->getNodeInfo(nodeId);
if((node.m_state.startLevel != NodeState::SL_STARTED) &&
(node.m_state.startLevel != NodeState::SL_NOTHING))
return OPERATION_NOT_ALLOWED_START_STOP;
}
}
NodeBitmask nodes; NodeBitmask nodes;
return sendSTOP_REQ(nodeId, return sendSTOP_REQ(nodeId,
nodes, nodes,
...@@ -1027,7 +1037,7 @@ int MgmtSrvr::stop(int * stopCount, bool abort) ...@@ -1027,7 +1037,7 @@ int MgmtSrvr::stop(int * stopCount, bool abort)
int MgmtSrvr::enterSingleUser(int * stopCount, Uint32 singleUserNodeId) int MgmtSrvr::enterSingleUser(int * stopCount, Uint32 singleUserNodeId)
{ {
if (getNodeType(singleUserNodeId) != NDB_MGM_NODE_TYPE_API) if (getNodeType(singleUserNodeId) != NDB_MGM_NODE_TYPE_API)
return 5062; return NODE_NOT_API_NODE;
NodeId nodeId = 0; NodeId nodeId = 0;
ClusterMgr::Node node; ClusterMgr::Node node;
while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
...@@ -1035,7 +1045,7 @@ int MgmtSrvr::enterSingleUser(int * stopCount, Uint32 singleUserNodeId) ...@@ -1035,7 +1045,7 @@ int MgmtSrvr::enterSingleUser(int * stopCount, Uint32 singleUserNodeId)
node = theFacade->theClusterMgr->getNodeInfo(nodeId); node = theFacade->theClusterMgr->getNodeInfo(nodeId);
if((node.m_state.startLevel != NodeState::SL_STARTED) && if((node.m_state.startLevel != NodeState::SL_STARTED) &&
(node.m_state.startLevel != NodeState::SL_NOTHING)) (node.m_state.startLevel != NodeState::SL_NOTHING))
return 5063; return OPERATION_NOT_ALLOWED_START_STOP;
} }
NodeBitmask nodes; NodeBitmask nodes;
int ret = sendSTOP_REQ(0, int ret = sendSTOP_REQ(0,
......
...@@ -174,10 +174,12 @@ public: ...@@ -174,10 +174,12 @@ public:
STATIC_CONST( NODE_SHUTDOWN_IN_PROGESS = 5026 ); STATIC_CONST( NODE_SHUTDOWN_IN_PROGESS = 5026 );
STATIC_CONST( SYSTEM_SHUTDOWN_IN_PROGRESS = 5027 ); STATIC_CONST( SYSTEM_SHUTDOWN_IN_PROGRESS = 5027 );
STATIC_CONST( NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH = 5028 ); STATIC_CONST( NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH = 5028 );
STATIC_CONST( NO_CONTACT_WITH_CLUSTER = 6666 );
STATIC_CONST( OPERATION_IN_PROGRESS = 6667 );
STATIC_CONST( NO_CONTACT_WITH_DB_NODES = 5030 ); STATIC_CONST( NO_CONTACT_WITH_DB_NODES = 5030 );
STATIC_CONST( NODE_NOT_API_NODE = 5062 );
STATIC_CONST( OPERATION_NOT_ALLOWED_START_STOP = 5063 );
/** /**
* This enum specifies the different signal loggig modes possible to set * This enum specifies the different signal loggig modes possible to set
* with the setSignalLoggingMode method. * with the setSignalLoggingMode method.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment