ndb - bug#22013

    Fix bug in event handling wrt early node shutdown
parent 12911bb5
...@@ -137,7 +137,10 @@ MgmtSrvr::logLevelThreadRun() ...@@ -137,7 +137,10 @@ MgmtSrvr::logLevelThreadRun()
m_started_nodes.erase(0, false); m_started_nodes.erase(0, false);
m_started_nodes.unlock(); m_started_nodes.unlock();
setEventReportingLevelImpl(node, req); if (setEventReportingLevelImpl(node, req))
{
ndbout_c("setEventReportingLevelImpl(%d): failed", node);
}
SetLogLevelOrd ord; SetLogLevelOrd ord;
ord = m_nodeLogLevel[node]; ord = m_nodeLogLevel[node];
...@@ -155,10 +158,16 @@ MgmtSrvr::logLevelThreadRun() ...@@ -155,10 +158,16 @@ MgmtSrvr::logLevelThreadRun()
m_log_level_requests.erase(0, false); m_log_level_requests.erase(0, false);
m_log_level_requests.unlock(); m_log_level_requests.unlock();
if(req.blockRef == 0){ if(req.blockRef == 0)
{
req.blockRef = _ownReference; req.blockRef = _ownReference;
setEventReportingLevelImpl(0, req); if (setEventReportingLevelImpl(0, req))
} else { {
ndbout_c("setEventReportingLevelImpl: failed 2!");
}
}
else
{
SetLogLevelOrd ord; SetLogLevelOrd ord;
ord = req; ord = req;
setNodeLogLevelImpl(req.blockRef, ord); setNodeLogLevelImpl(req.blockRef, ord);
...@@ -1376,9 +1385,6 @@ int MgmtSrvr::restartDB(bool nostart, bool initialStart, ...@@ -1376,9 +1385,6 @@ int MgmtSrvr::restartDB(bool nostart, bool initialStart,
NodeId nodeId = 0; NodeId nodeId = 0;
NDB_TICKS maxTime = NdbTick_CurrentMillisecond() + waitTime; NDB_TICKS maxTime = NdbTick_CurrentMillisecond() + waitTime;
ndbout_c(" %d", nodes.get(1));
ndbout_c(" %d", nodes.get(2));
while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) { while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) {
if (!nodes.get(nodeId)) if (!nodes.get(nodeId))
continue; continue;
...@@ -1584,6 +1590,11 @@ MgmtSrvr::setEventReportingLevelImpl(int nodeId, ...@@ -1584,6 +1590,11 @@ MgmtSrvr::setEventReportingLevelImpl(int nodeId,
} }
} }
if (nodes.isclear())
{
return SEND_OR_RECEIVE_FAILED;
}
int error = 0; int error = 0;
while (!nodes.isclear()) while (!nodes.isclear())
{ {
...@@ -1600,16 +1611,24 @@ MgmtSrvr::setEventReportingLevelImpl(int nodeId, ...@@ -1600,16 +1611,24 @@ MgmtSrvr::setEventReportingLevelImpl(int nodeId,
error = 1; error = 1;
break; break;
} }
// Since sending okToSend(true),
// there is no guarantee that NF_COMPLETEREP will come
// i.e listen also to NODE_FAILREP
case GSN_NODE_FAILREP: {
const NodeFailRep * const rep =
CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
NdbNodeBitmask mask;
mask.assign(NdbNodeBitmask::Size, rep->theNodes);
nodes.bitANDC(mask);
break;
}
case GSN_NF_COMPLETEREP:{ case GSN_NF_COMPLETEREP:{
const NFCompleteRep * const rep = const NFCompleteRep * const rep =
CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr()); CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
nodes.clear(rep->failedNodeId); nodes.clear(rep->failedNodeId);
break; break;
} }
case GSN_NODE_FAILREP:{
// ignore, NF_COMPLETEREP will arrive later
break;
}
default: default:
report_unknown_signal(signal); report_unknown_signal(signal);
return SEND_OR_RECEIVE_FAILED; return SEND_OR_RECEIVE_FAILED;
...@@ -1909,7 +1928,10 @@ MgmtSrvr::handleStatus(NodeId nodeId, bool alive, bool nfComplete) ...@@ -1909,7 +1928,10 @@ MgmtSrvr::handleStatus(NodeId nodeId, bool alive, bool nfComplete)
theData[1] = nodeId; theData[1] = nodeId;
if (alive) { if (alive) {
if (nodeTypes[nodeId] == NODE_TYPE_DB)
{
m_started_nodes.push_back(nodeId); m_started_nodes.push_back(nodeId);
}
rep->setEventType(NDB_LE_Connected); rep->setEventType(NDB_LE_Connected);
} else { } else {
rep->setEventType(NDB_LE_Disconnected); rep->setEventType(NDB_LE_Disconnected);
......
...@@ -507,6 +507,7 @@ ClusterMgr::reportConnected(NodeId nodeId){ ...@@ -507,6 +507,7 @@ ClusterMgr::reportConnected(NodeId nodeId){
theNode.m_info.m_version = 0; theNode.m_info.m_version = 0;
theNode.compatible = true; theNode.compatible = true;
theNode.nfCompleteRep = true; theNode.nfCompleteRep = true;
theNode.m_state.startLevel = NodeState::SL_NOTHING;
theFacade.ReportNodeAlive(nodeId); theFacade.ReportNodeAlive(nodeId);
} }
...@@ -518,14 +519,13 @@ ClusterMgr::reportDisconnected(NodeId nodeId){ ...@@ -518,14 +519,13 @@ ClusterMgr::reportDisconnected(NodeId nodeId){
noOfConnectedNodes--; noOfConnectedNodes--;
theNodes[nodeId].connected = false; theNodes[nodeId].connected = false;
theNodes[nodeId].m_state.m_connected_nodes.clear(); theNodes[nodeId].m_state.m_connected_nodes.clear();
reportNodeFailed(nodeId); reportNodeFailed(nodeId, true);
} }
void void
ClusterMgr::reportNodeFailed(NodeId nodeId){ ClusterMgr::reportNodeFailed(NodeId nodeId, bool disconnect){
Node & theNode = theNodes[nodeId]; Node & theNode = theNodes[nodeId];
...@@ -536,10 +536,11 @@ ClusterMgr::reportNodeFailed(NodeId nodeId){ ...@@ -536,10 +536,11 @@ ClusterMgr::reportNodeFailed(NodeId nodeId){
{ {
theFacade.doDisconnect(nodeId); theFacade.doDisconnect(nodeId);
} }
const bool report = (theNode.m_state.startLevel != NodeState::SL_NOTHING); const bool report = (theNode.m_state.startLevel != NodeState::SL_NOTHING);
theNode.m_state.startLevel = NodeState::SL_NOTHING; theNode.m_state.startLevel = NodeState::SL_NOTHING;
if(report) if(disconnect || report)
{ {
theFacade.ReportNodeDead(nodeId); theFacade.ReportNodeDead(nodeId);
} }
......
...@@ -97,7 +97,7 @@ private: ...@@ -97,7 +97,7 @@ private:
NdbMutex* clusterMgrThreadMutex; NdbMutex* clusterMgrThreadMutex;
void showState(NodeId nodeId); void showState(NodeId nodeId);
void reportNodeFailed(NodeId nodeId); void reportNodeFailed(NodeId nodeId, bool disconnect = false);
/** /**
* Signals received * Signals received
......
...@@ -19,6 +19,14 @@ ...@@ -19,6 +19,14 @@
#include <signaldata/NFCompleteRep.hpp> #include <signaldata/NFCompleteRep.hpp>
#include <signaldata/NodeFailRep.hpp> #include <signaldata/NodeFailRep.hpp>
/**
 * Hard runtime check: returns normally when the condition holds,
 * otherwise terminates the process by calling abort().
 */
static
void
require(bool x)
{
  if (x)
    return;
  abort();
}
SimpleSignal::SimpleSignal(bool dealloc){ SimpleSignal::SimpleSignal(bool dealloc){
memset(this, 0, sizeof(* this)); memset(this, 0, sizeof(* this));
deallocSections = dealloc; deallocSections = dealloc;
...@@ -145,6 +153,7 @@ SignalSender::waitFor(Uint32 timeOutMillis, T & t) ...@@ -145,6 +153,7 @@ SignalSender::waitFor(Uint32 timeOutMillis, T & t)
{ {
SimpleSignal * s = t.check(m_jobBuffer); SimpleSignal * s = t.check(m_jobBuffer);
if(s != 0){ if(s != 0){
m_usedBuffer.push_back(s);
return s; return s;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment