Commit 5a25026d authored by unknown's avatar unknown

ndb - bug#15695 bug#16447 bug#18612

  For various reasons, a partitioned cluster may have been created.
  This patch makes sure that when they connect
  1) it's detected
  2) shutdown is forced


ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp:
  New dump/error insert for simulating network failure
ndb/src/kernel/blocks/qmgr/Qmgr.hpp:
  1) Actively detect partitioned cluster(s)
  2) add some documentation
ndb/src/kernel/blocks/qmgr/QmgrMain.cpp:
  1) Actively detect partitioned cluster(s)
  2) add some documentation
parent 2a00c516
...@@ -133,6 +133,9 @@ Cmvmi::~Cmvmi() ...@@ -133,6 +133,9 @@ Cmvmi::~Cmvmi()
{ {
} }
#ifdef ERROR_INSERT
// Set of node ids selected for error insert 9000. Only compiled when
// ERROR_INSERT is defined. Populated and cleared by the dump-state
// handlers for codes 9000 / 9001, and consulted before connecting to a node.
NodeBitmask c_error_9000_nodes_mask;
#endif
void Cmvmi::execNDB_TAMPER(Signal* signal) void Cmvmi::execNDB_TAMPER(Signal* signal)
{ {
...@@ -390,6 +393,11 @@ void Cmvmi::execOPEN_COMREQ(Signal* signal) ...@@ -390,6 +393,11 @@ void Cmvmi::execOPEN_COMREQ(Signal* signal)
const Uint32 len = signal->getLength(); const Uint32 len = signal->getLength();
if(len == 2){ if(len == 2){
#ifdef ERROR_INSERT
if (! (ERROR_INSERTED(9000) && c_error_9000_nodes_mask.get(tStartingNode)))
#endif
{
globalTransporterRegistry.do_connect(tStartingNode); globalTransporterRegistry.do_connect(tStartingNode);
globalTransporterRegistry.setIOState(tStartingNode, HaltIO); globalTransporterRegistry.setIOState(tStartingNode, HaltIO);
...@@ -400,11 +408,18 @@ void Cmvmi::execOPEN_COMREQ(Signal* signal) ...@@ -400,11 +408,18 @@ void Cmvmi::execOPEN_COMREQ(Signal* signal)
signal->theData[1] = tStartingNode; signal->theData[1] = tStartingNode;
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
//----------------------------------------------------- //-----------------------------------------------------
}
} else { } else {
for(unsigned int i = 1; i < MAX_NODES; i++ ) { for(unsigned int i = 1; i < MAX_NODES; i++ ) {
jam(); jam();
if (i != getOwnNodeId() && getNodeInfo(i).m_type == tData2){ if (i != getOwnNodeId() && getNodeInfo(i).m_type == tData2){
jam(); jam();
#ifdef ERROR_INSERT
if (ERROR_INSERTED(9000) && c_error_9000_nodes_mask.get(i))
continue;
#endif
globalTransporterRegistry.do_connect(i); globalTransporterRegistry.do_connect(i);
globalTransporterRegistry.setIOState(i, HaltIO); globalTransporterRegistry.setIOState(i, HaltIO);
...@@ -1010,7 +1025,8 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) ...@@ -1010,7 +1025,8 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
} }
DumpStateOrd * const & dumpState = (DumpStateOrd *)&signal->theData[0]; DumpStateOrd * const & dumpState = (DumpStateOrd *)&signal->theData[0];
if (dumpState->args[0] == DumpStateOrd::CmvmiDumpConnections){ Uint32 arg = dumpState->args[0];
if (arg == DumpStateOrd::CmvmiDumpConnections){
for(unsigned int i = 1; i < MAX_NODES; i++ ){ for(unsigned int i = 1; i < MAX_NODES; i++ ){
const char* nodeTypeStr = ""; const char* nodeTypeStr = "";
switch(getNodeInfo(i).m_type){ switch(getNodeInfo(i).m_type){
...@@ -1043,13 +1059,13 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) ...@@ -1043,13 +1059,13 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
} }
} }
if (dumpState->args[0] == DumpStateOrd::CmvmiDumpLongSignalMemory){ if (arg == DumpStateOrd::CmvmiDumpLongSignalMemory){
infoEvent("Cmvmi: g_sectionSegmentPool size: %d free: %d", infoEvent("Cmvmi: g_sectionSegmentPool size: %d free: %d",
g_sectionSegmentPool.getSize(), g_sectionSegmentPool.getSize(),
g_sectionSegmentPool.getNoOfFree()); g_sectionSegmentPool.getNoOfFree());
} }
if (dumpState->args[0] == DumpStateOrd::CmvmiSetRestartOnErrorInsert) if (arg == DumpStateOrd::CmvmiSetRestartOnErrorInsert)
{ {
if(signal->getLength() == 1) if(signal->getLength() == 1)
{ {
...@@ -1069,7 +1085,7 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) ...@@ -1069,7 +1085,7 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
} }
} }
if (dumpState->args[0] == DumpStateOrd::CmvmiTestLongSigWithDelay) { if (arg == DumpStateOrd::CmvmiTestLongSigWithDelay) {
unsigned i; unsigned i;
Uint32 loopCount = dumpState->args[1]; Uint32 loopCount = dumpState->args[1];
const unsigned len0 = 11; const unsigned len0 = 11;
...@@ -1097,6 +1113,30 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) ...@@ -1097,6 +1113,30 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
sendSignal(reference(), GSN_TESTSIG, signal, 8, JBB, ptr, 2); sendSignal(reference(), GSN_TESTSIG, signal, 8, JBB, ptr, 2);
} }
#ifdef ERROR_INSERT
// Dump code 9000: activate error insert 9000 and add every node id passed in
// theData[1 .. length-1] to c_error_9000_nodes_mask.
// NOTE(review): the node ids come straight from the signal and are not
// range-checked here before being used as bit indices -- confirm that
// NodeBitmask::set() guards against ids >= MAX_NODES.
if (arg == 9000)
{
SET_ERROR_INSERT_VALUE(9000);
for (Uint32 i = 1; i<signal->getLength(); i++)
c_error_9000_nodes_mask.set(signal->theData[i]);
}
// Dump code 9001: undo 9000. The error insert is cleared first, then an
// OPEN_COMREQ of length 2 is executed directly on CMVMI for every node in
// the mask, and finally the mask itself is emptied.
// (Presumably clearing first matters: the OPEN_COMREQ handler skips masked
// nodes while error 9000 is inserted -- verify against execOPEN_COMREQ.)
if (arg == 9001)
{
CLEAR_ERROR_INSERT_VALUE;
// NOTE(review): this scan starts at index 0, whereas the other node loops
// in this file start at 1 -- harmless if bit 0 is never set; confirm.
for (Uint32 i = 0; i<MAX_NODES; i++)
{
if (c_error_9000_nodes_mask.get(i))
{
// Reuses the incoming signal buffer as the OPEN_COMREQ payload;
// theData[1] carries the node id (theData[0] is overwritten with 0).
signal->theData[0] = 0;
signal->theData[1] = i;
EXECUTE_DIRECT(CMVMI, GSN_OPEN_COMREQ, signal, 2);
}
}
c_error_9000_nodes_mask.clear();
}
#endif
#ifdef VM_TRACE #ifdef VM_TRACE
#if 0 #if 0
{ {
......
...@@ -100,7 +100,12 @@ public: ...@@ -100,7 +100,12 @@ public:
}; };
struct StartRecord { struct StartRecord {
void reset(){ m_startKey++; m_startNode = 0;} void reset(){
m_startKey++;
m_startNode = 0;
m_gsn = RNIL;
m_nodes.clearWaitingFor();
}
Uint32 m_startKey; Uint32 m_startKey;
Uint32 m_startNode; Uint32 m_startNode;
Uint64 m_startTimeout; Uint64 m_startTimeout;
...@@ -112,6 +117,14 @@ public: ...@@ -112,6 +117,14 @@ public:
NdbNodeBitmask c_definedNodes; // DB nodes in config NdbNodeBitmask c_definedNodes; // DB nodes in config
NdbNodeBitmask c_clusterNodes; // DB nodes in cluster NdbNodeBitmask c_clusterNodes; // DB nodes in cluster
NodeBitmask c_connectedNodes; // All kinds of connected nodes NodeBitmask c_connectedNodes; // All kinds of connected nodes
/**
* Nodes which we're checking for partitioned cluster
*
* i.e. nodes that connect to us when we have already elected a president
*/
NdbNodeBitmask c_cmregreq_nodes;
Uint32 c_maxDynamicId; Uint32 c_maxDynamicId;
// Records // Records
...@@ -251,8 +264,10 @@ private: ...@@ -251,8 +264,10 @@ private:
// Generated statement blocks // Generated statement blocks
void startphase1(Signal* signal); void startphase1(Signal* signal);
void electionWon(); void electionWon(Signal* signal);
void cmInfoconf010Lab(Signal* signal); void cmInfoconf010Lab(Signal* signal);
bool check_cmregreq_reply(Signal* signal, Uint32 nodeId, Uint32 gsn);
void apiHbHandlingLab(Signal* signal); void apiHbHandlingLab(Signal* signal);
void timerHandlingLab(Signal* signal); void timerHandlingLab(Signal* signal);
void hbReceivedLab(Signal* signal); void hbReceivedLab(Signal* signal);
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment