bug#15632 - ndb

  Fix race between INCL_NODEREQ(prio b) and GCP_PREPARE(prio a) by also waiting for starting nodes
parent c4e8859b
...@@ -57,5 +57,8 @@ char ndb_version_string_buf[NDB_VERSION_STRING_BUF_SZ]; ...@@ -57,5 +57,8 @@ char ndb_version_string_buf[NDB_VERSION_STRING_BUF_SZ];
*/ */
/*#define NDB_VERSION_ID 0*/ /*#define NDB_VERSION_ID 0*/
/* Version thresholds, one per major series (4.x and 5.x), that Dbdih compares
   node versions against before waiting for / sending INCL_NODECONF. */
#define NDBD_INCL_NODECONF_VERSION_4 MAKE_VERSION(4,1,17)
#define NDBD_INCL_NODECONF_VERSION_5 MAKE_VERSION(5,0,18)
#endif #endif
...@@ -61,6 +61,8 @@ Insert system error in GCP participant when receiving GCP_SAVEREQ. ...@@ -61,6 +61,8 @@ Insert system error in GCP participant when receiving GCP_SAVEREQ.
5007: 5007:
Delay GCP_SAVEREQ by 10 secs Delay GCP_SAVEREQ by 10 secs
7165: Delay INCL_NODEREQ in starting node, yielding error in GCP_PREPARE
ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING: ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING:
----------------------------------------------------------------- -----------------------------------------------------------------
......
...@@ -215,7 +215,7 @@ void Dbdih::sendINCL_NODEREQ(Signal* signal, Uint32 nodeId) ...@@ -215,7 +215,7 @@ void Dbdih::sendINCL_NODEREQ(Signal* signal, Uint32 nodeId)
signal->theData[2] = c_nodeStartMaster.failNr; signal->theData[2] = c_nodeStartMaster.failNr;
signal->theData[3] = 0; signal->theData[3] = 0;
signal->theData[4] = currentgcp; signal->theData[4] = currentgcp;
sendSignal(nodeDihRef, GSN_INCL_NODEREQ, signal, 5, JBB); sendSignal(nodeDihRef, GSN_INCL_NODEREQ, signal, 5, JBA);
}//Dbdih::sendINCL_NODEREQ() }//Dbdih::sendINCL_NODEREQ()
void Dbdih::sendMASTER_GCPREQ(Signal* signal, Uint32 nodeId) void Dbdih::sendMASTER_GCPREQ(Signal* signal, Uint32 nodeId)
...@@ -1857,6 +1857,14 @@ void Dbdih::gcpBlockedLab(Signal* signal) ...@@ -1857,6 +1857,14 @@ void Dbdih::gcpBlockedLab(Signal* signal)
// global checkpoint id and the correct state. We do not wait for any reply // global checkpoint id and the correct state. We do not wait for any reply
// since the starting node will not send any. // since the starting node will not send any.
/*-------------------------------------------------------------------------*/ /*-------------------------------------------------------------------------*/
Uint32 startVersion = getNodeInfo(c_nodeStartMaster.startNode).m_version;
if ((getMajor(startVersion) == 4 && startVersion >= NDBD_INCL_NODECONF_VERSION_4) ||
(getMajor(startVersion) == 5 && startVersion >= NDBD_INCL_NODECONF_VERSION_5))
{
c_INCL_NODEREQ_Counter.setWaitingFor(c_nodeStartMaster.startNode);
}
sendINCL_NODEREQ(signal, c_nodeStartMaster.startNode); sendINCL_NODEREQ(signal, c_nodeStartMaster.startNode);
}//Dbdih::gcpBlockedLab() }//Dbdih::gcpBlockedLab()
...@@ -2059,6 +2067,13 @@ void Dbdih::execINCL_NODEREQ(Signal* signal) ...@@ -2059,6 +2067,13 @@ void Dbdih::execINCL_NODEREQ(Signal* signal)
jamEntry(); jamEntry();
Uint32 retRef = signal->theData[0]; Uint32 retRef = signal->theData[0];
Uint32 nodeId = signal->theData[1]; Uint32 nodeId = signal->theData[1];
if (nodeId == getOwnNodeId() && ERROR_INSERTED(7165))
{
CLEAR_ERROR_INSERT_VALUE;
sendSignalWithDelay(reference(), GSN_INCL_NODEREQ, signal, 5000, signal->getLength());
return;
}
Uint32 tnodeStartFailNr = signal->theData[2]; Uint32 tnodeStartFailNr = signal->theData[2];
currentgcp = signal->theData[4]; currentgcp = signal->theData[4];
CRASH_INSERTION(7127); CRASH_INSERTION(7127);
...@@ -2086,6 +2101,15 @@ void Dbdih::execINCL_NODEREQ(Signal* signal) ...@@ -2086,6 +2101,15 @@ void Dbdih::execINCL_NODEREQ(Signal* signal)
// id's and the lcp status. // id's and the lcp status.
/*-----------------------------------------------------------------------*/ /*-----------------------------------------------------------------------*/
CRASH_INSERTION(7171); CRASH_INSERTION(7171);
Uint32 masterVersion = getNodeInfo(refToNode(cmasterdihref)).m_version;
if ((NDB_VERSION_MAJOR == 4 && masterVersion >= NDBD_INCL_NODECONF_VERSION_4) ||
(NDB_VERSION_MAJOR == 5 && masterVersion >= NDBD_INCL_NODECONF_VERSION_5))
{
signal->theData[0] = getOwnNodeId();
signal->theData[1] = getOwnNodeId();
sendSignal(cmasterdihref, GSN_INCL_NODECONF, signal, 2, JBB);
}
return; return;
}//if }//if
if (getNodeStatus(nodeId) != NodeRecord::STARTING) { if (getNodeStatus(nodeId) != NodeRecord::STARTING) {
...@@ -3737,8 +3761,16 @@ void Dbdih::execNODE_FAILREP(Signal* signal) ...@@ -3737,8 +3761,16 @@ void Dbdih::execNODE_FAILREP(Signal* signal)
/*------------------------------------------------------------------------*/ /*------------------------------------------------------------------------*/
// Verify that a starting node has also crashed. Reset the node start record. // Verify that a starting node has also crashed. Reset the node start record.
/*-------------------------------------------------------------------------*/ /*-------------------------------------------------------------------------*/
if (c_nodeStartMaster.startNode != RNIL) { if (false && c_nodeStartMaster.startNode != RNIL && getNodeStatus(c_nodeStartMaster.startNode) == NodeRecord::ALIVE)
ndbrequire(getNodeStatus(c_nodeStartMaster.startNode)!= NodeRecord::ALIVE); {
BlockReference cntrRef = calcNdbCntrBlockRef(c_nodeStartMaster.startNode);
SystemError * const sysErr = (SystemError*)&signal->theData[0];
sysErr->errorCode = SystemError::StartInProgressError;
sysErr->errorRef = reference();
sysErr->data1= 0;
sysErr->data2= __LINE__;
sendSignal(cntrRef, GSN_SYSTEM_ERROR, signal, SystemError::SignalLength, JBA);
nodeResetStart();
}//if }//if
/*--------------------------------------------------*/ /*--------------------------------------------------*/
......
...@@ -446,6 +446,56 @@ int runBug15587(NDBT_Context* ctx, NDBT_Step* step){ ...@@ -446,6 +446,56 @@ int runBug15587(NDBT_Context* ctx, NDBT_Step* step){
return NDBT_OK; return NDBT_OK;
} }
int runBug15632(NDBT_Context* ctx, NDBT_Step* step){
int result = NDBT_OK;
int loops = ctx->getNumLoops();
int records = ctx->getNumRecords();
NdbRestarter restarter;
int nodeId = restarter.getDbNodeId(1);
ndbout << "Restart node " << nodeId << endl;
if (restarter.restartOneDbNode(nodeId,
/** initial */ false,
/** nostart */ true,
/** abort */ true))
return NDBT_FAILED;
if (restarter.waitNodesNoStart(&nodeId, 1))
return NDBT_FAILED;
if (restarter.insertErrorInNode(nodeId, 7165))
return NDBT_FAILED;
if (restarter.startNodes(&nodeId, 1))
return NDBT_FAILED;
if (restarter.waitNodesStarted(&nodeId, 1))
return NDBT_FAILED;
if (restarter.restartOneDbNode(nodeId,
/** initial */ false,
/** nostart */ true,
/** abort */ true))
return NDBT_FAILED;
if (restarter.waitNodesNoStart(&nodeId, 1))
return NDBT_FAILED;
if (restarter.insertErrorInNode(nodeId, 7171))
return NDBT_FAILED;
if (restarter.startNodes(&nodeId, 1))
return NDBT_FAILED;
if (restarter.waitNodesStarted(&nodeId, 1))
return NDBT_FAILED;
ctx->stopTest();
return NDBT_OK;
}
NDBT_TESTSUITE(testNodeRestart); NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad", TESTCASE("NoLoad",
...@@ -596,6 +646,8 @@ TESTCASE("RestartNFDuringNR", ...@@ -596,6 +646,8 @@ TESTCASE("RestartNFDuringNR",
INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runCheckAllNodesStarted);
INITIALIZER(runLoadTable); INITIALIZER(runLoadTable);
STEP(runRestarts); STEP(runRestarts);
STEP(runPkUpdateUntilStopped);
STEP(runScanUpdateUntilStopped);
FINALIZER(runScanReadVerify); FINALIZER(runScanReadVerify);
FINALIZER(runClearTable); FINALIZER(runClearTable);
} }
...@@ -685,6 +737,8 @@ TESTCASE("RestartNodeDuringLCP", ...@@ -685,6 +737,8 @@ TESTCASE("RestartNodeDuringLCP",
INITIALIZER(runCheckAllNodesStarted); INITIALIZER(runCheckAllNodesStarted);
INITIALIZER(runLoadTable); INITIALIZER(runLoadTable);
STEP(runRestarts); STEP(runRestarts);
STEP(runPkUpdateUntilStopped);
STEP(runScanUpdateUntilStopped);
FINALIZER(runScanReadVerify); FINALIZER(runScanReadVerify);
FINALIZER(runClearTable); FINALIZER(runClearTable);
} }
...@@ -716,6 +770,12 @@ TESTCASE("Bug15587", ...@@ -716,6 +770,12 @@ TESTCASE("Bug15587",
STEP(runBug15587); STEP(runBug15587);
FINALIZER(runClearTable); FINALIZER(runClearTable);
} }
// Regression test case for bug#15632: loads the table, runs runBug15632 as
// its only step, and clears the table afterwards.
// NOTE(review): "NF during NR" presumably means node failure during node
// restart - confirm.
TESTCASE("Bug15632",
"Test bug with NF during NR"){
INITIALIZER(runLoadTable);
STEP(runBug15632);
FINALIZER(runClearTable);
}
NDBT_TESTSUITE_END(testNodeRestart); NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){ int main(int argc, const char** argv){
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment