Commit d6531851 authored by unknown's avatar unknown

ndb - bug#34216

  testcases


storage/ndb/src/kernel/blocks/ERROR_codes.txt:
  new error codes
storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp:
  new error codes
storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp:
  new error codes
storage/ndb/src/kernel/blocks/dbtup/DbtupCommit.cpp:
  remove assert
storage/ndb/test/ndbapi/testNodeRestart.cpp:
  new testcase
  1) -n Bug34216
     Which tests a node dying during a multi-op commit
     Very controlled
  
  2) -n mixedmultiop
     Runs several threads of "load" against the same scenario...not very controlled
storage/ndb/test/run-test/daily-basic-tests.txt:
  new testcases
parent 2849e6df
......@@ -3,10 +3,10 @@ Next NDBCNTR 1002
Next NDBFS 2000
Next DBACC 3002
Next DBTUP 4029
Next DBLQH 5047
Next DBLQH 5050
Next DBDICT 6008
Next DBDIH 7195
Next DBTC 8054
Next DBTC 8058
Next CMVMI 9000
Next BACKUP 10038
Next DBUTIL 11002
......@@ -263,6 +263,9 @@ Delay execution of ABORTCONF signal 2 seconds to generate time-out.
8053: Crash in timeOutFoundLab, state CS_WAIT_COMMIT_CONF
5048: Crash in execCOMMIT
5049: SET_ERROR_INSERT_VALUE(5048)
ERROR CODES FOR TESTING TIME-OUT HANDLING IN DBTC
-------------------------------------------------
......@@ -319,6 +322,8 @@ ABORT OF TCKEYREQ
8038 : Simulate API disconnect just after SCAN_TAB_REQ
8057 : Send only 1 COMMIT per timeslice
8052 : Simulate failure of TransactionBufferMemory allocation for OI lookup
8051 : Simulate failure of allocation for saveINDXKEYINFO
......
......@@ -5959,6 +5959,12 @@ void Dblqh::execCOMMIT(Signal* signal)
TcConnectionrec * const regTcPtr = tcConnectptr.p;
TRACE_OP(regTcPtr, "COMMIT");
CRASH_INSERTION(5048);
if (ERROR_INSERTED(5049))
{
SET_ERROR_INSERT_VALUE(5048);
}
commitReqLab(signal, gci);
return;
......
......@@ -4495,7 +4495,7 @@ void Dbtc::commit020Lab(Signal* signal)
if (localTcConnectptr.i != RNIL) {
Tcount = Tcount + 1;
if (Tcount < 16) {
if (Tcount < 16 && !ERROR_INSERTED(8057)) {
ptrCheckGuard(localTcConnectptr,
TtcConnectFilesize, localTcConnectRecord);
jam();
......@@ -4514,6 +4514,9 @@ void Dbtc::commit020Lab(Signal* signal)
}//if
} else {
jam();
if (ERROR_INSERTED(8057))
CLEAR_ERROR_INSERT_VALUE;
regApiPtr->apiConnectstate = CS_COMMIT_SENT;
return;
}//if
......
......@@ -486,7 +486,7 @@ void Dbtup::execTUP_COMMITREQ(Signal* signal)
*/
fix_commit_order(regOperPtr);
}
ndbassert(regOperPtr.p->is_first_operation());
//ndbassert(regOperPtr.p->is_first_operation());
regFragPtr.i= regOperPtr.p->fragmentPtr;
trans_state= get_trans_state(regOperPtr.p);
......
......@@ -23,6 +23,7 @@
#include <signaldata/DumpStateOrd.hpp>
#include <Bitmask.hpp>
#include <RefConvert.hpp>
#include <NdbEnv.h>
int runLoadTable(NDBT_Context* ctx, NDBT_Step* step){
......@@ -121,15 +122,57 @@ int runPkReadUntilStopped(NDBT_Context* ctx, NDBT_Step* step){
int runPkUpdateUntilStopped(NDBT_Context* ctx, NDBT_Step* step){
int result = NDBT_OK;
int records = ctx->getNumRecords();
int multiop = ctx->getProperty("MULTI_OP", 1);
Ndb* pNdb = GETNDB(step);
int i = 0;
HugoTransactions hugoTrans(*ctx->getTab());
while (ctx->isTestStopped() == false) {
HugoOperations hugoOps(*ctx->getTab());
while (ctx->isTestStopped() == false)
{
g_info << i << ": ";
int rows = (rand()%records)+1;
int batch = (rand()%rows)+1;
if (hugoTrans.pkUpdateRecords(GETNDB(step), rows, batch) != 0){
return NDBT_FAILED;
int batch = (rand()%records)+1;
int row = rand() % records;
if (batch > 25)
batch = 25;
if(row + batch > records)
batch = records - row;
if(hugoOps.startTransaction(pNdb) != 0)
goto err;
if(hugoOps.pkUpdateRecord(pNdb, row, batch, rand()) != 0)
goto err;
for (int j = 1; j<multiop; j++)
{
if(hugoOps.execute_NoCommit(pNdb) != 0)
goto err;
if(hugoOps.pkUpdateRecord(pNdb, row, batch, rand()) != 0)
goto err;
}
if(hugoOps.execute_Commit(pNdb) != 0)
goto err;
hugoOps.closeTransaction(pNdb);
continue;
err:
NdbConnection* pCon = hugoOps.getTransaction();
if(pCon == 0)
continue;
NdbError error = pCon->getNdbError();
hugoOps.closeTransaction(pNdb);
if (error.status == NdbError::TemporaryError){
NdbSleep_MilliSleep(50);
continue;
}
return NDBT_FAILED;
i++;
}
return result;
......@@ -230,7 +273,7 @@ int runRestarter(NDBT_Context* ctx, NDBT_Step* step){
return NDBT_OK;
}
if(restarter.waitClusterStarted(60) != 0){
if(restarter.waitClusterStarted() != 0){
g_err << "Cluster failed to start" << endl;
return NDBT_FAILED;
}
......@@ -241,13 +284,27 @@ int runRestarter(NDBT_Context* ctx, NDBT_Step* step){
int id = lastId % restarter.getNumDbNodes();
int nodeId = restarter.getDbNodeId(id);
ndbout << "Restart node " << nodeId << endl;
if(restarter.restartOneDbNode(nodeId, false, false, true) != 0){
if(restarter.restartOneDbNode(nodeId, false, true, true) != 0){
g_err << "Failed to restartNextDbNode" << endl;
result = NDBT_FAILED;
break;
}
if(restarter.waitClusterStarted(60) != 0){
if (restarter.waitNodesNoStart(&nodeId, 1))
{
g_err << "Failed to waitNodesNoStart" << endl;
result = NDBT_FAILED;
break;
}
if (restarter.startNodes(&nodeId, 1))
{
g_err << "Failed to start node" << endl;
result = NDBT_FAILED;
break;
}
if(restarter.waitClusterStarted() != 0){
g_err << "Cluster failed to start" << endl;
result = NDBT_FAILED;
break;
......@@ -1883,6 +1940,178 @@ runBug32160(NDBT_Context* ctx, NDBT_Step* step)
return NDBT_OK;
}
int
runBug34216(NDBT_Context* ctx, NDBT_Step* step)
{
int result = NDBT_OK;
int loops = ctx->getNumLoops();
NdbRestarter restarter;
int i = 0;
int lastId = 0;
HugoOperations hugoOps(*ctx->getTab());
int records = ctx->getNumRecords();
Ndb* pNdb = GETNDB(step);
if (restarter.getNumDbNodes() < 2)
{
ctx->stopTest();
return NDBT_OK;
}
if(restarter.waitClusterStarted() != 0){
g_err << "Cluster failed to start" << endl;
return NDBT_FAILED;
}
char buf[100];
const char * off = NdbEnv_GetEnv("NDB_ERR_OFFSET", buf, sizeof(buf));
int offset = off ? atoi(off) : 0;
while(i<loops && result != NDBT_FAILED && !ctx->isTestStopped())
{
int id = lastId % restarter.getNumDbNodes();
int nodeId = restarter.getDbNodeId(id);
int err = 5048 + ((i+offset) % 2);
int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
if(hugoOps.startTransaction(pNdb) != 0)
goto err;
nodeId = hugoOps.getTransaction()->getConnectedNodeId();
ndbout << "Restart node " << nodeId << " " << err <<endl;
if (restarter.dumpStateOneNode(nodeId, val2, 2))
return NDBT_FAILED;
if(restarter.insertErrorInNode(nodeId, err) != 0){
g_err << "Failed to restartNextDbNode" << endl;
result = NDBT_FAILED;
break;
}
if (restarter.insertErrorInNode(nodeId, 8057) != 0)
{
g_err << "Failed to insert error 8057" << endl;
result = NDBT_FAILED;
break;
}
int rows = 1;
int batch = 1;
int row = (records - rows) ? rand() % (records - rows) : 0;
if(hugoOps.pkUpdateRecord(pNdb, row, batch, rand()) != 0)
goto err;
for (int l = 1; l<5; l++)
{
if (hugoOps.execute_NoCommit(pNdb) != 0)
goto err;
if(hugoOps.pkUpdateRecord(pNdb, row, batch, rand()) != 0)
goto err;
}
hugoOps.execute_Commit(pNdb);
hugoOps.closeTransaction(pNdb);
if (restarter.waitNodesNoStart(&nodeId, 1))
{
g_err << "Failed to waitNodesNoStart" << endl;
result = NDBT_FAILED;
break;
}
if (restarter.startNodes(&nodeId, 1))
{
g_err << "Failed to startNodes" << endl;
result = NDBT_FAILED;
break;
}
if(restarter.waitClusterStarted() != 0){
g_err << "Cluster failed to start" << endl;
result = NDBT_FAILED;
break;
}
lastId++;
i++;
}
ctx->stopTest();
return result;
err:
return NDBT_FAILED;
}
/**
 * Test step: repeatedly provoke a node failure during commit.
 *
 * For each loop a random data node gets error insert 5048 (crash in
 * execCOMMIT, see ERROR_codes.txt). The step then waits for that node to
 * reach no-start, starts it again and waits for the whole cluster to be
 * started. The step itself generates no load.
 *
 * Always stops the test before returning once the loop has been entered.
 *
 * @return NDBT_OK on success (or when fewer than 2 data nodes exist),
 *         NDBT_FAILED otherwise.
 */
int
runNF_commit(NDBT_Context* ctx, NDBT_Step* step)
{
  const int loops = ctx->getNumLoops();
  NdbRestarter restarter;

  // The scenario requires a surviving node.
  if (restarter.getNumDbNodes() < 2)
  {
    ctx->stopTest();
    return NDBT_OK;
  }

  if (restarter.waitClusterStarted() != 0)
  {
    g_err << "Cluster failed to start" << endl;
    return NDBT_FAILED;
  }

  int result = NDBT_OK;
  const int err = 5048;
  for (int i = 0; i < loops && !ctx->isTestStopped(); i++)
  {
    int nodeId = restarter.getDbNodeId(rand() % restarter.getNumDbNodes());
    ndbout << "Restart node " << nodeId << " " << err <<endl;

    // NOTE(review): presumably makes the node restart (rather than just
    // die) when the error insert crashes it -- confirm against
    // DumpStateOrd docs.
    int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
    if (restarter.dumpStateOneNode(nodeId, val2, 2))
    {
      g_err << "Failed to dump state CmvmiSetRestartOnErrorInsert" << endl;
      result = NDBT_FAILED;
      break;
    }

    if (restarter.insertErrorInNode(nodeId, err) != 0)
    {
      g_err << "Failed to insert error " << err << endl;
      result = NDBT_FAILED;
      break;
    }

    if (restarter.waitNodesNoStart(&nodeId, 1))
    {
      g_err << "Failed to waitNodesNoStart" << endl;
      result = NDBT_FAILED;
      break;
    }

    if (restarter.startNodes(&nodeId, 1))
    {
      g_err << "Failed to startNodes" << endl;
      result = NDBT_FAILED;
      break;
    }

    if (restarter.waitClusterStarted() != 0)
    {
      g_err << "Cluster failed to start" << endl;
      result = NDBT_FAILED;
      break;
    }
  }

  ctx->stopTest();
  return result;
}
NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad",
"Test that one node at a time can be stopped and then restarted "\
......@@ -2255,6 +2484,21 @@ TESTCASE("Bug29364", ""){
TESTCASE("Bug32160", ""){
INITIALIZER(runBug32160);
}
// Bug34216: controlled node-failure-during-commit test.
// Checks that all nodes are started and loads the table, runs
// runBug34216 as the only step, and clears the table afterwards.
TESTCASE("Bug34216", ""){
INITIALIZER(runCheckAllNodesStarted);
INITIALIZER(runLoadTable);
STEP(runBug34216);
FINALIZER(runClearTable);
}
// mixedmultiop: less controlled variant of the commit node-failure test.
// runNF_commit inserts the crash-in-commit error while two
// runPkUpdateUntilStopped steps generate update load.
TESTCASE("mixedmultiop", ""){
// Read by runPkUpdateUntilStopped via getProperty("MULTI_OP", 1).
TC_PROPERTY("MULTI_OP", 5);
INITIALIZER(runCheckAllNodesStarted);
INITIALIZER(runLoadTable);
STEP(runNF_commit);
STEP(runPkUpdateUntilStopped);
STEP(runPkUpdateUntilStopped);
FINALIZER(runClearTable);
}
NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){
......
......@@ -1050,3 +1050,11 @@ max-time: 300
cmd: test_event
args: -n Bug33793 T1
max-time: 600
cmd: testNodeRestart
args: --nologging -n Bug34216 -l 10 T1 I3 D2
max-time: 1200
cmd: testNodeRestart
args: -n mixedmultiop -l 10 T1 I2 I3 D2
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment