Commit 526f2ddb authored by unknown

Bug fix for testBasic -n MassiveRollback, a bug in LCP in LQH


ndb/src/kernel/blocks/dblqh/Dblqh.hpp:
  max 4 replicas -> 3 next nodes
ndb/src/kernel/blocks/dblqh/DblqhMain.cpp:
  1) If an abort arrives while the operation is in the fragment wait queue - just abort
  2) Fix for the case where ACC_LCPSTARTED arrives after one ACC_LCPCONF has already arrived
ndb/src/ndbapi/Ndbif.cpp:
  Be more "forgiving" in debug mode
ndb/test/ndbapi/testBasic.cpp:
  Accept timeouts in MassiveRollback
ndb/test/src/HugoTransactions.cpp:
  Print batch size
parent 90153034
......@@ -410,7 +410,6 @@
*/
class Dblqh: public SimulatedBlock {
public:
enum LcpCloseState {
LCP_IDLE = 0,
LCP_RUNNING = 1, // LCP is running
......@@ -1990,7 +1989,6 @@ public:
UintR nextTcLogQueue;
UintR nextTc;
UintR nextTcConnectrec;
Uint16 nodeAfterNext[2];
UintR prevHashRec;
UintR prevLogTcrec;
UintR prevTc;
......@@ -2027,6 +2025,7 @@ public:
Uint16 nextReplica;
Uint16 primKeyLen;
Uint16 save1;
Uint16 nodeAfterNext[3];
Uint8 activeCreat;
Uint8 apiVersionNo;
......
......@@ -3574,7 +3574,6 @@ void Dblqh::prepareContinueAfterBlockedLab(Signal* signal)
key.scanNumber = KeyInfo20::getScanNo(regTcPtr->tcScanInfo);
key.fragPtrI = fragptr.i;
c_scanTakeOverHash.find(scanptr, key);
ndbassert(scanptr.i != RNIL);
}
if (scanptr.i == RNIL) {
jam();
......@@ -5995,10 +5994,15 @@ void Dblqh::abortStateHandlerLab(Signal* signal)
break;
case TcConnectionrec::STOPPED:
jam();
/* ------------------------------------------------------------------------- */
/*WE ARE CURRENTLY QUEUED FOR ACCESS TO THE FRAGMENT BY A LOCAL CHECKPOINT. */
/* ------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------
* WE ARE CURRENTLY QUEUED FOR ACCESS TO THE FRAGMENT BY A LCP
* Since nothing has been done, just release operation
* i.e. no prepare log record has been written
* so no abort log records needs to be written
*/
releaseWaitQueue(signal);
continueAfterLogAbortWriteLab(signal);
return;
break;
case TcConnectionrec::WAIT_AI_AFTER_ABORT:
jam();
......@@ -9953,9 +9957,11 @@ void Dblqh::execLCP_HOLDOPCONF(Signal* signal)
return;
} else {
jam();
/* NO MORE HOLDOPS NEEDED */
lcpLocptr.p->lcpLocstate = LcpLocRecord::HOLDOP_READY;
checkLcpHoldop(signal);
if (lcpPtr.p->lcpState == LcpRecord::LCP_WAIT_ACTIVE_FINISH) {
if (fragptr.p->activeList == RNIL) {
jam();
......@@ -9973,6 +9979,7 @@ void Dblqh::execLCP_HOLDOPCONF(Signal* signal)
}//if
}//if
}//if
/* ----------------------- */
/* ELSE */
/* ------------------------------------------------------------------------
......@@ -10045,7 +10052,6 @@ void Dblqh::execTUP_LCPSTARTED(Signal* signal)
void Dblqh::lcpStartedLab(Signal* signal)
{
checkLcpStarted(signal);
if (lcpPtr.p->lcpState == LcpRecord::LCP_STARTED) {
jam();
/* ----------------------------------------------------------------------
......@@ -10064,7 +10070,7 @@ void Dblqh::lcpStartedLab(Signal* signal)
sendAccContOp(signal); /* START OPERATIONS IN ACC */
moveAccActiveFrag(signal); /* MOVE FROM ACC BLOCKED LIST TO ACTIVE LIST
ON FRAGMENT */
}//if
}
/*---------------*/
/* ELSE */
/*-------------------------------------------------------------------------*/
......@@ -10125,7 +10131,7 @@ void Dblqh::execLQH_RESTART_OP(Signal* signal)
lcpPtr.i = signal->theData[1];
ptrCheckGuard(lcpPtr, clcpFileSize, lcpRecord);
if (fragptr.p->fragStatus == Fragrecord::BLOCKED) {
ndbrequire(fragptr.p->fragStatus == Fragrecord::BLOCKED);
if (lcpPtr.p->lcpState == LcpRecord::LCP_STARTED) {
jam();
/***********************************************************************/
......@@ -10134,10 +10140,7 @@ void Dblqh::execLQH_RESTART_OP(Signal* signal)
* REMOVED AS SOON AS ALL OPERATIONS HAVE BEEN STARTED.
***********************************************************************/
restartOperationsLab(signal);
return;
} else {
jam();
if (lcpPtr.p->lcpState == LcpRecord::LCP_BLOCKED_COMP) {
} else if (lcpPtr.p->lcpState == LcpRecord::LCP_BLOCKED_COMP) {
jam();
/*******************************************************************>
* THE CHECKPOINT IS COMPLETED BUT HAS NOT YET STARTED UP
......@@ -10146,11 +10149,9 @@ void Dblqh::execLQH_RESTART_OP(Signal* signal)
* FRAGMENT OF THE LOCAL CHECKPOINT TO AVOID ANY STRANGE ERRORS.
*******************************************************************> */
restartOperationsLab(signal);
return;
}//if
}//if
}//if
} else {
ndbrequire(false);
}
}//Dblqh::execLQH_RESTART_OP()
void Dblqh::restartOperationsLab(Signal* signal)
......@@ -10203,13 +10204,13 @@ void Dblqh::restartOperationsAfterStopLab(Signal* signal)
* WHEN ARRIVING HERE THE OPERATION IS ALREADY SET IN THE ACTIVE LIST.
* THUS WE CAN IMMEDIATELY CALL THE METHODS THAT EXECUTE FROM WHERE
* THE OPERATION WAS STOPPED.
*------------------------------------------------------------------------- */
*------------------------------------------------------------------------ */
switch (tcConnectptr.p->transactionState) {
case TcConnectionrec::STOPPED:
jam();
/*-----------------------------------------------------------------------
* STOPPED BEFORE TRYING TO SEND ACCKEYREQ
*----------------------------------------------------------------------- */
*---------------------------------------------------------------------- */
prepareContinueAfterBlockedLab(signal);
return;
break;
......@@ -10217,7 +10218,7 @@ void Dblqh::restartOperationsAfterStopLab(Signal* signal)
jam();
/* ----------------------------------------------------------------------
* STOPPED BEFORE TRYING TO SEND ACC_COMMITREQ
* ---------------------------------------------------------------------- */
* --------------------------------------------------------------------- */
releaseActiveFrag(signal);
commitContinueAfterBlockedLab(signal);
return;
......@@ -10226,7 +10227,7 @@ void Dblqh::restartOperationsAfterStopLab(Signal* signal)
jam();
/* ----------------------------------------------------------------------
* STOPPED BEFORE TRYING TO SEND ACC_ABORTREQ
* ---------------------------------------------------------------------- */
* --------------------------------------------------------------------- */
abortContinueAfterBlockedLab(signal, true);
return;
break;
......@@ -10234,7 +10235,7 @@ void Dblqh::restartOperationsAfterStopLab(Signal* signal)
jam();
/* ----------------------------------------------------------------------
* STOPPED BEFORE TRYING TO SEND NEXT_SCANREQ DURING COPY FRAGMENT
* ---------------------------------------------------------------------- */
* --------------------------------------------------------------------- */
continueCopyAfterBlockedLab(signal);
return;
break;
......@@ -10242,7 +10243,7 @@ void Dblqh::restartOperationsAfterStopLab(Signal* signal)
jam();
/* ----------------------------------------------------------------------
* STOPPED BEFORE TRYING TO SEND NEXT_SCANREQ DURING COPY FRAGMENT
* ---------------------------------------------------------------------- */
* --------------------------------------------------------------------- */
continueFirstCopyAfterBlockedLab(signal);
return;
break;
......@@ -10250,7 +10251,7 @@ void Dblqh::restartOperationsAfterStopLab(Signal* signal)
jam();
/* ----------------------------------------------------------------------
* STOPPED BEFORE TRYING TO SEND NEXT_SCANREQ DURING SCAN
* ---------------------------------------------------------------------- */
* --------------------------------------------------------------------- */
tcConnectptr.p->transactionState = TcConnectionrec::SCAN_STATE_USED;
continueFirstScanAfterBlockedLab(signal);
return;
......@@ -10259,7 +10260,7 @@ void Dblqh::restartOperationsAfterStopLab(Signal* signal)
jam();
/* ----------------------------------------------------------------------
* STOPPED BEFORE TRYING TO SEND NEXT_SCANREQ DURING SCAN
* ---------------------------------------------------------------------- */
* --------------------------------------------------------------------- */
tcConnectptr.p->transactionState = TcConnectionrec::SCAN_STATE_USED;
continueAfterCheckLcpStopBlocked(signal);
return;
......@@ -10268,7 +10269,7 @@ void Dblqh::restartOperationsAfterStopLab(Signal* signal)
jam();
/* ----------------------------------------------------------------------
* STOPPED BEFORE TRYING TO SEND NEXT_SCANREQ DURING SCAN
* ---------------------------------------------------------------------- */
* --------------------------------------------------------------------- */
tcConnectptr.p->transactionState = TcConnectionrec::SCAN_STATE_USED;
continueScanAfterBlockedLab(signal);
return;
......@@ -10278,7 +10279,7 @@ void Dblqh::restartOperationsAfterStopLab(Signal* signal)
/* ----------------------------------------------------------------------
* STOPPED BEFORE TRYING TO SEND NEXT_SCANREQ DURING RELEASE
* LOCKS IN SCAN
* ---------------------------------------------------------------------- */
* --------------------------------------------------------------------- */
tcConnectptr.p->transactionState = TcConnectionrec::SCAN_STATE_USED;
continueScanReleaseAfterBlockedLab(signal);
return;
......@@ -10287,7 +10288,7 @@ void Dblqh::restartOperationsAfterStopLab(Signal* signal)
jam();
/* ----------------------------------------------------------------------
* STOPPED BEFORE TRYING TO SEND NEXT_SCANREQ DURING CLOSE OF SCAN
* ---------------------------------------------------------------------- */
* --------------------------------------------------------------------- */
continueCloseScanAfterBlockedLab(signal);
return;
break;
......@@ -10295,7 +10296,7 @@ void Dblqh::restartOperationsAfterStopLab(Signal* signal)
jam();
/* ----------------------------------------------------------------------
* STOPPED BEFORE TRYING TO SEND NEXT_SCANREQ DURING CLOSE OF COPY
* ---------------------------------------------------------------------- */
* --------------------------------------------------------------------- */
continueCloseCopyAfterBlockedLab(signal);
return;
break;
......@@ -10421,7 +10422,12 @@ void Dblqh::contChkpNextFragLab(Signal* signal)
* ----------------------------------------------------------------------- */
if (fragptr.p->fragStatus == Fragrecord::BLOCKED) {
jam();
/**
* LCP of fragment complete
* but restarting of operations isn't
*/
lcpPtr.p->lcpState = LcpRecord::LCP_BLOCKED_COMP;
//restartOperationsLab(signal);
return;
}//if
......@@ -10698,25 +10704,25 @@ void Dblqh::checkLcpStarted(Signal* signal)
terrorCode = ZOK;
clsLcpLocptr.i = lcpPtr.p->firstLcpLocAcc;
int i = 0;
do {
ptrCheckGuard(clsLcpLocptr, clcpLocrecFileSize, lcpLocRecord);
if (clsLcpLocptr.p->lcpLocstate != LcpLocRecord::ACC_STARTED) {
ndbrequire((clsLcpLocptr.p->lcpLocstate == LcpLocRecord::ACC_COMPLETED) ||
(clsLcpLocptr.p->lcpLocstate == LcpLocRecord::ACC_WAIT_STARTED));
if (clsLcpLocptr.p->lcpLocstate == LcpLocRecord::ACC_WAIT_STARTED){
return;
}//if
clsLcpLocptr.i = clsLcpLocptr.p->nextLcpLoc;
i++;
} while (clsLcpLocptr.i != RNIL);
i = 0;
clsLcpLocptr.i = lcpPtr.p->firstLcpLocTup;
do {
ptrCheckGuard(clsLcpLocptr, clcpLocrecFileSize, lcpLocRecord);
if (clsLcpLocptr.p->lcpLocstate != LcpLocRecord::TUP_STARTED) {
ndbrequire((clsLcpLocptr.p->lcpLocstate == LcpLocRecord::TUP_COMPLETED) ||
(clsLcpLocptr.p->lcpLocstate == LcpLocRecord::TUP_WAIT_STARTED));
if (clsLcpLocptr.p->lcpLocstate == LcpLocRecord::TUP_WAIT_STARTED){
return;
}//if
clsLcpLocptr.i = clsLcpLocptr.p->nextLcpLoc;
i++;
} while (clsLcpLocptr.i != RNIL);
lcpPtr.p->lcpState = LcpRecord::LCP_STARTED;
}//Dblqh::checkLcpStarted()
......@@ -10874,18 +10880,28 @@ void Dblqh::sendAccContOp(Signal* signal)
{
LcpLocRecordPtr sacLcpLocptr;
int count = 0;
sacLcpLocptr.i = lcpPtr.p->firstLcpLocAcc;
do {
ptrCheckGuard(sacLcpLocptr, clcpLocrecFileSize, lcpLocRecord);
sacLcpLocptr.p->accContCounter = 0;
/* ------------------------------------------------------------------------- */
/*SEND START OPERATIONS TO ACC AGAIN */
/* ------------------------------------------------------------------------- */
if(sacLcpLocptr.p->lcpLocstate == LcpLocRecord::ACC_STARTED){
/* ------------------------------------------------------------------- */
/*SEND START OPERATIONS TO ACC AGAIN */
/* ------------------------------------------------------------------- */
signal->theData[0] = lcpPtr.p->lcpAccptr;
signal->theData[1] = sacLcpLocptr.p->locFragid;
sendSignal(fragptr.p->accBlockref, GSN_ACC_CONTOPREQ, signal, 2, JBA);
count++;
} else if(sacLcpLocptr.p->lcpLocstate == LcpLocRecord::ACC_COMPLETED){
signal->theData[0] = sacLcpLocptr.i;
sendSignal(reference(), GSN_ACC_CONTOPCONF, signal, 1, JBB);
} else {
ndbrequire(false);
}
sacLcpLocptr.i = sacLcpLocptr.p->nextLcpLoc;
} while (sacLcpLocptr.i != RNIL);
}//Dblqh::sendAccContOp()
/* ------------------------------------------------------------------------- */
......
......@@ -350,16 +350,11 @@ Ndb::handleReceivedSignal(NdbApiSignal* aSignal, LinearSectionPtr ptr[3])
return;
}
case GSN_TRANSID_AI:
{
case GSN_TRANSID_AI:{
tFirstDataPtr = int2void(tFirstData);
assert(tFirstDataPtr);
if (tFirstDataPtr == 0) goto InvalidSignal;
NdbReceiver* tRec = void2rec(tFirstDataPtr);
assert(tRec->checkMagicNumber());
assert(tRec->getTransaction());
assert(tRec->getTransaction()->checkState_TransId(((const TransIdAI*)tDataPtr)->transId));
if(tRec->checkMagicNumber() && (tCon = tRec->getTransaction()) &&
NdbReceiver* tRec;
if (tFirstDataPtr && (tRec = void2rec(tFirstDataPtr)) &&
tRec->checkMagicNumber() && (tCon = tRec->getTransaction()) &&
tCon->checkState_TransId(((const TransIdAI*)tDataPtr)->transId)){
Uint32 com;
if(aSignal->m_noOfSections > 0){
......@@ -380,7 +375,7 @@ Ndb::handleReceivedSignal(NdbApiSignal* aSignal, LinearSectionPtr ptr[3])
break;
case NdbReceiver::NDB_SCANRECEIVER:
tCon->theScanningOp->receiver_delivered(tRec);
theWaiter.m_state = (tWaitState == WAIT_SCAN? NO_WAIT: tWaitState);
theWaiter.m_state = (tWaitState == WAIT_SCAN ? NO_WAIT : tWaitState);
break;
default:
goto InvalidSignal;
......@@ -388,7 +383,11 @@ Ndb::handleReceivedSignal(NdbApiSignal* aSignal, LinearSectionPtr ptr[3])
}
break;
} else {
goto InvalidSignal;
/**
* This is ok as transaction can have been aborted before TRANSID_AI
* arrives (if TUP on other node than TC)
*/
return;
}
}
case GSN_TCKEY_FAILCONF:
......@@ -695,7 +694,8 @@ Ndb::handleReceivedSignal(NdbApiSignal* aSignal, LinearSectionPtr ptr[3])
(tCon = void2con(tFirstDataPtr)) && (tCon->checkMagicNumber() == 0)){
if(aSignal->m_noOfSections > 0){
tReturnCode = tCon->receiveSCAN_TABCONF(aSignal, ptr[0].p, ptr[0].sz);
tReturnCode = tCon->receiveSCAN_TABCONF(aSignal,
ptr[0].p, ptr[0].sz);
} else {
tReturnCode =
tCon->receiveSCAN_TABCONF(aSignal,
......@@ -730,10 +730,9 @@ Ndb::handleReceivedSignal(NdbApiSignal* aSignal, LinearSectionPtr ptr[3])
}
case GSN_KEYINFO20: {
tFirstDataPtr = int2void(tFirstData);
if (tFirstDataPtr == 0) goto InvalidSignal;
NdbReceiver* tRec = void2rec(tFirstDataPtr);
if(tRec->checkMagicNumber() && (tCon = tRec->getTransaction()) &&
NdbReceiver* tRec;
if (tFirstDataPtr && (tRec = void2rec(tFirstDataPtr)) &&
tRec->checkMagicNumber() && (tCon = tRec->getTransaction()) &&
tCon->checkState_TransId(&((const KeyInfo20*)tDataPtr)->transId1)){
Uint32 len = ((const KeyInfo20*)tDataPtr)->keyLen;
......@@ -756,8 +755,13 @@ Ndb::handleReceivedSignal(NdbApiSignal* aSignal, LinearSectionPtr ptr[3])
goto InvalidSignal;
}
break;
} else {
/**
* This is ok as transaction can have been aborted before KEYINFO20
* arrives (if TUP on other node than TC)
*/
return;
}
goto InvalidSignal;
}
case GSN_TCINDXCONF:{
tFirstDataPtr = int2void(tFirstData);
......
......@@ -962,6 +962,7 @@ int runMassiveRollback(NDBT_Context* ctx, NDBT_Step* step){
const Uint32 OPS_TOTAL = 4096;
for(int row = 0; row < records; row++){
int res;
CHECK(hugoOps.startTransaction(pNdb) == 0);
for(int i = 0; i<OPS_TOTAL; i += OPS_PER_TRANS){
for(int j = 0; j<OPS_PER_TRANS; j++){
......@@ -972,7 +973,12 @@ int runMassiveRollback(NDBT_Context* ctx, NDBT_Step* step){
if(result != NDBT_OK){
break;
}
CHECK(hugoOps.execute_NoCommit(pNdb) == 0);
res = hugoOps.execute_NoCommit(pNdb);
if(res != 0){
NdbError err = pNdb->getNdbError(res);
CHECK(err.classification == NdbError::TimeoutExpired);
break;
}
}
if(result != NDBT_OK){
break;
......
......@@ -1364,7 +1364,7 @@ HugoTransactions::pkUpdateRecords(Ndb* pNdb,
allocRows(batch);
g_info << "|- Updating records..." << endl;
g_info << "|- Updating records (batch=" << batch << ")..." << endl;
while (r < records){
if (retryAttempt >= retryMax){
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment