Commit 165d5390 authored by unknown

bug#10987 - ndb - unable to find restorable replica

  Introduce new variable c_newest_restorable_gci
    which is set _after_ both GCP_SAVE and COPY_GCI
    
  This variable is used when cutting redo (calcKeepGci)
  
  Also make sure a complete GCI is run in between LCPs


ndb/src/kernel/blocks/dbdih/Dbdih.hpp:
  Introduce new variable c_newest_restorable_gci
    which is set _after_ both GCP_SAVE and COPY_GCI
  
  This variable is used when cutting redo (calcKeepGci)
  Also make sure a complete GCI is run in between LCPs
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
  Introduce new variable c_newest_restorable_gci
    which is set _after_ both GCP_SAVE and COPY_GCI
  
  This variable is used when cutting redo (calcKeepGci)
  Also make sure a complete GCI is run in between LCPs
parent bbcb4a56
......@@ -774,7 +774,7 @@ private:
//------------------------------------
// Methods for LCP functionality
//------------------------------------
void checkKeepGci(Uint32 replicaStartIndex);
void checkKeepGci(TabRecordPtr, Uint32, Fragmentstore*, Uint32);
void checkLcpStart(Signal *, Uint32 lineNo);
void checkStartMoreLcp(Signal *, Uint32 nodeId);
bool reportLcpCompletion(const class LcpFragRep *);
......@@ -1292,7 +1292,7 @@ private:
}
Uint32 lcpStart;
Uint32 lcpStartGcp;
Uint32 lcpStopGcp;
Uint32 keepGci; /* USED TO CALCULATE THE GCI TO KEEP AFTER A LCP */
Uint32 oldestRestorableGci;
......@@ -1361,6 +1361,7 @@ private:
Uint32 cstarttype;
Uint32 csystemnodes;
Uint32 currentgcp;
Uint32 c_newest_restorable_gci;
enum GcpMasterTakeOverState {
GMTOS_IDLE = 0,
......
......@@ -674,6 +674,7 @@ void Dbdih::execCOPY_GCIREQ(Signal* signal)
jam();
coldgcp = SYSFILE->newestRestorableGCI;
crestartGci = SYSFILE->newestRestorableGCI;
c_newest_restorable_gci = SYSFILE->newestRestorableGCI;
Sysfile::setRestartOngoing(SYSFILE->systemRestartBits);
currentgcp = coldgcp + 1;
cnewgcp = coldgcp + 1;
......@@ -692,6 +693,7 @@ void Dbdih::execCOPY_GCIREQ(Signal* signal)
ok = true;
jam();
cgcpParticipantState = GCP_PARTICIPANT_COPY_GCI_RECEIVED;
c_newest_restorable_gci = SYSFILE->newestRestorableGCI;
setNodeInfo(signal);
break;
}//if
......@@ -7749,6 +7751,8 @@ void Dbdih::execCOPY_GCICONF(Signal* signal)
signal->theData[1] = coldgcp;
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
c_newest_restorable_gci = coldgcp;
CRASH_INSERTION(7004);
emptyWaitGCPMasterQueue(signal);
cgcpStatus = GCP_READY;
......@@ -9155,7 +9159,7 @@ void Dbdih::checkTcCounterLab(Signal* signal)
}//if
c_lcpState.ctimer += 32;
if ((c_nodeStartMaster.blockLcp == true) ||
((c_lcpState.lcpStartGcp + 1) > currentgcp)) {
(c_lcpState.lcpStopGcp >= c_newest_restorable_gci)) {
jam();
/* --------------------------------------------------------------------- */
// No reason to start juggling the states and checking for start of LCP if
......@@ -9238,7 +9242,6 @@ void Dbdih::execTCGETOPSIZECONF(Signal* signal)
/* ----------------------------------------------------------------------- */
c_lcpState.ctimer = 0;
c_lcpState.keepGci = coldgcp;
c_lcpState.lcpStartGcp = currentgcp;
/* ----------------------------------------------------------------------- */
/* UPDATE THE NEW LATEST LOCAL CHECKPOINT ID. */
/* ----------------------------------------------------------------------- */
......@@ -9310,7 +9313,7 @@ void Dbdih::calculateKeepGciLab(Signal* signal, Uint32 tableId, Uint32 fragId)
cnoOfActiveTables++;
FragmentstorePtr fragPtr;
getFragstore(tabPtr.p, fragId, fragPtr);
checkKeepGci(fragPtr.p->storedReplicas);
checkKeepGci(tabPtr, fragId, fragPtr.p, fragPtr.p->storedReplicas);
fragId++;
if (fragId >= tabPtr.p->totalfragments) {
jam();
......@@ -10168,6 +10171,7 @@ void Dbdih::allNodesLcpCompletedLab(Signal* signal)
signal->theData[0] = EventReport::LocalCheckpointCompleted; //Event type
signal->theData[1] = SYSFILE->latestLCP_ID;
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
c_lcpState.lcpStopGcp = c_newest_restorable_gci;
/**
* Start checking for next LCP
......@@ -10522,7 +10526,8 @@ void Dbdih::checkEscalation()
/* DESCRIPTION: CHECK FOR MINIMUM GCI RESTORABLE WITH NEW LOCAL */
/* CHECKPOINT. */
/*************************************************************************/
void Dbdih::checkKeepGci(Uint32 replicaStartIndex)
void Dbdih::checkKeepGci(TabRecordPtr tabPtr, Uint32 fragId, Fragmentstore*,
Uint32 replicaStartIndex)
{
ReplicaRecordPtr ckgReplicaPtr;
ckgReplicaPtr.i = replicaStartIndex;
......@@ -10544,7 +10549,6 @@ void Dbdih::checkKeepGci(Uint32 replicaStartIndex)
if (oldestRestorableGci > c_lcpState.oldestRestorableGci) {
jam();
c_lcpState.oldestRestorableGci = oldestRestorableGci;
ndbrequire(((int)c_lcpState.oldestRestorableGci) >= 0);
}//if
ckgReplicaPtr.i = ckgReplicaPtr.p->nextReplica;
}//while
......@@ -10838,7 +10842,7 @@ void Dbdih::findMinGci(ReplicaRecordPtr fmgReplicaPtr,
do {
ndbrequire(lcpNo < MAX_LCP_STORED);
if (fmgReplicaPtr.p->lcpStatus[lcpNo] == ZVALID &&
fmgReplicaPtr.p->maxGciStarted[lcpNo] <= coldgcp)
fmgReplicaPtr.p->maxGciStarted[lcpNo] < c_newest_restorable_gci)
{
jam();
keepGci = fmgReplicaPtr.p->maxGciCompleted[lcpNo];
......@@ -10960,7 +10964,7 @@ void Dbdih::initCommonData()
c_lcpState.clcpDelay = 0;
c_lcpState.lcpStart = ZIDLE;
c_lcpState.lcpStartGcp = 0;
c_lcpState.lcpStopGcp = 0;
c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
c_lcpState.currentFragment.tableId = 0;
c_lcpState.currentFragment.fragmentId = 0;
......@@ -10996,6 +11000,7 @@ void Dbdih::initCommonData()
csystemnodes = 0;
c_updateToLock = RNIL;
currentgcp = 0;
c_newest_restorable_gci = 0;
cverifyQueueCounter = 0;
cwaitLcpSr = false;
......@@ -11067,6 +11072,7 @@ void Dbdih::initRestartInfo()
currentgcp = 2;
cnewgcp = 2;
crestartGci = 1;
c_newest_restorable_gci = 1;
SYSFILE->keepGCI = 1;
SYSFILE->oldestRestorableGCI = 1;
......@@ -13038,9 +13044,9 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
if (signal->theData[0] == 7001) {
infoEvent("c_lcpState.keepGci = %d",
c_lcpState.keepGci);
infoEvent("c_lcpState.lcpStatus = %d, clcpStartGcp = %d",
infoEvent("c_lcpState.lcpStatus = %d, clcpStopGcp = %d",
c_lcpState.lcpStatus,
c_lcpState.lcpStartGcp);
c_lcpState.lcpStopGcp);
infoEvent("cgcpStartCounter = %d, cimmediateLcpStart = %d",
cgcpStartCounter, c_lcpState.immediateLcpStart);
}//if
......@@ -13221,8 +13227,8 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
infoEvent("lcpStatus = %d (update place = %d) ",
c_lcpState.lcpStatus, c_lcpState.lcpStatusUpdatedPlace);
infoEvent
("lcpStart = %d lcpStartGcp = %d keepGci = %d oldestRestorable = %d",
c_lcpState.lcpStart, c_lcpState.lcpStartGcp,
("lcpStart = %d lcpStopGcp = %d keepGci = %d oldestRestorable = %d",
c_lcpState.lcpStart, c_lcpState.lcpStopGcp,
c_lcpState.keepGci, c_lcpState.oldestRestorableGci);
infoEvent
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment