Commit cce815f7 authored by jonas@perch.ndb.mysql.com

Merge perch.ndb.mysql.com:/home/jonas/src/50-work

into  perch.ndb.mysql.com:/home/jonas/src/51-work
parents 94b8e561 32ded2a6
...@@ -5,7 +5,7 @@ Next DBACC 3002 ...@@ -5,7 +5,7 @@ Next DBACC 3002
Next DBTUP 4024 Next DBTUP 4024
Next DBLQH 5045 Next DBLQH 5045
Next DBDICT 6007 Next DBDICT 6007
Next DBDIH 7177 Next DBDIH 7178
Next DBTC 8038 Next DBTC 8038
Next CMVMI 9000 Next CMVMI 9000
Next BACKUP 10022 Next BACKUP 10022
...@@ -66,6 +66,8 @@ Delay GCP_SAVEREQ by 10 secs ...@@ -66,6 +66,8 @@ Delay GCP_SAVEREQ by 10 secs
7030: Delay in GCP_PREPARE until node has completed a node failure 7030: Delay in GCP_PREPARE until node has completed a node failure
7031: Delay in GCP_PREPARE and die 3s later 7031: Delay in GCP_PREPARE and die 3s later
7177: Delay copying of sysfileData in execCOPY_GCIREQ
ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING: ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING:
----------------------------------------------------------------- -----------------------------------------------------------------
......
...@@ -1056,6 +1056,8 @@ private: ...@@ -1056,6 +1056,8 @@ private:
void removeStoredReplica(FragmentstorePtr regFragptr, void removeStoredReplica(FragmentstorePtr regFragptr,
ReplicaRecordPtr replicaPtr); ReplicaRecordPtr replicaPtr);
void searchStoredReplicas(FragmentstorePtr regFragptr); void searchStoredReplicas(FragmentstorePtr regFragptr);
bool setup_create_replica(FragmentstorePtr, CreateReplicaRecord*,
ConstPtr<ReplicaRecord>);
void updateNodeInfo(FragmentstorePtr regFragptr); void updateNodeInfo(FragmentstorePtr regFragptr);
//------------------------------------ //------------------------------------
......
...@@ -636,23 +636,49 @@ void Dbdih::execCOPY_GCIREQ(Signal* signal) ...@@ -636,23 +636,49 @@ void Dbdih::execCOPY_GCIREQ(Signal* signal)
ndbrequire(c_copyGCISlave.m_copyReason == CopyGCIReq::IDLE); ndbrequire(c_copyGCISlave.m_copyReason == CopyGCIReq::IDLE);
ndbrequire(c_copyGCISlave.m_expectedNextWord == tstart); ndbrequire(c_copyGCISlave.m_expectedNextWord == tstart);
ndbrequire(reason != CopyGCIReq::IDLE); ndbrequire(reason != CopyGCIReq::IDLE);
bool isdone = (tstart + CopyGCIReq::DATA_SIZE) >= Sysfile::SYSFILE_SIZE32;
if (ERROR_INSERTED(7177))
{
jam();
if (signal->getLength() == 3)
{
jam();
goto done;
}
}
arrGuard(tstart + CopyGCIReq::DATA_SIZE, sizeof(sysfileData)/4); arrGuard(tstart + CopyGCIReq::DATA_SIZE, sizeof(sysfileData)/4);
for(Uint32 i = 0; i<CopyGCIReq::DATA_SIZE; i++) for(Uint32 i = 0; i<CopyGCIReq::DATA_SIZE; i++)
cdata[tstart+i] = copyGCI->data[i]; cdata[tstart+i] = copyGCI->data[i];
if ((tstart + CopyGCIReq::DATA_SIZE) >= Sysfile::SYSFILE_SIZE32) { if (ERROR_INSERTED(7177) && isMaster() && isdone)
{
sendSignalWithDelay(reference(), GSN_COPY_GCIREQ, signal, 1000, 3);
return;
}
done:
if (isdone)
{
jam(); jam();
c_copyGCISlave.m_expectedNextWord = 0; c_copyGCISlave.m_expectedNextWord = 0;
} else { }
else
{
jam(); jam();
c_copyGCISlave.m_expectedNextWord += CopyGCIReq::DATA_SIZE; c_copyGCISlave.m_expectedNextWord += CopyGCIReq::DATA_SIZE;
return; return;
}//if }
if (cmasterdihref != reference())
{
jam();
Uint32 tmp= SYSFILE->m_restart_seq; Uint32 tmp= SYSFILE->m_restart_seq;
memcpy(sysfileData, cdata, sizeof(sysfileData)); memcpy(sysfileData, cdata, sizeof(sysfileData));
SYSFILE->m_restart_seq = tmp; SYSFILE->m_restart_seq = tmp;
}
c_copyGCISlave.m_copyReason = reason; c_copyGCISlave.m_copyReason = reason;
c_copyGCISlave.m_senderRef = signal->senderBlockRef(); c_copyGCISlave.m_senderRef = signal->senderBlockRef();
...@@ -8735,14 +8761,30 @@ Dbdih::resetReplicaSr(TabRecordPtr tabPtr){ ...@@ -8735,14 +8761,30 @@ Dbdih::resetReplicaSr(TabRecordPtr tabPtr){
resetReplicaLcp(replicaPtr.p, newestRestorableGCI); resetReplicaLcp(replicaPtr.p, newestRestorableGCI);
/* ----------------------------------------------------------------- /**
* LINK THE REPLICA INTO THE STORED REPLICA LIST. WE WILL USE THIS * Make sure we can also find REDO for restoring replica...
* NODE AS A STORED REPLICA. */
* WE MUST FIRST LINK IT OUT OF THE LIST OF OLD STORED REPLICAS. {
* --------------------------------------------------------------- */ CreateReplicaRecord createReplica;
ConstPtr<ReplicaRecord> constReplicaPtr;
constReplicaPtr.i = replicaPtr.i;
constReplicaPtr.p = replicaPtr.p;
if (setup_create_replica(fragPtr,
&createReplica, constReplicaPtr))
{
removeOldStoredReplica(fragPtr, replicaPtr); removeOldStoredReplica(fragPtr, replicaPtr);
linkStoredReplica(fragPtr, replicaPtr); linkStoredReplica(fragPtr, replicaPtr);
}
else
{
infoEvent("Forcing take-over of node %d due to unsufficient REDO"
" for table %d fragment: %d",
nodePtr.i, tabPtr.i, i);
setNodeActiveStatus(nodePtr.i,
Sysfile::NS_NotActive_NotTakenOver);
}
}
} }
default: default:
jam(); jam();
...@@ -9864,6 +9906,7 @@ void Dbdih::calculateKeepGciLab(Signal* signal, Uint32 tableId, Uint32 fragId) ...@@ -9864,6 +9906,7 @@ void Dbdih::calculateKeepGciLab(Signal* signal, Uint32 tableId, Uint32 fragId)
FragmentstorePtr fragPtr; FragmentstorePtr fragPtr;
getFragstore(tabPtr.p, fragId, fragPtr); getFragstore(tabPtr.p, fragId, fragPtr);
checkKeepGci(tabPtr, fragId, fragPtr.p, fragPtr.p->storedReplicas); checkKeepGci(tabPtr, fragId, fragPtr.p, fragPtr.p->storedReplicas);
checkKeepGci(tabPtr, fragId, fragPtr.p, fragPtr.p->oldStoredReplicas);
fragId++; fragId++;
if (fragId >= tabPtr.p->totalfragments) { if (fragId >= tabPtr.p->totalfragments) {
jam(); jam();
...@@ -12875,37 +12918,14 @@ void Dbdih::removeTooNewCrashedReplicas(ReplicaRecordPtr rtnReplicaPtr) ...@@ -12875,37 +12918,14 @@ void Dbdih::removeTooNewCrashedReplicas(ReplicaRecordPtr rtnReplicaPtr)
/* CHECKPOINT WITHOUT NEEDING ANY EXTRA LOGGING FACILITIES.*/ /* CHECKPOINT WITHOUT NEEDING ANY EXTRA LOGGING FACILITIES.*/
/* A MAXIMUM OF FOUR NODES IS RETRIEVED. */ /* A MAXIMUM OF FOUR NODES IS RETRIEVED. */
/*************************************************************************/ /*************************************************************************/
void Dbdih::searchStoredReplicas(FragmentstorePtr fragPtr) bool
Dbdih::setup_create_replica(FragmentstorePtr fragPtr,
CreateReplicaRecord* createReplicaPtrP,
ConstPtr<ReplicaRecord> replicaPtr)
{ {
Uint32 nextReplicaPtrI; createReplicaPtrP->dataNodeId = replicaPtr.p->procNode;
ConstPtr<ReplicaRecord> replicaPtr; createReplicaPtrP->replicaRec = replicaPtr.i;
replicaPtr.i = fragPtr.p->storedReplicas;
while (replicaPtr.i != RNIL) {
jam();
ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
nextReplicaPtrI = replicaPtr.p->nextReplica;
NodeRecordPtr nodePtr;
nodePtr.i = replicaPtr.p->procNode;
ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
if (nodePtr.p->nodeStatus == NodeRecord::ALIVE) {
jam();
switch (nodePtr.p->activeStatus) {
case Sysfile::NS_Active:
case Sysfile::NS_ActiveMissed_1:
case Sysfile::NS_ActiveMissed_2:{
/* ----------------------------------------------------------------- */
/* INITIALISE THE CREATE REPLICA STRUCTURE THAT IS USED FOR SENDING*/
/* TO LQH START_FRAGREQ. */
/* SET THE DATA NODE WHERE THE LOCAL CHECKPOINT IS FOUND. ALSO */
/* SET A REFERENCE TO THE REPLICA POINTER OF THAT. */
/* ----------------------------------------------------------------- */
CreateReplicaRecordPtr createReplicaPtr;
createReplicaPtr.i = cnoOfCreateReplicas;
ptrCheckGuard(createReplicaPtr, 4, createReplicaRecord);
cnoOfCreateReplicas++;
createReplicaPtr.p->dataNodeId = replicaPtr.p->procNode;
createReplicaPtr.p->replicaRec = replicaPtr.i;
/* ----------------------------------------------------------------- */ /* ----------------------------------------------------------------- */
/* WE NEED TO SEARCH FOR A PROPER LOCAL CHECKPOINT TO USE FOR THE */ /* WE NEED TO SEARCH FOR A PROPER LOCAL CHECKPOINT TO USE FOR THE */
/* SYSTEM RESTART. */ /* SYSTEM RESTART. */
...@@ -12917,7 +12937,8 @@ void Dbdih::searchStoredReplicas(FragmentstorePtr fragPtr) ...@@ -12917,7 +12937,8 @@ void Dbdih::searchStoredReplicas(FragmentstorePtr fragPtr)
stopGci, stopGci,
startGci, startGci,
startLcpNo); startLcpNo);
if (!result) { if (!result)
{
jam(); jam();
/* --------------------------------------------------------------- */ /* --------------------------------------------------------------- */
/* WE COULD NOT FIND ANY LOCAL CHECKPOINT. THE FRAGMENT THUS DO NOT*/ /* WE COULD NOT FIND ANY LOCAL CHECKPOINT. THE FRAGMENT THUS DO NOT*/
...@@ -12929,22 +12950,20 @@ void Dbdih::searchStoredReplicas(FragmentstorePtr fragPtr) ...@@ -12929,22 +12950,20 @@ void Dbdih::searchStoredReplicas(FragmentstorePtr fragPtr)
/* TO INDICATE THAT NO LOCAL CHECKPOINT IS TO BE USED WE SET THE */ /* TO INDICATE THAT NO LOCAL CHECKPOINT IS TO BE USED WE SET THE */
/* LOCAL CHECKPOINT TO ZNIL. */ /* LOCAL CHECKPOINT TO ZNIL. */
/* --------------------------------------------------------------- */ /* --------------------------------------------------------------- */
createReplicaPtr.p->lcpNo = ZNIL; createReplicaPtrP->lcpNo = ZNIL;
} else { }
else
{
jam(); jam();
/* --------------------------------------------------------------- */ /* --------------------------------------------------------------- */
/* WE FOUND A PROPER LOCAL CHECKPOINT TO RESTART FROM. */ /* WE FOUND A PROPER LOCAL CHECKPOINT TO RESTART FROM. */
/* SET LOCAL CHECKPOINT ID AND LOCAL CHECKPOINT NUMBER. */ /* SET LOCAL CHECKPOINT ID AND LOCAL CHECKPOINT NUMBER. */
/* --------------------------------------------------------------- */ /* --------------------------------------------------------------- */
createReplicaPtr.p->lcpNo = startLcpNo; createReplicaPtrP->lcpNo = startLcpNo;
arrGuard(startLcpNo, MAX_LCP_STORED); arrGuard(startLcpNo, MAX_LCP_STORED);
createReplicaPtr.p->createLcpId = replicaPtr.p->lcpId[startLcpNo]; createReplicaPtrP->createLcpId = replicaPtr.p->lcpId[startLcpNo];
}//if }//if
if(ERROR_INSERTED(7073) || ERROR_INSERTED(7074)){
jam();
nodePtr.p->nodeStatus = NodeRecord::DEAD;
}
/* ----------------------------------------------------------------- */ /* ----------------------------------------------------------------- */
/* WE HAVE EITHER FOUND A LOCAL CHECKPOINT OR WE ARE PLANNING TO */ /* WE HAVE EITHER FOUND A LOCAL CHECKPOINT OR WE ARE PLANNING TO */
...@@ -12952,21 +12971,48 @@ void Dbdih::searchStoredReplicas(FragmentstorePtr fragPtr) ...@@ -12952,21 +12971,48 @@ void Dbdih::searchStoredReplicas(FragmentstorePtr fragPtr)
/* CASES WE NEED TO FIND A SET OF LOGS THAT CAN EXECUTE SUCH THAT */ /* CASES WE NEED TO FIND A SET OF LOGS THAT CAN EXECUTE SUCH THAT */
/* WE RECOVER TO THE SYSTEM RESTART GLOBAL CHECKPOINT. */ /* WE RECOVER TO THE SYSTEM RESTART GLOBAL CHECKPOINT. */
/* -_--------------------------------------------------------------- */ /* -_--------------------------------------------------------------- */
if (!findLogNodes(createReplicaPtr.p, fragPtr, startGci, stopGci)) { return findLogNodes(createReplicaPtrP, fragPtr, startGci, stopGci);
jam(); }
/* --------------------------------------------------------------- */
/* WE WERE NOT ABLE TO FIND ANY WAY OF RESTORING THIS REPLICA. */
/* THIS IS A POTENTIAL SYSTEM ERROR. */
/* --------------------------------------------------------------- */
cnoOfCreateReplicas--;
return;
}//if
if(ERROR_INSERTED(7073) || ERROR_INSERTED(7074)){ void Dbdih::searchStoredReplicas(FragmentstorePtr fragPtr)
{
Uint32 nextReplicaPtrI;
Ptr<ReplicaRecord> replicaPtr;
replicaPtr.i = fragPtr.p->storedReplicas;
while (replicaPtr.i != RNIL) {
jam(); jam();
nodePtr.p->nodeStatus = NodeRecord::ALIVE; ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
} nextReplicaPtrI = replicaPtr.p->nextReplica;
ConstPtr<ReplicaRecord> constReplicaPtr;
constReplicaPtr.i = replicaPtr.i;
constReplicaPtr.p = replicaPtr.p;
NodeRecordPtr nodePtr;
nodePtr.i = replicaPtr.p->procNode;
ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
if (nodePtr.p->nodeStatus == NodeRecord::ALIVE) {
jam();
switch (nodePtr.p->activeStatus) {
case Sysfile::NS_Active:
case Sysfile::NS_ActiveMissed_1:
case Sysfile::NS_ActiveMissed_2:{
/* ----------------------------------------------------------------- */
/* INITIALISE THE CREATE REPLICA STRUCTURE THAT IS USED FOR SENDING*/
/* TO LQH START_FRAGREQ. */
/* SET THE DATA NODE WHERE THE LOCAL CHECKPOINT IS FOUND. ALSO */
/* SET A REFERENCE TO THE REPLICA POINTER OF THAT. */
/* ----------------------------------------------------------------- */
CreateReplicaRecordPtr createReplicaPtr;
createReplicaPtr.i = cnoOfCreateReplicas;
ptrCheckGuard(createReplicaPtr, 4, createReplicaRecord);
cnoOfCreateReplicas++;
/**
* Should have been checked in resetReplicaSr
*/
ndbrequire(setup_create_replica(fragPtr,
createReplicaPtr.p,
constReplicaPtr));
break; break;
} }
default: default:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment