Commit 32ded2a6 authored by jonas@perch.ndb.mysql.com's avatar jonas@perch.ndb.mysql.com

Merge perch.ndb.mysql.com:/home/jonas/src/41-work

into  perch.ndb.mysql.com:/home/jonas/src/50-work
parents c3017359 564d461f
...@@ -5,7 +5,7 @@ Next DBACC 3002 ...@@ -5,7 +5,7 @@ Next DBACC 3002
Next DBTUP 4014 Next DBTUP 4014
Next DBLQH 5043 Next DBLQH 5043
Next DBDICT 6007 Next DBDICT 6007
Next DBDIH 7177 Next DBDIH 7178
Next DBTC 8038 Next DBTC 8038
Next CMVMI 9000 Next CMVMI 9000
Next BACKUP 10022 Next BACKUP 10022
...@@ -66,6 +66,8 @@ Delay GCP_SAVEREQ by 10 secs ...@@ -66,6 +66,8 @@ Delay GCP_SAVEREQ by 10 secs
7030: Delay in GCP_PREPARE until node has completed a node failure 7030: Delay in GCP_PREPARE until node has completed a node failure
7031: Delay in GCP_PREPARE and die 3s later 7031: Delay in GCP_PREPARE and die 3s later
7177: Delay copying of sysfileData in execCOPY_GCIREQ
ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING: ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING:
----------------------------------------------------------------- -----------------------------------------------------------------
......
...@@ -1048,6 +1048,8 @@ private: ...@@ -1048,6 +1048,8 @@ private:
void removeStoredReplica(FragmentstorePtr regFragptr, void removeStoredReplica(FragmentstorePtr regFragptr,
ReplicaRecordPtr replicaPtr); ReplicaRecordPtr replicaPtr);
void searchStoredReplicas(FragmentstorePtr regFragptr); void searchStoredReplicas(FragmentstorePtr regFragptr);
bool setup_create_replica(FragmentstorePtr, CreateReplicaRecord*,
ConstPtr<ReplicaRecord>);
void updateNodeInfo(FragmentstorePtr regFragptr); void updateNodeInfo(FragmentstorePtr regFragptr);
//------------------------------------ //------------------------------------
......
...@@ -627,22 +627,48 @@ void Dbdih::execCOPY_GCIREQ(Signal* signal) ...@@ -627,22 +627,48 @@ void Dbdih::execCOPY_GCIREQ(Signal* signal)
ndbrequire(c_copyGCISlave.m_copyReason == CopyGCIReq::IDLE); ndbrequire(c_copyGCISlave.m_copyReason == CopyGCIReq::IDLE);
ndbrequire(c_copyGCISlave.m_expectedNextWord == tstart); ndbrequire(c_copyGCISlave.m_expectedNextWord == tstart);
ndbrequire(reason != CopyGCIReq::IDLE); ndbrequire(reason != CopyGCIReq::IDLE);
bool isdone = (tstart + CopyGCIReq::DATA_SIZE) >= Sysfile::SYSFILE_SIZE32;
if (ERROR_INSERTED(7177))
{
jam();
if (signal->getLength() == 3)
{
jam();
goto done;
}
}
arrGuard(tstart + CopyGCIReq::DATA_SIZE, sizeof(sysfileData)/4); arrGuard(tstart + CopyGCIReq::DATA_SIZE, sizeof(sysfileData)/4);
for(Uint32 i = 0; i<CopyGCIReq::DATA_SIZE; i++) for(Uint32 i = 0; i<CopyGCIReq::DATA_SIZE; i++)
cdata[tstart+i] = copyGCI->data[i]; cdata[tstart+i] = copyGCI->data[i];
if ((tstart + CopyGCIReq::DATA_SIZE) >= Sysfile::SYSFILE_SIZE32) { if (ERROR_INSERTED(7177) && isMaster() && isdone)
{
sendSignalWithDelay(reference(), GSN_COPY_GCIREQ, signal, 1000, 3);
return;
}
done:
if (isdone)
{
jam(); jam();
c_copyGCISlave.m_expectedNextWord = 0; c_copyGCISlave.m_expectedNextWord = 0;
} else { }
else
{
jam(); jam();
c_copyGCISlave.m_expectedNextWord += CopyGCIReq::DATA_SIZE; c_copyGCISlave.m_expectedNextWord += CopyGCIReq::DATA_SIZE;
return; return;
}//if }
memcpy(sysfileData, cdata, sizeof(sysfileData));
if (cmasterdihref != reference())
{
jam();
memcpy(sysfileData, cdata, sizeof(sysfileData));
}
c_copyGCISlave.m_copyReason = reason; c_copyGCISlave.m_copyReason = reason;
c_copyGCISlave.m_senderRef = signal->senderBlockRef(); c_copyGCISlave.m_senderRef = signal->senderBlockRef();
c_copyGCISlave.m_senderData = copyGCI->anyData; c_copyGCISlave.m_senderData = copyGCI->anyData;
...@@ -8441,14 +8467,30 @@ Dbdih::resetReplicaSr(TabRecordPtr tabPtr){ ...@@ -8441,14 +8467,30 @@ Dbdih::resetReplicaSr(TabRecordPtr tabPtr){
resetReplicaLcp(replicaPtr.p, newestRestorableGCI); resetReplicaLcp(replicaPtr.p, newestRestorableGCI);
/* ----------------------------------------------------------------- /**
* LINK THE REPLICA INTO THE STORED REPLICA LIST. WE WILL USE THIS * Make sure we can also find REDO for restoring replica...
* NODE AS A STORED REPLICA. */
* WE MUST FIRST LINK IT OUT OF THE LIST OF OLD STORED REPLICAS. {
* --------------------------------------------------------------- */ CreateReplicaRecord createReplica;
removeOldStoredReplica(fragPtr, replicaPtr); ConstPtr<ReplicaRecord> constReplicaPtr;
linkStoredReplica(fragPtr, replicaPtr); constReplicaPtr.i = replicaPtr.i;
constReplicaPtr.p = replicaPtr.p;
if (setup_create_replica(fragPtr,
&createReplica, constReplicaPtr))
{
removeOldStoredReplica(fragPtr, replicaPtr);
linkStoredReplica(fragPtr, replicaPtr);
}
else
{
infoEvent("Forcing take-over of node %d due to unsufficient REDO"
" for table %d fragment: %d",
nodePtr.i, tabPtr.i, i);
setNodeActiveStatus(nodePtr.i,
Sysfile::NS_NotActive_NotTakenOver);
}
}
} }
default: default:
jam(); jam();
...@@ -9492,6 +9534,7 @@ void Dbdih::calculateKeepGciLab(Signal* signal, Uint32 tableId, Uint32 fragId) ...@@ -9492,6 +9534,7 @@ void Dbdih::calculateKeepGciLab(Signal* signal, Uint32 tableId, Uint32 fragId)
FragmentstorePtr fragPtr; FragmentstorePtr fragPtr;
getFragstore(tabPtr.p, fragId, fragPtr); getFragstore(tabPtr.p, fragId, fragPtr);
checkKeepGci(tabPtr, fragId, fragPtr.p, fragPtr.p->storedReplicas); checkKeepGci(tabPtr, fragId, fragPtr.p, fragPtr.p->storedReplicas);
checkKeepGci(tabPtr, fragId, fragPtr.p, fragPtr.p->oldStoredReplicas);
fragId++; fragId++;
if (fragId >= tabPtr.p->totalfragments) { if (fragId >= tabPtr.p->totalfragments) {
jam(); jam();
...@@ -12487,16 +12530,75 @@ void Dbdih::removeTooNewCrashedReplicas(ReplicaRecordPtr rtnReplicaPtr) ...@@ -12487,16 +12530,75 @@ void Dbdih::removeTooNewCrashedReplicas(ReplicaRecordPtr rtnReplicaPtr)
/* CHECKPOINT WITHOUT NEEDING ANY EXTRA LOGGING FACILITIES.*/ /* CHECKPOINT WITHOUT NEEDING ANY EXTRA LOGGING FACILITIES.*/
/* A MAXIMUM OF FOUR NODES IS RETRIEVED. */ /* A MAXIMUM OF FOUR NODES IS RETRIEVED. */
/*************************************************************************/ /*************************************************************************/
bool
Dbdih::setup_create_replica(FragmentstorePtr fragPtr,
CreateReplicaRecord* createReplicaPtrP,
ConstPtr<ReplicaRecord> replicaPtr)
{
createReplicaPtrP->dataNodeId = replicaPtr.p->procNode;
createReplicaPtrP->replicaRec = replicaPtr.i;
/* ----------------------------------------------------------------- */
/* WE NEED TO SEARCH FOR A PROPER LOCAL CHECKPOINT TO USE FOR THE */
/* SYSTEM RESTART. */
/* ----------------------------------------------------------------- */
Uint32 startGci;
Uint32 startLcpNo;
Uint32 stopGci = SYSFILE->newestRestorableGCI;
bool result = findStartGci(replicaPtr,
stopGci,
startGci,
startLcpNo);
if (!result)
{
jam();
/* --------------------------------------------------------------- */
/* WE COULD NOT FIND ANY LOCAL CHECKPOINT. THE FRAGMENT THUS DO NOT*/
/* CONTAIN ANY VALID LOCAL CHECKPOINT. IT DOES HOWEVER CONTAIN A */
/* VALID FRAGMENT LOG. THUS BY FIRST CREATING THE FRAGMENT AND THEN*/
/* EXECUTING THE FRAGMENT LOG WE CAN CREATE THE FRAGMENT AS */
/* DESIRED. THIS SHOULD ONLY OCCUR AFTER CREATING A FRAGMENT. */
/* */
/* TO INDICATE THAT NO LOCAL CHECKPOINT IS TO BE USED WE SET THE */
/* LOCAL CHECKPOINT TO ZNIL. */
/* --------------------------------------------------------------- */
createReplicaPtrP->lcpNo = ZNIL;
}
else
{
jam();
/* --------------------------------------------------------------- */
/* WE FOUND A PROPER LOCAL CHECKPOINT TO RESTART FROM. */
/* SET LOCAL CHECKPOINT ID AND LOCAL CHECKPOINT NUMBER. */
/* --------------------------------------------------------------- */
createReplicaPtrP->lcpNo = startLcpNo;
arrGuard(startLcpNo, MAX_LCP_STORED);
createReplicaPtrP->createLcpId = replicaPtr.p->lcpId[startLcpNo];
}//if
/* ----------------------------------------------------------------- */
/* WE HAVE EITHER FOUND A LOCAL CHECKPOINT OR WE ARE PLANNING TO */
/* EXECUTE THE LOG FROM THE INITIAL CREATION OF THE TABLE. IN BOTH */
/* CASES WE NEED TO FIND A SET OF LOGS THAT CAN EXECUTE SUCH THAT */
/* WE RECOVER TO THE SYSTEM RESTART GLOBAL CHECKPOINT. */
/* -_--------------------------------------------------------------- */
return findLogNodes(createReplicaPtrP, fragPtr, startGci, stopGci);
}
void Dbdih::searchStoredReplicas(FragmentstorePtr fragPtr) void Dbdih::searchStoredReplicas(FragmentstorePtr fragPtr)
{ {
Uint32 nextReplicaPtrI; Uint32 nextReplicaPtrI;
ConstPtr<ReplicaRecord> replicaPtr; Ptr<ReplicaRecord> replicaPtr;
replicaPtr.i = fragPtr.p->storedReplicas; replicaPtr.i = fragPtr.p->storedReplicas;
while (replicaPtr.i != RNIL) { while (replicaPtr.i != RNIL) {
jam(); jam();
ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord); ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
nextReplicaPtrI = replicaPtr.p->nextReplica; nextReplicaPtrI = replicaPtr.p->nextReplica;
ConstPtr<ReplicaRecord> constReplicaPtr;
constReplicaPtr.i = replicaPtr.i;
constReplicaPtr.p = replicaPtr.p;
NodeRecordPtr nodePtr; NodeRecordPtr nodePtr;
nodePtr.i = replicaPtr.p->procNode; nodePtr.i = replicaPtr.p->procNode;
ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord); ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
...@@ -12516,69 +12618,13 @@ void Dbdih::searchStoredReplicas(FragmentstorePtr fragPtr) ...@@ -12516,69 +12618,13 @@ void Dbdih::searchStoredReplicas(FragmentstorePtr fragPtr)
createReplicaPtr.i = cnoOfCreateReplicas; createReplicaPtr.i = cnoOfCreateReplicas;
ptrCheckGuard(createReplicaPtr, 4, createReplicaRecord); ptrCheckGuard(createReplicaPtr, 4, createReplicaRecord);
cnoOfCreateReplicas++; cnoOfCreateReplicas++;
createReplicaPtr.p->dataNodeId = replicaPtr.p->procNode;
createReplicaPtr.p->replicaRec = replicaPtr.i;
/* ----------------------------------------------------------------- */
/* WE NEED TO SEARCH FOR A PROPER LOCAL CHECKPOINT TO USE FOR THE */
/* SYSTEM RESTART. */
/* ----------------------------------------------------------------- */
Uint32 startGci;
Uint32 startLcpNo;
Uint32 stopGci = SYSFILE->newestRestorableGCI;
bool result = findStartGci(replicaPtr,
stopGci,
startGci,
startLcpNo);
if (!result) {
jam();
/* --------------------------------------------------------------- */
/* WE COULD NOT FIND ANY LOCAL CHECKPOINT. THE FRAGMENT THUS DO NOT*/
/* CONTAIN ANY VALID LOCAL CHECKPOINT. IT DOES HOWEVER CONTAIN A */
/* VALID FRAGMENT LOG. THUS BY FIRST CREATING THE FRAGMENT AND THEN*/
/* EXECUTING THE FRAGMENT LOG WE CAN CREATE THE FRAGMENT AS */
/* DESIRED. THIS SHOULD ONLY OCCUR AFTER CREATING A FRAGMENT. */
/* */
/* TO INDICATE THAT NO LOCAL CHECKPOINT IS TO BE USED WE SET THE */
/* LOCAL CHECKPOINT TO ZNIL. */
/* --------------------------------------------------------------- */
createReplicaPtr.p->lcpNo = ZNIL;
} else {
jam();
/* --------------------------------------------------------------- */
/* WE FOUND A PROPER LOCAL CHECKPOINT TO RESTART FROM. */
/* SET LOCAL CHECKPOINT ID AND LOCAL CHECKPOINT NUMBER. */
/* --------------------------------------------------------------- */
createReplicaPtr.p->lcpNo = startLcpNo;
arrGuard(startLcpNo, MAX_LCP_STORED);
createReplicaPtr.p->createLcpId = replicaPtr.p->lcpId[startLcpNo];
}//if
if(ERROR_INSERTED(7073) || ERROR_INSERTED(7074)){
jam();
nodePtr.p->nodeStatus = NodeRecord::DEAD;
}
/* ----------------------------------------------------------------- */
/* WE HAVE EITHER FOUND A LOCAL CHECKPOINT OR WE ARE PLANNING TO */
/* EXECUTE THE LOG FROM THE INITIAL CREATION OF THE TABLE. IN BOTH */
/* CASES WE NEED TO FIND A SET OF LOGS THAT CAN EXECUTE SUCH THAT */
/* WE RECOVER TO THE SYSTEM RESTART GLOBAL CHECKPOINT. */
/* -_--------------------------------------------------------------- */
if (!findLogNodes(createReplicaPtr.p, fragPtr, startGci, stopGci)) {
jam();
/* --------------------------------------------------------------- */
/* WE WERE NOT ABLE TO FIND ANY WAY OF RESTORING THIS REPLICA. */
/* THIS IS A POTENTIAL SYSTEM ERROR. */
/* --------------------------------------------------------------- */
cnoOfCreateReplicas--;
return;
}//if
if(ERROR_INSERTED(7073) || ERROR_INSERTED(7074)){
jam();
nodePtr.p->nodeStatus = NodeRecord::ALIVE;
}
/**
* Should have been checked in resetReplicaSr
*/
ndbrequire(setup_create_replica(fragPtr,
createReplicaPtr.p,
constReplicaPtr));
break; break;
} }
default: default:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment