Commit 32ded2a6 authored by jonas@perch.ndb.mysql.com

Merge perch.ndb.mysql.com:/home/jonas/src/41-work

into  perch.ndb.mysql.com:/home/jonas/src/50-work
parents c3017359 564d461f
......@@ -5,7 +5,7 @@ Next DBACC 3002
Next DBTUP 4014
Next DBLQH 5043
Next DBDICT 6007
Next DBDIH 7177
Next DBDIH 7178
Next DBTC 8038
Next CMVMI 9000
Next BACKUP 10022
......@@ -66,6 +66,8 @@ Delay GCP_SAVEREQ by 10 secs
7030: Delay in GCP_PREPARE until node has completed a node failure
7031: Delay in GCP_PREPARE and die 3s later
7177: Delay copying of sysfileData in execCOPY_GCIREQ
ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING:
-----------------------------------------------------------------
......
......@@ -1048,6 +1048,8 @@ private:
void removeStoredReplica(FragmentstorePtr regFragptr,
ReplicaRecordPtr replicaPtr);
void searchStoredReplicas(FragmentstorePtr regFragptr);
bool setup_create_replica(FragmentstorePtr, CreateReplicaRecord*,
ConstPtr<ReplicaRecord>);
void updateNodeInfo(FragmentstorePtr regFragptr);
//------------------------------------
......
......@@ -627,21 +627,47 @@ void Dbdih::execCOPY_GCIREQ(Signal* signal)
ndbrequire(c_copyGCISlave.m_copyReason == CopyGCIReq::IDLE);
ndbrequire(c_copyGCISlave.m_expectedNextWord == tstart);
ndbrequire(reason != CopyGCIReq::IDLE);
bool isdone = (tstart + CopyGCIReq::DATA_SIZE) >= Sysfile::SYSFILE_SIZE32;
if (ERROR_INSERTED(7177))
{
jam();
if (signal->getLength() == 3)
{
jam();
goto done;
}
}
arrGuard(tstart + CopyGCIReq::DATA_SIZE, sizeof(sysfileData)/4);
for(Uint32 i = 0; i<CopyGCIReq::DATA_SIZE; i++)
cdata[tstart+i] = copyGCI->data[i];
if ((tstart + CopyGCIReq::DATA_SIZE) >= Sysfile::SYSFILE_SIZE32) {
if (ERROR_INSERTED(7177) && isMaster() && isdone)
{
sendSignalWithDelay(reference(), GSN_COPY_GCIREQ, signal, 1000, 3);
return;
}
done:
if (isdone)
{
jam();
c_copyGCISlave.m_expectedNextWord = 0;
} else {
}
else
{
jam();
c_copyGCISlave.m_expectedNextWord += CopyGCIReq::DATA_SIZE;
return;
}//if
}
if (cmasterdihref != reference())
{
jam();
memcpy(sysfileData, cdata, sizeof(sysfileData));
}
c_copyGCISlave.m_copyReason = reason;
c_copyGCISlave.m_senderRef = signal->senderBlockRef();
......@@ -8441,14 +8467,30 @@ Dbdih::resetReplicaSr(TabRecordPtr tabPtr){
resetReplicaLcp(replicaPtr.p, newestRestorableGCI);
/* -----------------------------------------------------------------
* LINK THE REPLICA INTO THE STORED REPLICA LIST. WE WILL USE THIS
* NODE AS A STORED REPLICA.
* WE MUST FIRST LINK IT OUT OF THE LIST OF OLD STORED REPLICAS.
* --------------------------------------------------------------- */
/**
* Make sure we can also find REDO for restoring replica...
*/
{
CreateReplicaRecord createReplica;
ConstPtr<ReplicaRecord> constReplicaPtr;
constReplicaPtr.i = replicaPtr.i;
constReplicaPtr.p = replicaPtr.p;
if (setup_create_replica(fragPtr,
&createReplica, constReplicaPtr))
{
removeOldStoredReplica(fragPtr, replicaPtr);
linkStoredReplica(fragPtr, replicaPtr);
}
else
{
infoEvent("Forcing take-over of node %d due to unsufficient REDO"
" for table %d fragment: %d",
nodePtr.i, tabPtr.i, i);
setNodeActiveStatus(nodePtr.i,
Sysfile::NS_NotActive_NotTakenOver);
}
}
}
default:
jam();
......@@ -9492,6 +9534,7 @@ void Dbdih::calculateKeepGciLab(Signal* signal, Uint32 tableId, Uint32 fragId)
FragmentstorePtr fragPtr;
getFragstore(tabPtr.p, fragId, fragPtr);
checkKeepGci(tabPtr, fragId, fragPtr.p, fragPtr.p->storedReplicas);
checkKeepGci(tabPtr, fragId, fragPtr.p, fragPtr.p->oldStoredReplicas);
fragId++;
if (fragId >= tabPtr.p->totalfragments) {
jam();
......@@ -12487,37 +12530,14 @@ void Dbdih::removeTooNewCrashedReplicas(ReplicaRecordPtr rtnReplicaPtr)
/* CHECKPOINT WITHOUT NEEDING ANY EXTRA LOGGING FACILITIES.*/
/* A MAXIMUM OF FOUR NODES IS RETRIEVED. */
/*************************************************************************/
void Dbdih::searchStoredReplicas(FragmentstorePtr fragPtr)
bool
Dbdih::setup_create_replica(FragmentstorePtr fragPtr,
CreateReplicaRecord* createReplicaPtrP,
ConstPtr<ReplicaRecord> replicaPtr)
{
Uint32 nextReplicaPtrI;
ConstPtr<ReplicaRecord> replicaPtr;
createReplicaPtrP->dataNodeId = replicaPtr.p->procNode;
createReplicaPtrP->replicaRec = replicaPtr.i;
replicaPtr.i = fragPtr.p->storedReplicas;
while (replicaPtr.i != RNIL) {
jam();
ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
nextReplicaPtrI = replicaPtr.p->nextReplica;
NodeRecordPtr nodePtr;
nodePtr.i = replicaPtr.p->procNode;
ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
if (nodePtr.p->nodeStatus == NodeRecord::ALIVE) {
jam();
switch (nodePtr.p->activeStatus) {
case Sysfile::NS_Active:
case Sysfile::NS_ActiveMissed_1:
case Sysfile::NS_ActiveMissed_2:{
/* ----------------------------------------------------------------- */
/* INITIALISE THE CREATE REPLICA STRUCTURE THAT IS USED FOR SENDING*/
/* TO LQH START_FRAGREQ. */
/* SET THE DATA NODE WHERE THE LOCAL CHECKPOINT IS FOUND. ALSO */
/* SET A REFERENCE TO THE REPLICA POINTER OF THAT. */
/* ----------------------------------------------------------------- */
CreateReplicaRecordPtr createReplicaPtr;
createReplicaPtr.i = cnoOfCreateReplicas;
ptrCheckGuard(createReplicaPtr, 4, createReplicaRecord);
cnoOfCreateReplicas++;
createReplicaPtr.p->dataNodeId = replicaPtr.p->procNode;
createReplicaPtr.p->replicaRec = replicaPtr.i;
/* ----------------------------------------------------------------- */
/* WE NEED TO SEARCH FOR A PROPER LOCAL CHECKPOINT TO USE FOR THE */
/* SYSTEM RESTART. */
......@@ -12529,7 +12549,8 @@ void Dbdih::searchStoredReplicas(FragmentstorePtr fragPtr)
stopGci,
startGci,
startLcpNo);
if (!result) {
if (!result)
{
jam();
/* --------------------------------------------------------------- */
/* WE COULD NOT FIND ANY LOCAL CHECKPOINT. THE FRAGMENT THUS DO NOT*/
......@@ -12541,22 +12562,20 @@ void Dbdih::searchStoredReplicas(FragmentstorePtr fragPtr)
/* TO INDICATE THAT NO LOCAL CHECKPOINT IS TO BE USED WE SET THE */
/* LOCAL CHECKPOINT TO ZNIL. */
/* --------------------------------------------------------------- */
createReplicaPtr.p->lcpNo = ZNIL;
} else {
createReplicaPtrP->lcpNo = ZNIL;
}
else
{
jam();
/* --------------------------------------------------------------- */
/* WE FOUND A PROPER LOCAL CHECKPOINT TO RESTART FROM. */
/* SET LOCAL CHECKPOINT ID AND LOCAL CHECKPOINT NUMBER. */
/* --------------------------------------------------------------- */
createReplicaPtr.p->lcpNo = startLcpNo;
createReplicaPtrP->lcpNo = startLcpNo;
arrGuard(startLcpNo, MAX_LCP_STORED);
createReplicaPtr.p->createLcpId = replicaPtr.p->lcpId[startLcpNo];
createReplicaPtrP->createLcpId = replicaPtr.p->lcpId[startLcpNo];
}//if
if(ERROR_INSERTED(7073) || ERROR_INSERTED(7074)){
jam();
nodePtr.p->nodeStatus = NodeRecord::DEAD;
}
/* ----------------------------------------------------------------- */
/* WE HAVE EITHER FOUND A LOCAL CHECKPOINT OR WE ARE PLANNING TO */
......@@ -12564,21 +12583,48 @@ void Dbdih::searchStoredReplicas(FragmentstorePtr fragPtr)
/* CASES WE NEED TO FIND A SET OF LOGS THAT CAN EXECUTE SUCH THAT */
/* WE RECOVER TO THE SYSTEM RESTART GLOBAL CHECKPOINT. */
/* -_--------------------------------------------------------------- */
if (!findLogNodes(createReplicaPtr.p, fragPtr, startGci, stopGci)) {
jam();
/* --------------------------------------------------------------- */
/* WE WERE NOT ABLE TO FIND ANY WAY OF RESTORING THIS REPLICA. */
/* THIS IS A POTENTIAL SYSTEM ERROR. */
/* --------------------------------------------------------------- */
cnoOfCreateReplicas--;
return;
}//if
return findLogNodes(createReplicaPtrP, fragPtr, startGci, stopGci);
}
if(ERROR_INSERTED(7073) || ERROR_INSERTED(7074)){
void Dbdih::searchStoredReplicas(FragmentstorePtr fragPtr)
{
Uint32 nextReplicaPtrI;
Ptr<ReplicaRecord> replicaPtr;
replicaPtr.i = fragPtr.p->storedReplicas;
while (replicaPtr.i != RNIL) {
jam();
nodePtr.p->nodeStatus = NodeRecord::ALIVE;
}
ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
nextReplicaPtrI = replicaPtr.p->nextReplica;
ConstPtr<ReplicaRecord> constReplicaPtr;
constReplicaPtr.i = replicaPtr.i;
constReplicaPtr.p = replicaPtr.p;
NodeRecordPtr nodePtr;
nodePtr.i = replicaPtr.p->procNode;
ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
if (nodePtr.p->nodeStatus == NodeRecord::ALIVE) {
jam();
switch (nodePtr.p->activeStatus) {
case Sysfile::NS_Active:
case Sysfile::NS_ActiveMissed_1:
case Sysfile::NS_ActiveMissed_2:{
/* ----------------------------------------------------------------- */
/* INITIALISE THE CREATE REPLICA STRUCTURE THAT IS USED FOR SENDING*/
/* TO LQH START_FRAGREQ. */
/* SET THE DATA NODE WHERE THE LOCAL CHECKPOINT IS FOUND. ALSO */
/* SET A REFERENCE TO THE REPLICA POINTER OF THAT. */
/* ----------------------------------------------------------------- */
CreateReplicaRecordPtr createReplicaPtr;
createReplicaPtr.i = cnoOfCreateReplicas;
ptrCheckGuard(createReplicaPtr, 4, createReplicaRecord);
cnoOfCreateReplicas++;
/**
* Should have been checked in resetReplicaSr
*/
ndbrequire(setup_create_replica(fragPtr,
createReplicaPtr.p,
constReplicaPtr));
break;
}
default:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment