Commit 1f908546 authored by jonas@perch.ndb.mysql.com

Merge perch.ndb.mysql.com:/home/jonas/src/41-work

into  perch.ndb.mysql.com:/home/jonas/src/mysql-4.1-ndb
parents f819cac8 49d87c7e
@@ -2313,7 +2313,8 @@ void Dbdict::checkSchemaStatus(Signal* signal)
     tablePtr.p->tableType = (DictTabInfo::TableType)oldEntry->m_tableType;
     // On NR get index from master because index state is not on file
-    const bool file = c_systemRestart || tablePtr.p->isTable();
+    const bool file = (* newEntry == * oldEntry) &&
+      (c_systemRestart || tablePtr.p->isTable());
     restartCreateTab(signal, tableId, oldEntry, file);
     return;
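The net effect of the Dbdict change is that a table definition is read back from the local schema file only when the on-disk entry still matches the entry received from the master; otherwise it must be fetched from the master, as is already done for indexes on a node restart. A minimal sketch of that predicate, assuming an entry type with an equality operator as implied by `* newEntry == * oldEntry` (the type name and fields below are illustrative, not taken from the commit):

```cpp
#include <cstdint>

// Illustrative stand-in for the schema-file entry compared in the hunk above.
struct TableEntry {
  uint32_t m_tableVersion;  // assumed field, for illustration only
  uint32_t m_tableType;
  bool operator==(const TableEntry& o) const {
    return m_tableVersion == o.m_tableVersion && m_tableType == o.m_tableType;
  }
};

// Read the definition from the local schema file only if the local (old)
// entry still equals the master's (new) entry; otherwise fetch from master.
bool restartFromFile(const TableEntry& newEntry, const TableEntry& oldEntry,
                     bool systemRestart, bool isTable)
{
  return (newEntry == oldEntry) && (systemRestart || isTable);
}

int main() {
  TableEntry oldE{1, 2}, newE{1, 2};
  // entries match and a system restart is in progress -> read from local file
  return restartFromFile(newE, oldE, /*systemRestart=*/true, /*isTable=*/true) ? 0 : 1;
}
```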
@@ -1265,9 +1265,9 @@ void Dbdih::execNDB_STTOR(Signal* signal)
     if (isMaster()) {
       jam();
       systemRestartTakeOverLab(signal);
-      if (anyActiveTakeOver() && false) {
+      if (anyActiveTakeOver())
+      {
         jam();
-        ndbout_c("1 - anyActiveTakeOver == true");
         return;
       }
     }
@@ -2260,6 +2260,8 @@ Dbdih::systemRestartTakeOverLab(Signal* signal)
       // NOT ACTIVE NODES THAT HAVE NOT YET BEEN TAKEN OVER NEEDS TAKE OVER
       // IMMEDIATELY. IF WE ARE ALIVE WE TAKE OVER OUR OWN NODE.
       /*-------------------------------------------------------------------*/
+      infoEvent("Take over of node %d started",
+                nodePtr.i);
       startTakeOver(signal, RNIL, nodePtr.i, nodePtr.i);
     }//if
     break;
@@ -2372,6 +2374,12 @@ void Dbdih::nodeRestartTakeOver(Signal* signal, Uint32 startNodeId)
      *--------------------------------------------------------------------*/
     Uint32 takeOverNode = Sysfile::getTakeOverNode(startNodeId,
                                                    SYSFILE->takeOver);
+    if(takeOverNode == 0){
+      jam();
+      warningEvent("Bug in take-over code restarting");
+      takeOverNode = startNodeId;
+    }
     startTakeOver(signal, RNIL, startNodeId, takeOverNode);
     break;
   }
@@ -2525,7 +2533,14 @@ void Dbdih::startTakeOver(Signal* signal,
   Sysfile::setTakeOverNode(takeOverPtr.p->toFailedNode, SYSFILE->takeOver,
                            startNode);
   takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_START_COPY;
+  if (getNodeState().getSystemRestartInProgress())
+  {
+    jam();
+    checkToCopy();
+    checkToCopyCompleted(signal);
+    return;
+  }
   cstartGcpNow = true;
 }//Dbdih::startTakeOver()
@@ -3273,6 +3288,18 @@ void Dbdih::toCopyCompletedLab(Signal * signal, TakeOverRecordPtr takeOverPtr)
   signal->theData[1] = takeOverPtr.p->toStartingNode;
   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
+
+  if (getNodeState().getSystemRestartInProgress())
+  {
+    jam();
+    infoEvent("Take over of node %d complete", takeOverPtr.p->toStartingNode);
+    setNodeActiveStatus(takeOverPtr.p->toStartingNode, Sysfile::NS_Active);
+    takeOverPtr.p->toMasterStatus = TakeOverRecord::WAIT_LCP;
+    takeOverCompleted(takeOverPtr.p->toStartingNode);
+    checkToCopy();
+    checkToCopyCompleted(signal);
+    return;
+  }
   c_lcpState.immediateLcpStart = true;
   takeOverPtr.p->toMasterStatus = TakeOverRecord::WAIT_LCP;
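Taken together, the Dbdih hunks above add a dedicated path for take-overs that the master performs during a system restart: startTakeOver() skips requesting a GCP and drives checkToCopy()/checkToCopyCompleted() directly, and toCopyCompletedLab() marks the node active and moves on to the next pending take-over instead of forcing an immediate LCP. A toy model of that branch, with simplified flags standing in for the real Dbdih state (not the actual implementation):

```cpp
#include <cstdio>

// Toy model (not the real Dbdih types) of the branch added to
// toCopyCompletedLab() above: during a system restart the taken-over node
// is marked active right away and the next take-over is processed, instead
// of first forcing an immediate local checkpoint.
struct TakeOverFlow {
  bool systemRestartInProgress = false;
  bool immediateLcpStart = false;   // stands in for c_lcpState.immediateLcpStart
  bool nodeActive = false;          // stands in for Sysfile::NS_Active status

  void copyCompleted() {
    if (systemRestartInProgress) {
      nodeActive = true;            // setNodeActiveStatus(..., NS_Active)
      return;                       // checkToCopy()/checkToCopyCompleted() run next
    }
    immediateLcpStart = true;       // normal path: an LCP runs before the node is active
  }
};

int main() {
  TakeOverFlow restart, normal;
  restart.systemRestartInProgress = true;
  restart.copyCompleted();
  normal.copyCompleted();
  std::printf("restart: active=%d, lcp=%d\n", restart.nodeActive, restart.immediateLcpStart);
  std::printf("normal:  active=%d, lcp=%d\n", normal.nodeActive, normal.immediateLcpStart);
  return 0;
}
```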
@@ -3379,16 +3406,12 @@ void Dbdih::execEND_TOCONF(Signal* signal)
   }//if
   endTakeOver(takeOverPtr.i);
-  ndbout_c("2 - endTakeOver");
   if (cstartPhase == ZNDB_SPH4) {
     jam();
-    ndbrequire(false);
     if (anyActiveTakeOver()) {
       jam();
-      ndbout_c("4 - anyActiveTakeOver == true");
       return;
     }//if
-    ndbout_c("5 - anyActiveTakeOver == false -> ndbsttorry10Lab");
     ndbsttorry10Lab(signal, __LINE__);
     return;
   }//if
@@ -9561,73 +9584,84 @@ void Dbdih::startNextChkpt(Signal* signal)
       nodePtr.i = replicaPtr.p->procNode;
       ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
-      if (replicaPtr.p->lcpOngoingFlag &&
-          replicaPtr.p->lcpIdStarted < lcpId) {
-        jam();
-        //-------------------------------------------------------------------
-        // We have found a replica on a node that performs local checkpoint
-        // that is alive and that have not yet been started.
-        //-------------------------------------------------------------------
-        if (nodePtr.p->noOfStartedChkpt < 2) {
-          jam();
-          /**
-           * Send LCP_FRAG_ORD to LQH
-           */
-          /**
-           * Mark the replica so with lcpIdStarted == true
-           */
-          replicaPtr.p->lcpIdStarted = lcpId;
-          Uint32 i = nodePtr.p->noOfStartedChkpt;
-          nodePtr.p->startedChkpt[i].tableId = tabPtr.i;
-          nodePtr.p->startedChkpt[i].fragId = curr.fragmentId;
-          nodePtr.p->startedChkpt[i].replicaPtr = replicaPtr.i;
-          nodePtr.p->noOfStartedChkpt = i + 1;
-          sendLCP_FRAG_ORD(signal, nodePtr.p->startedChkpt[i]);
-        } else if (nodePtr.p->noOfQueuedChkpt < 2) {
-          jam();
-          /**
-           * Put LCP_FRAG_ORD "in queue"
-           */
-          /**
-           * Mark the replica so with lcpIdStarted == true
-           */
-          replicaPtr.p->lcpIdStarted = lcpId;
-          Uint32 i = nodePtr.p->noOfQueuedChkpt;
-          nodePtr.p->queuedChkpt[i].tableId = tabPtr.i;
-          nodePtr.p->queuedChkpt[i].fragId = curr.fragmentId;
-          nodePtr.p->queuedChkpt[i].replicaPtr = replicaPtr.i;
-          nodePtr.p->noOfQueuedChkpt = i + 1;
-        } else {
-          jam();
-          if(save){
-            /**
-             * Stop increasing value on first that was "full"
-             */
-            c_lcpState.currentFragment = curr;
-            save = false;
-          }
-          busyNodes.set(nodePtr.i);
-          if(busyNodes.count() == lcpNodes){
-            /**
-             * There were no possibility to start the local checkpoint
-             * and it was not possible to queue it up. In this case we
-             * stop the start of local checkpoints until the nodes with a
-             * backlog have performed more checkpoints. We will return and
-             * will not continue the process of starting any more checkpoints.
-             */
-            return;
-          }//if
-        }//if
-      }//if
+      if (c_lcpState.m_participatingLQH.get(nodePtr.i))
+      {
+        if (replicaPtr.p->lcpOngoingFlag &&
+            replicaPtr.p->lcpIdStarted < lcpId)
+        {
+          jam();
+          //-------------------------------------------------------------------
+          // We have found a replica on a node that performs local checkpoint
+          // that is alive and that have not yet been started.
+          //-------------------------------------------------------------------
+          if (nodePtr.p->noOfStartedChkpt < 2)
+          {
+            jam();
+            /**
+             * Send LCP_FRAG_ORD to LQH
+             */
+            /**
+             * Mark the replica so with lcpIdStarted == true
+             */
+            replicaPtr.p->lcpIdStarted = lcpId;
+            Uint32 i = nodePtr.p->noOfStartedChkpt;
+            nodePtr.p->startedChkpt[i].tableId = tabPtr.i;
+            nodePtr.p->startedChkpt[i].fragId = curr.fragmentId;
+            nodePtr.p->startedChkpt[i].replicaPtr = replicaPtr.i;
+            nodePtr.p->noOfStartedChkpt = i + 1;
+            sendLCP_FRAG_ORD(signal, nodePtr.p->startedChkpt[i]);
+          }
+          else if (nodePtr.p->noOfQueuedChkpt < 2)
+          {
+            jam();
+            /**
+             * Put LCP_FRAG_ORD "in queue"
+             */
+            /**
+             * Mark the replica so with lcpIdStarted == true
+             */
+            replicaPtr.p->lcpIdStarted = lcpId;
+            Uint32 i = nodePtr.p->noOfQueuedChkpt;
+            nodePtr.p->queuedChkpt[i].tableId = tabPtr.i;
+            nodePtr.p->queuedChkpt[i].fragId = curr.fragmentId;
+            nodePtr.p->queuedChkpt[i].replicaPtr = replicaPtr.i;
+            nodePtr.p->noOfQueuedChkpt = i + 1;
+          }
+          else
+          {
+            jam();
+            if(save)
+            {
+              /**
+               * Stop increasing value on first that was "full"
+               */
+              c_lcpState.currentFragment = curr;
+              save = false;
+            }
+            busyNodes.set(nodePtr.i);
+            if(busyNodes.count() == lcpNodes)
+            {
+              /**
+               * There were no possibility to start the local checkpoint
+               * and it was not possible to queue it up. In this case we
+               * stop the start of local checkpoints until the nodes with a
+               * backlog have performed more checkpoints. We will return and
+               * will not continue the process of starting any more checkpoints.
+               */
+              return;
+            }//if
+          }//if
+        }
+      }
     }//while
     curr.fragmentId++;
     if (curr.fragmentId >= tabPtr.p->totalfragments) {
       jam();
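The restructured startNextChkpt() hunk keeps the original dispatch rule but only applies it to nodes that actually take part in this LCP (`c_lcpState.m_participatingLQH`). The rule itself: each node may have at most two LCP_FRAG_ORDs started and two queued; when every participating node is full, the scan remembers the current fragment and stops until the backlog drains. A compact sketch of the per-node rule (simplified types, not the actual Dbdih code):

```cpp
#include <cstdint>
#include <cstdio>

// Simplified per-node dispatch rule from startNextChkpt() above.
struct NodeChkptState {
  uint32_t noOfStartedChkpt = 0;  // LCP_FRAG_ORDs already sent to this node's LQH
  uint32_t noOfQueuedChkpt = 0;   // LCP_FRAG_ORDs queued, waiting for a free slot
};

enum class Dispatch { Started, Queued, NodeBusy };

// Decide what happens to the next fragment replica hosted on `node`.
Dispatch dispatchFragment(NodeChkptState& node)
{
  if (node.noOfStartedChkpt < 2) {  // free "started" slot: send LCP_FRAG_ORD now
    node.noOfStartedChkpt++;
    return Dispatch::Started;
  }
  if (node.noOfQueuedChkpt < 2) {   // free "queued" slot: defer the order
    node.noOfQueuedChkpt++;
    return Dispatch::Queued;
  }
  return Dispatch::NodeBusy;        // both limits hit: caller marks the node busy
}

int main() {
  NodeChkptState node;
  for (int i = 0; i < 5; ++i) {
    // prints Started twice, Queued twice, then NodeBusy
    std::printf("fragment %d -> %d\n", i, static_cast<int>(dispatchFragment(node)));
  }
  return 0;
}
```

In the real code a NodeBusy outcome sets the node in `busyNodes`; once `busyNodes.count() == lcpNodes`, the scan records `c_lcpState.currentFragment` and returns, so checkpoint starts resume from that fragment later.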
@@ -984,13 +984,6 @@ Dbtc::handleFailedApiNode(Signal* signal,
     TloopCount += 64;
     break;
   case CS_CONNECTED:
-    /*********************************************************************/
-    // The api record is connected to failed node. We need to release the
-    // connection and set it in a disconnected state.
-    /*********************************************************************/
-    jam();
-    releaseApiCon(signal, apiConnectptr.i);
-    break;
   case CS_REC_COMMITTING:
   case CS_RECEIVING:
   case CS_STARTED: