Commit cb3b2a36 authored by unknown

Merge perch.ndb.mysql.com:/home/jonas/src/50-work

into  perch.ndb.mysql.com:/home/jonas/src/51-telco-gca


storage/ndb/src/kernel/blocks/ERROR_codes.txt:
  Auto merged
storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp:
  Auto merged
storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp:
  Auto merged
storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp:
  Auto merged
storage/ndb/test/src/NdbRestarter.cpp:
  Auto merged
storage/ndb/test/include/NdbRestarter.hpp:
  Auto merged
storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
  merge
storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp:
  merge
storage/ndb/src/mgmsrv/InitConfigFileParser.cpp:
  merge
storage/ndb/test/ndbapi/testNodeRestart.cpp:
  merge
storage/ndb/test/run-test/daily-basic-tests.txt:
  merge
parents 5af0cbc7 85fdd106
......@@ -5,7 +5,7 @@ Next DBACC 3002
Next DBTUP 4024
Next DBLQH 5045
Next DBDICT 6007
Next DBDIH 7178
Next DBDIH 7181
Next DBTC 8039
Next CMVMI 9000
Next BACKUP 10038
......@@ -73,6 +73,8 @@ Delay GCP_SAVEREQ by 10 secs
7177: Delay copying of sysfileData in execCOPY_GCIREQ
7180: Crash master during master-take-over in execMASTER_LCPCONF
ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING:
-----------------------------------------------------------------
......
......@@ -1381,6 +1381,7 @@ private:
Uint32 csystemnodes;
Uint32 currentgcp;
Uint32 c_newest_restorable_gci;
Uint32 c_set_initial_start_flag;
enum GcpMasterTakeOverState {
GMTOS_IDLE = 0,
......
......@@ -61,6 +61,7 @@ void Dbdih::initData()
c_blockCommit = false;
c_blockCommitNo = 1;
cntrlblockref = RNIL;
c_set_initial_start_flag = FALSE;
}//Dbdih::initData()
void Dbdih::initRecords()
......
......@@ -677,6 +677,12 @@ void Dbdih::execCOPY_GCIREQ(Signal* signal)
Uint32 tmp= SYSFILE->m_restart_seq;
memcpy(sysfileData, cdata, sizeof(sysfileData));
SYSFILE->m_restart_seq = tmp;
if (c_set_initial_start_flag)
{
jam();
Sysfile::setInitialStartOngoing(SYSFILE->systemRestartBits);
}
}
c_copyGCISlave.m_copyReason = reason;
......@@ -1290,6 +1296,11 @@ void Dbdih::execNDB_STTOR(Signal* signal)
// The permission is given by the master node in the alive set.
/*-----------------------------------------------------------------------*/
createMutexes(signal, 0);
if (cstarttype == NodeState::ST_INITIAL_NODE_RESTART)
{
jam();
c_set_initial_start_flag = TRUE; // In sysfile...
}
break;
case ZNDB_SPH3:
......@@ -4828,6 +4839,8 @@ void
Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){
jam();
Uint32 oldNode = c_lcpMasterTakeOverState.failedNodeId;
c_lcpMasterTakeOverState.minTableId = ~0;
c_lcpMasterTakeOverState.minFragId = ~0;
c_lcpMasterTakeOverState.failedNodeId = nodeId;
......@@ -4846,7 +4859,20 @@ Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){
/**
* Node failure during master take over...
*/
ndbout_c("Nodefail during master take over");
ndbout_c("Nodefail during master take over (old: %d)", oldNode);
}
NodeRecordPtr nodePtr;
nodePtr.i = oldNode;
if (oldNode > 0 && oldNode < MAX_NDB_NODES)
{
jam();
ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
if (nodePtr.p->m_nodefailSteps.get(NF_LCP_TAKE_OVER))
{
jam();
checkLocalNodefailComplete(signal, oldNode, NF_LCP_TAKE_OVER);
}
}
setLocalNodefailHandling(signal, nodeId, NF_LCP_TAKE_OVER);
......@@ -5862,6 +5888,14 @@ void Dbdih::execMASTER_LCPREQ(Signal* signal)
jamEntry();
const BlockReference newMasterBlockref = req->masterRef;
if (newMasterBlockref != cmasterdihref)
{
jam();
ndbout_c("resending GSN_MASTER_LCPREQ");
sendSignalWithDelay(reference(), GSN_MASTER_LCPREQ, signal,
signal->getLength(), 50);
return;
}
Uint32 failedNodeId = req->failedNodeId;
/**
......@@ -6158,6 +6192,8 @@ void Dbdih::execMASTER_LCPCONF(Signal* signal)
ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
nodePtr.p->lcpStateAtTakeOver = lcpState;
CRASH_INSERTION(7180);
#ifdef VM_TRACE
ndbout_c("MASTER_LCPCONF");
printMASTER_LCP_CONF(stdout, &signal->theData[0], 0, 0);
......@@ -10716,6 +10752,17 @@ Dbdih::sendLCP_COMPLETE_REP(Signal* signal){
sendSignal(c_lcpState.m_masterLcpDihRef, GSN_LCP_COMPLETE_REP, signal,
LcpCompleteRep::SignalLength, JBB);
/**
* Say that an initial node restart does not need to be redone
* once node has been part of first LCP
*/
if (c_set_initial_start_flag &&
c_lcpState.m_participatingLQH.get(getOwnNodeId()))
{
jam();
c_set_initial_start_flag = FALSE;
}
}
/*-------------------------------------------------------------------------- */
......
......@@ -11893,7 +11893,7 @@ void Dblqh::execGCP_SAVEREQ(Signal* signal)
return;
}
if (getNodeState().getNodeRestartInProgress())
if (getNodeState().getNodeRestartInProgress() && cstartRecReq == ZFALSE)
{
GCPSaveRef * const saveRef = (GCPSaveRef*)&signal->theData[0];
saveRef->dihPtr = dihPtr;
......@@ -11940,7 +11940,6 @@ void Dblqh::execGCP_SAVEREQ(Signal* signal)
}//if
ndbrequire(ccurrentGcprec == RNIL);
ccurrentGcprec = 0;
gcpPtr.i = ccurrentGcprec;
ptrCheckGuard(gcpPtr, cgcprecFileSize, gcpRecord);
......
......@@ -75,8 +75,8 @@ static BlockInfo ALL_BLOCKS[] = {
{ DBTUP_REF, 1 , 4000, 4007 },
{ DBDICT_REF, 1 , 6000, 6003 },
{ NDBCNTR_REF, 0 , 1000, 1999 },
{ CMVMI_REF, 1 , 9000, 9999 }, // before QMGR
{ QMGR_REF, 1 , 1, 999 },
{ CMVMI_REF, 1 , 9000, 9999 },
{ TRIX_REF, 1 , 0, 0 },
{ BACKUP_REF, 1 , 10000, 10999 },
{ DBUTIL_REF, 1 , 11000, 11999 },
......
......@@ -836,7 +836,7 @@ InitConfigFileParser::parse_mycnf()
opt.arg_type = REQUIRED_ARG;
options.push_back(opt);
opt.name = "api";
opt.name = "ndbapi";
opt.id = 256;
opt.value = (gptr*)malloc(sizeof(char*));
opt.var_type = GET_STR;
......@@ -851,7 +851,6 @@ InitConfigFileParser::parse_mycnf()
mysqld = &options[idx+2];
api = &options[idx+3];
}
Context ctx(m_info, m_errstream);
const char *groups[]= { "cluster_config", 0 };
......
......@@ -61,6 +61,8 @@ public:
int dumpStateAllNodes(const int * _args, int _num_args);
int getMasterNodeId();
int getNextMasterNodeId(int nodeId);
int getNodeGroup(int nodeId);
int getRandomNodeSameNodeGroup(int nodeId, int randomNumber);
int getRandomNodeOtherNodeGroup(int nodeId, int randomNumber);
int getRandomNotMasterNodeId(int randomNumber);
......
......@@ -1178,6 +1178,85 @@ int runBug25554(NDBT_Context* ctx, NDBT_Step* step){
return NDBT_OK;
}
/**
 * Regression test for bug 26457.
 *
 * Needs at least four data nodes; with fewer the test is a no-op and
 * reports NDBT_OK.  In every loop iteration it arms error insert 7180 on
 * the node that getNextMasterNodeId() reports for the current master and
 * then restarts the current master, waiting for the cluster to come back.
 *
 * Returns NDBT_OK on success and NDBT_FAILED as soon as a checked
 * restarter call reports an error.
 */
int
runBug26457(NDBT_Context* ctx, NDBT_Step* step)
{
  NdbRestarter res;
  if (res.getNumDbNodes() < 4)
    return NDBT_OK;

  for (int remaining = ctx->getNumLoops(); remaining != 0; remaining--)
  {
    int master;
    int next;

    // Keep restarting the successor until master and successor end up in
    // different node groups.
    for (;;)
    {
      master = res.getMasterNodeId();
      next = res.getNextMasterNodeId(master);
      ndbout_c("master: %d next: %d", master, next);

      if (res.getNodeGroup(master) != res.getNodeGroup(next))
        break;

      // Return value of the restart itself is not checked; only the
      // subsequent wait decides success.
      res.restartOneDbNode(next, false, false, true);
      if (res.waitClusterStarted())
        return NDBT_FAILED;
    }

    int dumpArgs[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 2 };
    if (res.dumpStateOneNode(next, dumpArgs, 2))
      return NDBT_FAILED;

    // 7180: crash during master take-over in execMASTER_LCPCONF
    if (res.insertErrorInNode(next, 7180))
      return NDBT_FAILED;

    res.restartOneDbNode(master, false, false, true);
    if (res.waitClusterStarted())
      return NDBT_FAILED;
  }

  return NDBT_OK;
}
/**
 * Regression test for bug 26481.
 *
 * Picks a random non-master data node, restarts it into the no-start
 * state, arms error insert 7018 on it and starts it.  The node is then
 * expected to return to the no-start state, after which it is started
 * again and the whole cluster must come up.
 *
 * ctx and step are not used by this test.
 *
 * Returns NDBT_OK on success and NDBT_FAILED as soon as a checked
 * restarter call reports an error.
 */
int
runBug26481(NDBT_Context* ctx, NDBT_Step* step)
{
  (void)ctx;
  (void)step;

  NdbRestarter res;

  int node = res.getRandomNotMasterNodeId(rand());
  ndbout_c("node: %d", node);

  // NOTE(review): the three flags are presumably (initial, nostart, abort)
  // -- confirm against NdbRestarter::restartOneDbNode.
  if (res.restartOneDbNode(node, true, true, true))
    return NDBT_FAILED;

  if (res.waitNodesNoStart(&node, 1))
    return NDBT_FAILED;

  int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
  if (res.dumpStateOneNode(node, val2, 2))
    return NDBT_FAILED;

  if (res.insertErrorInNode(node, 7018))
    return NDBT_FAILED;

  if (res.startNodes(&node, 1))
    return NDBT_FAILED;

  // Result deliberately not checked: whether or not start phase 3 is
  // observed, the node must be back in no-start state afterwards.
  res.waitNodesStartPhase(&node, 1, 3);

  if (res.waitNodesNoStart(&node, 1))
    return NDBT_FAILED;

  // Checked like the first startNodes() call above; previously a failure
  // here was ignored and only surfaced through the wait below.
  if (res.startNodes(&node, 1))
    return NDBT_FAILED;

  if (res.waitClusterStarted())
    return NDBT_FAILED;

  return NDBT_OK;
}
NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad",
......@@ -1514,6 +1593,12 @@ TESTCASE("Bug25468", ""){
TESTCASE("Bug25554", ""){
INITIALIZER(runBug25554);
}
TESTCASE("Bug26457", ""){
INITIALIZER(runBug26457);
}
TESTCASE("Bug26481", ""){
INITIALIZER(runBug26481);
}
NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){
......
......@@ -521,6 +521,10 @@ max-time: 1000
cmd: testNodeRestart
args: -n Bug25554 T1
max-time: 1000
cmd: testNodeRestart
args: -n Bug26457 T1
#
# DICT TESTS
max-time: 1500
......
......@@ -127,6 +127,68 @@ NdbRestarter::getMasterNodeId(){
return node;
}
/**
 * Look up the node group recorded for the data node with id nodeId.
 *
 * Returns the node_group field of the matching entry in ndbNodes, or -1
 * when isConnected() fails, getStatus() returns non-zero, or no entry
 * with that node id exists.
 */
int
NdbRestarter::getNodeGroup(int nodeId){
  if (!isConnected() || getStatus() != 0)
    return -1;

  const size_t count = ndbNodes.size();
  for (size_t pos = 0; pos != count; ++pos)
  {
    if (ndbNodes[pos].node_id != nodeId)
      continue;

    return ndbNodes[pos].node_group;
  }

  return -1;
}
/**
 * Return the id of the data node whose dynamic id is the smallest one
 * strictly greater than the dynamic id of nodeId (presumably the node
 * that would become master next after nodeId -- confirm against QMGR's
 * president selection).
 *
 * Returns -1 when isConnected() fails, when getStatus() returns non-zero,
 * or when nodeId is not present in ndbNodes.  When no node has a greater
 * dynamic id than nodeId, the result of getMasterNodeId() is returned.
 *
 * The previous implementation guarded that fallback with a sentinel test
 * (minid != ~0) that could never fail, so for the node with the highest
 * dynamic id it returned a node with that same dynamic id (normally
 * nodeId itself) instead of falling back.
 */
int
NdbRestarter::getNextMasterNodeId(int nodeId){
  if (!isConnected())
    return -1;

  if (getStatus() != 0)
    return -1;

  size_t i;
  for (i = 0; i < ndbNodes.size(); i++)
  {
    if (ndbNodes[i].node_id == nodeId)
    {
      break;
    }
  }
  assert(i < ndbNodes.size());
  if (i == ndbNodes.size())
    return -1;

  const int dynid = ndbNodes[i].dynamic_id;

  // Index of the best candidate so far; ndbNodes.size() means "none found".
  size_t best = ndbNodes.size();
  for (i = 0; i < ndbNodes.size(); i++)
  {
    const int candidate = ndbNodes[i].dynamic_id;
    if (candidate > dynid &&
        (best == ndbNodes.size() || candidate < ndbNodes[best].dynamic_id))
    {
      // Strict '<' keeps the first entry when several share the same id,
      // matching the order in which the old code picked its result.
      best = i;
    }
  }

  if (best != ndbNodes.size())
    return ndbNodes[best].node_id;

  return getMasterNodeId();
}
int
NdbRestarter::getRandomNotMasterNodeId(int rand){
int master = getMasterNodeId();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment