Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
mariadb
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
mariadb
Commits
0e410aa1
Commit
0e410aa1
authored
Apr 05, 2006
by
jonas@perch.ndb.mysql.com
Browse files
Options
Browse Files
Download
Plain Diff
Merge perch.ndb.mysql.com:/home/jonas/src/41-work
into perch.ndb.mysql.com:/home/jonas/src/50-work
parents
4e819b73
2abc5e2f
Changes
15
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
978 additions
and
129 deletions
+978
-129
ndb/include/kernel/signaldata/DumpStateOrd.hpp
ndb/include/kernel/signaldata/DumpStateOrd.hpp
+1
-0
ndb/include/kernel/signaldata/FailRep.hpp
ndb/include/kernel/signaldata/FailRep.hpp
+13
-3
ndb/include/kernel/signaldata/StopReq.hpp
ndb/include/kernel/signaldata/StopReq.hpp
+22
-16
ndb/include/kernel/signaldata/WaitGCP.hpp
ndb/include/kernel/signaldata/WaitGCP.hpp
+5
-2
ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp
ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp
+53
-13
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
+28
-0
ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp
ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp
+11
-0
ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp
ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp
+1
-0
ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
+301
-50
ndb/src/kernel/blocks/qmgr/Qmgr.hpp
ndb/src/kernel/blocks/qmgr/Qmgr.hpp
+25
-3
ndb/src/kernel/blocks/qmgr/QmgrInit.cpp
ndb/src/kernel/blocks/qmgr/QmgrInit.cpp
+5
-2
ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
+290
-36
ndb/test/ndbapi/testNodeRestart.cpp
ndb/test/ndbapi/testNodeRestart.cpp
+213
-1
ndb/test/run-test/daily-basic-tests.txt
ndb/test/run-test/daily-basic-tests.txt
+9
-1
ndb/test/src/NdbRestarts.cpp
ndb/test/src/NdbRestarts.cpp
+1
-2
No files found.
ndb/include/kernel/signaldata/DumpStateOrd.hpp
View file @
0e410aa1
...
...
@@ -64,6 +64,7 @@ public:
// 19 NDBFS Fipple with O_SYNC, O_CREATE etc.
// 20-24 BACKUP
NdbcntrTestStopOnError
=
25
,
NdbcntrStopNodes
=
70
,
// 100-105 TUP and ACC
// 200-240 UTIL
// 300-305 TRIX
...
...
ndb/include/kernel/signaldata/FailRep.hpp
View file @
0e410aa1
...
...
@@ -18,6 +18,7 @@
#define FAIL_REP_HPP
#include "SignalData.hpp"
#include <NodeBitmask.hpp>
/**
*
...
...
@@ -27,6 +28,7 @@ class FailRep {
* Sender(s) & Receiver(s)
*/
friend
class
Qmgr
;
friend
class
Ndbcntr
;
/**
* For printing
...
...
@@ -35,7 +37,8 @@ class FailRep {
public:
STATIC_CONST
(
SignalLength
=
2
);
STATIC_CONST
(
ExtraLength
=
1
+
NdbNodeBitmask
::
Size
);
enum
FailCause
{
ZOWN_FAILURE
=
0
,
ZOTHER_NODE_WHEN_WE_START
=
1
,
...
...
@@ -43,13 +46,20 @@ public:
ZSTART_IN_REGREQ
=
3
,
ZHEARTBEAT_FAILURE
=
4
,
ZLINK_FAILURE
=
5
,
ZOTHERNODE_FAILED_DURING_START
=
6
ZOTHERNODE_FAILED_DURING_START
=
6
,
ZMULTI_NODE_SHUTDOWN
=
7
,
ZPARTITIONED_CLUSTER
=
8
};
private:
Uint32
failNodeId
;
Uint32
failCause
;
/**
* Used when failCause == ZPARTITIONED_CLUSTER
*/
Uint32
president
;
Uint32
partition
[
NdbNodeBitmask
::
Size
];
};
...
...
ndb/include/kernel/signaldata/StopReq.hpp
View file @
0e410aa1
...
...
@@ -32,7 +32,7 @@ class StopReq
friend
class
MgmtSrvr
;
public:
STATIC_CONST
(
SignalLength
=
9
);
STATIC_CONST
(
SignalLength
=
9
+
NdbNodeBitmask
::
Size
);
public:
Uint32
senderRef
;
...
...
@@ -49,29 +49,34 @@ public:
Int32
readOperationTimeout
;
// Timeout before read operations are aborted
Int32
operationTimeout
;
// Timeout before all operations are aborted
Uint32
nodes
[
NdbNodeBitmask
::
Size
];
static
void
setSystemStop
(
Uint32
&
requestInfo
,
bool
value
);
static
void
setPerformRestart
(
Uint32
&
requestInfo
,
bool
value
);
static
void
setNoStart
(
Uint32
&
requestInfo
,
bool
value
);
static
void
setInitialStart
(
Uint32
&
requestInfo
,
bool
value
);
static
void
setEscalateOnNodeFail
(
Uint32
&
requestInfo
,
bool
value
);
/**
* Don't perform "graceful" shutdown/restart...
*/
static
void
setStopAbort
(
Uint32
&
requestInfo
,
bool
value
);
static
void
setStopNodes
(
Uint32
&
requestInfo
,
bool
value
);
static
bool
getSystemStop
(
const
Uint32
&
requestInfo
);
static
bool
getPerformRestart
(
const
Uint32
&
requestInfo
);
static
bool
getNoStart
(
const
Uint32
&
requestInfo
);
static
bool
getInitialStart
(
const
Uint32
&
requestInfo
);
static
bool
getEscalateOnNodeFail
(
const
Uint32
&
requestInfo
);
static
bool
getStopAbort
(
const
Uint32
&
requestInfo
);
static
bool
getStopNodes
(
const
Uint32
&
requestInfo
);
};
struct
StopConf
{
STATIC_CONST
(
SignalLength
=
2
);
Uint32
senderData
;
Uint32
nodeState
;
union
{
Uint32
nodeState
;
Uint32
nodeId
;
};
};
class
StopRef
...
...
@@ -94,7 +99,9 @@ public:
NodeShutdownInProgress
=
1
,
SystemShutdownInProgress
=
2
,
NodeShutdownWouldCauseSystemCrash
=
3
,
TransactionAbortFailed
=
4
TransactionAbortFailed
=
4
,
UnsupportedNodeShutdown
=
5
,
MultiNodeShutdownNotMaster
=
6
};
public:
...
...
@@ -132,16 +139,16 @@ StopReq::getInitialStart(const Uint32 & requestInfo)
inline
bool
StopReq
::
get
EscalateOnNodeFail
(
const
Uint32
&
requestInfo
)
StopReq
::
get
StopAbort
(
const
Uint32
&
requestInfo
)
{
return
requestInfo
&
16
;
return
requestInfo
&
32
;
}
inline
bool
StopReq
::
getStop
Abort
(
const
Uint32
&
requestInfo
)
StopReq
::
getStop
Nodes
(
const
Uint32
&
requestInfo
)
{
return
requestInfo
&
32
;
return
requestInfo
&
64
;
}
...
...
@@ -187,24 +194,23 @@ StopReq::setInitialStart(Uint32 & requestInfo, bool value)
inline
void
StopReq
::
set
EscalateOnNodeFail
(
Uint32
&
requestInfo
,
bool
value
)
StopReq
::
set
StopAbort
(
Uint32
&
requestInfo
,
bool
value
)
{
if
(
value
)
requestInfo
|=
16
;
requestInfo
|=
32
;
else
requestInfo
&=
~
16
;
requestInfo
&=
~
32
;
}
inline
void
StopReq
::
setStop
Abort
(
Uint32
&
requestInfo
,
bool
value
)
StopReq
::
setStop
Nodes
(
Uint32
&
requestInfo
,
bool
value
)
{
if
(
value
)
requestInfo
|=
32
;
requestInfo
|=
64
;
else
requestInfo
&=
~
32
;
requestInfo
&=
~
64
;
}
#endif
ndb/include/kernel/signaldata/WaitGCP.hpp
View file @
0e410aa1
...
...
@@ -46,7 +46,9 @@ public:
Complete
=
1
,
///< Wait for a GCP to complete
CompleteForceStart
=
2
,
///< Wait for a GCP to complete, starting one if needed
CompleteIfRunning
=
3
,
///< Wait for ongoing GCP
CurrentGCI
=
8
///< Immediately return current GCI
CurrentGCI
=
8
,
///< Immediately return current GCI
BlockStartGcp
=
9
,
UnblockStartGcp
=
10
};
Uint32
senderRef
;
...
...
@@ -70,11 +72,12 @@ class WaitGCPConf {
//friend class Grep::PSCoord;
public:
STATIC_CONST
(
SignalLength
=
2
);
STATIC_CONST
(
SignalLength
=
3
);
public:
Uint32
senderData
;
Uint32
gcp
;
Uint32
blockStatus
;
};
class
WaitGCPRef
{
...
...
ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp
View file @
0e410aa1
...
...
@@ -134,6 +134,9 @@ Cmvmi::~Cmvmi()
{
}
#ifdef ERROR_INSERT
NodeBitmask
c_error_9000_nodes_mask
;
#endif
void
Cmvmi
::
execNDB_TAMPER
(
Signal
*
signal
)
{
...
...
@@ -419,21 +422,33 @@ void Cmvmi::execOPEN_COMREQ(Signal* signal)
const
Uint32
len
=
signal
->
getLength
();
if
(
len
==
2
){
globalTransporterRegistry
.
do_connect
(
tStartingNode
);
globalTransporterRegistry
.
setIOState
(
tStartingNode
,
HaltIO
);
//-----------------------------------------------------
// Report that the connection to the node is opened
//-----------------------------------------------------
signal
->
theData
[
0
]
=
NDB_LE_CommunicationOpened
;
signal
->
theData
[
1
]
=
tStartingNode
;
sendSignal
(
CMVMI_REF
,
GSN_EVENT_REP
,
signal
,
2
,
JBB
);
//-----------------------------------------------------
#ifdef ERROR_INSERT
if
(
!
(
ERROR_INSERTED
(
9000
)
&&
c_error_9000_nodes_mask
.
get
(
tStartingNode
)))
#endif
{
globalTransporterRegistry
.
do_connect
(
tStartingNode
);
globalTransporterRegistry
.
setIOState
(
tStartingNode
,
HaltIO
);
//-----------------------------------------------------
// Report that the connection to the node is opened
//-----------------------------------------------------
signal
->
theData
[
0
]
=
NDB_LE_CommunicationOpened
;
signal
->
theData
[
1
]
=
tStartingNode
;
sendSignal
(
CMVMI_REF
,
GSN_EVENT_REP
,
signal
,
2
,
JBB
);
//-----------------------------------------------------
}
}
else
{
for
(
unsigned
int
i
=
1
;
i
<
MAX_NODES
;
i
++
)
{
jam
();
if
(
i
!=
getOwnNodeId
()
&&
getNodeInfo
(
i
).
m_type
==
tData2
){
jam
();
#ifdef ERROR_INSERT
if
(
ERROR_INSERTED
(
9000
)
&&
c_error_9000_nodes_mask
.
get
(
i
))
continue
;
#endif
globalTransporterRegistry
.
do_connect
(
i
);
globalTransporterRegistry
.
setIOState
(
i
,
HaltIO
);
...
...
@@ -1039,7 +1054,8 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
}
DumpStateOrd
*
const
&
dumpState
=
(
DumpStateOrd
*
)
&
signal
->
theData
[
0
];
if
(
dumpState
->
args
[
0
]
==
DumpStateOrd
::
CmvmiDumpConnections
){
Uint32
arg
=
dumpState
->
args
[
0
];
if
(
arg
==
DumpStateOrd
::
CmvmiDumpConnections
){
for
(
unsigned
int
i
=
1
;
i
<
MAX_NODES
;
i
++
){
const
char
*
nodeTypeStr
=
""
;
switch
(
getNodeInfo
(
i
).
m_type
){
...
...
@@ -1072,13 +1088,13 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
}
}
if
(
dumpState
->
args
[
0
]
==
DumpStateOrd
::
CmvmiDumpLongSignalMemory
){
if
(
arg
==
DumpStateOrd
::
CmvmiDumpLongSignalMemory
){
infoEvent
(
"Cmvmi: g_sectionSegmentPool size: %d free: %d"
,
g_sectionSegmentPool
.
getSize
(),
g_sectionSegmentPool
.
getNoOfFree
());
}
if
(
dumpState
->
args
[
0
]
==
DumpStateOrd
::
CmvmiSetRestartOnErrorInsert
)
if
(
arg
==
DumpStateOrd
::
CmvmiSetRestartOnErrorInsert
)
{
if
(
signal
->
getLength
()
==
1
)
{
...
...
@@ -1098,7 +1114,7 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
}
}
if
(
dumpState
->
args
[
0
]
==
DumpStateOrd
::
CmvmiTestLongSigWithDelay
)
{
if
(
arg
==
DumpStateOrd
::
CmvmiTestLongSigWithDelay
)
{
unsigned
i
;
Uint32
loopCount
=
dumpState
->
args
[
1
];
const
unsigned
len0
=
11
;
...
...
@@ -1126,6 +1142,30 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
sendSignal
(
reference
(),
GSN_TESTSIG
,
signal
,
8
,
JBB
,
ptr
,
2
);
}
#ifdef ERROR_INSERT
if
(
arg
==
9000
)
{
SET_ERROR_INSERT_VALUE
(
9000
);
for
(
Uint32
i
=
1
;
i
<
signal
->
getLength
();
i
++
)
c_error_9000_nodes_mask
.
set
(
signal
->
theData
[
i
]);
}
if
(
arg
==
9001
)
{
CLEAR_ERROR_INSERT_VALUE
;
for
(
Uint32
i
=
0
;
i
<
MAX_NODES
;
i
++
)
{
if
(
c_error_9000_nodes_mask
.
get
(
i
))
{
signal
->
theData
[
0
]
=
0
;
signal
->
theData
[
1
]
=
i
;
EXECUTE_DIRECT
(
CMVMI
,
GSN_OPEN_COMREQ
,
signal
,
2
);
}
}
c_error_9000_nodes_mask
.
clear
();
}
#endif
#ifdef VM_TRACE
#if 0
{
...
...
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
View file @
0e410aa1
...
...
@@ -14273,11 +14273,36 @@ void Dbdih::execWAIT_GCP_REQ(Signal* signal)
jam
();
conf
->
senderData
=
senderData
;
conf
->
gcp
=
cnewgcp
;
conf
->
blockStatus
=
cgcpOrderBlocked
;
sendSignal
(
senderRef
,
GSN_WAIT_GCP_CONF
,
signal
,
WaitGCPConf
::
SignalLength
,
JBB
);
return
;
}
//if
if
(
requestType
==
WaitGCPReq
::
BlockStartGcp
)
{
jam
();
conf
->
senderData
=
senderData
;
conf
->
gcp
=
cnewgcp
;
conf
->
blockStatus
=
cgcpOrderBlocked
;
sendSignal
(
senderRef
,
GSN_WAIT_GCP_CONF
,
signal
,
WaitGCPConf
::
SignalLength
,
JBB
);
cgcpOrderBlocked
=
1
;
return
;
}
if
(
requestType
==
WaitGCPReq
::
UnblockStartGcp
)
{
jam
();
conf
->
senderData
=
senderData
;
conf
->
gcp
=
cnewgcp
;
conf
->
blockStatus
=
cgcpOrderBlocked
;
sendSignal
(
senderRef
,
GSN_WAIT_GCP_CONF
,
signal
,
WaitGCPConf
::
SignalLength
,
JBB
);
cgcpOrderBlocked
=
0
;
return
;
}
if
(
isMaster
())
{
/**
* Master
...
...
@@ -14289,6 +14314,7 @@ void Dbdih::execWAIT_GCP_REQ(Signal* signal)
jam
();
conf
->
senderData
=
senderData
;
conf
->
gcp
=
coldgcp
;
conf
->
blockStatus
=
cgcpOrderBlocked
;
sendSignal
(
senderRef
,
GSN_WAIT_GCP_CONF
,
signal
,
WaitGCPConf
::
SignalLength
,
JBB
);
return
;
...
...
@@ -14375,6 +14401,7 @@ void Dbdih::execWAIT_GCP_CONF(Signal* signal)
conf
->
senderData
=
ptr
.
p
->
clientData
;
conf
->
gcp
=
gcp
;
conf
->
blockStatus
=
cgcpOrderBlocked
;
sendSignal
(
ptr
.
p
->
clientRef
,
GSN_WAIT_GCP_CONF
,
signal
,
WaitGCPConf
::
SignalLength
,
JBB
);
...
...
@@ -14442,6 +14469,7 @@ void Dbdih::emptyWaitGCPMasterQueue(Signal* signal)
c_waitGCPMasterList
.
next
(
ptr
);
conf
->
senderData
=
clientData
;
conf
->
blockStatus
=
cgcpOrderBlocked
;
sendSignal
(
clientRef
,
GSN_WAIT_GCP_CONF
,
signal
,
WaitGCPConf
::
SignalLength
,
JBB
);
...
...
ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp
View file @
0e410aa1
...
...
@@ -203,6 +203,7 @@ private:
void
execWAIT_GCP_CONF
(
Signal
*
signal
);
void
execSTOP_REQ
(
Signal
*
signal
);
void
execSTOP_CONF
(
Signal
*
signal
);
void
execRESUME_REQ
(
Signal
*
signal
);
void
execCHANGE_NODE_STATE_CONF
(
Signal
*
signal
);
...
...
@@ -338,6 +339,16 @@ public:
void
progError
(
int
line
,
int
cause
,
const
char
*
extra
)
{
cntr
.
progError
(
line
,
cause
,
extra
);
}
enum
StopNodesStep
{
SR_BLOCK_GCP_START_GCP
=
0
,
SR_WAIT_COMPLETE_GCP
=
1
,
SR_UNBLOCK_GCP_START_GCP
=
2
,
SR_QMGR_STOP_REQ
=
3
,
SR_WAIT_NODE_FAILURES
=
4
,
SR_CLUSTER_SHUTDOWN
=
12
}
m_state
;
SignalCounter
m_stop_req_counter
;
};
private
:
StopRecord
c_stopRec
;
...
...
ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp
View file @
0e410aa1
...
...
@@ -87,6 +87,7 @@ Ndbcntr::Ndbcntr(const class Configuration & conf):
addRecSignal
(
GSN_STOP_ME_CONF
,
&
Ndbcntr
::
execSTOP_ME_CONF
);
addRecSignal
(
GSN_STOP_REQ
,
&
Ndbcntr
::
execSTOP_REQ
);
addRecSignal
(
GSN_STOP_CONF
,
&
Ndbcntr
::
execSTOP_CONF
);
addRecSignal
(
GSN_RESUME_REQ
,
&
Ndbcntr
::
execRESUME_REQ
);
addRecSignal
(
GSN_WAIT_GCP_REF
,
&
Ndbcntr
::
execWAIT_GCP_REF
);
...
...
ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
View file @
0e410aa1
...
...
@@ -42,6 +42,8 @@
#include <signaldata/FsRemoveReq.hpp>
#include <signaldata/ReadConfig.hpp>
#include <signaldata/FailRep.hpp>
#include <AttributeHeader.hpp>
#include <Configuration.hpp>
#include <DebuggerNames.hpp>
...
...
@@ -1474,13 +1476,74 @@ void Ndbcntr::execNODE_FAILREP(Signal* signal)
sendSignal
(
SUMA_REF
,
GSN_NODE_FAILREP
,
signal
,
NodeFailRep
::
SignalLength
,
JBB
);
if
(
c_stopRec
.
stopReq
.
senderRef
)
{
jam
();
switch
(
c_stopRec
.
m_state
){
case
StopRecord
:
:
SR_WAIT_NODE_FAILURES
:
{
jam
();
NdbNodeBitmask
tmp
;
tmp
.
assign
(
NdbNodeBitmask
::
Size
,
c_stopRec
.
stopReq
.
nodes
);
tmp
.
bitANDC
(
allFailed
);
tmp
.
copyto
(
NdbNodeBitmask
::
Size
,
c_stopRec
.
stopReq
.
nodes
);
if
(
tmp
.
isclear
())
{
jam
();
if
(
c_stopRec
.
stopReq
.
senderRef
!=
RNIL
)
{
jam
();
StopConf
*
const
stopConf
=
(
StopConf
*
)
&
signal
->
theData
[
0
];
stopConf
->
senderData
=
c_stopRec
.
stopReq
.
senderData
;
stopConf
->
nodeState
=
(
Uint32
)
NodeState
::
SL_SINGLEUSER
;
sendSignal
(
c_stopRec
.
stopReq
.
senderRef
,
GSN_STOP_CONF
,
signal
,
StopConf
::
SignalLength
,
JBB
);
}
c_stopRec
.
stopReq
.
senderRef
=
0
;
WaitGCPReq
*
req
=
(
WaitGCPReq
*
)
&
signal
->
theData
[
0
];
req
->
senderRef
=
reference
();
req
->
senderData
=
StopRecord
::
SR_UNBLOCK_GCP_START_GCP
;
req
->
requestType
=
WaitGCPReq
::
UnblockStartGcp
;
sendSignal
(
DBDIH_REF
,
GSN_WAIT_GCP_REQ
,
signal
,
WaitGCPReq
::
SignalLength
,
JBA
);
}
break
;
}
case
StopRecord
:
:
SR_QMGR_STOP_REQ
:
{
NdbNodeBitmask
tmp
;
tmp
.
assign
(
NdbNodeBitmask
::
Size
,
c_stopRec
.
stopReq
.
nodes
);
tmp
.
bitANDC
(
allFailed
);
if
(
tmp
.
isclear
())
{
Uint32
nodeId
=
allFailed
.
find
(
0
);
tmp
.
set
(
nodeId
);
StopConf
*
conf
=
(
StopConf
*
)
signal
->
getDataPtrSend
();
conf
->
senderData
=
c_stopRec
.
stopReq
.
senderData
;
conf
->
nodeId
=
nodeId
;
sendSignal
(
reference
(),
GSN_STOP_CONF
,
signal
,
StopConf
::
SignalLength
,
JBB
);
}
tmp
.
copyto
(
NdbNodeBitmask
::
Size
,
c_stopRec
.
stopReq
.
nodes
);
break
;
}
}
}
signal
->
theData
[
0
]
=
NDB_LE_NODE_FAILREP
;
signal
->
theData
[
2
]
=
0
;
Uint32
nodeId
=
0
;
while
(
!
allFailed
.
isclear
()){
nodeId
=
allFailed
.
find
(
nodeId
+
1
);
allFailed
.
clear
(
nodeId
);
signal
->
theData
[
0
]
=
NDB_LE_NODE_FAILREP
;
signal
->
theData
[
1
]
=
nodeId
;
signal
->
theData
[
2
]
=
0
;
sendSignal
(
CMVMI_REF
,
GSN_EVENT_REP
,
signal
,
3
,
JBB
);
}
//for
...
...
@@ -1924,13 +1987,15 @@ void
Ndbcntr
::
execDUMP_STATE_ORD
(
Signal
*
signal
)
{
DumpStateOrd
*
const
&
dumpState
=
(
DumpStateOrd
*
)
&
signal
->
theData
[
0
];
if
(
signal
->
theData
[
0
]
==
13
){
Uint32
arg
=
dumpState
->
args
[
0
];
if
(
arg
==
13
){
infoEvent
(
"Cntr: cstartPhase = %d, cinternalStartphase = %d, block = %d"
,
cstartPhase
,
cinternalStartphase
,
cndbBlocksCount
);
infoEvent
(
"Cntr: cmasterNodeId = %d"
,
cmasterNodeId
);
}
if
(
dumpState
->
args
[
0
]
==
DumpStateOrd
::
NdbcntrTestStopOnError
){
if
(
arg
==
DumpStateOrd
::
NdbcntrTestStopOnError
){
if
(
theConfiguration
.
stopOnError
()
==
true
)
((
Configuration
&
)
theConfiguration
).
stopOnError
(
false
);
...
...
@@ -1943,6 +2008,28 @@ Ndbcntr::execDUMP_STATE_ORD(Signal* signal)
SystemError
::
SignalLength
,
JBA
);
}
if
(
arg
==
DumpStateOrd
::
NdbcntrStopNodes
)
{
NdbNodeBitmask
mask
;
for
(
Uint32
i
=
1
;
i
<
signal
->
getLength
();
i
++
)
mask
.
set
(
signal
->
theData
[
i
]);
StopReq
*
req
=
(
StopReq
*
)
signal
->
getDataPtrSend
();
req
->
senderRef
=
RNIL
;
req
->
senderData
=
123
;
req
->
requestInfo
=
0
;
req
->
singleuser
=
0
;
req
->
singleUserApi
=
0
;
mask
.
copyto
(
NdbNodeBitmask
::
Size
,
req
->
nodes
);
StopReq
::
setPerformRestart
(
req
->
requestInfo
,
1
);
StopReq
::
setNoStart
(
req
->
requestInfo
,
1
);
StopReq
::
setStopNodes
(
req
->
requestInfo
,
1
);
StopReq
::
setStopAbort
(
req
->
requestInfo
,
1
);
sendSignal
(
reference
(),
GSN_STOP_REQ
,
signal
,
StopReq
::
SignalLength
,
JBB
);
return
;
}
}
//Ndbcntr::execDUMP_STATE_ORD()
...
...
@@ -2003,9 +2090,12 @@ Ndbcntr::execSTOP_REQ(Signal* signal){
Uint32
senderData
=
req
->
senderData
;
BlockReference
senderRef
=
req
->
senderRef
;
bool
abort
=
StopReq
::
getStopAbort
(
req
->
requestInfo
);
bool
stopnodes
=
StopReq
::
getStopNodes
(
req
->
requestInfo
);
if
(
getNodeState
().
startLevel
<
NodeState
::
SL_STARTED
||
abort
&&
!
singleuser
){
if
(
!
singleuser
&&
(
getNodeState
().
startLevel
<
NodeState
::
SL_STARTED
||
(
abort
&&
!
stopnodes
)))
{
/**
* Node is not started yet
*
...
...
@@ -2047,21 +2137,71 @@ Ndbcntr::execSTOP_REQ(Signal* signal){
else
ref
->
errorCode
=
StopRef
::
NodeShutdownInProgress
;
ref
->
senderData
=
senderData
;
sendSignal
(
senderRef
,
GSN_STOP_REF
,
signal
,
StopRef
::
SignalLength
,
JBB
);
if
(
senderRef
!=
RNIL
)
sendSignal
(
senderRef
,
GSN_STOP_REF
,
signal
,
StopRef
::
SignalLength
,
JBB
);
return
;
}
if
(
stopnodes
&&
!
abort
)
{
jam
();
ref
->
errorCode
=
StopRef
::
UnsupportedNodeShutdown
;
ref
->
senderData
=
senderData
;
if
(
senderRef
!=
RNIL
)
sendSignal
(
senderRef
,
GSN_STOP_REF
,
signal
,
StopRef
::
SignalLength
,
JBB
);
return
;
}
if
(
stopnodes
&&
cmasterNodeId
!=
getOwnNodeId
())
{
jam
();
ref
->
errorCode
=
StopRef
::
MultiNodeShutdownNotMaster
;
ref
->
senderData
=
senderData
;
if
(
senderRef
!=
RNIL
)
sendSignal
(
senderRef
,
GSN_STOP_REF
,
signal
,
StopRef
::
SignalLength
,
JBB
);
return
;
}
c_stopRec
.
stopReq
=
*
req
;
c_stopRec
.
stopInitiatedTime
=
NdbTick_CurrentMillisecond
();
if
(
!
singleuser
)
{
if
(
StopReq
::
getSystemStop
(
c_stopRec
.
stopReq
.
requestInfo
))
{
if
(
stopnodes
)
{
jam
();
if
(
!
c_stopRec
.
checkNodeFail
(
signal
))
{
jam
();
if
(
StopReq
::
getPerformRestart
(
c_stopRec
.
stopReq
.
requestInfo
)){
return
;
}
char
buf
[
100
];
NdbNodeBitmask
mask
;
mask
.
assign
(
NdbNodeBitmask
::
Size
,
c_stopRec
.
stopReq
.
nodes
);
infoEvent
(
"Initiating shutdown abort of %s"
,
mask
.
getText
(
buf
));
ndbout_c
(
"Initiating shutdown abort of %s"
,
mask
.
getText
(
buf
));
WaitGCPReq
*
req
=
(
WaitGCPReq
*
)
&
signal
->
theData
[
0
];
req
->
senderRef
=
reference
();
req
->
senderData
=
StopRecord
::
SR_BLOCK_GCP_START_GCP
;
req
->
requestType
=
WaitGCPReq
::
BlockStartGcp
;
sendSignal
(
DBDIH_REF
,
GSN_WAIT_GCP_REQ
,
signal
,
WaitGCPReq
::
SignalLength
,
JBB
);
return
;
}
else
if
(
!
singleuser
)
{
if
(
StopReq
::
getSystemStop
(
c_stopRec
.
stopReq
.
requestInfo
))
{
jam
();
if
(
StopReq
::
getPerformRestart
(
c_stopRec
.
stopReq
.
requestInfo
))
{
((
Configuration
&
)
theConfiguration
).
stopOnError
(
false
);
}
}
if
(
!
c_stopRec
.
checkNodeFail
(
signal
)){
if
(
!
c_stopRec
.
checkNodeFail
(
signal
))
{
jam
();
return
;
}
...
...
@@ -2131,7 +2271,17 @@ Ndbcntr::StopRecord::checkNodeFail(Signal* signal){
*/
NodeBitmask
ndbMask
;
ndbMask
.
assign
(
cntr
.
c_startedNodes
);
ndbMask
.
clear
(
cntr
.
getOwnNodeId
());
if
(
StopReq
::
getStopNodes
(
stopReq
.
requestInfo
))
{
NdbNodeBitmask
tmp
;
tmp
.
assign
(
NdbNodeBitmask
::
Size
,
stopReq
.
nodes
);
ndbMask
.
bitANDC
(
tmp
);
}
else
{
ndbMask
.
clear
(
cntr
.
getOwnNodeId
());
}
CheckNodeGroups
*
sd
=
(
CheckNodeGroups
*
)
&
signal
->
theData
[
0
];
sd
->
blockRef
=
cntr
.
reference
();
...
...
@@ -2153,7 +2303,8 @@ Ndbcntr::StopRecord::checkNodeFail(Signal* signal){
ref
->
errorCode
=
StopRef
::
NodeShutdownWouldCauseSystemCrash
;
const
BlockReference
bref
=
stopReq
.
senderRef
;
cntr
.
sendSignal
(
bref
,
GSN_STOP_REF
,
signal
,
StopRef
::
SignalLength
,
JBB
);
if
(
bref
!=
RNIL
)
cntr
.
sendSignal
(
bref
,
GSN_STOP_REF
,
signal
,
StopRef
::
SignalLength
,
JBB
);
stopReq
.
senderRef
=
0
;
...
...
@@ -2203,23 +2354,23 @@ Ndbcntr::StopRecord::checkTcTimeout(Signal* signal){
if
(
stopReq
.
getSystemStop
(
stopReq
.
requestInfo
)
||
stopReq
.
singleuser
){
jam
();
if
(
stopReq
.
singleuser
)
{
jam
();
AbortAllReq
*
req
=
(
AbortAllReq
*
)
&
signal
->
theData
[
0
];
req
->
senderRef
=
cntr
.
reference
();
req
->
senderData
=
12
;
cntr
.
sendSignal
(
DBTC_REF
,
GSN_ABORT_ALL_REQ
,
signal
,
AbortAllReq
::
SignalLength
,
JBB
);
}
{
jam
();
AbortAllReq
*
req
=
(
AbortAllReq
*
)
&
signal
->
theData
[
0
];
req
->
senderRef
=
cntr
.
reference
();
req
->
senderData
=
12
;
cntr
.
sendSignal
(
DBTC_REF
,
GSN_ABORT_ALL_REQ
,
signal
,
AbortAllReq
::
SignalLength
,
JBB
);
}
else
{
WaitGCPReq
*
req
=
(
WaitGCPReq
*
)
&
signal
->
theData
[
0
];
req
->
senderRef
=
cntr
.
reference
();
req
->
senderData
=
12
;
req
->
requestType
=
WaitGCPReq
::
CompleteForceStart
;
cntr
.
sendSignal
(
DBDIH_REF
,
GSN_WAIT_GCP_REQ
,
signal
,
WaitGCPReq
::
SignalLength
,
JBB
);
}
{
WaitGCPReq
*
req
=
(
WaitGCPReq
*
)
&
signal
->
theData
[
0
];
req
->
senderRef
=
cntr
.
reference
();
req
->
senderData
=
StopRecord
::
SR_CLUSTER_SHUTDOWN
;
req
->
requestType
=
WaitGCPReq
::
CompleteForceStart
;
cntr
.
sendSignal
(
DBDIH_REF
,
GSN_WAIT_GCP_REQ
,
signal
,
WaitGCPReq
::
SignalLength
,
JBB
);
}
}
else
{
jam
();
StopPermReq
*
req
=
(
StopPermReq
*
)
&
signal
->
theData
[
0
];
...
...
@@ -2381,7 +2532,7 @@ void Ndbcntr::execWAIT_GCP_REF(Signal* signal){
WaitGCPReq
*
req
=
(
WaitGCPReq
*
)
&
signal
->
theData
[
0
];
req
->
senderRef
=
reference
();
req
->
senderData
=
12
;
req
->
senderData
=
StopRecord
::
SR_CLUSTER_SHUTDOWN
;
req
->
requestType
=
WaitGCPReq
::
CompleteForceStart
;
sendSignal
(
DBDIH_REF
,
GSN_WAIT_GCP_REQ
,
signal
,
WaitGCPReq
::
SignalLength
,
JBB
);
...
...
@@ -2390,29 +2541,129 @@ void Ndbcntr::execWAIT_GCP_REF(Signal* signal){
void
Ndbcntr
::
execWAIT_GCP_CONF
(
Signal
*
signal
){
jamEntry
();
ndbrequire
(
StopReq
::
getSystemStop
(
c_stopRec
.
stopReq
.
requestInfo
));
NodeState
newState
(
NodeState
::
SL_STOPPING_3
,
true
);
WaitGCPConf
*
conf
=
(
WaitGCPConf
*
)
signal
->
getDataPtr
();
/**
* Inform QMGR so that arbitrator won't kill us
*/
NodeStateRep
*
rep
=
(
NodeStateRep
*
)
&
signal
->
theData
[
0
];
rep
->
nodeState
=
newState
;
rep
->
nodeState
.
masterNodeId
=
cmasterNodeId
;
rep
->
nodeState
.
setNodeGroup
(
c_nodeGroup
);
EXECUTE_DIRECT
(
QMGR
,
GSN_NODE_STATE_REP
,
signal
,
NodeStateRep
::
SignalLength
);
if
(
StopReq
::
getPerformRestart
(
c_stopRec
.
stopReq
.
requestInfo
)){
jam
();
StartOrd
*
startOrd
=
(
StartOrd
*
)
&
signal
->
theData
[
0
];
startOrd
->
restartInfo
=
c_stopRec
.
stopReq
.
requestInfo
;
sendSignalWithDelay
(
CMVMI_REF
,
GSN_START_ORD
,
signal
,
500
,
StartOrd
::
SignalLength
);
}
else
{
switch
(
conf
->
senderData
){
case
StopRecord
:
:
SR_BLOCK_GCP_START_GCP
:
{
jam
();
/**
*
*/
if
(
!
c_stopRec
.
checkNodeFail
(
signal
))
{
jam
();
goto
unblock
;
}
WaitGCPReq
*
req
=
(
WaitGCPReq
*
)
&
signal
->
theData
[
0
];
req
->
senderRef
=
reference
();
req
->
senderData
=
StopRecord
::
SR_WAIT_COMPLETE_GCP
;
req
->
requestType
=
WaitGCPReq
::
CompleteIfRunning
;
sendSignal
(
DBDIH_REF
,
GSN_WAIT_GCP_REQ
,
signal
,
WaitGCPReq
::
SignalLength
,
JBB
);
return
;
}
case
StopRecord
:
:
SR_UNBLOCK_GCP_START_GCP
:
{
jam
();
sendSignalWithDelay
(
CMVMI_REF
,
GSN_STOP_ORD
,
signal
,
500
,
1
);
return
;
}
case
StopRecord
:
:
SR_WAIT_COMPLETE_GCP
:
{
jam
();
if
(
!
c_stopRec
.
checkNodeFail
(
signal
))
{
jam
();
goto
unblock
;
}
NdbNodeBitmask
tmp
;
tmp
.
assign
(
NdbNodeBitmask
::
Size
,
c_stopRec
.
stopReq
.
nodes
);
c_stopRec
.
m_stop_req_counter
=
tmp
;
NodeReceiverGroup
rg
(
QMGR
,
tmp
);
StopReq
*
stopReq
=
(
StopReq
*
)
&
signal
->
theData
[
0
];
*
stopReq
=
c_stopRec
.
stopReq
;
stopReq
->
senderRef
=
reference
();
sendSignal
(
rg
,
GSN_STOP_REQ
,
signal
,
StopReq
::
SignalLength
,
JBA
);
c_stopRec
.
m_state
=
StopRecord
::
SR_QMGR_STOP_REQ
;
return
;
}
case
StopRecord
:
:
SR_CLUSTER_SHUTDOWN
:
{
jam
();
break
;
}
}
{
ndbrequire
(
StopReq
::
getSystemStop
(
c_stopRec
.
stopReq
.
requestInfo
));
NodeState
newState
(
NodeState
::
SL_STOPPING_3
,
true
);
/**
* Inform QMGR so that arbitrator won't kill us
*/
NodeStateRep
*
rep
=
(
NodeStateRep
*
)
&
signal
->
theData
[
0
];
rep
->
nodeState
=
newState
;
rep
->
nodeState
.
masterNodeId
=
cmasterNodeId
;
rep
->
nodeState
.
setNodeGroup
(
c_nodeGroup
);
EXECUTE_DIRECT
(
QMGR
,
GSN_NODE_STATE_REP
,
signal
,
NodeStateRep
::
SignalLength
);
if
(
StopReq
::
getPerformRestart
(
c_stopRec
.
stopReq
.
requestInfo
)){
jam
();
StartOrd
*
startOrd
=
(
StartOrd
*
)
&
signal
->
theData
[
0
];
startOrd
->
restartInfo
=
c_stopRec
.
stopReq
.
requestInfo
;
sendSignalWithDelay
(
CMVMI_REF
,
GSN_START_ORD
,
signal
,
500
,
StartOrd
::
SignalLength
);
}
else
{
jam
();
sendSignalWithDelay
(
CMVMI_REF
,
GSN_STOP_ORD
,
signal
,
500
,
1
);
}
return
;
}
unblock:
WaitGCPReq
*
req
=
(
WaitGCPReq
*
)
&
signal
->
theData
[
0
];
req
->
senderRef
=
reference
();
req
->
senderData
=
StopRecord
::
SR_UNBLOCK_GCP_START_GCP
;
req
->
requestType
=
WaitGCPReq
::
UnblockStartGcp
;
sendSignal
(
DBDIH_REF
,
GSN_WAIT_GCP_REQ
,
signal
,
WaitGCPReq
::
SignalLength
,
JBB
);
}
/**
 * Handler for STOP_CONF during a multi-node stop.
 *
 * Checks (ndbrequire) that the stop record is in SR_QMGR_STOP_REQ and clears
 * the confirming node from m_stop_req_counter.  While the counter is not yet
 * done nothing more happens.  Once it is done, the set of nodes in the stop
 * request is logged, one FAIL_REP with cause ZMULTI_NODE_SHUTDOWN is sent per
 * node in that set (to the receiver group built from QMGR and
 * c_clusterNodes), and the stop record moves to SR_WAIT_NODE_FAILURES.
 *
 * @param signal  incoming STOP_CONF; its data area is reused to build the
 *                outgoing FAIL_REP signals
 */
void
Ndbcntr::execSTOP_CONF(Signal* signal)
{
  jamEntry();
  StopConf* stopConf = (StopConf*)signal->getDataPtr();
  ndbrequire(c_stopRec.m_state == StopRecord::SR_QMGR_STOP_REQ);

  // Read the confirming node id before the signal data is overwritten below
  c_stopRec.m_stop_req_counter.clearWaitingFor(stopConf->nodeId);

  if (!c_stopRec.m_stop_req_counter.done())
  {
    // Confirmations are still outstanding
    return;
  }

  char text[100];
  NdbNodeBitmask stopping;
  stopping.assign(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes);
  infoEvent("Stopping of %s", stopping.getText(text));
  ndbout_c("Stopping of %s", stopping.getText(text));

  /**
   * Kill any node...
   * (the FAIL_REP is written in place over the start of the signal data)
   */
  FailRep* const failRep = (FailRep*)&signal->theData[0];
  failRep->failCause = FailRep::ZMULTI_NODE_SHUTDOWN;
  NodeReceiverGroup rg(QMGR, c_clusterNodes);

  // Walk every node id set in the stop request, starting the search at id 1
  for (Uint32 victim = NdbNodeBitmask::find(c_stopRec.stopReq.nodes, 1);
       victim != NdbNodeBitmask::NotFound;
       victim = NdbNodeBitmask::find(c_stopRec.stopReq.nodes, victim + 1))
  {
    failRep->failNodeId = victim;
    sendSignal(rg, GSN_FAIL_REP, signal, FailRep::SignalLength, JBA);
  }

  c_stopRec.m_state = StopRecord::SR_WAIT_NODE_FAILURES;
}
void
Ndbcntr
::
execSTTORRY
(
Signal
*
signal
){
...
...
ndb/src/kernel/blocks/qmgr/Qmgr.hpp
View file @
0e410aa1
...
...
@@ -29,6 +29,7 @@
#include <signaldata/CmRegSignalData.hpp>
#include <signaldata/ApiRegSignalData.hpp>
#include <signaldata/FailRep.hpp>
#include <signaldata/StopReq.hpp>
#include "timer.hpp"
...
...
@@ -100,7 +101,12 @@ public:
};
struct
StartRecord
{
void
reset
(){
m_startKey
++
;
m_startNode
=
0
;}
void
reset
(){
m_startKey
++
;
m_startNode
=
0
;
m_gsn
=
RNIL
;
m_nodes
.
clearWaitingFor
();
}
Uint32
m_startKey
;
Uint32
m_startNode
;
Uint64
m_startTimeout
;
...
...
@@ -112,6 +118,14 @@ public:
NdbNodeBitmask
c_definedNodes
;
// DB nodes in config
NdbNodeBitmask
c_clusterNodes
;
// DB nodes in cluster
NodeBitmask
c_connectedNodes
;
// All kinds of connected nodes
/**
* Nodes which we're checking for partitioned cluster
*
* i.e. nodes that connect to us when we have already elected a president
*/
NdbNodeBitmask
c_readnodes_nodes
;
Uint32
c_maxDynamicId
;
// Records
...
...
@@ -204,6 +218,7 @@ private:
void
execPRES_TOCONF
(
Signal
*
signal
);
void
execDISCONNECT_REP
(
Signal
*
signal
);
void
execSYSTEM_ERROR
(
Signal
*
signal
);
void
execSTOP_REQ
(
Signal
*
signal
);
// Received signals
void
execDUMP_STATE_ORD
(
Signal
*
signal
);
...
...
@@ -218,6 +233,8 @@ private:
void
execREAD_NODESREQ
(
Signal
*
signal
);
void
execSET_VAR_REQ
(
Signal
*
signal
);
void
execREAD_NODESREF
(
Signal
*
signal
);
void
execREAD_NODESCONF
(
Signal
*
signal
);
void
execAPI_VERSION_REQ
(
Signal
*
signal
);
void
execAPI_BROADCAST_REP
(
Signal
*
signal
);
...
...
@@ -234,6 +251,8 @@ private:
void
execARBIT_STOPREP
(
Signal
*
signal
);
// Statement blocks
void
check_readnodes_reply
(
Signal
*
signal
,
Uint32
nodeId
,
Uint32
gsn
);
void
node_failed
(
Signal
*
signal
,
Uint16
aFailedNode
);
void
checkStartInterface
(
Signal
*
signal
);
void
failReport
(
Signal
*
signal
,
...
...
@@ -251,8 +270,9 @@ private:
// Generated statement blocks
void
startphase1
(
Signal
*
signal
);
void
electionWon
();
void
electionWon
(
Signal
*
signal
);
void
cmInfoconf010Lab
(
Signal
*
signal
);
void
apiHbHandlingLab
(
Signal
*
signal
);
void
timerHandlingLab
(
Signal
*
signal
);
void
hbReceivedLab
(
Signal
*
signal
);
...
...
@@ -387,7 +407,9 @@ private:
Uint16
cfailedNodes
[
MAX_NDB_NODES
];
Uint16
cprepFailedNodes
[
MAX_NDB_NODES
];
Uint16
ccommitFailedNodes
[
MAX_NDB_NODES
];
StopReq
c_stopReq
;
void
check_multi_node_shutdown
(
Signal
*
signal
);
};
#endif
ndb/src/kernel/blocks/qmgr/QmgrInit.cpp
View file @
0e410aa1
...
...
@@ -35,9 +35,8 @@ void Qmgr::initData()
Uint32
hbDBAPI
=
500
;
setHbApiDelay
(
hbDBAPI
);
c_connectedNodes
.
clear
();
c_connectedNodes
.
set
(
getOwnNodeId
());
c_stopReq
.
senderRef
=
0
;
}
//Qmgr::initData()
void
Qmgr
::
initRecords
()
...
...
@@ -52,6 +51,7 @@ Qmgr::Qmgr(const class Configuration & conf)
// Transit signals
addRecSignal
(
GSN_DUMP_STATE_ORD
,
&
Qmgr
::
execDUMP_STATE_ORD
);
addRecSignal
(
GSN_STOP_REQ
,
&
Qmgr
::
execSTOP_REQ
);
addRecSignal
(
GSN_DEBUG_SIG
,
&
Qmgr
::
execDEBUG_SIG
);
addRecSignal
(
GSN_CONTINUEB
,
&
Qmgr
::
execCONTINUEB
);
addRecSignal
(
GSN_CM_HEARTBEAT
,
&
Qmgr
::
execCM_HEARTBEAT
);
...
...
@@ -96,6 +96,9 @@ Qmgr::Qmgr(const class Configuration & conf)
addRecSignal
(
GSN_ARBIT_CHOOSEREF
,
&
Qmgr
::
execARBIT_CHOOSEREF
);
addRecSignal
(
GSN_ARBIT_STOPREP
,
&
Qmgr
::
execARBIT_STOPREP
);
addRecSignal
(
GSN_READ_NODESREF
,
&
Qmgr
::
execREAD_NODESREF
);
addRecSignal
(
GSN_READ_NODESCONF
,
&
Qmgr
::
execREAD_NODESCONF
);
initData
();
}
//Qmgr::Qmgr()
...
...
ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
View file @
0e410aa1
...
...
@@ -56,6 +56,33 @@
#define DEBUG_START3(signal, msg)
#endif
/**
* c_start.m_gsn = GSN_CM_REGREQ
* Possible for all nodes
* c_start.m_nodes contains all nodes in config
*
* c_start.m_gsn = GSN_CM_NODEINFOREQ;
* Set when receiving CM_REGCONF
* State possible for starting node only (not in cluster)
*
* c_start.m_nodes contains all nodes in the alive cluster
* that have not replied to GSN_CM_NODEINFOREQ
* passed by president in GSN_CM_REGCONF
*
* c_start.m_gsn = GSN_CM_ADD
* Possible for president only
* Set when receiving and accepting CM_REGREQ (to include node)
*
* c_start.m_nodes contains all nodes in alive cluster + starting node
* that have not replied to GSN_CM_ADD
* by sending GSN_CM_ACKADD
*
* c_start.m_gsn = GSN_CM_NODEINFOCONF
* Possible for non presidents only
* c_start.m_nodes contains a node that has been accepted by president
* but has not connected to us yet
*/
// Signal entries and statement blocks
/* 4 P R O G R A M */
/*******************************/
...
...
@@ -280,18 +307,24 @@ void Qmgr::execCONNECT_REP(Signal* signal)
{
jamEntry
();
const
Uint32
nodeId
=
signal
->
theData
[
0
];
if
(
ERROR_INSERTED
(
931
))
{
jam
();
ndbout_c
(
"Discarding CONNECT_REP(%d)"
,
nodeId
);
infoEvent
(
"Discarding CONNECT_REP(%d)"
,
nodeId
);
return
;
}
c_connectedNodes
.
set
(
nodeId
);
NodeRecPtr
nodePtr
;
nodePtr
.
i
=
getOwnNodeId
();
ptrCheckGuard
(
nodePtr
,
MAX_NODES
,
nodeRec
);
switch
(
nodePtr
.
p
->
phase
){
case
ZSTARTING
:
case
ZRUNNING
:
ndbrequire
(
!
c_clusterNodes
.
get
(
nodeId
));
case
ZSTARTING
:
jam
();
if
(
!
c_start
.
m_nodes
.
isWaitingFor
(
nodeId
)){
jam
();
return
;
}
break
;
case
ZPREPARE_FAIL
:
case
ZFAIL_CLOSING
:
...
...
@@ -303,35 +336,83 @@ void Qmgr::execCONNECT_REP(Signal* signal)
case
ZAPI_INACTIVE
:
return
;
}
if
(
getNodeInfo
(
nodeId
).
getType
()
!=
NodeInfo
::
DB
)
{
jam
();
return
;
}
switch
(
c_start
.
m_gsn
){
case
GSN_CM_REGREQ
:
jam
();
sendCmRegReq
(
signal
,
nodeId
);
/**
* We're waiting for CM_REGCONF c_start.m_nodes contains all configured
* nodes
*/
ndbrequire
(
nodePtr
.
p
->
phase
==
ZSTARTING
);
ndbrequire
(
c_start
.
m_nodes
.
isWaitingFor
(
nodeId
));
return
;
case
GSN_CM_NODEINFOREQ
:
jam
();
sendCmNodeInfoReq
(
signal
,
nodeId
,
nodePtr
.
p
);
if
(
c_start
.
m_nodes
.
isWaitingFor
(
nodeId
))
{
jam
();
ndbrequire
(
getOwnNodeId
()
!=
cpresident
);
ndbrequire
(
nodePtr
.
p
->
phase
==
ZSTARTING
);
sendCmNodeInfoReq
(
signal
,
nodeId
,
nodePtr
.
p
);
return
;
}
return
;
case
GSN_CM_
ADD
:{
case
GSN_CM_
NODEINFOCONF
:{
jam
();
ndbrequire
(
getOwnNodeId
()
!=
cpresident
);
c_start
.
m_nodes
.
clearWaitingFor
(
nodeId
);
c_start
.
m_gsn
=
RNIL
;
NodeRecPtr
addNodePtr
;
addNodePtr
.
i
=
nodeId
;
ptrCheckGuard
(
addNodePtr
,
MAX_NDB_NODES
,
nodeRec
);
cmAddPrepare
(
signal
,
addNodePtr
,
nodePtr
.
p
);
return
;
ndbrequire
(
getOwnNodeId
()
!=
cpresident
);
ndbrequire
(
nodePtr
.
p
->
phase
==
ZRUNNING
);
if
(
c_start
.
m_nodes
.
isWaitingFor
(
nodeId
))
{
jam
();
c_start
.
m_nodes
.
clearWaitingFor
(
nodeId
);
c_start
.
m_gsn
=
RNIL
;
NodeRecPtr
addNodePtr
;
addNodePtr
.
i
=
nodeId
;
ptrCheckGuard
(
addNodePtr
,
MAX_NDB_NODES
,
nodeRec
);
cmAddPrepare
(
signal
,
addNodePtr
,
nodePtr
.
p
);
return
;
}
}
default:
return
;
(
void
)
1
;
}
ndbrequire
(
!
c_start
.
m_nodes
.
isWaitingFor
(
nodeId
));
ndbrequire
(
!
c_readnodes_nodes
.
get
(
nodeId
));
c_readnodes_nodes
.
set
(
nodeId
);
signal
->
theData
[
0
]
=
reference
();
sendSignal
(
calcQmgrBlockRef
(
nodeId
),
GSN_READ_NODESREQ
,
signal
,
1
,
JBA
);
return
;
}
//Qmgr::execCONNECT_REP()
/**
 * Signal handler for READ_NODESCONF.
 *
 * Resolves the node id of the block that sent the signal and hands the
 * signal over to check_readnodes_reply(), tagged as a CONF reply.
 */
void
Qmgr::execREAD_NODESCONF(Signal* signal)
{
  const Uint32 senderNode = refToNode(signal->getSendersBlockRef());
  check_readnodes_reply(signal, senderNode, GSN_READ_NODESCONF);
}
/**
 * Signal handler for READ_NODESREF.
 *
 * Resolves the node id of the block that sent the signal and hands the
 * signal over to check_readnodes_reply(), tagged as a REF reply.
 */
void
Qmgr::execREAD_NODESREF(Signal* signal)
{
  const Uint32 senderNode = refToNode(signal->getSendersBlockRef());
  check_readnodes_reply(signal, senderNode, GSN_READ_NODESREF);
}
/*******************************/
/* CM_INFOCONF */
/*******************************/
...
...
@@ -622,22 +703,33 @@ void Qmgr::execCM_REGCONF(Signal* signal)
jamEntry
();
const
CmRegConf
*
const
cmRegConf
=
(
CmRegConf
*
)
&
signal
->
theData
[
0
];
Uint32
presidentNodeId
=
cmRegConf
->
presidentNodeId
;
if
(
!
ndbCompatible_ndb_ndb
(
NDB_VERSION
,
cmRegConf
->
presidentVersion
))
{
jam
();
char
buf
[
128
];
BaseString
::
snprintf
(
buf
,
sizeof
(
buf
),
"incompatible version own=0x%x other=0x%x, shutting down"
,
NDB_VERSION
,
cmRegConf
->
presidentVersion
);
BaseString
::
snprintf
(
buf
,
sizeof
(
buf
),
"incompatible version own=0x%x other=0x%x, "
" shutting down"
,
NDB_VERSION
,
cmRegConf
->
presidentVersion
);
systemErrorLab
(
signal
,
__LINE__
,
buf
);
return
;
}
myNodePtr
.
i
=
getOwnNodeId
();
ptrCheckGuard
(
myNodePtr
,
MAX_NDB_NODES
,
nodeRec
);
ndbrequire
(
c_start
.
m_gsn
==
GSN_CM_REGREQ
);
ndbrequire
(
myNodePtr
.
p
->
phase
=
ZSTARTING
);
cpdistref
=
cmRegConf
->
presidentBlockRef
;
cpresident
=
cmRegConf
->
presidentNodeId
;
UintR
TdynamicId
=
cmRegConf
->
dynamicId
;
c_maxDynamicId
=
TdynamicId
;
c_clusterNodes
.
assign
(
NdbNodeBitmask
::
Size
,
cmRegConf
->
allNdbNodes
);
myNodePtr
.
p
->
ndynamicId
=
TdynamicId
;
/*--------------------------------------------------------------*/
// Send this as an EVENT REPORT to inform about hearing about
// other NDB node proclaiming to be president.
...
...
@@ -648,10 +740,6 @@ void Qmgr::execCM_REGCONF(Signal* signal)
signal
->
theData
[
3
]
=
TdynamicId
;
sendSignal
(
CMVMI_REF
,
GSN_EVENT_REP
,
signal
,
4
,
JBB
);
myNodePtr
.
i
=
getOwnNodeId
();
ptrCheckGuard
(
myNodePtr
,
MAX_NDB_NODES
,
nodeRec
);
myNodePtr
.
p
->
ndynamicId
=
TdynamicId
;
for
(
nodePtr
.
i
=
1
;
nodePtr
.
i
<
MAX_NDB_NODES
;
nodePtr
.
i
++
)
{
jam
();
if
(
c_clusterNodes
.
get
(
nodePtr
.
i
)){
...
...
@@ -674,6 +762,84 @@ void Qmgr::execCM_REGCONF(Signal* signal)
return
;
}
//Qmgr::execCM_REGCONF()
/**
 * Evaluate the reply to a READ_NODESREQ that was sent to a newly connected
 * node, and detect a partitioned cluster.
 *
 * @param signal  the received READ_NODESCONF / READ_NODESREF signal; its data
 *                area is reused for any signal sent from here
 * @param nodeId  node that sent the reply (must be set in c_readnodes_nodes)
 * @param gsn     GSN_READ_NODESCONF or GSN_READ_NODESREF
 *
 * Outcomes:
 *  - REF, or CONF whose masterNodeId is ZNIL: READ_NODESREQ is sent again to
 *    the same node (the node stays marked in c_readnodes_nodes).
 *  - CONF naming the same president as cpresident: the node is cleared from
 *    c_readnodes_nodes and nothing else happens.
 *  - CONF naming a different president: the cluster is partitioned.  If this
 *    node's start level is SL_STARTED, FAIL_REP(ZPARTITIONED_CLUSTER) is sent
 *    to the replying node for every node of its partition; otherwise this
 *    node stops itself through progError().
 */
void
Qmgr::check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn)
{
  // Neither record pointer is used below; the ptrCheckGuard calls are kept
  // from the original so that the index checks on both node ids still run.
  NodeRecPtr myNodePtr;
  myNodePtr.i = getOwnNodeId();
  ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);

  NodeRecPtr nodePtr;
  nodePtr.i = nodeId;
  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);

  ndbrequire(c_readnodes_nodes.get(nodeId));
  ReadNodesConf* conf = (ReadNodesConf*)signal->getDataPtr();

  // A REF carries no usable payload, so masterNodeId is only inspected for a
  // CONF (short-circuit).  In both cases simply ask the node again.
  if (gsn == GSN_READ_NODESREF || conf->masterNodeId == ZNIL)
  {
    jam();
    signal->theData[0] = reference();
    sendSignal(calcQmgrBlockRef(nodeId), GSN_READ_NODESREQ, signal, 1, JBA);
    return;
  }

  const Uint32 president = conf->masterNodeId;
  if (president == cpresident)
  {
    jam();
    c_readnodes_nodes.clear(nodeId);
    return;
  }

  char buf[255];
  BaseString::snprintf(buf, sizeof(buf),
                       "Partitioned cluster! check StartPartialTimeout, "
                       " node %d thinks %d is president, "
                       " I think president is: %d",
                       nodeId, president, cpresident);

  // Print through an explicit "%s": buf is data, not a format string.
  ndbout_c("%s", buf);
  CRASH_INSERTION(933);

  if (getNodeState().startLevel == NodeState::SL_STARTED)
  {
    jam();
    // Copy the other partition's node set out of the signal first: the
    // FailRep built below is written into the same signal data area.
    NdbNodeBitmask part;
    part.assign(NdbNodeBitmask::Size, conf->clusterNodes);

    FailRep* rep = (FailRep*)signal->getDataPtrSend();
    rep->failCause = FailRep::ZPARTITIONED_CLUSTER;
    rep->president = cpresident;
    c_clusterNodes.copyto(NdbNodeBitmask::Size, rep->partition);

    // president/partition are only read by the receiver (failReportLab) when
    // the signal length is SignalLength + ExtraLength, so the extended
    // length must be sent for that information to be transmitted at all.
    const Uint32 len = FailRep::SignalLength + FailRep::ExtraLength;
    const Uint32 ref = calcQmgrBlockRef(nodeId);

    Uint32 i = 0;
    while ((i = part.find(i + 1)) != NdbNodeBitmask::NotFound)
    {
      if (i == nodeId)
        continue;
      rep->failNodeId = i;
      sendSignal(ref, GSN_FAIL_REP, signal, len, JBA);
    }

    // The report about the replying node itself goes last, on JBB while the
    // others use JBA (as in the original).
    rep->failNodeId = nodeId;
    sendSignal(ref, GSN_FAIL_REP, signal, len, JBB);
    return;
  }

  CRASH_INSERTION(932);

  progError(__LINE__, ERR_ARBIT_SHUTDOWN, buf);

  ndbrequire(false);
}
void
Qmgr
::
sendCmNodeInfoReq
(
Signal
*
signal
,
Uint32
nodeId
,
const
NodeRec
*
self
){
CmNodeInfoReq
*
const
req
=
(
CmNodeInfoReq
*
)
signal
->
getDataPtrSend
();
...
...
@@ -706,13 +872,15 @@ Qmgr::sendCmNodeInfoReq(Signal* signal, Uint32 nodeId, const NodeRec * self){
void
Qmgr
::
execCM_REGREF
(
Signal
*
signal
)
{
jamEntry
();
c_regReqReqRecv
++
;
// Ignore block reference in data[0]
UintR
TaddNodeno
=
signal
->
theData
[
1
];
UintR
TrefuseReason
=
signal
->
theData
[
2
];
Uint32
candidate
=
signal
->
theData
[
3
];
DEBUG_START3
(
signal
,
TrefuseReason
);
c_regReqReqRecv
++
;
// Ignore block reference in data[0]
if
(
candidate
!=
cpresidentCandidate
){
jam
();
...
...
@@ -800,7 +968,7 @@ void Qmgr::execCM_REGREF(Signal* signal)
Uint64
now
=
NdbTick_CurrentMillisecond
();
if
((
c_regReqReqRecv
==
cnoOfNodes
)
||
now
>
c_stopElectionTime
){
jam
();
electionWon
();
electionWon
(
signal
);
sendSttorryLab
(
signal
);
/**
...
...
@@ -814,7 +982,7 @@ void Qmgr::execCM_REGREF(Signal* signal)
}
//Qmgr::execCM_REGREF()
void
Qmgr
::
electionWon
(){
Qmgr
::
electionWon
(
Signal
*
signal
){
NodeRecPtr
myNodePtr
;
cpresident
=
getOwnNodeId
();
/* This node becomes president. */
myNodePtr
.
i
=
getOwnNodeId
();
...
...
@@ -833,6 +1001,12 @@ Qmgr::electionWon(){
cpresidentAlive
=
ZTRUE
;
c_stopElectionTime
=
~
0
;
c_start
.
reset
();
signal
->
theData
[
0
]
=
EventReport
::
CM_REGCONF
;
signal
->
theData
[
1
]
=
getOwnNodeId
();
signal
->
theData
[
2
]
=
cpresident
;
signal
->
theData
[
3
]
=
1
;
sendSignal
(
CMVMI_REF
,
GSN_EVENT_REP
,
signal
,
4
,
JBB
);
}
/*
...
...
@@ -967,7 +1141,7 @@ Qmgr::cmAddPrepare(Signal* signal, NodeRecPtr nodePtr, const NodeRec * self){
ndbrequire
(
signal
->
header
.
theVerId_signalNumber
==
GSN_CM_ADD
);
c_start
.
m_nodes
.
clearWaitingFor
();
c_start
.
m_nodes
.
setWaitingFor
(
nodePtr
.
i
);
c_start
.
m_gsn
=
GSN_CM_
ADD
;
c_start
.
m_gsn
=
GSN_CM_
NODEINFOCONF
;
#else
warningEvent
(
"Enabling communication to CM_ADD node %u state=%d"
,
nodePtr
.
i
,
...
...
@@ -1872,7 +2046,8 @@ void Qmgr::execDISCONNECT_REP(Signal* signal)
const
Uint32
nodeId
=
rep
->
nodeId
;
const
Uint32
err
=
rep
->
err
;
c_connectedNodes
.
clear
(
nodeId
);
c_readnodes_nodes
.
clear
(
nodeId
);
NodeRecPtr
nodePtr
;
nodePtr
.
i
=
getOwnNodeId
();
ptrCheckGuard
(
nodePtr
,
MAX_NODES
,
nodeRec
);
...
...
@@ -1893,9 +2068,13 @@ void Qmgr::execDISCONNECT_REP(Signal* signal)
case
ZAPI_ACTIVE
:
ndbrequire
(
false
);
case
ZAPI_INACTIVE
:
{
char
buf
[
100
];
BaseString
::
snprintf
(
buf
,
100
,
"Node %u disconected"
,
nodeId
);
progError
(
__LINE__
,
ERR_SR_OTHERNODEFAILED
,
buf
);
ndbrequire
(
false
);
}
}
node_failed
(
signal
,
nodeId
);
}
//DISCONNECT_REP
...
...
@@ -2150,10 +2329,16 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode,
failedNodePtr
.
i
=
aFailedNode
;
ptrCheckGuard
(
failedNodePtr
,
MAX_NODES
,
nodeRec
);
FailRep
*
rep
=
(
FailRep
*
)
signal
->
getDataPtr
();
check_multi_node_shutdown
(
signal
);
if
(
failedNodePtr
.
i
==
getOwnNodeId
())
{
jam
();
Uint32
code
=
0
;
const
char
*
msg
=
0
;
char
extra
[
100
];
switch
(
aFailCause
){
case
FailRep
:
:
ZOWN_FAILURE
:
msg
=
"Own failure"
;
...
...
@@ -2174,17 +2359,46 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode,
case
FailRep
:
:
ZLINK_FAILURE
:
msg
=
"Connection failure"
;
break
;
case
FailRep
:
:
ZPARTITIONED_CLUSTER
:
{
code
=
ERR_ARBIT_SHUTDOWN
;
char
buf1
[
100
],
buf2
[
100
];
c_clusterNodes
.
getText
(
buf1
);
if
(
signal
->
getLength
()
==
FailRep
::
SignalLength
+
FailRep
::
ExtraLength
&&
signal
->
header
.
theVerId_signalNumber
==
GSN_FAIL_REP
)
{
jam
();
NdbNodeBitmask
part
;
part
.
assign
(
NdbNodeBitmask
::
Size
,
rep
->
partition
);
part
.
getText
(
buf2
);
BaseString
::
snprintf
(
extra
,
sizeof
(
extra
),
"Partitioned cluster!"
" Our cluster: %s other cluster: %s"
,
buf1
,
buf2
);
}
else
{
jam
();
BaseString
::
snprintf
(
extra
,
sizeof
(
extra
),
"Partitioned cluster!"
" Our cluster: %s "
,
buf1
);
}
msg
=
extra
;
break
;
}
}
char
buf
[
100
];
BaseString
::
snprintf
(
buf
,
100
,
CRASH_INSERTION
(
932
);
char
buf
[
255
];
BaseString
::
snprintf
(
buf
,
sizeof
(
buf
),
"We(%u) have been declared dead by %u reason: %s(%u)"
,
getOwnNodeId
(),
refToNode
(
signal
->
getSendersBlockRef
()),
aFailCause
,
msg
?
msg
:
"<Unknown>"
);
progError
(
__LINE__
,
0
,
buf
);
progError
(
__LINE__
,
code
,
buf
);
return
;
}
//if
...
...
@@ -2241,7 +2455,9 @@ void Qmgr::execPREP_FAILREQ(Signal* signal)
{
NodeRecPtr
myNodePtr
;
jamEntry
();
check_multi_node_shutdown
(
signal
);
PrepFailReqRef
*
const
prepFail
=
(
PrepFailReqRef
*
)
&
signal
->
theData
[
0
];
BlockReference
Tblockref
=
prepFail
->
xxxBlockRef
;
...
...
@@ -3893,6 +4109,7 @@ Qmgr::stateArbitCrash(Signal* signal)
if
(
!
(
arbitRec
.
getTimediff
()
>
getArbitTimeout
()))
return
;
#endif
CRASH_INSERTION
(
932
);
progError
(
__LINE__
,
NDBD_EXIT_ARBIT_SHUTDOWN
,
"Arbitrator decided to shutdown this node"
);
}
...
...
@@ -4054,3 +4271,40 @@ Qmgr::execAPI_BROADCAST_REP(Signal* signal)
NodeReceiverGroup
rg
(
API_CLUSTERMGR
,
mask
);
sendSignal
(
rg
,
api
.
gsn
,
signal
,
len
,
JBB
);
// forward sections
}
/**
 * Signal handler for STOP_REQ.
 *
 * Stores a copy of the request in c_stopReq.  When the request carries a
 * non-zero senderRef, this node must be part of the request's node bitmask
 * (enforced with ndbrequire) and a STOP_CONF is sent back to the sender.
 * A request with senderRef == 0 is only stored; no reply is sent.
 */
void
Qmgr::execSTOP_REQ(Signal* signal)
{
  jamEntry();

  const StopReq* const req = (StopReq*)signal->getDataPtr();
  c_stopReq = *req;

  if (c_stopReq.senderRef == 0)
    return;

  ndbrequire(NdbNodeBitmask::get(c_stopReq.nodes, getOwnNodeId()));

  StopConf* const reply = (StopConf*)signal->getDataPtrSend();
  reply->senderData = c_stopReq.senderData;
  // The nodeState field is filled with this node's id.
  reply->nodeState = getOwnNodeId();
  sendSignal(c_stopReq.senderRef, GSN_STOP_CONF, signal,
             StopConf::SignalLength, JBA);
}
/**
 * Act on a stored stop request (c_stopReq) that includes this node.
 *
 * Does nothing unless c_stopReq has a non-zero senderRef and this node is set
 * in its node bitmask.  Otherwise a signal is executed directly in CMVMI:
 * START_ORD (with restartInfo taken from the request) when the request asks
 * for a restart, STOP_ORD when it does not.
 */
void
Qmgr::check_multi_node_shutdown(Signal* signal)
{
  if (!c_stopReq.senderRef)
    return;

  if (!NdbNodeBitmask::get(c_stopReq.nodes, getOwnNodeId()))
    return;

  jam();
  if (!StopReq::getPerformRestart(c_stopReq.requestInfo))
  {
    EXECUTE_DIRECT(CMVMI, GSN_STOP_ORD, signal, 1);
    return;
  }

  jam();
  StartOrd* startOrd = (StartOrd*)&signal->theData[0];
  startOrd->restartInfo = c_stopReq.requestInfo;
  EXECUTE_DIRECT(CMVMI, GSN_START_ORD, signal, 2);
}
ndb/test/ndbapi/testNodeRestart.cpp
View file @
0e410aa1
...
...
@@ -22,7 +22,7 @@
#include <NdbRestarts.hpp>
#include <Vector.hpp>
#include <signaldata/DumpStateOrd.hpp>
#include <Bitmask.hpp>
int
runLoadTable
(
NDBT_Context
*
ctx
,
NDBT_Step
*
step
){
...
...
@@ -669,6 +669,206 @@ err:
return
NDBT_FAILED
;
}
/**
 * Test step for bug 18612 (node-restart variant).
 *
 * Per loop: pick cnt/2 node pairs (one node from each pair goes into groupA,
 * its same-node-group peer into groupB), stop the groupA nodes through the
 * master, send dump code 9000 with the opposite group's node list to every
 * node of each group, start groupA, send dump code 9001 to all nodes, and
 * finally expect groupA to end up in "no start" state before restarting
 * everything.  Error insert 932 is armed on all nodes in between.
 *
 * NOTE(review): the meaning of dump codes 9000/9001 is not visible in this
 * function; presumably they block/unblock communication between the two
 * groups -- confirm against the DUMP_STATE_ORD handlers.
 *
 * @return NDBT_OK on success (or when fewer than 2 DB nodes exist, in which
 *         case the test is stopped), NDBT_FAILED as soon as any restarter
 *         call reports failure.
 */
int runBug18612(NDBT_Context* ctx, NDBT_Step* step){

  // Assume two replicas
  NdbRestarter restarter;
  if (restarter.getNumDbNodes() < 2)
  {
    ctx->stopTest();
    return NDBT_OK;
  }

  Uint32 cnt = restarter.getNumDbNodes();
  const Uint32 half = cnt / 2;
  // Byte count used for every node-list copy (same expression as before).
  const size_t listBytes = sizeof(int) * cnt / 2;

  for (int round = 0; round < ctx->getNumLoops(); round++)
  {
    int groupA[256];
    int groupB[256];
    memset(groupA, 0, sizeof(groupA));
    memset(groupB, 0, sizeof(groupB));
    Bitmask<4> used;

    // Build the two groups: keep hopping to another node group until an
    // unused node is found (or no other node group exists).
    Uint32 current = restarter.getDbNodeId(rand() % cnt);
    for (Uint32 idx = 0; idx < half; idx++)
    {
      for (;;)
      {
        const int candidate =
          restarter.getRandomNodeOtherNodeGroup(current, rand());
        if (candidate == -1)
          break;
        current = candidate;
        if (!used.get(current))
          break;
      }

      groupA[idx] = current;
      groupB[idx] = restarter.getRandomNodeSameNodeGroup(current, rand());

      ndbout_c("nodes %d %d", current, groupB[idx]);

      assert(!used.get(current));
      assert(!used.get(groupB[idx]));
      used.set(current);
      used.set(groupB[idx]);
    }

    ndbout_c("done");

    // Ask the master to stop all groupA nodes, then wait for them.
    int dump[255];
    dump[0] = DumpStateOrd::NdbcntrStopNodes;
    memcpy(dump + 1, groupA, listBytes);

    Uint32 master = restarter.getMasterNodeId();

    if (restarter.dumpStateOneNode(master, dump, 1 + half) ||
        restarter.waitNodesNoStart(groupA, half))
    {
      return NDBT_FAILED;
    }

    int restartOnInsert[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };

    if (restarter.dumpStateAllNodes(restartOnInsert, 2) ||
        restarter.insertErrorInAllNodes(932))
    {
      return NDBT_FAILED;
    }

    // Dump 9000 + groupA list to every groupB node ...
    dump[0] = 9000;
    memcpy(dump + 1, groupA, listBytes);
    for (Uint32 idx = 0; idx < half; idx++)
    {
      if (restarter.dumpStateOneNode(groupB[idx], dump, 1 + half))
        return NDBT_FAILED;
    }

    // ... and 9000 + groupB list to every groupA node.
    dump[0] = 9000;
    memcpy(dump + 1, groupB, listBytes);
    for (Uint32 idx = 0; idx < half; idx++)
    {
      if (restarter.dumpStateOneNode(groupA[idx], dump, 1 + half))
        return NDBT_FAILED;
    }

    if (restarter.startNodes(groupA, half) ||
        restarter.waitNodesStartPhase(groupA, half, 2))
    {
      return NDBT_FAILED;
    }

    // Dump 9001 is sent to all nodes once per pair (dump[1] still holds the
    // first groupB node from the copy above).
    dump[0] = 9001;
    for (Uint32 idx = 0; idx < half; idx++)
    {
      if (restarter.dumpStateAllNodes(dump, 2))
        return NDBT_FAILED;
    }

    if (restarter.waitNodesNoStart(groupA, half))
      return NDBT_FAILED;

    for (Uint32 idx = 0; idx < half; idx++)
    {
      if (restarter.restartOneDbNode(groupA[idx], true, true, true))
        return NDBT_FAILED;
    }

    if (restarter.waitNodesNoStart(groupA, half) ||
        restarter.startAll() ||
        restarter.waitClusterStarted())
    {
      return NDBT_FAILED;
    }
  }
  return NDBT_OK;
}
/**
 * Test step for bug 18612 (system-restart variant).
 *
 * Per loop: pick cnt/2 node pairs (one node from each pair goes into groupA,
 * its same-node-group peer into groupB), restart all nodes via restartAll(),
 * send dump code 9000 with the opposite group's node list to every node of
 * each group, arm error insert 932, start all nodes and wait for start phase
 * 2, send dump code 9001 to all nodes, then accept any of: whole cluster in
 * "no start", groupA in "no start", or groupB in "no start".
 *
 * NOTE(review): the meaning of dump codes 9000/9001 is not visible in this
 * function; presumably they block/unblock communication between the two
 * groups -- confirm against the DUMP_STATE_ORD handlers.
 *
 * @return NDBT_OK on success (or when fewer than 2 DB nodes exist, in which
 *         case the test is stopped), NDBT_FAILED as soon as a required
 *         restarter call reports failure.
 */
int runBug18612SR(NDBT_Context* ctx, NDBT_Step* step){

  // Assume two replicas
  NdbRestarter restarter;
  if (restarter.getNumDbNodes() < 2)
  {
    ctx->stopTest();
    return NDBT_OK;
  }

  Uint32 cnt = restarter.getNumDbNodes();
  const Uint32 half = cnt / 2;
  // Byte count used for every node-list copy (same expression as before).
  const size_t listBytes = sizeof(int) * cnt / 2;

  for (int round = 0; round < ctx->getNumLoops(); round++)
  {
    int groupA[256];
    int groupB[256];
    memset(groupA, 0, sizeof(groupA));
    memset(groupB, 0, sizeof(groupB));
    Bitmask<4> used;

    // Build the two groups: keep hopping to another node group until an
    // unused node is found (or no other node group exists).
    Uint32 current = restarter.getDbNodeId(rand() % cnt);
    for (Uint32 idx = 0; idx < half; idx++)
    {
      for (;;)
      {
        const int candidate =
          restarter.getRandomNodeOtherNodeGroup(current, rand());
        if (candidate == -1)
          break;
        current = candidate;
        if (!used.get(current))
          break;
      }

      groupA[idx] = current;
      groupB[idx] = restarter.getRandomNodeSameNodeGroup(current, rand());

      ndbout_c("nodes %d %d", current, groupB[idx]);

      assert(!used.get(current));
      assert(!used.get(groupB[idx]));
      used.set(current);
      used.set(groupB[idx]);
    }

    ndbout_c("done");

    if (restarter.restartAll(false, true, false))
      return NDBT_FAILED;

    // Dump 9000 + groupA list to every groupB node ...
    int dump[255];
    dump[0] = 9000;
    memcpy(dump + 1, groupA, listBytes);
    for (Uint32 idx = 0; idx < half; idx++)
    {
      if (restarter.dumpStateOneNode(groupB[idx], dump, 1 + half))
        return NDBT_FAILED;
    }

    // ... and 9000 + groupB list to every groupA node.
    dump[0] = 9000;
    memcpy(dump + 1, groupB, listBytes);
    for (Uint32 idx = 0; idx < half; idx++)
    {
      if (restarter.dumpStateOneNode(groupA[idx], dump, 1 + half))
        return NDBT_FAILED;
    }

    int restartOnInsert[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };

    if (restarter.dumpStateAllNodes(restartOnInsert, 2) ||
        restarter.insertErrorInAllNodes(932) ||
        restarter.startAll() ||
        restarter.waitClusterStartPhase(2))
    {
      return NDBT_FAILED;
    }

    // Dump 9001 is sent to all nodes once per pair (dump[1] still holds the
    // first groupB node from the copy above).
    dump[0] = 9001;
    for (Uint32 idx = 0; idx < half; idx++)
    {
      if (restarter.dumpStateAllNodes(dump, 2))
        return NDBT_FAILED;
    }

    // Fail only when all three waits fail; each later wait is attempted only
    // if the previous one failed.
    if (restarter.waitClusterNoStart(30) &&
        restarter.waitNodesNoStart(groupA, half, 10) &&
        restarter.waitNodesNoStart(groupB, half, 10))
    {
      return NDBT_FAILED;
    }

    if (restarter.startAll() ||
        restarter.waitClusterStarted())
    {
      return NDBT_FAILED;
    }
  }
  return NDBT_OK;
}
NDBT_TESTSUITE
(
testNodeRestart
);
TESTCASE
(
"NoLoad"
,
"Test that one node at a time can be stopped and then restarted "
\
...
...
@@ -963,6 +1163,18 @@ TESTCASE("Bug18414",
STEP
(
runBug18414
);
FINALIZER
(
runClearTable
);
}
TESTCASE
(
"Bug18612"
,
"Test bug with partitioned clusters"
){
INITIALIZER
(
runLoadTable
);
STEP
(
runBug18612
);
FINALIZER
(
runClearTable
);
}
TESTCASE
(
"Bug18612SR"
,
"Test bug with partitioned clusters"
){
INITIALIZER
(
runLoadTable
);
STEP
(
runBug18612SR
);
FINALIZER
(
runClearTable
);
}
NDBT_TESTSUITE_END
(
testNodeRestart
);
int
main
(
int
argc
,
const
char
**
argv
){
...
...
ndb/test/run-test/daily-basic-tests.txt
View file @
0e410aa1
...
...
@@ -433,10 +433,18 @@ args: -n Bug16772 T1
#cmd: testSystemRestart
#args: -n Bug18385 T1
#
max-time:
5
00
max-time:
10
00
cmd: testNodeRestart
args: -n Bug18414 T1
max-time: 1000
cmd: testNodeRestart
args: -n Bug18612 T1
max-time: 1000
cmd: testNodeRestart
args: -n Bug18612SR T1
# OLD FLEX
max-time: 500
cmd: flexBench
...
...
ndb/test/src/NdbRestarts.cpp
View file @
0e410aa1
...
...
@@ -445,8 +445,7 @@ int twoNodeFailure(NdbRestarter& _restarter,
<<
") secs "
<<
endl
;
NdbSleep_SecSleep
(
seconds
);
randomId
=
(
rand
()
%
_restarter
.
getNumDbNodes
());
nodeId
=
_restarter
.
getDbNodeId
(
randomId
);
nodeId
=
_restarter
.
getRandomNodeOtherNodeGroup
(
nodeId
,
rand
());
g_info
<<
_restart
->
m_name
<<
": node = "
<<
nodeId
<<
endl
;
CHECK
(
_restarter
.
insertErrorInNode
(
nodeId
,
9999
)
==
0
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment