Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
1b5ce338
Commit
1b5ce338
authored
Apr 06, 2006
by
unknown
Browse files
Options
Browse Files
Download
Plain Diff
Merge perch.ndb.mysql.com:/home/jonas/src/mysql-5.0
into perch.ndb.mysql.com:/home/jonas/src/50-jonas
parents
9a1d80e9
71a1864f
Changes
20
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1715 additions
and
235 deletions
+1715
-235
ndb/include/kernel/signaldata/CmRegSignalData.hpp
ndb/include/kernel/signaldata/CmRegSignalData.hpp
+23
-6
ndb/include/kernel/signaldata/DumpStateOrd.hpp
ndb/include/kernel/signaldata/DumpStateOrd.hpp
+1
-0
ndb/include/kernel/signaldata/FailRep.hpp
ndb/include/kernel/signaldata/FailRep.hpp
+13
-3
ndb/include/kernel/signaldata/StopReq.hpp
ndb/include/kernel/signaldata/StopReq.hpp
+22
-16
ndb/include/kernel/signaldata/WaitGCP.hpp
ndb/include/kernel/signaldata/WaitGCP.hpp
+5
-2
ndb/include/mgmapi/ndb_logevent.h
ndb/include/mgmapi/ndb_logevent.h
+12
-1
ndb/src/common/debugger/EventLogger.cpp
ndb/src/common/debugger/EventLogger.cpp
+85
-0
ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp
ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp
+53
-13
ndb/src/kernel/blocks/dbdih/DbdihInit.cpp
ndb/src/kernel/blocks/dbdih/DbdihInit.cpp
+1
-0
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
+80
-26
ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp
ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp
+11
-0
ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp
ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp
+1
-0
ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
+303
-64
ndb/src/kernel/blocks/qmgr/Qmgr.hpp
ndb/src/kernel/blocks/qmgr/Qmgr.hpp
+46
-8
ndb/src/kernel/blocks/qmgr/QmgrInit.cpp
ndb/src/kernel/blocks/qmgr/QmgrInit.cpp
+8
-2
ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
+783
-90
ndb/src/kernel/vm/Configuration.cpp
ndb/src/kernel/vm/Configuration.cpp
+45
-0
ndb/test/ndbapi/testNodeRestart.cpp
ndb/test/ndbapi/testNodeRestart.cpp
+213
-1
ndb/test/run-test/daily-basic-tests.txt
ndb/test/run-test/daily-basic-tests.txt
+9
-1
ndb/test/src/NdbRestarts.cpp
ndb/test/src/NdbRestarts.cpp
+1
-2
No files found.
ndb/include/kernel/signaldata/CmRegSignalData.hpp
View file @
1b5ce338
...
...
@@ -30,12 +30,17 @@ class CmRegReq {
friend
class
Qmgr
;
public:
STATIC_CONST
(
SignalLength
=
3
);
STATIC_CONST
(
SignalLength
=
5
+
NdbNodeBitmask
::
Size
);
private:
Uint32
blockRef
;
Uint32
nodeId
;
Uint32
version
;
// See ndb_version.h
Uint32
version
;
// See ndb_version.h
Uint32
start_type
;
// As specified by cmd-line or mgm, NodeState::StartType
Uint32
latest_gci
;
// 0 means no fs
Uint32
skip_nodes
[
NdbNodeBitmask
::
Size
];
// Nodes that do not _need_
// to be part of restart
};
/**
...
...
@@ -59,8 +64,7 @@ private:
* The dynamic id that the node receiving this signal has
*/
Uint32
dynamicId
;
Uint32
allNdbNodes
[
NdbNodeBitmask
::
Size
];
Uint32
allNdbNodes
[
NdbNodeBitmask
::
Size
];
};
/**
...
...
@@ -73,7 +77,7 @@ class CmRegRef {
friend
class
Qmgr
;
public:
STATIC_CONST
(
SignalLength
=
4
);
STATIC_CONST
(
SignalLength
=
7
+
NdbNodeBitmask
::
Size
);
enum
ErrorCode
{
ZBUSY
=
0
,
/* Only the president can send this */
...
...
@@ -85,14 +89,27 @@ public:
* as president. */
ZNOT_PRESIDENT
=
5
,
/* We are not president */
ZNOT_DEAD
=
6
,
/* We are not dead when we are starting */
ZINCOMPATIBLE_VERSION
=
7
ZINCOMPATIBLE_VERSION
=
7
,
ZINCOMPATIBLE_START_TYPE
=
8
};
private:
Uint32
blockRef
;
Uint32
nodeId
;
Uint32
errorCode
;
/**
* Applicable if ZELECTION
*/
Uint32
presidentCandidate
;
Uint32
candidate_latest_gci
;
// 0 means non
/**
* Data for the sending node
*/
Uint32
latest_gci
;
Uint32
start_type
;
Uint32
skip_nodes
[
NdbNodeBitmask
::
Size
];
// Nodes that do not _need_
// to be part of restart
};
class
CmAdd
{
...
...
ndb/include/kernel/signaldata/DumpStateOrd.hpp
View file @
1b5ce338
...
...
@@ -64,6 +64,7 @@ public:
// 19 NDBFS Fipple with O_SYNC, O_CREATE etc.
// 20-24 BACKUP
NdbcntrTestStopOnError
=
25
,
NdbcntrStopNodes
=
70
,
// 100-105 TUP and ACC
// 200-240 UTIL
// 300-305 TRIX
...
...
ndb/include/kernel/signaldata/FailRep.hpp
View file @
1b5ce338
...
...
@@ -18,6 +18,7 @@
#define FAIL_REP_HPP
#include "SignalData.hpp"
#include <NodeBitmask.hpp>
/**
*
...
...
@@ -27,6 +28,7 @@ class FailRep {
* Sender(s) & Receiver(s)
*/
friend
class
Qmgr
;
friend
class
Ndbcntr
;
/**
* For printing
...
...
@@ -35,7 +37,8 @@ class FailRep {
public:
STATIC_CONST
(
SignalLength
=
2
);
STATIC_CONST
(
ExtraLength
=
1
+
NdbNodeBitmask
::
Size
);
enum
FailCause
{
ZOWN_FAILURE
=
0
,
ZOTHER_NODE_WHEN_WE_START
=
1
,
...
...
@@ -43,13 +46,20 @@ public:
ZSTART_IN_REGREQ
=
3
,
ZHEARTBEAT_FAILURE
=
4
,
ZLINK_FAILURE
=
5
,
ZOTHERNODE_FAILED_DURING_START
=
6
ZOTHERNODE_FAILED_DURING_START
=
6
,
ZMULTI_NODE_SHUTDOWN
=
7
,
ZPARTITIONED_CLUSTER
=
8
};
private:
Uint32
failNodeId
;
Uint32
failCause
;
/**
* Used when failCause == ZPARTITIONED_CLUSTER
*/
Uint32
president
;
Uint32
partition
[
NdbNodeBitmask
::
Size
];
};
...
...
ndb/include/kernel/signaldata/StopReq.hpp
View file @
1b5ce338
...
...
@@ -32,7 +32,7 @@ class StopReq
friend
class
MgmtSrvr
;
public:
STATIC_CONST
(
SignalLength
=
9
);
STATIC_CONST
(
SignalLength
=
9
+
NdbNodeBitmask
::
Size
);
public:
Uint32
senderRef
;
...
...
@@ -49,29 +49,34 @@ public:
Int32
readOperationTimeout
;
// Timeout before read operations are aborted
Int32
operationTimeout
;
// Timeout before all operations are aborted
Uint32
nodes
[
NdbNodeBitmask
::
Size
];
static
void
setSystemStop
(
Uint32
&
requestInfo
,
bool
value
);
static
void
setPerformRestart
(
Uint32
&
requestInfo
,
bool
value
);
static
void
setNoStart
(
Uint32
&
requestInfo
,
bool
value
);
static
void
setInitialStart
(
Uint32
&
requestInfo
,
bool
value
);
static
void
setEscalateOnNodeFail
(
Uint32
&
requestInfo
,
bool
value
);
/**
* Don't perform "graceful" shutdown/restart...
*/
static
void
setStopAbort
(
Uint32
&
requestInfo
,
bool
value
);
static
void
setStopNodes
(
Uint32
&
requestInfo
,
bool
value
);
static
bool
getSystemStop
(
const
Uint32
&
requestInfo
);
static
bool
getPerformRestart
(
const
Uint32
&
requestInfo
);
static
bool
getNoStart
(
const
Uint32
&
requestInfo
);
static
bool
getInitialStart
(
const
Uint32
&
requestInfo
);
static
bool
getEscalateOnNodeFail
(
const
Uint32
&
requestInfo
);
static
bool
getStopAbort
(
const
Uint32
&
requestInfo
);
static
bool
getStopNodes
(
const
Uint32
&
requestInfo
);
};
struct
StopConf
{
STATIC_CONST
(
SignalLength
=
2
);
Uint32
senderData
;
Uint32
nodeState
;
union
{
Uint32
nodeState
;
Uint32
nodeId
;
};
};
class
StopRef
...
...
@@ -94,7 +99,9 @@ public:
NodeShutdownInProgress
=
1
,
SystemShutdownInProgress
=
2
,
NodeShutdownWouldCauseSystemCrash
=
3
,
TransactionAbortFailed
=
4
TransactionAbortFailed
=
4
,
UnsupportedNodeShutdown
=
5
,
MultiNodeShutdownNotMaster
=
6
};
public:
...
...
@@ -132,16 +139,16 @@ StopReq::getInitialStart(const Uint32 & requestInfo)
inline
bool
StopReq
::
get
EscalateOnNodeFail
(
const
Uint32
&
requestInfo
)
StopReq
::
get
StopAbort
(
const
Uint32
&
requestInfo
)
{
return
requestInfo
&
16
;
return
requestInfo
&
32
;
}
inline
bool
StopReq
::
getStop
Abort
(
const
Uint32
&
requestInfo
)
StopReq
::
getStop
Nodes
(
const
Uint32
&
requestInfo
)
{
return
requestInfo
&
32
;
return
requestInfo
&
64
;
}
...
...
@@ -187,24 +194,23 @@ StopReq::setInitialStart(Uint32 & requestInfo, bool value)
inline
void
StopReq
::
set
EscalateOnNodeFail
(
Uint32
&
requestInfo
,
bool
value
)
StopReq
::
set
StopAbort
(
Uint32
&
requestInfo
,
bool
value
)
{
if
(
value
)
requestInfo
|=
16
;
requestInfo
|=
32
;
else
requestInfo
&=
~
16
;
requestInfo
&=
~
32
;
}
inline
void
StopReq
::
setStop
Abort
(
Uint32
&
requestInfo
,
bool
value
)
StopReq
::
setStop
Nodes
(
Uint32
&
requestInfo
,
bool
value
)
{
if
(
value
)
requestInfo
|=
32
;
requestInfo
|=
64
;
else
requestInfo
&=
~
32
;
requestInfo
&=
~
64
;
}
#endif
ndb/include/kernel/signaldata/WaitGCP.hpp
View file @
1b5ce338
...
...
@@ -46,7 +46,9 @@ public:
Complete
=
1
,
///< Wait for a GCP to complete
CompleteForceStart
=
2
,
///< Wait for a GCP to complete, starting one if needed
CompleteIfRunning
=
3
,
///< Wait for ongoing GCP
CurrentGCI
=
8
///< Immediately return current GCI
CurrentGCI
=
8
,
///< Immediately return current GCI
BlockStartGcp
=
9
,
UnblockStartGcp
=
10
};
Uint32
senderRef
;
...
...
@@ -70,11 +72,12 @@ class WaitGCPConf {
//friend class Grep::PSCoord;
public:
STATIC_CONST
(
SignalLength
=
2
);
STATIC_CONST
(
SignalLength
=
3
);
public:
Uint32
senderData
;
Uint32
gcp
;
Uint32
blockStatus
;
};
class
WaitGCPRef
{
...
...
ndb/include/mgmapi/ndb_logevent.h
View file @
1b5ce338
...
...
@@ -166,10 +166,14 @@ extern "C" {
/** NDB_MGM_EVENT_CATEGORY_BACKUP */
NDB_LE_BackupCompleted
=
56
,
/** NDB_MGM_EVENT_CATEGORY_BACKUP */
NDB_LE_BackupAborted
=
57
NDB_LE_BackupAborted
=
57
,
/* 58 used in 5.1 */
/* 59 used */
/** NDB_MGM_EVENT_CATEGORY_STARTUP */
NDB_LE_StartReport
=
60
/* 60 unused */
/* 61 unused */
/* 62 unused */
...
...
@@ -625,6 +629,13 @@ extern "C" {
unsigned
type
;
unsigned
node_id
;
}
SingleUser
;
/** Log event data @ref NDB_LE_StartReport */
struct
{
unsigned
report_type
;
unsigned
remaining_time
;
unsigned
bitmask_size
;
unsigned
bitmask_data
[
1
];
}
StartReport
;
#ifndef DOXYGEN_FIX
};
#else
...
...
ndb/src/common/debugger/EventLogger.cpp
View file @
1b5ce338
...
...
@@ -707,6 +707,90 @@ void getTextSingleUser(QQQQ) {
}
}
/**
 * Build the human-readable text for a StartReport event in m_text.
 *
 * Words read from theData by this function:
 *   [1]   report type (selects the message below)
 *   [2]   a time value, printed as seconds by the timed variants
 *   [3]   size, in words, of each bitmask that follows
 *   [4..] four consecutive bitmasks of that size; going by the labels in
 *         the messages they are: all nodes, connected nodes, no-wait
 *         nodes, and the nodes still missing / being waited for.
 *
 * NOTE(review): theData, m_text and m_text_len are presumably supplied by
 * the QQQQ parameter macro - confirm against its definition.
 * NOTE(review): each text buffer is 100 bytes and the bitmask size is taken
 * from the event data unchecked - confirm the sender bounds it.
 */
void getTextStartReport(QQQQ) {
  const Uint32 reportType = theData[1];
  const Uint32 seconds = theData[2];
  const Uint32 words = theData[3];

  // Render the four bitmasks in the order they appear in the event data.
  char text[4][100];
  for (Uint32 k = 0; k < 4; k++)
    BitmaskImpl::getText(words, theData + 4 + (k * words), text[k]);

  const char* const allNodes = text[0];
  const char* const connected = text[1];
  const char* const noWait = text[2];
  const char* const missing = text[3];

  switch (reportType) {
  case 1: // Wait initial
    BaseString::snprintf
      (m_text, m_text_len,
       "Initial start, waiting for %s to connect, "
       " nodes [ all: %s connected: %s no-wait: %s ]",
       missing, allNodes, connected, noWait);
    break;

  case 2: // Wait partial
    BaseString::snprintf
      (m_text, m_text_len,
       "Waiting until nodes: %s connects, "
       "nodes [ all: %s connected: %s no-wait: %s ]",
       missing, allNodes, connected, noWait);
    break;

  case 3: // Wait partial timeout
    BaseString::snprintf
      (m_text, m_text_len,
       "Waiting %u sec for nodes %s to connect, "
       "nodes [ all: %s connected: %s no-wait: %s ]",
       seconds, missing, allNodes, connected, noWait);
    break;

  case 4: // Wait partitioned
    BaseString::snprintf
      (m_text, m_text_len,
       "Waiting for non partitioned start, "
       "nodes [ all: %s connected: %s missing: %s no-wait: %s ]",
       allNodes, connected, missing, noWait);
    break;

  case 5: // Wait partitioned timeout
    BaseString::snprintf
      (m_text, m_text_len,
       "Waiting %u sec for non partitioned start, "
       "nodes [ all: %s connected: %s missing: %s no-wait: %s ]",
       seconds, allNodes, connected, missing, noWait);
    break;

  case 0x8000: // Do initial
    BaseString::snprintf
      (m_text, m_text_len,
       "Initial start with nodes %s [ missing: %s no-wait: %s ]",
       connected, missing, noWait);
    break;

  case 0x8001: // Do start
    BaseString::snprintf
      (m_text, m_text_len,
       "Start with all nodes %s",
       connected);
    break;

  case 0x8002: // Do partial
    BaseString::snprintf
      (m_text, m_text_len,
       "Start with nodes %s [ missing: %s no-wait: %s ]",
       connected, missing, noWait);
    break;

  case 0x8003: // Do partitioned
    BaseString::snprintf
      (m_text, m_text_len,
       "Start potentially partitioned with nodes %s "
       " [ missing: %s no-wait: %s ]",
       connected, missing, noWait);
    break;

  default:
    // Unrecognised report type: dump the raw type and all four masks.
    BaseString::snprintf
      (m_text, m_text_len,
       "Unknown startreport: 0x%x [ %s %s %s %s ]",
       reportType,
       allNodes, connected, noWait, missing);
  }
}
#if 0
BaseString::snprintf(m_text,
m_text_len,
...
...
@@ -755,6 +839,7 @@ const EventLoggerBase::EventRepLogLevelMatrix EventLoggerBase::matrix[] = {
ROW
(
StartREDOLog
,
LogLevel
::
llStartUp
,
10
,
Logger
::
LL_INFO
),
ROW
(
StartLog
,
LogLevel
::
llStartUp
,
10
,
Logger
::
LL_INFO
),
ROW
(
UNDORecordsExecuted
,
LogLevel
::
llStartUp
,
15
,
Logger
::
LL_INFO
),
ROW
(
StartReport
,
LogLevel
::
llStartUp
,
4
,
Logger
::
LL_INFO
),
// NODERESTART
ROW
(
NR_CopyDict
,
LogLevel
::
llNodeRestart
,
8
,
Logger
::
LL_INFO
),
...
...
ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp
View file @
1b5ce338
...
...
@@ -134,6 +134,9 @@ Cmvmi::~Cmvmi()
{
}
#ifdef ERROR_INSERT
NodeBitmask
c_error_9000_nodes_mask
;
#endif
void
Cmvmi
::
execNDB_TAMPER
(
Signal
*
signal
)
{
...
...
@@ -419,21 +422,33 @@ void Cmvmi::execOPEN_COMREQ(Signal* signal)
const
Uint32
len
=
signal
->
getLength
();
if
(
len
==
2
){
globalTransporterRegistry
.
do_connect
(
tStartingNode
);
globalTransporterRegistry
.
setIOState
(
tStartingNode
,
HaltIO
);
//-----------------------------------------------------
// Report that the connection to the node is opened
//-----------------------------------------------------
signal
->
theData
[
0
]
=
NDB_LE_CommunicationOpened
;
signal
->
theData
[
1
]
=
tStartingNode
;
sendSignal
(
CMVMI_REF
,
GSN_EVENT_REP
,
signal
,
2
,
JBB
);
//-----------------------------------------------------
#ifdef ERROR_INSERT
if
(
!
(
ERROR_INSERTED
(
9000
)
&&
c_error_9000_nodes_mask
.
get
(
tStartingNode
)))
#endif
{
globalTransporterRegistry
.
do_connect
(
tStartingNode
);
globalTransporterRegistry
.
setIOState
(
tStartingNode
,
HaltIO
);
//-----------------------------------------------------
// Report that the connection to the node is opened
//-----------------------------------------------------
signal
->
theData
[
0
]
=
NDB_LE_CommunicationOpened
;
signal
->
theData
[
1
]
=
tStartingNode
;
sendSignal
(
CMVMI_REF
,
GSN_EVENT_REP
,
signal
,
2
,
JBB
);
//-----------------------------------------------------
}
}
else
{
for
(
unsigned
int
i
=
1
;
i
<
MAX_NODES
;
i
++
)
{
jam
();
if
(
i
!=
getOwnNodeId
()
&&
getNodeInfo
(
i
).
m_type
==
tData2
){
jam
();
#ifdef ERROR_INSERT
if
(
ERROR_INSERTED
(
9000
)
&&
c_error_9000_nodes_mask
.
get
(
i
))
continue
;
#endif
globalTransporterRegistry
.
do_connect
(
i
);
globalTransporterRegistry
.
setIOState
(
i
,
HaltIO
);
...
...
@@ -1039,7 +1054,8 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
}
DumpStateOrd
*
const
&
dumpState
=
(
DumpStateOrd
*
)
&
signal
->
theData
[
0
];
if
(
dumpState
->
args
[
0
]
==
DumpStateOrd
::
CmvmiDumpConnections
){
Uint32
arg
=
dumpState
->
args
[
0
];
if
(
arg
==
DumpStateOrd
::
CmvmiDumpConnections
){
for
(
unsigned
int
i
=
1
;
i
<
MAX_NODES
;
i
++
){
const
char
*
nodeTypeStr
=
""
;
switch
(
getNodeInfo
(
i
).
m_type
){
...
...
@@ -1072,13 +1088,13 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
}
}
if
(
dumpState
->
args
[
0
]
==
DumpStateOrd
::
CmvmiDumpLongSignalMemory
){
if
(
arg
==
DumpStateOrd
::
CmvmiDumpLongSignalMemory
){
infoEvent
(
"Cmvmi: g_sectionSegmentPool size: %d free: %d"
,
g_sectionSegmentPool
.
getSize
(),
g_sectionSegmentPool
.
getNoOfFree
());
}
if
(
dumpState
->
args
[
0
]
==
DumpStateOrd
::
CmvmiSetRestartOnErrorInsert
)
if
(
arg
==
DumpStateOrd
::
CmvmiSetRestartOnErrorInsert
)
{
if
(
signal
->
getLength
()
==
1
)
{
...
...
@@ -1098,7 +1114,7 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
}
}
if
(
dumpState
->
args
[
0
]
==
DumpStateOrd
::
CmvmiTestLongSigWithDelay
)
{
if
(
arg
==
DumpStateOrd
::
CmvmiTestLongSigWithDelay
)
{
unsigned
i
;
Uint32
loopCount
=
dumpState
->
args
[
1
];
const
unsigned
len0
=
11
;
...
...
@@ -1126,6 +1142,30 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
sendSignal
(
reference
(),
GSN_TESTSIG
,
signal
,
8
,
JBB
,
ptr
,
2
);
}
#ifdef ERROR_INSERT
if
(
arg
==
9000
)
{
SET_ERROR_INSERT_VALUE
(
9000
);
for
(
Uint32
i
=
1
;
i
<
signal
->
getLength
();
i
++
)
c_error_9000_nodes_mask
.
set
(
signal
->
theData
[
i
]);
}
if
(
arg
==
9001
)
{
CLEAR_ERROR_INSERT_VALUE
;
for
(
Uint32
i
=
0
;
i
<
MAX_NODES
;
i
++
)
{
if
(
c_error_9000_nodes_mask
.
get
(
i
))
{
signal
->
theData
[
0
]
=
0
;
signal
->
theData
[
1
]
=
i
;
EXECUTE_DIRECT
(
CMVMI
,
GSN_OPEN_COMREQ
,
signal
,
2
);
}
}
c_error_9000_nodes_mask
.
clear
();
}
#endif
#ifdef VM_TRACE
#if 0
{
...
...
ndb/src/kernel/blocks/dbdih/DbdihInit.cpp
View file @
1b5ce338
...
...
@@ -71,6 +71,7 @@ void Dbdih::initData()
cwaitLcpSr
=
false
;
c_blockCommit
=
false
;
c_blockCommitNo
=
1
;
cntrlblockref
=
RNIL
;
}
//Dbdih::initData()
void
Dbdih
::
initRecords
()
...
...
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
View file @
1b5ce338
...
...
@@ -11659,7 +11659,7 @@ void Dbdih::makeNodeGroups(Uint32 nodeArray[])
Uint32
tmngNode
;
Uint32
tmngNodeGroup
;
Uint32
tmngLimit
;
Uint32
i
;
Uint32
i
,
j
;
/**-----------------------------------------------------------------------
* ASSIGN ALL ACTIVE NODES INTO NODE GROUPS. HOT SPARE NODES ARE ASSIGNED
...
...
@@ -11705,6 +11705,38 @@ void Dbdih::makeNodeGroups(Uint32 nodeArray[])
Sysfile
::
setNodeGroup
(
mngNodeptr
.
i
,
SYSFILE
->
nodeGroups
,
mngNodeptr
.
p
->
nodeGroup
);
}
//if
}
//for
for
(
i
=
0
;
i
<
cnoOfNodeGroups
;
i
++
)
{
jam
();
bool
alive
=
false
;
NodeGroupRecordPtr
NGPtr
;
NGPtr
.
i
=
i
;
ptrCheckGuard
(
NGPtr
,
MAX_NDB_NODES
,
nodeGroupRecord
);
for
(
j
=
0
;
j
<
NGPtr
.
p
->
nodeCount
;
j
++
)
{
jam
();
mngNodeptr
.
i
=
NGPtr
.
p
->
nodesInGroup
[
j
];
ptrCheckGuard
(
mngNodeptr
,
MAX_NDB_NODES
,
nodeRecord
);
if
(
checkNodeAlive
(
NGPtr
.
p
->
nodesInGroup
[
j
]))
{
alive
=
true
;
break
;
}
}
if
(
!
alive
)
{
char
buf
[
255
];
BaseString
::
snprintf
(
buf
,
sizeof
(
buf
),
"Illegal initial start, no alive node in nodegroup %u"
,
i
);
progError
(
__LINE__
,
NDBD_EXIT_SR_RESTARTCONFLICT
,
buf
);
}
}
}
//Dbdih::makeNodeGroups()
/**
...
...
@@ -12513,7 +12545,6 @@ void Dbdih::sendStartFragreq(Signal* signal,
void
Dbdih
::
setInitialActiveStatus
()
{
NodeRecordPtr
siaNodeptr
;
Uint32
tsiaNodeActiveStatus
;
Uint32
tsiaNoActiveNodes
;
tsiaNoActiveNodes
=
csystemnodes
-
cnoHotSpare
;
...
...
@@ -12521,39 +12552,34 @@ void Dbdih::setInitialActiveStatus()
SYSFILE
->
nodeStatus
[
i
]
=
0
;
for
(
siaNodeptr
.
i
=
1
;
siaNodeptr
.
i
<
MAX_NDB_NODES
;
siaNodeptr
.
i
++
)
{
ptrAss
(
siaNodeptr
,
nodeRecord
);
if
(
siaNodeptr
.
p
->
nodeStatus
==
NodeRecord
::
ALIVE
)
{
switch
(
siaNodeptr
.
p
->
nodeStatus
){
case
NodeRecord
:
:
ALIVE
:
case
NodeRecord
:
:
DEAD
:
if
(
tsiaNoActiveNodes
==
0
)
{
jam
();
siaNodeptr
.
p
->
activeStatus
=
Sysfile
::
NS_HotSpare
;
}
else
{
jam
();
tsiaNoActiveNodes
=
tsiaNoActiveNodes
-
1
;
siaNodeptr
.
p
->
activeStatus
=
Sysfile
::
NS_Active
;
}
//if
}
else
{
jam
();
siaNodeptr
.
p
->
activeStatus
=
Sysfile
::
NS_NotDefined
;
}
//if
switch
(
siaNodeptr
.
p
->
activeStatus
)
{
case
Sysfile
:
:
NS_Active
:
jam
();
tsiaNodeActiveStatus
=
Sysfile
::
NS_Active
;
break
;
case
Sysfile
:
:
NS_HotSpare
:
jam
();
tsiaNodeActiveStatus
=
Sysfile
::
NS_HotSpare
;
break
;
case
Sysfile
:
:
NS_NotDefined
:
jam
();
tsiaNodeActiveStatus
=
Sysfile
::
NS_NotDefined
;
if
(
siaNodeptr
.
p
->
nodeStatus
==
NodeRecord
::
ALIVE
)
{
jam
();
siaNodeptr
.
p
->
activeStatus
=
Sysfile
::
NS_Active
;
}
else
{
siaNodeptr
.
p
->
activeStatus
=
Sysfile
::
NS_NotActive_NotTakenOver
;
}
}
break
;
default:
ndbrequire
(
false
);
return
;
jam
(
);
siaNodeptr
.
p
->
activeStatus
=
Sysfile
::
NS_NotDefined
;
break
;
}
//switch
Sysfile
::
setNodeStatus
(
siaNodeptr
.
i
,
SYSFILE
->
nodeStatus
,
tsiaNodeActiveStatus
);
}
//if
Sysfile
::
setNodeStatus
(
siaNodeptr
.
i
,
SYSFILE
->
nodeStatus
,
siaNodeptr
.
p
->
activeStatus
);
}
//for
}
//Dbdih::setInitialActiveStatus()
...
...
@@ -14274,11 +14300,36 @@ void Dbdih::execWAIT_GCP_REQ(Signal* signal)
jam
();
conf
->
senderData
=
senderData
;
conf
->
gcp
=
cnewgcp
;
conf
->
blockStatus
=
cgcpOrderBlocked
;
sendSignal
(
senderRef
,
GSN_WAIT_GCP_CONF
,
signal
,
WaitGCPConf
::
SignalLength
,
JBB
);
return
;
}
//if
if
(
requestType
==
WaitGCPReq
::
BlockStartGcp
)
{
jam
();
conf
->
senderData
=
senderData
;
conf
->
gcp
=
cnewgcp
;
conf
->
blockStatus
=
cgcpOrderBlocked
;
sendSignal
(
senderRef
,
GSN_WAIT_GCP_CONF
,
signal
,
WaitGCPConf
::
SignalLength
,
JBB
);
cgcpOrderBlocked
=
1
;
return
;
}
if
(
requestType
==
WaitGCPReq
::
UnblockStartGcp
)
{
jam
();
conf
->
senderData
=
senderData
;
conf
->
gcp
=
cnewgcp
;
conf
->
blockStatus
=
cgcpOrderBlocked
;
sendSignal
(
senderRef
,
GSN_WAIT_GCP_CONF
,
signal
,
WaitGCPConf
::
SignalLength
,
JBB
);
cgcpOrderBlocked
=
0
;
return
;
}
if
(
isMaster
())
{
/**
* Master
...
...
@@ -14290,6 +14341,7 @@ void Dbdih::execWAIT_GCP_REQ(Signal* signal)
jam
();
conf
->
senderData
=
senderData
;
conf
->
gcp
=
coldgcp
;
conf
->
blockStatus
=
cgcpOrderBlocked
;
sendSignal
(
senderRef
,
GSN_WAIT_GCP_CONF
,
signal
,
WaitGCPConf
::
SignalLength
,
JBB
);
return
;
...
...
@@ -14376,6 +14428,7 @@ void Dbdih::execWAIT_GCP_CONF(Signal* signal)
conf
->
senderData
=
ptr
.
p
->
clientData
;
conf
->
gcp
=
gcp
;
conf
->
blockStatus
=
cgcpOrderBlocked
;
sendSignal
(
ptr
.
p
->
clientRef
,
GSN_WAIT_GCP_CONF
,
signal
,
WaitGCPConf
::
SignalLength
,
JBB
);
...
...
@@ -14443,6 +14496,7 @@ void Dbdih::emptyWaitGCPMasterQueue(Signal* signal)
c_waitGCPMasterList
.
next
(
ptr
);
conf
->
senderData
=
clientData
;
conf
->
blockStatus
=
cgcpOrderBlocked
;
sendSignal
(
clientRef
,
GSN_WAIT_GCP_CONF
,
signal
,
WaitGCPConf
::
SignalLength
,
JBB
);
...
...
ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp
View file @
1b5ce338
...
...
@@ -203,6 +203,7 @@ private:
void
execWAIT_GCP_CONF
(
Signal
*
signal
);
void
execSTOP_REQ
(
Signal
*
signal
);
void
execSTOP_CONF
(
Signal
*
signal
);
void
execRESUME_REQ
(
Signal
*
signal
);
void
execCHANGE_NODE_STATE_CONF
(
Signal
*
signal
);
...
...
@@ -338,6 +339,16 @@ public:
void
progError
(
int
line
,
int
cause
,
const
char
*
extra
)
{
cntr
.
progError
(
line
,
cause
,
extra
);
}
// Steps of a stop-nodes / shutdown sequence, kept in m_state below.
// Several values are also sent as senderData in WAIT_GCP_REQ signals.
enum
StopNodesStep
{
// Sent as senderData with a WaitGCPReq::BlockStartGcp request.
SR_BLOCK_GCP_START_GCP
=
0
,
SR_WAIT_COMPLETE_GCP
=
1
,
// Sent as senderData with a WaitGCPReq::UnblockStartGcp request.
SR_UNBLOCK_GCP_START_GCP
=
2
,
// Handled in execNODE_FAILREP: a STOP_CONF is sent to self once all
// nodes in stopReq.nodes have failed.
SR_QMGR_STOP_REQ
=
3
,
// Handled in execNODE_FAILREP: failed nodes are removed from
// stopReq.nodes until the set is empty.
SR_WAIT_NODE_FAILURES
=
4
,
// Replaces the literal senderData value 12 used with
// WaitGCPReq::CompleteForceStart requests.
SR_CLUSTER_SHUTDOWN
=
12
}
m_state
;
SignalCounter
m_stop_req_counter
;
};
private
:
StopRecord
c_stopRec
;
...
...
ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp
View file @
1b5ce338
...
...
@@ -87,6 +87,7 @@ Ndbcntr::Ndbcntr(const class Configuration & conf):
addRecSignal
(
GSN_STOP_ME_CONF
,
&
Ndbcntr
::
execSTOP_ME_CONF
);
addRecSignal
(
GSN_STOP_REQ
,
&
Ndbcntr
::
execSTOP_REQ
);
addRecSignal
(
GSN_STOP_CONF
,
&
Ndbcntr
::
execSTOP_CONF
);
addRecSignal
(
GSN_RESUME_REQ
,
&
Ndbcntr
::
execRESUME_REQ
);
addRecSignal
(
GSN_WAIT_GCP_REF
,
&
Ndbcntr
::
execWAIT_GCP_REF
);
...
...
ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
View file @
1b5ce338
...
...
@@ -42,6 +42,8 @@
#include <signaldata/FsRemoveReq.hpp>
#include <signaldata/ReadConfig.hpp>
#include <signaldata/FailRep.hpp>
#include <AttributeHeader.hpp>
#include <Configuration.hpp>
#include <DebuggerNames.hpp>
...
...
@@ -818,17 +820,9 @@ Ndbcntr::trySystemRestart(Signal* signal){
return
false
;
}
if
(
!
allNodes
&&
c_start
.
m_startPartialTimeout
>
now
){
jam
();
return
false
;
}
NodeState
::
StartType
srType
=
NodeState
::
ST_SYSTEM_RESTART
;
if
(
c_start
.
m_waiting
.
equal
(
c_start
.
m_withoutLog
)){
if
(
!
allNodes
){
jam
();
return
false
;
}
if
(
c_start
.
m_waiting
.
equal
(
c_start
.
m_withoutLog
))
{
jam
();
srType
=
NodeState
::
ST_INITIAL_START
;
c_start
.
m_starting
=
c_start
.
m_withoutLog
;
// Used for starting...
...
...
@@ -858,10 +852,6 @@ Ndbcntr::trySystemRestart(Signal* signal){
ndbrequire
(
false
);
// All nodes -> partitioning, which is not allowed
}
if
(
c_start
.
m_startPartitionedTimeout
>
now
){
jam
();
return
false
;
}
break
;
}
...
...
@@ -1474,13 +1464,74 @@ void Ndbcntr::execNODE_FAILREP(Signal* signal)
sendSignal
(
SUMA_REF
,
GSN_NODE_FAILREP
,
signal
,
NodeFailRep
::
SignalLength
,
JBB
);
if
(
c_stopRec
.
stopReq
.
senderRef
)
{
jam
();
switch
(
c_stopRec
.
m_state
){
case
StopRecord
:
:
SR_WAIT_NODE_FAILURES
:
{
jam
();
NdbNodeBitmask
tmp
;
tmp
.
assign
(
NdbNodeBitmask
::
Size
,
c_stopRec
.
stopReq
.
nodes
);
tmp
.
bitANDC
(
allFailed
);
tmp
.
copyto
(
NdbNodeBitmask
::
Size
,
c_stopRec
.
stopReq
.
nodes
);
if
(
tmp
.
isclear
())
{
jam
();
if
(
c_stopRec
.
stopReq
.
senderRef
!=
RNIL
)
{
jam
();
StopConf
*
const
stopConf
=
(
StopConf
*
)
&
signal
->
theData
[
0
];
stopConf
->
senderData
=
c_stopRec
.
stopReq
.
senderData
;
stopConf
->
nodeState
=
(
Uint32
)
NodeState
::
SL_SINGLEUSER
;
sendSignal
(
c_stopRec
.
stopReq
.
senderRef
,
GSN_STOP_CONF
,
signal
,
StopConf
::
SignalLength
,
JBB
);
}
c_stopRec
.
stopReq
.
senderRef
=
0
;
WaitGCPReq
*
req
=
(
WaitGCPReq
*
)
&
signal
->
theData
[
0
];
req
->
senderRef
=
reference
();
req
->
senderData
=
StopRecord
::
SR_UNBLOCK_GCP_START_GCP
;
req
->
requestType
=
WaitGCPReq
::
UnblockStartGcp
;
sendSignal
(
DBDIH_REF
,
GSN_WAIT_GCP_REQ
,
signal
,
WaitGCPReq
::
SignalLength
,
JBA
);
}
break
;
}
case
StopRecord
:
:
SR_QMGR_STOP_REQ
:
{
NdbNodeBitmask
tmp
;
tmp
.
assign
(
NdbNodeBitmask
::
Size
,
c_stopRec
.
stopReq
.
nodes
);
tmp
.
bitANDC
(
allFailed
);
if
(
tmp
.
isclear
())
{
Uint32
nodeId
=
allFailed
.
find
(
0
);
tmp
.
set
(
nodeId
);
StopConf
*
conf
=
(
StopConf
*
)
signal
->
getDataPtrSend
();
conf
->
senderData
=
c_stopRec
.
stopReq
.
senderData
;
conf
->
nodeId
=
nodeId
;
sendSignal
(
reference
(),
GSN_STOP_CONF
,
signal
,
StopConf
::
SignalLength
,
JBB
);
}
tmp
.
copyto
(
NdbNodeBitmask
::
Size
,
c_stopRec
.
stopReq
.
nodes
);
break
;
}
}
}
signal
->
theData
[
0
]
=
NDB_LE_NODE_FAILREP
;
signal
->
theData
[
2
]
=
0
;
Uint32
nodeId
=
0
;
while
(
!
allFailed
.
isclear
()){
nodeId
=
allFailed
.
find
(
nodeId
+
1
);
allFailed
.
clear
(
nodeId
);
signal
->
theData
[
0
]
=
NDB_LE_NODE_FAILREP
;
signal
->
theData
[
1
]
=
nodeId
;
signal
->
theData
[
2
]
=
0
;
sendSignal
(
CMVMI_REF
,
GSN_EVENT_REP
,
signal
,
3
,
JBB
);
}
//for
...
...
@@ -1924,13 +1975,15 @@ void
Ndbcntr
::
execDUMP_STATE_ORD
(
Signal
*
signal
)
{
DumpStateOrd
*
const
&
dumpState
=
(
DumpStateOrd
*
)
&
signal
->
theData
[
0
];
if
(
signal
->
theData
[
0
]
==
13
){
Uint32
arg
=
dumpState
->
args
[
0
];
if
(
arg
==
13
){
infoEvent
(
"Cntr: cstartPhase = %d, cinternalStartphase = %d, block = %d"
,
cstartPhase
,
cinternalStartphase
,
cndbBlocksCount
);
infoEvent
(
"Cntr: cmasterNodeId = %d"
,
cmasterNodeId
);
}
if
(
dumpState
->
args
[
0
]
==
DumpStateOrd
::
NdbcntrTestStopOnError
){
if
(
arg
==
DumpStateOrd
::
NdbcntrTestStopOnError
){
if
(
theConfiguration
.
stopOnError
()
==
true
)
((
Configuration
&
)
theConfiguration
).
stopOnError
(
false
);
...
...
@@ -1943,6 +1996,28 @@ Ndbcntr::execDUMP_STATE_ORD(Signal* signal)
SystemError
::
SignalLength
,
JBA
);
}
if
(
arg
==
DumpStateOrd
::
NdbcntrStopNodes
)
{
NdbNodeBitmask
mask
;
for
(
Uint32
i
=
1
;
i
<
signal
->
getLength
();
i
++
)
mask
.
set
(
signal
->
theData
[
i
]);
StopReq
*
req
=
(
StopReq
*
)
signal
->
getDataPtrSend
();
req
->
senderRef
=
RNIL
;
req
->
senderData
=
123
;
req
->
requestInfo
=
0
;
req
->
singleuser
=
0
;
req
->
singleUserApi
=
0
;
mask
.
copyto
(
NdbNodeBitmask
::
Size
,
req
->
nodes
);
StopReq
::
setPerformRestart
(
req
->
requestInfo
,
1
);
StopReq
::
setNoStart
(
req
->
requestInfo
,
1
);
StopReq
::
setStopNodes
(
req
->
requestInfo
,
1
);
StopReq
::
setStopAbort
(
req
->
requestInfo
,
1
);
sendSignal
(
reference
(),
GSN_STOP_REQ
,
signal
,
StopReq
::
SignalLength
,
JBB
);
return
;
}
}
//Ndbcntr::execDUMP_STATE_ORD()
...
...
@@ -2003,9 +2078,12 @@ Ndbcntr::execSTOP_REQ(Signal* signal){
Uint32
senderData
=
req
->
senderData
;
BlockReference
senderRef
=
req
->
senderRef
;
bool
abort
=
StopReq
::
getStopAbort
(
req
->
requestInfo
);
bool
stopnodes
=
StopReq
::
getStopNodes
(
req
->
requestInfo
);
if
(
getNodeState
().
startLevel
<
NodeState
::
SL_STARTED
||
abort
&&
!
singleuser
){
if
(
!
singleuser
&&
(
getNodeState
().
startLevel
<
NodeState
::
SL_STARTED
||
(
abort
&&
!
stopnodes
)))
{
/**
* Node is not started yet
*
...
...
@@ -2047,21 +2125,71 @@ Ndbcntr::execSTOP_REQ(Signal* signal){
else
ref
->
errorCode
=
StopRef
::
NodeShutdownInProgress
;
ref
->
senderData
=
senderData
;
sendSignal
(
senderRef
,
GSN_STOP_REF
,
signal
,
StopRef
::
SignalLength
,
JBB
);
if
(
senderRef
!=
RNIL
)
sendSignal
(
senderRef
,
GSN_STOP_REF
,
signal
,
StopRef
::
SignalLength
,
JBB
);
return
;
}
if
(
stopnodes
&&
!
abort
)
{
jam
();
ref
->
errorCode
=
StopRef
::
UnsupportedNodeShutdown
;
ref
->
senderData
=
senderData
;
if
(
senderRef
!=
RNIL
)
sendSignal
(
senderRef
,
GSN_STOP_REF
,
signal
,
StopRef
::
SignalLength
,
JBB
);
return
;
}
if
(
stopnodes
&&
cmasterNodeId
!=
getOwnNodeId
())
{
jam
();
ref
->
errorCode
=
StopRef
::
MultiNodeShutdownNotMaster
;
ref
->
senderData
=
senderData
;
if
(
senderRef
!=
RNIL
)
sendSignal
(
senderRef
,
GSN_STOP_REF
,
signal
,
StopRef
::
SignalLength
,
JBB
);
return
;
}
c_stopRec
.
stopReq
=
*
req
;
c_stopRec
.
stopInitiatedTime
=
NdbTick_CurrentMillisecond
();
if
(
!
singleuser
)
{
if
(
StopReq
::
getSystemStop
(
c_stopRec
.
stopReq
.
requestInfo
))
{
if
(
stopnodes
)
{
jam
();
if
(
!
c_stopRec
.
checkNodeFail
(
signal
))
{
jam
();
return
;
}
char
buf
[
100
];
NdbNodeBitmask
mask
;
mask
.
assign
(
NdbNodeBitmask
::
Size
,
c_stopRec
.
stopReq
.
nodes
);
infoEvent
(
"Initiating shutdown abort of %s"
,
mask
.
getText
(
buf
));
ndbout_c
(
"Initiating shutdown abort of %s"
,
mask
.
getText
(
buf
));
WaitGCPReq
*
req
=
(
WaitGCPReq
*
)
&
signal
->
theData
[
0
];
req
->
senderRef
=
reference
();
req
->
senderData
=
StopRecord
::
SR_BLOCK_GCP_START_GCP
;
req
->
requestType
=
WaitGCPReq
::
BlockStartGcp
;
sendSignal
(
DBDIH_REF
,
GSN_WAIT_GCP_REQ
,
signal
,
WaitGCPReq
::
SignalLength
,
JBB
);
return
;
}
else
if
(
!
singleuser
)
{
if
(
StopReq
::
getSystemStop
(
c_stopRec
.
stopReq
.
requestInfo
))
{
jam
();
if
(
StopReq
::
getPerformRestart
(
c_stopRec
.
stopReq
.
requestInfo
)){
if
(
StopReq
::
getPerformRestart
(
c_stopRec
.
stopReq
.
requestInfo
))
{
((
Configuration
&
)
theConfiguration
).
stopOnError
(
false
);
}
}
if
(
!
c_stopRec
.
checkNodeFail
(
signal
)){
if
(
!
c_stopRec
.
checkNodeFail
(
signal
))
{
jam
();
return
;
}
...
...
@@ -2131,7 +2259,17 @@ Ndbcntr::StopRecord::checkNodeFail(Signal* signal){
*/
NodeBitmask
ndbMask
;
ndbMask
.
assign
(
cntr
.
c_startedNodes
);
ndbMask
.
clear
(
cntr
.
getOwnNodeId
());
if
(
StopReq
::
getStopNodes
(
stopReq
.
requestInfo
))
{
NdbNodeBitmask
tmp
;
tmp
.
assign
(
NdbNodeBitmask
::
Size
,
stopReq
.
nodes
);
ndbMask
.
bitANDC
(
tmp
);
}
else
{
ndbMask
.
clear
(
cntr
.
getOwnNodeId
());
}
CheckNodeGroups
*
sd
=
(
CheckNodeGroups
*
)
&
signal
->
theData
[
0
];
sd
->
blockRef
=
cntr
.
reference
();
...
...
@@ -2153,7 +2291,8 @@ Ndbcntr::StopRecord::checkNodeFail(Signal* signal){
ref
->
errorCode
=
StopRef
::
NodeShutdownWouldCauseSystemCrash
;
const
BlockReference
bref
=
stopReq
.
senderRef
;
cntr
.
sendSignal
(
bref
,
GSN_STOP_REF
,
signal
,
StopRef
::
SignalLength
,
JBB
);
if
(
bref
!=
RNIL
)
cntr
.
sendSignal
(
bref
,
GSN_STOP_REF
,
signal
,
StopRef
::
SignalLength
,
JBB
);
stopReq
.
senderRef
=
0
;
...
...
@@ -2203,23 +2342,23 @@ Ndbcntr::StopRecord::checkTcTimeout(Signal* signal){
if
(
stopReq
.
getSystemStop
(
stopReq
.
requestInfo
)
||
stopReq
.
singleuser
){
jam
();
if
(
stopReq
.
singleuser
)
{
jam
();
AbortAllReq
*
req
=
(
AbortAllReq
*
)
&
signal
->
theData
[
0
];
req
->
senderRef
=
cntr
.
reference
();
req
->
senderData
=
12
;
cntr
.
sendSignal
(
DBTC_REF
,
GSN_ABORT_ALL_REQ
,
signal
,
AbortAllReq
::
SignalLength
,
JBB
);
}
{
jam
();
AbortAllReq
*
req
=
(
AbortAllReq
*
)
&
signal
->
theData
[
0
];
req
->
senderRef
=
cntr
.
reference
();
req
->
senderData
=
12
;
cntr
.
sendSignal
(
DBTC_REF
,
GSN_ABORT_ALL_REQ
,
signal
,
AbortAllReq
::
SignalLength
,
JBB
);
}
else
{
WaitGCPReq
*
req
=
(
WaitGCPReq
*
)
&
signal
->
theData
[
0
];
req
->
senderRef
=
cntr
.
reference
();
req
->
senderData
=
12
;
req
->
requestType
=
WaitGCPReq
::
CompleteForceStart
;
cntr
.
sendSignal
(
DBDIH_REF
,
GSN_WAIT_GCP_REQ
,
signal
,
WaitGCPReq
::
SignalLength
,
JBB
);
}
{
WaitGCPReq
*
req
=
(
WaitGCPReq
*
)
&
signal
->
theData
[
0
];
req
->
senderRef
=
cntr
.
reference
();
req
->
senderData
=
StopRecord
::
SR_CLUSTER_SHUTDOWN
;
req
->
requestType
=
WaitGCPReq
::
CompleteForceStart
;
cntr
.
sendSignal
(
DBDIH_REF
,
GSN_WAIT_GCP_REQ
,
signal
,
WaitGCPReq
::
SignalLength
,
JBB
);
}
}
else
{
jam
();
StopPermReq
*
req
=
(
StopPermReq
*
)
&
signal
->
theData
[
0
];
...
...
@@ -2381,7 +2520,7 @@ void Ndbcntr::execWAIT_GCP_REF(Signal* signal){
WaitGCPReq
*
req
=
(
WaitGCPReq
*
)
&
signal
->
theData
[
0
];
req
->
senderRef
=
reference
();
req
->
senderData
=
12
;
req
->
senderData
=
StopRecord
::
SR_CLUSTER_SHUTDOWN
;
req
->
requestType
=
WaitGCPReq
::
CompleteForceStart
;
sendSignal
(
DBDIH_REF
,
GSN_WAIT_GCP_REQ
,
signal
,
WaitGCPReq
::
SignalLength
,
JBB
);
...
...
@@ -2390,29 +2529,129 @@ void Ndbcntr::execWAIT_GCP_REF(Signal* signal){
void
Ndbcntr
::
execWAIT_GCP_CONF
(
Signal
*
signal
){
jamEntry
();
ndbrequire
(
StopReq
::
getSystemStop
(
c_stopRec
.
stopReq
.
requestInfo
));
NodeState
newState
(
NodeState
::
SL_STOPPING_3
,
true
);
WaitGCPConf
*
conf
=
(
WaitGCPConf
*
)
signal
->
getDataPtr
();
/**
* Inform QMGR so that arbitrator won't kill us
*/
NodeStateRep
*
rep
=
(
NodeStateRep
*
)
&
signal
->
theData
[
0
];
rep
->
nodeState
=
newState
;
rep
->
nodeState
.
masterNodeId
=
cmasterNodeId
;
rep
->
nodeState
.
setNodeGroup
(
c_nodeGroup
);
EXECUTE_DIRECT
(
QMGR
,
GSN_NODE_STATE_REP
,
signal
,
NodeStateRep
::
SignalLength
);
if
(
StopReq
::
getPerformRestart
(
c_stopRec
.
stopReq
.
requestInfo
)){
jam
();
StartOrd
*
startOrd
=
(
StartOrd
*
)
&
signal
->
theData
[
0
];
startOrd
->
restartInfo
=
c_stopRec
.
stopReq
.
requestInfo
;
sendSignalWithDelay
(
CMVMI_REF
,
GSN_START_ORD
,
signal
,
500
,
StartOrd
::
SignalLength
);
}
else
{
switch
(
conf
->
senderData
){
case
StopRecord
:
:
SR_BLOCK_GCP_START_GCP
:
{
jam
();
/**
*
*/
if
(
!
c_stopRec
.
checkNodeFail
(
signal
))
{
jam
();
goto
unblock
;
}
WaitGCPReq
*
req
=
(
WaitGCPReq
*
)
&
signal
->
theData
[
0
];
req
->
senderRef
=
reference
();
req
->
senderData
=
StopRecord
::
SR_WAIT_COMPLETE_GCP
;
req
->
requestType
=
WaitGCPReq
::
CompleteIfRunning
;
sendSignal
(
DBDIH_REF
,
GSN_WAIT_GCP_REQ
,
signal
,
WaitGCPReq
::
SignalLength
,
JBB
);
return
;
}
case
StopRecord
:
:
SR_UNBLOCK_GCP_START_GCP
:
{
jam
();
return
;
}
case
StopRecord
:
:
SR_WAIT_COMPLETE_GCP
:
{
jam
();
sendSignalWithDelay
(
CMVMI_REF
,
GSN_STOP_ORD
,
signal
,
500
,
1
);
if
(
!
c_stopRec
.
checkNodeFail
(
signal
))
{
jam
();
goto
unblock
;
}
NdbNodeBitmask
tmp
;
tmp
.
assign
(
NdbNodeBitmask
::
Size
,
c_stopRec
.
stopReq
.
nodes
);
c_stopRec
.
m_stop_req_counter
=
tmp
;
NodeReceiverGroup
rg
(
QMGR
,
tmp
);
StopReq
*
stopReq
=
(
StopReq
*
)
&
signal
->
theData
[
0
];
*
stopReq
=
c_stopRec
.
stopReq
;
stopReq
->
senderRef
=
reference
();
sendSignal
(
rg
,
GSN_STOP_REQ
,
signal
,
StopReq
::
SignalLength
,
JBA
);
c_stopRec
.
m_state
=
StopRecord
::
SR_QMGR_STOP_REQ
;
return
;
}
case
StopRecord
:
:
SR_CLUSTER_SHUTDOWN
:
{
jam
();
break
;
}
}
{
ndbrequire
(
StopReq
::
getSystemStop
(
c_stopRec
.
stopReq
.
requestInfo
));
NodeState
newState
(
NodeState
::
SL_STOPPING_3
,
true
);
/**
* Inform QMGR so that arbitrator won't kill us
*/
NodeStateRep
*
rep
=
(
NodeStateRep
*
)
&
signal
->
theData
[
0
];
rep
->
nodeState
=
newState
;
rep
->
nodeState
.
masterNodeId
=
cmasterNodeId
;
rep
->
nodeState
.
setNodeGroup
(
c_nodeGroup
);
EXECUTE_DIRECT
(
QMGR
,
GSN_NODE_STATE_REP
,
signal
,
NodeStateRep
::
SignalLength
);
if
(
StopReq
::
getPerformRestart
(
c_stopRec
.
stopReq
.
requestInfo
)){
jam
();
StartOrd
*
startOrd
=
(
StartOrd
*
)
&
signal
->
theData
[
0
];
startOrd
->
restartInfo
=
c_stopRec
.
stopReq
.
requestInfo
;
sendSignalWithDelay
(
CMVMI_REF
,
GSN_START_ORD
,
signal
,
500
,
StartOrd
::
SignalLength
);
}
else
{
jam
();
sendSignalWithDelay
(
CMVMI_REF
,
GSN_STOP_ORD
,
signal
,
500
,
1
);
}
return
;
}
unblock:
WaitGCPReq
*
req
=
(
WaitGCPReq
*
)
&
signal
->
theData
[
0
];
req
->
senderRef
=
reference
();
req
->
senderData
=
StopRecord
::
SR_UNBLOCK_GCP_START_GCP
;
req
->
requestType
=
WaitGCPReq
::
UnblockStartGcp
;
sendSignal
(
DBDIH_REF
,
GSN_WAIT_GCP_REQ
,
signal
,
WaitGCPReq
::
SignalLength
,
JBB
);
}
/**
 * Handle a STOP_CONF reply while the stop record is in SR_QMGR_STOP_REQ.
 *
 * The confirming node is cleared from c_stopRec.m_stop_req_counter.  When
 * the counter reports done(), one FAIL_REP with cause ZMULTI_NODE_SHUTDOWN
 * is sent per node set in c_stopRec.stopReq.nodes, addressed to the
 * receiver group built from QMGR and c_clusterNodes, and the stop record
 * advances to SR_WAIT_NODE_FAILURES.
 */
void
Ndbcntr::execSTOP_CONF(Signal* signal)
{
  jamEntry();

  const StopConf* const stopConf = (const StopConf*)signal->getDataPtr();
  ndbrequire(c_stopRec.m_state == StopRecord::SR_QMGR_STOP_REQ);
  c_stopRec.m_stop_req_counter.clearWaitingFor(stopConf->nodeId);

  if (!c_stopRec.m_stop_req_counter.done())
  {
    // Further STOP_CONF replies are still outstanding
    return;
  }

  char text[100];
  NdbNodeBitmask stopping;
  stopping.assign(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes);
  infoEvent("Stopping of %s", stopping.getText(text));
  ndbout_c("Stopping of %s", stopping.getText(text));

  /**
   * Kill any node...
   *
   * The FAIL_REP is built in place in the signal buffer; only failNodeId
   * changes between the individual sends.
   */
  FailRep* const rep = (FailRep*)&signal->theData[0];
  rep->failCause = FailRep::ZMULTI_NODE_SHUTDOWN;
  NodeReceiverGroup allQmgr(QMGR, c_clusterNodes);

  for (Uint32 victim = NdbNodeBitmask::find(c_stopRec.stopReq.nodes, 1);
       victim != NdbNodeBitmask::NotFound;
       victim = NdbNodeBitmask::find(c_stopRec.stopReq.nodes, victim + 1))
  {
    rep->failNodeId = victim;
    sendSignal(allQmgr, GSN_FAIL_REP, signal, FailRep::SignalLength, JBA);
  }

  c_stopRec.m_state = StopRecord::SR_WAIT_NODE_FAILURES;
}
void
Ndbcntr
::
execSTTORRY
(
Signal
*
signal
){
...
...
ndb/src/kernel/blocks/qmgr/Qmgr.hpp
View file @
1b5ce338
...
...
@@ -29,6 +29,7 @@
#include <signaldata/CmRegSignalData.hpp>
#include <signaldata/ApiRegSignalData.hpp>
#include <signaldata/FailRep.hpp>
#include <signaldata/StopReq.hpp>
#include "timer.hpp"
...
...
@@ -49,6 +50,7 @@
#define ZAPI_HB_HANDLING 3
#define ZTIMER_HANDLING 4
#define ZARBIT_HANDLING 5
#define ZSTART_FAILURE_LIMIT 6
/* Error Codes ------------------------------*/
#define ZERRTOOMANY 1101
...
...
@@ -100,18 +102,42 @@ public:
};
struct
StartRecord
{
void
reset
(){
m_startKey
++
;
m_startNode
=
0
;}
void
reset
(){
m_startKey
++
;
m_startNode
=
0
;
m_gsn
=
RNIL
;
m_nodes
.
clearWaitingFor
();
}
Uint32
m_startKey
;
Uint32
m_startNode
;
Uint64
m_startTimeout
;
Uint32
m_gsn
;
SignalCounter
m_nodes
;
}
c_start
;
Uint32
m_latest_gci
;
Uint32
m_start_type
;
NdbNodeBitmask
m_skip_nodes
;
NdbNodeBitmask
m_starting_nodes
;
NdbNodeBitmask
m_starting_nodes_w_log
;
Uint16
m_president_candidate
;
Uint32
m_president_candidate_gci
;
Uint16
m_regReqReqSent
;
Uint16
m_regReqReqRecv
;
}
c_start
;
NdbNodeBitmask
c_definedNodes
;
// DB nodes in config
NdbNodeBitmask
c_clusterNodes
;
// DB nodes in cluster
NodeBitmask
c_connectedNodes
;
// All kinds of connected nodes
/**
* Nodes which we're checking for partitioned cluster
*
* i.e. nodes that connect to us when we already have an elected president
*/
NdbNodeBitmask
c_readnodes_nodes
;
Uint32
c_maxDynamicId
;
// Records
...
...
@@ -204,6 +230,7 @@ private:
void
execPRES_TOCONF
(
Signal
*
signal
);
void
execDISCONNECT_REP
(
Signal
*
signal
);
void
execSYSTEM_ERROR
(
Signal
*
signal
);
void
execSTOP_REQ
(
Signal
*
signal
);
// Received signals
void
execDUMP_STATE_ORD
(
Signal
*
signal
);
...
...
@@ -218,7 +245,12 @@ private:
void
execREAD_NODESREQ
(
Signal
*
signal
);
void
execSET_VAR_REQ
(
Signal
*
signal
);
void
execREAD_NODESREF
(
Signal
*
signal
);
void
execREAD_NODESCONF
(
Signal
*
signal
);
void
execDIH_RESTARTREF
(
Signal
*
signal
);
void
execDIH_RESTARTCONF
(
Signal
*
signal
);
void
execAPI_VERSION_REQ
(
Signal
*
signal
);
void
execAPI_BROADCAST_REP
(
Signal
*
signal
);
...
...
@@ -234,6 +266,9 @@ private:
void
execARBIT_STOPREP
(
Signal
*
signal
);
// Statement blocks
void
check_readnodes_reply
(
Signal
*
signal
,
Uint32
nodeId
,
Uint32
gsn
);
Uint32
check_startup
(
Signal
*
signal
);
void
node_failed
(
Signal
*
signal
,
Uint16
aFailedNode
);
void
checkStartInterface
(
Signal
*
signal
);
void
failReport
(
Signal
*
signal
,
...
...
@@ -251,8 +286,9 @@ private:
// Generated statement blocks
void
startphase1
(
Signal
*
signal
);
void
electionWon
();
void
electionWon
(
Signal
*
signal
);
void
cmInfoconf010Lab
(
Signal
*
signal
);
void
apiHbHandlingLab
(
Signal
*
signal
);
void
timerHandlingLab
(
Signal
*
signal
);
void
hbReceivedLab
(
Signal
*
signal
);
...
...
@@ -354,12 +390,12 @@ private:
/* Status flags ----------------------------------*/
Uint32
c_restartPartialTimeout
;
Uint32
c_restartPartionedTimeout
;
Uint32
c_restartFailureTimeout
;
Uint64
c_start_election_time
;
Uint16
creadyDistCom
;
Uint16
c_regReqReqSent
;
Uint16
c_regReqReqRecv
;
Uint64
c_stopElectionTime
;
Uint16
cpresidentCandidate
;
Uint16
cdelayRegreq
;
Uint16
cpresidentAlive
;
Uint16
cnoFailedNodes
;
...
...
@@ -387,7 +423,9 @@ private:
Uint16
cfailedNodes
[
MAX_NDB_NODES
];
Uint16
cprepFailedNodes
[
MAX_NDB_NODES
];
Uint16
ccommitFailedNodes
[
MAX_NDB_NODES
];
StopReq
c_stopReq
;
void
check_multi_node_shutdown
(
Signal
*
signal
);
};
#endif
ndb/src/kernel/blocks/qmgr/QmgrInit.cpp
View file @
1b5ce338
...
...
@@ -35,9 +35,8 @@ void Qmgr::initData()
Uint32
hbDBAPI
=
500
;
setHbApiDelay
(
hbDBAPI
);
c_connectedNodes
.
clear
();
c_connectedNodes
.
set
(
getOwnNodeId
());
c_stopReq
.
senderRef
=
0
;
}
//Qmgr::initData()
void
Qmgr
::
initRecords
()
...
...
@@ -52,6 +51,7 @@ Qmgr::Qmgr(const class Configuration & conf)
// Transit signals
addRecSignal
(
GSN_DUMP_STATE_ORD
,
&
Qmgr
::
execDUMP_STATE_ORD
);
addRecSignal
(
GSN_STOP_REQ
,
&
Qmgr
::
execSTOP_REQ
);
addRecSignal
(
GSN_DEBUG_SIG
,
&
Qmgr
::
execDEBUG_SIG
);
addRecSignal
(
GSN_CONTINUEB
,
&
Qmgr
::
execCONTINUEB
);
addRecSignal
(
GSN_CM_HEARTBEAT
,
&
Qmgr
::
execCM_HEARTBEAT
);
...
...
@@ -96,6 +96,12 @@ Qmgr::Qmgr(const class Configuration & conf)
addRecSignal
(
GSN_ARBIT_CHOOSEREF
,
&
Qmgr
::
execARBIT_CHOOSEREF
);
addRecSignal
(
GSN_ARBIT_STOPREP
,
&
Qmgr
::
execARBIT_STOPREP
);
addRecSignal
(
GSN_READ_NODESREF
,
&
Qmgr
::
execREAD_NODESREF
);
addRecSignal
(
GSN_READ_NODESCONF
,
&
Qmgr
::
execREAD_NODESCONF
);
addRecSignal
(
GSN_DIH_RESTARTREF
,
&
Qmgr
::
execDIH_RESTARTREF
);
addRecSignal
(
GSN_DIH_RESTARTCONF
,
&
Qmgr
::
execDIH_RESTARTCONF
);
initData
();
}
//Qmgr::Qmgr()
...
...
ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
View file @
1b5ce338
...
...
@@ -56,6 +56,33 @@
#define DEBUG_START3(signal, msg)
#endif
/**
* c_start.m_gsn = GSN_CM_REGREQ
* Possible for all nodes
* c_start.m_nodes contains all nodes in config
*
* c_start.m_gsn = GSN_CM_NODEINFOREQ;
* Set when receiving CM_REGCONF
* State possible for starting node only (not in cluster)
*
* c_start.m_nodes contains all nodes in alive cluster
* that have not replied to GSN_CM_NODEINFOREQ
* passed by president in GSN_CM_REGCONF
*
* c_start.m_gsn = GSN_CM_ADD
* Possible for president only
* Set when receiving and accepting CM_REGREQ (to include node)
*
* c_start.m_nodes contains all nodes in alive cluster + starting node
* that has not replied to GSN_CM_ADD
* by sending GSN_CM_ACKADD
*
* c_start.m_gsn = GSN_CM_NODEINFOCONF
* Possible for non presidents only
* c_start.m_nodes contains a node that has been accepted by president
* but has not connected to us yet
*/
// Signal entries and statement blocks
/* 4 P R O G R A M */
/*******************************/
...
...
@@ -119,6 +146,30 @@ void Qmgr::execCONTINUEB(Signal* signal)
runArbitThread
(
signal
);
return
;
break
;
case
ZSTART_FAILURE_LIMIT
:{
if
(
cpresident
!=
ZNIL
)
{
jam
();
return
;
}
Uint64
now
=
NdbTick_CurrentMillisecond
();
if
(
now
>
(
c_start_election_time
+
c_restartFailureTimeout
))
{
jam
();
BaseString
tmp
;
tmp
.
append
(
"Shutting down node as total restart time exceeds "
" StartFailureTimeout as set in config file "
);
if
(
c_restartFailureTimeout
==
~
0
)
tmp
.
append
(
" 0 (inifinite)"
);
else
tmp
.
appfmt
(
" %d"
,
c_restartFailureTimeout
);
progError
(
__LINE__
,
NDBD_EXIT_SYSTEM_ERROR
,
tmp
.
c_str
());
}
signal
->
theData
[
0
]
=
ZSTART_FAILURE_LIMIT
;
sendSignalWithDelay
(
reference
(),
GSN_CONTINUEB
,
signal
,
3000
,
1
);
return
;
}
default:
jam
();
// ZCOULD_NOT_OCCUR_ERROR;
...
...
@@ -246,14 +297,28 @@ void Qmgr::startphase1(Signal* signal)
nodePtr
.
p
->
phase
=
ZSTARTING
;
nodePtr
.
p
->
blockRef
=
reference
();
c_connectedNodes
.
set
(
nodePtr
.
i
);
signal
->
theData
[
0
]
=
reference
();
sendSignal
(
DBDIH_REF
,
GSN_DIH_RESTARTREQ
,
signal
,
1
,
JBB
);
return
;
}
signal
->
theData
[
0
]
=
0
;
// no answer
signal
->
theData
[
1
]
=
0
;
// no id
signal
->
theData
[
2
]
=
NodeInfo
::
DB
;
sendSignal
(
CMVMI_REF
,
GSN_OPEN_COMREQ
,
signal
,
3
,
JBB
);
/**
 * DIH_RESTARTREF handler: records a latest GCI of 0 in c_start and then
 * continues through execCM_INFOCONF().
 */
void
Qmgr::execDIH_RESTARTREF(Signal* signal)
{
  jamEntry();

  const Uint32 noGci = 0;
  c_start.m_latest_gci = noGci;

  execCM_INFOCONF(signal);
}
/**
 * DIH_RESTARTCONF handler: stores signal word 1 as the latest GCI in
 * c_start and then continues through execCM_INFOCONF().
 */
void
Qmgr::execDIH_RESTARTCONF(Signal* signal)
{
  jamEntry();

  const Uint32 latestGci = signal->theData[1];
  c_start.m_latest_gci = latestGci;

  execCM_INFOCONF(signal);
}
void
Qmgr
::
setHbDelay
(
UintR
aHbDelay
)
...
...
@@ -280,18 +345,24 @@ void Qmgr::execCONNECT_REP(Signal* signal)
{
jamEntry
();
const
Uint32
nodeId
=
signal
->
theData
[
0
];
if
(
ERROR_INSERTED
(
931
))
{
jam
();
ndbout_c
(
"Discarding CONNECT_REP(%d)"
,
nodeId
);
infoEvent
(
"Discarding CONNECT_REP(%d)"
,
nodeId
);
return
;
}
c_connectedNodes
.
set
(
nodeId
);
NodeRecPtr
nodePtr
;
nodePtr
.
i
=
getOwnNodeId
();
ptrCheckGuard
(
nodePtr
,
MAX_NODES
,
nodeRec
);
switch
(
nodePtr
.
p
->
phase
){
case
ZSTARTING
:
case
ZRUNNING
:
ndbrequire
(
!
c_clusterNodes
.
get
(
nodeId
));
case
ZSTARTING
:
jam
();
if
(
!
c_start
.
m_nodes
.
isWaitingFor
(
nodeId
)){
jam
();
return
;
}
break
;
case
ZPREPARE_FAIL
:
case
ZFAIL_CLOSING
:
...
...
@@ -303,59 +374,126 @@ void Qmgr::execCONNECT_REP(Signal* signal)
case
ZAPI_INACTIVE
:
return
;
}
if
(
getNodeInfo
(
nodeId
).
getType
()
!=
NodeInfo
::
DB
)
{
jam
();
return
;
}
switch
(
c_start
.
m_gsn
){
case
GSN_CM_REGREQ
:
jam
();
sendCmRegReq
(
signal
,
nodeId
);
/**
* We're waiting for CM_REGCONF c_start.m_nodes contains all configured
* nodes
*/
ndbrequire
(
nodePtr
.
p
->
phase
==
ZSTARTING
);
ndbrequire
(
c_start
.
m_nodes
.
isWaitingFor
(
nodeId
));
return
;
case
GSN_CM_NODEINFOREQ
:
jam
();
sendCmNodeInfoReq
(
signal
,
nodeId
,
nodePtr
.
p
);
if
(
c_start
.
m_nodes
.
isWaitingFor
(
nodeId
))
{
jam
();
ndbrequire
(
getOwnNodeId
()
!=
cpresident
);
ndbrequire
(
nodePtr
.
p
->
phase
==
ZSTARTING
);
sendCmNodeInfoReq
(
signal
,
nodeId
,
nodePtr
.
p
);
return
;
}
return
;
case
GSN_CM_
ADD
:{
case
GSN_CM_
NODEINFOCONF
:{
jam
();
ndbrequire
(
getOwnNodeId
()
!=
cpresident
);
c_start
.
m_nodes
.
clearWaitingFor
(
nodeId
);
c_start
.
m_gsn
=
RNIL
;
NodeRecPtr
addNodePtr
;
addNodePtr
.
i
=
nodeId
;
ptrCheckGuard
(
addNodePtr
,
MAX_NDB_NODES
,
nodeRec
);
cmAddPrepare
(
signal
,
addNodePtr
,
nodePtr
.
p
);
return
;
ndbrequire
(
getOwnNodeId
()
!=
cpresident
);
ndbrequire
(
nodePtr
.
p
->
phase
==
ZRUNNING
);
if
(
c_start
.
m_nodes
.
isWaitingFor
(
nodeId
))
{
jam
();
c_start
.
m_nodes
.
clearWaitingFor
(
nodeId
);
c_start
.
m_gsn
=
RNIL
;
NodeRecPtr
addNodePtr
;
addNodePtr
.
i
=
nodeId
;
ptrCheckGuard
(
addNodePtr
,
MAX_NDB_NODES
,
nodeRec
);
cmAddPrepare
(
signal
,
addNodePtr
,
nodePtr
.
p
);
return
;
}
}
default:
return
;
(
void
)
1
;
}
ndbrequire
(
!
c_start
.
m_nodes
.
isWaitingFor
(
nodeId
));
ndbrequire
(
!
c_readnodes_nodes
.
get
(
nodeId
));
c_readnodes_nodes
.
set
(
nodeId
);
signal
->
theData
[
0
]
=
reference
();
sendSignal
(
calcQmgrBlockRef
(
nodeId
),
GSN_READ_NODESREQ
,
signal
,
1
,
JBA
);
return
;
}
//Qmgr::execCONNECT_REP()
/**
 * READ_NODESCONF handler: forwards the signal, tagged with the sending
 * node's id, to check_readnodes_reply().
 */
void
Qmgr::execREAD_NODESCONF(Signal* signal)
{
  const Uint32 senderNode = refToNode(signal->getSendersBlockRef());
  check_readnodes_reply(signal, senderNode, GSN_READ_NODESCONF);
}
/**
 * READ_NODESREF handler: forwards the signal, tagged with the sending
 * node's id, to check_readnodes_reply().
 */
void
Qmgr::execREAD_NODESREF(Signal* signal)
{
  const Uint32 senderNode = refToNode(signal->getSendersBlockRef());
  check_readnodes_reply(signal, senderNode, GSN_READ_NODESREF);
}
/*******************************/
/* CM_INFOCONF */
/*******************************/
void
Qmgr
::
execCM_INFOCONF
(
Signal
*
signal
)
{
/**
* Open communication to all DB nodes
*/
signal
->
theData
[
0
]
=
0
;
// no answer
signal
->
theData
[
1
]
=
0
;
// no id
signal
->
theData
[
2
]
=
NodeInfo
::
DB
;
sendSignal
(
CMVMI_REF
,
GSN_OPEN_COMREQ
,
signal
,
3
,
JBB
);
cpresident
=
ZNIL
;
cpresidentCandidate
=
getOwnNodeId
();
cpresidentAlive
=
ZFALSE
;
c_stopElectionTime
=
NdbTick_CurrentMillisecond
();
c_stopElectionTime
+=
c_restartPartialTimeout
;
c_start_election_time
=
NdbTick_CurrentMillisecond
();
signal
->
theData
[
0
]
=
ZSTART_FAILURE_LIMIT
;
sendSignalWithDelay
(
reference
(),
GSN_CONTINUEB
,
signal
,
3000
,
1
);
cmInfoconf010Lab
(
signal
);
return
;
}
//Qmgr::execCM_INFOCONF()
Uint32
g_start_type
=
0
;
NdbNodeBitmask
g_nowait_nodes
;
// Set by clo
void
Qmgr
::
cmInfoconf010Lab
(
Signal
*
signal
)
{
c_start
.
m_startKey
=
0
;
c_start
.
m_startNode
=
getOwnNodeId
();
c_start
.
m_nodes
.
clearWaitingFor
();
c_start
.
m_gsn
=
GSN_CM_REGREQ
;
c_start
.
m_starting_nodes
.
clear
();
c_start
.
m_starting_nodes_w_log
.
clear
();
c_start
.
m_regReqReqSent
=
0
;
c_start
.
m_regReqReqRecv
=
0
;
c_start
.
m_skip_nodes
=
g_nowait_nodes
;
c_start
.
m_skip_nodes
.
bitAND
(
c_definedNodes
);
c_start
.
m_start_type
=
g_start_type
;
NodeRecPtr
nodePtr
;
c_regReqReqSent
=
c_regReqReqRecv
=
0
;
cnoOfNodes
=
0
;
for
(
nodePtr
.
i
=
1
;
nodePtr
.
i
<
MAX_NDB_NODES
;
nodePtr
.
i
++
)
{
jam
();
...
...
@@ -390,14 +528,18 @@ void Qmgr::cmInfoconf010Lab(Signal* signal)
void
Qmgr
::
sendCmRegReq
(
Signal
*
signal
,
Uint32
nodeId
){
c_regReqReqSent
++
;
CmRegReq
*
const
cmRegReq
=
(
CmRegReq
*
)
&
signal
->
theData
[
0
];
cmRegReq
->
blockRef
=
reference
();
cmRegReq
->
nodeId
=
getOwnNodeId
();
cmRegReq
->
version
=
NDB_VERSION
;
CmRegReq
*
req
=
(
CmRegReq
*
)
&
signal
->
theData
[
0
];
req
->
blockRef
=
reference
();
req
->
nodeId
=
getOwnNodeId
();
req
->
version
=
NDB_VERSION
;
req
->
latest_gci
=
c_start
.
m_latest_gci
;
req
->
start_type
=
c_start
.
m_start_type
;
c_start
.
m_skip_nodes
.
copyto
(
NdbNodeBitmask
::
Size
,
req
->
skip_nodes
);
const
Uint32
ref
=
calcQmgrBlockRef
(
nodeId
);
sendSignal
(
ref
,
GSN_CM_REGREQ
,
signal
,
CmRegReq
::
SignalLength
,
JBB
);
DEBUG_START
(
GSN_CM_REGREQ
,
nodeId
,
""
);
c_start
.
m_regReqReqSent
++
;
}
/*
...
...
@@ -437,6 +579,18 @@ Qmgr::sendCmRegReq(Signal * signal, Uint32 nodeId){
/*******************************/
/* CM_REGREQ */
/*******************************/
/**
 * Compare the start-type mask of a starting node with our own.
 *
 * @param starting  start-type bitmask reported by the starting node
 * @param own       our own start-type bitmask
 * @return 1 when `starting` is exactly the ST_INITIAL_START bit while
 *         `own` does not have that bit set, otherwise 0
 */
static
int
check_start_type(Uint32 starting, Uint32 own)
{
  const Uint32 initialStartBit = (1 << NodeState::ST_INITIAL_START);

  const bool startingIsInitialOnly = (starting == initialStartBit);
  const bool ownHasInitial = ((own & initialStartBit) != 0);

  return (startingIsInitialOnly && !ownHasInitial) ? 1 : 0;
}
void
Qmgr
::
execCM_REGREQ
(
Signal
*
signal
)
{
DEBUG_START3
(
signal
,
""
);
...
...
@@ -448,6 +602,17 @@ void Qmgr::execCM_REGREQ(Signal* signal)
const
BlockReference
Tblockref
=
cmRegReq
->
blockRef
;
const
Uint32
startingVersion
=
cmRegReq
->
version
;
addNodePtr
.
i
=
cmRegReq
->
nodeId
;
Uint32
gci
=
1
;
Uint32
start_type
=
~
0
;
NdbNodeBitmask
skip_nodes
;
if
(
signal
->
getLength
()
==
CmRegReq
::
SignalLength
)
{
jam
();
gci
=
cmRegReq
->
latest_gci
;
start_type
=
cmRegReq
->
start_type
;
skip_nodes
.
assign
(
NdbNodeBitmask
::
Size
,
cmRegReq
->
skip_nodes
);
}
if
(
creadyDistCom
==
ZFALSE
)
{
jam
();
...
...
@@ -461,11 +626,19 @@ void Qmgr::execCM_REGREQ(Signal* signal)
return
;
}
ptrCheckGuard
(
addNodePtr
,
MAX_NDB_NODES
,
nodeRec
);
if
(
cpresident
!=
getOwnNodeId
()){
if
(
check_start_type
(
start_type
,
c_start
.
m_start_type
))
{
jam
();
sendCmRegrefLab
(
signal
,
Tblockref
,
CmRegRef
::
ZINCOMPATIBLE_START_TYPE
);
return
;
}
if
(
cpresident
!=
getOwnNodeId
())
{
jam
();
if
(
cpresident
==
ZNIL
)
{
if
(
cpresident
==
ZNIL
)
{
/***
* We don't know the president.
* If the node to be added has lower node id
...
...
@@ -473,13 +646,19 @@ void Qmgr::execCM_REGREQ(Signal* signal)
* candidate
*/
jam
();
if
(
addNodePtr
.
i
<
cpresidentCandidate
)
{
if
(
gci
>
c_start
.
m_president_candidate_gci
||
(
gci
==
c_start
.
m_president_candidate_gci
&&
addNodePtr
.
i
<
c_start
.
m_president_candidate
))
{
jam
();
cpresidentCandidate
=
addNodePtr
.
i
;
}
//if
c_start
.
m_president_candidate
=
addNodePtr
.
i
;
c_start
.
m_president_candidate_gci
=
gci
;
ndbout_c
(
"assign candidate: %u %u"
,
addNodePtr
.
i
,
gci
);
}
sendCmRegrefLab
(
signal
,
Tblockref
,
CmRegRef
::
ZELECTION
);
return
;
}
}
/**
* We are not the president.
* We know the president.
...
...
@@ -489,7 +668,8 @@ void Qmgr::execCM_REGREQ(Signal* signal)
return
;
}
//if
if
(
c_start
.
m_startNode
!=
0
){
if
(
c_start
.
m_startNode
!=
0
)
{
jam
();
/**
* President busy by adding another node
...
...
@@ -498,7 +678,8 @@ void Qmgr::execCM_REGREQ(Signal* signal)
return
;
}
//if
if
(
ctoStatus
==
Q_ACTIVE
)
{
if
(
ctoStatus
==
Q_ACTIVE
)
{
jam
();
/**
* Active taking over as president
...
...
@@ -507,7 +688,8 @@ void Qmgr::execCM_REGREQ(Signal* signal)
return
;
}
//if
if
(
getNodeInfo
(
addNodePtr
.
i
).
m_type
!=
NodeInfo
::
DB
)
{
if
(
getNodeInfo
(
addNodePtr
.
i
).
m_type
!=
NodeInfo
::
DB
)
{
jam
();
/**
* The new node is not in config file
...
...
@@ -516,13 +698,15 @@ void Qmgr::execCM_REGREQ(Signal* signal)
return
;
}
ptrCheckGuard
(
addNodePtr
,
MAX_NDB_NODES
,
nodeRec
);
Phase
phase
=
addNodePtr
.
p
->
phase
;
if
(
phase
!=
ZINIT
){
if
(
phase
!=
ZINIT
)
{
jam
();
DEBUG
(
"phase = "
<<
phase
);
sendCmRegrefLab
(
signal
,
Tblockref
,
CmRegRef
::
ZNOT_DEAD
);
return
;
}
//if
}
jam
();
/**
...
...
@@ -594,7 +778,12 @@ void Qmgr::sendCmRegrefLab(Signal* signal, BlockReference TBRef,
ref
->
blockRef
=
reference
();
ref
->
nodeId
=
getOwnNodeId
();
ref
->
errorCode
=
Terror
;
ref
->
presidentCandidate
=
(
cpresident
==
ZNIL
?
cpresidentCandidate
:
cpresident
);
ref
->
presidentCandidate
=
(
cpresident
==
ZNIL
?
c_start
.
m_president_candidate
:
cpresident
);
ref
->
candidate_latest_gci
=
c_start
.
m_president_candidate_gci
;
ref
->
latest_gci
=
c_start
.
m_latest_gci
;
ref
->
start_type
=
c_start
.
m_start_type
;
c_start
.
m_skip_nodes
.
copyto
(
NdbNodeBitmask
::
Size
,
ref
->
skip_nodes
);
sendSignal
(
TBRef
,
GSN_CM_REGREF
,
signal
,
CmRegRef
::
SignalLength
,
JBB
);
DEBUG_START
(
GSN_CM_REGREF
,
refToNode
(
TBRef
),
""
);
...
...
@@ -622,22 +811,33 @@ void Qmgr::execCM_REGCONF(Signal* signal)
jamEntry
();
const
CmRegConf
*
const
cmRegConf
=
(
CmRegConf
*
)
&
signal
->
theData
[
0
];
Uint32
presidentNodeId
=
cmRegConf
->
presidentNodeId
;
if
(
!
ndbCompatible_ndb_ndb
(
NDB_VERSION
,
cmRegConf
->
presidentVersion
))
{
jam
();
char
buf
[
128
];
BaseString
::
snprintf
(
buf
,
sizeof
(
buf
),
"incompatible version own=0x%x other=0x%x, shutting down"
,
NDB_VERSION
,
cmRegConf
->
presidentVersion
);
BaseString
::
snprintf
(
buf
,
sizeof
(
buf
),
"incompatible version own=0x%x other=0x%x, "
" shutting down"
,
NDB_VERSION
,
cmRegConf
->
presidentVersion
);
systemErrorLab
(
signal
,
__LINE__
,
buf
);
return
;
}
myNodePtr
.
i
=
getOwnNodeId
();
ptrCheckGuard
(
myNodePtr
,
MAX_NDB_NODES
,
nodeRec
);
// CM_REGCONF is only expected while we are registering: the start record
// must be in the CM_REGREQ step and our own node must be in phase ZSTARTING.
// The phase check must compare with '=='; a plain '=' would overwrite the
// phase and make the assertion pass unconditionally.
ndbrequire(c_start.m_gsn == GSN_CM_REGREQ);
ndbrequire(myNodePtr.p->phase == ZSTARTING);
cpdistref
=
cmRegConf
->
presidentBlockRef
;
cpresident
=
cmRegConf
->
presidentNodeId
;
UintR
TdynamicId
=
cmRegConf
->
dynamicId
;
c_maxDynamicId
=
TdynamicId
;
c_clusterNodes
.
assign
(
NdbNodeBitmask
::
Size
,
cmRegConf
->
allNdbNodes
);
myNodePtr
.
p
->
ndynamicId
=
TdynamicId
;
/*--------------------------------------------------------------*/
// Send this as an EVENT REPORT to inform about hearing about
// other NDB node proclaiming to be president.
...
...
@@ -648,10 +848,6 @@ void Qmgr::execCM_REGCONF(Signal* signal)
signal
->
theData
[
3
]
=
TdynamicId
;
sendSignal
(
CMVMI_REF
,
GSN_EVENT_REP
,
signal
,
4
,
JBB
);
myNodePtr
.
i
=
getOwnNodeId
();
ptrCheckGuard
(
myNodePtr
,
MAX_NDB_NODES
,
nodeRec
);
myNodePtr
.
p
->
ndynamicId
=
TdynamicId
;
for
(
nodePtr
.
i
=
1
;
nodePtr
.
i
<
MAX_NDB_NODES
;
nodePtr
.
i
++
)
{
jam
();
if
(
c_clusterNodes
.
get
(
nodePtr
.
i
)){
...
...
@@ -674,6 +870,84 @@ void Qmgr::execCM_REGCONF(Signal* signal)
return
;
}
//Qmgr::execCM_REGCONF()
/**
 * Evaluate a READ_NODESCONF / READ_NODESREF reply from a node recorded in
 * c_readnodes_nodes.
 *
 * @param signal  the received signal; its buffer is reused for any reply
 * @param nodeId  node that sent the reply (must be set in c_readnodes_nodes)
 * @param gsn     GSN_READ_NODESCONF or GSN_READ_NODESREF
 *
 * - On a REF, or a CONF whose masterNodeId is ZNIL, READ_NODESREQ is sent
 *   to the node again.
 * - If the node reports the same president as cpresident, the node is
 *   cleared from c_readnodes_nodes.
 * - Otherwise the presidents differ.  When this node is SL_STARTED, a
 *   FAIL_REP with cause ZPARTITIONED_CLUSTER is sent to the replying
 *   node's QMGR for every node in the cluster it reported (the replying
 *   node itself last).  In any other start level this node stops via
 *   progError().
 */
void
Qmgr::check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn)
{
  NodeRecPtr myNodePtr;
  myNodePtr.i = getOwnNodeId();
  ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);

  NodeRecPtr nodePtr;
  nodePtr.i = nodeId;
  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);

  ndbrequire(c_readnodes_nodes.get(nodeId));
  ReadNodesConf* conf = (ReadNodesConf*)signal->getDataPtr();
  if (gsn == GSN_READ_NODESREF)
  {
    jam();
retry:
    // Ask the node again
    signal->theData[0] = reference();
    sendSignal(calcQmgrBlockRef(nodeId), GSN_READ_NODESREQ, signal, 1, JBA);
    return;
  }

  if (conf->masterNodeId == ZNIL)
  {
    jam();
    goto retry;
  }

  Uint32 president = conf->masterNodeId;
  if (president == cpresident)
  {
    jam();
    c_readnodes_nodes.clear(nodeId);
    return;
  }

  char buf[255];
  BaseString::snprintf(buf, sizeof(buf),
                       "Partitioned cluster! check StartPartialTimeout, "
                       " node %d thinks %d is president, "
                       " I think president is: %d",
                       nodeId, president, cpresident);

  // Print through an explicit "%s" so the message text is never
  // interpreted as a format string.
  ndbout_c("%s", buf);
  CRASH_INSERTION(933);

  if (getNodeState().startLevel == NodeState::SL_STARTED)
  {
    jam();
    // Copy the reported cluster out of the signal before the FAIL_REP
    // below is built in the signal buffer.
    NdbNodeBitmask part;
    part.assign(NdbNodeBitmask::Size, conf->clusterNodes);
    FailRep* rep = (FailRep*)signal->getDataPtrSend();
    rep->failCause = FailRep::ZPARTITIONED_CLUSTER;
    rep->president = cpresident;
    c_clusterNodes.copyto(NdbNodeBitmask::Size, rep->partition);
    Uint32 ref = calcQmgrBlockRef(nodeId);
    Uint32 i = 0;
    while ((i = part.find(i + 1)) != NdbNodeBitmask::NotFound)
    {
      if (i == nodeId)
        continue;
      rep->failNodeId = i;
      sendSignal(ref, GSN_FAIL_REP, signal, FailRep::SignalLength, JBA);
    }
    // The replying node itself is reported last, on JBB rather than JBA
    rep->failNodeId = nodeId;
    sendSignal(ref, GSN_FAIL_REP, signal, FailRep::SignalLength, JBB);
    return;
  }

  CRASH_INSERTION(932);

  progError(__LINE__,
            NDBD_EXIT_ARBIT_SHUTDOWN,
            buf);

  ndbrequire(false);
}
void
Qmgr
::
sendCmNodeInfoReq
(
Signal
*
signal
,
Uint32
nodeId
,
const
NodeRec
*
self
){
CmNodeInfoReq
*
const
req
=
(
CmNodeInfoReq
*
)
signal
->
getDataPtrSend
();
...
...
@@ -703,26 +977,105 @@ Qmgr::sendCmNodeInfoReq(Signal* signal, Uint32 nodeId, const NodeRec * self){
/*******************************/
/* CM_REGREF */
/*******************************/
/**
 * Render a start-type bitmask as human readable text.
 *
 * @param st  bitmask with one bit per NodeState start type
 * @return "<ANY>" when st is 0, otherwise the names of all set start types
 *         joined by '/'.  The text lives in static storage owned by this
 *         function.
 *
 * Two static buffers are used in rotation so that two results can be alive
 * at the same time, e.g. when the function is called twice in the argument
 * list of a single snprintf().  With one shared buffer both arguments
 * would point at the same text.  A returned pointer stays valid until the
 * second following call that takes the non-zero path.
 */
static
const char *
get_start_type_string(Uint32 st)
{
  if (st == 0)
  {
    return "<ANY>";
  }

  static char bufs[2][256];
  static Uint32 next = 0;

  char * const buf = bufs[next];
  next = (next + 1) % 2;

  buf[0] = 0;
  for (Uint32 i = 0; i < NodeState::ST_ILLEGAL_TYPE; i++)
  {
    if (st & (1 << i))
    {
      // Separate entries with '/' once something has been written
      if (buf[0])
        strcat(buf, "/");
      switch (i) {
      case NodeState::ST_INITIAL_START:
        strcat(buf, "inital start");
        break;
      case NodeState::ST_SYSTEM_RESTART:
        strcat(buf, "system restart");
        break;
      case NodeState::ST_NODE_RESTART:
        strcat(buf, "node restart");
        break;
      case NodeState::ST_INITIAL_NODE_RESTART:
        strcat(buf, "initial node restart");
        break;
      }
    }
  }
  return buf;
}
void
Qmgr
::
execCM_REGREF
(
Signal
*
signal
)
{
jamEntry
();
c_regReqReqRecv
++
;
// Ignore block reference in data[0]
UintR
TaddNodeno
=
signal
->
theData
[
1
];
UintR
TrefuseReason
=
signal
->
theData
[
2
];
Uint32
candidate
=
signal
->
theData
[
3
];
CmRegRef
*
ref
=
(
CmRegRef
*
)
signal
->
getDataPtr
();
UintR
TaddNodeno
=
ref
->
nodeId
;
UintR
TrefuseReason
=
ref
->
errorCode
;
Uint32
candidate
=
ref
->
presidentCandidate
;
Uint32
node_gci
=
1
;
Uint32
candidate_gci
=
1
;
Uint32
start_type
=
~
0
;
NdbNodeBitmask
skip_nodes
;
DEBUG_START3
(
signal
,
TrefuseReason
);
if
(
candidate
!=
cpresidentCandidate
){
if
(
signal
->
getLength
()
==
CmRegRef
::
SignalLength
)
{
jam
();
c_regReqReqRecv
=
~
0
;
node_gci
=
ref
->
latest_gci
;
candidate_gci
=
ref
->
candidate_latest_gci
;
start_type
=
ref
->
start_type
;
skip_nodes
.
assign
(
NdbNodeBitmask
::
Size
,
ref
->
skip_nodes
);
}
c_start
.
m_regReqReqRecv
++
;
// Ignore block reference in data[0]
if
(
candidate
!=
c_start
.
m_president_candidate
)
{
jam
();
c_start
.
m_regReqReqRecv
=
~
0
;
}
c_start
.
m_starting_nodes
.
set
(
TaddNodeno
);
if
(
node_gci
)
{
jam
();
c_start
.
m_starting_nodes_w_log
.
set
(
TaddNodeno
);
}
skip_nodes
.
bitAND
(
c_definedNodes
);
c_start
.
m_skip_nodes
.
bitOR
(
skip_nodes
);
char
buf
[
100
];
switch
(
TrefuseReason
)
{
case
CmRegRef
:
:
ZINCOMPATIBLE_VERSION
:
jam
();
systemErrorLab
(
signal
,
__LINE__
,
"incompatible version, connection refused by running ndb node"
);
systemErrorLab
(
signal
,
__LINE__
,
"incompatible version, "
"connection refused by running ndb node"
);
case
CmRegRef
:
:
ZINCOMPATIBLE_START_TYPE
:
jam
();
BaseString
::
snprintf
(
buf
,
sizeof
(
buf
),
"incompatible start type detected: node %d"
" reports %s(%d) my start type: %s(%d)"
,
TaddNodeno
,
get_start_type_string
(
start_type
),
start_type
,
get_start_type_string
(
c_start
.
m_start_type
),
c_start
.
m_start_type
);
progError
(
__LINE__
,
NDBD_EXIT_SR_RESTARTCONFLICT
,
buf
);
break
;
case
CmRegRef
:
:
ZBUSY
:
case
CmRegRef
:
:
ZBUSY_TO_PRES
:
...
...
@@ -741,14 +1094,19 @@ void Qmgr::execCM_REGREF(Signal* signal)
break
;
case
CmRegRef
:
:
ZELECTION
:
jam
();
if
(
cpresidentCandidate
>
TaddNodeno
)
{
if
(
candidate_gci
>
c_start
.
m_president_candidate_gci
||
(
candidate_gci
==
c_start
.
m_president_candidate_gci
&&
candidate
<
c_start
.
m_president_candidate
))
{
jam
();
//----------------------------------------
/* We may already have a candidate */
/* choose the lowest nodeno */
//----------------------------------------
signal
->
theData
[
3
]
=
2
;
cpresidentCandidate
=
TaddNodeno
;
c_start
.
m_president_candidate
=
candidate
;
c_start
.
m_president_candidate_gci
=
candidate_gci
;
ndbout_c
(
"assign candidate: %u %u"
,
candidate
,
candidate_gci
);
}
else
{
signal
->
theData
[
3
]
=
4
;
}
//if
...
...
@@ -776,32 +1134,34 @@ void Qmgr::execCM_REGREF(Signal* signal)
//-----------------------------------------
sendSignal
(
CMVMI_REF
,
GSN_EVENT_REP
,
signal
,
4
,
JBB
);
if
(
cpresidentAlive
==
ZTRUE
){
if
(
cpresidentAlive
==
ZTRUE
)
{
jam
();
DEBUG
(
""
);
DEBUG
(
"
cpresidentAlive
"
);
return
;
}
if
(
c_regReqReqSent
!=
c_regReqReqRecv
){
if
(
c_start
.
m_regReqReqSent
!=
c_start
.
m_regReqReqRecv
)
{
jam
();
DEBUG
(
c_regReqReqSent
<<
" != "
<<
c
_regReqReqRecv
);
DEBUG
(
c_start
.
m_regReqReqSent
<<
" != "
<<
c_start
.
m
_regReqReqRecv
);
return
;
}
if
(
cpresidentCandidate
!=
getOwnNodeId
()){
if
(
c_start
.
m_president_candidate
!=
getOwnNodeId
())
{
jam
();
DEBUG
(
""
);
DEBUG
(
"
i'm not the candidate
"
);
return
;
}
/**
* All con
figur
ed nodes has agreed
* All con
nect
ed nodes has agreed
*/
Uint64
now
=
NdbTick_CurrentMillisecond
();
if
((
c_regReqReqRecv
==
cnoOfNodes
)
||
now
>
c_stopElectionTime
)
{
if
(
check_startup
(
signal
))
{
jam
();
electionWon
();
sendSttorryLab
(
signal
);
electionWon
(
signal
);
/**
* Start timer handling
...
...
@@ -813,8 +1173,192 @@ void Qmgr::execCM_REGREF(Signal* signal)
return
;
}
//Qmgr::execCM_REGREF()
/**
 * Decide whether the cluster start may proceed now.
 *
 * Called by the president candidate once the election conditions hold.
 * The decision is based on which defined nodes have connected
 * (c_start.m_starting_nodes), which are explicitly skipped
 * (c_start.m_skip_nodes), the node-group check performed by DBDIH and the
 * partial / partitioned start timeouts counted from c_start_election_time.
 *
 * On every non-fatal outcome a NDB_LE_StartReport event is sent to CMVMI:
 *   theData[1]  report code chosen by the branch taken
 *               (1, 2/3, 4/5 on the "keep waiting" paths,
 *                0x8000..0x8003 on the "start now" paths)
 *   theData[2]  remaining wait in seconds (rounded) on the timeout paths,
 *               ~0 when waiting on an initial start
 *   theData[3]  bitmask size in words
 *   theData[4+] defined nodes, starting nodes, skipped nodes, report mask
 *
 * @param signal  Scratch signal; its data area is overwritten both by the
 *                CHECKNODEGROUPSREQ direct calls and by the start report.
 * @return 1 when the start may proceed, 0 when the caller must keep waiting.
 *         When a node group is missing the node is taken down via
 *         progError() instead.
 */
Uint32
Qmgr::check_startup(Signal* signal)
{
  Uint64 now = NdbTick_CurrentMillisecond();
  Uint64 partial_timeout = c_start_election_time + c_restartPartialTimeout;
  Uint64 partitioned_timeout = partial_timeout + c_restartPartionedTimeout;

  /**
   * First see if we should wait more...
   */
  NdbNodeBitmask tmp;                 // skipped + connected nodes
  tmp.bitOR(c_start.m_skip_nodes);
  tmp.bitOR(c_start.m_starting_nodes);

  NdbNodeBitmask wait;                // defined nodes we are still waiting for
  wait.assign(c_definedNodes);
  wait.bitANDC(tmp);

  Uint32 retVal = 0;
  NdbNodeBitmask report_mask;

  /**
   * These are declared (and initialised) before the first goto on purpose:
   * a goto may not jump past an initialised declaration into its scope.
   * They receive their real values further down, at the same points where
   * they were previously declared.
   */
  bool all = false;
  CheckNodeGroups* sd = 0;
  Uint32 result = 0;
  Uint32 result_w_log = 0;

  if ((c_start.m_latest_gci == 0) ||
      (c_start.m_start_type == (1 << NodeState::ST_INITIAL_START)))
  {
    if (!tmp.equal(c_definedNodes))
    {
      /**
       * Initial start (or no gci known): wait until every defined node
       * is either connected or skipped.
       */
      jam();
      signal->theData[1] = 1;
      signal->theData[2] = ~0;
      report_mask.assign(wait);
      retVal = 0;
      goto start_report;
    }
    else
    {
      jam();
      signal->theData[1] = 0x8000;
      report_mask.assign(c_definedNodes);
      report_mask.bitANDC(c_start.m_starting_nodes);
      retVal = 1;
      goto start_report;
    }
  }

  all = c_start.m_starting_nodes.equal(c_definedNodes);
  sd = (CheckNodeGroups*)&signal->theData[0];

  {
    /**
     * Check for missing node group directly
     */
    NdbNodeBitmask check;
    check.assign(c_definedNodes);
    check.bitANDC(c_start.m_starting_nodes);     // Not connected nodes
    check.bitOR(c_start.m_starting_nodes_w_log);

    sd->blockRef = reference();
    sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
    sd->mask = check;
    EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
                   CheckNodeGroups::SignalLength);

    if (sd->output == CheckNodeGroups::Lose)
    {
      jam();
      goto missing_nodegroup;
    }
  }

  /**
   * Node-group check for all connected nodes
   */
  sd->blockRef = reference();
  sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
  sd->mask = c_start.m_starting_nodes;
  EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
                 CheckNodeGroups::SignalLength);
  result = sd->output;

  /**
   * Node-group check for the connected nodes in m_starting_nodes_w_log
   */
  sd->blockRef = reference();
  sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
  sd->mask = c_start.m_starting_nodes_w_log;
  EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
                 CheckNodeGroups::SignalLength);
  result_w_log = sd->output;

  if (tmp.equal(c_definedNodes))
  {
    /**
     * All nodes (wrt no-wait nodes) have connected...
     *   this means that we will now start or die
     */
    jam();
    switch(result_w_log){
    case CheckNodeGroups::Lose:
    {
      jam();
      goto missing_nodegroup;
    }
    case CheckNodeGroups::Win:
      signal->theData[1] = all ? 0x8001 : 0x8002;
      report_mask.assign(c_definedNodes);
      report_mask.bitANDC(c_start.m_starting_nodes);
      retVal = 1;
      goto start_report;
    case CheckNodeGroups::Partitioning:
      ndbrequire(result != CheckNodeGroups::Lose);
      signal->theData[1] =
        all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003);
      report_mask.assign(c_definedNodes);
      report_mask.bitANDC(c_start.m_starting_nodes);
      retVal = 1;
      goto start_report;
    }
  }

  if (now < partial_timeout)
  {
    /**
     * Still inside the partial-start window: keep waiting and report the
     * remaining time in (rounded) seconds.
     */
    jam();
    signal->theData[1] = c_restartPartialTimeout == ~0 ? 2 : 3;
    signal->theData[2] = Uint32((partial_timeout - now + 500) / 1000);
    report_mask.assign(wait);
    retVal = 0;
    goto start_report;
  }

  /**
   * Start partial has passed...check for partitioning...
   */
  switch(result_w_log){
  case CheckNodeGroups::Lose:
    jam();
    goto missing_nodegroup;
  case CheckNodeGroups::Partitioning:
    if (now < partitioned_timeout && result != CheckNodeGroups::Win)
    {
      signal->theData[1] = c_restartPartionedTimeout == ~0 ? 4 : 5;
      signal->theData[2] = Uint32((partitioned_timeout - now + 500) / 1000);
      report_mask.assign(c_definedNodes);
      report_mask.bitANDC(c_start.m_starting_nodes);
      retVal = 0;
      goto start_report;
    }
    // Fall through...
  case CheckNodeGroups::Win:
    signal->theData[1] =
      all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003);
    report_mask.assign(c_definedNodes);
    report_mask.bitANDC(c_start.m_starting_nodes);
    retVal = 1;
    goto start_report;
  }

  ndbrequire(false);

start_report:
  jam();
  {
    Uint32 sz = NdbNodeBitmask::Size;
    signal->theData[0] = NDB_LE_StartReport;
    signal->theData[3] = sz;
    Uint32* ptr = signal->theData+4;
    c_definedNodes.copyto(sz, ptr); ptr += sz;
    c_start.m_starting_nodes.copyto(sz, ptr); ptr += sz;
    c_start.m_skip_nodes.copyto(sz, ptr); ptr += sz;
    report_mask.copyto(sz, ptr);
    sendSignal(CMVMI_REF, GSN_EVENT_REP, signal,
               4+4*NdbNodeBitmask::Size, JBB);
  }
  return retVal;

missing_nodegroup:
  jam();
  // No initialisers here: these declarations are bypassed by the gotos above
  char buf[100], mask1[100], mask2[100];
  c_start.m_starting_nodes.getText(mask1);
  tmp.assign(c_start.m_starting_nodes);
  tmp.bitANDC(c_start.m_starting_nodes_w_log);
  tmp.getText(mask2);
  BaseString::snprintf(buf, sizeof(buf),
                       "Unable to start missing node group! "
                       " starting: %s (missing fs for: %s)",
                       mask1, mask2);
  progError(__LINE__, NDBD_EXIT_SR_RESTARTCONFLICT, buf);

  // progError() is not expected to return; the explicit return keeps control
  // from ever flowing off the end of a non-void function.
  return 0;
}
void
Qmgr
::
electionWon
(){
Qmgr
::
electionWon
(
Signal
*
signal
){
NodeRecPtr
myNodePtr
;
cpresident
=
getOwnNodeId
();
/* This node becomes president. */
myNodePtr
.
i
=
getOwnNodeId
();
...
...
@@ -831,8 +1375,21 @@ Qmgr::electionWon(){
c_clusterNodes
.
set
(
getOwnNodeId
());
cpresidentAlive
=
ZTRUE
;
c_st
opElectionT
ime
=
~
0
;
c_st
art_election_t
ime
=
~
0
;
c_start
.
reset
();
signal
->
theData
[
0
]
=
NDB_LE_CM_REGCONF
;
signal
->
theData
[
1
]
=
getOwnNodeId
();
signal
->
theData
[
2
]
=
cpresident
;
signal
->
theData
[
3
]
=
1
;
sendSignal
(
CMVMI_REF
,
GSN_EVENT_REP
,
signal
,
4
,
JBB
);
c_start
.
m_starting_nodes
.
clear
(
getOwnNodeId
());
if
(
c_start
.
m_starting_nodes
.
isclear
())
{
jam
();
sendSttorryLab
(
signal
);
}
}
/*
...
...
@@ -846,7 +1403,15 @@ Qmgr::electionWon(){
/*--------------------------------------------------------------*/
void
Qmgr
::
regreqTimeLimitLab
(
Signal
*
signal
)
{
if
(
cpresident
==
ZNIL
){
if
(
cpresident
==
ZNIL
)
{
if
(
c_start
.
m_president_candidate
==
ZNIL
)
{
jam
();
c_start
.
m_president_candidate
=
getOwnNodeId
();
ndbout_c
(
"Assigning candidate to self: %d"
,
getOwnNodeId
());
}
cmInfoconf010Lab
(
signal
);
}
}
//Qmgr::regreqTimelimitLab()
...
...
@@ -967,7 +1532,7 @@ Qmgr::cmAddPrepare(Signal* signal, NodeRecPtr nodePtr, const NodeRec * self){
ndbrequire
(
signal
->
header
.
theVerId_signalNumber
==
GSN_CM_ADD
);
c_start
.
m_nodes
.
clearWaitingFor
();
c_start
.
m_nodes
.
setWaitingFor
(
nodePtr
.
i
);
c_start
.
m_gsn
=
GSN_CM_
ADD
;
c_start
.
m_gsn
=
GSN_CM_
NODEINFOCONF
;
#else
warningEvent
(
"Enabling communication to CM_ADD node %u state=%d"
,
nodePtr
.
i
,
...
...
@@ -1256,6 +1821,17 @@ void Qmgr::execCM_ACKADD(Signal* signal)
*/
handleArbitNdbAdd
(
signal
,
addNodePtr
.
i
);
c_start
.
reset
();
if
(
c_start
.
m_starting_nodes
.
get
(
addNodePtr
.
i
))
{
jam
();
c_start
.
m_starting_nodes
.
clear
(
addNodePtr
.
i
);
if
(
c_start
.
m_starting_nodes
.
isclear
())
{
jam
();
sendSttorryLab
(
signal
);
}
}
return
;
}
//switch
ndbrequire
(
false
);
...
...
@@ -1409,7 +1985,8 @@ void Qmgr::initData(Signal* signal)
cnoPrepFailedNodes
=
0
;
creadyDistCom
=
ZFALSE
;
cpresident
=
ZNIL
;
cpresidentCandidate
=
ZNIL
;
c_start
.
m_president_candidate
=
ZNIL
;
c_start
.
m_president_candidate_gci
=
0
;
cpdistref
=
0
;
cneighbourh
=
ZNIL
;
cneighbourl
=
ZNIL
;
...
...
@@ -1437,15 +2014,33 @@ void Qmgr::initData(Signal* signal)
Uint32
hbDBAPI
=
1500
;
Uint32
arbitTimeout
=
1000
;
c_restartPartialTimeout
=
30000
;
c_restartPartionedTimeout
=
60000
;
c_restartFailureTimeout
=
~
0
;
ndb_mgm_get_int_parameter
(
p
,
CFG_DB_HEARTBEAT_INTERVAL
,
&
hbDBDB
);
ndb_mgm_get_int_parameter
(
p
,
CFG_DB_API_HEARTBEAT_INTERVAL
,
&
hbDBAPI
);
ndb_mgm_get_int_parameter
(
p
,
CFG_DB_ARBIT_TIMEOUT
,
&
arbitTimeout
);
ndb_mgm_get_int_parameter
(
p
,
CFG_DB_START_PARTIAL_TIMEOUT
,
&
c_restartPartialTimeout
);
if
(
c_restartPartialTimeout
==
0
){
ndb_mgm_get_int_parameter
(
p
,
CFG_DB_START_PARTITION_TIMEOUT
,
&
c_restartPartionedTimeout
);
ndb_mgm_get_int_parameter
(
p
,
CFG_DB_START_FAILURE_TIMEOUT
,
&
c_restartFailureTimeout
);
if
(
c_restartPartialTimeout
==
0
)
{
c_restartPartialTimeout
=
~
0
;
}
if
(
c_restartPartionedTimeout
==
0
)
{
c_restartPartionedTimeout
=
~
0
;
}
if
(
c_restartFailureTimeout
==
0
)
{
c_restartFailureTimeout
=
~
0
;
}
setHbDelay
(
hbDBDB
);
setHbApiDelay
(
hbDBAPI
);
setArbitTimeout
(
arbitTimeout
);
...
...
@@ -1872,10 +2467,23 @@ void Qmgr::execDISCONNECT_REP(Signal* signal)
const
Uint32
nodeId
=
rep
->
nodeId
;
const
Uint32
err
=
rep
->
err
;
c_connectedNodes
.
clear
(
nodeId
);
c_readnodes_nodes
.
clear
(
nodeId
);
NodeRecPtr
nodePtr
;
nodePtr
.
i
=
getOwnNodeId
();
ptrCheckGuard
(
nodePtr
,
MAX_NODES
,
nodeRec
);
char
buf
[
100
];
if
(
getNodeInfo
(
nodeId
).
getType
()
==
NodeInfo
::
DB
&&
getNodeState
().
startLevel
<
NodeState
::
SL_STARTED
)
{
jam
();
CRASH_INSERTION
(
932
);
BaseString
::
snprintf
(
buf
,
100
,
"Node %u disconected"
,
nodeId
);
progError
(
__LINE__
,
NDBD_EXIT_SR_OTHERNODEFAILED
,
buf
);
ndbrequire
(
false
);
}
switch
(
nodePtr
.
p
->
phase
){
case
ZRUNNING
:
jam
();
...
...
@@ -1893,9 +2501,12 @@ void Qmgr::execDISCONNECT_REP(Signal* signal)
case
ZAPI_ACTIVE
:
ndbrequire
(
false
);
case
ZAPI_INACTIVE
:
{
BaseString
::
snprintf
(
buf
,
100
,
"Node %u disconected"
,
nodeId
);
progError
(
__LINE__
,
NDBD_EXIT_SR_OTHERNODEFAILED
,
buf
);
ndbrequire
(
false
);
}
}
node_failed
(
signal
,
nodeId
);
}
//DISCONNECT_REP
...
...
@@ -2150,10 +2761,16 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode,
failedNodePtr
.
i
=
aFailedNode
;
ptrCheckGuard
(
failedNodePtr
,
MAX_NODES
,
nodeRec
);
FailRep
*
rep
=
(
FailRep
*
)
signal
->
getDataPtr
();
check_multi_node_shutdown
(
signal
);
if
(
failedNodePtr
.
i
==
getOwnNodeId
())
{
jam
();
Uint32
code
=
0
;
const
char
*
msg
=
0
;
char
extra
[
100
];
switch
(
aFailCause
){
case
FailRep
:
:
ZOWN_FAILURE
:
msg
=
"Own failure"
;
...
...
@@ -2174,17 +2791,51 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode,
case
FailRep
:
:
ZLINK_FAILURE
:
msg
=
"Connection failure"
;
break
;
case
FailRep
:
:
ZPARTITIONED_CLUSTER
:
{
code
=
NDBD_EXIT_ARBIT_SHUTDOWN
;
char
buf1
[
100
],
buf2
[
100
];
c_clusterNodes
.
getText
(
buf1
);
if
(
signal
->
getLength
()
==
FailRep
::
SignalLength
+
FailRep
::
ExtraLength
&&
signal
->
header
.
theVerId_signalNumber
==
GSN_FAIL_REP
)
{
jam
();
NdbNodeBitmask
part
;
part
.
assign
(
NdbNodeBitmask
::
Size
,
rep
->
partition
);
part
.
getText
(
buf2
);
BaseString
::
snprintf
(
extra
,
sizeof
(
extra
),
"Partitioned cluster!"
" Our cluster: %s other cluster: %s"
,
buf1
,
buf2
);
}
else
{
jam
();
BaseString
::
snprintf
(
extra
,
sizeof
(
extra
),
"Partitioned cluster!"
" Our cluster: %s "
,
buf1
);
}
msg
=
extra
;
break
;
}
case
FailRep
:
:
ZMULTI_NODE_SHUTDOWN
:
msg
=
"Multi node shutdown"
;
break
;
default:
msg
=
"<UNKNOWN>"
;
}
char
buf
[
100
];
BaseString
::
snprintf
(
buf
,
100
,
CRASH_INSERTION
(
932
);
char
buf
[
255
];
BaseString
::
snprintf
(
buf
,
sizeof
(
buf
),
"We(%u) have been declared dead by %u reason: %s(%u)"
,
getOwnNodeId
(),
refToNode
(
signal
->
getSendersBlockRef
()),
aFailCause
,
msg
?
msg
:
"<Unknown>"
);
progError
(
__LINE__
,
0
,
buf
);
progError
(
__LINE__
,
code
,
buf
);
return
;
}
//if
...
...
@@ -2241,7 +2892,9 @@ void Qmgr::execPREP_FAILREQ(Signal* signal)
{
NodeRecPtr
myNodePtr
;
jamEntry
();
check_multi_node_shutdown
(
signal
);
PrepFailReqRef
*
const
prepFail
=
(
PrepFailReqRef
*
)
&
signal
->
theData
[
0
];
BlockReference
Tblockref
=
prepFail
->
xxxBlockRef
;
...
...
@@ -3893,6 +4546,7 @@ Qmgr::stateArbitCrash(Signal* signal)
if
(
!
(
arbitRec
.
getTimediff
()
>
getArbitTimeout
()))
return
;
#endif
CRASH_INSERTION
(
932
);
progError
(
__LINE__
,
NDBD_EXIT_ARBIT_SHUTDOWN
,
"Arbitrator decided to shutdown this node"
);
}
...
...
@@ -3956,8 +4610,10 @@ Qmgr::execDUMP_STATE_ORD(Signal* signal)
case
1
:
infoEvent
(
"creadyDistCom = %d, cpresident = %d
\n
"
,
creadyDistCom
,
cpresident
);
infoEvent
(
"cpresidentAlive = %d, cpresidentCand = %d
\n
"
,
cpresidentAlive
,
cpresidentCandidate
);
infoEvent
(
"cpresidentAlive = %d, cpresidentCand = %d (gci: %d)
\n
"
,
cpresidentAlive
,
c_start
.
m_president_candidate
,
c_start
.
m_president_candidate_gci
);
infoEvent
(
"ctoStatus = %d
\n
"
,
ctoStatus
);
for
(
Uint32
i
=
1
;
i
<
MAX_NDB_NODES
;
i
++
){
if
(
getNodeInfo
(
i
).
getType
()
==
NodeInfo
::
DB
){
...
...
@@ -4054,3 +4710,40 @@ Qmgr::execAPI_BROADCAST_REP(Signal* signal)
NodeReceiverGroup
rg
(
API_CLUSTERMGR
,
mask
);
sendSignal
(
rg
,
api
.
gsn
,
signal
,
len
,
JBB
);
// forward sections
}
/**
 * STOP_REQ handler.
 *
 * Stores a copy of the received StopReq in c_stopReq. When the request
 * carries a sender reference, this node must be part of the request's node
 * mask (enforced with ndbrequire) and a STOP_CONF is sent back to that
 * sender with the request's senderData and this node's id in nodeState.
 */
void
Qmgr::execSTOP_REQ(Signal* signal)
{
  jamEntry();

  const StopReq* const request = (const StopReq*)signal->getDataPtr();
  c_stopReq = *request;

  // Nobody to answer: only the stored copy of the request is needed
  if (!c_stopReq.senderRef)
  {
    return;
  }

  ndbrequire(NdbNodeBitmask::get(c_stopReq.nodes, getOwnNodeId()));

  StopConf* const reply = (StopConf*)signal->getDataPtrSend();
  reply->senderData = c_stopReq.senderData;
  reply->nodeState = getOwnNodeId();
  sendSignal(c_stopReq.senderRef, GSN_STOP_CONF, signal,
             StopConf::SignalLength, JBA);
}
/**
 * If a previously stored stop request (c_stopReq) has a sender reference
 * and includes this node, carry it out immediately through CMVMI:
 * a START_ORD carrying the request's requestInfo when a restart was asked
 * for, otherwise a STOP_ORD. Does nothing when no such request is stored.
 */
void
Qmgr::check_multi_node_shutdown(Signal* signal)
{
  // No stop request recorded
  if (!c_stopReq.senderRef)
  {
    return;
  }

  // Stop request recorded, but this node is not one of its targets
  if (!NdbNodeBitmask::get(c_stopReq.nodes, getOwnNodeId()))
  {
    return;
  }

  jam();
  if (!StopReq::getPerformRestart(c_stopReq.requestInfo))
  {
    EXECUTE_DIRECT(CMVMI, GSN_STOP_ORD, signal, 1);
    return;
  }

  jam();
  StartOrd* const order = (StartOrd*)&signal->theData[0];
  order->restartInfo = c_stopReq.requestInfo;
  EXECUTE_DIRECT(CMVMI, GSN_START_ORD, signal, 2);
}
ndb/src/kernel/vm/Configuration.cpp
View file @
1b5ce338
...
...
@@ -55,6 +55,12 @@ enum ndbd_options {
NDB_STD_OPTS_VARS
;
// XXX should be my_bool ???
static
int
_daemon
,
_no_daemon
,
_foreground
,
_initial
,
_no_start
;
static
int
_initialstart
;
static
const
char
*
_nowait_nodes
;
extern
Uint32
g_start_type
;
extern
NdbNodeBitmask
g_nowait_nodes
;
/**
* Arguments to NDB process
*/
...
...
@@ -82,6 +88,14 @@ static struct my_option my_long_options[] =
" (implies --nodaemon)"
,
(
gptr
*
)
&
_foreground
,
(
gptr
*
)
&
_foreground
,
0
,
GET_BOOL
,
NO_ARG
,
0
,
0
,
0
,
0
,
0
,
0
},
{
"nowait-nodes"
,
NO_ARG
,
"Nodes that will not be waited for during start"
,
(
gptr
*
)
&
_nowait_nodes
,
(
gptr
*
)
&
_nowait_nodes
,
0
,
GET_STR
,
REQUIRED_ARG
,
0
,
0
,
0
,
0
,
0
,
0
},
{
"initial-start"
,
NO_ARG
,
"Perform initial start"
,
(
gptr
*
)
&
_initialstart
,
(
gptr
*
)
&
_initialstart
,
0
,
GET_BOOL
,
NO_ARG
,
0
,
0
,
0
,
0
,
0
,
0
},
{
0
,
0
,
0
,
0
,
0
,
0
,
GET_NO_ARG
,
NO_ARG
,
0
,
0
,
0
,
0
,
0
,
0
}
};
static
void
short_usage_sub
(
void
)
...
...
@@ -150,6 +164,37 @@ Configuration::init(int argc, char** argv)
globalData
.
ownId
=
0
;
if
(
_nowait_nodes
)
{
BaseString
str
(
_nowait_nodes
);
Vector
<
BaseString
>
arr
;
str
.
split
(
arr
,
","
);
for
(
Uint32
i
=
0
;
i
<
arr
.
size
();
i
++
)
{
char
*
endptr
=
0
;
long
val
=
strtol
(
arr
[
i
].
c_str
(),
&
endptr
,
10
);
if
(
*
endptr
)
{
ndbout_c
(
"Unable to parse nowait-nodes argument: %s : %s"
,
arr
[
i
].
c_str
(),
_nowait_nodes
);
exit
(
-
1
);
}
if
(
!
(
val
>
0
&&
val
<
MAX_NDB_NODES
))
{
ndbout_c
(
"Invalid nodeid specified in nowait-nodes: %d : %s"
,
val
,
_nowait_nodes
);
exit
(
-
1
);
}
g_nowait_nodes
.
set
(
val
);
}
}
if
(
_initialstart
)
{
_initialStart
=
true
;
g_start_type
|=
(
1
<<
NodeState
::
ST_INITIAL_START
);
}
return
true
;
}
...
...
ndb/test/ndbapi/testNodeRestart.cpp
View file @
1b5ce338
...
...
@@ -22,7 +22,7 @@
#include <NdbRestarts.hpp>
#include <Vector.hpp>
#include <signaldata/DumpStateOrd.hpp>
#include <Bitmask.hpp>
int
runLoadTable
(
NDBT_Context
*
ctx
,
NDBT_Step
*
step
){
...
...
@@ -669,6 +669,206 @@ runBug18414(NDBT_Context* ctx, NDBT_Step* step){
return
NDBT_FAILED
;
}
/**
 * Test step for bug 18612 (partitioned cluster during node restart).
 *
 * Per loop:
 *  1. Pick cnt/2 nodes from distinct node groups (sideA) and, for each, a
 *     node from the same node group (sideB).
 *  2. Stop the sideA nodes through the master (NdbcntrStopNodes dump) and
 *     wait until they are in no-start state.
 *  3. Enable restart-on-error-insert and insert error 932 in all nodes.
 *  4. Send dump code 9000 with the sideA list to every sideB node and with
 *     the sideB list to every sideA node
 *     (NOTE(review): presumably this blocks communication between the two
 *     halves - confirm against the DUMP_STATE_ORD handler).
 *  5. Start sideA, wait for start phase 2, send dump code 9001 to all
 *     nodes, and wait for sideA to end up in no-start state again.
 *  6. Restart each sideA node, then start everything and wait for the
 *     cluster to come up.
 *
 * Assumes two replicas. Returns NDBT_OK immediately (after stopping the
 * test) when fewer than two data nodes exist, NDBT_FAILED as soon as any
 * restarter call reports failure.
 */
int
runBug18612(NDBT_Context* ctx, NDBT_Step* step){

  // Assume two replicas
  NdbRestarter res;
  if (res.getNumDbNodes() < 2)
  {
    ctx->stopTest();
    return NDBT_OK;
  }

  Uint32 nodeCount = res.getNumDbNodes();

  int iteration = 0;
  while (iteration < ctx->getNumLoops())
  {
    int sideA[256];
    int sideB[256];
    bzero(sideA, sizeof(sideA));
    bzero(sideB, sizeof(sideB));
    Bitmask<4> used;

    Uint32 current = res.getDbNodeId(rand() % nodeCount);
    for (Uint32 slot = 0; slot < nodeCount/2; slot++)
    {
      // Move to a node in another node group until an unused one is found
      for (;;)
      {
        int other = res.getRandomNodeOtherNodeGroup(current, rand());
        if (other == -1)
          break;
        current = other;
        if (!used.get(current))
          break;
      }

      sideA[slot] = current;
      sideB[slot] = res.getRandomNodeSameNodeGroup(current, rand());

      ndbout_c("nodes %d %d", current, sideB[slot]);

      assert(!used.get(current));
      assert(!used.get(sideB[slot]));
      used.set(current);
      used.set(sideB[slot]);
    }

    ndbout_c("done");

    // Stop one half of the cluster via the master
    int dump[255];
    dump[0] = DumpStateOrd::NdbcntrStopNodes;
    memcpy(dump + 1, sideA, sizeof(int)*nodeCount/2);

    Uint32 masterId = res.getMasterNodeId();

    if (res.dumpStateOneNode(masterId, dump, 1+nodeCount/2) ||
        res.waitNodesNoStart(sideA, nodeCount/2))
    {
      return NDBT_FAILED;
    }

    int restartOnError[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };

    if (res.dumpStateAllNodes(restartOnError, 2) ||
        res.insertErrorInAllNodes(932))
    {
      return NDBT_FAILED;
    }

    // Tell every sideB node about the sideA list ...
    dump[0] = 9000;
    memcpy(dump + 1, sideA, sizeof(int)*nodeCount/2);
    for (Uint32 k = 0; k < nodeCount/2; k++)
    {
      if (res.dumpStateOneNode(sideB[k], dump, 1+nodeCount/2))
        return NDBT_FAILED;
    }

    // ... and every sideA node about the sideB list
    dump[0] = 9000;
    memcpy(dump + 1, sideB, sizeof(int)*nodeCount/2);
    for (Uint32 k = 0; k < nodeCount/2; k++)
    {
      if (res.dumpStateOneNode(sideA[k], dump, 1+nodeCount/2))
        return NDBT_FAILED;
    }

    if (res.startNodes(sideA, nodeCount/2) ||
        res.waitNodesStartPhase(sideA, nodeCount/2, 2))
    {
      return NDBT_FAILED;
    }

    dump[0] = 9001;
    for (Uint32 k = 0; k < nodeCount/2; k++)
    {
      if (res.dumpStateAllNodes(dump, 2))
        return NDBT_FAILED;
    }

    if (res.waitNodesNoStart(sideA, nodeCount/2))
      return NDBT_FAILED;

    for (Uint32 k = 0; k < nodeCount/2; k++)
    {
      if (res.restartOneDbNode(sideA[k], true, true, true))
        return NDBT_FAILED;
    }

    if (res.waitNodesNoStart(sideA, nodeCount/2) ||
        res.startAll() ||
        res.waitClusterStarted())
    {
      return NDBT_FAILED;
    }

    iteration++;
  }
  return NDBT_OK;
}
/**
 * Test step for bug 18612, system-restart variant.
 *
 * Per loop:
 *  1. Pick cnt/2 nodes from distinct node groups (sideA) and, for each, a
 *     node from the same node group (sideB).
 *  2. Restart all nodes with restartAll(false, true, false).
 *  3. Send dump code 9000 with the sideA list to every sideB node and with
 *     the sideB list to every sideA node
 *     (NOTE(review): presumably this blocks communication between the two
 *     halves - confirm against the DUMP_STATE_ORD handler).
 *  4. Enable restart-on-error-insert, insert error 932 in all nodes, start
 *     all nodes and wait for start phase 2.
 *  5. Send dump code 9001 to all nodes; the step fails only if neither the
 *     whole cluster, nor sideA, nor sideB reaches no-start state.
 *  6. Start everything and wait for the cluster to come up.
 *
 * Assumes two replicas. Returns NDBT_OK immediately (after stopping the
 * test) when fewer than two data nodes exist, NDBT_FAILED as soon as any
 * required restarter call reports failure.
 */
int
runBug18612SR(NDBT_Context* ctx, NDBT_Step* step){

  // Assume two replicas
  NdbRestarter res;
  if (res.getNumDbNodes() < 2)
  {
    ctx->stopTest();
    return NDBT_OK;
  }

  Uint32 nodeCount = res.getNumDbNodes();

  int iteration = 0;
  while (iteration < ctx->getNumLoops())
  {
    int sideA[256];
    int sideB[256];
    bzero(sideA, sizeof(sideA));
    bzero(sideB, sizeof(sideB));
    Bitmask<4> used;

    Uint32 current = res.getDbNodeId(rand() % nodeCount);
    for (Uint32 slot = 0; slot < nodeCount/2; slot++)
    {
      // Move to a node in another node group until an unused one is found
      for (;;)
      {
        int other = res.getRandomNodeOtherNodeGroup(current, rand());
        if (other == -1)
          break;
        current = other;
        if (!used.get(current))
          break;
      }

      sideA[slot] = current;
      sideB[slot] = res.getRandomNodeSameNodeGroup(current, rand());

      ndbout_c("nodes %d %d", current, sideB[slot]);

      assert(!used.get(current));
      assert(!used.get(sideB[slot]));
      used.set(current);
      used.set(sideB[slot]);
    }

    ndbout_c("done");

    if (res.restartAll(false, true, false))
      return NDBT_FAILED;

    // Tell every sideB node about the sideA list ...
    int dump[255];
    dump[0] = 9000;
    memcpy(dump + 1, sideA, sizeof(int)*nodeCount/2);
    for (Uint32 k = 0; k < nodeCount/2; k++)
    {
      if (res.dumpStateOneNode(sideB[k], dump, 1+nodeCount/2))
        return NDBT_FAILED;
    }

    // ... and every sideA node about the sideB list
    dump[0] = 9000;
    memcpy(dump + 1, sideB, sizeof(int)*nodeCount/2);
    for (Uint32 k = 0; k < nodeCount/2; k++)
    {
      if (res.dumpStateOneNode(sideA[k], dump, 1+nodeCount/2))
        return NDBT_FAILED;
    }

    int restartOnError[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };

    if (res.dumpStateAllNodes(restartOnError, 2) ||
        res.insertErrorInAllNodes(932) ||
        res.startAll() ||
        res.waitClusterStartPhase(2))
    {
      return NDBT_FAILED;
    }

    dump[0] = 9001;
    for (Uint32 k = 0; k < nodeCount/2; k++)
    {
      if (res.dumpStateAllNodes(dump, 2))
        return NDBT_FAILED;
    }

    // Fail only when none of the three waits succeeds; each later wait is
    // attempted only if the previous one reported failure.
    if (res.waitClusterNoStart(30) &&
        res.waitNodesNoStart(sideA, nodeCount/2, 10) &&
        res.waitNodesNoStart(sideB, nodeCount/2, 10))
    {
      return NDBT_FAILED;
    }

    if (res.startAll() ||
        res.waitClusterStarted())
    {
      return NDBT_FAILED;
    }

    iteration++;
  }
  return NDBT_OK;
}
NDBT_TESTSUITE
(
testNodeRestart
);
TESTCASE
(
"NoLoad"
,
"Test that one node at a time can be stopped and then restarted "
\
...
...
@@ -963,6 +1163,18 @@ TESTCASE("Bug18414",
STEP
(
runBug18414
);
FINALIZER
(
runClearTable
);
}
TESTCASE
(
"Bug18612"
,
"Test bug with partitioned clusters"
){
INITIALIZER
(
runLoadTable
);
STEP
(
runBug18612
);
FINALIZER
(
runClearTable
);
}
TESTCASE
(
"Bug18612SR"
,
"Test bug with partitioned clusters"
){
INITIALIZER
(
runLoadTable
);
STEP
(
runBug18612SR
);
FINALIZER
(
runClearTable
);
}
NDBT_TESTSUITE_END
(
testNodeRestart
);
int
main
(
int
argc
,
const
char
**
argv
){
...
...
ndb/test/run-test/daily-basic-tests.txt
View file @
1b5ce338
...
...
@@ -433,10 +433,18 @@ args: -n Bug16772 T1
#cmd: testSystemRestart
#args: -n Bug18385 T1
#
max-time:
5
00
max-time:
10
00
cmd: testNodeRestart
args: -n Bug18414 T1
max-time: 1000
cmd: testNodeRestart
args: -n Bug18612 T1
max-time: 1000
cmd: testNodeRestart
args: -n Bug18612SR T1
# OLD FLEX
max-time: 500
cmd: flexBench
...
...
ndb/test/src/NdbRestarts.cpp
View file @
1b5ce338
...
...
@@ -445,8 +445,7 @@ int twoNodeFailure(NdbRestarter& _restarter,
<<
") secs "
<<
endl
;
NdbSleep_SecSleep
(
seconds
);
randomId
=
(
rand
()
%
_restarter
.
getNumDbNodes
());
nodeId
=
_restarter
.
getDbNodeId
(
randomId
);
nodeId
=
_restarter
.
getRandomNodeOtherNodeGroup
(
nodeId
,
rand
());
g_info
<<
_restart
->
m_name
<<
": node = "
<<
nodeId
<<
endl
;
CHECK
(
_restarter
.
insertErrorInNode
(
nodeId
,
9999
)
==
0
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment