Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
b14ed530
Commit
b14ed530
authored
Mar 17, 2006
by
jonas@perch.ndb.mysql.com
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ndb - bug#16772
don't allow a node to join the cluster until all nodes have completed failure handling
parent
5fbb1fb4
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
141 additions
and
14 deletions
+141
-14
ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
+87
-14
ndb/test/ndbapi/testNodeRestart.cpp
ndb/test/ndbapi/testNodeRestart.cpp
+50
-0
ndb/test/run-test/daily-basic-tests.txt
ndb/test/run-test/daily-basic-tests.txt
+4
-0
No files found.
ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
View file @
b14ed530
...
...
@@ -257,6 +257,7 @@ void Qmgr::setArbitTimeout(UintR aArbitTimeout)
void
Qmgr
::
execCONNECT_REP
(
Signal
*
signal
)
{
jamEntry
();
const
Uint32
nodeId
=
signal
->
theData
[
0
];
c_connectedNodes
.
set
(
nodeId
);
NodeRecPtr
nodePtr
;
...
...
@@ -264,9 +265,13 @@ void Qmgr::execCONNECT_REP(Signal* signal)
ptrCheckGuard
(
nodePtr
,
MAX_NODES
,
nodeRec
);
switch
(
nodePtr
.
p
->
phase
){
case
ZSTARTING
:
case
ZRUNNING
:
jam
();
if
(
!
c_start
.
m_nodes
.
isWaitingFor
(
nodeId
)){
jam
();
return
;
}
break
;
case
ZRUNNING
:
case
ZPREPARE_FAIL
:
case
ZFAIL_CLOSING
:
jam
();
...
...
@@ -277,21 +282,28 @@ void Qmgr::execCONNECT_REP(Signal* signal)
case
ZAPI_INACTIVE
:
return
;
}
if
(
!
c_start
.
m_nodes
.
isWaitingFor
(
nodeId
)){
jam
();
return
;
}
switch
(
c_start
.
m_gsn
){
case
GSN_CM_REGREQ
:
jam
();
sendCmRegReq
(
signal
,
nodeId
);
return
;
case
GSN_CM_NODEINFOREQ
:
{
case
GSN_CM_NODEINFOREQ
:
jam
();
sendCmNodeInfoReq
(
signal
,
nodeId
,
nodePtr
.
p
);
return
;
case
GSN_CM_ADD
:{
jam
();
ndbrequire
(
getOwnNodeId
()
!=
cpresident
);
c_start
.
m_nodes
.
clearWaitingFor
(
nodeId
);
c_start
.
m_gsn
=
RNIL
;
NodeRecPtr
addNodePtr
;
addNodePtr
.
i
=
nodeId
;
ptrCheckGuard
(
addNodePtr
,
MAX_NDB_NODES
,
nodeRec
);
cmAddPrepare
(
signal
,
addNodePtr
,
nodePtr
.
p
);
return
;
}
default:
return
;
...
...
@@ -924,15 +936,27 @@ Qmgr::cmAddPrepare(Signal* signal, NodeRecPtr nodePtr, const NodeRec * self){
return
;
case
ZFAIL_CLOSING
:
jam
();
#ifdef VM_TRACE
ndbout_c
(
"Enabling communication to CM_ADD node state=%d"
,
nodePtr
.
p
->
phase
);
#endif
#if 1
warningEvent
(
"Recieved request to incorperate node %u, "
"while error handling has not yet completed"
,
nodePtr
.
i
);
ndbrequire
(
getOwnNodeId
()
!=
cpresident
);
ndbrequire
(
signal
->
header
.
theVerId_signalNumber
==
GSN_CM_ADD
);
c_start
.
m_nodes
.
clearWaitingFor
();
c_start
.
m_nodes
.
setWaitingFor
(
nodePtr
.
i
);
c_start
.
m_gsn
=
GSN_CM_ADD
;
#else
warningEvent
(
"Enabling communication to CM_ADD node %u state=%d"
,
nodePtr
.
i
,
nodePtr
.
p
->
phase
);
nodePtr
.
p
->
phase
=
ZSTARTING
;
nodePtr
.
p
->
failState
=
NORMAL
;
signal
->
theData
[
0
]
=
0
;
signal
->
theData
[
1
]
=
nodePtr
.
i
;
sendSignal
(
CMVMI_REF
,
GSN_OPEN_COMREQ
,
signal
,
2
,
JBA
);
#endif
return
;
case
ZSTARTING
:
break
;
...
...
@@ -1766,11 +1790,27 @@ void Qmgr::execNDB_FAILCONF(Signal* signal)
jamEntry
();
failedNodePtr
.
i
=
signal
->
theData
[
0
];
if
(
ERROR_INSERTED
(
930
))
{
CLEAR_ERROR_INSERT_VALUE
;
infoEvent
(
"Discarding NDB_FAILCONF for %u"
,
failedNodePtr
.
i
);
return
;
}
ptrCheckGuard
(
failedNodePtr
,
MAX_NODES
,
nodeRec
);
if
(
failedNodePtr
.
p
->
failState
==
WAITING_FOR_NDB_FAILCONF
){
failedNodePtr
.
p
->
failState
=
NORMAL
;
}
else
{
jam
();
char
buf
[
100
];
BaseString
::
snprintf
(
buf
,
100
,
"Received NDB_FAILCONF for node %u with state: %d %d"
,
failedNodePtr
.
i
,
failedNodePtr
.
p
->
phase
,
failedNodePtr
.
p
->
failState
);
progError
(
__LINE__
,
0
,
buf
);
systemErrorLab
(
signal
,
__LINE__
);
}
//if
if
(
cpresident
==
getOwnNodeId
())
{
...
...
@@ -2077,10 +2117,42 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode,
ptrCheckGuard
(
failedNodePtr
,
MAX_NODES
,
nodeRec
);
if
(
failedNodePtr
.
i
==
getOwnNodeId
())
{
jam
();
systemErrorLab
(
signal
,
__LINE__
);
const
char
*
msg
=
0
;
switch
(
aFailCause
){
case
FailRep
:
:
ZOWN_FAILURE
:
msg
=
"Own failure"
;
break
;
case
FailRep
:
:
ZOTHER_NODE_WHEN_WE_START
:
case
FailRep
:
:
ZOTHERNODE_FAILED_DURING_START
:
msg
=
"Other node died during start"
;
break
;
case
FailRep
:
:
ZIN_PREP_FAIL_REQ
:
msg
=
"Prep fail"
;
break
;
case
FailRep
:
:
ZSTART_IN_REGREQ
:
msg
=
"Start timeout"
;
break
;
case
FailRep
:
:
ZHEARTBEAT_FAILURE
:
msg
=
"Hearbeat failure"
;
break
;
case
FailRep
:
:
ZLINK_FAILURE
:
msg
=
"Connection failure"
;
break
;
}
char
buf
[
100
];
BaseString
::
snprintf
(
buf
,
100
,
"We(%u) have been declared dead by %u reason: %s(%u)"
,
getOwnNodeId
(),
refToNode
(
signal
->
getSendersBlockRef
()),
aFailCause
,
msg
?
msg
:
"<Unknown>"
);
progError
(
__LINE__
,
0
,
buf
);
return
;
}
//if
myNodePtr
.
i
=
getOwnNodeId
();
ptrCheckGuard
(
myNodePtr
,
MAX_NDB_NODES
,
nodeRec
);
if
(
myNodePtr
.
p
->
phase
!=
ZRUNNING
)
{
...
...
@@ -2791,6 +2863,7 @@ void Qmgr::failReport(Signal* signal,
cfailureNr
=
cprepareFailureNr
;
ctoFailureNr
=
0
;
ctoStatus
=
Q_ACTIVE
;
c_start
.
reset
();
// Don't take over nodes being started
if
(
cnoCommitFailedNodes
>
0
)
{
jam
();
/**-----------------------------------------------------------------
...
...
ndb/test/ndbapi/testNodeRestart.cpp
View file @
b14ed530
...
...
@@ -535,6 +535,52 @@ int runBug15685(NDBT_Context* ctx, NDBT_Step* step){
return
NDBT_FAILED
;
}
/**
 * Regression test for bug#16772.
 *
 * Error 930 is inserted into a surviving node, a different node is
 * restarted, and the test checks that the restarted node does not get
 * to start phase 3 until the withheld NDB_FAILCONF is delivered through
 * a dump command.
 *
 * @param ctx   test context; stopTest() is called on it when the cluster
 *              has fewer than two data nodes
 * @param step  test step (not used by this function)
 * @return NDBT_OK when the cluster is too small to run the test, or when
 *         the wait for start phase 3 returned non-zero and every
 *         restarter call succeeded; NDBT_FAILED otherwise
 */
int
runBug16772(NDBT_Context* ctx, NDBT_Step* step)
{
  NdbRestarter restarter;

  // Two distinct data nodes are required; otherwise end the test as OK.
  if (restarter.getNumDbNodes() < 2)
  {
    ctx->stopTest();
    return NDBT_OK;
  }

  const int aliveNodeId = restarter.getRandomNotMasterNodeId(rand());

  // Keep drawing until the node to restart differs from the alive node.
  int deadNodeId;
  do
  {
    deadNodeId = restarter.getDbNodeId(rand() % restarter.getNumDbNodes());
  } while (deadNodeId == aliveNodeId);

  // Error 930 is inserted into the alive node before the restart.
  if (restarter.insertErrorInNode(aliveNodeId, 930) != 0)
  {
    return NDBT_FAILED;
  }

  if (restarter.restartOneDbNode(deadNodeId,
                                 /** initial */ false,
                                 /** nostart */ true,
                                 /** abort   */ true) != 0)
  {
    return NDBT_FAILED;
  }

  if (restarter.waitNodesNoStart(&deadNodeId, 1) != 0)
  {
    return NDBT_FAILED;
  }

  if (restarter.startNodes(&deadNodeId, 1) != 0)
  {
    return NDBT_FAILED;
  }

  // The start is expected to hang because NDB_FAILCONF is thrown away,
  // so this wait for start phase 3 should fail (non-zero result).
  // NOTE(review): the last argument (10) is presumably a timeout - confirm.
  const int phaseWaitResult =
    restarter.waitNodesStartPhase(&deadNodeId, 1, 3, 10);

  // Now have the alive node send the NDB_FAILCONF for the dead node.
  int dump[] = { 7020, 323, 252, deadNodeId };
  if (restarter.dumpStateOneNode(aliveNodeId, dump, 4) != 0)
  {
    return NDBT_FAILED;
  }

  if (restarter.waitNodesStarted(&deadNodeId, 1) != 0)
  {
    return NDBT_FAILED;
  }

  // A zero result means start phase 3 was reached without the
  // NDB_FAILCONF, which is the behaviour this test must reject.
  if (phaseWaitResult == 0)
  {
    return NDBT_FAILED;
  }
  return NDBT_OK;
}
NDBT_TESTSUITE
(
testNodeRestart
);
TESTCASE
(
"NoLoad"
,
...
...
@@ -820,6 +866,10 @@ TESTCASE("Bug15685",
STEP
(
runBug15685
);
FINALIZER
(
runClearTable
);
}
// Test case "Bug16772": its single step is runBug16772.
// (NF in the description presumably stands for node failure.)
TESTCASE
(
"Bug16772"
,
"Test bug with restarting before NF handling is complete"
){
STEP
(
runBug16772
);
}
NDBT_TESTSUITE_END
(
testNodeRestart
);
int
main
(
int
argc
,
const
char
**
argv
){
...
...
ndb/test/run-test/daily-basic-tests.txt
View file @
b14ed530
...
...
@@ -446,6 +446,10 @@ max-time: 500
cmd: testNodeRestart
args: -n Bug15685 T1
max-time: 500
cmd: testNodeRestart
args: -n Bug16772 T1
# OLD FLEX
max-time: 500
cmd: flexBench
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment