Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
N
neoppod
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Levin Zimmermann
neoppod
Commits
e8ac7104
Commit
e8ac7104
authored
Aug 04, 2017
by
Kirill Smelkov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
.
parent
e17082f1
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
77 additions
and
31 deletions
+77
-31
go/neo/server/master.go
go/neo/server/master.go
+75
-31
go/neo/server/server.go
go/neo/server/server.go
+2
-0
No files found.
go/neo/server/master.go
View file @
e8ac7104
...
...
@@ -68,7 +68,6 @@ type Master struct {
type
nodeCome
struct
{
conn
*
neo
.
Conn
idReq
*
neo
.
RequestIdentification
// we received this identification request
idResp
chan
neo
.
Msg
// what we reply (AcceptIdentification | Error) XXX kill
}
// event: node disconnects
...
...
@@ -168,7 +167,7 @@ func (m *Master) Run(ctx context.Context) error {
}
select
{
case
m
.
nodeCome
<-
nodeCome
{
conn
,
idReq
,
nil
/*XXX kill*/
}
:
case
m
.
nodeCome
<-
nodeCome
{
conn
,
idReq
}
:
// ok
case
<-
serveCtx
.
Done
()
:
...
...
@@ -256,29 +255,52 @@ func (m *Master) recovery(ctx context.Context) (err error) {
defer
rcancel
()
recovery
:=
make
(
chan
storRecovery
)
inprogress
:=
0
inprogress
:=
sync
.
WaitGroup
{}
// start recovery on all storages we are currently in touch with
for
_
,
stor
:=
range
m
.
nodeTab
.
StorageList
()
{
if
stor
.
NodeState
>
neo
.
DOWN
{
// XXX state cmp ok ? XXX or stor.Link != nil ?
inprogress
++
go
storCtlRecovery
(
rctx
,
stor
.
Link
,
recovery
)
inprogress
.
Add
(
1
)
go
func
()
{
defer
inprogress
.
Done
()
storCtlRecovery
(
rctx
,
stor
.
Link
,
recovery
)
}()
}
}
loop
:
for
{
select
{
//
XXX this is m.Accept() and semantic must be semantic of net.Accept() !
//
new connection comes in
case
n
:=
<-
m
.
nodeCome
:
node
,
ok
:=
m
.
accept
(
n
,
/* XXX only accept storages -> PENDING */
)
if
!
ok
{
break
node
,
resp
,
ok
:=
m
.
accept
(
n
,
/* XXX only accept storages -> PENDING */
)
// if new storage arrived - start recovery on it too
inprogress
.
Add
(
1
)
go
func
()
{
defer
inprogress
.
Done
()
// send identification response
// XXX cancel on ctx
err
:=
n
.
conn
.
Send
(
resp
)
if
err
!=
nil
{
// XXX log
}
n
.
conn
.
Close
()
// XXX err
if
err
!=
nil
||
!
ok
{
// XXX must let recovery know it failed to update nodeTab
if
ok
{
recovery
<-
storRecovery
{
err
:
err
}
}
c
.
conn
.
Link
()
.
Close
()
// XXX err
return
}
// new storage arrived - start recovery on it too
inprogress
++
go
storCtlRecovery
(
rctx
,
node
.
Link
,
recovery
)
err
:=
welcome
(
n
.
conn
,
resp
)
// start recovery
storCtlRecovery
(
rctx
,
node
.
Link
,
recovery
)
}()
case
n
:=
<-
m
.
nodeLeave
:
m
.
nodeTab
.
UpdateLinkDown
(
n
.
link
)
...
...
@@ -287,12 +309,14 @@ loop:
// a storage node came through recovery - let's see whether
// ptid ↑ and if so we should take partition table from there
case
r
:=
<-
recovery
:
inprogress
--
//
inprogress--
if
r
.
err
!=
nil
{
// XXX err ctx?
// XXX log here or in producer?
fmt
.
Printf
(
"master: %v
\n
"
,
r
.
err
)
// XXX close stor link / update .nodeTab ?
break
}
...
...
@@ -337,9 +361,22 @@ loop:
}
}
// consume left recovery responses (which should come without delay since it was cancelled)
for
;
inprogress
>
0
;
inprogress
--
{
<-
recovery
// // consume left recovery responses (which should come without delay since it was cancelled)
// for ; inprogress > 0; inprogress-- {
// <-recovery
// }
done
:=
make
(
chan
struct
{})
go
func
()
{
inprogress
.
Wait
()
close
(
done
)
}()
for
{
select
{
case
<-
recovery
:
case
<-
done
:
return
err
}
return
err
...
...
@@ -376,7 +413,7 @@ func storCtlRecovery(ctx context.Context, link *neo.NodeLink, res chan storRecov
}()
defer
xerr
.
Contextf
(
&
err
,
"%s: stor recovery"
,
link
)
conn
,
err
:=
link
.
NewConn
()
// FIXME bad
conn
,
err
:=
link
.
NewConn
()
// FIXME bad
-> not bad
if
err
!=
nil
{
return
}
...
...
@@ -514,6 +551,8 @@ loop:
}
}
// XXX if m.partTab.OperationalWith(&.nodeTab, RUNNING) -> break (ok)
case
ech
:=
<-
m
.
ctlStart
:
ech
<-
nil
// we are already starting
...
...
@@ -541,7 +580,7 @@ loop:
type
storVerify
struct
{
lastOid
zodb
.
Oid
lastTid
zodb
.
Tid
link
*
neo
.
NodeLink
link
*
neo
.
NodeLink
// XXX -> Node
err
error
}
...
...
@@ -561,6 +600,8 @@ func storCtlVerify(ctx context.Context, link *neo.NodeLink, res chan storVerify)
// FIXME stub
conn
,
_
:=
link
.
NewConn
()
// XXX NotifyPT (so storages save locally recovered PT)
locked
:=
neo
.
AnswerLockedTransactions
{}
err
=
conn
.
Ask
(
&
neo
.
LockedTransactions
{},
&
locked
)
if
err
!=
nil
{
...
...
@@ -648,16 +689,18 @@ loop:
return
err
}
// accept processes identification request of just connected node and either accepts or declines it
// accept processes identification request of just connected node and either accepts or declines it
.
// if node identification is accepted nodeTab is updated and corresponding node entry is returned
func
(
m
*
Master
)
accept
(
n
nodeCome
)
(
node
*
neo
.
Node
,
ok
bool
)
{
// XXX no need for ok
func
(
m
*
Master
)
accept
(
n
nodeCome
)
(
node
*
neo
.
Node
,
resp
neo
.
Msg
,
ok
bool
)
{
// TODO log node accept/rejected
// XXX also verify ? :
// - NodeType valid
// - IdTimestamp ?
if
n
.
idReq
.
ClusterName
!=
m
.
node
.
ClusterName
{
n
.
idResp
<-
&
neo
.
Error
{
neo
.
PROTOCOL_ERROR
,
"cluster name mismatch"
}
// XXX
return
nil
,
false
return
nil
,
&
neo
.
Error
{
neo
.
PROTOCOL_ERROR
,
"cluster name mismatch"
},
false
}
nodeType
:=
n
.
idReq
.
NodeType
...
...
@@ -672,20 +715,19 @@ func (m *Master) accept(n nodeCome) (node *neo.Node, ok bool) {
if
node
!=
nil
{
// reject - uuid is already occupied by someone else
// XXX check also for down state - it could be the same node reconnecting
n
.
idResp
<-
&
neo
.
Error
{
neo
.
PROTOCOL_ERROR
,
"uuid %v already used by another node"
}
// XXX
return
nil
,
false
return
nil
,
&
neo
.
Error
{
neo
.
PROTOCOL_ERROR
,
"uuid %v already used by another node"
/*XXX*/
},
false
}
// XXX accept only certain kind of nodes depending on .clusterState, e.g.
switch
nodeType
{
case
neo
.
CLIENT
:
n
.
idResp
<-
&
neo
.
Error
{
neo
.
NOT_READY
,
"cluster not operational"
}
return
nil
,
&
neo
.
Error
{
neo
.
NOT_READY
,
"cluster not operational"
}
// XXX ...
}
n
.
idResp
<-
&
neo
.
AcceptIdentification
{
accept
:=
&
neo
.
AcceptIdentification
{
NodeType
:
neo
.
MASTER
,
MyNodeUUID
:
m
.
node
.
MyInfo
.
NodeUUID
,
NumPartitions
:
1
,
// FIXME hardcoded
...
...
@@ -713,7 +755,7 @@ func (m *Master) accept(n nodeCome) (node *neo.Node, ok bool) {
}
node
=
m
.
nodeTab
.
Update
(
nodeInfo
,
n
.
conn
.
Link
())
// NOTE this notifies all nodeTab subscribers
return
node
,
true
return
node
,
accept
,
true
}
// allocUUID allocates new node uuid for a node of kind nodeType
...
...
@@ -734,6 +776,7 @@ func (m *Master) allocUUID(nodeType neo.NodeType) neo.NodeUUID {
panic
(
"all uuid allocated ???"
)
// XXX more robust ?
}
/* XXX goes away
// ServeLink serves incoming node-node link connection
// XXX +error return?
func (m *Master) ServeLink(ctx context.Context, link *neo.NodeLink) {
...
...
@@ -781,9 +824,9 @@ func (m *Master) ServeLink(ctx context.Context, link *neo.NodeLink) {
}
// convey identification request to master and we are done here - the master takes on the torch
m
.
nodeCome
<-
nodeCome
{
conn
,
idReq
,
nil
/*XXX kill*/
}
m.nodeCome <- nodeCome{conn, idReq, nil
XXX kill
}
/
*
/
/////////////////////////////////////////////////
// if master accepted this node - don't forget to notify when it leaves
_, rejected := idResp.(error)
if !rejected {
...
...
@@ -877,8 +920,9 @@ func (m *Master) ServeLink(ctx context.Context, link *neo.NodeLink) {
// storage:
m.DriveStorage(ctx, link)
*
/
////////////////
/
}
*/
// ServeClient serves incoming connection on which peer identified itself as client
// XXX +error return?
...
...
go/neo/server/server.go
View file @
e8ac7104
...
...
@@ -79,6 +79,7 @@ func Serve(ctx context.Context, l *neo.Listener, srv Server) error {
}
*/
/*
// ----------------------------------------
// XXX goes away? (we need a func to make sure to recv RequestIdentification
...
...
@@ -128,3 +129,4 @@ func IdentifyPeer(link *neo.NodeLink, myNodeType neo.NodeType) (nodeInfo neo.Req
return req, nil
}
*/
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment