Levin Zimmermann / neoppod / Commits / 4d25e431

Commit 4d25e431, authored Feb 18, 2021 by Kirill Smelkov
Commit message: .

parent 5cc06ec3
Showing 3 changed files with 14 additions and 19 deletions (+14 -19):
    go/neo/client_test.go   +1  -2
    go/neo/master.go        +12 -15
    go/neo/storage.go       +1  -2
go/neo/client_test.go (view file @ 4d25e431)

@@ -27,7 +27,6 @@ import (
 	"net/url"
 	"os"
 	"os/exec"
-	"strings"
 	"testing"
 	"time"
@@ -280,7 +279,7 @@ func StartNEOGoSrv(opt NEOSrvOptions) (_ *NEOGoSrv, err error) {
 		if err == nil {
 			break
 		}
-		if !strings.HasSuffix(err.Error(), "start: cluster is non-operational") { // XXX
+		if !errors.Is(err, ErrStartNonOperational) {
 			return nil, err
 		}
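The change above replaces substring matching on the error text with errors.Is against the exported sentinel, so the check keeps working even when the error gets wrapped with extra context. A minimal standalone sketch of the difference (start() and its wrapping prefix are hypothetical stand-ins, not neo code):

package main

import (
	"errors"
	"fmt"
	"strings"
)

// Sentinel error, analogous to ErrStartNonOperational in go/neo/master.go.
var ErrStartNonOperational = errors.New("start: cluster is non-operational")

// start stands in for the operation that StartNEOGoSrv retries.
func start() error {
	// Wrapping with %w keeps the sentinel reachable via errors.Is.
	return fmt.Errorf("master: %w", ErrStartNonOperational)
}

func main() {
	err := start()

	// Old style: matches only while the message happens to end with this text.
	fmt.Println(strings.HasSuffix(err.Error(), "start: cluster is non-operational")) // true, but fragile

	// New style: walks the wrap chain and matches the sentinel value itself.
	fmt.Println(errors.Is(err, ErrStartNonOperational)) // true
}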
go/neo/master.go (view file @ 4d25e431)

@@ -180,6 +180,7 @@ func NewMaster(clusterName string, net xnet.Networker) *Master {
 // NOTE upon successful return cluster is not yet in running state - the transition will
 // take time and could be also automatically aborted due to cluster environment change (e.g.
 // a storage node goes down).
+var ErrStartNonOperational = errors.New("start: cluster is non-operational")
 func (m *Master) Start() error {
 	ech := make(chan error)
 	m.ctlStart <- ech
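Start hands the main goroutine a fresh reply channel via .ctlStart and (in code beyond this hunk) waits on that channel for the outcome. A self-contained sketch of this request/reply-over-channel pattern, with an always-refusing responder standing in for master's actual main loop:

package main

import (
	"errors"
	"fmt"
)

var ErrStartNonOperational = errors.New("start: cluster is non-operational")

type Master struct {
	ctlStart chan chan error // each start request carries its own reply channel
}

// Start mirrors the shape of the hunk: make a reply channel, hand it to the
// main goroutine, then block until the request is answered.
func (m *Master) Start() error {
	ech := make(chan error)
	m.ctlStart <- ech
	return <-ech
}

func main() {
	m := &Master{ctlStart: make(chan chan error)}

	// Stand-in for master's main loop; here it always refuses to start.
	go func() {
		for ech := range m.ctlStart {
			ech <- ErrStartNonOperational
		}
	}()

	fmt.Println(m.Start()) // start: cluster is non-operational
}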
@@ -195,6 +196,7 @@ func (m *Master) Stop() {
 // setClusterState sets .clusterState and notifies subscribers.
 func (m *Master) setClusterState(ctx context.Context, state proto.ClusterState) {
+	log.Infof(ctx, "cluster state <- %s", state)
 	m.node.State.Code.Set(state)
 	//m.notifyAll(ctx, &_ΔStateCode{state})	TODO enable
 }
@@ -235,7 +237,6 @@ func (m *Master) Run(ctx context.Context, l xnet.Listener) (err error) {
 	m.mainWG.Go(func(ctx context.Context) (err error) {
 		defer task.Running(&ctx, "accept")(&err)
-		// XXX dup in storage
 		for {
 			if ctx.Err() != nil {
 				return ctx.Err()
@@ -292,16 +293,11 @@ func (m *Master) Run(ctx context.Context, l xnet.Listener) (err error) {
 // main is the process that implements main master cluster management logic: node tracking, cluster
 // state updates, scheduling data movement between storage nodes, etc.
 //
+// NOTE main's goroutine is the only mutator of nodeTab, partTab and other cluster state
 func (m *Master) main(ctx context.Context) (err error) {
 	defer task.Running(&ctx, "main")(&err)
-	// NOTE Run's goroutine is the only mutator of nodeTab, partTab and other cluster state
-	// XXX however since clients request state reading we should use node.StateMu?
-	// XXX -> better rework protocol so that master pushes itself (not
-	//        being pulled) to clients everything they need.
-	//	  -> it was reworked (see bf240897)
 	for ctx.Err() == nil {
 		// recover partition table from storages and wait till enough
 		// storages connects us so that we can see the partition table
@@ -311,22 +307,24 @@ func (m *Master) main(ctx context.Context) (err error) {
 		// a command came to us to start the cluster.
 		err := m.recovery(ctx)
 		if err != nil {
-			//log.Error(ctx, err)
+			log.Error(ctx, err)
 			return err // recovery cancelled
 		}
 		log.Infof(ctx, "recovered ok; partTab:\n%s", m.node.State.PartTab)

 		// make sure transactions on storages are properly finished, in
 		// case previously it was unclean shutdown.
 		err = m.verify(ctx)
 		if err != nil {
-			//log.Error(ctx, err)
+			log.Warning(ctx, err)
 			continue // -> recovery
 		}
 		log.Info(ctx, "verified ok")

 		// provide service as long as partition table stays operational
 		err = m.serve(ctx)
 		if err != nil {
-			//log.Error(ctx, err)
+			log.Warning(ctx, err)
 			continue // -> recovery
 		}
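This hunk also shows the overall shape of main: a recovery failure aborts the loop, while verify and serve failures are now logged as warnings and fall back to recovery. A compressed, runnable sketch of that control flow, with stub phases standing in for m.recovery, m.verify and m.serve (only their error behaviour is modeled):

package main

import (
	"context"
	"errors"
	"fmt"
)

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Stub phases; in master.go these are m.recovery, m.verify and m.serve.
	attempt := 0
	recovery := func(ctx context.Context) error { return nil }
	verify := func(ctx context.Context) error {
		attempt++
		if attempt == 1 {
			return errors.New("verify: unclean shutdown detected")
		}
		return nil
	}
	serve := func(ctx context.Context) error {
		cancel() // end the demo; the real serve runs until the cluster degrades
		return errors.New("serve: partition table no longer operational")
	}

	for ctx.Err() == nil {
		if err := recovery(ctx); err != nil {
			fmt.Println("error:", err)
			return // recovery cancelled - abort main
		}
		if err := verify(ctx); err != nil {
			fmt.Println("warning:", err)
			continue // -> recovery
		}
		if err := serve(ctx); err != nil {
			fmt.Println("warning:", err)
			continue // -> recovery
		}
	}
}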
@@ -378,9 +376,8 @@ func (m *Master) recovery(ctx context.Context) (err error) {
 	// requests to .ctlStart received when readyToStart
 	// on success answered when full recovery completes
 	startReqv := []chan error{}
-	errStartNonOperational := fmt.Errorf("start: cluster is non-operational")
 	defer func() {
-		errStart := errStartNonOperational
+		errStart := ErrStartNonOperational
 		if err == nil {
 			errStart = nil
 		}
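recovery queues the reply channel of every start request in startReqv and answers all of them in one deferred func: nil when recovery completed, the sentinel otherwise. A standalone sketch of that defer-answers-the-queue pattern (the caller side and the buffered channel are illustrative simplifications):

package main

import (
	"errors"
	"fmt"
)

var ErrStartNonOperational = errors.New("start: cluster is non-operational")

// recovery answers every queued start request on return: nil on success,
// ErrStartNonOperational on failure - echoing the deferred func in the hunk.
func recovery(reqv []chan error) (err error) {
	startReqv := []chan error{}

	defer func() {
		errStart := ErrStartNonOperational
		if err == nil {
			errStart = nil
		}
		for _, ech := range startReqv {
			ech <- errStart
		}
	}()

	// Queue the requests; they are only answered once recovery finishes.
	startReqv = append(startReqv, reqv...)
	return nil // pretend recovery succeeded
}

func main() {
	ech := make(chan error, 1) // buffered so the deferred send cannot block
	recovery([]chan error{ech})
	fmt.Println(<-ech) // <nil>
}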
@@ -420,7 +417,7 @@ func (m *Master) recovery(ctx context.Context) (err error) {
 		// cluster became non-operational - cancel previously queued start requests
 		if !ready {
 			for _, ech := range startReqv {
-				ech <- errStartNonOperational
+				ech <- ErrStartNonOperational
 			}
 			startReqv = startReqv[:0]
 		}
@@ -500,7 +497,7 @@ func (m *Master) recovery(ctx context.Context) (err error) {
 				startReqv = append(startReqv, ech)
 			} else {
 				log.Infof(ctx, "start command - err - we are not ready")
-				ech <- errStartNonOperational
+				ech <- ErrStartNonOperational
 			}

 		case ech := <-ctlStop:
go/neo/storage.go (view file @ 4d25e431)

@@ -268,8 +268,7 @@ func (stor *Storage) serve(ctx context.Context) (err error) {
 	wg := sync.WaitGroup{}
 	defer wg.Wait()
-	// XXX dup from master -> Node.Listen() -> Accept() ?
-	// XXX ? -> Node.Accept(lli) (it will verify IdTime against Node.nodeTab[nid])
+	// XXX ? -> _MasteredNode.Accept(lli) (it will verify IdTime against .nodeTab[nid])
 	// XXX ? -> Node.Serve(lli -> func(idReq))
 	for {
 		if ctx.Err() != nil {