Levin Zimmermann / neoppod / Commits / 4d25e431

Commit 4d25e431, authored Feb 18, 2021 by Kirill Smelkov
Commit message: .

parent 5cc06ec3
Showing 3 changed files with 14 additions and 19 deletions (+14 -19):
    go/neo/client_test.go   +1  -2
    go/neo/master.go        +12 -15
    go/neo/storage.go       +1  -2
go/neo/client_test.go (view file @ 4d25e431)

@@ -27,7 +27,6 @@ import (
 	"net/url"
 	"os"
 	"os/exec"
-	"strings"
 	"testing"
 	"time"
@@ -280,7 +279,7 @@ func StartNEOGoSrv(opt NEOSrvOptions) (_ *NEOGoSrv, err error) {
 		if err == nil {
 			break
 		}
-		if !strings.HasSuffix(err.Error(), "start: cluster is non-operational") { // XXX
+		if !errors.Is(err, ErrStartNonOperational) {
 			return nil, err
 		}
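The change above replaces substring matching on the error text with errors.Is against the exported sentinel, so the check keeps working even when the error gets wrapped with extra context. A minimal standalone sketch of the difference (start() and its wrapping prefix are hypothetical stand-ins, not neo code):

package main

import (
	"errors"
	"fmt"
	"strings"
)

// Sentinel error, analogous to ErrStartNonOperational in go/neo/master.go.
var ErrStartNonOperational = errors.New("start: cluster is non-operational")

// start stands in for the operation that StartNEOGoSrv retries.
func start() error {
	// Wrapping with %w keeps the sentinel reachable via errors.Is.
	return fmt.Errorf("master: %w", ErrStartNonOperational)
}

func main() {
	err := start()

	// Old style: matches only while the message happens to end with this text.
	fmt.Println(strings.HasSuffix(err.Error(), "start: cluster is non-operational")) // true, but fragile

	// New style: walks the wrap chain and matches the sentinel value itself.
	fmt.Println(errors.Is(err, ErrStartNonOperational)) // true
}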
go/neo/master.go (view file @ 4d25e431)

@@ -180,6 +180,7 @@ func NewMaster(clusterName string, net xnet.Networker) *Master {
 // NOTE upon successful return cluster is not yet in running state - the transition will
 // take time and could be also automatically aborted due to cluster environment change (e.g.
 // a storage node goes down).
+var ErrStartNonOperational = errors.New("start: cluster is non-operational")
 func (m *Master) Start() error {
 	ech := make(chan error)
 	m.ctlStart <- ech
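Start hands the main goroutine a fresh reply channel via .ctlStart and (in code beyond this hunk) waits on that channel for the outcome. A self-contained sketch of this request/reply-over-channel pattern, with an always-refusing responder standing in for master's actual main loop:

package main

import (
	"errors"
	"fmt"
)

var ErrStartNonOperational = errors.New("start: cluster is non-operational")

type Master struct {
	ctlStart chan chan error // each start request carries its own reply channel
}

// Start mirrors the shape of the hunk: make a reply channel, hand it to the
// main goroutine, then block until the request is answered.
func (m *Master) Start() error {
	ech := make(chan error)
	m.ctlStart <- ech
	return <-ech
}

func main() {
	m := &Master{ctlStart: make(chan chan error)}

	// Stand-in for master's main loop; here it always refuses to start.
	go func() {
		for ech := range m.ctlStart {
			ech <- ErrStartNonOperational
		}
	}()

	fmt.Println(m.Start()) // start: cluster is non-operational
}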
@@ -195,6 +196,7 @@ func (m *Master) Stop() {
 // setClusterState sets .clusterState and notifies subscribers.
 func (m *Master) setClusterState(ctx context.Context, state proto.ClusterState) {
+	log.Infof(ctx, "cluster state <- %s", state)
 	m.node.State.Code.Set(state)
 	//m.notifyAll(ctx, &_ΔStateCode{state})	TODO enable
 }
@@ -235,7 +237,6 @@ func (m *Master) Run(ctx context.Context, l xnet.Listener) (err error) {
 	m.mainWG.Go(func(ctx context.Context) (err error) {
 		defer task.Running(&ctx, "accept")(&err)
-		// XXX dup in storage
 		for {
 			if ctx.Err() != nil {
 				return ctx.Err()
@@ -292,16 +293,11 @@ func (m *Master) Run(ctx context.Context, l xnet.Listener) (err error) {
 // main is the process that implements main master cluster management logic: node tracking, cluster
 // state updates, scheduling data movement between storage nodes, etc.
 //
+// NOTE main's goroutine is the only mutator of nodeTab, partTab and other cluster state
 func (m *Master) main(ctx context.Context) (err error) {
 	defer task.Running(&ctx, "main")(&err)
-	// NOTE Run's goroutine is the only mutator of nodeTab, partTab and other cluster state
-	// XXX however since clients request state reading we should use node.StateMu?
-	// XXX -> better rework protocol so that master pushes itself (not
-	//        being pulled) to clients everything they need.
-	//	  -> it was reworked (see bf240897)
 	for ctx.Err() == nil {
 		// recover partition table from storages and wait till enough
 		// storages connects us so that we can see the partition table
@@ -311,22 +307,24 @@ func (m *Master) main(ctx context.Context) (err error) {
 		// a command came to us to start the cluster.
 		err := m.recovery(ctx)
 		if err != nil {
-			//log.Error(ctx, err)
+			log.Error(ctx, err)
 			return err // recovery cancelled
 		}
 		log.Infof(ctx, "recovered ok; partTab:\n%s", m.node.State.PartTab)

 		// make sure transactions on storages are properly finished, in
 		// case previously it was unclean shutdown.
 		err = m.verify(ctx)
 		if err != nil {
-			//log.Error(ctx, err)
+			log.Warning(ctx, err)
 			continue // -> recovery
 		}
 		log.Info(ctx, "verified ok")

 		// provide service as long as partition table stays operational
 		err = m.serve(ctx)
 		if err != nil {
-			//log.Error(ctx, err)
+			log.Warning(ctx, err)
 			continue // -> recovery
 		}
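This hunk also shows the overall shape of main: a recovery failure aborts the loop, while verify and serve failures are now logged as warnings and fall back to recovery. A compressed, runnable sketch of that control flow, with stub phases standing in for m.recovery, m.verify and m.serve (only their error behaviour is modeled):

package main

import (
	"context"
	"errors"
	"fmt"
)

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Stub phases; in master.go these are m.recovery, m.verify and m.serve.
	attempt := 0
	recovery := func(ctx context.Context) error { return nil }
	verify := func(ctx context.Context) error {
		attempt++
		if attempt == 1 {
			return errors.New("verify: unclean shutdown detected")
		}
		return nil
	}
	serve := func(ctx context.Context) error {
		cancel() // end the demo; the real serve runs until the cluster degrades
		return errors.New("serve: partition table no longer operational")
	}

	for ctx.Err() == nil {
		if err := recovery(ctx); err != nil {
			fmt.Println("error:", err)
			return // recovery cancelled - abort main
		}
		if err := verify(ctx); err != nil {
			fmt.Println("warning:", err)
			continue // -> recovery
		}
		if err := serve(ctx); err != nil {
			fmt.Println("warning:", err)
			continue // -> recovery
		}
	}
}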
@@ -378,9 +376,8 @@ func (m *Master) recovery(ctx context.Context) (err error) {
 	// requests to .ctlStart received when readyToStart
 	// on success answered when full recovery completes
 	startReqv := []chan error{}
-	errStartNonOperational := fmt.Errorf("start: cluster is non-operational")
 	defer func() {
-		errStart := errStartNonOperational
+		errStart := ErrStartNonOperational
 		if err == nil {
 			errStart = nil
 		}
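recovery queues the reply channel of every start request in startReqv and answers all of them in one deferred func: nil when recovery completed, the sentinel otherwise. A standalone sketch of that defer-answers-the-queue pattern (the caller side and the buffered channel are illustrative simplifications):

package main

import (
	"errors"
	"fmt"
)

var ErrStartNonOperational = errors.New("start: cluster is non-operational")

// recovery answers every queued start request on return: nil on success,
// ErrStartNonOperational on failure - echoing the deferred func in the hunk.
func recovery(reqv []chan error) (err error) {
	startReqv := []chan error{}

	defer func() {
		errStart := ErrStartNonOperational
		if err == nil {
			errStart = nil
		}
		for _, ech := range startReqv {
			ech <- errStart
		}
	}()

	// Queue the requests; they are only answered once recovery finishes.
	startReqv = append(startReqv, reqv...)
	return nil // pretend recovery succeeded
}

func main() {
	ech := make(chan error, 1) // buffered so the deferred send cannot block
	recovery([]chan error{ech})
	fmt.Println(<-ech) // <nil>
}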
@@ -420,7 +417,7 @@ func (m *Master) recovery(ctx context.Context) (err error) {
 		// cluster became non-operational - cancel previously queued start requests
 		if !ready {
 			for _, ech := range startReqv {
-				ech <- errStartNonOperational
+				ech <- ErrStartNonOperational
 			}
 			startReqv = startReqv[:0]
 		}
@@ -500,7 +497,7 @@ func (m *Master) recovery(ctx context.Context) (err error) {
 				startReqv = append(startReqv, ech)
 			} else {
 				log.Infof(ctx, "start command - err - we are not ready")
-				ech <- errStartNonOperational
+				ech <- ErrStartNonOperational
 			}

 		case ech := <-ctlStop:
go/neo/storage.go (view file @ 4d25e431)

@@ -268,8 +268,7 @@ func (stor *Storage) serve(ctx context.Context) (err error) {
 	wg := sync.WaitGroup{}
 	defer wg.Wait()
-	// XXX dup from master -> Node.Listen() -> Accept() ?
-	// XXX ? -> Node.Accept(lli) (it will verify IdTime against Node.nodeTab[nid])
+	// XXX ? -> _MasteredNode.Accept(lli) (it will verify IdTime against .nodeTab[nid])
 	// XXX ? -> Node.Serve(lli -> func(idReq))
 	for {
 		if ctx.Err() != nil {