Commit 47b8448d authored by Xavier Thompson, committed by Julien Muchembled

master: add --backup to initialize in BACKINGUP

This applies only to an empty cluster and makes the cluster transition
from the initial state RECOVERING to BACKINGUP without passing through
state RUNNING:

(empty) -> RECOVERING -> VERIFYING -> STARTING_BACKUP -> BACKINGUP

This is the only way to reach BACKINGUP without first passing through
RUNNING and then manually transitioning to STARTING_BACKUP. In other
words, it is the only way to reach state BACKINGUP fully automatically.

See merge request !25
parent 3d435f55
......@@ -92,6 +92,10 @@ class Application(BaseApplication):
help='the name of cluster to backup')
_('M', 'upstream-masters', parse=util.parseMasterList,
help='list of master nodes in the cluster to backup')
_.bool('B', 'backup',
help="transition automatically toward BACKINGUP instead of RUNNING"
" - without passing through RUNNING - if the cluster is empty"
" (this requires --upstream-cluster and --upstream-master)")
_.int('i', 'nid',
help="specify an NID to use for this process (testing purpose)")
......@@ -144,6 +148,7 @@ class Application(BaseApplication):
" different from cluster name")
self.backup_app = BackupApplication(self, upstream_cluster,
config['upstream_masters'])
self.backup_initially = config['backup']
  • The ERP5 software release tests are failing on the slapos master branch. This seems related to this change, because t1_neomaster.log shows:

    Traceback (most recent call last):
      File "/srv/slapgrid/slappart87/t/erh/i/0/tmp/soft/e691a3f0fd26ae171b124f2820cae94c/bin/neomaster", line 54, in <module>
        sys.exit(neo.scripts.neomaster.main())
      File "/srv/slapgrid/slappart87/t/erh/i/0/tmp/soft/e691a3f0fd26ae171b124f2820cae94c/parts/neoppod-repository/neo/scripts/neomaster.py", line 30, in main
        app = Application(config)
      File "/srv/slapgrid/slappart87/t/erh/i/0/tmp/soft/e691a3f0fd26ae171b124f2820cae94c/parts/neoppod-repository/neo/master/app.py", line 151, in __init__
        self.backup_initially = config['backup']
    KeyError: 'backup'
  • Yes, this is known; there is a fixup ready to be applied: xavier_thompson/neoppod@d1459a96

Please register or sign in to reply
self.administration_handler = administration.AdministrationHandler(
self)
......@@ -308,7 +313,11 @@ class Application(BaseApplication):
self.runManager(RecoveryManager)
try:
self.runManager(VerificationManager)
if not self.backup_tid:
if (self.backup_initially and
self.getLastTransaction() == ZERO_TID):
self.pt.setBackupTidDict({}) # {} <=> all ZERO_TID
self.backup_tid = ZERO_TID
elif not self.backup_tid:
self.provideService()
# self.provideService only returns without raising
# when switching to backup mode.
......
......@@ -764,7 +764,7 @@ class NEOCluster(object):
def __init__(self, master_count=1, partitions=1, replicas=0, upstream=None,
adapter=os.getenv('NEO_TESTS_ADAPTER', 'SQLite'),
storage_count=None, db_list=None, clear_databases=True,
compress=True, backup_count=0,
compress=True, backup_count=0, backup_initially=False,
importer=None, autostart=None, dedup=False, name=None):
self.name = name or self._allocateName()
self.backup_list = [self._allocateName() for x in xrange(backup_count)]
......@@ -782,6 +782,7 @@ class NEOCluster(object):
kw.update(upstream_cluster=upstream.name,
upstream_masters=parseMasterList(upstream.master_nodes))
self.master_list = [MasterApplication(autostart=autostart,
backup=backup_initially,
address=x, **kw)
for x in master_list]
if db_list is None:
......
......@@ -32,7 +32,7 @@ from neo.lib.connector import SocketConnector
from neo.lib.connection import ClientConnection
from neo.lib.protocol import CellStates, ClusterStates, NodeStates, Packets, \
ZERO_OID, ZERO_TID, MAX_TID, uuid_str
from neo.lib.util import add64, p64, u64
from neo.lib.util import add64, p64, u64, parseMasterList
from .. import Patch, TransactionalResource
from . import ConnectionFilter, LockLock, NEOCluster, NEOThreadedTest, \
predictable_random, with_cluster
......@@ -93,11 +93,22 @@ class ReplicationTests(NEOThreadedTest):
tid, upstream_name, source_dict = packet._args
return not upstream_name and all(source_dict.itervalues())
with NEOCluster(partitions=np, replicas=nr-1, storage_count=5,
upstream=upstream) as backup:
backup.start()
upstream=upstream, backup_initially=True) as backup:
state_list = []
def changeClusterState(orig, state):
state_list.append(state)
orig(state)
with Patch(backup.master, changeClusterState=changeClusterState):
# Initialize & catch up.
backup.neoctl.setClusterState(ClusterStates.STARTING_BACKUP)
backup.start()
self.tic()
# Check that backup cluster goes straight to BACKINGUP.
self.assertEqual(state_list, [
ClusterStates.RECOVERING,
ClusterStates.VERIFYING,
ClusterStates.STARTING_BACKUP,
ClusterStates.BACKINGUP])
self.assertEqual(np*nr, self.checkBackup(backup))
# Normal case, following upstream cluster closely.
importZODB(17)
......@@ -229,11 +240,48 @@ class ReplicationTests(NEOThreadedTest):
# Do not start with an empty DB so that 'primary_dict' below is not
# empty on the first iteration.
importZODB(1)
# --- ASIDE ---
# Check that master crashes when started with --backup but without
# upstream (-C,--upstream-cluster and -M,--upstream-masters) info.
with NEOCluster(partitions=np, replicas=2, storage_count=4,
upstream=upstream) as backup:
backup.start()
backup.neoctl.setClusterState(ClusterStates.STARTING_BACKUP)
self.tic()
backup_initially=True) as backup:
exitmsg = []
def exit(orig, msg):
exitmsg.append(msg)
orig(msg)
state_list = []
def changeClusterState(orig, state):
state_list.append(state)
orig(state)
m = backup.master
with Patch(sys, exit=exit), Patch(
m, changeClusterState=changeClusterState):
self.assertRaises(AssertionError, backup.start)
backup.join((m,))
self.assertEqual(exitmsg, [m.no_upstream_msg])
self.assertEqual(state_list, [
ClusterStates.RECOVERING,
ClusterStates.VERIFYING])
del state_list[:]
# Now check that restarting the master with upstream info and
# with --backup makes the cluster go to BACKINGUP.
m.resetNode(
upstream_cluster=upstream.name,
upstream_masters=parseMasterList(upstream.master_nodes))
backup.upstream = upstream
with Patch(m, changeClusterState=changeClusterState):
# Initialize & catch up.
m.start()
self.tic()
# Check that backup cluster goes straight to BACKINGUP.
self.assertEqual(state_list, [
ClusterStates.RECOVERING,
ClusterStates.VERIFYING,
ClusterStates.STARTING_BACKUP,
ClusterStates.BACKINGUP])
# --- END ASIDE ---
storage_list = [x.uuid for x in backup.storage_list]
slave = set(xrange(len(storage_list))).difference
for event in xrange(10):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment