Commit 2f070644 authored by Xavier Thompson

software/theia: Fix resiliency

See merge request nexedi/slapos!1096
parents 4c3725cf ef2a540a
......@@ -15,11 +15,11 @@
[instance-theia]
_update_hash_filename_ = instance-theia.cfg.jinja.in
md5sum = 8e4f43e603a5dd57752758c987465d41
md5sum = 002ca13ec4923d9efa7a99f58ea7917f
[instance]
_update_hash_filename_ = instance.cfg.in
md5sum = a7d78b4002266c69ece05a476df82791
md5sum = 4d8d3a351f17c45048fd3ffaee978875
[instance-import]
_update_hash_filename_ = instance-import.cfg.jinja.in
......@@ -31,19 +31,19 @@ md5sum = 190a736471f0e0cffcb2838968e01d84
[instance-resilient]
_update_hash_filename_ = instance-resilient.cfg.jinja
md5sum = d78a9f885bdebf6720197209e0c21aa0
md5sum = 6f6b88d2802cd5eba6e3d2ebf435813a
[theia-common]
_update_hash_filename_ = theia_common.py
md5sum = e57396473b4b6a17d26a747f0030293c
md5sum = 6a25c6a7f1beb27232a3c9acd8a76500
[theia-export]
_update_hash_filename_ = theia_export.py
md5sum = b5f5ac1924b27d3f2be2e5ea291c119e
md5sum = e2f6c483cce09f87ab1e63ae8be0daf4
[theia-import]
_update_hash_filename_ = theia_import.py
md5sum = 9e8c17a4b2d802695caf0c2c052f0d11
md5sum = 1a668d6203d42b4d46d56e24c7606cb2
[yarn.lock]
_update_hash_filename_ = yarn.lock
......
{% import 'parts' as parts %}
{% import 'replicated' as replicated with context %}
{% import 'parts' as parts -%}
{% import 'replicated' as replicated with context -%}
{% set number_of_instances = slapparameter_dict.get('resilient-clone-number', 1)|int %}
{% set clones_amount = slapparameter_dict.get('resilient-clone-number', 1)|int + 1 -%}
[buildout]
eggs-directory = {{ eggs_directory }}
......@@ -11,59 +11,38 @@ extends =
{{ monitor_template }}
parts +=
# Generate the parts to request theia-export, pull-backup and theia-import
# See stack/resilient/template-parts.cfg.in and stack/resilient/template-replicated.cfg.in
# See below for the generation of the sections corresponding to the parts generated here
{{ parts.replicate("theia", number_of_instances + 1) }}
# Also publish some connection parameters
publish-connection-parameter
publish
{#- Generate the parts to request the main theia, the clones and the PBS. #}
{#- See ../../stack/resilient/template-parts.cfg.in #}
{{ parts.replicate("theia", clones_amount) }}
[ArgLeader]
[ArgBackup]
{#- Prepare monitoring information to transmit to and request from the main theia, the clones and the PBS #}
{%- set monitor_cors_domains = slapparameter_dict.pop('monitor-cors-domains', 'monitor.app.officejs.com') %}
{%- set monitor_username = slapparameter_dict.get('monitor-username', '${monitor-instance-parameter:username}') %}
{%- set monitor_password = slapparameter_dict.get('monitor-password', '${monitor-htpasswd:passwd}') %}
{%- set monitor_return = ['monitor-base-url', 'monitor-setup-url'] %}
{%- set monitor_parameter = {'monitor-cors-domains': monitor_cors_domains, 'monitor-username' : monitor_username, 'monitor-password': monitor_password} %}
{%- set monitor_dict = {'parameter': monitor_parameter, 'return': monitor_return, 'set-monitor-url': True} %}
# Generate sections to request theia-export, pull-backup and theia-import
# See stack/resilient/template-replicated.cfg.in
# In particular:
#
# [request-theia]
# <= ArgLeader
# software-type = export
# ...
#
# [request-theia-pseudo-replicating-1]
# <= ArgBackup
# software-type = import
# ...
#
# [request-pbs-theia-1]
# software-type = pull-backup
# ...
#
{{ replicated.replicate("theia", number_of_instances + 1,
"export", "import",
"ArgLeader", "ArgBackup",
slapparameter_dict=slapparameter_dict) }}
# Extend the list of return parameters for the export request
# The monitor parameters are only there to assert they are
# actually published by the export instance
{# Generate the sections to request the main theia, the clones and the PBS. #}
{#- See ../../stack/resilient/template-replicated.cfg.in #}
{{ replicated.replicate("theia", clones_amount, "export", "import", slapparameter_dict=slapparameter_dict, monitor_parameter_dict=monitor_dict) }}
# Ask for the connection parameters of the main theia
[request-theia]
return += url username password backend-url monitor-base-url monitor-setup-url
return += url username password backend-url
# Extend the list of return parameters for the import request
# with the monitor parameters to assert they are actually published
[request-theia-pseudo-replicating-1]
return += monitor-base-url monitor-setup-url
# Publish some parameters from the export instance
[publish-connection-parameter]
# Publish connection parameters of the main theia and resiliency parameters
[publish]
recipe = slapos.cookbook:publish
url = ${request-theia:connection-url}
username = ${request-theia:connection-username}
password = ${request-theia:connection-password}
backend-url = ${request-theia:connection-backend-url}
# Publish resiliency parameters fetched by the resilient stack
[publish-connection-parameter]
monitor-base-url = ${request-theia:connection-monitor-base-url}
monitor-setup-url = ${request-theia:connection-monitor-setup-url}
<= publish-connection-information
......@@ -63,6 +63,25 @@ bash-completions = $${:home}/.local/share/bash-completion/completions/
fish-completions = $${:home}/.config/fish/completions/
# Monitor
# -------
[monitor-instance-parameter]
monitor-httpd-port = {{ parameter_dict['monitor-httpd-port'] }}
{%- for k in ('monitor-cors-domains', 'monitor-username', 'monitor-password') %}
{%- set v = parameter_dict.get(k) %}
{%- if v %}
{{ k[8:] }} = {{ v }}
{%- endif %}
{%- endfor %}
{%- for k in ('monitor-url-list', ) %}
{%- set v = parameter_dict.get(k) %}
{%- if v %}
{{ k }} = {{ v }}
{%- endif %}
{%- endfor %}
# Promises
# --------
......
......@@ -51,7 +51,8 @@ default-parameters =
"additional-frontend-name":"Theia Additional Frontend",
"additional-frontend-sr": "$${:frontend-sr}",
"additional-frontend-sr-type": "RootSoftwareInstance",
"additional-frontend-guid": null
"additional-frontend-guid": null,
"monitor-httpd-port": 8386
}
frontend-sr = http://git.erp5.org/gitweb/slapos.git/blob_plain/HEAD:/software/apache-frontend/software.cfg
......
......@@ -123,14 +123,10 @@ initialization =
standalone.start()
try:
partition_count = 20
if len(glob.glob(os.path.join(standalone.instance_directory, '*'))) < partition_count:
print("Standalone SlapOS: Formatting {partition_count} partitions".format(
partition_count=partition_count))
standalone.format(
partition_count,
args.ipv4,
args.ipv6,
)
standalone.format(partition_count, args.ipv4, args.ipv6)
print("Standalone SlapOS for computer `{}` started".format(args.computer_id))
# Run instance at least once, to start the supervisor managing instances.
try:
......
......@@ -121,6 +121,10 @@ class TestTheiaResilienceERP5(ERP5Mixin, test_resiliency.TestTheiaResilience):
backup_max_tries = 480
backup_wait_interval = 60
def test_twice(self):
# do nothing
pass
def _prepareExport(self):
super(TestTheiaResilienceERP5, self)._prepareExport()
......
......@@ -225,6 +225,8 @@ class TestTheiaExportAndImportFailures(ExportAndImportMixin, ResilientTheiaTestC
script_relpath = os.path.join(
'srv', 'runner', 'instance', 'slappart0',
'srv', '.backup_identity_script')
signature_relpath = os.path.join(
'srv', 'backup', 'theia', 'backup.signature')
def assertPromiseFailure(self, *msg):
# Force promises to recompute regardless of periodicity
......@@ -291,6 +293,10 @@ class TestTheiaExportAndImportFailures(ExportAndImportMixin, ResilientTheiaTestC
self.customSignatureScript(content=None)
self.customRestoreScript(content=None)
self.cleanupExitfiles()
try:
os.remove(self._getPartitionPath('import', self.signature_relpath))
except OSError:
pass
def test_export_promise(self):
self.writeFile(self.getExportExitfile(), '1')
......@@ -303,17 +309,14 @@ class TestTheiaExportAndImportFailures(ExportAndImportMixin, ResilientTheiaTestC
def test_custom_hash_script(self):
errmsg = 'Bye bye'
self.customSignatureScript(content='>&2 echo "%s"\nexit 1' % errmsg)
backup_script = self._getPartitionPath(
'export', 'srv', 'backup', 'theia', self.script_relpath)
self.assertExportFailure('Compute backup signature\n ... ERROR !',
'Custom signature script %s failed' % os.path.abspath(backup_script),
custom_script = self._getPartitionPath('export', self.script_relpath)
self.assertExportFailure('Compute partitions backup signatures\n ... ERROR !',
'Custom signature script %s failed' % os.path.abspath(custom_script),
'and stderr:\n%s' % errmsg)
def test_signature_mismatch(self):
signature_file = self._getPartitionPath('import', 'srv', 'backup', 'theia', 'backup.signature')
moved_file = self._getPartitionPath('import', 'srv', 'backup', 'backup.signature.moved')
self.writeFile(moved_file, 'Bogus Hash\n', mode='a')
os.rename(moved_file, signature_file)
signature_file = self._getPartitionPath('import', self.signature_relpath)
self.writeFile(signature_file, 'Bogus Hash\n', mode='a')
self.assertImportFailure('ERROR the backup signatures do not match')
def test_restore_script_error(self):
......@@ -363,12 +366,15 @@ class TestTheiaExportAndImport(ResilienceMixin, ExportAndImportMixin, ResilientT
self.writeFile(os.path.join(dummy_root, 'exclude', 'excluded'),
'This file should be excluded from resilient backup')
# Check that ~/srv/exporter.exclude and ~/srv/runner-import-restore
# Check that ~/srv/exporter.exclude and ~/srv/runner-import-restore exist
# As well as ~/srv/.backup_identity_script
self.assertTrue(os.path.exists(os.path.join(dummy_root, 'srv', 'exporter.exclude')))
self.assertTrue(os.path.exists(os.path.join(dummy_root, 'srv', 'runner-import-restore')))
self.assertTrue(os.path.exists(os.path.join(dummy_root, 'srv', '.backup_identity_script')))
# Remember content of ~/etc in the import theia
self.etc_listdir = os.listdir(self._getPartitionPath('import', 'etc'))
def _doSync(self):
self._doExport()
self._doTransfer()
......@@ -384,14 +390,20 @@ class TestTheiaExportAndImport(ResilienceMixin, ExportAndImportMixin, ResilientT
self.assertIn(adapted_test_url, proxy_content)
self.assertNotIn(self._test_software_url, proxy_content)
# Check that ~/etc still contains everything it did before
etc_listdir = os.listdir(self._getPartitionPath('import', 'etc'))
self.assertTrue(set(self.etc_listdir).issubset(etc_listdir))
# Check that ~/srv/project was exported
self.assertTrue(os.path.exists(adapted_test_url))
# Check that the dummy instance is not yet started
self.checkLog(os.path.join(dummy_root, 'log.log'), self.initial_log, newline=None)
# Check that ~/srv/.backup_identity_script was called
signature = self._getPartitionPath('import', 'srv', 'backup', 'backup.signature.proof')
# Check that ~/srv/.backup_identity_script was detected and called
signature = self._getPartitionPath(
'import', 'srv', 'backup', 'theia', 'slappart0.backup.signature.custom')
self.assertTrue(os.path.exists(signature))
with open(signature) as f:
self.assertIn('Custom script', f.read())
......@@ -477,6 +489,14 @@ class TestTheiaResilience(ResilienceMixin, TakeoverMixin, ResilientTheiaTestCase
_test_software_url = dummy_software_url
def test_twice(self):
# Run two synchronisations on the same instances
# to make sure everything still works the second time
# Check ~/etc in import theia again
self.etc_listdir = os.listdir(self._getPartitionPath('import', 'etc'))
self._doSync()
self._checkSync()
def _prepareExport(self):
# Deploy test instance
self._deployEmbeddedSoftware(self._test_software_url, 'test_instance', self.test_instance_max_retries)
......@@ -485,6 +505,9 @@ class TestTheiaResilience(ResilienceMixin, TakeoverMixin, ResilientTheiaTestCase
self.export_id = self._getPartitionId('export')
self.import_id = self._getPartitionId('import')
# Remember content of ~/etc in the import theia
self.etc_listdir = os.listdir(self._getPartitionPath('import', 'etc'))
def _doSync(self):
start = time.time()
......@@ -499,6 +522,11 @@ class TestTheiaResilience(ResilienceMixin, TakeoverMixin, ResilientTheiaTestCase
# Wait for takeover to be ready
self._waitTakeoverReady(takeover_url, start, self.backup_max_tries, self.backup_wait_interval)
def _checkSync(self):
# Check that ~/etc still contains everything it did before
etc_listdir = os.listdir(self._getPartitionPath('import', 'etc'))
self.assertTrue(set(self.etc_listdir).issubset(etc_listdir))
def _doTakeover(self):
# Takeover
takeover_url, takeover_password = self._getTakeoverUrlAndPassword()
......
......@@ -4,6 +4,7 @@ import glob
import hashlib
import os
import re
import shutil
import subprocess as sp
import sqlite3
......@@ -21,13 +22,19 @@ EXCLUDE_FLAGS = ['--exclude={}'.format(x) for x in sorted(EXCLUDE_PATTERNS)]
def makedirs(path):
try:
os.makedirs(path if os.path.isdir(path) else os.path.dirname(path))
os.makedirs(path)
except OSError as e:
if e.errno != errno.EEXIST:
raise
def copytree(rsyncbin, src, dst, exclude=[], extrargs=[], verbosity='-v'):
def copyfile(src, dst):
dst = os.path.abspath(dst)
makedirs(os.path.dirname(dst))
shutil.copy2(src, dst)
def copytree(rsyncbin, src, dst, exclude=(), extrargs=(), verbosity='-v'):
# Ensure there is a trailing slash in the source directory
# to avoid creating an additional directory level at the destination
src = os.path.join(src, '')
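As the comment above notes, the trailing separator changes what rsync does: "src/" copies the contents of src into the destination, while "src" would recreate an extra src/ level inside it. A one-line sketch of the path handling (paths are illustrative):

import os

src = '/srv/runner/instance/slappart0'   # illustrative source directory
assert os.path.join(src, '') == src + os.sep
# rsync then receives 'slappart0/' and copies its contents straight into the
# mirror directory instead of nesting an extra 'slappart0/' level there.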
......@@ -60,21 +67,20 @@ def copytree(rsyncbin, src, dst, exclude=[], extrargs=[], verbosity='-v'):
def copydb(sqlite3bin, src_db, dst_db):
makedirs(dst_db)
makedirs(os.path.dirname(dst_db))
sp.check_output((sqlite3bin, src_db, '.backup ' + dst_db))
def remove(path):
try:
os.remove(path)
except OSError:
if os.path.exists(path):
except OSError as e:
if e.errno != errno.ENOENT:
raise
def parse_installed(partition):
paths = []
custom_script = os.path.join(partition, 'srv', '.backup_identity_script')
for cfg in glob.glob(os.path.join(partition, '.installed*.cfg')):
try:
with open(cfg) as f:
......@@ -86,7 +92,7 @@ def parse_installed(partition):
for section in six.itervalues(installed_cfg):
for p in section.get('__buildout_installed__', '').splitlines():
p = p.strip()
if p and p != custom_script:
if p:
paths.append(p)
return paths
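parse_installed collects, for each partition, the paths that buildout recorded under __buildout_installed__ in .installed*.cfg, so those generated files can be excluded from the rsync copy and rebuilt on the import side instead. A minimal sketch of that parsing, using a plain configparser and an invented sample section (the real code reads the file from disk and goes through six.moves for py2/py3 compatibility):

import configparser

SAMPLE = """\
[frontend-promise]
__buildout_installed__ =
    /srv/runner/instance/slappart0/etc/promise/frontend
    /srv/runner/instance/slappart0/bin/frontend
"""

parser = configparser.ConfigParser()
parser.read_string(SAMPLE)
paths = []
for name in parser.sections():
    for p in parser.get(name, '__buildout_installed__', fallback='').splitlines():
        p = p.strip()
        if p:
            paths.append(p)
print(paths)  # the two absolute paths listed in the sample section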
......@@ -101,31 +107,44 @@ def sha256sum(file_path, chunk_size=1024 * 1024):
return sha256.hexdigest()
def hashwalk(backup_dir, mirror_partitions):
scripts = {}
for p in mirror_partitions:
script_path = os.path.join(p, 'srv', '.backup_identity_script')
if os.path.exists(script_path):
scripts[os.path.abspath(p)] = script_path
for dirpath, dirnames, filenames in os.walk(backup_dir):
filenames.sort()
def fast_hashwalk(root_dir):
for dirpath, dirnames, filenames in os.walk(root_dir):
for f in filenames:
filepath = os.path.join(dirpath, f)
if os.path.isfile(filepath):
displaypath = os.path.relpath(filepath, start=backup_dir)
displaypath = os.path.relpath(filepath, start=root_dir)
yield '%s %s' % (sha256sum(filepath), displaypath)
remaining_dirnames = []
for subdir in dirnames:
subdirpath = os.path.abspath(os.path.join(dirpath, subdir))
custom_hashscript = scripts.get(subdirpath)
if custom_hashscript:
print('Using custom signature script %s' % custom_hashscript)
for s in hashcustom(subdirpath, backup_dir, custom_hashscript):
yield s
else:
remaining_dirnames.append(subdir)
remaining_dirnames.sort()
dirnames[:] = remaining_dirnames
def exclude_hashwalk(root_dir, instance_dir):
root_dir = os.path.abspath(root_dir)
instance_dir = os.path.abspath(instance_dir)
for dirpath, dirnames, filenames in os.walk(root_dir):
for f in filenames:
filepath = os.path.join(dirpath, f)
if os.path.isfile(filepath):
displaypath = os.path.relpath(filepath, start=root_dir)
yield '%s %s' % (sha256sum(filepath), displaypath)
if dirpath == instance_dir:
remaining_dirs = []
for d in dirnames:
if not d.startswith('slappart'):
remaining_dirs.append(d)
dirnames[:] = remaining_dirs
def hashwalk(root_dir, instance_dir=None):
if instance_dir and not os.path.relpath(
instance_dir, start=root_dir).startswith(os.pardir):
return exclude_hashwalk(root_dir, instance_dir)
return fast_hashwalk(root_dir)
def hashscript(partition):
script = os.path.join(partition, 'srv', '.backup_identity_script')
if os.path.exists(script):
return script
return None
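The signature files are plain text with one "<sha256> <relative path>" line per file; fast_hashwalk handles a whole tree, and exclude_hashwalk prunes the slappart* subtrees of the instance directory so partitions can be signed separately. A minimal sketch of the line format, assuming a throwaway temporary tree:

import hashlib
import os
import tempfile

def sha256sum(file_path, chunk_size=1024 * 1024):
    # Same chunked hashing idea as theia_common.sha256sum.
    sha256 = hashlib.sha256()
    with open(file_path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            sha256.update(chunk)
    return sha256.hexdigest()

root = tempfile.mkdtemp()
with open(os.path.join(root, 'etc.cfg'), 'w') as f:
    f.write('example\n')

for dirpath, dirnames, filenames in os.walk(root):
    for name in sorted(filenames):
        filepath = os.path.join(dirpath, name)
        relpath = os.path.relpath(filepath, start=root)
        # One line of the resulting *.backup.signature file:
        print('%s %s' % (sha256sum(filepath), relpath))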
@contextlib.contextmanager
......@@ -138,10 +157,11 @@ def cwd(path):
os.chdir(old_path)
def hashcustom(mirrordir, backup_dir, custom_hashscript):
workingdir = os.path.join(mirrordir, os.pardir, os.pardir, os.pardir)
def hashcustom(partition, script):
workingdir = os.path.join(partition, os.pardir, os.pardir, os.pardir)
with cwd(os.path.abspath(workingdir)):
for dirpath, _, filenames in os.walk(mirrordir):
for dirpath, dirnames, filenames in os.walk(partition):
dirnames.sort()
filepaths = []
for f in filenames:
path = os.path.join(dirpath, f)
......@@ -150,16 +170,16 @@ def hashcustom(mirrordir, backup_dir, custom_hashscript):
if not filepaths:
continue
hashprocess = sp.Popen(
custom_hashscript, stdin=sp.PIPE, stdout=sp.PIPE, stderr=sp.PIPE)
script, stdin=sp.PIPE, stdout=sp.PIPE, stderr=sp.PIPE)
out, err = hashprocess.communicate(str2bytes('\0'.join(filepaths)))
if hashprocess.returncode != 0:
template = "Custom signature script %s failed on inputs:\n%s"
msg = template % (custom_hashscript, '\n'.join(filepaths))
msg = template % (script, '\n'.join(filepaths))
msg += "\nwith stdout:\n%s" % bytes2str(out)
msg += "\nand stderr:\n%s" % bytes2str(err)
raise Exception(msg)
signatures = bytes2str(out).strip('\n').split('\n')
signatures.sort()
displaypath = os.path.relpath(dirpath, start=backup_dir)
displaypath = os.path.relpath(dirpath, start=partition)
for s in signatures:
yield '%s %s/ (custom)' % (s, displaypath)
yield '%s %s' % (s, displaypath)
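hashcustom feeds the partition's custom script the selected file paths on stdin, NUL-separated, and expects one signature per line on stdout; a partition opts in simply by shipping an executable ~/srv/.backup_identity_script. A hypothetical such script, sketched in Python for illustration (the tests use a shell one-liner; any executable honouring this stdin/stdout contract works):

#!/usr/bin/env python3
# Hypothetical ~/srv/.backup_identity_script, for illustration only.
# hashcustom() writes the file paths to stdin separated by NUL bytes and reads
# one signature line per file back from stdout.
import hashlib
import sys

for path in sys.stdin.buffer.read().split(b'\0'):
    if not path:
        continue
    digest = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(1024 * 1024), b''):
            digest.update(chunk)
    # hashcustom() sorts these lines and appends the partition-relative path.
    sys.stdout.write('%s\n' % digest.hexdigest())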
......@@ -9,8 +9,8 @@ import traceback
import six
from six.moves import configparser
sys.path.append(os.path.dirname(__file__))
from theia_common import copytree, copydb, hashwalk, parse_installed, remove
sys.path.insert(0, os.path.dirname(__file__))
from theia_common import *
os.environ['LC_ALL'] = 'C'
......@@ -55,45 +55,74 @@ class TheiaExport(object):
self.copytree_partitions_args = {}
self.logs = []
def mirrorpath(self, src):
def mirror_path(self, src):
return os.path.abspath(os.path.join(
self.backup_dir, os.path.relpath(src, start=self.root_dir)))
def backuptree(self, src, exclude=[], extrargs=[], verbosity='-v'):
dst = self.mirrorpath(src)
return copytree(self.rsync_bin, src, dst, exclude, extrargs, verbosity)
def backup_tree(self, src):
return copytree(self.rsync_bin, src, self.mirror_path(src))
def backupdb(self):
copydb(self.sqlite3_bin, self.proxy_db, self.mirrorpath(self.proxy_db))
def backup_file(self, src):
return copyfile(src, self.mirror_path(src))
def backuppartition(self, partition):
def backup_db(self):
copydb(self.sqlite3_bin, self.proxy_db, self.mirror_path(self.proxy_db))
def backup_partition(self, partition):
installed = parse_installed(partition)
rules = os.path.join(partition, 'srv', 'exporter.exclude')
extrargs = ('--filter=.-/ ' + rules,) if os.path.exists(rules) else ()
self.backuptree(partition, exclude=installed, extrargs=extrargs)
self.copytree_partitions_args[partition] = (installed, extrargs)
dst = self.mirror_path(partition)
copytree(self.rsync_bin, partition, dst, installed, extrargs)
self.copytree_partitions_args[partition] = (dst, installed, extrargs)
def sign(self, signaturefile):
def sign(self, signaturefile, signatures):
remove(signaturefile)
pardir = os.path.abspath(os.path.join(self.backup_dir, os.pardir))
tmpfile = os.path.join(pardir, 'backup.signature.tmp')
mirror_partitions = [self.mirrorpath(p) for p in self.partition_dirs]
tmpfile = os.path.join(pardir, os.path.basename(signaturefile) + '.tmp')
with open(tmpfile, 'w') as f:
for s in hashwalk(self.backup_dir, mirror_partitions):
for s in signatures:
f.write(s + '\n')
os.rename(tmpfile, signaturefile)
def checkpartition(self, partition, pattern='/srv/backup/'):
installed, extrargs = self.copytree_partitions_args[partition]
output = self.backuptree(
def sign_root(self):
signaturefile = os.path.join(self.backup_dir, 'backup.signature')
signatures = hashwalk(self.backup_dir, self.mirror_path(self.instance_dir))
self.sign(signaturefile, signatures)
def sign_partition(self, partition):
dst = self.mirror_path(partition)
filename = os.path.basename(partition) + '.backup.signature'
signaturefile = os.path.join(self.backup_dir, filename)
script = hashscript(partition)
if script:
signaturefile += '.custom'
self.sign(signaturefile, hashcustom(dst, script))
else:
self.sign(signaturefile, hashwalk(dst))
def remove_signatures(self):
pattern = os.path.join(self.backup_dir, '*backup.signature*')
signature_files = glob.glob(pattern)
for f in signature_files:
try:
os.remove(f)
except OSError:
pass
def check_partition(self, partition, pattern='/srv/backup/'):
dst, installed, extrargs = self.copytree_partitions_args[partition]
output = copytree(
self.rsync_bin,
partition,
dst,
exclude=installed,
extrargs=extrargs + ('--dry-run', '--update'),
verbosity='--out-format=%n',
)
return [path for path in output.splitlines() if pattern in path]
def loginfo(self, msg):
def log(self, msg):
print(msg)
self.logs.append(msg)
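mirror_path is the pivot of the export: it rebases any path under the theia root onto the backup directory, and the import side inverts the mapping with dst_path. A minimal sketch of the round trip, with illustrative directories:

import os

root_dir = '/srv/slapgrid/slappart1'                     # illustrative theia root
backup_dir = '/srv/slapgrid/slappart1/srv/backup/theia'  # illustrative backup dir

def mirror_path(src):
    # Same rebasing as TheiaExport.mirror_path.
    return os.path.abspath(os.path.join(
        backup_dir, os.path.relpath(src, start=root_dir)))

def dst_path(src):
    # Inverse mapping, as in TheiaImport.dst_path.
    return os.path.abspath(os.path.join(
        root_dir, os.path.relpath(src, start=backup_dir)))

src = os.path.join(root_dir, 'srv', 'project', 'notes.txt')
assert mirror_path(src) == os.path.join(backup_dir, 'srv', 'project', 'notes.txt')
assert dst_path(mirror_path(src)) == src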
......@@ -118,40 +147,46 @@ class TheiaExport(object):
def export(self):
export_start_date = int(time.time())
etc_dir = os.path.join(self.root_dir, 'etc')
with open(os.path.join(etc_dir, '.resilient_timestamp'), 'w') as f:
timestamp = os.path.join(self.root_dir, 'etc', '.resilient_timestamp')
with open(timestamp, 'w') as f:
f.write(str(export_start_date))
self.loginfo('Backup directory ' + etc_dir)
self.backuptree(etc_dir, extrargs=('--filter=- */', '--filter=-! .*'))
self.remove_signatures()
self.log('Backup resilient timestamp ' + timestamp)
self.backup_file(timestamp)
for d in self.dirs:
self.loginfo('Backup directory ' + d)
self.backuptree(d)
self.log('Backup directory ' + d)
self.backup_tree(d)
self.loginfo('Backup slapproxy database')
self.backupdb()
self.log('Backup slapproxy database')
self.backup_db()
self.loginfo('Backup partitions')
self.log('Backup partitions')
for p in self.partition_dirs:
self.backuppartition(p)
self.backup_partition(p)
self.loginfo('Compute backup signature')
self.sign(os.path.join(self.backup_dir, 'backup.signature'))
self.log('Compute root backup signature')
self.sign_root()
self.log('Compute partitions backup signatures')
for p in self.partition_dirs:
self.sign_partition(p)
time.sleep(10)
self.loginfo('Check partitions')
self.log('Check partitions')
modified = list(itertools.chain.from_iterable(
self.checkpartition(p) for p in self.partition_dirs))
self.check_partition(p) for p in self.partition_dirs))
if modified:
msg = 'Some files have been modified since the backup started'
self.loginfo(msg + ':')
self.loginfo('\n'.join(modified))
self.loginfo("Let's wait %d minutes and try again" % BACKUP_WAIT)
self.log(msg + ':')
self.log('\n'.join(modified))
self.log("Let's wait %d minutes and try again" % BACKUP_WAIT)
time.sleep(BACKUP_WAIT * 60)
raise Exception(msg)
self.loginfo('Done')
self.log('Done')
if __name__ == '__main__':
......
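With sign_root and sign_partition, the export now leaves one root signature plus one signature per partition in the backup directory; remove_signatures() globs for these and the import's find_signature_file() resolves them. A small hedged sketch of the expected naming (partition names are illustrative):

import os

backup_dir = '/srv/backup/theia'   # illustrative
# Root signature, covering the backup except the mirrored slappart* trees:
print(os.path.join(backup_dir, 'backup.signature'))
# Per-partition signatures, one per mirrored partition; the '.custom' suffix is
# used when the partition ships its own ~/srv/.backup_identity_script:
for name in ('slappart0', 'slappart1'):
    plain = os.path.join(backup_dir, name + '.backup.signature')
    print(plain, 'or', plain + '.custom')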
......@@ -10,7 +10,7 @@ import six
from six.moves import configparser
sys.path.append(os.path.dirname(__file__))
from theia_common import copytree, copydb, hashwalk, parse_installed, remove
from theia_common import *
os.environ['LC_ALL'] = 'C'
......@@ -57,28 +57,32 @@ class TheiaImport(object):
configp.read(cfg)
self.proxy_db = configp.get('slapproxy', 'database_uri')
self.instance_dir = configp.get('slapos', 'instance_root')
mirror_dir = self.mirrorpath(self.instance_dir)
mirror_dir = self.mirror_path(self.instance_dir)
partitions = glob.glob(os.path.join(mirror_dir, 'slappart*'))
self.mirror_partition_dirs = [p for p in partitions if os.path.isdir(p)]
self.logs = []
def mirrorpath(self, dst):
def mirror_path(self, dst):
return os.path.abspath(os.path.join(
self.backup_dir, os.path.relpath(dst, start=self.root_dir)))
def dstpath(self, src):
def dst_path(self, src):
return os.path.abspath(os.path.join(
self.root_dir, os.path.relpath(src, start=self.backup_dir)))
def restoretree(self, dst, exclude=[], extrargs=[], verbosity='-v'):
src = self.mirrorpath(dst)
def restore_tree(self, dst, exclude=(), extrargs=(), verbosity='-v'):
src = self.mirror_path(dst)
return copytree(self.rsync_bin, src, dst, exclude, extrargs, verbosity)
def restoredb(self):
copydb(self.sqlite3_bin, self.mirrorpath(self.proxy_db), self.proxy_db)
def restore_file(self, dst):
src = self.mirror_path(dst)
return copyfile(src, dst)
def restorepartition(self, mirror_partition):
p = self.dstpath(mirror_partition)
def restore_db(self):
copydb(self.sqlite3_bin, self.mirror_path(self.proxy_db), self.proxy_db)
def restore_partition(self, mirror_partition):
p = self.dst_path(mirror_partition)
installed = parse_installed(p) if os.path.exists(p) else []
copytree(self.rsync_bin, mirror_partition, p, exclude=installed)
......@@ -86,38 +90,67 @@ class TheiaImport(object):
supervisor_command = (self.supervisorctl_bin, '-c', self.supervisord_conf)
command = supervisor_command + args
print(' '.join(command))
sp.check_call(command)
print(sp.check_output(command, stderr=sp.STDOUT, universal_newlines=True))
def slapos(self, *args):
command = (self.slapos_bin,) + args + ('--cfg', self.slapos_cfg)
print(' '.join(command))
sp.check_call(command)
print(sp.check_output(command, stderr=sp.STDOUT, universal_newlines=True))
def verify(self, signaturefile):
pardir = os.path.abspath(os.path.join(self.backup_dir, os.pardir))
moved = os.path.join(pardir, 'backup.signature.moved')
proof = os.path.join(pardir, 'backup.signature.proof')
if os.path.exists(signaturefile):
os.rename(signaturefile, moved)
if not os.path.exists(moved):
msg = 'ERROR the backup signature file is missing'
print(msg)
def sign(self, signaturefile, root_dir):
with open(signaturefile, 'r') as f:
for line in f:
try:
_, relpath = line.strip().split(None, 1)
except ValueError:
yield 'Could not parse: %s' % line
continue
filepath = os.path.join(root_dir, relpath)
try:
signature = sha256sum(filepath)
except IOError:
yield 'Could not read: %s' % filepath
continue
yield '%s %s' % (signature, relpath)
def sign_custom(self, root_dir):
partition = self.dst_path(root_dir)
script = hashscript(partition)
if not script:
msg = 'ERROR: missing custom signature script for partition ' + partition
raise Exception(msg)
return hashcustom(root_dir, script)
def find_signature_file(self, partition):
filename = os.path.basename(partition) + '.backup.signature'
signaturefile = os.path.join(self.backup_dir, filename)
if os.path.exists(signaturefile):
return signaturefile, False
signaturefile += '.custom'
if os.path.exists(signaturefile):
return signaturefile, True
raise Exception('ERROR: missing signature file for partition ' + partition)
def verify(self, signaturefile, root_dir, custom=False):
proof = signaturefile + '.proof'
if custom:
signatures = self.sign_custom(root_dir)
else:
signatures = self.sign(signaturefile, root_dir)
with open(proof, 'w') as f:
for s in hashwalk(self.backup_dir, self.mirror_partition_dirs):
for s in signatures:
f.write(s + '\n')
diffcommand = ('diff', moved, proof)
print(' '.join(diffcommand))
diffcommand = ('diff', signaturefile, proof)
try:
sp.check_output(
diffcommand, stderr=sp.STDOUT, universal_newlines=True)
except sp.CalledProcessError as e:
template = 'ERROR the backup signatures do not match\n\n%s'
msg = template % e.output
template = 'ERROR the backup signatures do not match\n\n%s\n%s'
msg = template % (' '.join(diffcommand), e.output)
print(msg)
raise Exception(msg)
def loginfo(self, msg):
def log(self, msg):
print(msg)
self.logs.append(msg)
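verify() recomputes signatures on the restored side (re-hashing the listed files, or re-running the partition's custom script), writes them to a companion .proof file and lets diff report any divergence, so the raised error shows exactly which lines differ. A minimal hedged sketch of that compare-and-fail step, using throwaway files:

import subprocess as sp
import tempfile

expected = tempfile.NamedTemporaryFile('w', suffix='.signature', delete=False)
expected.write('deadbeef etc/.resilient_timestamp\n')
expected.close()

proof = expected.name + '.proof'
with open(proof, 'w') as f:
    f.write('cafebabe etc/.resilient_timestamp\n')  # recomputed signature differs

diffcommand = ('diff', expected.name, proof)
try:
    sp.check_output(diffcommand, stderr=sp.STDOUT, universal_newlines=True)
except sp.CalledProcessError as e:
    # Mirrors TheiaImport.verify: the diff output is embedded in the error.
    print('ERROR the backup signatures do not match\n\n%s\n%s'
          % (' '.join(diffcommand), e.output))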
......@@ -126,9 +159,11 @@ class TheiaImport(object):
exitcode = 0
try:
self.restore()
except Exception:
except Exception as e:
exitcode = 1
exc = traceback.format_exc()
if isinstance(e, sp.CalledProcessError) and e.output:
exc = "%s\n\n%s" % (exc, e.output)
with open(self.error_file, 'w') as f:
f.write('\n ... OK\n\n'.join(self.logs))
f.write('\n ... ERROR !\n\n')
......@@ -140,44 +175,54 @@ class TheiaImport(object):
sys.exit(exitcode)
def restore(self):
self.loginfo('Verify backup signature')
self.verify(os.path.join(self.backup_dir, 'backup.signature'))
self.log('Verify main backup signature')
signaturefile = os.path.join(self.backup_dir, 'backup.signature')
self.verify(signaturefile, self.backup_dir)
self.loginfo('Stop slapproxy')
custom_partition_signatures = []
for m in self.mirror_partition_dirs:
signaturefile, custom = self.find_signature_file(m)
if custom:
custom_partition_signatures.append((signaturefile, m))
else:
self.log('Verify backup signature for ' + m)
self.verify(signaturefile, m)
self.log('Stop slapproxy')
self.supervisorctl('stop', 'slapos-proxy')
self.loginfo('Restore partitions')
self.log('Restore partitions')
for m in self.mirror_partition_dirs:
self.restorepartition(m)
self.restore_partition(m)
for d in self.dirs:
self.loginfo('Restore directory ' + d)
self.restoretree(d)
self.log('Restore directory ' + d)
self.restore_tree(d)
self.loginfo('Restore slapproxy database')
self.restoredb()
self.log('Restore slapproxy database')
self.restore_db()
etc_dir = os.path.join(self.root_dir, 'etc')
self.loginfo('Restore directory ' + etc_dir)
self.restoretree(etc_dir, extrargs=('--filter=- */', '--filter=-! .*'))
timestamp = os.path.join(self.root_dir, 'etc', '.resilient_timestamp')
self.log('Restore resilient timestamp ' + timestamp)
self.restore_file(timestamp)
custom_script = os.path.join(self.root_dir, 'srv', 'runner-import-restore')
if os.path.exists(custom_script):
self.loginfo('Run custom restore script %s' % custom_script)
sp.check_call(custom_script)
self.log('Run custom restore script %s' % custom_script)
print(sp.check_output(custom_script))
self.loginfo('Start slapproxy again')
self.log('Start slapproxy again')
self.supervisorctl('start', 'slapos-proxy')
self.loginfo('Reformat partitions')
self.log('Reformat partitions')
self.slapos('node', 'format', '--now')
self.loginfo('Remove old supervisord configuration files')
self.log('Remove old supervisord configuration files')
conf_dir = os.path.join(self.instance_dir, 'etc', 'supervisor.conf.d')
for f in glob.glob(os.path.join(conf_dir, '*')):
os.remove(f)
self.loginfo('Build Software Releases')
self.log('Build Software Releases')
for i in range(3):
try:
self.slapos('node', 'software', '--all', '--logfile', self.sr_log)
......@@ -187,18 +232,18 @@ class TheiaImport(object):
else:
break
self.loginfo('Remove old custom instance scripts')
self.log('Remove old custom instance scripts')
partitions_glob = os.path.join(self.instance_dir, 'slappart*')
scripts = os.path.join(partitions_glob, 'srv', 'runner-import-restore')
for f in glob.glob(scripts):
remove(f)
self.loginfo('Remove partition timestamps')
self.log('Remove partition timestamps')
timestamps = os.path.join(partitions_glob, '.timestamp')
for f in glob.glob(timestamps):
remove(f)
self.loginfo('Build Instances')
self.log('Build Instances')
cp_log = self.cp_log
for i in range(3):
try:
......@@ -209,11 +254,15 @@ class TheiaImport(object):
else:
break
self.log('Verify custom backup signatures')
for signaturefile, m in custom_partition_signatures:
self.verify(signaturefile, m, True)
for custom_script in glob.glob(scripts):
self.loginfo('Running custom instance script %s' % custom_script)
sp.check_call(custom_script)
self.log('Running custom instance script %s' % custom_script)
print(sp.check_output(custom_script))
self.loginfo('Done')
self.log('Done')
if __name__ == '__main__':
......
......@@ -26,7 +26,7 @@ md5sum = 8f15263c4a27ec315eb3a12dbf7a7b34
[template-pull-backup]
filename = instance-pull-backup.cfg.in
md5sum = 4425db50d551fb8a974e547308990bac
md5sum = e7674770b85c983244255dd82642ebe8
[template-replicated]
filename = template-replicated.cfg.in
......
......@@ -250,6 +250,7 @@ monitor-base-url = $${monitor-publish-parameters:monitor-base-url}
monitor-url = $${monitor-publish-parameters:monitor-url}
monitor-user = $${monitor-publish-parameters:monitor-user}
monitor-password = $${monitor-publish-parameters:monitor-password}
monitor-setup-url = $${monitor-publish:monitor-setup-url}
#----------------
#--
......