Commit a271e8a8 authored by Xavier Thompson's avatar Xavier Thompson

Theia Resiliency

See merge request nexedi/slapos!931
parents eac9043c 60f9d461
......@@ -15,11 +15,35 @@
[instance-theia]
_update_hash_filename_ = instance-theia.cfg.jinja.in
md5sum = 11d347dd2bf762902341746a388673a0
md5sum = e0fb65c34b2d3d524cbca85579728e54
[instance]
_update_hash_filename_ = instance.cfg.in
md5sum = 063d3e19da9d3d4bfb77e8e638aa3a77
md5sum = a7d78b4002266c69ece05a476df82791
[instance-import]
_update_hash_filename_ = instance-import.cfg.jinja.in
md5sum = 861ef130f27175c2978a9b946b138dd5
[instance-export]
_update_hash_filename_ = instance-export.cfg.jinja.in
md5sum = b3cedaa1603ca8ed83fdd94ef4b35cc8
[instance-resilient]
_update_hash_filename_ = instance-resilient.cfg.jinja
md5sum = d78a9f885bdebf6720197209e0c21aa0
[theia-common]
_update_hash_filename_ = theia_common.py
md5sum = e57396473b4b6a17d26a747f0030293c
[theia-export]
_update_hash_filename_ = theia_export.py
md5sum = b5f5ac1924b27d3f2be2e5ea291c119e
[theia-import]
_update_hash_filename_ = theia_import.py
md5sum = 9e8c17a4b2d802695caf0c2c052f0d11
[yarn.lock]
_update_hash_filename_ = yarn.lock
......
{%- set parameter_dict = dict(default_parameter_dict, **parameter_dict) -%}
[buildout]
extends = {{ theia_instance_cfg }}
{{ pbsready_export_cfg }}
parts +=
monitor-base
$${:theia-parts}
$${:theia-environment-parts}
resilient-publish-connection-parameter
# The resilient stack makes the 'resilient' instance
# request the 'export' instance with a 'namebase' parameter.
# The export template then expects to receive it in
# slap-parameter:namebase
[slap-parameter]
namebase = {{ parameter_dict['namebase'] }}
# The resilient export stack periodically calls exporter:wrapper
# and then notifies the pull-backup instance that data is ready
# to be pulled from the export instance.
# All it expects is that a script be available in exporter:wrapper.
[exporter]
wrapper = $${theia-export-script:rendered}
[theia-export-script]
recipe = slapos.recipe.template:jinja2
rendered = $${directory:bin}/theia-export-script
mode = 0700
exitcode-file = $${directory:srv}/export-exitcode-file
error-file = $${directory:srv}/export-errormessage-file
context =
raw python ${software-info:python-with-eggs}
raw theia_export ${software-info:theia-export}
raw bash ${software-info:bash}
raw rsync ${software-info:rsync}
raw sqlite3 ${software-info:sqlite3}
raw root_path $${buildout:directory}
raw backup_path $${directory:backup}
raw slapos_cfg $${directory:runner}/etc/slapos.cfg
raw project_path $${directory:project}
raw public_path $${directory:frontend-static-public}
key exitfile :exitcode-file
key errorfile :error-file
{%- raw %}
template =
inline:#!{{ bash }}
{{ python }} {{ theia_export }} \
--rsync {{ rsync }} \
--sqlite3 {{ sqlite3 }} \
--root {{ root_path }} \
--backup {{ backup_path }} \
--cfg {{ slapos_cfg }} \
--dirs {{ project_path }} \
--dirs {{ public_path }} \
--exitfile {{ exitfile }} \
--errorfile {{ errorfile }}
{%- endraw %}
# Add a promise to check that the export script has run
# successfully and recently (at most 2 days ago).
[promises]
export-promises =
$${export-promise:name}
[export-promise]
<= monitor-promise-base
module = check_command_execute
name = resiliency-export-promise.py
config-command = $${export-promise-script:rendered}
[initial-export-exitcode-file]
recipe = slapos.recipe.template:jinja2
rendered = $${theia-export-script:exitcode-file}
template = inline:0
once = $${:rendered}
[export-promise-script]
recipe = slapos.recipe.template:jinja2
rendered = $${directory:bin}/export-promise-script
exitcode-file = $${initial-export-exitcode-file:rendered}
context =
key exitcodefile :exitcode-file
key errorfile theia-export-script:error-file
{%- raw %}
template =
inline:#!/bin/sh
if [ -z $(find {{ repr(exitcodefile) }} -mtime -2) ]
then
echo "ERROR export script last ran on " $(date -r {{ repr(exitcodefile) }})
exit 1
elif [ $( cat {{ repr(exitcodefile) }}) -ne 0 ]
then
echo "ERROR export script failed on " $(date -r {{ repr(exitcodefile) }})
cat {{ repr(errorfile) }}
exit 1
else
echo "OK export script last ran on " $(date -r {{ repr(exitcodefile) }})
exit 0
fi
{%- endraw %}
# Extend resilient parameters with normal theia connection parameters
[resilient-publish-connection-parameter]
<= publish-connection-parameter
{%- set parameter_dict = dict(default_parameter_dict, **parameter_dict) -%}
{%- set additional_frontend = parameter_dict['additional-frontend-guid'] -%}
[buildout]
extends = {{ theia_instance_cfg }}
{{ pbsready_import_cfg }}
parts +=
monitor-base
$${:theia-parts}
$${:theia-environment-parts}
# The resilient stack makes the 'resilient' instance
# request the 'import' instance with a 'namebase' parameter.
# The import template then expects to receive it in
# slap-parameter:namebase
[slap-parameter]
namebase = {{ parameter_dict['namebase'] }}
# Change frontend name to avoid conflicts
[remote-frontend]
name = Import {{ parameter_dict['frontend-name'] }}
{% if additional_frontend -%}
[remote-additional-frontend]
name = Import {{ parameter_dict['additional-frontend-name'] }}
{%- endif %}
# Change port ranges to avoid race conditions on port allocation
[frontend-instance-port]
minimum = 3200
maximum = 3300
[theia-service-port]
minimum = 3700
maximum = 3800
[slapos-standalone-port]
minimum = 4200
maximum = 4300
# Always disable autoprocessing in the import instance
[slapos-autorun]
autorun = stopped
# Change the gravatar favicon seed
[favicon.ico]
seed = Import {{ root_title }}
# The resilient stack calls post-notification-run:output followed by
# importer:wrapper when the instance is notified that the backup files
# have just been pushed to it. All it expects is the path of a script
# in post-notification-run:output and in importer:wrapper.
[post-notification-run]
recipe = slapos.recipe.template:jinja2
rendered = $${directory:bin}/post-notification-run-script
output = $${:rendered}
mode = 0700
template =
inline:#!${software-info:bash}
# Do nothing because the backup signature will
# be verified by the import script itself
exit 0
[importer]
wrapper = $${theia-import-script:rendered}
[theia-import-script]
recipe = slapos.recipe.template:jinja2
rendered = $${directory:bin}/theia-import-script
mode = 0700
exitcode-file = $${directory:srv}/import-exitcode-file
error-file = $${directory:srv}/import-errormessage-file
context =
raw python ${software-info:python-with-eggs}
raw theia_import ${software-info:theia-import}
raw bash ${software-info:bash}
raw rsync ${software-info:rsync}
raw sqlite3 ${software-info:sqlite3}
raw slapos ${software-info:slapos}
raw slapos_node_software_log $${directory:runner}/var/log/slapos-node-software.log
raw slapos_node_instance_log $${directory:runner}/var/log/slapos-node-instance.log
raw supervisorctl ${software-info:supervisorctl}
raw supervisord_conf $${directory:runner}/etc/supervisord.conf
raw root_path $${buildout:directory}
raw backup_path $${directory:backup}
raw slapos_cfg $${directory:runner}/etc/slapos.cfg
raw project_path $${directory:project}
raw public_path $${directory:frontend-static-public}
key exitfile :exitcode-file
key errorfile :error-file
{%- raw %}
template =
inline:#!{{ bash }}
. $${common-environment:rendered}
. $${slapos-standalone-activate:rendered}
{{ python }} {{ theia_import }} \
--rsync {{ rsync }} \
--sqlite3 {{ sqlite3 }} \
--slapos {{ slapos }} \
--srlog {{ slapos_node_software_log }} \
--cplog {{ slapos_node_instance_log }} \
--supervisorctl {{ supervisorctl }} \
--supervisordconf {{ supervisord_conf }} \
--root {{ root_path }} \
--backup {{ backup_path }} \
--cfg {{ slapos_cfg }} \
--dirs {{ project_path }} \
--dirs {{ public_path }} \
--exitfile {{ exitfile }} \
--errorfile {{ errorfile }}
{%- endraw %}
# Add a promise to check that the import script has run
# successfully and recently (at most 2 days ago).
[promises]
import-promises =
$${import-promise:name}
[import-promise]
<= monitor-promise-base
module = check_command_execute
name = resiliency-import-promise.py
config-command = $${import-promise-script:rendered}
[initial-import-exitcode-file]
recipe = slapos.recipe.template:jinja2
rendered = $${theia-import-script:exitcode-file}
template = inline:0
once = $${:rendered}
[import-promise-script]
recipe = slapos.recipe.template:jinja2
rendered = $${directory:bin}/import-promise-script
exitcode-file = $${initial-import-exitcode-file:rendered}
context =
key exitcodefile :exitcode-file
key errorfile theia-import-script:error-file
{%- raw %}
template =
inline:#!/bin/sh
if [ -z $(find {{ repr(exitcodefile) }} -mtime -2) ]
then
echo "ERROR import script last ran on " $(date -r {{ repr(exitcodefile) }})
exit 1
elif [ $( cat {{ repr(exitcodefile) }}) -ne 0 ]
then
echo "ERROR import script failed on " $(date -r {{ repr(exitcodefile) }})
cat {{ repr(errorfile) }}
exit 1
else
echo "OK import script last ran on " $(date -r {{ repr(exitcodefile) }})
exit 0
fi
{%- endraw %}
# Resilient connection parameters of import instance are published
# through the resilient stack.
# Extend resilient parameters with normal theia connection parameters
[resilient-publish-connection-parameter]
<= publish-connection-parameter
{
"$schema": "http://json-schema.org/draft-04/schema",
"type": "object",
"description": "Parameters to instantiate resilient Theia",
"allOf": [
{
"$ref": "instance-input-schema.json#/"
},
{
"properties": {
"resilient-clone-number": {
"title": "Amount of backup(s) to create",
"description": "Amount of backup(s) to create. Each backup consists of a Pull Backup Server and a clone.",
"type": "integer",
"default": 1,
"minimum": 0,
"maximum": 2,
"optional": true
},
"-sla-theia0-computer_guid": {
"title": "Target computer for main instance",
"description": "Target computer GUID for main instance.",
"type": "string",
"optional": true
},
"-sla-theia1-computer_guid": {
"title": "Target computer for first clone",
"description": "Target computer for first clone instance.",
"type": "string",
"optional": true
},
"-sla-pbs1-computer_guid": {
"title": "Target computer for first PBS",
"description": "Target computer for first PBS instance.",
"type": "string",
"optional": true
},
"-sla-theia2-computer_guid": {
"title": "Target computer for second clone",
"description": "Target computer for second clone instance.",
"type": "string",
"optional": true
},
"-sla-pbs2-computer_guid": {
"title": "Target computer for second PBS",
"description": "Target computer for second PBS instance.",
"type": "string",
"optional": true
},
"resiliency-backup-periodicity": {
"title": "Periodicity of backup",
"description": "Periodicity of backup, in cron format.",
"type": "string",
"optional": true
},
"remove-backup-older-than": {
"title": "Remove backups older than...",
"description": "Remove all the backups in PBS that are older than specified value. It should be rdiff-backup-compatible.",
"type": "string",
"default": "2W",
"optional": true
},
"ignore-known-hosts-file": {
"title": "Ignore known_hosts file",
"description": "Set either to fill known_hosts file for ssh or not. Useful if main instance and PBS are using the same IP (slapos proxy, theia).",
"type": "boolean",
"default": false,
"optional": true
}
}
}
]
}
{% import 'parts' as parts %}
{% import 'replicated' as replicated with context %}
{% set number_of_instances = slapparameter_dict.get('resilient-clone-number', 1)|int %}
[buildout]
eggs-directory = {{ eggs_directory }}
develop-eggs-directory = {{ develop_eggs_directory }}
extends =
{{ monitor_template }}
parts +=
# Generate the parts to request theia-export, pull-backup and theia-import
# See stack/resilient/template-parts.cfg.in and stack/resilient/template-replicated.cfg.in
# See below for the generation of the sections corresponding to the parts generated here
{{ parts.replicate("theia", number_of_instances + 1) }}
# Also publish some connection parameters
publish-connection-parameter
[ArgLeader]
[ArgBackup]
# Generate sections to request theia-export, pull-backup and theia-import
# See stack/resilient/template-replicated.cfg.in
# In particular:
#
# [request-theia]
# <= ArgLeader
# software-type = export
# ...
#
# [request-theia-pseudo-replicating-1]
# <= ArgBackup
# software-type = import
# ...
#
# [request-pbs-theia-1]
# software-type = pull-backup
# ...
#
{{ replicated.replicate("theia", number_of_instances + 1,
"export", "import",
"ArgLeader", "ArgBackup",
slapparameter_dict=slapparameter_dict) }}
# Extend the list of return parameters for the export request
# The monitor parameters are only there to assert they are
# actually published by the export instance
[request-theia]
return += url username password backend-url monitor-base-url monitor-setup-url
# Extend the list of return parameters for the import request
# with the monitor parameters to assert they are actually published
[request-theia-pseudo-replicating-1]
return += monitor-base-url monitor-setup-url
# Publish some parameters from the export instance
[publish-connection-parameter]
recipe = slapos.cookbook:publish
url = ${request-theia:connection-url}
username = ${request-theia:connection-username}
password = ${request-theia:connection-password}
backend-url = ${request-theia:connection-backend-url}
# Publish resiliency parameters fetched by the resilient stack
[publish-connection-parameter]
<= publish-connection-information
......@@ -41,23 +41,26 @@ backend-url = $${frontend-instance:url}
[directory]
recipe = slapos.cookbook:mkdirectory
etc = $${buildout:directory}/etc
var = $${buildout:directory}/var
srv = $${buildout:directory}/srv
bin = $${buildout:directory}/bin
tmp = $${buildout:directory}/tmp
dot-theia = $${buildout:directory}/.theia/
home = $${buildout:directory}
etc = $${:home}/etc
var = $${:home}/var
srv = $${:home}/srv
bin = $${:home}/bin
tmp = $${:home}/tmp
dot-theia = $${:home}/.theia/
pidfiles = $${:var}/run
services = $${:etc}/service
runner = $${:srv}/runner
backup = $${:srv}/backup/theia
project = $${:srv}/project
frontend-static = $${:srv}/frontend-static
frontend-static-public = $${:frontend-static}/public
frontend-static-css = $${:frontend-static}/css
bash-completions = $${buildout:directory}/.local/share/bash-completion/completions/
fish-completions = $${buildout:directory}/.config/fish/completions/
bash-completions = $${:home}/.local/share/bash-completion/completions/
fish-completions = $${:home}/.config/fish/completions/
# Promises
......@@ -68,11 +71,13 @@ recipe =
instance-promises =
$${theia-listen-promise:name}
$${frontend-listen-promise:name}
$${frontend-authentification-promise:name}
$${remote-frontend-url-available-promise:name}
{% if additional_frontend %}
$${remote-additional-frontend-url-available-promise:name}
{% endif %}
$${slapos-standalone-listen-promise:name}
$${slapos-standalone-ready-promise:name}
$${slapos-autorun-promise:name}
[theia-listen-promise]
......@@ -89,6 +94,16 @@ name = $${:_buildout_section_name_}.py
config-host = $${frontend-instance:ip}
config-port = $${frontend-instance:port}
[frontend-authentification-promise]
<= monitor-promise-base
module = check_url_available
name = $${:_buildout_section_name_}.py
username = $${frontend-instance-password:username}
password = $${frontend-instance-password:passwd}
ip = $${frontend-instance:ip}
port = $${frontend-instance:port}
config-url = https://$${:username}:$${:password}@[$${:ip}]:$${:port}
[remote-frontend-url-available-promise]
<= monitor-promise-base
module = check_url_available
......@@ -113,13 +128,19 @@ name = standalone-listen-promise.py
config-host = $${slapos-standalone-instance:hostname}
config-port = $${slapos-standalone-instance:port}
[slapos-standalone-ready-promise]
<= monitor-promise-base
module = check_socket_listening
name = standalone-ready-promise.py
config-abstract = $${directory:runner}/standalone_ready
[slapos-autorun-promise]
<= monitor-promise-base
module = check_service_state
# XXX promise plugins can not contain "slapos" in their names
name = autorun-state-promise.py
config-service = $${slapos-autorun:service-name}
config-expect = $${slapos-autorun:autorun}
config-run-directory = $${directory:runner}/var/run
# Remote Caddy Frontend
......@@ -279,13 +300,14 @@ wait-for-files = $${frontend-instance:pidfile}
[favicon.ico]
# generate a pseudo random favicon, different for each instance name.
recipe = slapos.recipe.build
seed = {{ root_title }}
install =
import hashlib, shutil
buildout_offline = self.buildout['buildout']['offline']
self.buildout['buildout']['offline'] = 'false'
try:
gravatar_url = "https://www.gravatar.com/avatar/" + hashlib.md5(
b'''{{ root_title }}'''
b'''$${:seed}'''
).hexdigest() + "?s=256&d=retro"
shutil.copy(self.download(gravatar_url), '''$${:location}''')
except Exception:
......@@ -310,7 +332,7 @@ mode = 0700
template =
inline:
#!/bin/sh
export HOME=$${buildout:directory}
export HOME=$${directory:home}
export PATH=${python-language-server:location}/bin:${java-jdk:location}/bin:${cli-utilities:PATH}:$HOME/.cargo/bin:$PATH
......@@ -419,6 +441,7 @@ ip = {{ ipv4_random }}
ipv4 = {{ ipv4_random }}
ipv6 = {{ ipv6_random }}
port = $${slapos-standalone-port:port}
local-software-release-root = $${directory:home}
slapos-configuration = $${directory:runner}/etc/slapos.cfg
computer-id = slaprunner
......@@ -447,6 +470,7 @@ template =
$${slapos-standalone-config:ipv4} \
$${slapos-standalone-config:ipv6} \
$${slapos-standalone-config:port} \
$${slapos-standalone-config:local-software-release-root} \
$${slapos-standalone-config:computer-id} \
{%- if parameter_dict.get('embedded-sr') %}
--sr='{{ parameter_dict['embedded-sr'] }}' \
......
......@@ -2,6 +2,9 @@
parts =
switch-softwaretype
extends =
${template-resilient-templates:output}
eggs-directory = ${buildout:eggs-directory}
develop-eggs-directory = ${buildout:develop-eggs-directory}
......@@ -18,6 +21,11 @@ recipe = slapos.cookbook:switch-softwaretype
RootSoftwareInstance = $${:default}
default = $${:theia}
theia = theia:rendered
export = export:rendered
import = import:rendered
resilient = resilient:rendered
frozen = instance-frozen:rendered
pull-backup = template-pull-backup:rendered
[theia]
recipe = slapos.recipe.template:jinja2
......@@ -46,3 +54,43 @@ default-parameters =
"additional-frontend-guid": null
}
frontend-sr = http://git.erp5.org/gitweb/slapos.git/blob_plain/HEAD:/software/apache-frontend/software.cfg
[import]
recipe = slapos.recipe.template:jinja2
template = ${instance-import:output}
rendered = $${buildout:directory}/instance-import.cfg
mode = 0644
context =
jsonkey default_parameter_dict theia:default-parameters
key parameter_dict slap-configuration:configuration
key theia_instance_cfg theia:rendered
key pbsready_import_cfg template-pbsready-import:rendered
key root_title slap-configuration:root-instance-title
[export]
recipe = slapos.recipe.template:jinja2
template = ${instance-export:output}
rendered = $${buildout:directory}/instance-export.cfg
mode = 0644
context =
jsonkey default_parameter_dict theia:default-parameters
key parameter_dict slap-configuration:configuration
key theia_instance_cfg theia:rendered
key pbsready_export_cfg template-pbsready-export:rendered
[resilient]
recipe = slapos.recipe.template:jinja2
template = ${instance-resilient:output}
rendered = $${buildout:directory}/instance-resilient.cfg
mode = 0644
extensions = jinja2.ext.do
context =
key buildout buildout:bin-directory
key develop_eggs_directory buildout:develop-eggs-directory
key eggs_directory buildout:eggs-directory
key slapparameter_dict slap-configuration:configuration
raw monitor_template ${monitor-template:rendered}
template-parts-destination = ${template-parts:target}
template-replicated-destination = ${template-replicated:target}
import-list = file parts :template-parts-destination
file replicated :template-replicated-destination
......@@ -15,6 +15,7 @@ extends =
../../stack/nodejs.cfg
../../stack/slapos.cfg
../../stack/monitor/buildout.cfg
../../stack/resilient/buildout.cfg
../../component/defaults.cfg
./download-plugins.cfg
./buildout.hash.cfg
......@@ -22,8 +23,14 @@ extends =
parts =
theia-wrapper
slapos-cookbook
python-with-eggs
instance-theia
instance
instance-import
instance-export
instance-resilient
theia-common
theia-export
# default for slapos-standalone
shared-part-list =
......@@ -54,9 +61,10 @@ initialization =
import argparse
import glob
import json
import os.path
import sys
import os
import signal
import socket
import sys
import time
import slapos.slap.standalone
......@@ -66,6 +74,7 @@ initialization =
parser.add_argument('ipv4')
parser.add_argument('ipv6')
parser.add_argument('server_port', type=int)
parser.add_argument('local_software_release_root')
parser.add_argument('computer_id')
parser.add_argument('--sr')
parser.add_argument('--srtype')
......@@ -101,58 +110,64 @@ initialization =
instance_root="%s/instance" % args.base_directory,
partition_forward_configuration=partition_forward_configuration,
slapos_bin="${buildout:bin-directory}/slapos",
local_software_release_root=args.local_software_release_root,
)
standalone.start()
partition_count = 20
if len(glob.glob(os.path.join(standalone.instance_directory, '*'))) < partition_count:
print("Standalone SlapOS: Formatting {partition_count} partitions".format(
partition_count=partition_count))
standalone.format(
partition_count,
args.ipv4,
args.ipv6
)
print("Standalone SlapOS for computer `{}` started".format(args.computer_id))
# Run instance at least once, to start the supervisor managing instances.
try:
standalone.waitForInstance(max_retry=0)
except slapos.slap.standalone.SlapOSNodeCommandError as e:
print("Error instanciating: {}".format(e))
if args.sr:
try:
with open(args.srparams) as f:
params = json.load(f)
except Exception:
params = None
if not isinstance(params, dict):
params = None
print("Supplying and Requesting Embedded Software {sr} with type {srtype}".format(
sr=args.sr, srtype=args.srtype))
print("With parameters {param_dict} parsed from '{srparams}'".format(
param_dict=params, srparams=args.srparams))
standalone.supply(args.sr)
standalone.request(
args.sr,
"Embedded Instance",
args.srtype,
partition_parameter_kw=params,
)
quit_requested = []
def signal_handler(signum, frame):
print("Signal {signum} received".format(signum=signum))
quit_requested.append(True)
sys.exit()
signal.signal(signal.SIGTERM, signal_handler)
print("Standalone SlapOS ready")
while not quit_requested:
time.sleep(.1)
standalone.start()
try:
partition_count = 20
if len(glob.glob(os.path.join(standalone.instance_directory, '*'))) < partition_count:
print("Standalone SlapOS: Formatting {partition_count} partitions".format(
partition_count=partition_count))
standalone.format(
partition_count,
args.ipv4,
args.ipv6,
)
print("Standalone SlapOS for computer `{}` started".format(args.computer_id))
# Run instance at least once, to start the supervisor managing instances.
try:
standalone.waitForInstance(max_retry=0)
except slapos.slap.standalone.SlapOSNodeCommandError as e:
print("Error instanciating: {}".format(e))
if args.sr:
try:
with open(args.srparams) as f:
params = json.load(f)
except Exception:
params = None
if not isinstance(params, dict):
params = None
print("Supplying and Requesting Embedded Software {sr} with type {srtype}".format(
sr=args.sr, srtype=args.srtype))
print("With parameters {param_dict} parsed from '{srparams}'".format(
param_dict=params, srparams=args.srparams))
standalone.supply(args.sr)
standalone.request(
args.sr,
"Embedded Instance",
args.srtype,
partition_parameter_kw=params,
)
s = socket.socket(socket.AF_UNIX)
s.bind('\0' + os.path.join(args.base_directory, 'standalone_ready'))
s.listen(5)
print("Standalone SlapOS ready")
while True:
s.accept()[0].close()
finally:
print("Stopping standalone subsystem")
standalone.stop()
print("Exiting")
print("Stopping standalone subsystem")
standalone.stop()
print("Exiting")
sys.exit(0)
needs-these-eggs-scripts-in-path =
${supervisor:recipe}
......@@ -343,6 +358,18 @@ template =
#!/bin/sh
exec ${nodejs:location}/bin/node ${theia:location}/node_modules/.bin/theia-open "$@"
[python-with-eggs]
recipe = zc.recipe.egg
interpreter = ${:_buildout_section_name_}
eggs =
${slapos-toolbox:eggs}
six
zc.buildout
# Only generate the interpreter script to avoid conflicts with scripts
# for eggs that are also generated by another section, like slapos.toolbox
scripts = ${:interpreter}
[instance-theia]
<= template-base
output = ${buildout:directory}/instance-theia.cfg.jinja
......@@ -350,3 +377,37 @@ output = ${buildout:directory}/instance-theia.cfg.jinja
[instance]
<= template-base
output = ${buildout:directory}/instance.cfg
[instance-import]
<= template-base
output = ${buildout:directory}/instance-import.cfg.jinja
[instance-export]
<= template-base
output = ${buildout:directory}/instance-export.cfg.jinja
[instance-resilient]
<= download-base
[theia-common]
<= download-base
destination = ${buildout:directory}/theia_common.py
[theia-export]
<= download-base
destination = ${buildout:directory}/theia_export.py
[theia-import]
<= download-base
destination = ${buildout:directory}/theia_import.py
[software-info]
slapos = ${buildout:bin-directory}/slapos
python-with-eggs = ${buildout:bin-directory}/${python-with-eggs:interpreter}
python = ${python:location}/bin/python
rsync = ${rsync:location}/bin/rsync
sqlite3 = ${sqlite3:location}/bin/sqlite3
bash = ${bash:location}/bin/bash
supervisorctl = ${buildout:bin-directory}/supervisorctl
theia-export = ${theia-export:output}
theia-import = ${theia-import:output}
......@@ -9,6 +9,13 @@
"description": "Default",
"request": "instance-input-schema.json",
"response": "instance-output-schema.json",
"index": 0
},
"resilient": {
"title": "Resilient",
"description": "Resilient Theia",
"request": "instance-resilient-input-schema.json",
"response": "instance-output-schema.json",
"index": 1
}
}
......
##############################################################################
#
# Copyright (c) 2019 Nexedi SA and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
from __future__ import unicode_literals
import gzip
import json
import os
import re
import subprocess
import time
import unittest
import requests
from datetime import datetime, timedelta
from six.moves.urllib.parse import urljoin
from slapos.testing.testcase import installSoftwareUrlList
import test_resiliency
from test import SlapOSInstanceTestCase, theia_software_release_url
erp5_software_release_url = os.path.abspath(
os.path.join(
os.path.dirname(__file__), '..', '..', 'erp5', 'software.cfg'))
def setUpModule():
installSoftwareUrlList(
SlapOSInstanceTestCase,
[theia_software_release_url, erp5_software_release_url],
debug=bool(int(os.environ.get('SLAPOS_TEST_DEBUG', 0))),
)
class ERP5Mixin(object):
_test_software_url = erp5_software_release_url
_connexion_parameters_regex = re.compile(r"{\s*'_'\s*:\s*'(.*)'\s*}")
def _getERP5ConnexionParameters(self, software_type='export'):
slapos = self._getSlapos(software_type)
out = subprocess.check_output(
(slapos, 'request', 'test_instance', self._test_software_url),
stderr=subprocess.STDOUT,
)
print(out)
return json.loads(self._connexion_parameters_regex.search(out).group(1))
def _getERP5Url(self, connexion_parameters, path=''):
return urljoin(connexion_parameters['family-default-v6'], path)
def _getERP5User(self, connexion_parameters):
return connexion_parameters['inituser-login']
def _getERP5Password(self, connexion_parameters):
return connexion_parameters['inituser-password']
def _waitERP5connected(self, url, user, password):
for _ in range(5):
try:
resp = requests.get('%s/getId' % url, auth=(user, password), verify=False, allow_redirects=False)
except Exception:
time.sleep(20)
continue
if resp.status_code != 200:
time.sleep(20)
continue
break
else:
self.fail('Failed to connect to ERP5')
self.assertEqual(resp.text, 'erp5')
def _getERP5Partition(self, servicename):
p = subprocess.Popen(
(self._getSlapos(), 'node', 'status'),
stdout=subprocess.PIPE, universal_newlines=True)
out, _ = p.communicate()
found = set()
for line in out.splitlines():
if servicename in line:
found.add(line.split(':')[0])
if not found:
raise Exception("ERP5 %s partition not found" % servicename)
elif len(found) > 1:
raise Exception("Found several partitions for ERP5 %s" % servicename)
return found.pop()
def _getERP5PartitionPath(self, software_type, servicename, *paths):
partition = self._getERP5Partition(servicename)
return self._getPartitionPath(
software_type, 'srv', 'runner', 'instance', partition, *paths)
class TestTheiaResilienceERP5(ERP5Mixin, test_resiliency.TestTheiaResilience):
test_instance_max_retries = 12
backup_max_tries = 480
backup_wait_interval = 60
def _prepareExport(self):
super(TestTheiaResilienceERP5, self)._prepareExport()
# Connect to erp5
info = self._getERP5ConnexionParameters()
user = self._getERP5User(info)
password = self._getERP5Password(info)
url = self._getERP5Url(info, 'erp5')
self._waitERP5connected(url, user, password)
# Change title
new_title = time.strftime("HelloTitle %a %d %b %Y %H:%M:%S", time.localtime(time.time()))
requests.get('%s/portal_types/setTitle?value=%s' % (url, new_title), auth=(user, password), verify=False)
resp = requests.get('%s/portal_types/getTitle' % url, auth=(user, password), verify=False, allow_redirects=False)
self.assertEqual(resp.text, new_title)
self._erp5_new_title = new_title
# Wait until changes have been catalogued
mariadb_partition = self._getERP5PartitionPath('export', 'mariadb')
mysql_bin = os.path.join(mariadb_partition, 'bin', 'mysql')
wait_activities_script = os.path.join(
mariadb_partition, 'software_release', 'parts', 'erp5',
'Products', 'CMFActivity', 'bin', 'wait_activities')
subprocess.check_call((wait_activities_script, 'erp5'), env={'MYSQL': mysql_bin})
# Check that changes have been catalogued
output = subprocess.check_output((mysql_bin, 'erp5', '-e', 'SELECT title FROM catalog WHERE id="portal_types"'))
self.assertIn(new_title, output)
# Compute backup date in the near future
soon = (datetime.now() + timedelta(minutes=4)).replace(second=0)
date = '*:%d:00' % soon.minute
params = '_={"zodb-zeo": {"backup-periodicity": "%s"}, "mariadb": {"backup-periodicity": "%s"} }' % (date, date)
# Update ERP5 parameters
print('Requesting ERP5 with parameters %s' % params)
slapos = self._getSlapos()
subprocess.check_call((slapos, 'request', 'test_instance', self._test_software_url, '--parameters', params))
# Process twice to propagate parameter changes
for _ in range(2):
subprocess.check_call((slapos, 'node', 'instance'))
# Restart cron (actually all) services to let them take the new date into account
# XXX this should not be required, updating ERP5 parameters should be enough
subprocess.call((slapos, 'node', 'restart', 'all'))
# Wait until after the programmed backup date, and a bit more
t = (soon - datetime.now()).total_seconds()
self.assertLess(0, t)
time.sleep(t + 120)
# Check that mariadb backup has started
mariadb_backup = os.path.join(mariadb_partition, 'srv', 'backup', 'mariadb-full')
mariadb_backup_dump, = os.listdir(mariadb_backup)
# Check that zodb backup has started
zodb_backup = self._getERP5PartitionPath('export', 'zeo', 'srv', 'backup', 'zodb', 'root')
self.assertEqual(len(os.listdir(zodb_backup)), 3)
# Check that mariadb catalog backup contains expected changes
with gzip.open(os.path.join(mariadb_backup, mariadb_backup_dump)) as f:
self.assertIn(new_title, f.read(), "Mariadb catalog backup %s is not up to date" % mariadb_backup_dump)
def _checkTakeover(self):
super(TestTheiaResilienceERP5, self)._checkTakeover()
# Connect to erp5
info = self._getERP5ConnexionParameters()
user = self._getERP5User(info)
password = self._getERP5Password(info)
url = self._getERP5Url(info, 'erp5')
self._waitERP5connected(url, user, password)
resp = requests.get('%s/portal_types/getTitle' % url, auth=(user, password), verify=False, allow_redirects=False)
self.assertEqual(resp.text, self._erp5_new_title)
# Check that the mariadb catalog is not yet restored
mariadb_partition = self._getERP5PartitionPath('export', 'mariadb')
mysql_bin = os.path.join(mariadb_partition, 'bin', 'mysql')
query = 'SELECT title FROM catalog WHERE id="portal_types"'
try:
out = subprocess.check_output((mysql_bin, 'erp5', '-e', query))
except subprocess.CalledProcessError:
out = ''
self.assertNotIn(self._erp5_new_title, out)
# Stop all services
slapos = self._getSlapos()
print("Stop all services")
subprocess.call((slapos, 'node', 'stop', 'all'))
# Manually restore mariadb from backup
mariadb_restore_script = os.path.join(mariadb_partition, 'bin', 'restore-from-backup')
print("Restore mariadb from backup")
subprocess.check_call(mariadb_restore_script)
# Check that the test instance is properly redeployed after restoring mariadb
# This restarts the services and checks the promises of the test instance
# Process twice to propagate state change
for _ in range(2):
self._processEmbeddedInstance(self.test_instance_max_retries)
# Check that the mariadb catalog was properly restored
out = subprocess.check_output((mysql_bin, 'erp5', '-e', query))
self.assertIn(self._erp5_new_title, out, 'Mariadb catalog is not properly restored')
# Resilience Dummy Software
A very simple SR to test resiliency:
- fast installation and deployment
- self-contained - no dependency outside this folder
- has a simple `exporter.exclude` and `runner-import-restore`
[buildout]
parts =
log-writer
exporter.exclude
runner-import-restore
backup-identity-script
eggs-directory = ${buildout:eggs-directory}
develop-eggs-directory = ${buildout:develop-eggs-directory}
offline = true
[directory]
recipe = plone.recipe.command
home = $${buildout:directory}
srv = $${:home}/srv
etc = $${:home}/etc
run = $${:etc}/run
command = mkdir -p $${:run} $${:srv}
[log-writer]
recipe = slapos.recipe.template:jinja2
template = inline:#!/bin/sh
echo "Hello : $(date)" >> $${directory:home}/log.log
rendered = $${directory:run}/log-writer
[exporter.exclude]
recipe = slapos.recipe.template:jinja2
template = inline:$${directory:home}/exclude
rendered = $${directory:srv}/exporter.exclude
[runner-import-restore]
recipe = slapos.recipe.template:jinja2
template = inline:#!/bin/sh
echo "Hello : $(date)" >> $${directory:home}/runner-import-restore.log
exit $TEST_RESTORE_STATUS
rendered = $${directory:srv}/runner-import-restore
[backup-identity-script]
recipe = slapos.recipe.template:jinja2
template = inline:#!/bin/sh
echo "Custom script"
for i in "$@"
do
echo $(sha256sum $i)
done
exit $TEST_BACKUP_STATUS
rendered = $${directory:srv}/.backup_identity_script
[buildout]
find-links +=
http://www.nexedi.org/static/packages/source/
http://www.nexedi.org/static/packages/source/slapos.buildout/
parts =
instance-template
plone-recipe-command
versions = versions
[plone-recipe-command]
recipe = zc.recipe.egg
eggs = plone.recipe.command
[instance-template]
recipe = slapos.recipe.template
url = ${:_profile_base_location_}/instance.cfg.in
output = ${buildout:directory}/instance.cfg
mode = 0644
[versions]
setuptools = 44.0.0
zc.buildout = 2.7.1+slapos010
zc.recipe.egg = 2.0.3+slapos003
Jinja2 = 2.11.2
MarkupSafe = 1.0
......@@ -26,31 +26,28 @@
##############################################################################
from __future__ import unicode_literals
import os
import textwrap
import json
import logging
import os
import re
import subprocess
import tempfile
import time
import re
import json
from six.moves.urllib.parse import urlparse, urljoin
import pexpect
import psutil
import requests
import sqlite3
import six
from six.moves.urllib.parse import urlparse, urljoin
from slapos.testing.testcase import makeModuleSetUpAndTestCaseClass
from slapos.grid.svcbackend import getSupervisorRPC
from slapos.grid.svcbackend import _getSupervisordSocketPath
from slapos.grid.svcbackend import getSupervisorRPC, _getSupervisordSocketPath
software_cfg = 'software%s.cfg' % ('-py3' if six.PY3 else '')
setUpModule, SlapOSInstanceTestCase = makeModuleSetUpAndTestCaseClass(
os.path.abspath(
os.path.join(os.path.dirname(__file__), '..', software_cfg)))
theia_software_release_url = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', software_cfg))
setUpModule, SlapOSInstanceTestCase = makeModuleSetUpAndTestCaseClass(theia_software_release_url)
class TheiaTestCase(SlapOSInstanceTestCase):
......@@ -67,12 +64,21 @@ class TestTheia(TheiaTestCase):
def setUp(self):
self.connection_parameters = self.computer_partition.getConnectionParameterDict()
def get(self, url, expect_code=requests.codes.ok):
resp = requests.get(url, verify=False)
self.assertEqual(
expect_code,
resp.status_code,
'%s returned %d instead of %d' % (url, resp.status_code, expect_code),
)
return resp
def test_backend_http_get(self):
resp = requests.get(self.connection_parameters['backend-url'], verify=False)
self.assertEqual(requests.codes.unauthorized, resp.status_code)
backend_url = self.connection_parameters['backend-url']
self.get(backend_url, requests.codes.unauthorized)
# with login/password, this is allowed
parsed_url = urlparse(self.connection_parameters['backend-url'])
parsed_url = urlparse(backend_url)
authenticated_url = parsed_url._replace(
netloc='{}:{}@[{}]:{}'.format(
self.connection_parameters['username'],
......@@ -80,12 +86,11 @@ class TestTheia(TheiaTestCase):
parsed_url.hostname,
parsed_url.port,
)).geturl()
resp = requests.get(authenticated_url, verify=False)
self.assertEqual(requests.codes.ok, resp.status_code)
self.get(authenticated_url)
def test_http_get(self):
resp = requests.get(self.connection_parameters['url'], verify=False)
self.assertEqual(requests.codes.unauthorized, resp.status_code)
url = self.connection_parameters['url']
self.get(url, requests.codes.unauthorized)
# with login/password, this is allowed
parsed_url = urlparse(self.connection_parameters['url'])
......@@ -96,33 +101,28 @@ class TestTheia(TheiaTestCase):
parsed_url.hostname,
parsed_url.port,
)).geturl()
resp = requests.get(authenticated_url, verify=False)
self.assertEqual(requests.codes.ok, resp.status_code)
self.get(authenticated_url)
# there's a public folder to serve file
with open('{}/srv/frontend-static/public/test_file'.format(
self.computer_partition_root_path), 'w') as f:
f.write("hello")
resp = requests.get(urljoin(authenticated_url, '/public/'), verify=False)
resp = self.get(urljoin(authenticated_url, '/public/'))
self.assertIn('test_file', resp.text)
resp = requests.get(
urljoin(authenticated_url, '/public/test_file'), verify=False)
resp = self.get(urljoin(authenticated_url, '/public/test_file'))
self.assertEqual('hello', resp.text)
# there's a (not empty) favicon
resp = requests.get(
urljoin(authenticated_url, '/favicon.ico'), verify=False)
self.assertEqual(requests.codes.ok, resp.status_code)
resp = self.get(urljoin(authenticated_url, '/favicon.ico'))
self.assertTrue(resp.raw)
# there is a CSS referencing fonts
css_text = requests.get(urljoin(authenticated_url, '/css/slapos.css'), verify=False).text
css_text = self.get(urljoin(authenticated_url, '/css/slapos.css')).text
css_urls = re.findall(r'url\([\'"]+([^\)]+)[\'"]+\)', css_text)
self.assertTrue(css_urls)
# and fonts are served
for url in css_urls:
resp = requests.get(urljoin(authenticated_url, url), verify=False)
self.assertEqual(requests.codes.ok, resp.status_code)
resp = self.get(urljoin(authenticated_url, url))
self.assertTrue(resp.raw)
def test_theia_slapos(self):
......@@ -281,7 +281,7 @@ class TestTheiaEnv(TheiaTestCase):
}
def test_theia_env(self):
"""Make sure environment variables are the same wether we use shell or supervisor services.
"""Make sure environment variables are the same whether we use shell or supervisor services.
"""
# The path of the env.json file expected to be generated by building the dummy software release
env_json_path = os.path.join(self.computer_partition_root_path, 'srv', 'runner', 'software', 'env.json')
......@@ -303,47 +303,104 @@ class TestTheiaEnv(TheiaTestCase):
# Start a theia shell that inherits the environment of the theia process
# This simulates the environment of a shell launched from the browser application
theia_shell_process = pexpect.spawnu('{}/bin/theia-shell'.format(self.computer_partition_root_path), env=theia_env)
theia_shell_process.expect_exact('Standalone SlapOS for computer `slaprunner` activated')
# Launch slapos node software from theia shell
theia_shell_process.sendline('slapos node software')
theia_shell_process.expect('Installing software release %s' % self.dummy_software_path)
theia_shell_process.expect('Finished software releases.')
# Get the theia shell environment
with open(env_json_path) as f:
theia_shell_env = json.load(f)
# Remove the env.json file to later be sure that a new one has been generated
os.remove(env_json_path)
# Launch slapos node software service from the embedded supervisord.
# Note that we have two services, slapos-not-software and slapos-not-software-all
# The later uses --all which is what we want to use here, because the software
# is already installed and we want to install it again, this time from supervisor
embedded_run_path = os.path.join(self.computer_partition_root_path, 'srv', 'runner', 'var', 'run')
embedded_supervisord_socket_path = _getSupervisordSocketPath(embedded_run_path, self.logger)
with getSupervisorRPC(embedded_supervisord_socket_path) as embedded_supervisor:
previous_stop_time = embedded_supervisor.getProcessInfo('slapos-node-software-all')['stop']
embedded_supervisor.startProcess('slapos-node-software-all')
for _retries in range(20):
time.sleep(1)
if embedded_supervisor.getProcessInfo('slapos-node-software-all')['stop'] != previous_stop_time:
break
else:
self.fail("the supervisord service 'slapos-node-software-all' takes too long to finish")
# Get the supervisord environment
with open(env_json_path) as f:
supervisord_env = json.load(f)
# Compare relevant variables from both environments
self.maxDiff = None
self.assertEqual(theia_shell_env['PATH'].split(':'), supervisord_env['PATH'].split(':'))
self.assertEqual(theia_shell_env['SLAPOS_CONFIGURATION'], supervisord_env['SLAPOS_CONFIGURATION'])
self.assertEqual(theia_shell_env['SLAPOS_CLIENT_CONFIGURATION'], supervisord_env['SLAPOS_CLIENT_CONFIGURATION'])
self.assertEqual(theia_shell_env['HOME'], supervisord_env['HOME'])
# Cleanup the theia shell process
theia_shell_process.terminate()
theia_shell_process.wait()
try:
theia_shell_process.expect_exact('Standalone SlapOS for computer `slaprunner` activated')
# Launch slapos node software from theia shell
theia_shell_process.sendline('slapos node software')
theia_shell_process.expect('Installing software release %s' % self.dummy_software_path)
theia_shell_process.expect('Finished software releases.')
# Get the theia shell environment
with open(env_json_path) as f:
theia_shell_env = json.load(f)
# Remove the env.json file to later be sure that a new one has been generated
os.remove(env_json_path)
# Launch slapos node software service from the embedded supervisord.
# Note that we have two services, slapos-node-software and slapos-node-software-all
# The later uses --all which is what we want to use here, because the software
# is already installed and we want to install it again, this time from supervisor
embedded_run_path = os.path.join(self.computer_partition_root_path, 'srv', 'runner', 'var', 'run')
embedded_supervisord_socket_path = _getSupervisordSocketPath(embedded_run_path, self.logger)
with getSupervisorRPC(embedded_supervisord_socket_path) as embedded_supervisor:
previous_stop_time = embedded_supervisor.getProcessInfo('slapos-node-software-all')['stop']
embedded_supervisor.startProcess('slapos-node-software-all')
for _retries in range(20):
time.sleep(1)
if embedded_supervisor.getProcessInfo('slapos-node-software-all')['stop'] != previous_stop_time:
break
else:
self.fail("the supervisord service 'slapos-node-software-all' takes too long to finish")
# Get the supervisord environment
with open(env_json_path) as f:
supervisord_env = json.load(f)
# Compare relevant variables from both environments
self.maxDiff = None
self.assertEqual(theia_shell_env['PATH'].split(':'), supervisord_env['PATH'].split(':'))
self.assertEqual(theia_shell_env['SLAPOS_CONFIGURATION'], supervisord_env['SLAPOS_CONFIGURATION'])
self.assertEqual(theia_shell_env['SLAPOS_CLIENT_CONFIGURATION'], supervisord_env['SLAPOS_CLIENT_CONFIGURATION'])
self.assertEqual(theia_shell_env['HOME'], supervisord_env['HOME'])
finally:
# Cleanup the theia shell process
theia_shell_process.terminate()
theia_shell_process.wait()
class ResilientTheiaMixin(object):
@classmethod
def setUpClass(cls):
super(ResilientTheiaMixin, cls).setUpClass()
# Add resiliency files to snapshot patterns
cls._save_instance_file_pattern_list += (
'*/srv/export-exitcode-file',
'*/srv/export-errormessage-file',
'*/srv/import-exitcode-file',
'*/srv/import-errormessage-file',
)
@classmethod
def _getPartition(cls, software_type):
software_url = cls.getSoftwareURL()
for computer_partition in cls.slap.computer.getComputerPartitionList():
partition_url = computer_partition.getSoftwareRelease()._software_release
partition_type = computer_partition.getType()
if partition_url == software_url and partition_type == software_type:
return computer_partition
raise Exception("Theia %s partition not found" % software_type)
@classmethod
def _getPartitionId(cls, software_type):
return cls._getPartition(software_type).getId()
@classmethod
def _getPartitionPath(cls, software_type, *paths):
return os.path.join(cls.slap._instance_root, cls._getPartitionId(software_type), *paths)
@classmethod
def _getSlapos(cls, software_type='export'):
return cls._getPartitionPath(software_type, 'srv', 'runner', 'bin', 'slapos')
@classmethod
def getInstanceSoftwareType(cls):
return 'resilient'
class TestTheiaResilientInterface(ResilientTheiaMixin, TestTheia):
@classmethod
def setUpClass(cls):
super(TestTheiaResilientInterface, cls).setUpClass()
# Patch the computer root path to that of the export theia instance
cls.computer_partition_root_path = cls._getPartitionPath('export')
class TestTheiaResilientWithSR(ResilientTheiaMixin, TestTheiaWithSR):
@classmethod
def setUpClass(cls):
super(TestTheiaResilientWithSR, cls).setUpClass()
# Patch the computer root path to that of the export theia instance
cls.computer_partition_root_path = cls._getPartitionPath('export')
##############################################################################
#
# Copyright (c) 2019 Nexedi SA and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
from __future__ import unicode_literals
import errno
import os
import re
import shutil
import six
import subprocess
import time
import unittest
import requests
from slapos.testing.testcase import SlapOSNodeCommandError, installSoftwareUrlList
from test import TheiaTestCase, ResilientTheiaMixin, theia_software_release_url
dummy_software_url = os.path.abspath(
os.path.join('resilience_dummy', 'software.cfg'))
class WorkaroundSnapshotConflict(TheiaTestCase):
@classmethod
def _copySnapshot(cls, source_file_name, name):
# Workaround setUpModule snapshots name conflicts
if not name.startswith(cls.__module__):
name = '%s.%s' % (cls.__module__, name)
super(WorkaroundSnapshotConflict, cls)._copySnapshot(source_file_name, name)
def setUpModule():
installSoftwareUrlList(
WorkaroundSnapshotConflict,
[theia_software_release_url],
debug=bool(int(os.environ.get('SLAPOS_TEST_DEBUG', 0))),
)
class ResilientTheiaTestCase(ResilientTheiaMixin, TheiaTestCase):
@classmethod
def _processEmbeddedInstance(cls, retries=0, software_type='export'):
slapos = cls._getSlapos(software_type)
for _ in range(retries):
try:
output = subprocess.check_output((slapos, 'node', 'instance'), stderr=subprocess.STDOUT)
except subprocess.CalledProcessError:
continue
print(output)
break
else:
if retries:
# Sleep a bit as an attempt to workaround monitoring boostrap not being ready
print("Wait before running slapos node instance one last time")
time.sleep(120)
subprocess.check_call((slapos, 'node', 'instance'))
@classmethod
def _deployEmbeddedSoftware(cls, software_url, instance_name, retries=0, software_type='export'):
slapos = cls._getSlapos(software_type)
subprocess.check_call((slapos, 'supply', software_url, 'slaprunner'))
try:
subprocess.check_output((slapos, 'node', 'software'), stderr=subprocess.STDOUT)
except subprocess.CalledProcessError as e:
print(e.output)
raise
subprocess.check_call((slapos, 'request', instance_name, software_url))
cls._processEmbeddedInstance(retries, software_type)
@classmethod
def getInstanceParameterDict(cls):
return {'autorun': 'stopped'}
class ResilienceMixin(object):
def _prepareExport(self):
pass
def _doSync(self):
raise NotImplementedError
def _checkSync(self):
pass
def _doTakeover(self):
raise NotImplementedError
def _checkTakeover(self):
pass
def test(self):
# Do stuff on the main instance
# e.g. deploy an embedded software instance
self._prepareExport()
# Backup the main instance to a clone
# i.e. call export and import scripts
self._doSync()
# Check that the export-backup-import process went well
# e.g. look at logs and compare data
self._checkSync()
# Let the clone become a main instance
# i.e. start embedded services
self._doTakeover()
# Check that the takeover went well
# e.g. check services
self._checkTakeover()
class ExportAndImportMixin(object):
def getExportExitfile(self):
return self._getPartitionPath('export', 'srv', 'export-exitcode-file')
def getExportErrorfile(self):
return self._getPartitionPath('export', 'srv', 'export-errormessage-file')
def getImportExitfile(self):
return self._getPartitionPath('import', 'srv', 'import-exitcode-file')
def getImportErrorfile(self):
return self._getPartitionPath('import', 'srv', 'import-errormessage-file')
def makedirs(self, path):
try:
os.makedirs(path if os.path.isdir(path) else os.path.dirname(path))
except OSError as e:
if e.errno != errno.EEXIST:
raise
def writeFile(self, path, content, mode='w'):
self.makedirs(path)
executable = mode == 'exec'
mode = 'w' if executable else mode
with open(path, mode) as f:
if executable:
f.write('#!/bin/sh\n')
f.write(content)
if executable:
os.chmod(path, 0o700)
def assertPromiseSucess(self):
# Force promises to recompute regardless of periodicity
self.slap._force_slapos_node_instance_all = True
try:
self.slap.waitForInstance(error_lines=0)
except SlapOSNodeCommandError as e:
s = str(e)
self.assertNotIn("Promise 'resiliency-export-promise.py' failed", s)
self.assertNotIn('ERROR export script', s)
self.assertNotIn("Promise 'resiliency-import-promise.py' failed", s)
self.assertNotIn('ERROR import script', s)
else:
pass
def _doExport(self):
# Compute last modification of the export exitcode file
exitfile = self.getExportExitfile()
initial_exitdate = os.path.getmtime(exitfile)
# Call export script manually
theia_export_script = self._getPartitionPath('export', 'bin', 'theia-export-script')
subprocess.check_call((theia_export_script,), stderr=subprocess.STDOUT)
# Check that the export exitcode file was modified
self.assertGreater(os.path.getmtime(exitfile), initial_exitdate)
with open(exitfile) as f:
self.assertEqual('0', f.read())
# Check promises
self.assertPromiseSucess()
def _doTransfer(self):
# Copy <export>/srv/backup/theia to <import>/srv/backup/theia manually
export_backup_path = self._getPartitionPath('export', 'srv', 'backup', 'theia')
import_backup_path = self._getPartitionPath('import', 'srv', 'backup', 'theia')
shutil.rmtree(import_backup_path)
shutil.copytree(export_backup_path, import_backup_path)
def _doImport(self):
# Compute last modification of the import exitcode file
exitfile = self.getImportExitfile()
initial_exitdate = os.path.getmtime(exitfile)
# Call the import script manually
theia_import_script = self._getPartitionPath('import', 'bin', 'theia-import-script')
subprocess.check_call((theia_import_script,), stderr=subprocess.STDOUT)
# Check that the import exitcode file was updated
self.assertGreater(os.path.getmtime(exitfile), initial_exitdate)
with open(exitfile) as f:
self.assertEqual('0', f.read())
# Check promises
self.assertPromiseSucess()
class TestTheiaExportAndImportFailures(ExportAndImportMixin, ResilientTheiaTestCase):
script_relpath = os.path.join(
'srv', 'runner', 'instance', 'slappart0',
'srv', '.backup_identity_script')
def assertPromiseFailure(self, *msg):
# Force promises to recompute regardless of periodicity
self.slap._force_slapos_node_instance_all = True
try:
self.slap.waitForInstance(error_lines=0)
except SlapOSNodeCommandError as e:
s = str(e).replace('\\n', '\n')
for m in msg:
self.assertIn(m, s)
else:
self.fail('No promise failed')
def assertScriptFailure(self, func, errorfile, exitfile, *msg):
self.assertRaises(
subprocess.CalledProcessError,
func,
)
if msg:
with open(errorfile) as f:
error = f.read()
for m in msg:
self.assertIn(m, error)
with open(exitfile) as f:
self.assertNotEqual('0', f.read())
def assertExportFailure(self, *msg):
self.assertScriptFailure(
self._doExport,
self.getExportErrorfile(),
self.getExportExitfile(),
*msg)
self.assertPromiseFailure('ERROR export script failed', *msg)
def assertImportFailure(self, *msg):
self.assertScriptFailure(
self._doImport,
self.getImportErrorfile(),
self.getImportExitfile(),
*msg)
self.assertPromiseFailure('ERROR import script failed', *msg)
def customScript(self, path, content=None):
if content:
self.writeFile(path, content, mode='exec')
else:
if os.path.exists(path):
os.remove(path)
def customSignatureScript(self, content=None):
custom_script = self._getPartitionPath('export', self.script_relpath)
self.customScript(custom_script, content)
def customRestoreScript(self, content=None):
restore_script = self._getPartitionPath('import', 'srv', 'runner-import-restore')
self.customScript(restore_script, content)
return restore_script
def cleanupExitfiles(self):
self.writeFile(self.getExportExitfile(), '0')
self.writeFile(self.getImportExitfile(), '0')
def setUp(self):
self.customSignatureScript(content=None)
self.customRestoreScript(content=None)
self.cleanupExitfiles()
def test_export_promise(self):
self.writeFile(self.getExportExitfile(), '1')
self.assertPromiseFailure('ERROR export script failed')
def test_import_promise(self):
self.writeFile(self.getImportExitfile(), '1')
self.assertPromiseFailure('ERROR import script failed')
def test_custom_hash_script(self):
errmsg = 'Bye bye'
self.customSignatureScript(content='>&2 echo "%s"\nexit 1' % errmsg)
backup_script = self._getPartitionPath(
'export', 'srv', 'backup', 'theia', self.script_relpath)
self.assertExportFailure('Compute backup signature\n ... ERROR !',
'Custom signature script %s failed' % os.path.abspath(backup_script),
'and stderr:\n%s' % errmsg)
def test_signature_mismatch(self):
signature_file = self._getPartitionPath('import', 'srv', 'backup', 'theia', 'backup.signature')
moved_file = self._getPartitionPath('import', 'srv', 'backup', 'backup.signature.moved')
self.writeFile(moved_file, 'Bogus Hash\n', mode='a')
os.rename(moved_file, signature_file)
self.assertImportFailure('ERROR the backup signatures do not match')
def test_restore_script_error(self):
self._doExport()
self._doTransfer()
restore_script = self.customRestoreScript('exit 1')
self.assertImportFailure('Run custom restore script %s\n ... ERROR !' % restore_script)
class TestTheiaExportAndImport(ResilienceMixin, ExportAndImportMixin, ResilientTheiaTestCase):
def test_twice(self):
# Run two synchronisations on the same instances
# to make sure everything still works the second time
self._doSync()
def checkLog(self, log_path, initial=[], newline="Hello"):
with open(log_path) as f:
log = f.readlines()
self.assertEqual(len(log), len(initial) + int(bool(newline)))
for line, initial_line in zip(log, initial):
self.assertEqual(line, initial_line)
if newline:
self.assertTrue(log[-1].startswith(newline), log[-1])
return log
def _prepareExport(self):
# Copy ./resilience_dummy SR in export theia ~/srv/project/dummy
dummy_target_path = self._getPartitionPath('export', 'srv', 'project', 'dummy')
shutil.copytree(os.path.dirname(dummy_software_url), dummy_target_path)
self._test_software_url = os.path.join(dummy_target_path, 'software.cfg')
# Deploy dummy instance in export partition
self._deployEmbeddedSoftware(self._test_software_url, 'dummy_instance')
relpath_dummy = os.path.join('srv', 'runner', 'instance', 'slappart0')
self.export_dummy_root = dummy_root = self._getPartitionPath('export', relpath_dummy)
self.import_dummy_root = self._getPartitionPath('import', relpath_dummy)
# Check that dummy instance was properly deployed
self.initial_log = self.checkLog(os.path.join(dummy_root, 'log.log'))
# Create ~/include and ~/include/included
self.writeFile(os.path.join(dummy_root, 'include', 'included'),
'This file should be included in resilient backup')
# Create ~/exclude and ~/exclude/excluded
self.writeFile(os.path.join(dummy_root, 'exclude', 'excluded'),
'This file should be excluded from resilient backup')
# Check that ~/srv/exporter.exclude and ~/srv/runner-import-restore
# As well as ~/srv/.backup_identity_script
self.assertTrue(os.path.exists(os.path.join(dummy_root, 'srv', 'exporter.exclude')))
self.assertTrue(os.path.exists(os.path.join(dummy_root, 'srv', 'runner-import-restore')))
self.assertTrue(os.path.exists(os.path.join(dummy_root, 'srv', '.backup_identity_script')))
def _doSync(self):
self._doExport()
self._doTransfer()
self._doImport()
def _checkSync(self):
dummy_root = self.import_dummy_root
# Check that the software url is correct
adapted_test_url = self._getPartitionPath('import', 'srv', 'project', 'dummy', 'software.cfg')
proxy_content = subprocess.check_output(
(self._getSlapos('import'), 'proxy', 'show'), universal_newlines=True)
self.assertIn(adapted_test_url, proxy_content)
self.assertNotIn(self._test_software_url, proxy_content)
# Check that ~/srv/project was exported
self.assertTrue(os.path.exists(adapted_test_url))
# Check that the dummy instance is not yet started
self.checkLog(os.path.join(dummy_root, 'log.log'), self.initial_log, newline=None)
# Check that ~/srv/.backup_identity_script was called
signature = self._getPartitionPath('import', 'srv', 'backup', 'backup.signature.proof')
with open(signature) as f:
self.assertIn('Custom script', f.read())
# Check that ~/include and ~/include/included were included
self.assertTrue(os.path.exists(os.path.join(dummy_root, 'include', 'included')))
# Check that ~/exclude was excluded
self.assertFalse(os.path.exists(os.path.join(dummy_root, 'exclude')))
# Check that ~/srv/runner-import-restore was called
self.checkLog(os.path.join(dummy_root, 'runner-import-restore.log'))
def _doTakeover(self):
# Start the dummy instance as a sort of fake takeover
subprocess.check_call((self._getSlapos('import'), 'node', 'instance'))
def _checkTakeover(self):
# Check that dummy instance was properly re-deployed
log_path = os.path.join(self.import_dummy_root, 'log.log')
self.checkLog(log_path, self.initial_log)
class TakeoverMixin(ExportAndImportMixin):
def _getTakeoverUrlAndPassword(self, scope="theia-1"):
parameter_dict = self.computer_partition.getConnectionParameterDict()
takeover_url = parameter_dict["takeover-%s-url" % scope]
takeover_password = parameter_dict["takeover-%s-password" % scope]
return takeover_url, takeover_password
def _getTakeoverPage(self, takeover_url):
resp = requests.get(takeover_url, verify=True)
self.assertEqual(requests.codes.ok, resp.status_code)
return resp.text
def _waitScriptDone(self, name, start, exitfile, errorfile, maxtries, interval):
print('Wait until %s script has run' % name.lower())
for t in range(maxtries):
if os.path.getmtime(exitfile) < start:
time.sleep(interval)
continue
with open(exitfile) as f:
if f.read() == '0':
print(name + ' script ran successfully')
return maxtries - t
print(name + ' script failed:\n')
with open(errorfile) as f:
print(f.read())
self.fail(name + ' script failed')
self.fail(name + ' script did not finish before timeout')
def _waitTakeoverReady(self, takeover_url, start, maxtries, interval):
export_exitfile = self.getExportExitfile()
export_errorfile = self.getExportErrorfile()
tries = self._waitScriptDone(
'Export', start, export_exitfile, export_errorfile, maxtries, interval)
import_exitfile = self.getImportExitfile()
import_errorfile = self.getImportErrorfile()
tries = self._waitScriptDone(
'Import', start, import_exitfile, import_errorfile, tries, interval)
for _ in range(tries):
info = self._getTakeoverPage(takeover_url)
if "No backup downloaded yet, takeover should not happen now." in info:
print('Takeover page still reports export script in progress')
elif "<b>Importer script(s) of backup in progress:</b> True" in info:
print('Takeover page still reports import script in progress')
else:
return
time.sleep(interval)
self.fail('Takeover page failed to report readiness')
def _requestTakeover(self, takeover_url, takeover_password):
resp = requests.get("%s?password=%s" % (takeover_url, takeover_password), verify=True)
self.assertEqual(requests.codes.ok, resp.status_code)
self.assertNotIn("Error", resp.text, "An Error occured: %s" % resp.text)
self.assertIn("Success", resp.text, "An Error occured: %s" % resp.text)
return resp.text
@unittest.skipIf(six.PY3, "resilient stack is not python3-compatible")
class TestTheiaResilience(ResilienceMixin, TakeoverMixin, ResilientTheiaTestCase):
test_instance_max_retries = 0
backup_max_tries = 70
backup_wait_interval = 10
_test_software_url = dummy_software_url
def _prepareExport(self):
# Deploy test instance
self._deployEmbeddedSoftware(self._test_software_url, 'test_instance', self.test_instance_max_retries)
# Check that there is an export and import instance and get their partition IDs
self.export_id = self._getPartitionId('export')
self.import_id = self._getPartitionId('import')
def _doSync(self):
start = time.time()
# Call exporter script instead of waiting for cron job
# XXX Accelerate cron frequency instead ?
exporter_script = self._getPartitionPath('export', 'bin', 'exporter')
transaction_id = str(int(time.time()))
subprocess.check_call((exporter_script, '--transaction-id', transaction_id))
takeover_url, _ = self._getTakeoverUrlAndPassword()
# Wait for takoever to be ready
self._waitTakeoverReady(takeover_url, start, self.backup_max_tries, self.backup_wait_interval)
def _doTakeover(self):
# Takeover
takeover_url, takeover_password = self._getTakeoverUrlAndPassword()
self._requestTakeover(takeover_url, takeover_password)
# Wait for import instance to become export instance and new import to be allocated
# This also checks that all promises of theia instances succeed
self.slap.waitForInstance(self.instance_max_retry)
self.computer_partition = self.requestDefaultInstance()
def _checkTakeover(self):
# Check that there is an export, import and frozen instance and get their new partition IDs
import_id = self.import_id
export_id = self.export_id
new_export_id = self._getPartitionId('export')
new_import_id = self._getPartitionId('import')
new_frozen_id = self._getPartitionId('frozen')
# Check that old export instance is now frozen
self.assertEqual(export_id, new_frozen_id)
# Check that old import instance is now the new export instance
self.assertEqual(import_id, new_export_id)
# Check that there is a new import instance
self.assertNotIn(new_import_id, (export_id, new_export_id))
# Check that the test instance is properly redeployed
# This checks the promises of the test instance
self._processEmbeddedInstance(self.test_instance_max_retries)
import contextlib
import errno
import glob
import hashlib
import os
import re
import subprocess as sp
import sqlite3
import six
import zc.buildout.configparser
from slapos.util import bytes2str, str2bytes
RSYNC_FLAGS = ('-rlptgo', '--safe-links', '--stats', '--ignore-missing-args', '--delete', '--delete-excluded')
RSYNC_REGEX = '^(file has vanished: |rsync warning: some files vanished before they could be transferred)'
EXCLUDE_PATTERNS = ('*.sock', '*.socket', '*.pid', '.installed*.cfg')
EXCLUDE_FLAGS = ['--exclude={}'.format(x) for x in sorted(EXCLUDE_PATTERNS)]
def makedirs(path):
try:
os.makedirs(path if os.path.isdir(path) else os.path.dirname(path))
except OSError as e:
if e.errno != errno.EEXIST:
raise
def copytree(rsyncbin, src, dst, exclude=[], extrargs=[], verbosity='-v'):
# Ensure there is a trailing slash in the source directory
# to avoid creating an additional directory level at the destination
src = os.path.join(src, '')
# Compute absolute path of destination
dst = os.path.abspath(dst)
# Create destination dir if it doesn't exist
makedirs(dst)
command = [rsyncbin]
command.extend(RSYNC_FLAGS)
# Exclude destination file from sources
command.append('--filter=-/ {}'.format(dst))
command.extend(EXCLUDE_FLAGS)
command.extend(('--filter=-/ {}'.format(x) for x in sorted(exclude)))
command.extend(extrargs)
command.append(verbosity)
command.append(src)
command.append(dst)
try:
return sp.check_output(command, universal_newlines=True)
except sp.CalledProcessError as e:
# Not all rsync errors are to be considered as errors
if e.returncode != 24 or re.search(RSYNC_REGEX, e.output, re.M) is None:
raise
return e.output
def copydb(sqlite3bin, src_db, dst_db):
makedirs(dst_db)
sp.check_output((sqlite3bin, src_db, '.backup ' + dst_db))
def remove(path):
try:
os.remove(path)
except OSError:
if os.path.exists(path):
raise
def parse_installed(partition):
paths = []
custom_script = os.path.join(partition, 'srv', '.backup_identity_script')
for cfg in glob.glob(os.path.join(partition, '.installed*.cfg')):
try:
with open(cfg) as f:
installed_cfg = zc.buildout.configparser.parse(f, cfg)
except IOError as e:
if e.errno != errno.ENOENT:
raise
else:
for section in six.itervalues(installed_cfg):
for p in section.get('__buildout_installed__', '').splitlines():
p = p.strip()
if p and p != custom_script:
paths.append(p)
return paths
def sha256sum(file_path, chunk_size=1024 * 1024):
sha256 = hashlib.sha256()
with open(file_path, 'rb') as f:
chunk = f.read(chunk_size)
while chunk:
sha256.update(chunk)
chunk = f.read(chunk_size)
return sha256.hexdigest()
def hashwalk(backup_dir, mirror_partitions):
scripts = {}
for p in mirror_partitions:
script_path = os.path.join(p, 'srv', '.backup_identity_script')
if os.path.exists(script_path):
scripts[os.path.abspath(p)] = script_path
for dirpath, dirnames, filenames in os.walk(backup_dir):
filenames.sort()
for f in filenames:
filepath = os.path.join(dirpath, f)
if os.path.isfile(filepath):
displaypath = os.path.relpath(filepath, start=backup_dir)
yield '%s %s' % (sha256sum(filepath), displaypath)
remaining_dirnames = []
for subdir in dirnames:
subdirpath = os.path.abspath(os.path.join(dirpath, subdir))
custom_hashscript = scripts.get(subdirpath)
if custom_hashscript:
print('Using custom signature script %s' % custom_hashscript)
for s in hashcustom(subdirpath, backup_dir, custom_hashscript):
yield s
else:
remaining_dirnames.append(subdir)
remaining_dirnames.sort()
dirnames[:] = remaining_dirnames
@contextlib.contextmanager
def cwd(path):
old_path = os.getcwd()
try:
os.chdir(path)
yield
finally:
os.chdir(old_path)
def hashcustom(mirrordir, backup_dir, custom_hashscript):
workingdir = os.path.join(mirrordir, os.pardir, os.pardir, os.pardir)
with cwd(os.path.abspath(workingdir)):
for dirpath, _, filenames in os.walk(mirrordir):
filepaths = []
for f in filenames:
path = os.path.join(dirpath, f)
if os.path.isfile(path):
filepaths.append('./' + os.path.relpath(path, start=workingdir))
if not filepaths:
continue
hashprocess = sp.Popen(
custom_hashscript, stdin=sp.PIPE, stdout=sp.PIPE, stderr=sp.PIPE)
out, err = hashprocess.communicate(str2bytes('\0'.join(filepaths)))
if hashprocess.returncode != 0:
template = "Custom signature script %s failed on inputs:\n%s"
msg = template % (custom_hashscript, '\n'.join(filepaths))
msg += "\nwith stdout:\n%s" % bytes2str(out)
msg += "\nand stderr:\n%s" % bytes2str(err)
raise Exception(msg)
signatures = bytes2str(out).strip('\n').split('\n')
signatures.sort()
displaypath = os.path.relpath(dirpath, start=backup_dir)
for s in signatures:
yield '%s %s/ (custom)' % (s, displaypath)
import argparse
import glob
import itertools
import os
import sys
import time
import traceback
import six
from six.moves import configparser
sys.path.append(os.path.dirname(__file__))
from theia_common import copytree, copydb, hashwalk, parse_installed, remove
os.environ['LC_ALL'] = 'C'
os.umask(0o77)
BACKUP_WAIT = 10
def main():
parser = argparse.ArgumentParser()
parser.add_argument('--rsync', required=True)
parser.add_argument('--sqlite3', required=True)
parser.add_argument('--root', required=True)
parser.add_argument('--backup', required=True)
parser.add_argument('--cfg', required=True)
parser.add_argument('--dirs', action='append')
parser.add_argument('--exitfile', required=True)
parser.add_argument('--errorfile', required=True)
args = parser.parse_args()
TheiaExport(args)()
class TheiaExport(object):
def __init__(self, args):
self.rsync_bin = args.rsync
self.sqlite3_bin = args.sqlite3
self.root_dir = args.root
self.backup_dir = args.backup
self.slapos_cfg = cfg = args.cfg
self.dirs = args.dirs
self.exit_file = args.exitfile
self.error_file = args.errorfile
configp = configparser.SafeConfigParser()
configp.read(cfg)
self.proxy_db = configp.get('slapproxy', 'database_uri')
self.instance_dir = configp.get('slapos', 'instance_root')
partitions = glob.glob(os.path.join(self.instance_dir, 'slappart*'))
self.partition_dirs = [p for p in partitions if os.path.isdir(p)]
self.copytree_partitions_args = {}
self.logs = []
def mirrorpath(self, src):
return os.path.abspath(os.path.join(
self.backup_dir, os.path.relpath(src, start=self.root_dir)))
def backuptree(self, src, exclude=[], extrargs=[], verbosity='-v'):
dst = self.mirrorpath(src)
return copytree(self.rsync_bin, src, dst, exclude, extrargs, verbosity)
def backupdb(self):
copydb(self.sqlite3_bin, self.proxy_db, self.mirrorpath(self.proxy_db))
def backuppartition(self, partition):
installed = parse_installed(partition)
rules = os.path.join(partition, 'srv', 'exporter.exclude')
extrargs = ('--filter=.-/ ' + rules,) if os.path.exists(rules) else ()
self.backuptree(partition, exclude=installed, extrargs=extrargs)
self.copytree_partitions_args[partition] = (installed, extrargs)
def sign(self, signaturefile):
remove(signaturefile)
pardir = os.path.abspath(os.path.join(self.backup_dir, os.pardir))
tmpfile = os.path.join(pardir, 'backup.signature.tmp')
mirror_partitions = [self.mirrorpath(p) for p in self.partition_dirs]
with open(tmpfile, 'w') as f:
for s in hashwalk(self.backup_dir, mirror_partitions):
f.write(s + '\n')
os.rename(tmpfile, signaturefile)
def checkpartition(self, partition, pattern='/srv/backup/'):
installed, extrargs = self.copytree_partitions_args[partition]
output = self.backuptree(
partition,
exclude=installed,
extrargs=extrargs + ('--dry-run', '--update'),
verbosity='--out-format=%n',
)
return [path for path in output.splitlines() if pattern in path]
def loginfo(self, msg):
print(msg)
self.logs.append(msg)
def __call__(self):
remove(self.error_file)
exitcode = 0
try:
self.export()
except Exception:
exitcode = 1
exc = traceback.format_exc()
with open(self.error_file, 'w') as f:
f.write('\n ... OK\n\n'.join(self.logs))
f.write('\n ... ERROR !\n\n')
f.write(exc)
print('\n\nERROR\n\n' + exc)
finally:
with open(self.exit_file, 'w') as f:
f.write(str(exitcode))
sys.exit(exitcode)
def export(self):
export_start_date = int(time.time())
etc_dir = os.path.join(self.root_dir, 'etc')
with open(os.path.join(etc_dir, '.resilient_timestamp'), 'w') as f:
f.write(str(export_start_date))
self.loginfo('Backup directory ' + etc_dir)
self.backuptree(etc_dir, extrargs=('--filter=- */', '--filter=-! .*'))
for d in self.dirs:
self.loginfo('Backup directory ' + d)
self.backuptree(d)
self.loginfo('Backup slapproxy database')
self.backupdb()
self.loginfo('Backup partitions')
for p in self.partition_dirs:
self.backuppartition(p)
self.loginfo('Compute backup signature')
self.sign(os.path.join(self.backup_dir, 'backup.signature'))
time.sleep(10)
self.loginfo('Check partitions')
modified = list(itertools.chain.from_iterable(
self.checkpartition(p) for p in self.partition_dirs))
if modified:
msg = 'Some files have been modified since the backup started'
self.loginfo(msg + ':')
self.loginfo('\n'.join(modified))
self.loginfo("Let's wait %d minutes and try again" % BACKUP_WAIT)
time.sleep(BACKUP_WAIT * 60)
raise Exception(msg)
self.loginfo('Done')
if __name__ == '__main__':
main()
import argparse
import glob
import itertools
import os
import sys
import subprocess as sp
import traceback
import six
from six.moves import configparser
sys.path.append(os.path.dirname(__file__))
from theia_common import copytree, copydb, hashwalk, parse_installed, remove
os.environ['LC_ALL'] = 'C'
os.umask(0o77)
def main():
parser = argparse.ArgumentParser()
parser.add_argument('--rsync', required=True)
parser.add_argument('--sqlite3', required=True)
parser.add_argument('--slapos', required=True)
parser.add_argument('--srlog', required=True)
parser.add_argument('--cplog', required=True)
parser.add_argument('--supervisorctl', required=True)
parser.add_argument('--supervisordconf', required=True)
parser.add_argument('--root', required=True)
parser.add_argument('--backup', required=True)
parser.add_argument('--cfg', required=True)
parser.add_argument('--dirs', action='append')
parser.add_argument('--exitfile', required=True)
parser.add_argument('--errorfile', required=True)
args = parser.parse_args()
TheiaImport(args)()
class TheiaImport(object):
def __init__(self, args):
self.rsync_bin = args.rsync
self.sqlite3_bin = args.sqlite3
self.slapos_bin = args.slapos
self.sr_log = args.srlog
self.cp_log = args.cplog
self.supervisorctl_bin = args.supervisorctl
self.supervisord_conf = args.supervisordconf
self.root_dir = args.root
self.backup_dir = args.backup
self.slapos_cfg = cfg = args.cfg
self.dirs = args.dirs
self.exit_file = args.exitfile
self.error_file = args.errorfile
configp = configparser.SafeConfigParser()
configp.read(cfg)
self.proxy_db = configp.get('slapproxy', 'database_uri')
self.instance_dir = configp.get('slapos', 'instance_root')
mirror_dir = self.mirrorpath(self.instance_dir)
partitions = glob.glob(os.path.join(mirror_dir, 'slappart*'))
self.mirror_partition_dirs = [p for p in partitions if os.path.isdir(p)]
self.logs = []
def mirrorpath(self, dst):
return os.path.abspath(os.path.join(
self.backup_dir, os.path.relpath(dst, start=self.root_dir)))
def dstpath(self, src):
return os.path.abspath(os.path.join(
self.root_dir, os.path.relpath(src, start=self.backup_dir)))
def restoretree(self, dst, exclude=[], extrargs=[], verbosity='-v'):
src = self.mirrorpath(dst)
return copytree(self.rsync_bin, src, dst, exclude, extrargs, verbosity)
def restoredb(self):
copydb(self.sqlite3_bin, self.mirrorpath(self.proxy_db), self.proxy_db)
def restorepartition(self, mirror_partition):
p = self.dstpath(mirror_partition)
installed = parse_installed(p) if os.path.exists(p) else []
copytree(self.rsync_bin, mirror_partition, p, exclude=installed)
def supervisorctl(self, *args):
supervisor_command = (self.supervisorctl_bin, '-c', self.supervisord_conf)
command = supervisor_command + args
print(' '.join(command))
sp.check_call(command)
def slapos(self, *args):
command = (self.slapos_bin,) + args + ('--cfg', self.slapos_cfg)
print(' '.join(command))
sp.check_call(command)
def verify(self, signaturefile):
pardir = os.path.abspath(os.path.join(self.backup_dir, os.pardir))
moved = os.path.join(pardir, 'backup.signature.moved')
proof = os.path.join(pardir, 'backup.signature.proof')
if os.path.exists(signaturefile):
os.rename(signaturefile, moved)
if not os.path.exists(moved):
msg = 'ERROR the backup signature file is missing'
print(msg)
raise Exception(msg)
with open(proof, 'w') as f:
for s in hashwalk(self.backup_dir, self.mirror_partition_dirs):
f.write(s + '\n')
diffcommand = ('diff', moved, proof)
print(' '.join(diffcommand))
try:
sp.check_output(
diffcommand, stderr=sp.STDOUT, universal_newlines=True)
except sp.CalledProcessError as e:
template = 'ERROR the backup signatures do not match\n\n%s'
msg = template % e.output
print(msg)
raise Exception(msg)
def loginfo(self, msg):
print(msg)
self.logs.append(msg)
def __call__(self):
remove(self.error_file)
exitcode = 0
try:
self.restore()
except Exception:
exitcode = 1
exc = traceback.format_exc()
with open(self.error_file, 'w') as f:
f.write('\n ... OK\n\n'.join(self.logs))
f.write('\n ... ERROR !\n\n')
f.write(exc)
print('\n\nERROR\n\n' + exc)
finally:
with open(self.exit_file, 'w') as f:
f.write(str(exitcode))
sys.exit(exitcode)
def restore(self):
self.loginfo('Verify backup signature')
self.verify(os.path.join(self.backup_dir, 'backup.signature'))
self.loginfo('Stop slapproxy')
self.supervisorctl('stop', 'slapos-proxy')
self.loginfo('Restore partitions')
for m in self.mirror_partition_dirs:
self.restorepartition(m)
for d in self.dirs:
self.loginfo('Restore directory ' + d)
self.restoretree(d)
self.loginfo('Restore slapproxy database')
self.restoredb()
etc_dir = os.path.join(self.root_dir, 'etc')
self.loginfo('Restore directory ' + etc_dir)
self.restoretree(etc_dir, extrargs=('--filter=- */', '--filter=-! .*'))
custom_script = os.path.join(self.root_dir, 'srv', 'runner-import-restore')
if os.path.exists(custom_script):
self.loginfo('Run custom restore script %s' % custom_script)
sp.check_call(custom_script)
self.loginfo('Start slapproxy again')
self.supervisorctl('start', 'slapos-proxy')
self.loginfo('Reformat partitions')
self.slapos('node', 'format', '--now')
self.loginfo('Remove old supervisord configuration files')
conf_dir = os.path.join(self.instance_dir, 'etc', 'supervisor.conf.d')
for f in glob.glob(os.path.join(conf_dir, '*')):
os.remove(f)
self.loginfo('Build Software Releases')
for i in range(3):
try:
self.slapos('node', 'software', '--all', '--logfile', self.sr_log)
except sp.CalledProcessError:
if i == 2:
raise
else:
break
self.loginfo('Remove old custom instance scripts')
partitions_glob = os.path.join(self.instance_dir, 'slappart*')
scripts = os.path.join(partitions_glob, 'srv', 'runner-import-restore')
for f in glob.glob(scripts):
remove(f)
self.loginfo('Remove partition timestamps')
timestamps = os.path.join(partitions_glob, '.timestamp')
for f in glob.glob(timestamps):
remove(f)
self.loginfo('Build Instances')
cp_log = self.cp_log
for i in range(3):
try:
self.slapos('node', 'instance', '--force-stop', '--logfile', cp_log)
except sp.CalledProcessError:
if i == 2:
raise
else:
break
for custom_script in glob.glob(scripts):
self.loginfo('Running custom instance script %s' % custom_script)
sp.check_call(custom_script)
self.loginfo('Done')
if __name__ == '__main__':
main()
......@@ -14,7 +14,7 @@
# not need these here).
[pbsready]
filename = pbsready.cfg.in
md5sum = 9ceceeee21fa90837c887d2d6866859e
md5sum = 005125621d157b3ae04c428ea6060e37
[pbsready-import]
filename = pbsready-import.cfg.in
......@@ -26,7 +26,7 @@ md5sum = 2b0c71b085cfe8017f28098c160b1f49
[template-pull-backup]
filename = instance-pull-backup.cfg.in
md5sum = e64e13854332bcc2595df187fcae1203
md5sum = b240dc76a663190304d8bcb9cabcda8f
[template-replicated]
filename = template-replicated.cfg.in
......
......@@ -137,12 +137,17 @@ command-line = ${buildout:bin-directory}/generatefeed --output $${:feed-path} --
feed-path = $${directory:monitor-resilient}/pbs-status-rss
wrapper-path = $${rootdirectory:bin}/resilient-genstatusrss.py
[pbs-status-feed-first-run]
recipe = plone.recipe.command
command = $${pbs-resilient-status-feed:wrapper-path}
stop-on-error = true
[cron-pbs-status-feed]
<= cron
recipe = slapos.cookbook:cron.d
name = resilient-pbs-status-feed
frequency = */5 * * * *
command = $${pbs-resilient-status-feed:wrapper-path}
command = $${pbs-status-feed-first-run:command}
[logrotate-entry-notifier]
<= logrotate-entry-base
......
......@@ -145,12 +145,17 @@ command-line = ${buildout:bin-directory}/generatefeed --output $${:feed-path} --
feed-path = $${directory:monitor-resilient}/notifier-status-rss
wrapper-path = $${rootdirectory:bin}/resilient-genstatusrss.py
[notifier-status-feed-first-run]
recipe = plone.recipe.command
command = $${notifier-resilient-status-feed:wrapper-path}
stop-on-error = true
[cron-entry-notifier-status-feed]
<= cron
recipe = slapos.cookbook:cron.d
name = resilient-notifier-status-feed
frequency = */5 * * * *
command = $${notifier-resilient-status-feed:wrapper-path}
command = $${notifier-status-feed-first-run:command}
[notifier-stalled-promise-bin]
recipe = slapos.cookbook:wrapper
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment