Commit 973a0724 authored by Jérome Perrin

Bug fixes and important missing features for standalone / testcase

The most important are:
 * sometimes the instance was not properly destroyed; apparently we need to retry `slapos node report`
 * if the instance is not properly destroyed, remove the files ourselves and mark the test as failed
 * we need to see log files and config files from the instance

There are also a few small commits not directly related.

/reviewed-on nexedi/slapos.core!145
parents 8307f425 88a73f75
...@@ -50,6 +50,7 @@ import psutil ...@@ -50,6 +50,7 @@ import psutil
from .interface.slap import IException from .interface.slap import IException
from .interface.slap import ISupply from .interface.slap import ISupply
from .interface.slap import IRequester from .interface.slap import IRequester
from ..grid.slapgrid import SLAPGRID_PROMISE_FAIL
from .slap import slap from .slap import slap
from ..grid.svcbackend import getSupervisorRPC from ..grid.svcbackend import getSupervisorRPC
...@@ -60,10 +61,12 @@ class SlapOSNodeCommandError(Exception): ...@@ -60,10 +61,12 @@ class SlapOSNodeCommandError(Exception):
"""Exception raised when running a SlapOS Node command failed. """Exception raised when running a SlapOS Node command failed.
""" """
def __str__(self): def __str__(self):
# This is a false positive in pylint https://github.com/PyCQA/pylint/issues/1498
called_process_error = self.args[0] # pylint: disable=unsubscriptable-object
return "{} exitstatus: {} output:\n{}".format( return "{} exitstatus: {} output:\n{}".format(
self.__class__.__name__, self.__class__.__name__,
self.args[0]['exitstatus'], called_process_error['exitstatus'],
self.args[0]['output'], called_process_error['output'],
) )
...@@ -471,7 +474,19 @@ class StandaloneSlapOS(object): ...@@ -471,7 +474,19 @@ class StandaloneSlapOS(object):
for part in unknown_partition_set: for part in unknown_partition_set:
self._logger.debug( self._logger.debug(
"removing partition no longer part of format spec %s", part) "removing partition no longer part of format spec %s", part)
# remove partition directory
shutil.rmtree(part) shutil.rmtree(part)
# remove partition supervisor config, if it was not removed cleanly
supervisor_conf = os.path.join(
self._instance_root,
'etc',
'supervisord.conf.d',
'%s.conf' % os.path.basename(part))
if os.path.exists(supervisor_conf):
self._logger.info(
"removing leftover supervisor config from destroyed partition at %s",
supervisor_conf)
os.unlink(supervisor_conf)
def supply(self, software_url, computer_guid=None, state="available"): def supply(self, software_url, computer_guid=None, state="available"):
"""Supply a software, see ISupply.supply """Supply a software, see ISupply.supply
...@@ -630,9 +645,14 @@ class StandaloneSlapOS(object): ...@@ -630,9 +645,14 @@ class StandaloneSlapOS(object):
prog = self._slapos_commands[command] prog = self._slapos_commands[command]
# used in format(**locals()) below # used in format(**locals()) below
debug_args = prog.get('debug_args', '') # pylint: disable=unused-variable debug_args = prog.get('debug_args', '') # pylint: disable=unused-variable
return subprocess.check_call( command = prog['command'].format(**locals())
prog['command'].format(**locals()), shell=True) try:
return subprocess.check_call(command, shell=True)
except subprocess.CalledProcessError as e:
if e.returncode == SLAPGRID_PROMISE_FAIL:
self._logger.exception('Promise error when running %s', command)
import pdb; pdb.post_mortem()
raise
with self.system_supervisor_rpc as supervisor: with self.system_supervisor_rpc as supervisor:
retry = 0 retry = 0
while True: while True:
......
...@@ -30,6 +30,8 @@ import unittest ...@@ -30,6 +30,8 @@ import unittest
import os import os
import glob import glob
import logging import logging
import shutil
from six.moves.urllib.parse import urlparse
try: try:
import subprocess32 as subprocess import subprocess32 as subprocess
...@@ -43,6 +45,7 @@ from ..slap.standalone import StandaloneSlapOS ...@@ -43,6 +45,7 @@ from ..slap.standalone import StandaloneSlapOS
from ..slap.standalone import SlapOSNodeCommandError from ..slap.standalone import SlapOSNodeCommandError
from ..slap.standalone import PathTooDeepError from ..slap.standalone import PathTooDeepError
from ..grid.utils import md5digest from ..grid.utils import md5digest
from ..util import mkdir_p
try: try:
from typing import Iterable, Tuple, Callable, Type from typing import Iterable, Tuple, Callable, Type
...@@ -59,6 +62,7 @@ def makeModuleSetUpAndTestCaseClass( ...@@ -59,6 +62,7 @@ def makeModuleSetUpAndTestCaseClass(
verbose=bool(int(os.environ.get('SLAPOS_TEST_VERBOSE', 0))), verbose=bool(int(os.environ.get('SLAPOS_TEST_VERBOSE', 0))),
shared_part_list=os.environ.get('SLAPOS_TEST_SHARED_PART_LIST', shared_part_list=os.environ.get('SLAPOS_TEST_SHARED_PART_LIST',
'').split(os.pathsep), '').split(os.pathsep),
snapshot_directory=os.environ.get('SLAPOS_TEST_LOG_DIRECTORY')
): ):
# type: (str, str, str, str, bool, bool, List[str]) -> Tuple[Callable[[], None], Type[SlapOSInstanceTestCase]] # type: (str, str, str, str, bool, bool, List[str]) -> Tuple[Callable[[], None], Type[SlapOSInstanceTestCase]]
"""Create a setup module function and a testcase for testing `software_url`. """Create a setup module function and a testcase for testing `software_url`.
...@@ -116,16 +120,22 @@ def makeModuleSetUpAndTestCaseClass( ...@@ -116,16 +120,22 @@ def makeModuleSetUpAndTestCaseClass(
'base directory ( {} ) is too deep, try setting ' 'base directory ( {} ) is too deep, try setting '
'SLAPOS_TEST_WORKING_DIR to a shallow enough directory'.format( 'SLAPOS_TEST_WORKING_DIR to a shallow enough directory'.format(
base_directory)) base_directory))
if not snapshot_directory:
snapshot_directory = os.path.join(base_directory, "snapshots")
cls = type( cls = type(
'SlapOSInstanceTestCase for {}'.format(software_url), 'SlapOSInstanceTestCase for {}'.format(software_url),
(SlapOSInstanceTestCase,), { (SlapOSInstanceTestCase,), {
'slap': slap, 'slap': slap,
'getSoftwareURL': classmethod(lambda _cls: software_url), 'getSoftwareURL': classmethod(lambda _cls: software_url),
'software_id': urlparse(software_url).path.split('/')[-2],
'_debug': debug, '_debug': debug,
'_verbose': verbose, '_verbose': verbose,
'_ipv4_address': ipv4_address, '_ipv4_address': ipv4_address,
'_ipv6_address': ipv6_address '_ipv6_address': ipv6_address,
'_base_directory': base_directory,
'_test_file_snapshot_directory': snapshot_directory
}) })
class SlapOSInstanceTestCase_(cls, SlapOSInstanceTestCase): class SlapOSInstanceTestCase_(cls, SlapOSInstanceTestCase):
...@@ -136,8 +146,8 @@ def makeModuleSetUpAndTestCaseClass( ...@@ -136,8 +146,8 @@ def makeModuleSetUpAndTestCaseClass(
# type: () -> None # type: () -> None
if debug: if debug:
unittest.installHandler() unittest.installHandler()
if verbose or debug: logging.basicConfig(
logging.basicConfig(level=logging.DEBUG) level=logging.DEBUG if (verbose or debug) else logging.WARNING)
installSoftwareUrlList(cls, [software_url], debug=debug) installSoftwareUrlList(cls, [software_url], debug=debug)
return setUpModule, SlapOSInstanceTestCase_ return setUpModule, SlapOSInstanceTestCase_
...@@ -260,7 +270,7 @@ class SlapOSInstanceTestCase(unittest.TestCase): ...@@ -260,7 +270,7 @@ class SlapOSInstanceTestCase(unittest.TestCase):
# maximum retries for `slapos node instance` # maximum retries for `slapos node instance`
instance_max_retry = 10 instance_max_retry = 10
# maximum retries for `slapos node report` # maximum retries for `slapos node report`
report_max_retry = 0 report_max_retry = 2
# number of partitions needed for this instance # number of partitions needed for this instance
partition_count = 10 partition_count = 10
# reference of the default requested partition # reference of the default requested partition
...@@ -274,6 +284,19 @@ class SlapOSInstanceTestCase(unittest.TestCase): ...@@ -274,6 +284,19 @@ class SlapOSInstanceTestCase(unittest.TestCase):
_ipv4_address = "" _ipv4_address = ""
_ipv6_address = "" _ipv6_address = ""
# a short name of that software URL.
# eg. helloworld instead of
# https://lab.nexedi.com/nexedi/slapos/raw/software/helloworld/software.cfg
software_id = ""
_base_directory = "" # base directory for standalone
_test_file_snapshot_directory = "" # directory to save snapshot files for inspections
# patterns of files to save for inspection, relative to instance directory
_save_instance_file_pattern_list = (
'*/etc/*',
'*/var/log/*',
'*/.*log',
)
# Methods to be defined by subclasses. # Methods to be defined by subclasses.
@classmethod @classmethod
def getSoftwareURL(cls): def getSoftwareURL(cls):
...@@ -361,6 +384,42 @@ class SlapOSInstanceTestCase(unittest.TestCase): ...@@ -361,6 +384,42 @@ class SlapOSInstanceTestCase(unittest.TestCase):
""" """
cls._cleanup() cls._cleanup()
def tearDown(self):
  """Snapshot log and configuration files after each test.

  Files are handed one by one to `_snapshot_instance_file`: first
  everything directly under `var/log` of the standalone base directory,
  then every file of the instance directory matching one of the patterns
  in `_save_instance_file_pattern_list`, in pattern order.
  """
  snapshot = self._snapshot_instance_file

  # log files written by standalone itself
  standalone_log_glob = os.path.join(self._base_directory, 'var', 'log', '*')
  for log_path in glob.glob(standalone_log_glob):
    snapshot(log_path)

  # config and log files from the partitions
  for file_pattern in self._save_instance_file_pattern_list:
    partition_glob = os.path.join(self.slap.instance_directory, file_pattern)
    for matched_path in glob.glob(partition_glob):
      snapshot(matched_path)
def _snapshot_instance_file(self, source_file_name):
  """Save a file for later inspection.

  The path is made relative to the slapos root directory
  (`self._base_directory`) and the same directory structure is kept below
  `<_test_file_snapshot_directory>/<software_id>/<test id>/`.

  The destination parent directory is always created; the copy itself only
  happens when `source_file_name` is a regular file.
  """
  # we cannot use os.path.commonpath on python2, so implement something similar
  common_path = os.path.commonprefix((source_file_name, self._base_directory))
  remainder = source_file_name[len(common_path):]
  # os.path.commonprefix compares character by character, so the prefix may
  # stop in the middle of a path component (/tmp/slap for /tmp/slapos/x).
  # Such a prefix can still be an existing directory, so checking isdir alone
  # is not enough: also require that it ends on a component boundary.
  ends_inside_component = (
      bool(remainder)
      and not remainder.startswith(os.sep)
      and not common_path.endswith(os.sep))
  if ends_inside_component or not os.path.isdir(common_path):
    common_path = os.path.dirname(common_path)

  # lstrip rather than indexing [0]: the relative path is empty when the
  # source is the common path itself, and a leading separator would make
  # os.path.join below discard the snapshot directory.
  relative_path = source_file_name[len(common_path):].lstrip(os.sep)

  destination = os.path.join(
      self._test_file_snapshot_directory,
      self.software_id,
      self.id(),
      relative_path)
  destination_dirname = os.path.dirname(destination)
  mkdir_p(destination_dirname)
  if os.path.isfile(source_file_name):
    self.logger.debug("copy %s as %s", source_file_name, destination)
    shutil.copy(source_file_name, destination)
# implementation methods # implementation methods
@classmethod @classmethod
def _cleanup(cls): def _cleanup(cls):
...@@ -383,25 +442,30 @@ class SlapOSInstanceTestCase(unittest.TestCase): ...@@ -383,25 +442,30 @@ class SlapOSInstanceTestCase(unittest.TestCase):
cls.logger.critical( cls.logger.critical(
"The following partitions were not cleaned up: %s", "The following partitions were not cleaned up: %s",
[cp.getId() for cp in leaked_partitions]) [cp.getId() for cp in leaked_partitions])
for cp in leaked_partitions: for cp in leaked_partitions:
try:
cls.slap.request(
software_release=cp.getSoftwareRelease().getURI(),
# software_type=cp.getType(), # TODO
# XXX is this really the reference ?
partition_reference=cp.getInstanceParameterDict()['instance_title'],
state="destroyed")
except:
cls.logger.exception(
"Error during request destruction of leaked partition")
try: try:
cls.slap.request( cls.slap.waitForReport(max_retry=cls.report_max_retry, debug=cls._debug)
software_release=cp.getSoftwareRelease().getURI(),
# software_type=cp.getType(), # TODO
# XXX is this really the reference ?
partition_reference=cp.getInstanceParameterDict()['instance_title'],
state="destroyed")
except: except:
cls.logger.exception( cls.logger.exception("Error during leaked partitions actual destruction")
"Error during request destruction of leaked partition")
try:
cls.slap.waitForReport(max_retry=cls.report_max_retry, debug=cls._debug)
except:
cls.logger.exception("Error during leaked partitions actual destruction")
try: try:
cls.slap.stop() cls.slap.stop()
except: except:
cls.logger.exception("Error during stop") cls.logger.exception("Error during stop")
leaked_supervisor_configs = glob.glob(
os.path.join(cls.slap.instance_directory, 'etc', 'supervisord.conf.d', '*.conf'))
if leaked_supervisor_configs:
[os.unlink(config) for config in leaked_supervisor_configs]
raise AssertionError("Test leaked supervisor configurations: %s" % leaked_supervisor_configs)
@classmethod @classmethod
def requestDefaultInstance(cls, state='started'): def requestDefaultInstance(cls, state='started'):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment