Commit 1dade5cd authored by Cédric Le Ninivin

Added test and entry-point for slapos-watchdog

parent 16b2e8b8
...@@ -59,7 +59,7 @@ setup(name=name, ...@@ -59,7 +59,7 @@ setup(name=name,
'slapproxy = slapos.proxy:main', 'slapproxy = slapos.proxy:main',
'bang = slapos.bang:main', 'bang = slapos.bang:main',
'slapos = slapos.entry:main', 'slapos = slapos.entry:main',
'watchdog = slapos.grid.watchdog:main', 'slapos-watchdog = slapos.grid.watchdog:main',
] ]
}, },
test_suite="slapos.tests", test_suite="slapos.tests",
......
...@@ -32,6 +32,7 @@ import os ...@@ -32,6 +32,7 @@ import os
import shutil import shutil
import signal import signal
import slapos.slap.slap import slapos.slap.slap
from slapos.grid.watchdog import Watchdog, getWatchdogID
import socket import socket
import sys import sys
import tempfile import tempfile
...@@ -40,6 +41,33 @@ import unittest ...@@ -40,6 +41,33 @@ import unittest
import urlparse import urlparse
import xml_marshaller import xml_marshaller
# Template for a stand-in watchdog executable used by the tests.
# Placeholders: %(python_path)s is the interpreter for the shebang line,
# %(sys_path)s is the sys.path assigned inside the script, and
# %(watchdog_banged)s is the file that receives the report.
# The generated script replaces slapos.slap.ComputerPartition.bang with a
# function that dumps the partition's attributes (and those of its
# '_connection_helper') plus the bang message into that file, then runs
# slapos.grid.watchdog.main().
# The embedded code must keep its nesting: without indentation the generated
# script is not valid Python.
WATCHDOG_TEMPLATE = """#!%(python_path)s -S
import sys
sys.path=%(sys_path)s
import slapos.slap.slap
import slapos.grid.watchdog
def setBang():
    def getBang():
        def bang(self_partition,message):
            report = ""
            for key in self_partition.__dict__:
                report += (key + ': ' + str(self_partition.__dict__[key]) + ' ')
                if key == '_connection_helper':
                    for el in self_partition.__dict__[key].__dict__:
                        report += (' ' + el +': ' +
                                   str(self_partition.__dict__[key].__dict__[el]) + ' ')
            report += message
            open('%(watchdog_banged)s','w').write(report)
        return bang
    slapos.slap.ComputerPartition.bang = getBang()
setBang()
slapos.grid.watchdog.main()
"""
WRAPPER_CONTENT = """#!/bin/sh WRAPPER_CONTENT = """#!/bin/sh
touch worked && touch worked &&
mkdir -p etc/run && mkdir -p etc/run &&
...@@ -48,6 +76,19 @@ echo "while :; do echo "Working\\nWorking\\n" ; sleep 0.1; done" >> etc/run/wrap ...@@ -48,6 +76,19 @@ echo "while :; do echo "Working\\nWorking\\n" ; sleep 0.1; done" >> etc/run/wrap
chmod 755 etc/run/wrapper chmod 755 etc/run/wrapper
""" """
# Shell script handed to setBuildout() by the watchdog test below.
# It creates etc/service/daemon, makes it executable and touches `worked`.
# The generated daemon touches `launched`; when ./crashed does not exist yet
# it creates ./crashed, prints "Failing", sleeps one second and ends with
# `return 111`; when ./crashed already exists it loops printing "Working".
# The string is runtime data and must be kept byte-for-byte.
DAEMON_CONTENT = """#!/bin/sh
mkdir -p etc/service &&
echo "#!/bin/sh" > etc/service/daemon &&
echo "touch launched
if [ -f ./crashed ]; then
while :; do echo "Working\\nWorking\\n" ; sleep 0.1; done
else
touch ./crashed; echo "Failing\\nFailing\\n"; sleep 1; return 111;
fi" >> etc/service/daemon &&
chmod 755 etc/service/daemon &&
touch worked
"""
class BasicMixin: class BasicMixin:
def assertSortedListEqual(self, list1, list2, msg=None): def assertSortedListEqual(self, list1, list2, msg=None):
self.assertListEqual(sorted(list1), sorted(list2), msg) self.assertListEqual(sorted(list1), sorted(list2), msg)
...@@ -241,6 +282,8 @@ class ComputerForTest: ...@@ -241,6 +282,8 @@ class ComputerForTest:
if parsed_url.path == 'destroyedComputerPartition': if parsed_url.path == 'destroyedComputerPartition':
instance.state = 'destroyed' instance.state = 'destroyed'
return (200, {}, '') return (200, {}, '')
if parsed_url.path == 'softwareInstanceBang':
return (200, {}, '')
if parsed_url.path == 'softwareInstanceError': if parsed_url.path == 'softwareInstanceError':
instance.error_log = '\n'.join([line for line \ instance.error_log = '\n'.join([line for line \
in parsed_qs['error_log'][0].splitlines() in parsed_qs['error_log'][0].splitlines()
...@@ -569,6 +612,177 @@ chmod 755 etc/run/wrapper ...@@ -569,6 +612,177 @@ chmod 755 etc/run/wrapper
self.assertEqual('stopped', instance.state) self.assertEqual('stopped', instance.state)
class TestSlapgridCPWithMasterWatchdog(MasterMixin, unittest.TestCase):
    """Tests for the watchdog that bangs partitions whose services fail."""

    # Shell script used as buildout stand-in: it installs etc/run/daemon,
    # a script that touches `launched` and `./crashed`, prints "Failing",
    # sleeps one second and ends with `return 111`.
    RUN_CONTENT = """#!/bin/sh
mkdir -p etc/run &&
echo "#!/bin/sh" > etc/run/daemon &&
echo "touch launched
touch ./crashed; echo "Failing\\nFailing\\n"; sleep 1; return 111;
" >> etc/run/daemon &&
chmod 755 etc/run/daemon &&
touch worked
"""

    def _waitUntil(self, condition, tries, delay=0.2):
        """Poll `condition` up to `tries` times, sleeping `delay` seconds
        after each unsuccessful check.

        Returns True as soon as `condition()` is true, False otherwise.
        """
        for _ in range(tries):
            if condition():
                return True
            time.sleep(delay)
        return False

    def _installWatchdog(self):
        """Write the stand-in watchdog script and make the grid use it.

        Returns the path of the file written by the monkeypatched bang.
        """
        watchdog_path = os.path.join(self._tempdir, 'watchdog')
        watchdog_banged = os.path.join(self._tempdir, 'watchdog_banged')
        # Close the script explicitly so its content is flushed before the
        # file is made executable and launched.
        with open(watchdog_path, 'w') as script:
            script.write(WATCHDOG_TEMPLATE % dict(
                python_path=sys.executable,
                sys_path=sys.path,
                watchdog_banged=watchdog_banged))
        os.chmod(watchdog_path, 0o755)
        self.grid.watchdog_path = watchdog_path
        return watchdog_banged

    def _runFailingDaemon(self, buildout_content):
        """Process a partition built from `buildout_content`, wait for its
        daemon to log a failure, then give the watchdog time to bang.

        Returns the path of the file written by the monkeypatched bang;
        the caller decides whether that file is expected to exist.
        """
        computer = ComputerForTest(self.software_root, self.instance_root)
        partition = computer.instance_list[0]
        partition.requested_state = 'started'
        partition.software.setBuildout(buildout_content)
        watchdog_banged = self._installWatchdog()

        self.assertTrue(self.grid.processComputerPartitionList())
        self.assertSortedListEqual(os.listdir(self.instance_root),
                                   ['0', 'etc', 'var'])
        self.assertSortedListEqual(
            os.listdir(partition.partition_path),
            ['.0_daemon.log', 'worked', 'buildout.cfg', 'etc'])

        # Wait for the daemon to write something to its log.
        daemon_log = os.path.join(partition.partition_path, '.0_daemon.log')
        self._waitUntil(lambda: os.path.getsize(daemon_log) > 0, tries=10)
        with open(daemon_log, 'r') as log:
            self.assertIn('Failing', log.read())

        # Wait for the report file of the monkeypatched bang.
        self._waitUntil(lambda: os.path.exists(watchdog_banged), tries=25)
        return watchdog_banged

    def _makeWatchdog(self):
        """Return a Watchdog configured for the test master, without SSL
        key or certificate."""
        return Watchdog(dict(master_url=self.master_url,
                             computer_id=self.computer_id,
                             key_file=None,
                             cert_file=None))

    def _sendEvent(self, watchdog, event, process_name, instance):
        """Feed `watchdog` one event named `event` for `process_name` in
        the group named after `instance`."""
        headers = dict(eventname=event)
        payload = "processname:%s groupname:%s from_state:RUNNING" % (
            process_name, instance.name)
        watchdog.handle_event(headers, payload)

    def test_one_failing_daemon_in_service_will_bang_with_watchdog(self):
        """
        Check that a failing service watched by watchdog triggers bang
        1.Prepare computer and set a service named daemon in etc/service
          (to be watched by watchdog). This daemon will fail.
        2.Prepare file for supervisord to call watchdog
          -Set sys.path
          -Monkeypatch computer partition bang
        3.Check daemon is launched
        4.Wait for it to fail
        5.Wait for file generated by monkeypatched bang to appear
        """
        watchdog_banged = self._runFailingDaemon(DAEMON_CONTENT)
        self.assertTrue(os.path.exists(watchdog_banged))
        with open(watchdog_banged, 'r') as report:
            self.assertIn('daemon', report.read())

    def test_one_failing_daemon_in_run_will_not_bang_with_watchdog(self):
        """
        Check that a failing service not watched by watchdog does not
        trigger bang
        1.Prepare computer and set a service named daemon in etc/run
          (not watched by watchdog). This daemon will fail.
        2.Prepare file for supervisord to call watchdog
          -Set sys.path
          -Monkeypatch computer partition bang
        3.Check daemon is launched
        4.Wait for it to fail
        5.Check that file generated by monkeypatched bang does not appear
        """
        watchdog_banged = self._runFailingDaemon(self.RUN_CONTENT)
        self.assertFalse(os.path.exists(watchdog_banged))

    def test_watched_by_watchdog_bang(self):
        """
        Test that a process going to fatal or exited mode in supervisord
        is banged if watched by watchdog
        (ie: watchdog id in process name)
        """
        computer = ComputerForTest(self.software_root, self.instance_root)
        instance = computer.instance_list[0]
        watchdog = self._makeWatchdog()
        for event in watchdog.process_state_events:
            instance.sequence = []
            self._sendEvent(watchdog, event, 'daemon' + getWatchdogID(),
                            instance)
            self.assertEqual(instance.sequence, ['softwareInstanceBang'])

    def test_unwanted_events_will_not_bang(self):
        """
        Test that a process going to a mode not watched by watchdog
        in supervisord is not banged if watched by watchdog
        """
        computer = ComputerForTest(self.software_root, self.instance_root)
        instance = computer.instance_list[0]
        watchdog = self._makeWatchdog()
        for event in ['EVENT', 'PROCESS_STATE', 'PROCESS_STATE_RUNNING',
                      'PROCESS_STATE_BACKOFF', 'PROCESS_STATE_STOPPED']:
            computer.sequence = []
            # Reset the sequence that is actually asserted below, so each
            # event is checked independently of the previous ones.
            instance.sequence = []
            self._sendEvent(watchdog, event, 'daemon' + getWatchdogID(),
                            instance)
            self.assertEqual(instance.sequence, [])

    def test_not_watched_by_watchdog_do_not_bang(self):
        """
        Test that a process going to fatal or exited mode in supervisord
        is not banged if not watched by watchdog
        (ie: no watchdog id in process name)
        """
        computer = ComputerForTest(self.software_root, self.instance_root)
        instance = computer.instance_list[0]
        watchdog = self._makeWatchdog()
        for event in watchdog.process_state_events:
            computer.sequence = []
            self._sendEvent(watchdog, event, 'daemon', instance)
            self.assertEqual(computer.sequence, [])
class TestSlapgridCPPartitionProcessing (MasterMixin, unittest.TestCase): class TestSlapgridCPPartitionProcessing (MasterMixin, unittest.TestCase):
def test_partition_timestamp(self): def test_partition_timestamp(self):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment