Commit 203a89b0 authored by Ivan Tyagov's avatar Ivan Tyagov

All XML-RPC methods must be executed within the *safe* method, which allows

a restart if the task distribution node is down.
Refactor.
parent e267ed79
...@@ -34,6 +34,9 @@ import time ...@@ -34,6 +34,9 @@ import time
import xmlrpclib import xmlrpclib
import glob import glob
import SlapOSControler import SlapOSControler
import logging
DEFAULT_SLEEP_TIMEOUT = 120 # time in seconds to sleep
class SubprocessError(EnvironmentError): class SubprocessError(EnvironmentError):
def __init__(self, status_dict): def __init__(self, status_dict):
...@@ -43,7 +46,6 @@ class SubprocessError(EnvironmentError): ...@@ -43,7 +46,6 @@ class SubprocessError(EnvironmentError):
def __str__(self): def __str__(self):
return 'Error %i' % self.status_code return 'Error %i' % self.status_code
from Updater import Updater from Updater import Updater
supervisord_pid_file = None supervisord_pid_file = None
...@@ -58,8 +60,9 @@ def sigterm_handler(signal, frame): ...@@ -58,8 +60,9 @@ def sigterm_handler(signal, frame):
signal.signal(signal.SIGTERM, sigterm_handler) signal.signal(signal.SIGTERM, sigterm_handler)
import logging
def safeRpcCall(function, *args): def safeRpcCall(function, *args):
# XXX: this method will retry calls to the backend indefinitely;
# this can cause the testnode to look "stalled"
retry = 64 retry = 64
while True: while True:
try: try:
...@@ -188,7 +191,7 @@ branch = %(branch)s ...@@ -188,7 +191,7 @@ branch = %(branch)s
revision = ','.join(full_revision_list) revision = ','.join(full_revision_list)
if previous_revision == revision: if previous_revision == revision:
log('Sleeping a bit') log('Sleeping a bit')
time.sleep(120) time.sleep(DEFAULT_SLEEP_TIMEOUT)
if not(retry_software): if not(retry_software):
continue continue
log('Retrying install') log('Retrying install')
...@@ -204,7 +207,7 @@ branch = %(branch)s ...@@ -204,7 +207,7 @@ branch = %(branch)s
portal = xmlrpclib.ServerProxy("%s%s" % portal = xmlrpclib.ServerProxy("%s%s" %
(portal_url, 'portal_task_distribution'), (portal_url, 'portal_task_distribution'),
allow_none=1) allow_none=1)
master = portal.portal_task_distribution master = portal
assert safeRpcCall(master.getProtocolRevision) == 1 assert safeRpcCall(master.getProtocolRevision) == 1
test_result = safeRpcCall(master.createTestResult, test_result = safeRpcCall(master.createTestResult,
config['test_suite'], revision, [], config['test_suite'], revision, [],
...@@ -235,7 +238,7 @@ branch = %(branch)s ...@@ -235,7 +238,7 @@ branch = %(branch)s
slapos_controler = SlapOSControler.SlapOSControler(config, slapos_controler = SlapOSControler.SlapOSControler(config,
process_group_pid_set=process_group_pid_set, log=log, process_group_pid_set=process_group_pid_set, log=log,
slapproxy_log=slapproxy_log) slapproxy_log=slapproxy_log)
for method_name in ("runSoftwareRelease", "runComputerPartition"): for method_name in ("runSoftwareRelease", "runComputerPartition",):
stdout, stderr = getInputOutputFileList(config, method_name) stdout, stderr = getInputOutputFileList(config, method_name)
slapos_method = getattr(slapos_controler, method_name) slapos_method = getattr(slapos_controler, method_name)
status_dict = slapos_method(config, status_dict = slapos_method(config,
...@@ -288,7 +291,7 @@ branch = %(branch)s ...@@ -288,7 +291,7 @@ branch = %(branch)s
if remote_test_result_needs_cleanup: if remote_test_result_needs_cleanup:
safeRpcCall(master.reportTaskFailure, safeRpcCall(master.reportTaskFailure,
test_result_path, e.status_dict, config['test_node_title']) test_result_path, e.status_dict, config['test_node_title'])
time.sleep(120) time.sleep(DEFAULT_SLEEP_TIMEOUT)
continue continue
finally: finally:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment