Commit fb9db1c7 authored by Julien Muchembled, committed by Alain Takoudjou

monitor: fix stuck process

parent 25af007b
...@@ -53,7 +53,7 @@ mode = 0644 ...@@ -53,7 +53,7 @@ mode = 0644
recipe = hexagonit.recipe.download recipe = hexagonit.recipe.download
url = ${:_profile_base_location_}/${:filename} url = ${:_profile_base_location_}/${:filename}
download-only = true download-only = true
md5sum = cb2f15850d3dc82459a0044adb4416cf md5sum = 49587347fd82d93cca27715f787f8bec
destination = ${buildout:parts-directory}/monitor-template-monitor-bin destination = ${buildout:parts-directory}/monitor-template-monitor-bin
filename = monitor.py.in filename = monitor.py.in
mode = 0644 mode = 0644
......
...@@ -36,25 +36,29 @@ option_list = [ ...@@ -36,25 +36,29 @@ option_list = [
] ]
class Popen(subprocess.Popen): class Popen(subprocess.Popen):
__timeout = None
def set_timeout(self, timeout):
def timeout(self, delay, delay_before_kill=5): self.set_timeout = None # assert we're not called twice
if self.__timeout is not None: self.__timeout.cancel() event = threading.Event()
self.__timeout = threading.Timer(delay, self.stop, [delay_before_kill]) event.__killed = False # we just need a mutable
self.__timeout.start() def t():
def waiter(): # do not call wait() or poll() because they're not thread-safe
self.wait() if not event.wait(timeout) and self.returncode is None:
self.__timeout.cancel() # race condition if waitpid completes just before the signal sent ?
threading.Thread(target=waiter).start() self.terminate()
event.__killed = True
def stop(self, delay_before_kill=5): if event.wait(5):
if self.__timeout is not None: self.__timeout.cancel() return
self.terminate() if self.returncode is None:
t = threading.Timer(delay_before_kill, self.kill) self.kill() # same race as for terminate ?
t = threading.Thread(target=t)
t.daemon = True
t.start() t.start()
r = self.wait() def killed():
t.cancel() event.set()
return r t.join()
return event.__killed
return killed
def init_db(): def init_db():
db = sqlite3.connect(db_path) db = sqlite3.connect(db_path)
...@@ -109,34 +113,23 @@ def runServices(directory): ...@@ -109,34 +113,23 @@ def runServices(directory):
def runScripts(directory): def runScripts(directory):
scripts = getListOfScripts(directory)
# XXX script_timeout could be passed as parameters # XXX script_timeout could be passed as parameters
script_timeout = 60 # in seconds script_timeout = 60 # in seconds
result = {} result = {}
for script in scripts: with open(os.devnull, 'r+') as f:
command = [os.path.join(promise_dir, script)] for script in getListOfScripts(directory):
script = os.path.basename(command[0]) command = os.path.join(promise_dir, script),
result[script] = '' script = os.path.basename(script)
result[script] = ''
process_handler = Popen(command,
cwd=instance_path, p = Popen(command, cwd=instance_path,
env=None if sys.platform == 'cygwin' else {}, env=None if sys.platform == 'cygwin' else {},
stdout=subprocess.PIPE, stdin=f, stdout=f, stderr=subprocess.PIPE)
stderr=subprocess.PIPE, killed = p.set_timeout(script_timeout)
stdin=subprocess.PIPE) stderr = p.communicate()[1]
process_handler.stdin.flush() if killed():
process_handler.stdin.close() result[script] = "Time Out"
process_handler.stdin = None elif p.returncode:
process_handler.timeout(script_timeout)
process_handler.wait()
if process_handler.poll() is None:
process_handler.terminate()
result[script] = "Time Out"
elif process_handler.poll() != 0:
stderr = process_handler.communicate()[1]
if stderr is not None:
result[script] = stderr.strip() result[script] = stderr.strip()
return result return result
...@@ -183,10 +176,6 @@ def main(): ...@@ -183,10 +176,6 @@ def main():
print json.dumps(monitors) print json.dumps(monitors)
else: else:
writeFiles(monitors) writeFiles(monitors)
if len(monitors) == 0:
exit(0)
else:
exit(1)
if __name__ == "__main__": if __name__ == "__main__":
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment