Commit 14001ab2 authored by Julien Muchembled's avatar Julien Muchembled

monitor: fix stuck process

parent 8298d98a
......@@ -48,7 +48,7 @@ mode = 0644
recipe = hexagonit.recipe.download
url = ${:_profile_base_location_}/${:filename}
download-only = true
md5sum = cb2f15850d3dc82459a0044adb4416cf
md5sum = 49587347fd82d93cca27715f787f8bec
destination = ${buildout:parts-directory}/monitor-template-monitor-bin
filename = monitor.py.in
mode = 0644
......
......@@ -36,25 +36,29 @@ option_list = [
]
class Popen(subprocess.Popen):
__timeout = None
def timeout(self, delay, delay_before_kill=5):
if self.__timeout is not None: self.__timeout.cancel()
self.__timeout = threading.Timer(delay, self.stop, [delay_before_kill])
self.__timeout.start()
def waiter():
self.wait()
self.__timeout.cancel()
threading.Thread(target=waiter).start()
def stop(self, delay_before_kill=5):
if self.__timeout is not None: self.__timeout.cancel()
def set_timeout(self, timeout):
self.set_timeout = None # assert we're not called twice
event = threading.Event()
event.__killed = False # we just need a mutable
def t():
# do not call wait() or poll() because they're not thread-safe
if not event.wait(timeout) and self.returncode is None:
# race condition if waitpid completes just before the signal sent ?
self.terminate()
t = threading.Timer(delay_before_kill, self.kill)
event.__killed = True
if event.wait(5):
return
if self.returncode is None:
self.kill() # same race as for terminate ?
t = threading.Thread(target=t)
t.daemon = True
t.start()
r = self.wait()
t.cancel()
return r
def killed():
event.set()
t.join()
return event.__killed
return killed
def init_db():
db = sqlite3.connect(db_path)
......@@ -109,34 +113,23 @@ def runServices(directory):
def runScripts(directory):
scripts = getListOfScripts(directory)
# XXX script_timeout could be passed as parameters
script_timeout = 60 # in seconds
result = {}
for script in scripts:
command = [os.path.join(promise_dir, script)]
script = os.path.basename(command[0])
with open(os.devnull, 'r+') as f:
for script in getListOfScripts(directory):
command = os.path.join(promise_dir, script),
script = os.path.basename(script)
result[script] = ''
process_handler = Popen(command,
cwd=instance_path,
p = Popen(command, cwd=instance_path,
env=None if sys.platform == 'cygwin' else {},
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
stdin=subprocess.PIPE)
process_handler.stdin.flush()
process_handler.stdin.close()
process_handler.stdin = None
process_handler.timeout(script_timeout)
process_handler.wait()
if process_handler.poll() is None:
process_handler.terminate()
stdin=f, stdout=f, stderr=subprocess.PIPE)
killed = p.set_timeout(script_timeout)
stderr = p.communicate()[1]
if killed():
result[script] = "Time Out"
elif process_handler.poll() != 0:
stderr = process_handler.communicate()[1]
if stderr is not None:
elif p.returncode:
result[script] = stderr.strip()
return result
......@@ -183,10 +176,6 @@ def main():
print json.dumps(monitors)
else:
writeFiles(monitors)
if len(monitors) == 0:
exit(0)
else:
exit(1)
if __name__ == "__main__":
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment