Commit db2d4929 authored by Xavier Thompson's avatar Xavier Thompson

SlapPopen: Improve logging and fix stderr and timeouts

See merge request nexedi/slapos.core!458
parents b0100fa6 4bdf69f8
Pipeline #25179 failed with stage
in 0 seconds
......@@ -59,7 +59,11 @@ from slapos import manager as slapmanager
from slapos.slap.slap import NotFoundError
from slapos.slap.slap import ServerError
from slapos.slap.slap import COMPUTER_PARTITION_REQUEST_LIST_TEMPLATE_FILENAME
from slapos.util import mkdir_p, chownDirectory, string_to_boolean, listifdir
from slapos.util import (mkdir_p,
chownDirectory,
string_to_boolean,
listifdir,
unicode2str)
from slapos.grid.exception import BuildoutFailedError
from slapos.grid.SlapObject import Software, Partition
from slapos.grid.svcbackend import (launchSupervisord,
......@@ -68,7 +72,6 @@ from slapos.grid.svcbackend import (launchSupervisord,
_getSupervisordSocketPath)
from slapos.grid.utils import (md5digest,
dropPrivileges,
killProcessTree,
SlapPopen,
updateFile)
from slapos.grid.promise import PromiseLauncher, PromiseError
......@@ -738,7 +741,6 @@ stderr_logfile_backups=1
os.dup2(1, 2)
dropPrivileges(uid, gid, logger=self.logger)
os.dup2(err, 2)
try:
process = SlapPopen(
command,
preexec_fn=preexec_fn,
......@@ -749,17 +751,12 @@ stderr_logfile_backups=1
logger=self.logger,
timeout=timeout,
)
stderr = process.stderr.read()
if process.returncode == 2:
raise PromiseError(stderr)
raise PromiseError(unicode2str(process.error))
elif process.returncode:
raise Exception(stderr)
elif stderr:
self.logger.warn('Unexpected promise runner output:\n%s', stderr)
except subprocess.TimeoutExpired:
killProcessTree(process.pid, self.logger)
# If this happens, it might be that the timeout margin is too small.
raise Exception('Promise runner timed out')
raise Exception(unicode2str(process.error))
elif process.error:
self.logger.warn('Unexpected promise runner output:\n%s', process.error)
else:
return PromiseLauncher(config=promise_config, logger=self.logger).run()
......
......@@ -33,9 +33,9 @@ import hashlib
import os
import pkg_resources
import pwd
import select
import stat
import sys
import threading
import logging
import psutil
import time
......@@ -93,24 +93,27 @@ LOCALE_ENVIRONMENT_REMOVE_LIST = [
]
def logAndAccumulateOutput(process_stdout, buffer, logger):
"""Read process output and place the output in `buffer`, logging the lines
one by one as they are emitted.
class LineLogger(object):
"""
current_output = ''
while 1:
data = os.read(process_stdout.fileno(), 4096)
if not data:
return
data = data.decode('utf-8', 'replace')
buffer.append(data)
current_output += data
for current_output_line in current_output.splitlines(True):
if current_output_line.endswith('\n'):
logger.info(current_output_line.rstrip('\n'))
current_output = ''
else:
current_output = current_output_line
Logger that takes chunks cut arbitrarily and logs them back line by line.
"""
def __init__(self, logger):
self.logger = logger
self.current = ''
def update(self, data):
lines = (self.current + data).splitlines()
self.current = lines.pop()
for line in lines:
self.logger.info(line)
if data.endswith('\n'):
self.logger.info(self.current)
self.current = ''
def flush(self):
if self.current:
self.logger.info(self.current)
self.current = ''
class SlapPopen(subprocess.Popen):
......@@ -145,24 +148,67 @@ class SlapPopen(subprocess.Popen):
if debug:
self.wait()
self.output = '(output not captured in debug mode)'
self.error = '(error not captured in debug mode)'
return
self.stdin.flush()
self.stdin.close()
self.stdin = None
output_lines = []
stderr_fileno = stdout_fileno = None
buffers = {}
if kwargs['stdout'] is subprocess.PIPE:
line_logger = LineLogger(logger)
stdout_fileno = self.stdout.fileno()
buffers[stdout_fileno] = []
if kwargs['stderr'] is subprocess.PIPE:
stderr_fileno = self.stderr.fileno()
buffers[stderr_fileno] = []
poll = select.poll()
for fd in buffers:
poll.register(fd)
active = len(buffers)
if timeout is not None:
deadline = time.time() + timeout
while active:
for fd, _ in poll.poll(timeout):
data = os.read(fd, 4096).decode('utf-8', 'replace')
if data:
if fd == stdout_fileno:
line_logger.update(data)
buffers[fd].append(data)
else:
if fd == stdout_fileno:
line_logger.flush()
poll.unregister(fd)
active -= 1
if timeout is not None:
timeout = deadline - time.time()
if timeout <= 0:
timeout = 0
break
# BBB: reading output in a separate thread is not needed on python 3,
# iterating on self.stdout seems enough.
t = threading.Thread(
target=logAndAccumulateOutput,
args=(self.stdout, output_lines, logger))
t.start()
try:
self.wait(timeout=timeout)
except subprocess.TimeoutExpired as e:
for p in killProcessTree(self.pid, logger):
p.wait(timeout=10) # arbitrary timeout, wait until process is killed
self.poll() # set returncode (and avoid still-running warning)
e.output = e.stdout = ''.join(buffers.get(stdout_fileno, ()))
e.stderr = ''.join(buffers.get(stderr_fileno, ()))
raise
finally:
t.join()
self.output = ''.join(output_lines)
for s in (self.stdout, self.stderr):
if s:
try:
s.close()
except OSError:
pass
self.output = ''.join(buffers.get(stdout_fileno, ()))
self.error = ''.join(buffers.get(stderr_fileno, ()))
def md5digest(url):
......@@ -449,7 +495,7 @@ def killProcessTree(pid, logger):
process = psutil.Process(pid)
process.suspend()
except psutil.Error:
return
return ()
process_list = [process]
running_process_list = process.children(recursive=True)
......@@ -469,3 +515,5 @@ def killProcessTree(pid, logger):
process.kill()
except psutil.Error as e:
logger.debug("Process kill: %s" % e)
return process_list
......@@ -27,12 +27,18 @@
from __future__ import unicode_literals
import logging
import os
import subprocess
import psutil
import sys
import tempfile
import textwrap
import time
import unittest
if sys.version_info >= (3,):
import subprocess
else:
import subprocess32 as subprocess
import mock
import slapos.grid.utils
......@@ -169,6 +175,232 @@ class SlapPopenTestCase(unittest.TestCase):
for fd in (child_stdin_r, child_stdout_r, child_stdout_w, stdin_backup, stdout_backup):
os.close(fd)
def test_stderr(self):
self.script.write(textwrap.dedent("""\
#!/bin/sh
>&2 echo "hello"
exit 123
""").encode())
self.script.close()
logger = mock.MagicMock()
program = slapos.grid.utils.SlapPopen(
self.script.name,
stdout=None,
stderr=subprocess.PIPE,
logger=logger)
# error code, and error output are returned
self.assertEqual(123, program.returncode)
self.assertEqual('hello\n', program.error)
self.assertEqual('', program.output)
# no output, nothing is logged "live"
self.assertFalse(logger.info.called)
def test_stdout_and_stderr(self):
self.script.write(textwrap.dedent("""\
#!/bin/sh
echo "hello"
>&2 echo "world"
exit 123
""").encode())
self.script.close()
logger = mock.MagicMock()
program = slapos.grid.utils.SlapPopen(
self.script.name,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
logger=logger)
# error code, stderr and stdout are returned
self.assertEqual(123, program.returncode)
self.assertEqual('hello\n', program.output)
self.assertEqual('world\n', program.error)
# only stdout is logged
logger.info.assert_called_once_with('hello')
def test_timeout_stdout_multiline(self):
self.script.write(textwrap.dedent("""\
#!/bin/sh
for i in $(seq 100)
do
echo .
sleep 0.1
done
""").encode())
self.script.close()
logger = mock.MagicMock()
start = time.time()
with self.assertRaises(subprocess.TimeoutExpired) as cm:
program = slapos.grid.utils.SlapPopen(
self.script.name,
timeout=1,
logger=logger)
# the timeout was respected
elapsed = time.time() - start
self.assertLess(elapsed, 5)
self.assertGreaterEqual(elapsed, 1)
# the output before timeout is captured
self.assertEqual(cm.exception.output, '.\n' * 10)
# each line before timeout is logged "live" as well
self.assertEqual(logger.info.call_args_list, [mock.call('.')] * 10)
def test_timeout_stdout_oneline(self):
self.script.write(textwrap.dedent("""\
#!/bin/sh
for i in $(seq 100)
do
echo -n .
sleep 0.1
done
""").encode())
self.script.close()
logger = mock.MagicMock()
start = time.time()
with self.assertRaises(subprocess.TimeoutExpired) as cm:
program = slapos.grid.utils.SlapPopen(
self.script.name,
timeout=1,
logger=logger)
# the timeout was respected
elapsed = time.time() - start
self.assertLess(elapsed, 5)
self.assertGreaterEqual(elapsed, 1)
# the output before timeout is captured
self.assertEqual(cm.exception.output, '.' * 10)
# endline is never reached, so nothing is logged "live"
self.assertFalse(logger.info.called)
def test_timeout_stdout_and_stderr(self):
self.script.write(textwrap.dedent("""\
#!/bin/sh
for i in $(seq 100)
do
>&2 echo -n -
echo -n .
sleep 0.1
done
""").encode())
self.script.close()
logger = mock.MagicMock()
start = time.time()
with self.assertRaises(subprocess.TimeoutExpired) as cm:
program = slapos.grid.utils.SlapPopen(
self.script.name,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
timeout=1,
logger=logger)
# the timeout was respected
elapsed = time.time() - start
self.assertLess(elapsed, 5)
self.assertGreaterEqual(elapsed, 1)
# the output before timeout is captured
self.assertEqual(cm.exception.output, '.' * 10)
self.assertEqual(cm.exception.stderr, '-' * 10)
# endline is never reached, so nothing is logged "live"
self.assertFalse(logger.info.called)
def test_timeout_no_stdout_no_stderr(self):
self.script.write(b'#!/bin/sh\nsleep 20')
self.script.close()
logger = mock.MagicMock()
start = time.time()
with self.assertRaises(subprocess.TimeoutExpired) as cm:
program = slapos.grid.utils.SlapPopen(
self.script.name,
timeout=1,
logger=logger)
# the timeout was respected
elapsed = time.time() - start
self.assertLess(elapsed, 5)
self.assertGreaterEqual(elapsed, 1)
# no output
self.assertEqual(cm.exception.output, '')
self.assertEqual(cm.exception.stderr, '')
# nothing is logged "live"
self.assertFalse(logger.info.called)
def test_timeout_killed(self):
self.script.write(b'#!/bin/sh\necho -n $$\nsleep 20')
self.script.close()
logger = mock.MagicMock()
start = time.time()
with self.assertRaises(subprocess.TimeoutExpired) as cm:
program = slapos.grid.utils.SlapPopen(
self.script.name,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
timeout=1,
logger=logger)
# the timeout was respected
elapsed = time.time() - start
self.assertLess(elapsed, 5)
self.assertGreaterEqual(elapsed, 1)
# output pid
pid = int(cm.exception.output)
self.assertEqual(cm.exception.stderr, '')
# subprocess has been killed
self.assertFalse(psutil.pid_exists(pid))
# endline is never reached, so nothing is logged "live"
self.assertFalse(logger.info.called)
def test_timeout_killed_grandchild(self):
self.script.write(textwrap.dedent("""\
#!/bin/sh
(echo $(exec /bin/sh -c 'echo "$PPID"'); sleep 20)
""").encode())
self.script.close()
logger = mock.MagicMock()
start = time.time()
with self.assertRaises(subprocess.TimeoutExpired) as cm:
program = slapos.grid.utils.SlapPopen(
self.script.name,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
timeout=1,
logger=logger)
# the timeout was respected
elapsed = time.time() - start
self.assertLess(elapsed, 5)
self.assertGreaterEqual(elapsed, 1)
# output pid
pid = int(cm.exception.output)
self.assertEqual(cm.exception.stderr, '')
# sub-subprocess has been killed
self.assertFalse(psutil.pid_exists(pid))
# the pid is logged "live"
logger.info.assert_called_once_with(str(pid))
class DummySystemExit(Exception):
"""Dummy exception raised instead of SystemExit so that if something goes
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment