General alarms for node monitor

See merge request nexedi/slapos.toolbox!105

General alarms for node monitor
See merge request nexedi/slapos.toolbox!105
76f0a0bd · Justin · 3b7d673f · 3b0c79df · 76f0a0bd · 76f0a0bd
Commit 76f0a0bd authored Jan 18, 2023 by Justin
18 changed files
--- a/setup.py
+++ b/setup.py
@@ -11,7 +11,8 @@ for f in sorted(glob.glob(os.path.join('slapos', 'README.*.rst'))):

 long_description += open("CHANGES.txt").read() + "\n"

-test_require = ['mock', 'cryptography',]
+prediction_require = ['statsmodels', 'scipy', 'pandas']
+test_require = ['mock', 'cryptography',] + prediction_require

 setup(name=name,
      version=version,
@@ -61,6 +62,8 @@ setup(name=name,
        'lampconfigure':  ["mysqlclient"], #needed for MySQL Database access
        'zodbpack': ['ZODB3'], # needed to play with ZODB
        'flask_auth' : ["Flask-Auth"],
+        'pandas' : ['pandas'], # needed to monitor_partition_space promise
+        'prediction' : prediction_require, # needed to use ARIMA in check_free_disk_space
        'test': test_require,
      },
      tests_require=test_require,

--- a/slapos/monitor/collect.py
+++ b/slapos/monitor/collect.py
@@ -36,23 +36,12 @@ import time
 import json
 import argparse
 import psutil
-from time import strftime, gmtime
+from time import strftime
 from datetime import datetime, timedelta

 from slapos.collect.db import Database
 from slapos.collect.reporter import ConsumptionReportBase

-def get_date_scope():
-  return strftime('%Y-%m-%d', gmtime())
-
-def get_min_time():
-  gm = gmtime()
-  return '{}:{}:00'.format(gm.tm_hour, gm.tm_min - 1)
-
-def get_max_time():
-  gm = gmtime()
-  return '{}:{}:59'.format(gm.tm_hour, gm.tm_min - 1)
-
 def parseArguments():
  """
  Parse arguments for monitor collector instance.
@@ -114,11 +103,11 @@ class ResourceCollect:
    if where != "":
      where = "and %s" % where
    if not date_scope:
-      date_scope = get_date_scope()
+      date_scope = datetime.now().strftime('%Y-%m-%d')
    if not min_time:
-      min_time = get_min_time()
+      min_time = (datetime.now() - timedelta(minutes=1)).strftime('%H:%M:00')
    if not max_time:
-      max_time = get_max_time()
+      max_time = (datetime.now() - timedelta(minutes=1)).strftime('%H:%M:59')

    columns = """count(pid), SUM(cpu_percent) as cpu_result, SUM(cpu_time),
                MAX(cpu_num_threads), SUM(memory_percent), SUM(memory_rss), pid, SUM(io_rw_counter),
@@ -159,11 +148,11 @@ class ResourceCollect:
    if where != "":
      where = " and %s" % where
    if not date_scope:
-      date_scope = get_date_scope()
+      date_scope = datetime.now().strftime('%Y-%m-%d')
    if not min_time:
-      min_time = get_min_time()
+      min_time = (datetime.now() - timedelta(minutes=1)).strftime('%H:%M:00')
    if not max_time:
-      max_time = get_max_time()
+      max_time = (datetime.now() - timedelta(minutes=1)).strftime('%H:%M:59') 

    colums = """count(pid), SUM(cpu_percent), SUM(cpu_time), SUM(cpu_num_threads), SUM(memory_percent), 
                SUM(memory_rss), SUM(io_rw_counter), SUM(io_cycles_counter)"""  

--- a/slapos/promise/plugin/check_cpu_temperature.py
+++ b/slapos/promise/plugin/check_cpu_temperature.py
+import json
+import os
+import psutil
+import time
+
+from .util import JSONPromise
+
+from zope.interface import implementer
+from slapos.grid.promise import interface
+
+@implementer(interface.IPromise)
+class RunPromise(JSONPromise):
+  def __init__(self, config):
+    super(RunPromise, self).__init__(config)
+    self.setPeriodicity(float(self.getConfig('frequency', 2)))
+    self.avg_flag_file = self.getConfig('last-avg-computation-file', 'last_avg')
+    self.max_spot_temp = float(self.getConfig('max-spot-temp', 90)) # °C
+    self.max_avg_temp = float(self.getConfig('max-avg-temp', 80)) # °C
+    self.avg_temp_duration = int(self.getConfig('avg-temp-duration', 600)) # secondes
+
+  def sense(self):
+    success = True
+    # Get current temperature
+    try:
+      cpu_temp = psutil.sensors_temperatures()['coretemp'][0][1]
+    except (KeyError, IndexError) as e:
+      # Put logger.info to avoid errors when sensors are not 
+      # supported by OS (ex: VM)
+      self.logger.info("Could not read core temperature on VM")
+      return
+
+    # Check spot temperature
+    if cpu_temp > self.max_spot_temp:
+      success = False
+      self.logger.error(
+        "Temperature reached critical threshold: %s °C"
+        " (threshold is %s °C)",
+        cpu_temp, self.max_spot_temp)
+
+    # Log temperature
+    data = json.dumps({'cpu_temperature': cpu_temp})
+    self.json_logger.info("Temperature data", extra={'data': data})
+
+    # TODO: promise should compute average only with logs between interval
+    # Computer average temperature
+    avg_computation_period = self.avg_temp_duration / 4
+    try:
+      t = os.path.getmtime(self.avg_flag_file)
+    except OSError:
+      t = 0
+    if (time.time() - t) > avg_computation_period:
+      open(self.avg_flag_file, 'w').close()
+      temp_list = self.getJsonLogDataInterval(self.avg_temp_duration)
+      if temp_list:
+        avg_temp = sum(x['cpu_temperature'] for x in temp_list) / len(temp_list)
+        if avg_temp > self.max_avg_temp:
+          success = False
+          self.logger.error(
+            "Average temperature over the last %ds reached threshold: %s °C"
+            " (threshold is %s °C)",
+            self.avg_temp_duration, avg_temp, self.max_avg_temp)
+      else:
+        success = False
+        self.logger.error("Couldn't read temperature from log")
+
+    if success:
+      self.logger.info("Temperature OK (%s °C)", cpu_temp)
+
+  def test(self):
+    """
+    Called after sense() if the instance is still converging.
+    Returns success or failure based on sense results.
+
+    In this case, fail if the previous sensor result is negative.
+    """
+    return self._test(result_count=1, failure_amount=1)
+
+
+  def anomaly(self):
+    """
+    Called after sense() if the instance has finished converging.
+    Returns success or failure based on sense results.
+    Failure signals the instance has diverged.
+
+    In this case, fail if two out of the last three results are negative.
+    """
+    return self._anomaly(result_count=3, failure_amount=2)
--- a/slapos/promise/plugin/check_free_disk_space.py
+++ b/slapos/promise/plugin/check_free_disk_space.py
--- a/slapos/promise/plugin/check_network_errors_packets.py
+++ b/slapos/promise/plugin/check_network_errors_packets.py
+import psutil
+import math
+
+from zope.interface import implementer
+from slapos.grid.promise import interface
+from slapos.grid.promise.generic import GenericPromise
+
+@implementer(interface.IPromise)
+class RunPromise(GenericPromise):
+
+  def __init__(self, config):
+    super(RunPromise, self).__init__(config)
+    # Get reference values
+    self.setPeriodicity(float(self.getConfig('frequency', 5)))
+    self.max_lost_packets = int(self.getConfig('max-lost-packets-per-MB', 100))
+    self.max_error_messages = int(self.getConfig('max-error-messages-per-MB', 100))
+
+  def sense(self):
+    promise_success = True
+    # Get current Network statistics
+    network_data =  psutil.net_io_counters()
+    # Get total number of bytes recv and sent in MB (if > 1MB)
+    if (network_data.bytes_recv + network_data.bytes_sent) > 1e6:
+      total_MB = (network_data.bytes_recv + network_data.bytes_sent)/1e6
+    else:
+      total_MB = 1
+    # Get sum of errors and dropped packets
+    total_dropped = network_data.dropin + network_data.dropout
+    total_errors = network_data.errin + network_data.errout
+
+    # Check for network dropped packets
+    if total_dropped/total_MB >= self.max_lost_packets:
+      self.logger.error("Network packets lost reached critical threshold: %s "\
+        " (threshold is %s per MB)" % (math.ceil(total_dropped/total_MB), self.max_lost_packets))
+      promise_success = False
+
+    # Check for network errors
+    if total_errors/total_MB >= self.max_error_messages:
+      self.logger.error("Network errors reached critical threshold: %s "\
+        " (threshold is %s per MB)" % (math.ceil(total_errors/total_MB), self.max_error_messages))
+      promise_success = False
+
+    if promise_success:
+      self.logger.info("Network statistics OK")
+
+  def test(self):
+    """
+    Called after sense() if the instance is still converging.
+    Returns success or failure based on sense results.
+
+    In this case, fail if the previous sensor result is negative.
+    """
+    return self._test(result_count=1, failure_amount=1)
+
+
+  def anomaly(self):
+    """
+    Called after sense() if the instance has finished converging.
+    Returns success or failure based on sense results.
+    Failure signals the instance has diverged.
+
+    In this case, fail if two out of the last three results are negative.
+    """
+    return self._anomaly(result_count=3, failure_amount=2)
--- a/slapos/promise/plugin/check_network_transit.py
+++ b/slapos/promise/plugin/check_network_transit.py
+import json
+import os
+import psutil
+import time
+
+from psutil._common import bytes2human
+from .util import JSONPromise
+
+from zope.interface import implementer
+from slapos.grid.promise import interface
+
+@implementer(interface.IPromise)
+class RunPromise(JSONPromise):
+
+  def __init__(self, config):
+    super(RunPromise, self).__init__(config)
+    # Get reference values
+    self.setPeriodicity(float(self.getConfig('frequency', 1)))
+    self.last_transit_file = self.getConfig('last-transit-file', 'last_transit')
+    self.max_data_amount = float(self.getConfig('max-data-amount', 10e3))*1048576 #  MB converted into bytes
+    self.min_data_amount = float(self.getConfig('min-data-amount', 0.1))*1048576 #  MB converted into bytes
+    self.transit_period = int(self.getConfig('transit-period', 600)) # secondes
+
+  def sense(self):
+    promise_success = True    
+    # Get current network statistics, see https://psutil.readthedocs.io/en/latest/#network
+    network_data = psutil.net_io_counters(nowrap=True)
+    data_amount = network_data.bytes_recv + network_data.bytes_sent
+    # Log data amount
+    data = json.dumps({'network_data_amount': data_amount})
+    self.json_logger.info("Network data amount", extra={'data': data})
+
+    # Get last timestamp (i.e. last modification) of log file
+    try:
+      t = os.path.getmtime(self.last_transit_file)
+    except OSError:
+      t = 0
+    # We recalculate every quarter of transit_period since calculate over periodicity
+    # can be heavy in computation
+    if (time.time() - t) > self.transit_period / 4:
+      open(self.last_transit_file, 'w').close()
+      temp_list = self.getJsonLogDataInterval(self.transit_period)
+      if temp_list:
+        # If no previous data in log
+        if len(temp_list) == 1:
+          pass
+        else:
+          data_diff = temp_list[0]['network_data_amount'] - temp_list[-1]['network_data_amount']
+          if data_diff <= self.min_data_amount:
+            self.logger.error("Network congested, data amount over the last %s seconds "\
+              "reached minimum threshold: %7s (threshold is %7s)" 
+              % (self.transit_period, bytes2human(data_diff), bytes2human(self.min_data_amount)))
+            promise_success = False
+          if data_diff >= self.max_data_amount:
+            self.logger.error("Network congested, data amount over the last %s seconds "\
+              "reached maximum threshold: %7s (threshold is %7s)" 
+              % (self.transit_period, bytes2human(data_diff), bytes2human(self.max_data_amount)))
+            promise_success = False
+      else:
+        self.logger.error("Couldn't read network data from log")
+        promise_success = False
+
+    if promise_success:
+      self.logger.info("Network transit OK")
+
+  def test(self):
+    """
+    Called after sense() if the instance is still converging.
+    Returns success or failure based on sense results.
+
+    In this case, fail if the previous sensor result is negative.
+    """
+    return self._test(result_count=1, failure_amount=1)
+
+
+  def anomaly(self):
+    """
+    Called after sense() if the instance has finished converging.
+    Returns success or failure based on sense results.
+    Failure signals the instance has diverged.
+
+    In this case, fail if two out of the last three results are negative.
+    """
+    return self._anomaly(result_count=3, failure_amount=2)
--- a/slapos/promise/plugin/check_ram_usage.py
+++ b/slapos/promise/plugin/check_ram_usage.py
+import json
+import os
+import psutil
+import time
+
+from psutil._common import bytes2human
+from .util import JSONPromise
+
+from zope.interface import implementer
+from slapos.grid.promise import interface
+
+@implementer(interface.IPromise)
+class RunPromise(JSONPromise):
+
+  def __init__(self, config):
+    super(RunPromise, self).__init__(config)
+    # Get reference values
+    self.setPeriodicity(float(self.getConfig('frequency', 2)))
+    self.last_avg_ram_file = self.getConfig('last-avg-ram-file', 'last_avg')
+    self.min_threshold_ram = float(self.getConfig('min-threshold-ram', 500))*1048576 #  MB converted into bytes
+    self.min_avg_ram = float(self.getConfig('min-avg-ram', 1e3))*1048576 #  MB converted into bytes
+    self.avg_ram_period =  int(self.getConfig('avg-ram-period', 600)) # secondes
+
+  def sense(self):
+    promise_success = True
+    # Get current RAM usage
+    ram_data = psutil.virtual_memory()
+    # Check with min threshold and log error if below it
+    if ram_data.available <= self.min_threshold_ram:
+      self.logger.error("RAM usage reached critical threshold: %7s "\
+        " (threshold is %7s)" % (bytes2human(ram_data.available), bytes2human(self.min_threshold_ram)))
+      promise_success = False
+    
+    # Log RAM usage
+    data = json.dumps({'available_ram': ram_data.available})
+    self.json_logger.info("RAM data", extra={'data': data})
+
+    # Get last timestamp (i.e. last modification) of log file
+    try:
+      t = os.path.getmtime(self.last_avg_ram_file)
+    except OSError:
+      t = 0
+    # Get last available RAM from log file since avg_ram_period / 4
+    if (time.time() - t) > self.avg_ram_period / 4:
+      open(self.last_avg_ram_file, 'w').close()
+      temp_list = self.getJsonLogDataInterval(self.avg_ram_period)
+      if temp_list:
+        avg_ram = sum(map(lambda x: x['available_ram'], temp_list)) / len(temp_list)
+        if avg_ram < self.min_avg_ram:
+          self.logger.error("Average RAM usage over the last %s seconds "\
+            "reached threshold: %7s (threshold is %7s)" 
+            % (self.avg_ram_period, bytes2human(avg_ram), bytes2human(self.min_avg_ram)))
+          promise_success = False
+      else:
+        self.logger.error("Couldn't read available RAM from log")
+        promise_success = False
+
+    if promise_success:
+      self.logger.info("RAM usage OK")
+
+  def test(self):
+    """
+    Called after sense() if the instance is still converging.
+    Returns success or failure based on sense results.
+
+    In this case, fail if the previous sensor result is negative.
+    """
+    return self._test(result_count=1, failure_amount=1)
+
+
+  def anomaly(self):
+    """
+    Called after sense() if the instance has finished converging.
+    Returns success or failure based on sense results.
+    Failure signals the instance has diverged.
+
+    In this case, fail if two out of the last three results are negative.
+    """
+    return self._anomaly(result_count=3, failure_amount=2)
--- a/slapos/promise/plugin/check_server_cpu_load.py
+++ b/slapos/promise/plugin/check_server_cpu_load.py
@@ -12,7 +12,7 @@ class RunPromise(GenericPromise):
  def __init__(self, config):
    super(RunPromise, self).__init__(config)
    # test load every 3 minutes
-    self.setPeriodicity(minute=3)
+    self.setPeriodicity(float(self.getConfig('frequency', 3)))

  def checkCPULoad(self, tolerance=2.2):
    # tolerance=1.5 => accept CPU load up to 1.5 =150%

--- a/slapos/promise/plugin/monitor_partition_space.py
+++ b/slapos/promise/plugin/monitor_partition_space.py
+from __future__ import division
+
+from zope.interface import implementer
+from slapos.grid.promise import interface
+from slapos.grid.promise.generic import GenericPromise
+
+import os
+import sys
+import pwd
+
+import sqlite3
+import argparse
+import datetime
+import psutil
+import math
+import pkgutil
+
+# try to install pandas and numpy
+try:
+  import pandas as pd
+  import numpy as np
+except ImportError:
+  pass
+
+from slapos.collect.db import Database
+from contextlib import closing
+
+@implementer(interface.IPromise)
+class RunPromise(GenericPromise):
+
+  def __init__(self, config):
+    super(RunPromise, self).__init__(config)
+    # at least every hours (heavy in computation)
+    self.setPeriodicity(float(self.getConfig('frequency', 60)))
+
+  def getDiskSize(self, disk_partition, db_path):
+    database = Database(db_path, create=False, timeout=10)
+    # by using contextlib.closing, we don't need to close the database explicitly
+    with closing(database):
+      try:
+        database.connect()
+        where_query = "partition='%s'" % (disk_partition)
+        order = "datetime(date || ' ' || time) DESC"
+        result = database.select(
+          "disk",
+          columns="free+used",
+          where=where_query,
+          order=order,
+          limit=1).fetchone()
+        if not result or not result[0]:
+          return None
+        disk_size = result[0]
+      except sqlite3.OperationalError as e:
+        # if database is still locked after timeout expiration (another process is using it)
+        # we print warning message and try the promise at next run until max warn count
+        locked_message = "database is locked"
+        if locked_message in str(e) and \
+            not self.raiseOnDatabaseLocked(locked_message):
+          return None
+        raise
+    return disk_size
+
+  def getPartitionSize(self, disk_partition, db_path):
+    database = Database(db_path, create=False, timeout=10)
+    with closing(database):
+      try:
+        database.connect()
+        where_query = "partition='%s'" % (disk_partition)
+        order = "datetime(date || ' ' || time) DESC"
+        result = database.select(
+          "folder",
+          columns="disk_used*1024",
+          where=where_query,
+          order=order,
+          limit=1).fetchone()
+        if not result or not result[0]:
+          return None
+        partition_size = result[0]
+      except sqlite3.OperationalError as e:
+        # if database is still locked after timeout expiration (another process is using it)
+        # we print warning message and try the promise at next run until max warn count
+        locked_message = "database is locked"
+        if locked_message in str(e) and \
+            not self.raiseOnDatabaseLocked(locked_message):
+          return None
+        raise
+    return partition_size
+
+  def getAnomaly(self, disk_partition, db_path, user, date, time):
+    database = Database(db_path, create=False, timeout=10)
+    with closing(database):
+      try:
+        disk_size = self.getDiskSize(disk_partition, db_path)
+        if disk_size is None:
+          return None
+        database.connect()
+        result = database.select(
+          "folder",
+          columns = "%s-disk_used*1024, disk_used*1024, datetime(date || ' ' || time)" % disk_size,
+          where =  "partition='%s'" % (user),
+          order = "date ASC, time ASC"
+        ).fetchall()
+        if not result or not result[0]:
+          self.logger.info("No result from collector database for the user %s: skipped", user)
+          return None
+        datetime_now = datetime.datetime.strptime(date + ' ' + time, "%Y-%m-%d %H:%M:%S")
+        # check that the last data is less than 24 hours old
+        last_date = datetime.datetime.strptime(result[-1][2], "%Y-%m-%d %H:%M:%S")
+        if (datetime_now - last_date) > datetime.timedelta(days=1):
+          self.logger.info("Not enough recent data to detect anomalies: skipped")
+          return None
+        # check that the first data is at least 13 days old
+        first_date = datetime.datetime.strptime(result[0][2], "%Y-%m-%d %H:%M:%S")
+        if (datetime_now - first_date) < datetime.timedelta(days=13):
+          self.logger.info("Not enough data to detect anomalies: skipped")
+          return None
+
+        df = pd.DataFrame(result, columns=["free", "used", "date"])
+        df.loc[:,'date'] = pd.to_datetime(df.date)
+        # keep a sample every 5 minutes, set NaN when there is no information
+        freq = 5
+        df = df.resample(str(freq)+"min", on='date').mean()
+        # estimate the missing information
+        df['free'] = df.free.astype(float).interpolate(method='linear')
+        df['used'] = df.used.astype(float).interpolate(method='linear')
+        # calculate the median for the element-wise absolute value
+        # of the difference between each x and the median of x
+        df = df.reset_index()
+        x = df['date']
+        y = df['free']
+        mad = lambda x: np.median(np.fabs(x - np.median(x)))
+        # threshold is set at 8% of the disk size by default
+        threshold_ratio = float(self.getConfig('threshold-ratio', 0.08) or 0.08)
+        threshold = threshold_ratio*disk_size
+        # use a 1-day window
+        minutes_per_day = 60*24/freq
+        rolling_window = int(minutes_per_day*1)
+        rolling_mad = y.rolling(window=rolling_window, center=False).median() + \
+          y.rolling(window=rolling_window, center=False).apply(mad)
+        rolling_mad_upper = rolling_mad + threshold
+        rolling_mad_lower = rolling_mad - threshold
+        # create Pandas DataFrame and rename columns
+        data = pd.concat([x, y, df['used'], rolling_mad, rolling_mad_upper, rolling_mad_lower], axis=1)
+        data.columns = ["date", "free", "used", "mad", "upper_mad", "lower_mad"]
+        # drop initial values (outside rolling window)
+        data.dropna(subset=["mad"], inplace=True)
+        # determine anomalies and display their number
+        data["is_anomaly"] = ~(data["free"].between(data["lower_mad"], data["upper_mad"]))
+        data = data.set_index("date")
+        if (len(data)==0):
+          self.logger.info("No result from anomaly detection")
+          return None
+        self.logger.info("There were %s anomalies in the last 15 days " \
+          "(1 data every %s minutes, threshold: %s %% of the disk size)" % (
+          len(data[data['is_anomaly'] == True]), freq, threshold_ratio*100))
+        return data
+      except sqlite3.OperationalError as e:
+        # if database is still locked after timeout expiration (another process is using it)
+        # we print warning message and try the promise at next run until max warn count
+        locked_message = "database is locked"
+        if locked_message in str(e) and \
+            not self.raiseOnDatabaseLocked(locked_message):
+          return None
+        raise
+
+  @staticmethod
+  def _checkInodeUsage(path):
+    stat = os.statvfs(path)
+    total_inode = stat.f_files
+    if total_inode:
+      usage = 100 * (total_inode - stat.f_ffree) / total_inode
+      if usage >= 98:
+        return "Disk Inodes usage is really high: %.4f%%" % usage
+
+  def getInodeUsage(self, path):
+    return (self._checkInodeUsage(path) or
+       os.path.ismount('/tmp') and self._checkInodeUsage('/tmp') or
+       "")
+
+  def sense(self):
+    # check that the libraries are installed from the slapos.toolbox extra requires
+    pandas_found = pkgutil.find_loader("pandas")
+    numpy_found = pkgutil.find_loader("numpy")
+    if pandas_found is None or numpy_found is None:
+      self.logger.warning("Trying to use pandas but the module is not installed. Promise skipped.")
+      return
+    # find if a disk is mounted on the path
+    disk_partition = ""
+    db_path = self.getConfig('collectordb')
+    check_date = self.getConfig('test-check-date')
+    path = os.path.join(self.getPartitionFolder(), "") + "extrafolder"
+    partitions = psutil.disk_partitions()
+    while path is not '/':
+      if not disk_partition:
+        path = os.path.dirname(path)
+      else:
+        break
+      for p in partitions:
+        if p.mountpoint == path:
+          disk_partition = p.device
+          break
+    if not disk_partition:
+      self.logger.error("Couldn't find disk partition")
+      return
+
+    if db_path.endswith("collector.db"):
+      db_path=db_path[:-len("collector.db")]
+
+    if check_date:
+      # testing mode
+      currentdate = check_date
+      currenttime = self.getConfig('test-check-time', '09:30:30')
+      user = self.getConfig('test-partition', 'slapuser0')
+    else:
+      # get the user name of the partition
+      user = pwd.getpwuid(os.getuid()).pw_name
+      # get last minute
+      now = datetime.datetime.utcnow()
+      currentdate = now.strftime('%Y-%m-%d')
+      currenttime = now - datetime.timedelta(minutes=1)
+      currenttime = currenttime.time().strftime('%H:%M:%S')
+
+    partition_size = self.getPartitionSize(user, db_path)
+    data = self.getAnomaly(disk_partition, db_path, user, currentdate, currenttime)
+    if data is None:
+      return
+    last_data = data.iloc[-1]
+    last_date = data.index[-1]
+    if last_data.is_anomaly:
+      self.logger.error("Anomaly detected on %s. Space used by %s: %.2f G." % (
+        last_date, user, partition_size/(1024*1024*1024)))
+    else:
+      self.logger.info("No anomaly detected (last date checked: %s)" % (last_date))
+
+  def test(self):
+    return self._test(result_count=1, failure_amount=1)
+
+  def anomaly(self):
+    return self._test(result_count=3, failure_amount=3)
--- a/slapos/promise/plugin/util.py
+++ b/slapos/promise/plugin/util.py
+import itertools
+import json
+import logging
+import os
+import textwrap

+from dateutil import parser as dateparser
+from datetime import datetime
+from slapos.grid.promise.generic import GenericPromise
+
+
+def iter_reverse_lines(f):
+  """
+    Read lines from the end of the file
+  """
+  f.seek(0, os.SEEK_END)
+  while True:
+    try:
+      while f.seek(-2, os.SEEK_CUR) and f.read(1) != b'\n':
+        pass
+    except OSError:
+      return
+    pos = f.tell()
+    yield f.readline()
+    f.seek(pos, os.SEEK_SET)
+
+
+def iter_logrotate_file_handle(path, mode='r'):
+  """
+    Yield successive file handles for rotated logs
+    (XX.log, XX.log.1, XX.log.2, ...)
+  """
+  for i in itertools.count():
+    path_i = path + str(i or '')
+    try:
+      with open(path_i, mode) as f:
+        yield f
+    except OSError:
+      break
+
+
+class JSONPromise(GenericPromise):
+  def __init__(self, config):
+    self.__name = config.get('name', None)
+    self.__log_folder = config.get('log-folder', None)
+
+    super(JSONPromise, self).__init__(config)
+    json_log_name = os.path.splitext(self.__name)[0] + '.json.log'
+    self.__json_log_file = os.path.join(self.__log_folder, json_log_name)
+    self.json_logger = self.__makeJsonLogger(self.__json_log_file)
+
+  def __makeJsonLogger(self, json_log_file):
+    logger = logging.getLogger('json-logger')
+    logger.setLevel(logging.INFO)
+    handler = logging.FileHandler(json_log_file)
+    formatter = logging.Formatter(
+      '{"time": "%(asctime)s", "log_level": "%(levelname)s"'
+      ', "message": "%(message)s", "data": %(data)s}'
+    )
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+    return logger
+
+  def getJsonLogDataInterval(self, interval):
+    """
+      Get all data in the last "interval" seconds from JSON log
+      Reads rotated logs too (XX.log, XX.log.1, XX.log.2, ...)
+    """
+    current_time = datetime.now()
+    data_list = []
+    for f in iter_logrotate_file_handle(self.__json_log_file, 'rb'):
+      for line in iter_reverse_lines(f):
+        l = json.loads(line.decode().replace("'", '"'))
+        timestamp = dateparser.parse(l['time'])
+        if (current_time - timestamp).total_seconds() > interval:
+          return data_list
+        data_list.append(l['data'])
+    return data_list
+
+  def getJsonLogLatestTimestamp(log):
+    """
+      Get latest timestamp from JSON log
+      Reads rotated logs too (XX.log, XX.log.1, XX.log.2, ...)
+    """
+    for f in iter_logrotate_file_handle(self.__json_log_file, 'rb'):
+      for line in iter_reverse_lines(f):
+        l = json.loads(line.decode().replace("'", '"'))
+        return dateparser.parse(l['time'])
+    return 0
+
+from dateutil import parser
+from slapos.grid.promise.generic import GenericPromise

 def tail_file(file_path, line_count=10):
  """
  Returns the last lines of file.
  """
-
  line_list = []
  with open(file_path) as f:
    BUFSIZ = 1024
@@ -25,5 +115,4 @@ def tail_file(file_path, line_count=10):
      size -= line_len
      bytes -= BUFSIZ
      block -= 1
-
-  return '\n'.join(''.join(line_list).splitlines()[-line_count:])
\ No newline at end of file
+  return '\n'.join(''.join(line_list).splitlines()[-line_count:])
--- a/slapos/test/promise/data/disktest.sql
+++ b/slapos/test/promise/data/disktest.sql
 BEGIN TRANSACTION;
 CREATE TABLE disk (partition text, used text, free text, mountpoint text,  date text, time text, reported integer NULL DEFAULT 0);
+CREATE TABLE folder (partition text, disk_used real,  date text, time text, reported integer NULL DEFAULT 0);
+INSERT INTO "disk" VALUES('/dev/sda1','159220666368','288948396032','/','2017-09-18','09:17:01',1);
+INSERT INTO "disk" VALUES('/dev/sda1','159220666368','288948396032','/','2017-09-19','09:17:01',1);
+INSERT INTO "disk" VALUES('/dev/sda1','159220666368','288948396032','/','2017-09-20','09:17:01',1);
+INSERT INTO "disk" VALUES('/dev/sda1','159220666368','288948396032','/','2017-09-21','09:17:01',1);
+INSERT INTO "disk" VALUES('/dev/sda1','159220666368','288948396032','/','2017-09-22','09:17:01',1);
+INSERT INTO "disk" VALUES('/dev/sda1','159220666368','288948396032','/','2017-09-23','09:17:01',1);
+INSERT INTO "disk" VALUES('/dev/sda1','159220666368','288948396032','/','2017-09-24','09:17:01',1);
+INSERT INTO "disk" VALUES('/dev/sda1','159220666368','288948396032','/','2017-09-25','09:17:01',1);
+INSERT INTO "disk" VALUES('/dev/sda1','159220666368','288948396032','/','2017-09-26','09:17:01',1);
+INSERT INTO "disk" VALUES('/dev/sda1','159220666368','288948396032','/','2017-09-27','09:17:01',1);
+INSERT INTO "disk" VALUES('/dev/sda1','159220666368','288948396032','/','2017-09-28','09:17:01',1);
+INSERT INTO "disk" VALUES('/dev/sda1','159220666368','288948396032','/','2017-09-29','09:17:01',1);
+INSERT INTO "disk" VALUES('/dev/sda1','159220666368','288948396032','/','2017-09-30','09:17:01',1);
+INSERT INTO "disk" VALUES('/dev/sda1','159220666368','288948396032','/','2017-10-01','09:17:01',1);
 INSERT INTO "disk" VALUES('/dev/sda1','159220666368','288948396032','/','2017-10-02','09:17:01',1);
 INSERT INTO "disk" VALUES('/dev/sda1','159237537792','288931524608','/','2017-10-02','09:18:01',1);
 INSERT INTO "disk" VALUES('/dev/sda1','159238090752','288930971648','/','2017-10-02','09:19:02',1);
@@ -14,4 +29,7 @@ INSERT INTO "disk" VALUES('/dev/sda1','159429677056','288739385344','/','2017-10
 INSERT INTO "disk" VALUES('/dev/sda1','159444549632','288724512768','/','2017-10-02','09:28:03',1);
 INSERT INTO "disk" VALUES('/dev/sda1','159472902144','288696160256','/','2017-10-02','09:29:02',1);
 INSERT INTO "disk" VALUES('/dev/sda1','159476805632','288692256768','/','2017-10-02','09:30:03',1);
+INSERT INTO "folder" VALUES('slapuser0',87533020.0,'2017-10-02','09:17:00',1);
+INSERT INTO "folder" VALUES('slapuser1',21883255.0,'2017-10-02','09:17:00',1);
+INSERT INTO "folder" VALUES('slapuser2',43766510.0,'2017-10-02','09:17:00',1);
 COMMIT;
--- a/slapos/test/promise/data/folder_disk_test.sql
+++ b/slapos/test/promise/data/folder_disk_test.sql
+BEGIN TRANSACTION;
+CREATE TABLE disk (partition text, used text, free text, mountpoint text,  date text, time text, reported integer NULL DEFAULT 0);
+CREATE TABLE folder (partition text, disk_used real,  date text, time text, reported integer NULL DEFAULT 0);
+INSERT INTO "disk" VALUES('disk_partition_name','159220666368','288948396032','/','2017-10-02','09:17:01',1);
+INSERT INTO "disk" VALUES('disk_partition_name','159237537792','288931524608','/','2017-10-02','09:18:01',1);
+INSERT INTO "disk" VALUES('disk_partition_name','159238090752','288930971648','/','2017-10-02','09:19:02',1);
+INSERT INTO "disk" VALUES('disk_partition_name','159241568256','288927494144','/','2017-10-02','09:20:02',1);
+INSERT INTO "disk" VALUES('disk_partition_name','159242719232','288926343168','/','2017-10-02','09:21:02',1);
+INSERT INTO "disk" VALUES('disk_partition_name','159196176384','288972886016','/','2017-10-02','09:22:03',1);
+INSERT INTO "disk" VALUES('disk_partition_name','159300747264','288868315136','/','2017-10-02','09:23:03',1);
+INSERT INTO "disk" VALUES('disk_partition_name','159294308352','288874754048','/','2017-10-02','09:24:02',1);
+INSERT INTO "disk" VALUES('disk_partition_name','159328468992','288840593408','/','2017-10-02','09:25:03',1);
+INSERT INTO "disk" VALUES('disk_partition_name','159384883200','288784179200','/','2017-10-02','09:26:03',1);
+INSERT INTO "disk" VALUES('disk_partition_name','159429677056','288739385344','/','2017-10-02','09:27:02',1);
+INSERT INTO "disk" VALUES('disk_partition_name','159444549632','288724512768','/','2017-10-02','09:28:03',1);
+INSERT INTO "disk" VALUES('disk_partition_name','159472902144','288696160256','/','2017-10-02','09:29:02',1);
+INSERT INTO "disk" VALUES('disk_partition_name','159476805632','288692256768','/','2017-10-02','09:30:03',1);
+INSERT INTO "folder" VALUES('slapuser0',21883255.0,'2017-09-18','09:30:20',1);
+INSERT INTO "folder" VALUES('slapuser0',24071580.5,'2017-09-19','09:30:11',1);
+INSERT INTO "folder" VALUES('slapuser0',23196250.3,'2017-09-20','09:30:08',1);
+INSERT INTO "folder" VALUES('slapuser0',23415082.8,'2017-09-21','09:30:21',1);
+INSERT INTO "folder" VALUES('slapuser0',23633915.4,'2017-09-22','09:30:09',1);
+INSERT INTO "folder" VALUES('slapuser0',30636557.0,'2017-09-23','09:30:06',1);
+INSERT INTO "folder" VALUES('slapuser0',28448231.5,'2017-09-24','09:30:19',1);
+INSERT INTO "folder" VALUES('slapuser0',27572901.3,'2017-09-25','09:30:13',1);
+INSERT INTO "folder" VALUES('slapuser0',29761226.8,'2017-09-26','09:30:15',1);
+INSERT INTO "folder" VALUES('slapuser0',30855389.5,'2017-09-27','09:30:06',1);
+INSERT INTO "folder" VALUES('slapuser0',30636557.0,'2017-09-28','09:30:11',1);
+INSERT INTO "folder" VALUES('slapuser0',31074222.0,'2017-09-29','09:30:08',1);
+INSERT INTO "folder" VALUES('slapuser0',31096105.3,'2017-09-30','09:30:05',1);
+INSERT INTO "folder" VALUES('slapuser0',31949552.2,'2017-10-01','09:30:10',1);
+INSERT INTO "folder" VALUES('slapuser0',87533020.0,'2017-10-02','09:30:02',1);
+COMMIT;
\ No newline at end of file
--- a/slapos/test/promise/plugin/test_check_cpu_temperature.py
+++ b/slapos/test/promise/plugin/test_check_cpu_temperature.py
+# -*- coding: utf-8 -*-
+##############################################################################
+# Copyright (c) 2018 Vifib SARL and Contributors. All Rights Reserved.
+#
+# WARNING: This program as such is intended to be used by professional
+# programmers who take the whole responsibility of assessing all potential
+# consequences resulting from its eventual inadequacies and bugs
+# End users who are looking for a ready-to-use solution with commercial
+# guarantees and support are strongly advised to contract a Free Software
+# Service Company
+#
+# This program is Free Software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+##############################################################################
+
+import mock
+import os
+import time
+from slapos.grid.promise import PromiseError
+from slapos.promise.plugin.check_cpu_temperature import RunPromise
+from . import TestPromisePluginMixin
+
+
+class TestCheckCpuTemperature(TestPromisePluginMixin):
+
+  promise_name = "monitor-cpu-temperature.py"
+
+  def setUp(self):
+    super(TestCheckCpuTemperature, self).setUp()
+
+  def writePromise(self, **kw):
+    super(TestCheckCpuTemperature, self).writePromise(self.promise_name,
+      "from %s import %s\nextra_config_dict = %r\n"
+      % (RunPromise.__module__, RunPromise.__name__, kw))
+
+  def runPromise(self, summary, failed=False):
+    self.configureLauncher(enable_anomaly=True, force=True)
+    with mock.patch('psutil.sensors_temperatures', return_value=summary):
+      if failed:
+        self.assertRaises(PromiseError, self.launcher.run)
+      else:
+        self.launcher.run()
+    result = self.getPromiseResult(self.promise_name)['result']
+    self.assertEqual(result['failed'], failed)
+    return result['message']
+
+  def test_temp_ok(self):
+    message = "Temperature OK (50 °C)"
+    self.writePromise(**{
+        'last-avg-computation-file':'last_avg_computation_file',
+        'max-spot-temp': 80,
+        'max-avg-temp': 100,
+    })
+    self.assertEqual(message, self.runPromise({'coretemp': [[0, 50]]}))
+
+  def test_spot_critical(self):
+    message = "Temperature reached critical threshold: 90 °C (threshold is 80.0 °C)"
+    self.writePromise(**{
+        'last-avg-computation-file':'last_avg_computation_file',
+        'max-spot-temp': 80,
+        'max-avg-temp': 100,
+    })
+    self.assertEqual(message, self.runPromise({'coretemp': [[0, 90]]}))
+
+
+  def test_avg_critical(self):
+    message = "Average temperature over the last 1s reached threshold: 45.0 °C (threshold is 40.0 °C)"
+    self.writePromise(**{
+        'last-avg-computation-file':'last_avg_computation_file',
+        'max-spot-temp': 99999,
+        'max-avg-temp': 40,
+        'avg-temp-duration': 1,
+    })
+    m = self.runPromise({'coretemp': [[0, 0]]})
+    time.sleep(0.6)
+    m = self.runPromise({'coretemp': [[0, 0]]})
+    time.sleep(0.5)
+    self.assertEqual(message, self.runPromise({'coretemp': [[0, 90]]}))
+
+if __name__ == '__main__':
+  unittest.main()
--- a/slapos/test/promise/plugin/test_check_free_disk_space.py
+++ b/slapos/test/promise/plugin/test_check_free_disk_space.py
@@ -74,18 +74,19 @@ extra_config_dict = {
 """ % {'collectordb': self.db_file}
    self.writePromise(self.promise_name, content)

-    self.configureLauncher()
+    self.configureLauncher(timeout=20)
    self.launcher.run()
    result = self.getPromiseResult(self.promise_name)
    self.assertEqual(result['result']['failed'], False)
    self.assertEqual(result['result']['message'], "No result from collector database: disk check skipped")

  def test_disk_space_ok(self):
-    self.configureLauncher()
+    self.configureLauncher(timeout=20)
    self.launcher.run()
    result = self.getPromiseResult(self.promise_name)
    self.assertEqual(result['result']['failed'], False)
-    self.assertEqual(result['result']['message'], "Disk usage: OK")
+    message = "Current disk usage: OK\nEnable to display disk space predictions: False"
+    self.assertEqual(result['result']['message'], message)

  def test_disk_space_nok(self):
    content = """from slapos.promise.plugin.check_free_disk_space import RunPromise
@@ -98,29 +99,62 @@ extra_config_dict = {
 """ % {'collectordb': self.db_file}
    self.writePromise(self.promise_name, content)

-    self.configureLauncher()
+    self.configureLauncher(timeout=20)
    with self.assertRaises(PromiseError):
      self.launcher.run()
    result = self.getPromiseResult(self.promise_name)
    self.assertEqual(result['result']['failed'], True)
-    self.assertEqual(result['result']['message'],
-      "Free disk space low: remaining 269.1 G (threshold: 278.0 G)")
+    message = "Free disk space low: remaining 269.10 G (disk size: 417 G, threshold: 278 G)."
+    self.assertIn(message, result['result']['message'])

-    self.configureLauncher()
+  def test_display_partition(self):
+    content = """from slapos.promise.plugin.check_free_disk_space import RunPromise
+
+extra_config_dict = {
+  'collectordb': '%(collectordb)s',
+  'test-check-date': '2017-10-02',
+  'threshold': '278',
+  'display-partition' : '1',
+}
+""" % {'collectordb': self.db_file}
+    self.writePromise(self.promise_name, content)
+
+    self.configureLauncher(timeout=20)
    with self.assertRaises(PromiseError):
      self.launcher.run()
    result = self.getPromiseResult(self.promise_name)
    self.assertEqual(result['result']['failed'], True)
-    self.assertEqual(result['result']['message'], "Free disk space low: remaining 269.1 G (threshold: 278.0 G)")
+    message = "Free disk space low: remaining 269.10 G (disk size: 417 G, threshold: 278 G). " \
+      "The partition slappart0 uses 83.48 G (date checked: 2017-10-02 09:17:00). " \
+      "The partition slappart2 uses 41.74 G (date checked: 2017-10-02 09:17:00). " \
+      "The partition slappart1 uses 20.87 G (date checked: 2017-10-02 09:17:00)."
+    self.assertIn(message, result['result']['message'])
+
+  def test_display_prediction(self):
+    content = """from slapos.promise.plugin.check_free_disk_space import RunPromise
+
+extra_config_dict = {
+  'collectordb': '%(collectordb)s',
+  'test-check-date': '2017-10-02',
+  'display-prediction' : '1',
+}
+""" % {'collectordb': self.db_file}
+    self.writePromise(self.promise_name, content)
+
+    self.configureLauncher(timeout=20)
+    self.launcher.run()
+    result = self.getPromiseResult(self.promise_name)
+    self.assertEqual(result['result']['failed'], False)
+    self.assertIn("Prediction:", result['result']['message'])

  def test_check_free_disk_with_unicode_string_path(self):
    # set path unicode
    self.partition_dir = u'%s' % self.partition_dir
-    self.configureLauncher()
+    self.configureLauncher(timeout=20)
    self.launcher.run()
    result = self.getPromiseResult(self.promise_name)
    self.assertEqual(result['result']['failed'], False)
-    self.assertEqual(result['result']['message'], "Disk usage: OK")
+    self.assertIn("Current disk usage: OK", result['result']['message'])

 if __name__ == '__main__':
  unittest.main()
--- a/slapos/test/promise/plugin/test_check_network_errors_packets.py
+++ b/slapos/test/promise/plugin/test_check_network_errors_packets.py
+# -*- coding: utf-8 -*-
+##############################################################################
+# Copyright (c) 2022 Vifib SARL and Contributors. All Rights Reserved.
+#
+# WARNING: This program as such is intended to be used by professional
+# programmers who take the whole responsibility of assessing all potential
+# consequences resulting from its eventual inadequacies and bugs
+# End users who are looking for a ready-to-use solution with commercial
+# guarantees and support are strongly advised to contract a Free Software
+# Service Company
+#
+# This program is Free Software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+##############################################################################
+import mock
+
+from collections import namedtuple
+from slapos.grid.promise import PromiseError
+from slapos.promise.plugin.check_network_errors_packets import RunPromise
+from . import TestPromisePluginMixin
+
+
+class TestCheckNetwork(TestPromisePluginMixin):
+
+  promise_name = "monitor-network.py"
+
+  def setUp(self):
+    super(TestCheckNetwork, self).setUp()
+    self.network_data = namedtuple('network_data', 
+    ['bytes_recv', 'bytes_sent','errin', 'errout', 'dropin', 'dropout'])
+
+  def writePromise(self, **kw):
+    super(TestCheckNetwork, self).writePromise(self.promise_name,
+      "from %s import %s\nextra_config_dict = %r\n"
+      % (RunPromise.__module__, RunPromise.__name__, kw))
+
+  def runPromise(self, summary, failed=False):
+    self.configureLauncher(enable_anomaly=True, force=True)
+    with mock.patch('psutil.net_io_counters', return_value=summary):
+      if failed:
+        self.assertRaises(PromiseError, self.launcher.run)
+      else:
+        self.launcher.run()
+    result = self.getPromiseResult(self.promise_name)['result']
+    self.assertEqual(result['failed'], failed)
+    return result['message']
+
+  def test_network_ok(self):
+    message = "Network statistics OK"
+    mock_stats = {'bytes_recv':0, 'bytes_sent':0,'errin':0, 'errout':0, 'dropin':0, 'dropout':0}
+    self.writePromise(**{
+        'max-lost-packets-per-MB': 5,
+        'max-error-messages-per-MB': 5,
+    })
+    self.assertEqual(message, self.runPromise(self.network_data(**mock_stats)))
+
+  def test_network_dropped_packets_nok(self):
+    message = "Network packets lost reached critical threshold: 10  (threshold is 5 per MB)"
+    mock_stats = {'bytes_recv':0, 'bytes_sent':0,'errin':0, 'errout':0, 'dropin':5, 'dropout':5}
+    self.writePromise(**{
+        'max-lost-packets-per-MB': 5,
+        'max-error-messages-per-MB': 5,
+    })
+    self.assertEqual(message, self.runPromise(self.network_data(**mock_stats)))
+
+  def test_network_errors_nok(self):
+    message = "Network errors reached critical threshold: 10  (threshold is 5 per MB)"
+    mock_stats = {'bytes_recv':0, 'bytes_sent':0,'errin':5, 'errout':5, 'dropin':0, 'dropout':0}
+    self.writePromise(**{
+        'max-lost-packets-per-MB': 5,
+        'max-error-messages-per-MB': 5,
+    })
+    self.assertEqual(message, self.runPromise(self.network_data(**mock_stats)))
+
+  def test_network_nok(self):
+    message = "Network packets lost reached critical threshold: 10  (threshold is 5 per MB)"\
+      "\nNetwork errors reached critical threshold: 10  (threshold is 5 per MB)"
+    mock_stats = {'bytes_recv':0, 'bytes_sent':0, 'errin':5, 'errout':5, 'dropin':5, 'dropout':5}
+    self.writePromise(**{
+        'max-lost-packets-per-MB': 5,
+        'max-error-messages-per-MB': 5,
+    })
+    self.assertEqual(message, self.runPromise(self.network_data(**mock_stats)))
+
+if __name__ == '__main__':
+  unittest.main()
--- a/slapos/test/promise/plugin/test_check_network_transit.py
+++ b/slapos/test/promise/plugin/test_check_network_transit.py
+# -*- coding: utf-8 -*-
+##############################################################################
+# Copyright (c) 2018 Vifib SARL and Contributors. All Rights Reserved.
+#
+# WARNING: This program as such is intended to be used by professional
+# programmers who take the whole responsibility of assessing all potential
+# consequences resulting from its eventual inadequacies and bugs
+# End users who are looking for a ready-to-use solution with commercial
+# guarantees and support are strongly advised to contract a Free Software
+# Service Company
+#
+# This program is Free Software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+##############################################################################
+
+import mock
+import os
+import time
+
+from collections import namedtuple
+from slapos.grid.promise import PromiseError
+from slapos.promise.plugin.check_network_transit import RunPromise
+from . import TestPromisePluginMixin
+
+
+class TestCheckNetworkTransit(TestPromisePluginMixin):
+
+  promise_name = "monitor-network-transit.py"
+
+  def setUp(self):
+    super(TestCheckNetworkTransit, self).setUp()
+    self.network_data = namedtuple('network_data', ['bytes_recv', 'bytes_sent'])
+
+  def writePromise(self, **kw):
+    super(TestCheckNetworkTransit, self).writePromise(self.promise_name,
+      "from %s import %s\nextra_config_dict = %r\n"
+      % (RunPromise.__module__, RunPromise.__name__, kw))
+
+  def runPromise(self, summary=None, failed=False):
+    self.configureLauncher(enable_anomaly=True, force=True)
+    with mock.patch('psutil.net_io_counters', return_value=summary):
+      if failed:
+        self.assertRaises(PromiseError, self.launcher.run)
+      else:
+        self.launcher.run()
+    result = self.getPromiseResult(self.promise_name)['result']
+    self.assertEqual(result['failed'], failed)
+    return result['message']
+
+  def test_network_transit_ok(self):
+    message = "Network transit OK"
+    mock_stats = {'bytes_recv':1e6, 'bytes_sent':1e6}
+    self.writePromise(**{
+        'transit-period': 1,
+    })
+    self.runPromise(self.network_data(**{'bytes_recv':1e3, 'bytes_sent':1e3}))
+    time.sleep(0.5)
+    self.assertEqual(message, self.runPromise(self.network_data(**mock_stats)))
+
+  def test_network_min_nok(self):
+    message = "Network congested, data amount over the last 1 seconds"\
+      " reached minimum threshold:    1.4K (threshold is  102.4K)"
+    mock_stats = {'bytes_recv':1e3, 'bytes_sent':1e3}
+    self.writePromise(**{
+        'min-data-amount': 0.1, # MB
+        'transit-period': 1,
+    })
+    self.runPromise(self.network_data(**{'bytes_recv':300, 'bytes_sent':300}))
+    time.sleep(0.5)
+    self.assertEqual(message, self.runPromise(self.network_data(**mock_stats)))
+
+  def test_network_max_nok(self):
+    message = "Network congested, data amount over the last 1 seconds"\
+      " reached maximum threshold:    1.1M (threshold is    1.0M)"
+    mock_stats = {'bytes_recv':0.7e6, 'bytes_sent':0.5e6}
+    self.writePromise(**{
+        'max-data-amount': 1, # MB
+        'transit-period': 1,
+    })
+    self.runPromise(self.network_data(**{'bytes_recv':300, 'bytes_sent':300}))
+    time.sleep(0.5)
+    self.assertEqual(message, self.runPromise(self.network_data(**mock_stats)))
+
+  def test_network_transit_nok(self):
+    message = "Network congested, data amount over the last 1 seconds reached minimum threshold:    0.0B (threshold is    0.0B)\n"\
+      "Network congested, data amount over the last 1 seconds reached maximum threshold:    0.0B (threshold is    0.0B)"
+    mock_stats = {'bytes_recv':1e6, 'bytes_sent':1e6}
+    self.writePromise(**{
+        'last_transit_file':'last_transit_file',
+        'max-data-amount': 0,
+        'min-data-amount': 0,
+        'transit-period': 1,
+    })
+    self.runPromise(self.network_data(**{'bytes_recv':1e6, 'bytes_sent':1e6}))
+    time.sleep(0.5)
+    self.assertEqual(message, self.runPromise(self.network_data(**mock_stats)))
+
+if __name__ == '__main__':
+  unittest.main()
--- a/slapos/test/promise/plugin/test_check_ram_usage.py
+++ b/slapos/test/promise/plugin/test_check_ram_usage.py
+# -*- coding: utf-8 -*-
+##############################################################################
+# Copyright (c) 2018 Vifib SARL and Contributors. All Rights Reserved.
+#
+# WARNING: This program as such is intended to be used by professional
+# programmers who take the whole responsibility of assessing all potential
+# consequences resulting from its eventual inadequacies and bugs
+# End users who are looking for a ready-to-use solution with commercial
+# guarantees and support are strongly advised to contract a Free Software
+# Service Company
+#
+# This program is Free Software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+##############################################################################
+
+import mock
+import os
+import time
+
+from collections import namedtuple
+from slapos.grid.promise import PromiseError
+from slapos.promise.plugin.check_ram_usage import RunPromise
+from . import TestPromisePluginMixin
+
+
+class TestCheckRamUsage(TestPromisePluginMixin):
+
+  promise_name = "monitor-ram-usage.py"
+
+  def setUp(self):
+    super(TestCheckRamUsage, self).setUp()
+    self.ram_data = namedtuple('ram_data', ['available'])
+
+  def writePromise(self, **kw):
+    super(TestCheckRamUsage, self).writePromise(self.promise_name,
+      "from %s import %s\nextra_config_dict = %r\n"
+      % (RunPromise.__module__, RunPromise.__name__, kw))
+
+  def runPromise(self, summary, failed=False):
+    self.configureLauncher(enable_anomaly=True, force=True)
+    with mock.patch('psutil.virtual_memory', return_value=summary):
+      if failed:
+        self.assertRaises(PromiseError, self.launcher.run)
+      else:
+        self.launcher.run()
+    result = self.getPromiseResult(self.promise_name)['result']
+    self.assertEqual(result['failed'], failed)
+    return result['message']
+
+  def test_ram_ok(self):
+    message = "RAM usage OK"
+    available_ram = {'available':1e9}
+    self.writePromise(**{
+        'last-avg-ram-file':'last_avg_ram_file',
+        'min-threshold-ram': 500, # 500MB
+        'min-avg-ram': 100,
+    })
+    self.assertEqual(message, self.runPromise(self.ram_data(**available_ram)))
+
+  def test_ram_below_threshold_nok(self):
+    message = "RAM usage reached critical threshold:  190.7M  (threshold is  500.0M)"
+    available_ram = {'available': 200e6}
+    self.writePromise(**{
+        'last-avg-ram-file':'last_avg_ram_file',
+        'min-threshold-ram': 500, # ≈500MB
+        'min-avg-ram': 100,
+    })
+    self.assertEqual(message, self.runPromise(self.ram_data(**available_ram)))
+
+  def test_ram_below_average_nok(self):
+    message = "Average RAM usage over the last 1 seconds reached threshold:  190.7M (threshold is  200.0M)"
+    available_ram = {'available': 200e6}
+    self.writePromise(**{
+        'last-avg-ram-file':'last_avg_ram_file',
+        'min-threshold-ram': 0,
+        'min-avg-ram': 200,
+        'avg-ram-period': 1,
+    })
+    m = self.runPromise(self.ram_data(**{'available': 300e6}))
+    m = self.runPromise(self.ram_data(**{'available': 200e6}))
+    time.sleep(1)
+    self.assertEqual(message, self.runPromise(self.ram_data(**available_ram)))
+    
+
+if __name__ == '__main__':
+  unittest.main()
--- a/slapos/test/promise/plugin/test_monitor_partition_space.py
+++ b/slapos/test/promise/plugin/test_monitor_partition_space.py
+##############################################################################
+#
+# Copyright (c) 2018 Vifib SARL and Contributors. All Rights Reserved.
+#
+# WARNING: This program as such is intended to be used by professional
+# programmers who take the whole responsibility of assessing all potential
+# consequences resulting from its eventual inadequacies and bugs
+# End users who are looking for a ready-to-use solution with commercial
+# guarantees and support are strongly adviced to contract a Free Software
+# Service Company
+#
+# This program is Free Software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 3
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+#
+##############################################################################
+
+from slapos.test.promise.plugin import TestPromisePluginMixin
+from slapos.grid.promise import PromiseError
+import os
+import sqlite3
+import psutil
+from slapos.grid.promise import PromiseError
+
+class TestMonitorPartitionSpace(TestPromisePluginMixin):
+
+  def setUp(self):
+    TestPromisePluginMixin.setUp(self)
+    log_folder = os.path.join(self.partition_dir, 'var/log')
+    os.makedirs(log_folder)
+
+    # get disk partition name
+    disk_partition = ""
+    path = os.path.join(self.partition_dir, "") + "extrafolder"
+    partitions = psutil.disk_partitions()
+    while path is not '/':
+      if not disk_partition:
+        path = os.path.dirname(path)
+      else:
+        break
+      for p in partitions:
+        if p.mountpoint == path:
+          disk_partition = p.device
+          break
+
+    self.db_file = '/tmp/collector.db'
+
+    # populate db
+    self.conn = sqlite3.connect(self.db_file)
+    f = open(self.base_path+"/folder_disk_test.sql")
+    sql = f.read()
+    # replace every disk_partition_name string with disk partition name
+    sql = sql.replace('disk_partition_name', disk_partition)
+    self.conn.executescript(sql)
+    self.conn.close()
+
+    self.promise_name = "monitor-partition-space.py"
+
+    content = """from slapos.promise.plugin.monitor_partition_space import RunPromise
+
+extra_config_dict = {
+  'collectordb': '%(collectordb)s',
+  'test-check-date': '2017-10-02',
+}
+""" % {'collectordb': self.db_file}
+    self.writePromise(self.promise_name, content)
+
+  def tearDown(self):
+    TestPromisePluginMixin.tearDown(self)
+    if os.path.exists(self.db_file):
+      os.remove(self.db_file)
+
+  def test_no_data_for_a_partition(self):
+    content = """from slapos.promise.plugin.monitor_partition_space import RunPromise
+
+extra_config_dict = {
+  'collectordb': '%(collectordb)s',
+  'test-check-date': '2017-10-02',
+  'test-partition': 'slapuser1'
+}
+""" % {'collectordb': self.db_file}
+
+    self.writePromise(self.promise_name, content)
+
+    self.configureLauncher(timeout=20)
+    self.launcher.run()
+    result = self.getPromiseResult(self.promise_name)
+    self.assertEqual(result['result']['failed'], False)
+    self.assertEqual(result['result']['message'], 
+      "No result from collector database for the user slapuser1: skipped")
+
+  def test_no_recent_data(self):
+    content = """from slapos.promise.plugin.monitor_partition_space import RunPromise
+
+extra_config_dict = {
+  'collectordb': '%(collectordb)s',
+  'test-check-date': '2017-10-04'
+}
+""" % {'collectordb': self.db_file}
+
+    self.writePromise(self.promise_name, content)
+
+    self.configureLauncher(force=True, timeout=20)
+    self.launcher.run()
+    result = self.getPromiseResult(self.promise_name)
+    self.assertEqual(result['result']['failed'], False)
+    self.assertEqual(result['result']['message'], "Not enough recent data to detect anomalies: skipped")
+
+  def test_no_enough_data(self):
+    content = """from slapos.promise.plugin.monitor_partition_space import RunPromise
+
+extra_config_dict = {
+  'collectordb': '%(collectordb)s',
+  'test-check-date': '2017-09-24'
+}
+""" % {'collectordb': self.db_file}
+
+    self.writePromise(self.promise_name, content)
+
+    self.configureLauncher(force=True, timeout=20)
+    self.launcher.run()
+    result = self.getPromiseResult(self.promise_name)
+    self.assertEqual(result['result']['failed'], False)
+    self.assertEqual(result['result']['message'], 
+      "Not enough data to detect anomalies: skipped")
+
+  def test_no_anomalies(self):
+    content = """from slapos.promise.plugin.monitor_partition_space import RunPromise
+
+extra_config_dict = {
+  'collectordb': '%(collectordb)s',
+  'test-check-date': '2017-10-02',
+  'threshold-ratio': '0.70'
+}
+""" % {'collectordb': self.db_file}
+
+    self.writePromise(self.promise_name, content)
+
+    self.configureLauncher(force=True, timeout=20)
+    self.launcher.run()
+    result = self.getPromiseResult(self.promise_name)
+    self.assertEqual(result['result']['failed'], False)
+    self.assertIn("No anomaly detected (last date checked: 2017-10-02 09:30:00)",
+                  result['result']['message'])
+
+  def test_presence_of_anomalies(self):
+    self.configureLauncher(force=True, timeout=20)
+    with self.assertRaises(PromiseError):
+      self.launcher.run()
+    result = self.getPromiseResult(self.promise_name)
+    self.assertEqual(result['result']['failed'], True)
+    msg = "Anomaly detected on 2017-10-02 09:30:00. Space used by slapuser0: %.2f G."
+    self.assertIn(msg % (87533020.0/(1024*1024)), result['result']['message'])
+
+
+if __name__ == '__main__':
+  unittest.main()
\ No newline at end of file