Commit 17d8fe38 authored by Łukasz Nowak's avatar Łukasz Nowak

monitor: Adapt parameters to new requirements

check-maximum-elapsed-time and failure-amount are added to support
monitoring distant backends with some margin on the failures.
parents d1409fe5 a5dc903a
Pipeline #7995 failed with stage
......@@ -14,4 +14,4 @@
# not need these here).
[surykatka-requirements]
_update_hash_filename_ = requirements.txt
md5sum = 2dfe4f8b8c5b6f5d3478b70e53c49201
md5sum = c4bd35bcc5c9c25efe1edff5dd022605
Click==7.0
certifi==2019.11.28
chardet==3.0.4
Click==7.0
dnspython==1.16.0
forcediphttpsadapter==1.0.1
idna==2.8
miniupnpc==2.0.2
peewee==3.13.1
requests==2.22.0
surykatka==0.2.0
urllib3==1.25.7
surykatka==0.4.2
urllib3==1.25.8
......@@ -14,7 +14,7 @@
# not need these here).
[template]
filename = instance.cfg
md5sum = d778b6f436ae6864819eb2ff2d12a86f
md5sum = dc9770bacea2c504b92ad2162e58d222
[template-monitor]
_update_hash_filename_ = instance-monitor.cfg.jinja2
......@@ -30,7 +30,7 @@ md5sum = 9e237dbdda59e788202f0da194a57d41
[template-monitor-edgebot]
_update_hash_filename_ = instance-monitor-edgebot.cfg.jinja2
md5sum = 8786e4245db0d27dfa4815222d970e52
md5sum = f28a329e830ed737d468abcb4e89e1a2
[network-bench-cfg]
filename = network_bench.cfg.in
......@@ -42,4 +42,4 @@ md5sum = cad2402bbd21907cfed6bc5af8c5d3ab
[template-surykatka-ini]
_update_hash_filename_ = surykatka.ini.jinja2
md5sum = 40870921e05d93b5843ab34abd7e3902
md5sum = a2de719a5a65438c8c3ee5195442beb6
......@@ -22,7 +22,19 @@
"check-certificate-expiration-days": {
"default": "15",
"title": "Default certificate expiration days check",
"description": "Default amount of days to consider certitifcate as being to-be-expired (default: 15).",
"description": "Default amount of days to consider certificate as being to-be-expired (default: 15).",
"type": "string"
},
"check-maximum-elapsed-time": {
"default": "2",
"title": "Default maximum elapsed time for a site to reply (seconds)",
"description": "Default maximum elapsed time for a site to reply to be considered good (default: 2s).",
"type": "string"
},
"failure-amount": {
"default": "1",
"title": "Default amount of failures to consider URL as in bad state",
"description": "Default amount of failures to consider URL as in bad state, can be set to higher value for endpoints with accepted short outages (default: 1).",
"type": "string"
}
}
......
......@@ -21,7 +21,19 @@
"check-certificate-expiration-days": {
"default": "Master default",
"title": "Certificate expiration days check",
"description": "Default amount of days to consider certitifcate as being to-be-expired (default: comes from master partition).",
"description": "Amount of days to consider certificate as being to-be-expired (default: comes from master partition).",
"type": "string"
},
"check-maximum-elapsed-time": {
"default": "Master default",
"title": "Maximum elapsed time for a site to reply (seconds)",
"description": "Maximum elapsed time for a site to reply to be considered good.(default: comes from master partition).",
"type": "string"
},
"failure-amount": {
"default": "Master default",
"title": "Amount of failures to consider URL as in bad state",
"description": "Amount of failures to consider URL as in bad state, can be set to higher value for endpoints with accepted short outages (default: comes from master partition).",
"type": "string"
}
}
......
......@@ -5,7 +5,7 @@
{%- do CONFIGURATION.__setitem__(k[14:], v) %}
{%- endif %}
{%- endfor %}
{%- set slave_instance_list = [] %}
{%- set slave_instance_dict = {} %}
{%- set extra_slave_instance_list = slapparameter_dict.get('extra_slave_instance_list') %}
{%- if extra_slave_instance_list %}
{#- Create slaves to process with setting up defaults #}
......@@ -16,15 +16,27 @@
{%- if 'check-certificate-expiration-days' not in slave %}
{%- do slave.__setitem__('check-certificate-expiration-days', CONFIGURATION['check-certificate-expiration-days']) %}
{%- endif %}
{%- if 'failure-amount' not in slave %}
{%- do slave.__setitem__('failure-amount', CONFIGURATION['failure-amount']) %}
{%- endif %}
{%- if 'check-maximum-elapsed-time' not in slave %}
{%- do slave.__setitem__('check-maximum-elapsed-time', CONFIGURATION['check-maximum-elapsed-time']) %}
{%- endif %}
{%- if 'check-frontend-ip' not in slave %}
{%- do slave.__setitem__('check-frontend-ip', CONFIGURATION['check-frontend-ip']) %}
{%- endif %}
{%- if 'url' in slave %}
{%- do slave_instance_list.append(slave) %}
{%- set class = slave['check-maximum-elapsed-time'] %}
{%- if class not in slave_instance_dict %}
{%- do slave_instance_dict.__setitem__(class, []) %}
{%- endif %}
{%- do slave_instance_dict[class].append(slave) %}
{%- endif %}
{%- endfor %}
{%- endif %}
{%- set part_list = [] %}
{%- for class, slave_instance_list in slave_instance_dict.items() %}
{#- class is used to separate surykatka with different timeouts #}
{%- for slave in sorted(slave_instance_list) %}
{%- set part_id = 'http-query-' ~ slave['slave_reference'] ~ '-promise' %}
{%- do part_list.append(part_id) %}
......@@ -37,75 +49,81 @@ config-report = http_query
config-url = {{ slave['url'] }}
config-status-code = {{ slave['check-status-code'] }}
config-certificate-expiration-days = {{ slave['check-certificate-expiration-days'] }}
config-failure-amount = {{ slave['failure-amount'] }}
config-maximum-elapsed-time = {{ slave['check-maximum-elapsed-time'] }}
config-ip-list = {{ slave['check-frontend-ip'] }}
config-json-file = ${surykatka-config:json}
{% endfor %}
config-json-file = ${surykatka-config-{{ class }}:json}
{%- endfor %}
[surykatka-bot-promise]
[surykatka-bot-promise-{{ class }}]
<= monitor-promise-base
module = check_surykatka_json
name = surykatka-bot-promise.py
name = surykatka-bot-promise-{{ class }}.py
config-report = bot_status
config-json-file = ${surykatka-config:json}
[buildout]
extends = {{ monitor_template_output }}
parts =
cron
cron-entry-surykatka-status
monitor-base
publish-connection-information
surykatka
surykatka-bot-promise
{% for part_id in sorted(part_list) %}
{{ part_id }}
{% endfor %}
config-json-file = ${surykatka-config-{{ class }}:json}
eggs-directory = {{ eggs_directory }}
develop-eggs-directory = {{ develop_eggs_directory }}
offline = true
[surykatka-config]
[surykatka-config-{{ class }}]
recipe = slapos.recipe.template:jinja2
db = ${directory:srv}/surykatka.db
rendered = ${directory:etc}/surykatka.ini
db = ${directory:srv}/surykatka-{{ class }}.db
rendered = ${directory:etc}/surykatka-{{ class }}.ini
template = {{ template_surykatka_ini }}
slave_instance_list = {{ dumps(slave_instance_list) }}
nameserver = {{ dumps(CONFIGURATION['nameserver']) }}
json = ${directory:srv}/surykatka.json
json = ${directory:srv}/surykatka-{{ class }}.json
{#- timeout is just a bit bigger than class time #}
timeout = {{ int(class) + 2 }}
context =
import json_module json
key db :db
key nameserver :nameserver
key slave_instance_list :slave_instance_list
key timeout :timeout
[surykatka]
[surykatka-{{ class }}]
recipe = slapos.cookbook:wrapper
config = ${surykatka-config:rendered}
config = ${surykatka-config-{{ class }}:rendered}
command-line =
{{ surykatka_binary }} --run crawl --reload --configuration ${:config}
wrapper-path = ${monitor-directory:service}/${:_buildout_section_name_}
hash-existing-files = ${buildout:directory}/software_release/buildout.cfg
[surykatka-status-json]
[surykatka-status-json-{{ class }}]
recipe = slapos.recipe.template:jinja2
json = ${surykatka-config-{{ class }}:json}
template = inline:#!/bin/sh
if {{ surykatka_binary }} --run status --configuration ${surykatka:config} --output json > ${surykatka-config:json}.tmp ; then
mv -f ${surykatka-config:json}.tmp ${surykatka-config:json}
if {{ surykatka_binary }} --run status --configuration ${surykatka-{{ class }}:config} --output json > ${:json}.tmp ; then
mv -f ${:json}.tmp ${:json}
else
rm -f ${surykatka-config:json}.tmp
rm -f ${:json}.tmp
fi
rendered = ${monitor-directory:bin}/${:_buildout_section_name_}
mode = 0755
[cron-entry-surykatka-status]
[cron-entry-surykatka-status-{{ class }}]
recipe = slapos.cookbook:cron.d
cron-entries = ${directory:etc}/cron.d
name = surykatka-status
name = surykatka-status-{{ class }}
frequency = */2 * * * *
command = ${surykatka-status-json:rendered}
command = ${surykatka-status-json-{{ class }}:rendered}
{%- do part_list.append('surykatka-' + class) %}
{%- do part_list.append('surykatka-bot-promise-' + class) %}
{%- do part_list.append('cron-entry-surykatka-status-' + class) %}
{%- endfor %}
[buildout]
extends = {{ monitor_template_output }}
parts =
cron
monitor-base
publish-connection-information
{% for part_id in sorted(part_list) %}
{{ part_id }}
{% endfor %}
eggs-directory = {{ eggs_directory }}
develop-eggs-directory = {{ develop_eggs_directory }}
offline = true
[publish-connection-information]
recipe = slapos.cookbook:publish.serialised
......
......@@ -44,6 +44,11 @@ context = import json_module json
raw template_json_edgetest_test ${json-test-template:target}
mode = 0644
[surykatka-wrapped]
recipe = slapos.cookbook:wrapper
wrapper-path = $${buildout:bin-directory}/surykatka-wrapped
command-line = ${surykatka:location}/bin/python3.7 ${surykatka:executable}
[instance-base-edgebot]
recipe = slapos.recipe.template:jinja2
template = ${template-monitor-edgebot:target}
......@@ -59,7 +64,7 @@ context = import json_module json
key slapparameter_dict slap-configuration:configuration
key slap_software_type slap-configuration:slap-software-type
raw software_type edgebot
key surykatka_binary :surykatka-binary
key surykatka_binary surykatka-wrapped:wrapper-path
key template_surykatka_ini :template-surykatka-ini
raw buildout_bin ${buildout:bin-directory}
raw monitor_template_output ${monitor-template:output}
......@@ -79,6 +84,8 @@ configuration.check-status-code = 200
configuration.nameserver =
configuration.check-frontend-ip =
configuration.check-certificate-expiration-days = 15
configuration.check-maximum-elapsed-time = 2
configuration.failure-amount = 2
# use monitor-base-port to have monitor listening on each instance
# on different port and also on different port than other services
# it makes it possible to instantiate it correctly on signle IP, for
......
[SURYKATKA]
INTERVAL = 120
TIMEOUT = {{ timeout }}
SQLITE = {{ db }}
{%- set nameserver_list = nameserver.split() %}
{%- if len(nameserver_list) > 0 %}
......
This diff is collapsed.
......@@ -145,7 +145,7 @@ slapos.libnetworkcache = 0.20
slapos.rebootstrap = 4.4
slapos.recipe.build = 0.42
slapos.recipe.cmmi = 0.12
slapos.toolbox = 0.104
slapos.toolbox = 0.106
stevedore = 1.21.0
subprocess32 = 3.5.3
unicodecsv = 0.14.1
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment