Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
slapos
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Léo-Paul Géneau
slapos
Commits
32459e14
Commit
32459e14
authored
Mar 03, 2017
by
Alain Takoudjou
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
monitor sr: add promise to check cpu load and free memory on server
parent
0b1dab76
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
163 additions
and
3 deletions
+163
-3
software/monitor/instance-monitor.cfg.jinja2
software/monitor/instance-monitor.cfg.jinja2
+29
-0
software/monitor/instance.cfg
software/monitor/instance.cfg
+1
-1
software/monitor/script/check_system_health.py
software/monitor/script/check_system_health.py
+124
-0
software/monitor/software.cfg
software/monitor/software.cfg
+9
-2
No files found.
software/monitor/instance-monitor.cfg.jinja2
View file @
32459e14
...
...
@@ -10,6 +10,8 @@ parts =
monitor-collect-csv-wrapper
fluentd-wrapper
monitor-base
monitor-check-memory-usage
monitor-check-cpu-usage
publish-connection-information
...
...
@@ -99,11 +101,38 @@ command-line =
wrapper-path = ${monitor-directory:reports}/monitor-collect-csv-dump
parameters-extra = true
# Wrapper placed in the promises directory: runs the system health script
# in "cpu" mode, passing the file that holds the accepted load per core.
[monitor-check-cpu-usage]
recipe = slapos.cookbook:wrapper
command-line = ${monitor-directory:bin}/python {{ monitor_check_system_health }} cpu ${init-monitor-parameters:cpu-load-file}
wrapper-path = ${monitor-directory:promises}/system-CPU-load-check
# Wrapper placed in the promises directory: runs the system health script
# in "mem" mode, passing the file that holds the minimum free-memory
# percentage and the directory where the script keeps its history file.
[monitor-check-memory-usage]
recipe = slapos.cookbook:wrapper
command-line = ${monitor-directory:bin}/python {{ monitor_check_system_health }} mem ${init-monitor-parameters:mem-free-file} ${directory:monitor}
wrapper-path = ${monitor-directory:promises}/system-MEMORY-usage-check
# Publishes the monitor setup URL and the server log URL.
# NOTE(review): monitor-setup-url embeds the monitor username and password
# as parameters after the "#" of the URL.
[publish-connection-information]
recipe = slapos.cookbook:publish
monitor-setup-url = https://monitor.app.officejs.com/#page=settings_configurator&url=${publish:monitor-url}&username=${publish:monitor-user}&password=${publish:monitor-password}
server_log_url = ${publish:monitor-base-url}/${slap-configuration:private-hash}/
# Declares two file-backed settings, each pointing at the threshold file
# read by the matching promise.
# NOTE(review): presumably this makes both thresholds editable through the
# monitor configuration - confirm against the monitor stack.
[monitor-instance-parameter]
instance-configuration =
file max-cpu-load-per-core ${init-monitor-parameters:cpu-load-file}
file min-free-mem-percent ${init-monitor-parameters:mem-free-file}
# Seeds the two threshold files with defaults (1.5 for the CPU load
# tolerance, 7.0 for the free-memory limit). Each file is written only when
# it is missing or empty ("! -s"), so an existing value is left untouched.
[init-monitor-parameters]
recipe = plone.recipe.command
cpu-load-file = ${directory:monitor}/cpu-load-tolerance
mem-free-file = ${directory:monitor}/mem-free-limit
command =
if [ ! -s "${:cpu-load-file}" ]; then
echo "1.5" > ${:cpu-load-file}
fi
if [ ! -s "${:mem-free-file}" ]; then
echo "7.0" > ${:mem-free-file}
fi
[slap-configuration]
recipe = slapos.cookbook:slapconfiguration.serialised
computer = ${slap-connection:computer-id}
...
...
software/monitor/instance.cfg
View file @
32459e14
...
...
@@ -24,6 +24,7 @@ context = key buildout buildout:bin-directory
raw fluentd_location ${fluentd:location}
raw fluent_conf_output ${fluentd-agent-conf:output}
raw monitor_collect_csv_dump ${monitor-collect-csv-dump:output}
raw monitor_check_system_health ${monitor-system-health:output}
mode = 0644
[instance-base-distributor]
...
...
@@ -51,4 +52,3 @@ partition = $${slap-connection:partition-id}
url = $${slap-connection:server-url}
key = $${slap-connection:key-file}
cert = $${slap-connection:cert-file}
software/monitor/script/check_system_health.py
0 → 100644
View file @
32459e14
#!/usr/bin/env python
import json
import os
import re
import subprocess
import sys
# Command lines handed to subprocess as argv lists (no shell is involved).
# `top` in batch mode for a single iteration, used for the CPU alert details.
cpu_command_list = ['top', '-n', '1', '-b']
# `free` with sizes reported in mebibytes.
mem_command_list = ['free', '-m']
# Keeps only the first five lines of whatever is piped into it.
head_command_list = ['head', '-n', '5']
# Prints the number of processing units available.
cpu_core_cmd_list = ['nproc']
def cpu_usage(tolerance=1.5):
    """Report a high CPU load, or return None when the load is acceptable.

    The 15-minute load average is compared with ``core_count * tolerance``,
    where ``core_count`` is what ``cpu_core_cmd_list`` (nproc) prints.

    :param tolerance: accepted load per core; 1.5 accepts up to 150% load.
    :return: a message holding the three load averages followed by the
        first lines of ``top`` output when the threshold is exceeded,
        otherwise None.
    :raises OSError: if the load average cannot be obtained.
    :raises subprocess.CalledProcessError: if a helper command fails.
    """
    # tolerance=1.5 => accept up to 1.5 = 150% CPU load
    # Ask the OS directly instead of parsing `uptime` text, whose number
    # format depends on the locale and leaves commas on the figures.
    load, load5, long_load = os.getloadavg()
    core_count = int(subprocess.check_output(cpu_core_cmd_list).strip())
    threshold = core_count * tolerance
    if long_load <= threshold:
        return None

    # display top statistics
    top = subprocess.Popen(cpu_command_list, stdout=subprocess.PIPE)
    try:
        # universal_newlines gives text output on both Python 2 and 3.
        result = subprocess.check_output(
            head_command_list, stdin=top.stdout, universal_newlines=True)
    finally:
        # Close our end of the pipe first so `top` cannot block on a full
        # pipe once `head` has exited, then reap the child process.
        top.stdout.close()
        top.wait()
    message = "CPU load is high: %s %s %s\n\n" % (load, load5, long_load)
    message += result
    return message
def check_last_result(file, last_value, threshold=7.0, elt_count=5):
    """Record ``last_value`` in a rolling history file and average the window.

    The history is stored in ``file`` as space-separated numbers. An average
    is only computed once the file already held at least ``elt_count``
    values before this call; the history is then trimmed to the newest
    ``elt_count`` entries (including ``last_value``).

    :param file: path of the history file; created if missing.
    :param last_value: newest measurement to append.
    :param threshold: the average is reported only when strictly below it.
    :param elt_count: number of measurements kept and averaged.
    :return: the window average rounded to 2 decimals when it is below
        ``threshold``, otherwise 0.0.
    """
    # NOTE(review): 0.0 doubles as the "nothing to report" value, so a
    # window whose true average is exactly 0.0 is indistinguishable from
    # "no alert" for the caller.
    mem_average = 0.0
    value_list = []
    if os.path.exists(file):
        with open(file) as f:
            # split() without argument tolerates an empty file, newlines
            # and repeated blanks, unlike split(' ').
            tokens = f.read().split()
        for token in tokens:
            try:
                float(token)
            except ValueError:
                # Drop corrupted entries; otherwise the same parse error
                # would be raised on every run and the file never repaired.
                continue
            value_list.append(token)

    size = len(value_list)
    value_list.append(str(last_value))
    if size >= elt_count:
        # Keep only the newest elt_count measurements.
        del value_list[:-elt_count]
        # Divide by the number of values actually summed, not by the size
        # of the history before trimming.
        average = sum(float(v) for v in value_list) / len(value_list)
        if average < threshold:
            mem_average = round(average, 2)

    with open(file, 'w') as f:
        f.write(' '.join(value_list))
    return mem_average
def _parse_free_output(mem_stats):
    """Return (mem_total, mem_free, swap_total, swap_free) from `free` text.

    Rows are identified by position and by their numeric columns only, so
    the row labels are never compared. Two layouts are handled: the legacy
    one with a "-/+ buffers/cache" row between the memory and swap rows,
    and the one where the memory row ends with an "available" column.
    """
    rows = []
    for line in mem_stats.splitlines():
        numbers = [float(field) for field in line.split() if field.isdigit()]
        if numbers:
            rows.append(numbers)
    if len(rows) < 2 or len(rows[-1]) < 3:
        raise ValueError("Cannot parse memory statistics:\n%s" % mem_stats)

    mem_row, swap_row = rows[0], rows[-1]
    if len(rows) >= 3:
        # Legacy layout: last column of the "-/+ buffers/cache" row is the
        # free memory once buffers and cache are discounted.
        mem_free = rows[1][-1]
    else:
        # Newer layout: last column of the memory row is "available".
        mem_free = mem_row[-1]
    # Swap row columns are total, used, free.
    return mem_row[0], mem_free, swap_row[0], swap_row[2]


def memory_usage(storage_file, threshold=7.0, elt_count=5):
    """Report low available memory or swap, or return None when healthy.

    Runs ``mem_command_list`` (free -m), computes the percentage of memory
    still available and feeds it to ``check_last_result`` so that the alert
    is based on the average of the last ``elt_count`` measurements. When
    memory is fine, swap is checked against ``threshold * 1.7``.

    :param storage_file: history file passed to ``check_last_result``.
    :param threshold: minimum acceptable percentage of available memory.
    :param elt_count: number of measurements averaged.
    :return: an alert message followed by the raw ``free`` output, or None.
    :raises ValueError: if the ``free`` output cannot be parsed.
    """
    # universal_newlines gives text output on both Python 2 and 3.
    mem_stats = subprocess.check_output(
        mem_command_list, universal_newlines=True)
    mem_total, mem_free, swap_total, swap_free = _parse_free_output(mem_stats)

    if mem_free == 0.0:
        mem_available = 0.0
    else:
        mem_available = round(mem_free * 100 / mem_total, 2)

    average = check_last_result(
        storage_file,
        mem_available,
        threshold=threshold,
        elt_count=elt_count)
    if average != 0.0 and average < threshold:
        # mem used at (threshold)% at least
        # NOTE(review): "minutes" presumably assumes one run per minute.
        message = "Memory usage is high. %s%% is available (%s%% for last %s minutes).\n\n" % (
            mem_available, average, elt_count)
        message += mem_stats
        return message

    # Hosts with (almost) no swap configured are not checked.
    if swap_total > 1:
        if swap_free == 0.0:
            swap_available = 0.0
        else:
            # Already a percentage; it must not be scaled by 100 again.
            swap_available = round(swap_free * 100 / swap_total, 2)
        if swap_available < threshold * 1.7:
            message = "Memory SWAP usage is high. %s%% is available.\n\n" % swap_available
            message += mem_stats
            return message
    return None
def _read_threshold(config_file):
    """Return the positive number stored in config_file, else None.

    None is returned when the file is missing, does not hold a number, or
    holds a value that is not greater than zero.
    """
    if not os.path.exists(config_file):
        return None
    with open(config_file) as f:
        try:
            threshold = float(f.read())
        except ValueError:
            return None
    return threshold if threshold > 0 else None


def main(argv):
    """Run the requested check and return the process exit status.

    Usage: ``prog [cpu | mem] CONFIG_FILE [BASE_DIR]``

    :param argv: full argument vector, program name first.
    :return: 0 when healthy, 1 when the check produced an alert (which is
        printed), 2 on missing arguments, 3 on an unknown check type.
    """
    if len(argv) < 2:
        print("Usage: %s [cpu | mem] CONFIG_FILE [BASE_DIR]" % os.path.basename(argv[0]))
        return 2

    check_type = argv[1]
    threshold = None
    if len(argv) >= 3:
        threshold = _read_threshold(argv[2])

    if check_type == "cpu":
        result = cpu_usage(threshold or 1.5)
    elif check_type == "mem":
        directory = argv[3] if len(argv) >= 4 else ""
        # Fall back to the working directory when BASE_DIR is unusable.
        if not os.path.isdir(directory):
            directory = os.getcwd()
        storage_file = os.path.join(directory, 'mem-usage.mo')
        # NOTE(review): the 4.0 fallback differs from memory_usage's own
        # default of 7.0 - confirm which one is intended.
        result = memory_usage(storage_file, threshold=(threshold or 4.0), elt_count=10)
    else:
        return 3

    if result:
        print(result)
        return 1
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
\ No newline at end of file
software/monitor/software.cfg
View file @
32459e14
...
...
@@ -25,14 +25,14 @@ parts =
recipe = slapos.recipe.template
url = ${:_profile_base_location_}/instance.cfg
output = ${buildout:directory}/template.cfg
md5sum =
0c5e94beede86a91d7b01f61a7290f86
md5sum =
3ff5fb2710bf0ea84632c6d6d3894dd9
mode = 0644
[template-monitor]
recipe = slapos.recipe.build:download
url = ${:_profile_base_location_}/instance-monitor.cfg.jinja2
destination = ${buildout:directory}/template-base-monitor.cfg
md5sum =
df37b01b194f9621ce8928fe361db728
md5sum =
c513cf70b8671840b2ba049d2826a661
mode = 0644
[template-monitor-distributor]
...
...
@@ -77,6 +77,13 @@ filename = collect_csv_dump.py
output = ${:destination}/${:filename}
md5sum = cad2402bbd21907cfed6bc5af8c5d3ab
# Fetches script/check_system_health.py, reusing the settings of
# monitor-template-script.
# NOTE(review): md5sum presumably has to be refreshed whenever the script
# content changes - confirm before editing the script.
[monitor-system-health]
<= monitor-template-script
url = ${:_profile_base_location_}/script/${:filename}
filename = check_system_health.py
output = ${:destination}/${:filename}
md5sum = 7eb74a0be4995c6a1015a9a1eb6874c6
[extra-eggs]
<= monitor-eggs
interpreter = pythonwitheggs
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment