Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
S
slapos.toolbox
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Titouan Soulard
slapos.toolbox
Commits
70bf9386
Commit
70bf9386
authored
Jan 04, 2023
by
Justin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
promise/plugin: Merged check_cpu_temperature from
@jhuge
parent
4df69149
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
180 additions
and
0 deletions
+180
-0
slapos/promise/plugin/check_cpu_temperature.py
slapos/promise/plugin/check_cpu_temperature.py
+87
-0
slapos/test/promise/plugin/test_check_cpu_temperature.py
slapos/test/promise/plugin/test_check_cpu_temperature.py
+93
-0
No files found.
slapos/promise/plugin/check_cpu_temperature.py
0 → 100644
View file @
70bf9386
import
json
import
os
import
psutil
import
time
from
.util
import
JSONPromise
from
zope.interface
import
implementer
from
slapos.grid.promise
import
interface
@implementer(interface.IPromise)
class RunPromise(JSONPromise):
    """Promise that watches the CPU temperature reported by psutil.

    Every run of sense() does three things:

    * compares the current reading with the ``max-spot-temp`` threshold;
    * appends the reading to the JSON log;
    * from time to time, averages the readings returned by
      ``getJsonLogDataInterval`` and compares the average with the
      ``max-avg-temp`` threshold.
    """

    def __init__(self, config):
        """Read thresholds and timing options from the promise configuration.

        Recognised options (with their defaults):

        * ``frequency`` (2): passed to ``setPeriodicity``;
        * ``last-avg-computation-file`` ('last_avg'): path of the flag file
          whose modification time records the last average computation;
        * ``max-spot-temp`` (90): threshold for a single reading, in °C;
        * ``max-avg-temp`` (80): threshold for the averaged readings, in °C;
        * ``avg-temp-duration`` (600): length of the averaging window, in
          seconds.
        """
        super(RunPromise, self).__init__(config)
        self.setPeriodicity(float(self.getConfig('frequency', 2)))
        self.avg_flag_file = self.getConfig(
            'last-avg-computation-file', 'last_avg')
        self.max_spot_temp = float(self.getConfig('max-spot-temp', 90))  # °C
        self.max_avg_temp = float(self.getConfig('max-avg-temp', 80))  # °C
        self.avg_temp_duration = int(
            self.getConfig('avg-temp-duration', 600))  # seconds

    def sense(self):
        """Read the CPU temperature, log it and check both thresholds.

        Logs an error when the current reading exceeds ``max_spot_temp``,
        when the averaged readings exceed ``max_avg_temp``, or when no
        reading could be fetched back from the JSON log. Logs an info
        message when nothing went wrong. When the sensor cannot be read at
        all, only an info message is logged and the method returns early.
        """
        success = True

        # Get current temperature
        try:
            cpu_temp = psutil.sensors_temperatures()['coretemp'][0][1]
        except (AttributeError, KeyError, IndexError):
            # Use logger.info to avoid errors when sensors are not supported
            # by the OS (ex: VM). AttributeError covers platforms where
            # psutil does not provide sensors_temperatures() at all.
            self.logger.info("Could not read core temperature on VM")
            return

        # Check spot temperature
        if cpu_temp > self.max_spot_temp:
            success = False
            self.logger.error(
                "Temperature reached critical threshold: %s °C"
                " (threshold is %s °C)",
                cpu_temp, self.max_spot_temp)

        # Log temperature
        data = json.dumps({'cpu_temperature': cpu_temp})
        self.json_logger.info("Temperature data", extra={'data': data})

        # TODO: promise should compute average only with logs between interval
        # Compute average temperature. The average is refreshed at most four
        # times per averaging window; the flag file's mtime records when it
        # was last refreshed. Dividing by a float keeps the period from being
        # truncated should this run with integer division.
        avg_computation_period = self.avg_temp_duration / 4.0
        try:
            t = os.path.getmtime(self.avg_flag_file)
        except OSError:
            # No usable flag file: behave as if the average was never computed.
            t = 0
        if (time.time() - t) > avg_computation_period:
            # Touch the flag file to record this computation.
            with open(self.avg_flag_file, 'w'):
                pass
            temp_list = self.getJsonLogDataInterval(self.avg_temp_duration)
            if temp_list:
                # float() keeps the average exact even when every logged
                # reading is an integer.
                avg_temp = (sum(x['cpu_temperature'] for x in temp_list)
                            / float(len(temp_list)))
                if avg_temp > self.max_avg_temp:
                    success = False
                    self.logger.error(
                        "Average temperature over the last %ds reached threshold: %s °C"
                        " (threshold is %s °C)",
                        self.avg_temp_duration, avg_temp, self.max_avg_temp)
            else:
                success = False
                self.logger.error("Couldn't read temperature from log")

        if success:
            self.logger.info("Temperature OK (%s °C)", cpu_temp)

    def test(self):
        """
        Called after sense() if the instance is still converging.
        Returns success or failure based on sense results.

        In this case, fail if the previous sensor result is negative.
        """
        return self._test(result_count=1, failure_amount=1)

    def anomaly(self):
        """
        Called after sense() if the instance has finished converging.
        Returns success or failure based on sense results.
        Failure signals the instance has diverged.

        In this case, fail if two out of the last three results are negative.
        """
        return self._anomaly(result_count=3, failure_amount=2)
slapos/test/promise/plugin/test_check_cpu_temperature.py
0 → 100644
View file @
70bf9386
# -*- coding: utf-8 -*-
##############################################################################
# Copyright (c) 2018 Vifib SARL and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly advised to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
##############################################################################
import
mock
import
os
import
time
from
slapos.grid.promise
import
PromiseError
from
slapos.promise.plugin.check_cpu_temperature
import
RunPromise
from
.
import
TestPromisePluginMixin
class TestCheckCpuTemperature(TestPromisePluginMixin):
    """Tests for the check_cpu_temperature promise plugin.

    psutil.sensors_temperatures is patched in every run, so the tests feed
    the promise with fixed readings instead of real sensor data.
    """

    promise_name = "monitor-cpu-temperature.py"

    def setUp(self):
        super(TestCheckCpuTemperature, self).setUp()

    def writePromise(self, **kw):
        """Write a promise file importing RunPromise, configured with kw."""
        promise_source = "from %s import %s\nextra_config_dict = %r\n" % (
            RunPromise.__module__, RunPromise.__name__, kw)
        super(TestCheckCpuTemperature, self).writePromise(
            self.promise_name, promise_source)

    def runPromise(self, summary, failed=False):
        """Run the promise once with patched sensor data.

        summary is what the patched psutil.sensors_temperatures returns.
        When failed is true the launcher is expected to raise PromiseError.
        Returns the message stored in the promise result.
        """
        self.configureLauncher(enable_anomaly=True, force=True)
        sensors_patch = mock.patch(
            'psutil.sensors_temperatures', return_value=summary)
        with sensors_patch:
            if not failed:
                self.launcher.run()
            else:
                with self.assertRaises(PromiseError):
                    self.launcher.run()
        outcome = self.getPromiseResult(self.promise_name)['result']
        self.assertEqual(outcome['failed'], failed)
        return outcome['message']

    def test_temp_ok(self):
        """A reading below both thresholds reports the OK message."""
        expected = "Temperature OK (50 °C)"
        options = {
            'last-avg-computation-file': 'last_avg_computation_file',
            'max-spot-temp': 80,
            'max-avg-temp': 100,
        }
        self.writePromise(**options)
        reported = self.runPromise({'coretemp': [[0, 50]]})
        self.assertEqual(expected, reported)

    def test_spot_critical(self):
        """A reading above max-spot-temp reports the critical message."""
        expected = "Temperature reached critical threshold: 90 °C (threshold is 80.0 °C)"
        options = {
            'last-avg-computation-file': 'last_avg_computation_file',
            'max-spot-temp': 80,
            'max-avg-temp': 100,
        }
        self.writePromise(**options)
        reported = self.runPromise({'coretemp': [[0, 90]]})
        self.assertEqual(expected, reported)

    def test_avg_critical(self):
        """An average above max-avg-temp reports the average message."""
        expected = "Average temperature over the last 1s reached threshold: 45.0 °C (threshold is 40.0 °C)"
        options = {
            'last-avg-computation-file': 'last_avg_computation_file',
            'max-spot-temp': 99999,
            'max-avg-temp': 40,
            'avg-temp-duration': 1,
        }
        self.writePromise(**options)
        # Two runs at 0 °C, each followed by a pause, before the hot reading.
        for pause in (0.6, 0.5):
            self.runPromise({'coretemp': [[0, 0]]})
            time.sleep(pause)
        reported = self.runPromise({'coretemp': [[0, 90]]})
        self.assertEqual(expected, reported)
# Allow running this test module directly as a script.
if __name__ == '__main__':
    # Imported here because the module-level imports do not provide unittest.
    import unittest
    unittest.main()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment