Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
S
slapos.toolbox
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Justin
slapos.toolbox
Commits
3e813ce2
Commit
3e813ce2
authored
Dec 06, 2022
by
Joanne Hugé
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
promise/plugin: add check_cpu_temperature promise
parent
b17843bc
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
278 additions
and
1 deletion
+278
-1
slapos/promise/plugin/check_cpu_temperature.py
slapos/promise/plugin/check_cpu_temperature.py
+97
-0
slapos/promise/plugin/util.py
slapos/promise/plugin/util.py
+88
-1
slapos/test/promise/plugin/test_check_cpu_temperature.py
slapos/test/promise/plugin/test_check_cpu_temperature.py
+93
-0
No files found.
slapos/promise/plugin/check_cpu_temperature.py
0 → 100644
View file @
3e813ce2
import
json
import
os
import
psutil
import
time
from
.util
import
get_data_interval_json_log
from
.util
import
JSONRunPromise
from
zope.interface
import
implementer
from
slapos.grid.promise
import
interface
@implementer(interface.IPromise)
class RunPromise(JSONRunPromise):
    """
    Promise checking the CPU temperature against a spot and an average limit.

    Each run reads the current temperature, appends it to the JSON log
    (self.log_file, written through self.json_logger) and, at most once per
    quarter of the averaging window, compares the average of the logged
    values with the configured maximum.

    Configuration keys read through getConfig():
      - 'max-spot-temp' (default 90): limit for a single reading
      - 'max-avg-temp' (default 80): limit for the averaged readings
      - 'avg-temp-duration-sec' (default 0): averaging window in seconds;
        when non-zero it takes precedence over 'avg-temp-duration'
      - 'avg-temp-duration' (default 5): averaging window in minutes
      - 'last-avg-computation-file' (default 'last_avg'): file whose
        modification time records the last average computation
      - 'testing': when equal to "True", a random temperature is used
        instead of the real sensor
    """

    def __init__(self, config):
        super(RunPromise, self).__init__(config)
        self.setPeriodicity(minute=2)
        # Only the modification time of this file is used: it is truncated
        # every time the average is computed (see sense()).
        self.last_avg_computation_file = self.getConfig(
            'last-avg-computation-file', 'last_avg')

    def sense(self):
        """
        Read the CPU temperature, log it and check it against the limits.

        Problems are reported with self.logger.error(); "Temperature OK" is
        logged only when no check failed during this run.
        """
        promise_success = True

        max_spot_temp = float(self.getConfig('max-spot-temp', 90))
        max_avg_temp = float(self.getConfig('max-avg-temp', 80))
        avg_temp_duration_sec = int(self.getConfig('avg-temp-duration-sec', 0))
        if avg_temp_duration_sec:
            avg_temp_duration = avg_temp_duration_sec
        else:
            avg_temp_duration = 60 * int(self.getConfig('avg-temp-duration', 5))
        testing = self.getConfig('testing') == "True"

        # Get current temperature
        if testing:
            # Environments without a temperature sensor: use a plausible
            # random value instead.
            from random import randint
            cpu_temp = randint(40, 75)
        else:
            sensors = psutil.sensors_temperatures()
            try:
                # Second field of the first 'coretemp' entry (the current
                # temperature according to the psutil documentation).
                cpu_temp = sensors['coretemp'][0][1]
            except (KeyError, IndexError):
                # No usable 'coretemp' sensor: report it instead of letting
                # the lookup error escape from sense().
                self.logger.error(
                    "Could not read CPU temperature: no 'coretemp' sensor found")
                return

        if cpu_temp > max_spot_temp:
            self.logger.error(
                "Temperature reached critical threshold: %s degrees "
                "celsius (threshold is %s degrees celsius)"
                % (cpu_temp, max_spot_temp))
            promise_success = False

        # Log temperature
        data = json.dumps({'cpu_temperature': cpu_temp})
        self.json_logger.info("Temperature data", extra={'data': data})

        # TODO: promise should compute average only with logs between interval
        # Compute average temperature, at most once per quarter of the window
        avg_computation_period = avg_temp_duration / 4
        try:
            last_computation = os.path.getmtime(self.last_avg_computation_file)
        except OSError:
            # The file does not exist yet: behave as if the average had
            # never been computed.
            last_computation = 0
        if (time.time() - last_computation) > avg_computation_period:
            # Truncate the marker file to refresh its modification time
            with open(self.last_avg_computation_file, 'w'):
                pass
            temp_list = get_data_interval_json_log(
                self.log_file, avg_temp_duration)
            if temp_list:
                avg_temp = sum(
                    entry['cpu_temperature'] for entry in temp_list
                ) / len(temp_list)
                if avg_temp > max_avg_temp:
                    self.logger.error(
                        "Average temperature over the last %s seconds "
                        "reached threshold: %s degrees celsius (threshold is %s degrees "
                        "celsius)" % (avg_temp_duration, avg_temp, max_avg_temp))
                    promise_success = False
            else:
                self.logger.error("Couldn't read temperature from log")
                promise_success = False

        if promise_success:
            self.logger.info("Temperature OK")

    def test(self):
        """
        Called after sense() if the instance is still converging.
        Returns success or failure based on sense results.

        In this case, fail if the previous sensor result is negative.
        """
        return self._test(result_count=1, failure_amount=1)

    def anomaly(self):
        """
        Called after sense() if the instance has finished converging.
        Returns success or failure based on sense results.
        Failure signals the instance has diverged.

        In this case, fail if two out of the last three results are negative.
        """
        return self._anomaly(result_count=3, failure_amount=2)
slapos/promise/plugin/util.py
View file @
3e813ce2
import
json
import
logging
import
os
from
dateutil
import
parser
from
slapos.grid.promise.generic
import
GenericPromise
def
tail_file
(
file_path
,
line_count
=
10
):
"""
...
...
@@ -26,4 +31,86 @@ def tail_file(file_path, line_count=10):
bytes
-=
BUFSIZ
block
-=
1
return
'
\
n
'
.
join
(
''
.
join
(
line_list
).
splitlines
()[
-
line_count
:])
\ No newline at end of file
return
'
\
n
'
.
join
(
''
.
join
(
line_list
).
splitlines
()[
-
line_count
:])
class JSONRunPromise(GenericPromise):
    """
    GenericPromise which also provides a JSON log.

    After construction the instance exposes:
      - log_file: '<log-folder>/<name without extension>.json.log'
      - json_logger: logger writing one JSON object per record to log_file;
        callers must pass extra={'data': <JSON text>} with every record
        because the format string references %(data)s.
    """

    def __init__(self, config):
        self.__name = config.get('name', None)
        self.__log_folder = config.get('log-folder', None)

        super(JSONRunPromise, self).__init__(config)

        self.__title = os.path.splitext(self.__name)[0]
        self.log_file = os.path.join(
            self.__log_folder, '%s.json.log' % self.__title)

        self.json_logger = logging.getLogger('json_logger')
        self.json_logger.setLevel(logging.INFO)

        # logging.getLogger() returns the same logger object for a given
        # name, so handlers added by a previous instance are still attached.
        # Only add a file handler if none already targets this log file,
        # otherwise every record would be written several times and one
        # file descriptor would leak per instantiation.
        log_path = os.path.abspath(self.log_file)
        already_attached = any(
            isinstance(existing, logging.FileHandler)
            and existing.baseFilename == log_path
            for existing in self.json_logger.handlers)
        if not already_attached:
            handler = logging.FileHandler(self.log_file)
            formatter = logging.Formatter(
                '{"time": "%(asctime)s", "log_level": '
                '"%(levelname)s", "message": "%(message)s", "data": %(data)s}')
            handler.setFormatter(formatter)
            self.json_logger.addHandler(handler)
def get_data_interval_json_log(log, interval):
    """
    Get all data in the last "interval" seconds from JSON log
    Reads rotated logs too (XX.log, XX.log.1, XX.log.2, ...)

    log -- path of the current log file; rotated files are looked up as
      "<log>.1", "<log>.2", ... until one of them cannot be opened
    interval -- size of the time window, in seconds, counted back from the
      timestamp of the most recent record

    Returns the list of the 'data' values of the records, most recent
    first. The record which is found to be older than the window is
    appended before the age check, so the result may end with one value
    older than "interval" seconds. An empty list is returned when nothing
    could be read.
    """
    log_number = 0
    latest_timestamp = None
    data_list = []
    while True:
        path = "{}.{}".format(log, log_number) if log_number else log
        try:
            f = open(path, "rb")
        except OSError:
            # No (more) log file: return what was collected so far
            return data_list
        with f:
            # Walk the file backwards, one line at a time
            f.seek(0, os.SEEK_END)
            while True:
                try:
                    # Step back until the byte just read is the newline
                    # ending the previous line, or until the start of the
                    # file is reached (seek() then returns 0).
                    while f.seek(-2, os.SEEK_CUR) and f.read(1) != b'\n':
                        pass
                except OSError:
                    # Tried to seek before the start of the file: every
                    # line of this file was read, go to the next older one
                    break
                pos = f.tell()
                entry = json.loads(f.readline().decode().replace("'", '"'))
                timestamp = parser.parse(entry['time'])
                data_list.append(entry['data'])
                if latest_timestamp is None:
                    latest_timestamp = timestamp
                if (latest_timestamp - timestamp).total_seconds() > interval:
                    return data_list
                # Go back to the start of the line just read so that the
                # next iteration looks for the line before it
                f.seek(pos, os.SEEK_SET)
        log_number += 1
def get_latest_timestamp_json_log(log):
    """
    Get latest timestamp from JSON log
    Reads rotated logs too (XX.log, XX.log.1, XX.log.2, ...)

    log -- path of the current log file; rotated files are looked up as
      "<log>.1", "<log>.2", ... until one of them cannot be opened

    Returns the parsed 'time' field of the last line of the most recent
    log file holding at least two bytes, or 0 when no such file exists.
    """
    log_number = 0
    while True:
        path = "{}.{}".format(log, log_number) if log_number else log
        try:
            f = open(path, "rb")
        except OSError:
            # No (more) log file to look into
            return 0
        with f:
            f.seek(0, os.SEEK_END)
            try:
                # Step back until the byte just read is the newline ending
                # the line before the last one, or until the start of the
                # file is reached (seek() then returns 0).
                while f.seek(-2, os.SEEK_CUR) and f.read(1) != b'\n':
                    pass
            except OSError:
                # File shorter than two bytes (typically an empty, freshly
                # rotated log): there is nothing to parse here, so fall
                # through to the next older rotated file instead of
                # giving up.
                pass
            else:
                entry = json.loads(f.readline().decode().replace("'", '"'))
                return parser.parse(entry['time'])
        log_number += 1
slapos/test/promise/plugin/test_check_cpu_temperature.py
0 → 100644
View file @
3e813ce2
# -*- coding: utf-8 -*-
##############################################################################
# Copyright (c) 2018 Vifib SARL and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly advised to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
##############################################################################
import
mock
import
os
import
time
from
slapos.grid.promise
import
PromiseError
from
slapos.promise.plugin.check_cpu_temperature
import
RunPromise
from
.
import
TestPromisePluginMixin
class TestCheckCpuTemperature(TestPromisePluginMixin):
    """Tests of the check_cpu_temperature promise plugin."""

    promise_name = "monitor-cpu-temperature.py"

    def writePromise(self, **kw):
        """Write a promise file importing RunPromise, with kw as its
        extra_config_dict."""
        super(TestCheckCpuTemperature, self).writePromise(
            self.promise_name,
            "from %s import %s\nextra_config_dict = %r\n"
            % (RunPromise.__module__, RunPromise.__name__, kw))

    def runPromise(self, summary, failed=False):
        """
        Run the promise with psutil.sensors_temperatures() patched to
        return summary, check the 'failed' flag of the stored result and
        return its message.
        """
        self.configureLauncher(enable_anomaly=True, force=True)
        with mock.patch('psutil.sensors_temperatures', return_value=summary):
            if failed:
                self.assertRaises(PromiseError, self.launcher.run)
            else:
                self.launcher.run()
        result = self.getPromiseResult(self.promise_name)['result']
        self.assertEqual(result['failed'], failed)
        return result['message']

    def test_temp_ok(self):
        message = "Temperature OK"
        self.writePromise(**{
            'last-avg-computation-file': 'last_avg_computation_file',
            'max-spot-temp': 80,
            'max-avg-temp': 100,
        })
        self.assertEqual(message, self.runPromise({'coretemp': [[0, 50]]}))

    def test_spot_critical(self):
        message = "Temperature reached critical threshold: 90 degrees celsius (threshold is 80.0 degrees celsius)"
        self.writePromise(**{
            'last-avg-computation-file': 'last_avg_computation_file',
            'max-spot-temp': 80,
            'max-avg-temp': 100,
        })
        self.assertEqual(message, self.runPromise({'coretemp': [[0, 90]]}))

    def test_avg_critical(self):
        message = "Average temperature over the last 1 seconds reached threshold: 45.0 degrees celsius (threshold is 40.0 degrees celsius)"
        self.writePromise(**{
            'last-avg-computation-file': 'last_avg_computation_file',
            'max-spot-temp': 99999,
            'max-avg-temp': 40,
            'avg-temp-duration-sec': 1,
        })
        # TODO: promise should compute average only with logs between interval
        # The expected average (45.0) is the mean of the final 90 reading
        # and one earlier 0 reading, even though that reading was logged
        # more than the 1 second window before.
        self.runPromise({'coretemp': [[0, 0]]})
        self.runPromise({'coretemp': [[0, 0]]})
        time.sleep(2)
        self.assertEqual(message, self.runPromise({'coretemp': [[0, 90]]}))
if __name__ == '__main__':
    # unittest is only needed when this file is run as a script; import it
    # here so that unittest.main() does not fail with a NameError.
    import unittest
    unittest.main()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment