Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
S
slapos.core
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Léo-Paul Géneau
slapos.core
Commits
16b2e8b8
Commit
16b2e8b8
authored
Sep 27, 2012
by
Cédric Le Ninivin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Introducing watchdog
parent
676e32b8
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
175 additions
and
21 deletions
+175
-21
setup.py
setup.py
+1
-0
slapos/grid/SlapObject.py
slapos/grid/SlapObject.py
+36
-21
slapos/grid/slapgrid.py
slapos/grid/slapgrid.py
+12
-0
slapos/grid/templates/supervisord.conf.in
slapos/grid/templates/supervisord.conf.in
+4
-0
slapos/grid/watchdog.py
slapos/grid/watchdog.py
+122
-0
No files found.
setup.py
View file @
16b2e8b8
...
...
@@ -59,6 +59,7 @@ setup(name=name,
'slapproxy = slapos.proxy:main'
,
'bang = slapos.bang:main'
,
'slapos = slapos.entry:main'
,
'watchdog = slapos.grid.watchdog:main'
,
]
},
test_suite
=
"slapos.tests"
,
...
...
slapos/grid/SlapObject.py
View file @
16b2e8b8
...
...
@@ -43,6 +43,7 @@ from exception import BuildoutFailedError, WrongPermissionError, \
PathDoesNotExistError
from
networkcache
import
download_network_cached
,
upload_network_cached
import
tarfile
from
watchdog
import
getWatchdogID
REQUIRED_COMPUTER_PARTITION_PERMISSION
=
'0750'
...
...
@@ -237,6 +238,7 @@ class Partition(object):
self
.
software_path
=
software_path
self
.
instance_path
=
instance_path
self
.
run_path
=
os
.
path
.
join
(
self
.
instance_path
,
'etc'
,
'run'
)
self
.
service_path
=
os
.
path
.
join
(
self
.
instance_path
,
'etc'
,
'service'
)
self
.
supervisord_partition_configuration_path
=
\
supervisord_partition_configuration_path
self
.
supervisord_socket
=
supervisord_socket
...
...
@@ -276,6 +278,26 @@ class Partition(object):
gid
=
stat_info
.
st_gid
return
(
uid
,
gid
)
def
addServiceToGroup
(
self
,
partition_id
,
runner_list
,
path
,
extension
=
''
):
uid
,
gid
=
self
.
getUserGroupId
()
program_partition_template
=
pkg_resources
.
resource_stream
(
__name__
,
'templates/program_partition_supervisord.conf.in'
).
read
()
for
runner
in
runner_list
:
self
.
partition_supervisor_configuration
+=
'
\
n
'
+
\
program_partition_template
%
dict
(
program_id
=
'_'
.
join
([
partition_id
,
runner
]),
program_directory
=
self
.
instance_path
,
program_command
=
os
.
path
.
join
(
path
,
runner
),
program_name
=
runner
+
extension
,
instance_path
=
self
.
instance_path
,
user_id
=
uid
,
group_id
=
gid
,
# As supervisord has no environment to inherit setup minimalistic one
HOME
=
pwd
.
getpwuid
(
uid
).
pw_dir
,
USER
=
pwd
.
getpwuid
(
uid
).
pw_name
,
)
def
install
(
self
):
""" Creates configuration file from template in software_path, then
installs the software partition with the help of buildout
...
...
@@ -384,42 +406,35 @@ class Partition(object):
self
.
logger
.
info
(
"Generating supervisord config file from template..."
)
# check if CP/etc/run exists and it is a directory
# iterate over each file in CP/etc/run
# iterate over each file in CP/etc/service adding WatchdogID to their name
# if at least one is not 0750 raise -- partition has something funny
runner_list
=
[]
service_list
=
[]
if
os
.
path
.
exists
(
self
.
run_path
):
if
os
.
path
.
isdir
(
self
.
run_path
):
runner_list
=
os
.
listdir
(
self
.
run_path
)
if
len
(
runner_list
)
==
0
:
self
.
logger
.
warning
(
'No runners found for partition %r'
%
if
os
.
path
.
exists
(
self
.
service_path
):
if
os
.
path
.
isdir
(
self
.
service_path
):
service_list
=
os
.
listdir
(
self
.
service_path
)
if
len
(
runner_list
)
==
0
and
len
(
service_list
)
==
0
:
self
.
logger
.
warning
(
'No runners nor services found for partition %r'
%
self
.
partition_id
)
if
os
.
path
.
exists
(
self
.
supervisord_partition_configuration_path
):
os
.
unlink
(
self
.
supervisord_partition_configuration_path
)
else
:
partition_id
=
self
.
computer_partition
.
getId
()
program_partition_template
=
pkg_resources
.
resource_stream
(
__name__
,
'templates/program_partition_supervisord.conf.in'
).
read
()
group_partition_template
=
pkg_resources
.
resource_stream
(
__name__
,
'templates/group_partition_supervisord.conf.in'
).
read
()
partition_supervisor_configuration
=
group_partition_template
%
dict
(
self
.
partition_supervisor_configuration
=
group_partition_template
%
dict
(
instance_id
=
partition_id
,
program_list
=
','
.
join
([
'_'
.
join
([
partition_id
,
runner
])
for
runner
in
runner_list
]))
for
runner
in
runner_list
:
partition_supervisor_configuration
+=
'
\
n
'
+
\
program_partition_template
%
dict
(
program_id
=
'_'
.
join
([
partition_id
,
runner
]),
program_directory
=
self
.
instance_path
,
program_command
=
os
.
path
.
join
(
self
.
run_path
,
runner
),
program_name
=
runner
,
instance_path
=
self
.
instance_path
,
user_id
=
uid
,
group_id
=
gid
,
# As supervisord has no environment to inherit setup minimalistic one
HOME
=
pwd
.
getpwuid
(
uid
).
pw_dir
,
USER
=
pwd
.
getpwuid
(
uid
).
pw_name
,
)
for
runner
in
runner_list
+
service_list
]))
# Same method to add to service and run
self
.
addServiceToGroup
(
partition_id
,
runner_list
,
self
.
run_path
)
self
.
addServiceToGroup
(
partition_id
,
service_list
,
self
.
service_path
,
extension
=
getWatchdogID
())
utils
.
updateFile
(
self
.
supervisord_partition_configuration_path
,
partition_supervisor_configuration
)
self
.
partition_supervisor_configuration
)
self
.
updateSupervisor
()
def
start
(
self
):
...
...
slapos/grid/slapgrid.py
View file @
16b2e8b8
...
...
@@ -451,6 +451,17 @@ class Slapgrid(object):
computer_partition_filter_list
.
split
(
","
)
self
.
maximum_periodicity
=
maximum_periodicity
self
.
force_periodicity
=
force_periodicity
# XXX hardcoded watchdog_path
self
.
watchdog_path
=
'/opt/slapos/bin/watchdog'
def
getWatchdogLine
(
self
):
invocation_list
=
[
self
.
watchdog_path
]
invocation_list
.
append
(
"--master-url '%s' "
%
self
.
master_url
)
if
self
.
key_file
is
not
None
and
self
.
cert_file
is
not
None
:
invocation_list
.
append
(
"--cert-file %s"
%
self
.
cert_file
)
invocation_list
.
append
(
"--key-file %s"
%
self
.
key_file
)
invocation_list
.
append
(
"--computer-id '%s'"
%
self
.
computer_id
)
return
' '
.
join
(
invocation_list
)
def
checkEnvironmentAndCreateStructure
(
self
):
"""Checks for software_root and instance_root existence, then creates
...
...
@@ -486,6 +497,7 @@ class Slapgrid(object):
supervisord_pidfile
=
os
.
path
.
abspath
(
os
.
path
.
join
(
self
.
instance_root
,
'var'
,
'run'
,
'supervisord.pid'
)),
supervisord_logfile_backups
=
'10'
,
watchdog_command
=
self
.
getWatchdogLine
(),
))
except
(
WrongPermissionError
,
PathDoesNotExistError
)
as
error
:
raise
error
...
...
slapos/grid/templates/supervisord.conf.in
View file @
16b2e8b8
...
...
@@ -18,3 +18,7 @@ logfile-backups = %(supervisord_logfile_backups)s
[unix_http_server]
file=%(supervisord_socket)s
chmod=0700
[eventlistener:watchdog]
command=%(watchdog_command)s
events=PROCESS_STATE_EXITED, PROCESS_STATE_FATAL
slapos/grid/watchdog.py
0 → 100644
View file @
16b2e8b8
# -*- coding: utf-8 -*-
##############################################################################
#
# Copyright (c) 2012 Vifib SARL and Contributors.
# All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly advised to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
import
argparse
import
slapos.slap.slap
import
subprocess
from
supervisor
import
childutils
import
sys
def
getWatchdogID
():
return
"-on-watch"
def
parseArgumentTuple
():
"""Parses arguments either from command line, from method parameters or from
config file. Then returns a new instance of slapgrid.Slapgrid with those
parameters. Also returns the options dict and unused variable list, and
configures logger.
"""
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"--master-url"
,
help
=
"The master server URL. Mandatory."
,
required
=
True
)
parser
.
add_argument
(
"--computer-id"
,
help
=
"The computer id defined in the server."
,
required
=
True
)
parser
.
add_argument
(
"--key-file"
,
help
=
"SSL Authorisation key file."
,
default
=
None
)
parser
.
add_argument
(
"--cert-file"
,
help
=
"SSL Authorisation certificate file."
,
default
=
None
)
option
=
parser
.
parse_args
()
# Build option_dict
option_dict
=
{}
for
argument_key
,
argument_value
in
vars
(
option
).
iteritems
():
option_dict
.
update
({
argument_key
:
argument_value
})
return
option_dict
class
Watchdog
():
process_state_events
=
[
'PROCESS_STATE_EXITED'
,
'PROCESS_STATE_FATAL'
]
def
__init__
(
self
,
option_dict
):
for
option
,
value
in
option_dict
.
items
():
setattr
(
self
,
option
,
value
)
self
.
stdin
=
sys
.
stdin
self
.
stdout
=
sys
.
stdout
self
.
stderr
=
sys
.
stderr
self
.
slap
=
slapos
.
slap
.
slap
()
self
.
slap
.
initializeConnection
(
slapgrid_uri
=
self
.
master_url
,
key_file
=
self
.
key_file
,
cert_file
=
self
.
cert_file
)
def
write_stdout
(
self
,
s
):
self
.
stdout
.
write
(
s
)
self
.
stdout
.
flush
()
def
write_stderr
(
self
,
s
):
self
.
stderr
.
write
(
s
)
self
.
stderr
.
flush
()
def
run
(
self
):
while
1
:
self
.
write_stdout
(
'READY
\
n
'
)
line
=
self
.
stdin
.
readline
()
# read header line from stdin
headers
=
dict
([
x
.
split
(
':'
)
for
x
in
line
.
split
()
])
data
=
sys
.
stdin
.
read
(
int
(
headers
[
'len'
]))
# read the event payload
self
.
handle_event
(
headers
,
data
)
self
.
write_stdout
(
'RESULT 2
\
n
OK'
)
# transition from READY to ACKNOWLEDGED
def
handle_event
(
self
,
headers
,
payload
):
if
headers
[
'eventname'
]
in
self
.
process_state_events
:
payload_dict
=
dict
([
x
.
split
(
':'
)
for
x
in
payload
.
split
()
])
if
getWatchdogID
()
in
payload_dict
[
'processname'
]:
self
.
handle_process_state_change_event
(
headers
,
payload_dict
)
def
handle_process_state_change_event
(
self
,
headers
,
payload_dict
):
partition_id
=
payload_dict
[
'groupname'
]
partition
=
slapos
.
slap
.
ComputerPartition
(
computer_id
=
self
.
computer_id
,
connection_helper
=
self
.
slap
.
_connection_helper
,
partition_id
=
partition_id
)
partition
.
bang
(
"%s process in partition %s encountered a problem"
%
(
payload_dict
[
'processname'
],
partition_id
))
def
main
():
watchdog
=
Watchdog
(
parseArgumentTuple
())
watchdog
.
run
()
if
__name__
==
'__main__'
:
main
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment