Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
S
slapos.core
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Léo-Paul Géneau
slapos.core
Commits
b39fd3d6
Commit
b39fd3d6
authored
Jan 18, 2023
by
Rafael Monnerat
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
slapos_crm: Update SupportRequest_recheckMonitoring
Update script to handle also stalled cases
parent
f21f0465
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
75 additions
and
24 deletions
+75
-24
master/bt5/slapos_crm/SkinTemplateItem/portal_skins/slapos_crm_monitoring/ComputeNode_checkState.py
...tal_skins/slapos_crm_monitoring/ComputeNode_checkState.py
+16
-18
master/bt5/slapos_crm/SkinTemplateItem/portal_skins/slapos_crm_monitoring/SupportRequest_recheckMonitoring.py
...slapos_crm_monitoring/SupportRequest_recheckMonitoring.py
+59
-6
No files found.
master/bt5/slapos_crm/SkinTemplateItem/portal_skins/slapos_crm_monitoring/ComputeNode_checkState.py
View file @
b39fd3d6
...
@@ -54,26 +54,25 @@ if not should_notify:
...
@@ -54,26 +54,25 @@ if not should_notify:
compute_partition_uid_list
=
[
compute_partition_uid_list
=
[
x
.
getUid
()
for
x
in
context
.
contentValues
(
portal_type
=
"Compute Partition"
)
x
.
getUid
()
for
x
in
context
.
contentValues
(
portal_type
=
"Compute Partition"
)
if
x
.
getSlapState
()
==
'busy'
]
if
x
.
getSlapState
()
==
'busy'
]
if
compute_partition_uid_list
:
if
compute_partition_uid_list
:
instance_list
=
portal
.
portal_catalog
(
instance_list
=
portal
.
portal_catalog
(
portal_type
=
'Software Instance'
,
portal_type
=
'Software Instance'
,
default_aggregate_uid
=
compute_partition_uid_list
)
default_aggregate_uid
=
compute_partition_uid_list
)
if
instance_list
:
if
instance_list
:
should_notify
=
True
should_notify
=
True
description
=
"The Compute Node %s (%s) didnt process its instances for more them 24 hours"
%
(
description
=
"The Compute Node %s (%s) didnt process its instances for more them 24 hours, last contact: %s"
compute_node_title
,
reference
)
for
instance
in
instance_list
:
for
instance
in
instance_list
:
instance_access_status
=
instance
.
getAccessStatus
()
instance_access_status
=
instance
.
getAccessStatus
()
if
instance_access_status
.
get
(
'no_data'
,
None
):
if
instance_access_status
.
get
(
'no_data'
,
None
):
# Ignore if there isnt any data
# Ignore if there isnt any data
continue
continue
# At lest one partition contacted in the last 24h30min.
# At lest one partition contacted in the last 24h30min.
last_contact
=
max
(
DateTime
(
instance_access_status
.
get
(
'created_at'
)),
last_contact
)
last_contact
=
max
(
DateTime
(
instance_access_status
.
get
(
'created_at'
)),
last_contact
)
if
(
now
-
DateTime
(
instance_access_status
.
get
(
'created_at'
)))
<
1.0
1
:
if
(
now
-
DateTime
(
instance_access_status
.
get
(
'created_at'
)))
<
1.0
5
:
should_notify
=
False
should_notify
=
False
description
=
""
description
=
""
break
break
...
@@ -91,8 +90,7 @@ if not should_notify:
...
@@ -91,8 +90,7 @@ if not should_notify:
if
software_installation_list
:
if
software_installation_list
:
should_notify
=
True
should_notify
=
True
description
=
"The Compute Node %s (%s) didnt process its software releases for more them 24 hours"
%
(
description
=
"The Compute Node %s (%s) didnt process its software releases for more them 24 hours, last contact %s"
compute_node_title
,
reference
)
# Test if server didnt process the internal softwares releases for more them 24h
# Test if server didnt process the internal softwares releases for more them 24h
for
installation
in
software_installation_list
:
for
installation
in
software_installation_list
:
...
@@ -100,7 +98,7 @@ if not should_notify:
...
@@ -100,7 +98,7 @@ if not should_notify:
if
installation_access_status
.
get
(
'no_data'
,
None
):
if
installation_access_status
.
get
(
'no_data'
,
None
):
# Ignore if there isnt any data on it
# Ignore if there isnt any data on it
continue
continue
last_contact
=
max
(
DateTime
(
installation_access_status
.
get
(
'created_at'
)),
last_contact
)
last_contact
=
max
(
DateTime
(
installation_access_status
.
get
(
'created_at'
)),
last_contact
)
if
(
now
-
DateTime
(
installation_access_status
.
get
(
'created_at'
)))
<
1.01
:
if
(
now
-
DateTime
(
installation_access_status
.
get
(
'created_at'
)))
<
1.01
:
should_notify
=
False
should_notify
=
False
...
@@ -119,33 +117,33 @@ if should_notify:
...
@@ -119,33 +117,33 @@ if should_notify:
if
support_request
is
None
:
if
support_request
is
None
:
person
.
notify
(
support_request_title
=
ticket_title
,
person
.
notify
(
support_request_title
=
ticket_title
,
support_request_description
=
description
,
support_request_description
=
description
%
(
context
.
getTitle
(),
reference
,
last_contact
)
,
aggregate
=
context
.
getRelativeUrl
())
aggregate
=
context
.
getRelativeUrl
())
support_request_relative_url
=
context
.
REQUEST
.
get
(
"support_request_relative_url"
)
support_request_relative_url
=
context
.
REQUEST
.
get
(
"support_request_relative_url"
)
if
support_request_relative_url
is
None
:
if
support_request_relative_url
is
None
:
return
return
support_request
=
portal
.
restrictedTraverse
(
support_request_relative_url
)
support_request
=
portal
.
restrictedTraverse
(
support_request_relative_url
)
if
support_request
is
None
:
if
support_request
is
None
:
return
return
# Send Notification message
# Send Notification message
notification_message
=
portal
.
portal_notifications
.
getDocumentValue
(
notification_message
=
portal
.
portal_notifications
.
getDocumentValue
(
reference
=
notification_message_reference
)
reference
=
notification_message_reference
)
if
notification_message
is
None
:
if
notification_message
is
None
:
message
=
"""%s"""
%
description
message
=
"""%s"""
%
(
description
%
(
context
.
getTitle
(),
reference
,
last_contact
))
else
:
else
:
mapping_dict
=
{
'compute_node_title'
:
context
.
getTitle
(),
mapping_dict
=
{
'compute_node_title'
:
context
.
getTitle
(),
'compute_node_id'
:
reference
,
'compute_node_id'
:
reference
,
'last_contact'
:
last_contact
}
'last_contact'
:
last_contact
}
message
=
notification_message
.
asText
(
message
=
notification_message
.
asText
(
substitution_method_parameter_dict
=
{
'mapping_dict'
:
mapping_dict
})
substitution_method_parameter_dict
=
{
'mapping_dict'
:
mapping_dict
})
event
=
support_request
.
SupportRequest_getLastEvent
(
ticket_title
)
event
=
support_request
.
SupportRequest_getLastEvent
(
ticket_title
)
if
event
is
None
:
if
event
is
None
:
support_request
.
notify
(
message_title
=
ticket_title
,
message
=
message
)
support_request
.
notify
(
message_title
=
ticket_title
,
message
=
message
)
return
support_request
return
support_request
master/bt5/slapos_crm/SkinTemplateItem/portal_skins/slapos_crm_monitoring/SupportRequest_recheckMonitoring.py
View file @
b39fd3d6
#
#
# XXX This ticket contains dupplicated coded found arround SlapOS
# XXX This ticket contains dupplicated coded found arround SlapOS
# It is required to rewrite this in a generic way.
# It is required to rewrite this in a generic way.
# See also: InstanceTree_checkSoftwareInstanceState
# See also: InstanceTree_checkSoftwareInstanceState
# See also: ComputeNode_checkState
# See also: ComputeNode_checkState
#
#
...
@@ -13,6 +13,9 @@ if context.getSimulationState() == "invalidated":
...
@@ -13,6 +13,9 @@ if context.getSimulationState() == "invalidated":
if
context
.
getPortalType
()
!=
"Support Request"
:
if
context
.
getPortalType
()
!=
"Support Request"
:
return
"Not a Support Request"
return
"Not a Support Request"
now
=
DateTime
()
portal
=
context
.
getPortalObject
()
document
=
context
.
getAggregateValue
()
document
=
context
.
getAggregateValue
()
if
document
is
None
:
if
document
is
None
:
return
True
return
True
...
@@ -25,13 +28,63 @@ if aggregate_portal_type == "Compute Node":
...
@@ -25,13 +28,63 @@ if aggregate_portal_type == "Compute Node":
d
=
document
.
getAccessStatus
()
d
=
document
.
getAccessStatus
()
if
d
.
get
(
"no_data"
,
None
)
==
1
:
if
d
.
get
(
"no_data"
,
None
)
==
1
:
return
"No Contact Information"
return
"No Contact Information"
last_contact
=
DateTime
(
d
.
get
(
'created_at'
))
last_contact
=
DateTime
(
d
.
get
(
'created_at'
))
if
(
DateTime
()
-
last_contact
)
<
0.01
:
if
(
now
-
last_contact
)
<
0.01
:
# If server has no partitions skip
compute_partition_uid_list
=
[
x
.
getUid
()
for
x
in
document
.
contentValues
(
portal_type
=
"Compute Partition"
)
if
x
.
getSlapState
()
==
'busy'
]
if
compute_partition_uid_list
:
is_instance_stalled
=
True
last_contact
=
None
instance_list
=
portal
.
portal_catalog
(
portal_type
=
'Software Instance'
,
default_aggregate_uid
=
compute_partition_uid_list
)
for
instance
in
instance_list
:
instance_access_status
=
instance
.
getAccessStatus
()
if
instance_access_status
.
get
(
'no_data'
,
None
):
# Ignore if there isnt any data
continue
# At lest one partition contacted in the last 24h30min.
last_contact
=
max
(
DateTime
(
instance_access_status
.
get
(
'created_at'
)),
last_contact
)
if
(
now
-
DateTime
(
instance_access_status
.
get
(
'created_at'
)))
<
1.05
:
is_instance_stalled
=
False
break
if
is_instance_stalled
and
len
(
instance_list
):
return
"Process instance stalled, last contact was %s"
%
last_contact
# Since server is contacting, check for stalled software releases processes
is_software_stalled
=
True
last_contact
=
None
software_installation_list
=
portal
.
portal_catalog
(
portal_type
=
'Software Installation'
,
default_aggregate_uid
=
document
.
getUid
(),
validation_state
=
'validated'
)
# Test if server didnt process the internal softwares releases for more them 24h
for
installation
in
software_installation_list
:
installation_access_status
=
installation
.
getAccessStatus
()
if
installation_access_status
.
get
(
'no_data'
,
None
):
# Ignore if there isnt any data on it
continue
last_contact
=
max
(
DateTime
(
installation_access_status
.
get
(
'created_at'
)),
last_contact
)
if
(
now
-
DateTime
(
installation_access_status
.
get
(
'created_at'
)))
<
1.01
:
is_software_stalled
=
False
break
if
is_software_stalled
and
len
(
software_installation_list
):
return
"Process instance stalled, last contact was %s"
%
last_contact
return
"All OK, latest contact: %s "
%
last_contact
return
"All OK, latest contact: %s "
%
last_contact
else
:
else
:
return
"Problem, latest contact: %s"
%
last_contact
return
"Problem, latest contact: %s"
%
last_contact
if
aggregate_portal_type
==
"Software Installation"
:
if
aggregate_portal_type
==
"Software Installation"
:
compute_node_title
=
document
.
getAggregateTitle
()
compute_node_title
=
document
.
getAggregateTitle
()
if
document
.
getAggregateValue
().
getMonitorScope
()
==
"disabled"
:
if
document
.
getAggregateValue
().
getMonitorScope
()
==
"disabled"
:
...
@@ -40,11 +93,11 @@ if aggregate_portal_type == "Software Installation":
...
@@ -40,11 +93,11 @@ if aggregate_portal_type == "Software Installation":
if
document
.
getSlapState
()
not
in
[
"start_requested"
,
"stop_requested"
]:
if
document
.
getSlapState
()
not
in
[
"start_requested"
,
"stop_requested"
]:
return
"Software Installation is Destroyed."
return
"Software Installation is Destroyed."
d
=
contex
t
.
getAccessStatus
()
d
=
documen
t
.
getAccessStatus
()
if
d
.
get
(
"no_data"
,
None
)
==
1
:
if
d
.
get
(
"no_data"
,
None
)
==
1
:
return
"The software release %s did not started to build on %s since %s"
%
\
return
"The software release %s did not started to build on %s since %s"
%
\
(
document
.
getUrlString
(),
compute_node_title
,
document
.
getCreationDate
())
(
document
.
getUrlString
(),
compute_node_title
,
document
.
getCreationDate
())
last_contact
=
DateTime
(
d
.
get
(
'created_at'
))
last_contact
=
DateTime
(
d
.
get
(
'created_at'
))
if
d
.
get
(
"text"
).
startswith
(
"building"
):
if
d
.
get
(
"text"
).
startswith
(
"building"
):
return
"The software release %s is building for mode them 12 hours on %s, started on %s"
%
\
return
"The software release %s is building for mode them 12 hours on %s, started on %s"
%
\
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment