Commit 9be1c9a9 authored by Jim Fulton's avatar Jim Fulton

Added a nagios monitor.

parent 14f0257d
......@@ -131,6 +131,7 @@ setup(name="ZEO",
runzeo = ZEO.runzeo:main
zeopasswd = ZEO.zeopasswd:main
zeoctl = ZEO.zeoctl:main
zeo-nagios = ZEO.nagios:main
""",
include_package_data = True,
)
from __future__ import print_function
##############################################################################
#
# Copyright (c) 2011 Zope Foundation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""%prog [options] address
Where the address is an IPV6 address of the form: [addr]:port, an IPV4
address of the form: addr:port, or the name of a unix-domain socket file.
"""
import json
import optparse
import os
import re
import socket
import struct
import sys
import time
# Value of a storage's 'last-transaction' field that check() treats as
# "empty storage": sixteen '0' characters.
NO_TRANSACTION = '0000000000000000'

# Metrics that check() reports with their current value, unchanged.
nodiff_names = ['active_txns', 'connections', 'waiting']

# Metrics that check() reports as a difference from the previous sample,
# divided by the elapsed time.
diff_names = [
    'aborts',
    'commits',
    'conflicts',
    'conflicts_resolved',
    'loads',
    'stores',
]

# Number of seconds in each supported time unit; check() divides the
# elapsed seconds by the entry selected with its ``per`` argument.
per_times = {
    'seconds': 1.0,
    'minutes': 60.0,
    'hours': 3600.0,
    'days': 86400.0,
}
def new_metric(metrics, storage_id, name, value):
    """Append a ``label=value`` entry for one metric to ``metrics``.

    The label is just ``name`` for the storage with id ``'1'``; for any
    other storage it is ``storage_id:name``, wrapped in single quotes when
    the storage id contains a space.
    """
    if storage_id == '1':
        label = name
    else:
        label = "%s:%s" % (storage_id, name)
        if ' ' in storage_id:
            label = "'" + label + "'"
    entry = "%s=%s" % (label, value)
    metrics.append(entry)
def result(messages, metrics=(), status=None):
    """Print the plugin output and return ``status``.

    ``messages`` is a sequence of text lines and ``metrics`` a sequence of
    ``label=value`` strings.  The first metric is appended to the first
    message after a ``|``; any remaining metrics are emitted after the
    messages, introduced by ``| `` and separated by a newline and a space.

    The caller's ``messages`` sequence is copied rather than modified, so
    tuples are accepted and lists are left untouched.
    """
    lines = list(messages)
    if metrics:
        lines[0] += '|' + metrics[0]
        if len(metrics) > 1:
            lines.append('| ' + '\n '.join(metrics[1:]))
    print('\n'.join(lines))
    return status
def error(message):
    """Print ``message`` with no metrics and return status 2."""
    return result((message,), status=2)
def warn(message):
    """Print ``message`` with no metrics and return status 1."""
    return result((message,), status=1)
def _recv_exactly(sock, size):
    """Read ``size`` bytes from ``sock``, looping over short reads.

    Fewer than ``size`` bytes are returned only if the peer closes the
    connection before sending them all.
    """
    chunks = []
    remaining = size
    while remaining > 0:
        chunk = sock.recv(remaining)
        if not chunk:
            break
        chunks.append(chunk)
        remaining -= len(chunk)
    return b''.join(chunks)


def _recv_sized(sock):
    """Read one frame: a 4-byte big-endian length, then that many bytes."""
    (size,) = struct.unpack(">I", _recv_exactly(sock, 4))
    return _recv_exactly(sock, size)


def check(addr, output_metrics, status, per):
    """Query a ZEO server's status and print a monitoring report.

    Arguments:

    - ``addr``: ``[host]:port`` (IPv6), ``host:port`` (IPv4), or anything
      else, which is used as the path of a unix-domain socket.
    - ``output_metrics``: when true, include the level metrics listed in
      ``nodiff_names``.
    - ``status``: path of a status file, or a false value.  When given, the
      current sample is saved there, and if an older sample exists the
      ``diff_names`` metrics are reported as rates.
    - ``per``: a key of ``per_times`` selecting the time unit for rates.

    Returns 2 if the connection fails, 1 if the server reports no storages
    or any storage is empty, and ``None`` otherwise.
    """
    m = re.match(r'\[(\S+)\]:(\d+)$', addr)
    if m:
        addr = m.group(1), int(m.group(2))
        s = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
    else:
        m = re.match(r'(\S+):(\d+)$', addr)
        if m:
            addr = m.group(1), int(m.group(2))
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        else:
            s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)

    # The socket is closed on every path, including connection failure.
    try:
        try:
            s.connect(addr)
        except socket.error:
            return error("Can't connect %s" % sys.exc_info()[1])

        # Bytes, not text: sockets only accept bytes on Python 3.
        s.sendall(b'\x00\x00\x00\x04ruok')
        # The first frame has to be consumed to reach the second one;
        # its content is not used.
        _recv_sized(s)
        datas = _recv_sized(s)
    finally:
        s.close()

    data = json.loads(datas.decode('utf-8'))
    if not data:
        return warn("No storages")

    metrics = []
    messages = []
    level = 0

    if output_metrics:
        for storage_id, sdata in data.items():
            for name in nodiff_names:
                new_metric(metrics, storage_id, name, sdata[name])

    if status:
        now = time.time()
        if os.path.exists(status):
            # Elapsed time since the previous sample was written.
            dt = now - os.stat(status).st_mtime
            if dt > 0:  # sanity :)
                with open(status) as f:  # Read previous
                    old = json.loads(f.read())
                dt /= per_times[per]
                for storage_id, sdata in data.items():
                    # Key spelling (sic) kept so the saved status data
                    # keeps its existing format.
                    sdata['sameple-time'] = now
                    if storage_id in old:
                        sold = old[storage_id]
                        for name in diff_names:
                            v = (sdata[name] - sold[name]) / dt
                            new_metric(metrics, storage_id, name, v)
        with open(status, 'w') as f:  # save current
            f.write(json.dumps(data))

    for storage_id, sdata in data.items():
        if sdata['last-transaction'] == NO_TRANSACTION:
            messages.append("Empty storage %r" % storage_id)
            level = max(level, 1)

    if not messages:
        messages.append('OK')
    return result(messages, metrics, level or None)
def main(args=None):
    """Command-line entry point: parse options and run ``check``.

    ``args`` is the argument list without the program name; when ``None``,
    ``sys.argv[1:]`` is used.  Exactly one positional argument, the server
    address, is required; anything else ends the program through
    ``parser.error`` with a usage message instead of a traceback.

    Returns whatever ``check`` returns.
    """
    if args is None:
        args = sys.argv[1:]

    parser = optparse.OptionParser(__doc__)
    parser.add_option(
        '-m', '--output-metrics', action="store_true",
        help="Output metrics."
        )
    parser.add_option(
        '-s', '--status-path',
        help="Path to status file, needed to get rate metrics"
        )
    parser.add_option(
        '-u', '--time-units', type='choice', default='minutes',
        choices=['seconds', 'minutes', 'hours', 'days'],
        help="Time unit for rate metrics"
        )
    (options, args) = parser.parse_args(args)

    if len(args) != 1:
        parser.error("expected exactly one address argument, got %d"
                     % len(args))
    [addr] = args

    return check(
        addr, options.output_metrics, options.status_path, options.time_units)
if __name__ == '__main__':
    # Use the value returned by main() as the process exit status
    # (None exits with 0) instead of discarding it.
    sys.exit(main())
ZEO Nagios plugin
=================
ZEO includes a script that provides a nagios monitor plugin:
>>> import pkg_resources, time
>>> nagios = pkg_resources.load_entry_point(
... 'ZEO', 'console_scripts', 'zeo-nagios')
In its simplest form, the script just checks if it can get status:
>>> import ZEO
>>> addr, stop = ZEO.server('test.fs')
>>> saddr = ':'.join(map(str, addr)) # (host, port) -> host:port
>>> nagios([saddr])
Empty storage u'1'
1
The storage was empty. In that case, the monitor warned as much.
Let's add some data:
>>> ZEO.DB(addr).close()
>>> nagios([saddr])
OK
If we stop the server, we'll error:
>>> stop()
>>> nagios([saddr])
Can't connect [Errno 61] Connection refused
2
Metrics
-------
The monitor will optionally output server metric data. There are 2
kinds of metrics it can output: level metrics and rate metrics. If we use
the -m/--output-metrics option, we'll just get level metrics:
>>> addr, stop = ZEO.server('test.fs')
>>> saddr = ':'.join(map(str, addr)) # (host, port) -> host:port
>>> nagios([saddr, '-m'])
OK|active_txns=0
| connections=0
waiting=0
We only got the metrics that are levels, like current number of
connections. If we want rate metrics, we need to be able to save
values from run to run. We need to use the -s/--status-path option to
specify the name of a file for status information:
>>> nagios([saddr, '-m', '-sstatus'])
OK|active_txns=0
| connections=0
waiting=0
We still didn't get any rate metrics, because we've only run once.
Let's actually do something with the database and then make another
sample.
>>> db = ZEO.DB(addr)
>>> nagios([saddr, '-m', '-sstatus'])
OK|active_txns=0
| connections=1
waiting=0
aborts=0.0
commits=0.0
conflicts=0.0
conflicts_resolved=0.0
loads=81.226297803
stores=0.0
Note that this time, we saw that there was a connection.
The ZEO.nagios module provides a check function that can be used by
other monitors (e.g. that get address data from ZooKeeper). It takes:
- Address string,
- Metrics flag,
- Status file name (or None), and
- Time units for rate metrics
::
>>> import ZEO.nagios
>>> ZEO.nagios.check(saddr, True, 'status', 'seconds')
OK|active_txns=0
| connections=1
waiting=0
aborts=0.0
commits=0.0
conflicts=0.0
conflicts_resolved=0.0
loads=0.0
stores=0.0
>>> db.close()
>>> stop()
Multi-storage servers
---------------------
A ZEO server can host multiple storages. (This is a feature that will
likely be dropped in the future.) When this is the case, the monitor
prefixes metrics with a storage id.
>>> addr, stop = ZEO.server(
... storage_conf = """
... <mappingstorage first>
... </mappingstorage>
... <mappingstorage second>
... </mappingstorage>
... """)
>>> saddr = ':'.join(map(str, addr)) # (host, port) -> host:port
>>> nagios([saddr, '-m', '-sstatus'])
Empty storage u'second'|second:active_txns=0
Empty storage u'first'
| second:connections=0
second:waiting=0
first:active_txns=0
first:connections=0
first:waiting=0
1
>>> nagios([saddr, '-m', '-sstatus'])
Empty storage u'second'|second:active_txns=0
Empty storage u'first'
| second:connections=0
second:waiting=0
first:active_txns=0
first:connections=0
first:waiting=0
second:aborts=0.0
second:commits=0.0
second:conflicts=0.0
second:conflicts_resolved=0.0
second:loads=0.0
second:stores=0.0
first:aborts=0.0
first:commits=0.0
first:conflicts=0.0
first:conflicts_resolved=0.0
first:loads=0.0
first:stores=0.0
1
>>> stop()
......@@ -1790,6 +1790,8 @@ def test_suite():
(re.compile("ZODB.POSException.ConflictError"), "ConflictError"),
(re.compile("ZODB.POSException.POSKeyError"), "POSKeyError"),
(re.compile("ZEO.Exceptions.ClientStorageError"), "ClientStorageError"),
(re.compile(r"\[Errno \d+\]"), '[Errno N]'),
(re.compile(r"loads=\d+\.\d+"), 'loads=42.42'),
]
if not PY3:
patterns.append((re.compile("^'(blob[^']*)'"), r"b'\1'"))
......@@ -1813,7 +1815,7 @@ def test_suite():
'zeo-fan-out.test', 'zdoptions.test',
'drop_cache_rather_than_verify.txt', 'client-config.test',
'protocols.test', 'zeo_blob_cache.test', 'invalidation-age.txt',
'dynamic_server_ports.test', 'new_addr.test',
'dynamic_server_ports.test', 'new_addr.test', '../nagios.rst',
setUp=forker.setUp, tearDown=zope.testing.setupstack.tearDown,
checker=renormalizing.RENormalizing(patterns),
globs={'print_function': print_function},
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment