Commit ba2153ff authored by Hanno Schlichting's avatar Hanno Schlichting

Merged andig-catalog-report branch

parents 0f5f1f07 a0db671f
...@@ -28,6 +28,7 @@ from BTrees.OIBTree import OIBTree ...@@ -28,6 +28,7 @@ from BTrees.OIBTree import OIBTree
from BTrees.IOBTree import IOBTree from BTrees.IOBTree import IOBTree
from Lazy import LazyMap, LazyCat, LazyValues from Lazy import LazyMap, LazyCat, LazyValues
from CatalogBrains import AbstractCatalogBrain, NoBrainer from CatalogBrains import AbstractCatalogBrain, NoBrainer
from .report import CatalogReport
LOG = logging.getLogger('Zope.ZCatalog') LOG = logging.getLogger('Zope.ZCatalog')
...@@ -468,12 +469,18 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -468,12 +469,18 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
# Note that if the indexes find query arguments, but the end result # Note that if the indexes find query arguments, but the end result
# is an empty sequence, we do nothing # is an empty sequence, we do nothing
cr = self.getCatalogReport(request)
cr.start()
for i in self.indexes.keys(): for i in self.indexes.keys():
index = self.getIndex(i) index = self.getIndex(i)
_apply_index = getattr(index, "_apply_index", None) _apply_index = getattr(index, "_apply_index", None)
if _apply_index is None: if _apply_index is None:
continue continue
cr.split(i)
r = _apply_index(request) r = _apply_index(request)
cr.split(i)
if r is not None: if r is not None:
r, u = r r, u = r
...@@ -481,6 +488,8 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -481,6 +488,8 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
if not rs: if not rs:
break break
cr.stop()
if rs is None: if rs is None:
# None of the indexes found anything to do with the request # None of the indexes found anything to do with the request
# We take this to mean that the query was empty (an empty filter) # We take this to mean that the query was empty (an empty filter)
...@@ -738,6 +747,13 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base): ...@@ -738,6 +747,13 @@ class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
__call__ = searchResults __call__ = searchResults
def getCatalogReport(self, request=None):
"""Reports about the duration of queries.
"""
parent = Acquisition.aq_base(Acquisition.aq_parent(self))
threshold = getattr(parent, 'long_query_time', 0.1)
return CatalogReport(self, request, threshold)
class CatalogSearchArgumentsMap: class CatalogSearchArgumentsMap:
"""Multimap catalog arguments coming simultaneously from keywords """Multimap catalog arguments coming simultaneously from keywords
......
...@@ -16,6 +16,7 @@ $Id$ ...@@ -16,6 +16,7 @@ $Id$
""" """
import logging import logging
import operator
import sys import sys
import string import string
import time import time
...@@ -109,6 +110,9 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -109,6 +110,9 @@ class ZCatalog(Folder, Persistent, Implicit):
'action': 'manage_catalogFind'}, 'action': 'manage_catalogFind'},
{'label': 'Advanced', # TAB: Advanced {'label': 'Advanced', # TAB: Advanced
'action': 'manage_catalogAdvanced'}, 'action': 'manage_catalogAdvanced'},
{'label': 'Query Report',
'action': 'manage_catalogReport',
},
{'label': 'Undo', # TAB: Undo {'label': 'Undo', # TAB: Undo
'action': 'manage_UndoForm'}, 'action': 'manage_UndoForm'},
{'label': 'Security', # TAB: Security {'label': 'Security', # TAB: Security
...@@ -139,6 +143,11 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -139,6 +143,11 @@ class ZCatalog(Folder, Persistent, Implicit):
'manage_catalogAdvanced') 'manage_catalogAdvanced')
manage_catalogAdvanced = DTMLFile('dtml/catalogAdvanced', globals()) manage_catalogAdvanced = DTMLFile('dtml/catalogAdvanced', globals())
security.declareProtected(manage_zcatalog_entries,
'manage_catalogReport')
manage_catalogReport = DTMLFile('dtml/catalogReport',
globals())
security.declareProtected(manage_zcatalog_entries, security.declareProtected(manage_zcatalog_entries,
'manage_objectInformation') 'manage_objectInformation')
manage_objectInformation = DTMLFile('dtml/catalogObjectInformation', manage_objectInformation = DTMLFile('dtml/catalogObjectInformation',
...@@ -147,6 +156,8 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -147,6 +156,8 @@ class ZCatalog(Folder, Persistent, Implicit):
Indexes = ZCatalogIndexes() Indexes = ZCatalogIndexes()
threshold=10000 threshold=10000
long_query_time = 0.1
_v_total=0 _v_total=0
_v_transaction = None _v_transaction = None
...@@ -170,6 +181,8 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -170,6 +181,8 @@ class ZCatalog(Folder, Persistent, Implicit):
self._catalog = Catalog() self._catalog = Catalog()
self._migrated_280 = True self._migrated_280 = True
self.long_query_time = 0.1 # in seconds
def __len__(self): def __len__(self):
# Perform a migration of _catalog.__len__ to _catalog._length # Perform a migration of _catalog.__len__ to _catalog._length
# to avoid with new-style class caching issues (see #1332) # to avoid with new-style class caching issues (see #1332)
...@@ -966,6 +979,40 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -966,6 +979,40 @@ class ZCatalog(Folder, Persistent, Implicit):
def delColumn(self, name): def delColumn(self, name):
return self._catalog.delColumn(name) return self._catalog.delColumn(name)
#
# Catalog report methods
#
security.declareProtected(manage_zcatalog_entries, 'getCatalogReport')
def getCatalogReport(self):
""" Reports about the duration of queries """
rval = self._catalog.getCatalogReport().report()
rval.sort(key=operator.itemgetter('duration'), reverse=True)
return rval
security.declareProtected(manage_zcatalog_entries,
'manage_resetCatalogReport')
def manage_resetCatalogReport(self, REQUEST=None):
""" resets the catalog report """
self._catalog.getCatalogReport().reset()
if REQUEST is not None:
REQUEST.response.redirect(REQUEST.URL1 +
'/manage_catalogReport?manage_tabs_message=Report%20cleared')
security.declareProtected(manage_zcatalog_entries,
'manage_editCatalogReport')
def manage_editCatalogReport(self, long_query_time=0.1, REQUEST=None):
""" edit the long query time """
if not isinstance(long_query_time, float):
long_query_time = float(long_query_time)
self.long_query_time = long_query_time
if REQUEST is not None:
REQUEST.response.redirect(REQUEST.URL1 +
'/manage_catalogReport?manage_tabs_message=' +
'Long%20query%20time%20changed')
InitializeClass(ZCatalog) InitializeClass(ZCatalog)
......
<dtml-var manage_page_header>
<dtml-var manage_tabs>
<p class="form-help">
The <strong>query report</strong> shows catalog queries that
perform slowly.
</p>
<table width="100%" cellspacing="0" cellpadding="2" border="0">
<tr class="list-header" >
<td align="left" valign="top">
<div class="list-nav">
Mean duration&nbsp;[ms]
</div>
</td>
<td align="left" valign="top">
<div class="list-nav">
Hits
</div>
</td>
<td align="left" valign="top">
<div class="list-nav">
Query key
</div>
</td>
<td align="left" valign="top">
<div class="list-nav">
Recent duration&nbsp;[ms]
</div>
</td>
</tr>
<dtml-if getCatalogReport>
<dtml-in getCatalogReport mapping>
<dtml-if sequence-odd>
<tr class="row-normal">
<dtml-else>
<tr class="row-hilite">
</dtml-if>
<td align="left" valign="top">
<div class="list-item">
<dtml-var expr="'%3.2f' % duration">
</div>
</td>
<td align="left" valign="top">
<div class="list-item">
&dtml-counter;
</div>
</td>
<td align="left" valign="top">
<div class="list-item">
&dtml-query;
</div>
</td>
<td align="left" valign="top">
<div class="list-item">
<dtml-var expr="'%3.2f' % last['duration']">
[<dtml-in expr="last['details']" sort mapping>
&dtml-id;:&nbsp;<dtml-var expr="'%3.2f' % duration">
</dtml-in>]
</div>
</td>
</tr>
</dtml-in>
<tr>
<td colspan="2" align="left" valign="top">
<p class="form-help">Resetting the catalog report will reinitialize the report log.</p>
</td>
<td colspan="2" align="right" valign="top">
<form action="manage_resetCatalogReport" method=POST>
<div class="form-element">
<input class="form-element" type="submit" value="Reset Report">
</div>
</form>
</td>
</tr>
<dtml-else>
<tr>
<td colspan="4" >
<div class="list-item">
Report is empty.
</div>
</td>
</tr>
</dtml-if>
</table>
<form action="manage_editCatalogReport" method="post">
<table width="100%" style="padding-top:1em;" cellspacing="0" cellpadding="2" border="0">
<tr class="section-bar">
<td colspan="3" align="left">
<div class="form-label">
Settings
</div>
</td>
</tr>
<tr>
<td align="right" valign="middle">
<div class="list-item">
Threshold (in seconds)
</div>
</td>
<td align="left" valign="middle">
<div class="form-element">
<input name="long_query_time:float" value="&dtml-long_query_time;" />
</div>
</td>
<td align="left" valign="middle">
<p class="form-help">Only queries whose execution
takes longer than the configured threshold are considered
being slow. (Default 0.1 seconds).</p>
</tr>
</table>
<input class="form-element" type="submit" value="Apply settings">
</form>
<dtml-var manage_page_footer>
##############################################################################
#
# Copyright (c) 2010 Zope Foundation and Contributors.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
import time
from thread import allocate_lock
from Acquisition import aq_base
from Acquisition import aq_parent
from Products.PluginIndexes.interfaces import IUniqueValueIndex
reports_lock = allocate_lock()
reports = {}
value_indexes_lock = allocate_lock()
value_indexes = frozenset()
MAX_DISTINCT_VALUES = 10
def determine_value_indexes(indexes):
# This function determines all indexes whose values should be respected
# in the report key. The number of unique values for the index needs to be
# lower than the MAX_DISTINCT_VALUES watermark.
# TODO: Ideally who would only consider those indexes with a small number
# of unique values, where the number of items for each value differs a
# lot. If the number of items per value is similar, the duration of a
# query is likely similar as well.
global value_indexes
if value_indexes:
# Calculating all the value indexes is quite slow, so we do this once
# for the first query. Since this is an optimization only, slightly
# outdated results based on index changes in the running process
# can be ignored.
return value_indexes
new_value_indexes = set()
for name, index in indexes.items():
if IUniqueValueIndex.providedBy(index):
values = index.uniqueValues()
if values and len(values) < MAX_DISTINCT_VALUES:
# Only consider indexes which actually return a number
# greater than zero
new_value_indexes.add(name)
try:
value_indexes_lock.acquire()
value_indexes = frozenset(new_value_indexes)
finally:
value_indexes_lock.release()
return value_indexes
def clear_value_indexes():
global value_indexes
try:
value_indexes_lock.acquire()
value_indexes = frozenset()
finally:
value_indexes_lock.release()
from zope.testing.cleanup import addCleanUp
addCleanUp(clear_value_indexes)
del addCleanUp
def make_query(indexes, request):
# This is a bit of a mess, but the ZCatalog API supports passing
# in query restrictions in almost arbitary ways
if isinstance(request, dict):
query = request.copy()
else:
query = {}
query.update(request.keywords)
real_req = request.request
if isinstance(real_req, dict):
query.update(real_req)
known_keys = query.keys()
# The request has too many places where an index restriction might be
# specified. Putting all of request.form, request.other, ... into the
# key isn't what we want either, so we iterate over all known indexes
# instead and see if they are in the request.
for iid in indexes.keys():
if iid in known_keys:
continue
value = real_req.get(iid)
if value:
query[iid] = value
return query
def make_key(catalog, request):
indexes = catalog.indexes
valueindexes = determine_value_indexes(indexes)
query = make_query(indexes, request)
key = keys = query.keys()
values = [name for name in keys if name in valueindexes]
if values:
# If we have indexes whose values should be considered, we first
# preserve all normal indexes and then add the keys whose values
# matter including their value into the key
key = [name for name in keys if name not in values]
for name in values:
v = query.get(name, [])
if isinstance(v, (tuple, list)):
v = list(v)
v.sort()
# We need to make sure the key is immutable, repr() is an easy way
# to do this without imposing restrictions on the types of values
key.append((name, repr(v)))
key = tuple(sorted(key))
return key
class StopWatch(object):
""" Simple stopwatch class """
def __init__(self):
self.init()
def init(self):
self.res = []
self.start_time = None
self.interim = {}
self.stop_time = None
def start(self):
self.init()
self.start_time = time.time()
def split(self, label):
current = time.time()
start_time, stop_time = self.interim.get(label, (None, None))
if start_time is None:
self.interim[label] = (current, None)
return
self.interim[label] = (start_time, current)
self.res.append((label, current - start_time))
def stop(self):
self.end_time = time.time()
def result(self):
return (self.end_time - self.start_time, tuple(self.res))
class CatalogReport(StopWatch):
"""Catalog report class to meassure and identify catalog queries.
"""
def __init__(self, catalog, request=None, threshold=0.1):
super(CatalogReport, self).__init__()
self.catalog = catalog
self.request = request
self.threshold = threshold
parent = aq_parent(catalog)
path = getattr(aq_base(parent), 'getPhysicalPath', None)
if path is None:
path = ('', 'NonPersistentCatalog')
else:
path = tuple(parent.getPhysicalPath())
self.cid = path
def stop(self):
super(CatalogReport, self).stop()
self.log()
def log(self):
# result of stopwatch
res = self.result()
if res[0] < self.threshold:
return
# The key calculation takes a bit itself, we want to avoid that for
# any fast queries. This does mean that slow queries get the key
# calculation overhead added to their runtime.
key = make_key(self.catalog, self.request)
reports_lock.acquire()
try:
if self.cid not in reports:
reports[self.cid] = {}
previous = reports[self.cid].get(key)
if previous:
counter, mean, last = previous
mean = (mean * counter + res[0]) / float(counter + 1)
reports[self.cid][key] = (counter + 1, mean, res)
else:
reports[self.cid][key] = (1, res[0], res)
finally:
reports_lock.release()
def reset(self):
reports_lock.acquire()
try:
reports[self.cid] = {}
finally:
reports_lock.release()
def report(self):
"""Returns a statistic report of catalog queries as list of dicts as
follows:
[{'query': <query_key>,
'counter': <hits>
'duration': <mean duration>,
'last': <details of recent query>,
},
...
]
<details of recent query> := {'duration': <duration of last query>,
'details': <duration of single indexes>,
}
<duration of single indexes> := [{'id': <index_name1>,
'duration': <duration>,
},
...
]
The duration is provided in millisecond.
"""
rval = []
for k, v in reports.get(self.cid, {}).items():
info = {
'query': k,
'counter': v[0],
'duration': v[1] * 1000,
'last': {'duration': v[2][0] * 1000,
'details': [dict(id=i[0],
duration=i[1]*1000)
for i in v[2][1]],
},
}
rval.append(info)
return rval
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
import unittest
class dummy(object):
def __init__(self, num):
self.num = num
def big(self):
return self.num > 5
def numbers(self):
return (self.num, self.num + 1)
class TestCatalogReport(unittest.TestCase):
def setUp(self):
from Products.ZCatalog.ZCatalog import ZCatalog
self.zcat = ZCatalog('catalog')
self.zcat.long_query_time = 0.0
self.zcat.addIndex('num', 'FieldIndex')
self.zcat.addIndex('big', 'FieldIndex')
self.zcat.addIndex('numbers', 'KeywordIndex')
for i in range(9):
obj = dummy(i)
self.zcat.catalog_object(obj, str(i))
def tearDown(self):
from Products.ZCatalog.report import clear_value_indexes
clear_value_indexes()
def test_ReportLength(self):
""" tests the report aggregation """
self.zcat.manage_resetCatalogReport()
self.zcat.searchResults(numbers=4,sort_on='num')
self.zcat.searchResults(numbers=1,sort_on='num')
self.zcat.searchResults(numbers=3,sort_on='num')
self.zcat.searchResults(big=True,sort_on='num')
self.zcat.searchResults(big=True,sort_on='num')
self.zcat.searchResults(big=False,sort_on='num')
self.zcat.searchResults(num=[5,4,3],sort_on='num')
self.zcat.searchResults(num=(3,4,5),sort_on='num')
self.assertEqual(4, len(self.zcat.getCatalogReport()))
def test_ReportCounter(self):
""" tests the counter of equal queries """
self.zcat.manage_resetCatalogReport()
self.zcat.searchResults(numbers=5,sort_on='num')
self.zcat.searchResults(numbers=6,sort_on='num')
self.zcat.searchResults(numbers=8,sort_on='num')
r = self.zcat.getCatalogReport()[0]
self.assertEqual(r['counter'],3)
def test_ReportKey(self):
""" tests the query keys for uniqueness """
# query key 1
key = ('sort_on', ('big', 'True'))
self.zcat.manage_resetCatalogReport()
self.zcat.searchResults(big=True,sort_on='num')
self.zcat.searchResults(big=True,sort_on='num')
r = self.zcat.getCatalogReport()[0]
self.assertEqual(r['query'],key)
self.assertEqual(r['counter'],2)
# query key 2
key = ('sort_on', ('big', 'False'))
self.zcat.manage_resetCatalogReport()
self.zcat.searchResults(big=False,sort_on='num')
r = self.zcat.getCatalogReport()[0]
self.assertEqual(r['query'],key)
self.assertEqual(r['counter'], 1)
# query key 3
key = ('sort_on', ('num', '[3, 4, 5]'))
self.zcat.manage_resetCatalogReport()
self.zcat.searchResults(num=[5,4,3], sort_on='num')
self.zcat.searchResults(num=(3,4,5), sort_on='num')
r = self.zcat.getCatalogReport()[0]
self.assertEqual(r['query'], key)
self.assertEqual(r['counter'], 2)
def test_suite():
suite = unittest.TestSuite()
suite.addTest(unittest.makeSuite(TestCatalogReport))
return suite
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment