From 75fdd631b6aced60fedd5975d417f3b06dccb97a Mon Sep 17 00:00:00 2001
From: Vincent Pelletier <vincent@nexedi.com>
Date: Wed, 3 Apr 2013 14:36:40 +0200
Subject: [PATCH] Add optional detailed error analysis.

---
 apachedex/__init__.py | 59 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 52 insertions(+), 7 deletions(-)

diff --git a/apachedex/__init__.py b/apachedex/__init__.py
index 70f7e61..e7de674 100755
--- a/apachedex/__init__.py
+++ b/apachedex/__init__.py
@@ -33,7 +33,7 @@
 # - provide some form of raw data output, not just html
 # - allow user to specify min & max dates
 from cgi import escape
-from collections import defaultdict
+from collections import defaultdict, Counter
 from datetime import datetime, tzinfo, timedelta
 from functools import partial
 from operator import itemgetter
@@ -68,7 +68,10 @@
 US_PER_S = 10 ** 6
 N_SLOWEST = 20
 N_SLOWEST_THRESHOLD = N_SLOWEST * 4
+N_ERROR_URL = 10
+N_REFERRER_PER_ERROR_URL = 5
 ITEMGETTER0 = itemgetter(0)
+ITEMGETTER1 = itemgetter(1)
 APDEX_TOLERATING_COEF = 4
 
 def statusIsError(status):
@@ -124,15 +127,17 @@ class APDEXStats(object):
     return 0
 
 class GenericSiteStats(object):
-  def __init__(self, threshold, prefix=1):
+  def __init__(self, threshold, prefix=1, error_detail=False):
     self.threshold = threshold
     self.prefix = prefix
+    self.error_detail = error_detail
     self.status = defaultdict(partial(defaultdict, int))
+    if error_detail:
+      self.error_url_count = defaultdict(partial(defaultdict, list))
     self.slowest_list = [(-1, None, None, None)]
     self.apdex = defaultdict(partial(APDEXStats, threshold))
 
   def accumulate(self, match, url_match, date):
-    self.status[match.group('status')][date] += 1
     self.apdex[date].accumulate(match)
     duration = int(match.group('duration'))
     if url_match is None:
@@ -145,6 +150,11 @@ class GenericSiteStats(object):
         match.group('referer')))
       if len(slowest_list) > N_SLOWEST_THRESHOLD:
         self._housekeeping()
+    status = match.group('status')
+    self.status[status][date] += 1
+    if self.error_detail and statusIsError(status):
+      # XXX: can eat memory if there are many errors on many different urls
+      self.error_url_count[status][url].append(match.group('referer'))
 
   def _housekeeping(self):
     slowest_list = self.slowest_list
@@ -196,7 +206,37 @@ class GenericSiteStats(object):
       for date in column_list:
         append(hitTd(data_dict[date], status))
       append('</tr>')
-    append('</table><h2>Slowest pages</h2><table><tr><th>duration (s)</th>'
+    append('</table>')
+    if self.error_detail:
+      def getHitForUrl(referer_counter):
+        return sum(referer_counter.itervalues())
+      filtered_status_url = defaultdict(partial(defaultdict, dict))
+      for status, url_dict in self.error_url_count.iteritems():
+        filtered_status_url[status] = sorted(
+          ((key, Counter(value)) for key, value in url_dict.iteritems()),
+          key=lambda x: getHitForUrl(x[1]), reverse=True)[:N_ERROR_URL]
+      append('<h3>Error detail</h3><table><tr><th>status</th><th>hit</th>'
+        '<th>url</th><th>referers</th></tr>')
+      for status, url_list in sorted(filtered_status_url.iteritems(),
+          key=ITEMGETTER0):
+        append('<tr><th rowspan="%s">%s</th>' % (len(url_list), status))
+        first_url = True
+        for url, referer_counter in url_list:
+          if first_url:
+            first_url = False
+          else:
+            append('<tr>')
+          append('<td>%s</td><td class="text">%s</td>'
+            '<td class="text">%s</td>' % (
+            getHitForUrl(referer_counter),
+            url,
+            '<br/>'.join('%i: %s' % (hit, referer) for referer, hit in sorted(
+              referer_counter.iteritems(), key=ITEMGETTER1, reverse=True
+            )[:N_REFERRER_PER_ERROR_URL]),
+          ))
+          append('</tr>')
+      append('</table>')
+    append('<h2>Slowest pages</h2><table><tr><th>duration (s)</th>'
       '<th>date</th><th>url</th><th>referer</th></tr>')
     for duration, timestamp, url, referer in reversed(self.slowest_list):
       if timestamp is None:
@@ -221,8 +261,9 @@ class ERP5SiteStats(GenericSiteStats):
   - If a line belongs to a module and has at least 2 slashes after module,
     count line as belonging to a document of that module
   """
-  def __init__(self, threshold, prefix=1):
-    super(ERP5SiteStats, self).__init__(threshold, prefix=prefix)
+  def __init__(self, threshold, prefix=1, error_detail=False):
+    super(ERP5SiteStats, self).__init__(threshold, prefix=prefix,
+      error_detail=error_detail)
     # Key levels:
     # - module id (string)
     # - is document (bool)
@@ -355,6 +396,8 @@ def main():
   parser.add_argument('-a', '--apdex', default=US_PER_S, type=int,
     help='First threshold for Apdex computation, in microseconds. '
       'Default: %(default)r')
+  parser.add_argument('-e', '--error-detail', action='store_true',
+    help='Include detailed report (url & referers) for error statuses.')
   parser.add_argument('-d', '--default',
     help='Caption for lines matching no prefix, or skip them if not provided.')
   parser.add_argument('--base', dest='site_list', default=[],
@@ -418,6 +461,7 @@ def main():
   infile_list = args.logfile
   quiet = args.quiet
   threshold = args.apdex
+  error_detail = args.error_detail
   file_count = len(infile_list)
   per_site = {}
   hit_per_day = defaultdict(int)
@@ -469,7 +513,8 @@ def main():
       try:
         site_data = per_site[site]
       except KeyError:
-        site_data = per_site[site] = action(threshold)
+        site_data = per_site[site] = action(threshold,
+          error_detail=error_detail)
       site_data.accumulate(match, url_match, date)
     all_lines += lineno
   end_parsing_time = time.time()
-- 
2.30.9
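
As an illustration rather than part of the patch: the new bookkeeping is a
three-level structure, status -> url -> referer list, filled during parsing
and only counted and ranked at rendering time via Counter. Below is a minimal
standalone sketch of that aggregation, assuming plain string inputs instead of
apachedex's regexp match objects; accumulate and summarize are hypothetical
helper names introduced for the example, not apachedex API.

from collections import defaultdict, Counter
from functools import partial

N_ERROR_URL = 10               # urls reported per error status
N_REFERRER_PER_ERROR_URL = 5   # referers reported per url

# status -> url -> list of raw referers, mirroring error_url_count above.
# As the XXX comment in the patch warns, this can eat memory when errors
# are spread over many distinct urls.
error_url_count = defaultdict(partial(defaultdict, list))

def accumulate(status, url, referer):
  # The patch only records entries for which statusIsError(status) is
  # true; callers of this sketch are assumed to filter the same way.
  error_url_count[status][url].append(referer)

def summarize():
  # Rank urls by total hits within each status, then each url's referers
  # by frequency - the ordering the patch renders as the "Error detail"
  # HTML table.
  for status in sorted(error_url_count):
    url_list = sorted(
      ((url, Counter(referers))
        for url, referers in error_url_count[status].items()),
      key=lambda item: sum(item[1].values()),
      reverse=True)[:N_ERROR_URL]
    for url, referer_counter in url_list:
      print('%s: %i hits on %s' % (
        status, sum(referer_counter.values()), url))
      for referer, hit in referer_counter.most_common(
          N_REFERRER_PER_ERROR_URL):
        print('  %i: %s' % (hit, referer))

if __name__ == '__main__':
  # Tiny demonstration with made-up data.
  accumulate('404', '/missing', 'http://example.com/a')
  accumulate('404', '/missing', 'http://example.com/a')
  accumulate('404', '/missing', 'http://example.com/b')
  summarize()

Counter.most_common(n) used in the sketch is equivalent to the patch's
explicit sorted(..., key=ITEMGETTER1, reverse=True)[:n]. With the patch
applied, the same ranking is emitted as an HTML table when apachedex runs
with the new -e/--error-detail flag.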