Commit 75fdd631 authored by Vincent Pelletier

Add optional detailed error analysis.

parent 724784ae
@@ -33,7 +33,7 @@
 # - provide some form of raw data output, not just html
 # - allow user to specify min & max dates
 from cgi import escape
-from collections import defaultdict
+from collections import defaultdict, Counter
 from datetime import datetime, tzinfo, timedelta
 from functools import partial
 from operator import itemgetter
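Counter, newly imported here, reduces a sequence of repeated items to per-item tallies; the error report below builds one per URL from the accumulated referer lists. A minimal, self-contained illustration (the referer values are made up):

    from collections import Counter

    referers = ['http://a.example/', 'http://b.example/', 'http://a.example/']
    counter = Counter(referers)
    print(sum(counter.values()))   # 3 hits in total, as getHitForUrl computes
    print(counter.most_common(1))  # [('http://a.example/', 2)]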
@@ -68,7 +68,10 @@ US_PER_S = 10 ** 6
 N_SLOWEST = 20
 N_SLOWEST_THRESHOLD = N_SLOWEST * 4
+N_ERROR_URL = 10
+N_REFERRER_PER_ERROR_URL = 5
 ITEMGETTER0 = itemgetter(0)
+ITEMGETTER1 = itemgetter(1)
 APDEX_TOLERATING_COEF = 4


 def statusIsError(status):
@@ -124,15 +127,17 @@ class APDEXStats(object)
     return 0

 class GenericSiteStats(object):
-  def __init__(self, threshold, prefix=1):
+  def __init__(self, threshold, prefix=1, error_detail=False):
     self.threshold = threshold
     self.prefix = prefix
+    self.error_detail = error_detail
     self.status = defaultdict(partial(defaultdict, int))
+    if error_detail:
+      self.error_url_count = defaultdict(partial(defaultdict, list))
     self.slowest_list = [(-1, None, None, None)]
     self.apdex = defaultdict(partial(APDEXStats, threshold))

   def accumulate(self, match, url_match, date):
-    self.status[match.group('status')][date] += 1
     self.apdex[date].accumulate(match)
     duration = int(match.group('duration'))
     if url_match is None:
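The new error_url_count attribute is a two-level autovivifying mapping from status code to URL to the list of referers seen for that URL; defaultdict(partial(defaultdict, list)) creates each level on first access. A small sketch of the shape (status, URL, and referer values are made up):

    from collections import defaultdict
    from functools import partial

    error_url_count = defaultdict(partial(defaultdict, list))
    # both the intermediate dict and the leaf list spring into existence here
    error_url_count['404']['/missing/page'].append('http://example.com/')
    print(error_url_count['404']['/missing/page'])  # ['http://example.com/']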
@@ -145,6 +150,11 @@ class GenericSiteStats(object)
           match.group('referer')))
     if len(slowest_list) > N_SLOWEST_THRESHOLD:
       self._housekeeping()
+    status = match.group('status')
+    self.status[status][date] += 1
+    if self.error_detail and statusIsError(status):
+      # XXX: can eat memory if there are many errors on many different urls
+      self.error_url_count[status][url].append(match.group('referer'))

   def _housekeeping(self):
     slowest_list = self.slowest_list
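The XXX comment is worth taking seriously: one list entry is kept per error hit, so a busy site with many failing URLs grows this structure without bound. A possible mitigation, not what this commit does, would be to tally referers incrementally so memory scales with distinct referers rather than with hits:

    from collections import Counter, defaultdict
    from functools import partial

    # hypothetical alternative layout: status -> url -> Counter of referers
    error_url_count = defaultdict(partial(defaultdict, Counter))
    error_url_count['500']['/buggy']['http://example.com/'] += 1
    error_url_count['500']['/buggy']['http://example.com/'] += 1
    print(error_url_count['500']['/buggy'])  # Counter({'http://example.com/': 2})

With that layout, asHTML would no longer need the Counter(value) conversion when building filtered_status_url.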
@@ -196,7 +206,37 @@ class GenericSiteStats(object)
       for date in column_list:
         append(hitTd(data_dict[date], status))
       append('</tr>')
-    append('</table><h2>Slowest pages</h2><table><tr><th>duration (s)</th>'
+    append('</table>')
+    if self.error_detail:
+      def getHitForUrl(referer_counter):
+        return sum(referer_counter.itervalues())
+      filtered_status_url = defaultdict(partial(defaultdict, dict))
+      for status, url_dict in self.error_url_count.iteritems():
+        filtered_status_url[status] = sorted(
+          ((key, Counter(value)) for key, value in url_dict.iteritems()),
+          key=lambda x: getHitForUrl(x[1]), reverse=True)[:N_ERROR_URL]
+      append('<h3>Error detail</h3><table><tr><th>status</th><th>hit</th>'
+        '<th>url</th><th>referers</th></tr>')
+      for status, url_list in sorted(filtered_status_url.iteritems(),
+          key=ITEMGETTER0):
+        append('<tr><th rowspan="%s">%s</th>' % (len(url_list), status))
+        first_url = True
+        for url, referer_counter in url_list:
+          if first_url:
+            first_url = False
+          else:
+            append('<tr>')
+          append('<td>%s</td><td class="text">%s</td>'
+            '<td class="text">%s</td>' % (
+              getHitForUrl(referer_counter),
+              url,
+              '<br/>'.join('%i: %s' % (hit, referer) for referer, hit in sorted(
+                referer_counter.iteritems(), key=ITEMGETTER1, reverse=True
+              )[:N_REFERRER_PER_ERROR_URL]),
+          ))
+          append('</tr>')
+      append('</table>')
+    append('<h2>Slowest pages</h2><table><tr><th>duration (s)</th>'
       '<th>date</th><th>url</th><th>referer</th></tr>')
     for duration, timestamp, url, referer in reversed(self.slowest_list):
       if timestamp is None:
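The descending sort on referer counts reimplements what Counter.most_common already provides; the two agree whenever counts are distinct (tie ordering may differ). A quick check:

    from collections import Counter
    from operator import itemgetter

    ITEMGETTER1 = itemgetter(1)
    referer_counter = Counter({'http://a.example/': 3, 'http://b.example/': 1})
    by_sort = sorted(referer_counter.items(), key=ITEMGETTER1, reverse=True)[:5]
    print(by_sort == referer_counter.most_common(5))  # True for distinct counts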
@@ -221,8 +261,9 @@ class ERP5SiteStats(GenericSiteStats)
   - If a line belongs to a module and has at least 2 slashes after module,
     count line as belonging to a document of that module
   """
-  def __init__(self, threshold, prefix=1):
-    super(ERP5SiteStats, self).__init__(threshold, prefix=prefix)
+  def __init__(self, threshold, prefix=1, error_detail=False):
+    super(ERP5SiteStats, self).__init__(threshold, prefix=prefix,
+      error_detail=error_detail)
     # Key levels:
     # - module id (string)
     # - is document (bool)
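ERP5SiteStats has to repeat the new keyword in its signature and pass it through explicitly. A sketch of an alternative, not what the commit does, where the subclass forwards unknown keywords so future options would not require touching every override:

    class Base(object):
      def __init__(self, threshold, prefix=1, error_detail=False):
        self.threshold = threshold
        self.prefix = prefix
        self.error_detail = error_detail

    class Derived(Base):
      # **kw forwards error_detail (and any later addition) untouched
      def __init__(self, threshold, **kw):
        super(Derived, self).__init__(threshold, **kw)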
@@ -355,6 +396,8 @@ def main()
   parser.add_argument('-a', '--apdex', default=US_PER_S, type=int,
     help='First threshold for Apdex computation, in microseconds. '
     'Default: %(default)r')
+  parser.add_argument('-e', '--error-detail', action='store_true',
+    help='Include detailed report (url & referers) for error statuses.')
   parser.add_argument('-d', '--default',
     help='Caption for lines matching no prefix, or skip them if not provided.')
   parser.add_argument('--base', dest='site_list', default=[],
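Since store_true defaults to False, error detail stays opt-in and existing invocations are unaffected. The new flag in isolation:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('-e', '--error-detail', action='store_true',
      help='Include detailed report (url & referers) for error statuses.')
    print(parser.parse_args([]).error_detail)      # False
    print(parser.parse_args(['-e']).error_detail)  # True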
@@ -418,6 +461,7 @@ def main()
   infile_list = args.logfile
   quiet = args.quiet
   threshold = args.apdex
+  error_detail = args.error_detail
   file_count = len(infile_list)
   per_site = {}
   hit_per_day = defaultdict(int)
@@ -469,7 +513,8 @@ def main()
       try:
         site_data = per_site[site]
       except KeyError:
-        site_data = per_site[site] = action(threshold)
+        site_data = per_site[site] = action(threshold,
+          error_detail=error_detail)
       site_data.accumulate(match, url_match, date)
     all_lines += lineno
   end_parsing_time = time.time()
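The try/except KeyError pattern builds the per-site stats object only on the first line seen for each site; the superficially equivalent dict.setdefault would evaluate the constructor on every log line. A self-contained sketch, with SiteStats as a hypothetical stand-in for GenericSiteStats or ERP5SiteStats:

    class SiteStats(object):
      def __init__(self, threshold, error_detail=False):
        self.threshold = threshold
        self.error_detail = error_detail

    per_site = {}
    threshold, error_detail = 1000000, True
    for site in ['a', 'a', 'b']:
      try:
        site_data = per_site[site]
      except KeyError:
        # constructed once per site, not once per log line
        site_data = per_site[site] = SiteStats(threshold,
          error_detail=error_detail)
    print(sorted(per_site))  # ['a', 'b']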