Commit 0ec73650 authored by Vincent Pelletier

Add support for %T (second-granularity duration).

Also, error out when the log format provides no duration.
parent de2477a6
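The practical effect is that the log format now decides how request duration is read: %D yields microseconds directly, while %T yields whole seconds that are scaled by US_PER_S, so APDEXStats keeps accumulating a single unit. Below is a minimal standalone sketch of that selection logic; pickDurationGetter, the toy format string and the sample regex/log line are illustrative only, while the format constants, group names and US_PER_S match the diff that follows.

import re
import sys

US_PER_S = 10 ** 6  # the script's existing microseconds-per-second constant

DURATION_US_FORMAT = '%D'
DURATION_S_FORMAT = '%T'

def pickDurationGetter(logformat):
  # Mirrors the selection this commit adds to main(): prefer %D
  # (microseconds), fall back to %T (seconds, scaled to microseconds),
  # otherwise refuse to compute apdex.
  if DURATION_US_FORMAT in logformat:
    return lambda match: int(match.group('duration'))
  elif DURATION_S_FORMAT in logformat:
    return lambda match: int(match.group('duration_s')) * US_PER_S
  print >> sys.stderr, 'Neither %D nor %T are present in logformat, ' \
    'apdex cannot be computed.'
  sys.exit(1)

# Illustrative only: a toy format with %T and a matching regex/line.
getDuration = pickDurationGetter('%h %T')
match = re.match(r'(?P<host>[^ ]*) (?P<duration_s>[0-9]*)', '127.0.0.1 2')
print getDuration(match)  # -> 2000000 (2 seconds expressed in microseconds)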
@@ -105,7 +105,7 @@ APDEX_TABLE_HEADERS = ''.join('<th>' + x + '</th>' for x in (
   'apdex', 'hits', 'avg (s)', 'max (s)'))
 
 class APDEXStats(object):
-  def __init__(self, threshold):
+  def __init__(self, threshold, getDuration):
     threshold *= US_PER_S
     self.threshold = threshold
     self.threshold4 = threshold * APDEX_TOLERATING_COEF
@@ -114,9 +114,10 @@ class APDEXStats(object):
     self.hit = 0
     self.duration_total = 0
     self.duration_max = 0
+    self.getDuration = getDuration
 
   def accumulate(self, match):
-    duration = int(match.group('duration'))
+    duration = self.getDuration(match)
     self.duration_total += duration
     self.duration_max = max(self.duration_max, duration)
     if not statusIsError(match.group('status')):
@@ -146,19 +147,20 @@ class APDEXStats(object):
     return float(self.duration_max) / US_PER_S
 
 class GenericSiteStats(object):
-  def __init__(self, threshold, prefix=1, error_detail=False):
+  def __init__(self, threshold, getDuration, prefix=1, error_detail=False):
     self.threshold = threshold
     self.prefix = prefix
     self.error_detail = error_detail
+    self.getDuration = getDuration
     self.status = defaultdict(partial(defaultdict, int))
     if error_detail:
       self.error_url_count = defaultdict(partial(defaultdict, list))
-    self.url_apdex = defaultdict(partial(APDEXStats, threshold))
-    self.apdex = defaultdict(partial(APDEXStats, threshold))
+    self.url_apdex = defaultdict(partial(APDEXStats, threshold, getDuration))
+    self.apdex = defaultdict(partial(APDEXStats, threshold, getDuration))
 
   def accumulate(self, match, url_match, date):
     self.apdex[date].accumulate(match)
-    duration = int(match.group('duration'))
+    duration = self.getDuration(match)
     if url_match is None:
       url = match.group('request')
     else:
@@ -172,7 +174,7 @@ class GenericSiteStats(object):
       self.error_url_count[status][url].append(match.group('referer'))
 
   def getApdexData(self):
-    apdex = APDEXStats(self.threshold)
+    apdex = APDEXStats(self.threshold, None)
     for data in self.apdex.itervalues():
       apdex.accumulateFrom(data)
     return [
@@ -183,7 +185,7 @@ class GenericSiteStats(object):
   def asHTML(self, stat_filter=lambda x: x):
     result = []
     append = result.append
-    apdex = APDEXStats(self.threshold)
+    apdex = APDEXStats(self.threshold, None)
     for data in self.apdex.itervalues():
       apdex.accumulateFrom(data)
     append('<h2>Overall</h2><table class="stats"><tr>')
@@ -264,16 +266,16 @@ class ERP5SiteStats(GenericSiteStats):
   - If a line belongs to a module and has at least 2 slashes after module,
     count line as belonging to a document of that module
   """
-  def __init__(self, threshold, prefix=1, error_detail=False):
-    super(ERP5SiteStats, self).__init__(threshold, prefix=prefix,
+  def __init__(self, threshold, getDuration, prefix=1, error_detail=False):
+    super(ERP5SiteStats, self).__init__(threshold, getDuration, prefix=prefix,
       error_detail=error_detail)
     # Key levels:
     # - module id (string)
     # - is document (bool)
     # - date (datetime.date)
     self.module = defaultdict(partial(defaultdict, partial(
-      defaultdict, partial(APDEXStats, threshold))))
-    self.no_module = defaultdict(partial(APDEXStats, threshold))
+      defaultdict, partial(APDEXStats, threshold, getDuration))))
+    self.no_module = defaultdict(partial(APDEXStats, threshold, getDuration))
 
   def accumulate(self, match, url_match, date):
     prefix = self.prefix
@@ -292,8 +294,8 @@ class ERP5SiteStats(GenericSiteStats):
     append('<h2>Stats per module</h2><table class="stats"><tr>'
       '<th rowspan="2" colspan="2">module</th><th colspan="4">overall</th>')
     filtered_module = defaultdict(partial(defaultdict, partial(
-      defaultdict, partial(APDEXStats, self.threshold))))
-    filtered_no_module = defaultdict(partial(APDEXStats, self.threshold))
+      defaultdict, partial(APDEXStats, self.threshold, None))))
+    filtered_no_module = defaultdict(partial(APDEXStats, self.threshold, None))
     for date, value in self.no_module.iteritems():
       filtered_no_module[stat_filter(date)].accumulateFrom(value)
     column_set = set(filtered_no_module)
@@ -312,7 +314,7 @@ class ERP5SiteStats(GenericSiteStats):
       append(APDEX_TABLE_HEADERS)
     append('</tr>')
     def apdexAsColumns(data_dict):
-      data_total = APDEXStats(self.threshold)
+      data_total = APDEXStats(self.threshold, None)
       for data in data_dict.values():
         data_total.accumulateFrom(data)
       append(getApdexStatsAsHtml(data_total, self.threshold))
@@ -331,6 +333,9 @@ class ERP5SiteStats(GenericSiteStats):
     append(super(ERP5SiteStats, self).asHTML(stat_filter=stat_filter))
     return '\n'.join(result)
 
+DURATION_US_FORMAT = '%D'
+DURATION_S_FORMAT = '%T'
+
 logformat_dict = {
   '%h': r'(?P<host>[^ ]*)',
   '%l': r'(?P<ident>[^ ]*)',
@@ -341,7 +346,8 @@ logformat_dict = {
   '%O': r'(?P<size>[0-9-]*?)',
   '%{Referer}i': r'(?P<referer>[^"]*)', # XXX: expected to be enclosed in "
  '%{User-Agent}i': r'(?P<agent>[^"]*)', # XXX: expected to be enclosed in "
-  '%D': r'(?P<duration>[0-9]*)',
+  DURATION_US_FORMAT: r'(?P<duration>[0-9]*)',
+  DURATION_S_FORMAT: r'(?P<duration_s>[0-9]*)',
   '%%': r'%',
   # TODO: add more formats
 }
@@ -441,6 +447,14 @@ def main():
   args = parser.parse_args()
   abs_file_container = getattr(args, 'js', abs_file_container)
 
+  if DURATION_US_FORMAT in args.logformat:
+    getDuration = lambda x: int(x.group('duration'))
+  elif DURATION_S_FORMAT in args.logformat:
+    getDuration = lambda x: int(x.group('duration_s')) * US_PER_S
+  else:
+    print >> sys.stderr, 'Neither %D nor %T are present in logformat, apdex ' \
+      'cannot be computed.'
+    sys.exit(1)
   line_regex = ''
   try:
     n = iter(args.logformat).next
@@ -473,8 +487,7 @@ def main():
         'specified, nothing to do.'
       sys.exit(1)
   else:
-    default_action = partial(GenericSiteStats, prefix=0,
-      error_detail=args.error_detail)
+    default_action = partial(GenericSiteStats, prefix=0)
   infile_list = args.logfile
   quiet = args.quiet
   threshold = args.apdex
@@ -531,7 +544,7 @@ def main():
       try:
         site_data = per_site[site]
       except KeyError:
-        site_data = per_site[site] = action(threshold,
+        site_data = per_site[site] = action(threshold, getDuration,
           error_detail=error_detail)
       site_data.accumulate(match, url_match, date)
     all_lines += lineno