Commit c8ace6fb authored by Vincent Pelletier

Document support for non-ASCII urls.

Also, display them decoded in report.
parent 58e82c25
...@@ -31,8 +31,9 @@ from collections import defaultdict, Counter ...@@ -31,8 +31,9 @@ from collections import defaultdict, Counter
from datetime import datetime, timedelta from datetime import datetime, timedelta
from functools import partial from functools import partial
from operator import itemgetter from operator import itemgetter
from urllib import splittype, splithost from urllib import splittype, splithost, unquote
import argparse import argparse
import codecs
import gzip import gzip
import httplib import httplib
import itertools import itertools
...@@ -282,7 +283,7 @@ class GenericSiteStats(object): ...@@ -282,7 +283,7 @@ class GenericSiteStats(object):
def getApdexData(self): def getApdexData(self):
return getDataPoints(self.apdex) return getDataPoints(self.apdex)
def asHTML(self, date_format, placeholder_delta, graph_period, def asHTML(self, date_format, placeholder_delta, graph_period, encoding,
stat_filter=lambda x: x): stat_filter=lambda x: x):
result = [] result = []
append = result.append append = result.append
...@@ -300,7 +301,7 @@ class GenericSiteStats(object): ...@@ -300,7 +301,7 @@ class GenericSiteStats(object):
reverse=True)[:N_SLOWEST]: reverse=True)[:N_SLOWEST]:
append('<tr>') append('<tr>')
append(data.asHTML(self.threshold)) append(data.asHTML(self.threshold))
append('<td class="text">%s</td></tr>' % escape(url)) append('<td class="text">%s</td></tr>' % unquoteToHtml(url, encoding))
append('</table>') append('</table>')
column_set = set() column_set = set()
filtered_status = defaultdict(partial(defaultdict, int)) filtered_status = defaultdict(partial(defaultdict, int))
...@@ -357,9 +358,12 @@ class GenericSiteStats(object): ...@@ -357,9 +358,12 @@ class GenericSiteStats(object):
append('<td>%s</td><td class="text">%s</td>' append('<td>%s</td><td class="text">%s</td>'
'<td class="text">%s</td>' % ( '<td class="text">%s</td>' % (
getHitForUrl(referer_counter), getHitForUrl(referer_counter),
escape(url), unquoteToHtml(url, encoding),
'<br/>'.join('%i: %s' % (hit, escape(referer)) for referer, hit in sorted( '<br/>'.join('%i: %s' % (hit, unquoteToHtml(referer, encoding))
referer_counter.iteritems(), key=ITEMGETTER1, reverse=True for referer, hit in sorted(
referer_counter.iteritems(),
key=ITEMGETTER1,
reverse=True,
)[:N_REFERRER_PER_ERROR_URL]), )[:N_REFERRER_PER_ERROR_URL]),
)) ))
append('</tr>') append('</tr>')
...@@ -424,7 +428,7 @@ class ERP5SiteStats(GenericSiteStats): ...@@ -424,7 +428,7 @@ class ERP5SiteStats(GenericSiteStats):
return sorted(((date, apdex, hit) return sorted(((date, apdex, hit)
for date, (apdex, hit) in date_dict.iteritems()), key=ITEMGETTER0) for date, (apdex, hit) in date_dict.iteritems()), key=ITEMGETTER0)
def asHTML(self, date_format, placeholder_delta, graph_period, def asHTML(self, date_format, placeholder_delta, graph_period, encoding,
stat_filter=lambda x: x): stat_filter=lambda x: x):
result = [] result = []
append = result.append append = result.append
...@@ -490,7 +494,7 @@ class ERP5SiteStats(GenericSiteStats): ...@@ -490,7 +494,7 @@ class ERP5SiteStats(GenericSiteStats):
apdexAsColumns(filtered_no_module) apdexAsColumns(filtered_no_module)
append('</tr></table>') append('</tr></table>')
append(super(ERP5SiteStats, self).asHTML(date_format, append(super(ERP5SiteStats, self).asHTML(date_format,
placeholder_delta, graph_period, stat_filter=stat_filter)) placeholder_delta, graph_period, encoding, stat_filter=stat_filter))
return '\n'.join(result) return '\n'.join(result)
DURATION_US_FORMAT = '%D' DURATION_US_FORMAT = '%D'
...@@ -634,14 +638,17 @@ period_parser = { ...@@ -634,14 +638,17 @@ period_parser = {
), ),
} }
def asHTML(out, per_site, args, default_site, period_parameter_dict, stats): unquoteToHtml = lambda x, encoding: escape(unquote(x).decode(encoding))
def asHTML(out, encoding, per_site, args, default_site, period_parameter_dict,
stats):
period = period_parameter_dict['period'] period = period_parameter_dict['period']
decimator = period_parameter_dict['decimator'] decimator = period_parameter_dict['decimator']
date_format = period_parameter_dict['date_format'] date_format = period_parameter_dict['date_format']
placeholder_delta = period_parameter_dict['placeholder_delta'] placeholder_delta = period_parameter_dict['placeholder_delta']
graph_period = period_parameter_dict['graph_period'] graph_period = period_parameter_dict['graph_period']
out.write('<!DOCTYPE html>\n<html><head><meta charset="utf-8">' out.write('<!DOCTYPE html>\n<html><head><meta charset="%s">'
'<title>Stats</title>') '<title>Stats</title>' % encoding)
js_embed = getattr(args, 'js_embed', True) js_embed = getattr(args, 'js_embed', True)
js_path = getattr(args, 'js', None) js_path = getattr(args, 'js', None)
if js_embed: if js_embed:
...@@ -680,7 +687,7 @@ def asHTML(out, per_site, args, default_site, period_parameter_dict, stats): ...@@ -680,7 +687,7 @@ def asHTML(out, per_site, args, default_site, period_parameter_dict, stats):
for site_id, data in per_site.iteritems(): for site_id, data in per_site.iteritems():
if site_id is None: if site_id is None:
site_id = default_site site_id = default_site
out.write('<h1>Site: %s</h1>' % site_id) out.write('<h1>Site: %s</h1>' % unquoteToHtml(site_id, encoding))
out.write( out.write(
graphPair( graphPair(
prepareDataForGraph( prepareDataForGraph(
...@@ -693,7 +700,7 @@ def asHTML(out, per_site, args, default_site, period_parameter_dict, stats): ...@@ -693,7 +700,7 @@ def asHTML(out, per_site, args, default_site, period_parameter_dict, stats):
) )
) )
out.write(data.asHTML(date_format, placeholder_delta, graph_period, out.write(data.asHTML(date_format, placeholder_delta, graph_period,
decimator)) encoding, decimator))
end_stat_time = time.time() end_stat_time = time.time()
if args.stats: if args.stats:
out.write('<h1>Parsing stats</h1><table class="stats">') out.write('<h1>Parsing stats</h1><table class="stats">')
...@@ -726,7 +733,7 @@ def asHTML(out, per_site, args, default_site, period_parameter_dict, stats): ...@@ -726,7 +733,7 @@ def asHTML(out, per_site, args, default_site, period_parameter_dict, stats):
out.write('</body></html>') out.write('</body></html>')
format_generator = { format_generator = {
'html': asHTML, 'html': (asHTML, 'utf-8'),
} }
def main(): def main():
...@@ -770,7 +777,10 @@ def main(): ...@@ -770,7 +777,10 @@ def main():
group = parser.add_argument_group('site matching', 'Earlier arguments take ' group = parser.add_argument_group('site matching', 'Earlier arguments take '
'precedence. For example: --skip-base "/foo/bar(/|$|\\?)" ' 'precedence. For example: --skip-base "/foo/bar(/|$|\\?)" '
'--base "/foo(/|$|\\?)" generates stats for /foo, excluding /foo/bar. ' '--base "/foo(/|$|\\?)" generates stats for /foo, excluding /foo/bar. '
'Arguments (except for -d/--default) are interpreted as Python regexes.') 'Arguments (except for -d/--default) are interpreted as Python regexes. '
'Literal values are expected urlencoded. For example: '
'--base "/%E6%96%87%E5%AD%97%E5%8C%96%E3%81%91(/|$|\\?)" matches '
'"/\xe6\x96\x87\xe5\xad\x97\xe5\x8c\x96\xe3\x81\x91" ("mojibake").')
group.add_argument('-d', '--default', group.add_argument('-d', '--default',
help='Caption for lines matching no prefix, or skip them if not provided.') help='Caption for lines matching no prefix, or skip them if not provided.')
group.add_argument('--base', dest='path', default=[], nargs='+', group.add_argument('--base', dest='path', default=[], nargs='+',
...@@ -936,12 +946,13 @@ def main(): ...@@ -936,12 +946,13 @@ def main():
all_lines += lineno all_lines += lineno
sys.stderr.write('%i\n' % lineno) sys.stderr.write('%i\n' % lineno)
end_parsing_time = time.time() end_parsing_time = time.time()
generator, out_encoding = format_generator[args.format]
if args.out == '-': if args.out == '-':
out = sys.stdout out = sys.stdout
else: else:
out = open(args.out, 'w') out = codecs.open(args.out, 'w', encoding=out_encoding)
with out: with out:
format_generator[args.format](out, per_site, args, default_site, { generator(out, out_encoding, per_site, args, default_site, {
'period': period, 'period': period,
'decimator': decimator, 'decimator': decimator,
'date_format': date_format, 'date_format': date_format,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment