Commit 3cc66082 authored by Vincent Pelletier

Use a context manager for logfile resource management

parent 466146fb
...@@ -42,6 +42,7 @@ ...@@ -42,6 +42,7 @@
from html import escape from html import escape
from collections import defaultdict, Counter from collections import defaultdict, Counter
from contextlib import nullcontext
from datetime import datetime, timedelta, date, tzinfo from datetime import datetime, timedelta, date, tzinfo
from functools import partial from functools import partial
from operator import itemgetter from operator import itemgetter
...@@ -1520,6 +1521,7 @@ def main(): ...@@ -1520,6 +1521,7 @@ def main():
file=sys.stderr) file=sys.stderr)
if filename == '-': if filename == '-':
logfile = sys.stdin logfile = sys.stdin
logfile_context = nullcontext()
else: else:
for opener, exc in FILE_OPENER_LIST: for opener, exc in FILE_OPENER_LIST:
logfile = opener(filename, 'rt', encoding=INPUT_ENCODING, errors=INPUT_ENCODING_ERROR_HANDLER) logfile = opener(filename, 'rt', encoding=INPUT_ENCODING, errors=INPUT_ENCODING_ERROR_HANDLER)
...@@ -1531,91 +1533,98 @@ def main(): ...@@ -1531,91 +1533,98 @@ def main():
logfile.seek(0) logfile.seek(0)
break break
else: else:
logfile = open(filename, 'r', encoding=INPUT_ENCODING, errors=INPUT_ENCODING_ERROR_HANDLER) logfile = open( # pylint: disable=consider-using-with
lineno = 0 filename,
for lineno, line in enumerate(logfile, 1): 'r',
if show_progress and lineno % 5000 == 0: encoding=INPUT_ENCODING,
print(lineno, end='\r', file=sys.stderr) errors=INPUT_ENCODING_ERROR_HANDLER,
match = matchline(line) )
if match is None: logfile_context = logfile
match = expensive_matchline(line) with logfile_context:
lineno = 0
for lineno, line in enumerate(logfile, 1):
if show_progress and lineno % 5000 == 0:
print(lineno, end='\r', file=sys.stderr)
match = matchline(line)
if match is None: if match is None:
if not quiet: match = expensive_matchline(line)
print(f'Malformed line at {filename}:{lineno}: {line}', if match is None:
file=sys.stderr) if not quiet:
malformed_lines += 1 print(f'Malformed line at {filename}:{lineno}: {line}',
file=sys.stderr)
malformed_lines += 1
continue
agent = match.group('agent')
if any(x(agent) for x in skip_user_agent):
skipped_user_agent += 1
continue continue
agent = match.group('agent') url_match = matchrequest(match.group('request'))
if any(x(agent) for x in skip_user_agent): if url_match is None:
skipped_user_agent += 1 no_url_lines += 1
continue continue
url_match = matchrequest(match.group('request')) url = url_match.group('url')
if url_match is None: if url.startswith('http'):
no_url_lines += 1 url = splithost(splittype(url)[1])[1]
continue url = get_url_prefix(match, url)
url = url_match.group('url') for site, prefix_match, action in site_list:
if url.startswith('http'): if prefix_match(url) is not None:
url = splithost(splittype(url)[1])[1] break
url = get_url_prefix(match, url) else:
for site, prefix_match, action in site_list: site = None
if prefix_match(url) is not None: action = default_action
break if action is None:
else: skipped_lines += 1
site = None continue
action = default_action hit_date = asDate(matchToDateTime(match))
if action is None: if to_next_period is not None:
skipped_lines += 1 if latest_date is None or latest_date < hit_date:
continue latest_date = hit_date
hit_date = asDate(matchToDateTime(match)) if earliest_date is None or hit_date < earliest_date:
if to_next_period is not None: earliest_date = hit_date
if latest_date is None or latest_date < hit_date:
latest_date = hit_date
if earliest_date is None or hit_date < earliest_date:
earliest_date = hit_date
next_period = getNextPeriod()
try:
while latest_date > next_period:
period, to_next_period = next_period_data()
next_period = getNextPeriod() next_period = getNextPeriod()
except StopIteration: try:
to_next_period = None while latest_date > next_period:
if original_period != period: period, to_next_period = next_period_data()
original_period = period next_period = getNextPeriod()
if show_progress: except StopIteration:
print(f'Increasing period to {period}...', end='', to_next_period = None
file=sys.stderr) if original_period != period:
old_date_format = date_format original_period = period
( if show_progress:
asDate, print(f'Increasing period to {period}...', end='',
decimator, file=sys.stderr)
graph_period, old_date_format = date_format
date_format, (
placeholder_delta, asDate,
round_date, decimator,
graph_coefficient, graph_period,
) = period_parser[period] date_format,
latest_date = rescale(latest_date) placeholder_delta,
earliest_date = rescale(earliest_date) round_date,
period_increase_start = time.time() graph_coefficient,
for site_data in per_site.values(): ) = period_parser[period]
site_data.rescale(rescale, getDuration) latest_date = rescale(latest_date)
if show_progress: earliest_date = rescale(earliest_date)
print(f'done ({timedelta(seconds=time.time() - period_increase_start)})', period_increase_start = time.time()
for site_data in per_site.values():
site_data.rescale(rescale, getDuration)
if show_progress:
print(f'done ({timedelta(seconds=time.time() - period_increase_start)})',
file=sys.stderr)
hit_date = asDate(matchToDateTime(match))
try:
site_data = per_site[site]
except KeyError:
site_data = per_site[site] = action(threshold, getDuration,
error_detail=error_detail, user_agent_detail=user_agent_detail,
erp5_expand_other=erp5_expand_other)
try:
site_data.accumulate(match, url_match, hit_date)
except Exception: # pylint: disable=broad-exception-caught
if not quiet:
print(f'Error analysing line at {filename}:{lineno}: {line!r}',
file=sys.stderr) file=sys.stderr)
hit_date = asDate(matchToDateTime(match)) traceback.print_exc(file=sys.stderr)
try:
site_data = per_site[site]
except KeyError:
site_data = per_site[site] = action(threshold, getDuration,
error_detail=error_detail, user_agent_detail=user_agent_detail,
erp5_expand_other=erp5_expand_other)
try:
site_data.accumulate(match, url_match, hit_date)
except Exception: # pylint: disable=broad-exception-caught
if not quiet:
print(f'Error analysing line at {filename}:{lineno}: {line!r}',
file=sys.stderr)
traceback.print_exc(file=sys.stderr)
all_lines += lineno all_lines += lineno
if show_progress: if show_progress:
print(lineno, file=sys.stderr) print(lineno, file=sys.stderr)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment