Commit 3cc66082 authored by Vincent Pelletier

Use a context manager for logfile resource management

parent 466146fb
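The commit applies a common Python idiom: a log file the program opened itself should be closed when processing ends, but sys.stdin must be left untouched. The code therefore binds logfile_context either to the open file (a file object is its own context manager) or to contextlib.nullcontext(), a no-op context manager available in the standard library since Python 3.7, and wraps the whole read loop in a single with statement. Below is a minimal, self-contained sketch of the idiom; the names count_lines and the utf-8 encoding are hypothetical, not taken from apachedex itself:

import sys
from contextlib import nullcontext

def count_lines(filename):
  # '-' conventionally means "read from standard input".
  if filename == '-':
    logfile = sys.stdin
    logfile_context = nullcontext()  # no-op: must not close sys.stdin
  else:
    logfile = open(  # pylint: disable=consider-using-with
      filename,
      'rt',
      encoding='utf-8',  # hypothetical; apachedex uses its own INPUT_ENCODING
    )
    logfile_context = logfile  # a file object is its own context manager
  lineno = 0
  with logfile_context:  # closes the file on any exit path, does nothing for stdin
    for lineno, _line in enumerate(logfile, 1):
      pass
  return lineno

The single with statement replaces an explicit close() or try/finally and guarantees the file is released even if parsing raises, while leaving the stdin branch untouched.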
@@ -42,6 +42,7 @@
 from html import escape
 from collections import defaultdict, Counter
+from contextlib import nullcontext
 from datetime import datetime, timedelta, date, tzinfo
 from functools import partial
 from operator import itemgetter
@@ -1520,6 +1521,7 @@ def main():
           file=sys.stderr)
     if filename == '-':
       logfile = sys.stdin
+      logfile_context = nullcontext()
     else:
       for opener, exc in FILE_OPENER_LIST:
         logfile = opener(filename, 'rt', encoding=INPUT_ENCODING, errors=INPUT_ENCODING_ERROR_HANDLER)
@@ -1531,91 +1533,98 @@
           logfile.seek(0)
           break
       else:
-        logfile = open(filename, 'r', encoding=INPUT_ENCODING, errors=INPUT_ENCODING_ERROR_HANDLER)
-    lineno = 0
-    for lineno, line in enumerate(logfile, 1):
-      if show_progress and lineno % 5000 == 0:
-        print(lineno, end='\r', file=sys.stderr)
-      match = matchline(line)
-      if match is None:
-        match = expensive_matchline(line)
-        if match is None:
-          if not quiet:
-            print(f'Malformed line at {filename}:{lineno}: {line}',
-              file=sys.stderr)
-          malformed_lines += 1
-          continue
-      agent = match.group('agent')
-      if any(x(agent) for x in skip_user_agent):
-        skipped_user_agent += 1
-        continue
-      url_match = matchrequest(match.group('request'))
-      if url_match is None:
-        no_url_lines += 1
-        continue
-      url = url_match.group('url')
-      if url.startswith('http'):
-        url = splithost(splittype(url)[1])[1]
-      url = get_url_prefix(match, url)
-      for site, prefix_match, action in site_list:
-        if prefix_match(url) is not None:
-          break
-      else:
-        site = None
-        action = default_action
-      if action is None:
-        skipped_lines += 1
-        continue
-      hit_date = asDate(matchToDateTime(match))
-      if to_next_period is not None:
-        if latest_date is None or latest_date < hit_date:
-          latest_date = hit_date
-        if earliest_date is None or hit_date < earliest_date:
-          earliest_date = hit_date
-        next_period = getNextPeriod()
-        try:
-          while latest_date > next_period:
-            period, to_next_period = next_period_data()
-            next_period = getNextPeriod()
-        except StopIteration:
-          to_next_period = None
-        if original_period != period:
-          original_period = period
-          if show_progress:
-            print(f'Increasing period to {period}...', end='',
-              file=sys.stderr)
-          old_date_format = date_format
-          (
-            asDate,
-            decimator,
-            graph_period,
-            date_format,
-            placeholder_delta,
-            round_date,
-            graph_coefficient,
-          ) = period_parser[period]
-          latest_date = rescale(latest_date)
-          earliest_date = rescale(earliest_date)
-          period_increase_start = time.time()
-          for site_data in per_site.values():
-            site_data.rescale(rescale, getDuration)
-          if show_progress:
-            print(f'done ({timedelta(seconds=time.time() - period_increase_start)})',
-              file=sys.stderr)
-          hit_date = asDate(matchToDateTime(match))
-      try:
-        site_data = per_site[site]
-      except KeyError:
-        site_data = per_site[site] = action(threshold, getDuration,
-          error_detail=error_detail, user_agent_detail=user_agent_detail,
-          erp5_expand_other=erp5_expand_other)
-      try:
-        site_data.accumulate(match, url_match, hit_date)
-      except Exception: # pylint: disable=broad-exception-caught
-        if not quiet:
-          print(f'Error analysing line at {filename}:{lineno}: {line!r}',
-            file=sys.stderr)
-          traceback.print_exc(file=sys.stderr)
+        logfile = open( # pylint: disable=consider-using-with
+          filename,
+          'r',
+          encoding=INPUT_ENCODING,
+          errors=INPUT_ENCODING_ERROR_HANDLER,
+        )
+      logfile_context = logfile
+    with logfile_context:
+      lineno = 0
+      for lineno, line in enumerate(logfile, 1):
+        if show_progress and lineno % 5000 == 0:
+          print(lineno, end='\r', file=sys.stderr)
+        match = matchline(line)
+        if match is None:
+          match = expensive_matchline(line)
+          if match is None:
+            if not quiet:
+              print(f'Malformed line at {filename}:{lineno}: {line}',
+                file=sys.stderr)
+            malformed_lines += 1
+            continue
+        agent = match.group('agent')
+        if any(x(agent) for x in skip_user_agent):
+          skipped_user_agent += 1
+          continue
+        url_match = matchrequest(match.group('request'))
+        if url_match is None:
+          no_url_lines += 1
+          continue
+        url = url_match.group('url')
+        if url.startswith('http'):
+          url = splithost(splittype(url)[1])[1]
+        url = get_url_prefix(match, url)
+        for site, prefix_match, action in site_list:
+          if prefix_match(url) is not None:
+            break
+        else:
+          site = None
+          action = default_action
+        if action is None:
+          skipped_lines += 1
+          continue
+        hit_date = asDate(matchToDateTime(match))
+        if to_next_period is not None:
+          if latest_date is None or latest_date < hit_date:
+            latest_date = hit_date
+          if earliest_date is None or hit_date < earliest_date:
+            earliest_date = hit_date
+          next_period = getNextPeriod()
+          try:
+            while latest_date > next_period:
+              period, to_next_period = next_period_data()
+              next_period = getNextPeriod()
+          except StopIteration:
+            to_next_period = None
+          if original_period != period:
+            original_period = period
+            if show_progress:
+              print(f'Increasing period to {period}...', end='',
+                file=sys.stderr)
+            old_date_format = date_format
+            (
+              asDate,
+              decimator,
+              graph_period,
+              date_format,
+              placeholder_delta,
+              round_date,
+              graph_coefficient,
+            ) = period_parser[period]
+            latest_date = rescale(latest_date)
+            earliest_date = rescale(earliest_date)
+            period_increase_start = time.time()
+            for site_data in per_site.values():
+              site_data.rescale(rescale, getDuration)
+            if show_progress:
+              print(f'done ({timedelta(seconds=time.time() - period_increase_start)})',
+                file=sys.stderr)
+            hit_date = asDate(matchToDateTime(match))
+        try:
+          site_data = per_site[site]
+        except KeyError:
+          site_data = per_site[site] = action(threshold, getDuration,
+            error_detail=error_detail, user_agent_detail=user_agent_detail,
+            erp5_expand_other=erp5_expand_other)
+        try:
+          site_data.accumulate(match, url_match, hit_date)
+        except Exception: # pylint: disable=broad-exception-caught
+          if not quiet:
+            print(f'Error analysing line at {filename}:{lineno}: {line!r}',
+              file=sys.stderr)
+            traceback.print_exc(file=sys.stderr)
     all_lines += lineno
     if show_progress:
       print(lineno, file=sys.stderr)