Use a context manager for logfile resource management

3cc66082 · Vincent Pelletier · 466146fb · 3cc66082
Commit 3cc66082 authored Dec 27, 2023 by Vincent Pelletier
Hide whitespace changes
Inline Side-by-side

Showing with 90 additions and 81 deletions

apachedex/__init__.py apachedex/__init__.py +90 -81

No files found.
--- a/apachedex/__init__.py
+++ b/apachedex/__init__.py
@@ -42,6 +42,7 @@
 from html import escape
 from collections import defaultdict, Counter
+from contextlib import nullcontext
 from datetime import datetime, timedelta, date, tzinfo
 from functools import partial
 from operator import itemgetter
@@ -1520,6 +1521,7 @@ def main():
        file=sys.stderr)
    if filename == '-':
      logfile = sys.stdin
+      logfile_context = nullcontext()
    else:
      for opener, exc in FILE_OPENER_LIST:
        logfile = opener(filename, 'rt', encoding=INPUT_ENCODING, errors=INPUT_ENCODING_ERROR_HANDLER)
@@ -1531,91 +1533,98 @@ def main():
          logfile.seek(0)
          break
      else:
-        logfile = open(filename, 'r', encoding=INPUT_ENCODING, errors=INPUT_ENCODING_ERROR_HANDLER)
+        logfile = open( # pylint: disable=consider-using-with
-    lineno = 0
+          filename,
-    for lineno, line in enumerate(logfile, 1):
+          'r',
-      if show_progress and lineno % 5000 == 0:
+          encoding=INPUT_ENCODING,
-        print(lineno, end='\r', file=sys.stderr)
+          errors=INPUT_ENCODING_ERROR_HANDLER,
-      match = matchline(line)
+        )
-      if match is None:
+      logfile_context = logfile
-        match = expensive_matchline(line)
+    with logfile_context:
+      lineno = 0
+      for lineno, line in enumerate(logfile, 1):
+        if show_progress and lineno % 5000 == 0:
+          print(lineno, end='\r', file=sys.stderr)
+        match = matchline(line)
        if match is None:
-          if not quiet:
+          match = expensive_matchline(line)
-            print(f'Malformed line at {filename}:{lineno}: {line}',
+          if match is None:
-              file=sys.stderr)
+            if not quiet:
-          malformed_lines += 1
+              print(f'Malformed line at {filename}:{lineno}: {line}',
+                file=sys.stderr)
+            malformed_lines += 1
+            continue
+        agent = match.group('agent')
+        if any(x(agent) for x in skip_user_agent):
+          skipped_user_agent += 1
          continue
-      agent = match.group('agent')
+        url_match = matchrequest(match.group('request'))
-      if any(x(agent) for x in skip_user_agent):
+        if url_match is None:
-        skipped_user_agent += 1
+          no_url_lines += 1
-        continue
+          continue
-      url_match = matchrequest(match.group('request'))
+        url = url_match.group('url')
-      if url_match is None:
+        if url.startswith('http'):
-        no_url_lines += 1
+          url = splithost(splittype(url)[1])[1]
-        continue
+        url = get_url_prefix(match, url)
-      url = url_match.group('url')
+        for site, prefix_match, action in site_list:
-      if url.startswith('http'):
+          if prefix_match(url) is not None:
-        url = splithost(splittype(url)[1])[1]
+            break
-      url = get_url_prefix(match, url)
+        else:
-      for site, prefix_match, action in site_list:
+          site = None
-        if prefix_match(url) is not None:
+          action = default_action
-          break
+        if action is None:
-      else:
+          skipped_lines += 1
-        site = None
+          continue
-        action = default_action
+        hit_date = asDate(matchToDateTime(match))
-      if action is None:
+        if to_next_period is not None:
-        skipped_lines += 1
+          if latest_date is None or latest_date < hit_date:
-        continue
+            latest_date = hit_date
-      hit_date = asDate(matchToDateTime(match))
+          if earliest_date is None or hit_date < earliest_date:
-      if to_next_period is not None:
+            earliest_date = hit_date
-        if latest_date is None or latest_date < hit_date:
-          latest_date = hit_date
-        if earliest_date is None or hit_date < earliest_date:
-          earliest_date = hit_date
-          next_period = getNextPeriod()
-        try:
-          while latest_date > next_period:
-            period, to_next_period = next_period_data()
            next_period = getNextPeriod()
-        except StopIteration:
+          try:
-          to_next_period = None
+            while latest_date > next_period:
-        if original_period != period:
+              period, to_next_period = next_period_data()
-          original_period = period
+              next_period = getNextPeriod()
-          if show_progress:
+          except StopIteration:
-            print(f'Increasing period to {period}...', end='',
+            to_next_period = None
-              file=sys.stderr)
+          if original_period != period:
-          old_date_format = date_format
+            original_period = period
-          (
+            if show_progress:
-            asDate,
+              print(f'Increasing period to {period}...', end='',
-            decimator,
+                file=sys.stderr)
-            graph_period,
+            old_date_format = date_format
-            date_format,
+            (
-            placeholder_delta,
+              asDate,
-            round_date,
+              decimator,
-            graph_coefficient,
+              graph_period,
-          ) = period_parser[period]
+              date_format,
-          latest_date = rescale(latest_date)
+              placeholder_delta,
-          earliest_date = rescale(earliest_date)
+              round_date,
-          period_increase_start = time.time()
+              graph_coefficient,
-          for site_data in per_site.values():
+            ) = period_parser[period]
-            site_data.rescale(rescale, getDuration)
+            latest_date = rescale(latest_date)
-          if show_progress:
+            earliest_date = rescale(earliest_date)
-            print(f'done ({timedelta(seconds=time.time() - period_increase_start)})',
+            period_increase_start = time.time()
+            for site_data in per_site.values():
+              site_data.rescale(rescale, getDuration)
+            if show_progress:
+              print(f'done ({timedelta(seconds=time.time() - period_increase_start)})',
+                file=sys.stderr)
+            hit_date = asDate(matchToDateTime(match))
+        try:
+          site_data = per_site[site]
+        except KeyError:
+          site_data = per_site[site] = action(threshold, getDuration,
+            error_detail=error_detail, user_agent_detail=user_agent_detail,
+            erp5_expand_other=erp5_expand_other)
+        try:
+          site_data.accumulate(match, url_match, hit_date)
+        except Exception: # pylint: disable=broad-exception-caught
+          if not quiet:
+            print(f'Error analysing line at {filename}:{lineno}: {line!r}',
              file=sys.stderr)
-          hit_date = asDate(matchToDateTime(match))
+            traceback.print_exc(file=sys.stderr)
-      try:
-        site_data = per_site[site]
-      except KeyError:
-        site_data = per_site[site] = action(threshold, getDuration,
-          error_detail=error_detail, user_agent_detail=user_agent_detail,
-          erp5_expand_other=erp5_expand_other)
-      try:
-        site_data.accumulate(match, url_match, hit_date)
-      except Exception: # pylint: disable=broad-exception-caught
-        if not quiet:
-          print(f'Error analysing line at {filename}:{lineno}: {line!r}',
-            file=sys.stderr)
-          traceback.print_exc(file=sys.stderr)
    all_lines += lineno
    if show_progress:
      print(lineno, file=sys.stderr)