From 1106f02e192976315abe7ac02223cba83e660773 Mon Sep 17 00:00:00 2001
From: Tatuya Kamada <tatuya@nexedi.com>
Date: Tue, 31 Jan 2012 15:32:48 +0900
Subject: [PATCH] Fix duplicated web-checker results for URLs differing only by a trailing slash.

---
 erp5/util/webchecker/__init__.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/erp5/util/webchecker/__init__.py b/erp5/util/webchecker/__init__.py
index 335354f064..c9b8e0afc0 100644
--- a/erp5/util/webchecker/__init__.py
+++ b/erp5/util/webchecker/__init__.py
@@ -363,6 +363,25 @@ class HTTPCacheCheckerTestSuite(object):
                                     (header, read_value, reference_value)
           self.report_dict.setdefault(url, []).append(message)
 
+  def _isSameUrl(self, url):
+    """
+      Return whether the trailing-slash variant of the url was already checked.
+
+      Example case:
+      http://example.com/login_form
+      http://example.com/login_form/
+    """
+    if url in (None, ''):
+      return False
+    same_url = None
+    if url.endswith('/'):
+      same_url = url.rstrip('/')
+    else:
+      same_url = '%s/' % url
+    if same_url in self.report_dict:
+      return True
+    return False
+
   def _parseWgetLogs(self, wget_log_file, discarded_url_list=_MARKER,
                      prohibited_file_name_list=None,
                      prohibited_folder_name_list=None):
@@ -386,6 +405,8 @@ class HTTPCacheCheckerTestSuite(object):
           # URL already checked during first pass
           logging.debug('%r Discarded' % url)
           discarded = True
+        elif self._isSameUrl(url):
+          discarded = True
       if discarded:
         # keep reading wget process without doing anything
         continue
-- 
2.30.9