From 4aaadb960bbec2fcec00eb7aa59ee48b40fd48fa Mon Sep 17 00:00:00 2001
From: Tristan Cavelier <tristan.cavelier@tiolive.com>
Date: Tue, 17 Jun 2014 13:42:07 +0000
Subject: [PATCH] erp5 cluster: add parameters to the cache-hit check script
 and improve its output

---
 stack/erp5/buildout.cfg                       |  4 +-
 stack/erp5/instance-http-monitor.cfg.in       | 12 ++++
 .../monitor-check-cache-hit.in                | 57 +++++++------------
 3 files changed, 35 insertions(+), 38 deletions(-)

diff --git a/stack/erp5/buildout.cfg b/stack/erp5/buildout.cfg
index 91a654ea8..a7d2c1860 100644
--- a/stack/erp5/buildout.cfg
+++ b/stack/erp5/buildout.cfg
@@ -180,7 +180,7 @@ context =
 recipe = slapos.recipe.template
 filename = instance-http-monitor.cfg.in
 url = ${:_profile_base_location_}/${:filename}
-md5sum = 48037c15a0140c1e094049183a29f34e
+md5sum = f1d5fa0e3f0b5f42cc87119c427d20a6
 output = ${buildout:directory}/template-http-monitor.cfg.in
 
 ##################e
@@ -191,7 +191,7 @@ output = ${buildout:directory}/template-http-monitor.cfg.in
 < = download-base
 url = ${:_profile_base_location_}/monitor-templates/monitor-check-cache-hit.in
 download-only = true
-md5sum = 4703a0f64c72da35b897d020e022d1b1
+md5sum = 3362bea561597cd29d10d754c251c8bf
 filename = monitor-check-cache-hit.in
 mode = 0644
 
diff --git a/stack/erp5/instance-http-monitor.cfg.in b/stack/erp5/instance-http-monitor.cfg.in
index f1dd41d00..e49b93670 100644
--- a/stack/erp5/instance-http-monitor.cfg.in
+++ b/stack/erp5/instance-http-monitor.cfg.in
@@ -30,6 +30,18 @@ template = ${template-monitor-check-cache-hit:location}/${template-monitor-check
 rendered = $${monitor-directory:monitor-custom-scripts}/check-cache-hit.py
 mode = 700
 context =
+  key url_list zero-parameters:url_list
+  key resolve_list zero-parameters:resolve_list
   raw python_executable ${buildout:bin-directory}/python2.7
 
+[public]
+recipe = slapos.cookbook:zero-knowledge.write
+filename = knowledge0.cfg
+url_list = ['http://www.erp5.com/']
+resolve_list = ['www.erp5.com:80:5.135.149.226']
+
+[zero-parameters]
+recipe = slapos.cookbook:zero-knowledge.read
+filename = $${public:filename}
+
 {% endif %}
diff --git a/stack/erp5/monitor-templates/monitor-check-cache-hit.in b/stack/erp5/monitor-templates/monitor-check-cache-hit.in
index b5fa579a2..ca6c6f710 100644
--- a/stack/erp5/monitor-templates/monitor-check-cache-hit.in
+++ b/stack/erp5/monitor-templates/monitor-check-cache-hit.in
@@ -19,7 +19,9 @@ def log(*args):
 def info(*args):
   sys.stdout.write("INFO : " + "\n     : ".join(" ".join((str(arg) for arg in args)).split("\n")) + "\n")
   pass
+last_warn_log = None
 def warn(*args):
+  global last_warn_log
   last_warn_log = "WARN : " + "\n     : ".join(" ".join((str(arg) for arg in args)).split("\n")) + "\n"
   sys.stderr.write(last_warn_log)
 last_error_log = None
@@ -29,6 +31,9 @@ def error(*args):
   sys.stderr.write(last_error_log)
 
 class MyHTMLParser(HTMLParser):
+  def __init__(self, base_url):
+    HTMLParser.__init__(self)
+    self.base_url = base_url
   def handle_starttag(self, tag, attrs):
     if tag == 'img' or tag == 'script': # TODO: CSS & JS
       debug(tag, attrs)
@@ -36,7 +41,7 @@ class MyHTMLParser(HTMLParser):
         if attr[0] == 'src':
           url = attr[1]
           if not url.startswith('http'):
-            url = base + url
+            url = self.base_url + url
           do_request(url)
 
 headers = {
@@ -47,23 +52,24 @@ headers = {
  #   "Connection": "keep-alive"
 }
 
-base = "http://www.erp5.com/"
+url_list = {{ url_list }}
+# url_list = ["http://www.erp5.com"]
+resolve_list = {{ resolve_list }}
+# resolve_list = ["www.erp5.com:80:5.135.149.226"]
 
-vary_dict = dict()
-hit_dict = dict()
-status_dict = dict()
-headers_dict = dict()
+parsed_url_dict = {}
 
 info("Start checking for cache hits")
 
 def do_request(url):
-  if url in hit_dict: return
+  if parsed_url_dict.get("url") is not None: return
+  parsed_url_dict[url] = True
   log("Checking cache hit for", url)
   c = pycurl.Curl()
   response_headers = StringIO()
   output = StringIO()
   c.setopt(c.URL, url)
-  c.setopt(c.RESOLVE, ["www.erp5.com:80:5.135.149.226"])
+  c.setopt(c.RESOLVE, resolve_list)
   c.setopt(c.WRITEFUNCTION, output.write)
   c.setopt(c.HEADERFUNCTION, response_headers.write)
   c.perform() # perform a request before testing if the cache is hit
@@ -77,41 +83,20 @@ def do_request(url):
   m = Message(response_headers)
 
   # see http://labs.omniti.com/people/mark/ats_sa/slides.html#slide-18
-  hit_dict[url] = any("[cHs" in header or "[cSs" in header for header in m.getheaders('via'))
-  vary_dict[url] = m.getheader('vary')
-  status_dict[url] = status
-  headers_dict[url] = response_headers.getvalue()
-  if not hit_dict[url]:
-    error("No cache hit found in", m.getheaders('via'))
+  if any("[cHs" in header or "[cSs" in header for header in m.getheaders('via')) or \
+     any("HIT" in header for header in m.getheaders("x-cache")):
+    debug("Cache hit found in 'Via' or 'X-Cache' headers")
   else:
-    debug("Cache hit found in", m.getheaders('via'))
+    error("No cache hit found in 'Via' or 'X-Cache' headers\n" + response_headers.getvalue().rstrip())
 
   if m.getheader('content-type', '').startswith('text/html'):
-    MyHTMLParser().feed(output.getvalue())
+    MyHTMLParser(url).feed(output.getvalue())
 
   response_headers.close()
   output.close()
 
-do_request(base)
-
-from pprint import pformat
-
-debug('--------------------------------------------------')
-debug('hit_dict')
-debug(pformat(hit_dict))
-debug('--------------------------------------------------')
-debug('vary_dict')
-debug(pformat(vary_dict))
-debug('--------------------------------------------------')
-debug('status_dict')
-debug(pformat(status_dict))
-debug('--------------------------------------------------')
-
-for url, hit in hit_dict.items():
-  if not hit:
-    log(url)
-    log(headers_dict[url])
-    log()
+for url in url_list:
+  do_request(url)
 
 if last_error_log is not None:
   sys.exit(1)
-- 
2.30.9