From 1ed2b68202a2e79db45f669767959f76b84f1a6c Mon Sep 17 00:00:00 2001
From: Tatuya Kamada <tatuya@nexedi.com>
Date: Thu, 19 Apr 2012 17:33:35 +0900
Subject: [PATCH] Introduce the best practices from erp5 public web sites
 experience.

---
 .../generic_varnish/template/default.vcl.in   | 247 +++++++++++-------
 1 file changed, 155 insertions(+), 92 deletions(-)

diff --git a/slapos/recipe/generic_varnish/template/default.vcl.in b/slapos/recipe/generic_varnish/template/default.vcl.in
index 0a792e083..220bc446c 100644
--- a/slapos/recipe/generic_varnish/template/default.vcl.in
+++ b/slapos/recipe/generic_varnish/template/default.vcl.in
@@ -5,120 +5,183 @@
 #server.
 #
 backend default {
-  .host = "%(backend_server)s";
-  .port = "%(backend_port)s";
-  .probe = {
-           .timeout = 30s;
-           .interval = 5s;
-           .window = 4;
-           .threshold = 3;
-           .request =
-             "OPTIONS /erp5/getId HTTP/1.1"
-             "Host: %(backend_server)s:%(backend_port)s"
-             "Accept-Encoding: identity"
-             "Connection: close"
-             "User-Agent: Varnish";
-  }
+    .host = "%(backend_server)s";
+    .port = "%(backend_port)s";
+    .probe = {
+        .timeout = 30s;
+        .interval = 5s;
+        .window = 4;
+        .threshold = 3;
+        .request =
+            "OPTIONS /erp5/getId HTTP/1.1"
+            "Host: %(backend_server)s:%(backend_port)s"
+            "Accept-Encoding: identity"
+            "Connection: close"
+            "User-Agent: Varnish";
+    }
 }
-#
-#Below is a commented-out copy of the default VCL logic.  If you
-#redefine any of these subroutines, the built-in logic will be
-#appended to your code.
-#
 
+# Called at the beginning of a request, after the complete request has been received and parsed
 sub vcl_recv {
-  if (req.request != "GET" &&
-    req.request != "HEAD" &&
-    req.request != "PUT" &&
-    req.request != "POST" &&
-    req.request != "TRACE" &&
-    req.request != "OPTIONS" &&
-    req.request != "PURGE" &&
-    req.request != "DELETE") {
-      /* Non-RFC2616 or CONNECT which is weird. */
-      return(pipe);
-  }
-  if (req.request != "GET" && req.request != "HEAD" && req.request != "PURGE") {
-      /* We only deal with GET and HEAD by default */
-      return(pass);
-  }
-  remove req.http.Cookie;
-  remove req.http.Set-Cookie;
-  if (req.http.Accept-Encoding) {
-    if (req.http.Accept-Encoding ~ "gzip") {
-      set req.http.Accept-Encoding = "gzip";
-    } elsif (req.http.Accept-Encoding ~ "deflate") {
-      set req.http.Accept-Encoding = "deflate";
+    # Force lookup if the request is a no-cache request from the client
+    if (req.http.cache-control ~ "no-cache") {
+        ban_url(req.url);
+    }
+    # Pass any requests that Varnish does not understand straight to the backend.
+    if (req.request != "GET" &&
+        req.request != "HEAD" &&
+        req.request != "PUT" &&
+        req.request != "POST" &&
+        req.request != "TRACE" &&
+        req.request != "OPTIONS" &&
+        req.request != "PURGE" &&
+        req.request != "DELETE") {
+        /* Non-RFC2616 or CONNECT which is weird. */
+        return(pipe);
+    }
+    # Pass anything other than GET and HEAD and PURGE directly.
+    if (req.request != "GET" && req.request != "HEAD" && req.request != "PURGE") {
+        /* We only deal with GET and HEAD by default */
+        return(pass);
+    }
+    if (req.http.Authorization) {
+        /* Not cacheable by default */
+        return (pass);
+    }
+
+    # no need to have cookies for the resources
+    if (req.url ~ "\.(css|js|ico)$") {
+        unset req.http.cookie;
+    }
+    # remove bogus cookies
+    if (req.http.Cookie) {
+        set req.http.Cookie = regsuball(req.http.Cookie, "(^|; ) *__utm.=[^;]+;? *", "\1");
+        set req.http.Cookie = regsuball(req.http.Cookie, "(^|; ) *__ac_name=\x22\x22;? *", "\1");
+        set req.http.Cookie = regsuball(req.http.Cookie, "(^|; ) *__ac=\x22Og.3D.3D\x22;? *", "\1");
+    }
+    if (req.http.Cookie == "") {
+        remove req.http.Cookie;
+    }
+    if (req.http.Cookie && req.http.Cookie ~ "(^|; ) *__ac=") {
+        /* Not cacheable for authorised users,
+           but KM images are cacheable */
+        if (!(req.url ~ "/km_img/.*\.(png|gif)$")) {
+            return (pass);
+        }
+    }
+    # XXX Is it OK to remove this of all the case?
+    remove req.http.Set-Cookie;
+
+    if (req.http.Accept-Encoding) {
+        if (req.http.Accept-Encoding ~ "gzip") {
+            set req.http.Accept-Encoding = "gzip";
+        } elsif (req.http.Accept-Encoding ~ "deflate") {
+            set req.http.Accept-Encoding = "deflate";
+        } else {
+            # unkown algorithm
+            remove req.http.Accept-Encoding;
+        }
+    }
+    # We do not care about Accept-Language, this is url controlled
+    remove req.http.Accept-Language;
+
+    ## XXX login form can defer based on __ac_name cookie value
+    if (req.url ~ "/(login_form|WebSite_viewLoginDialog)($|\?)") {
+        return (pass);
+    }
+
+    if (req.backend.healthy) {
+        set req.grace = 1h;
     } else {
-      # unkown algorithm
-      remove req.http.Accept-Encoding;
-    }
-  }
-  # Force deflate
-  remove req.http.Accept-Encoding;
-  # We do not care about Accept-Language, this is url controlled
-  remove req.http.Accept-Language;
-  #if (req.request == "PURGE") {
-  #  if (!client.ip ~ purge) {
-  #    error 405 "Not allowed.";
-  #  }
-  #  purge_url(req.url);
-  #  error 200 "HASHPURGED";
-  #  unset req.http.x;
-  #}
-  set req.grace = 30d;
-  return(lookup);
+        set req.grace = 1w;
+    }
+    return(lookup);
 }
 
+# Creates the varnish cache key by the url
 sub vcl_hash {
     hash_data(req.url);
     return(hash);
 }
 
+# Called after a cache lookup if the requested document was found in the cache
 sub vcl_hit {
-   #if (req.request == "PURGE" && client.ip ~ purge) {
-   #  set obj.ttl = 0s;
-   #  error 200 "Purged.";
-   #}
-
-   #if (client.ip ~ purge){
-   #  # Force refresh from localhost
-   #  set obj.ttl = 0s;
-   #  return (restart);
-   #}
-   # According Vary Header do not return those headers
-   remove req.http.Accept-Language;
-   remove req.http.Accept-Encoding;
-   remove req.http.Cookie;
-   return(deliver);
+    # According Vary Header do not return those headers
+    remove req.http.Accept-Language;
+    remove req.http.Accept-Encoding;
+    remove req.http.Cookie;
+    return(deliver);
 }
 
+# Called after a cache lookup if the requested document was not found in the cache
 sub vcl_miss {
     return(fetch);
 }
 
+# Called after a document has been successfully retrieved from the backend
 sub vcl_fetch {
+    # we only cache 200 (OK) and 304 (Not Modified) responses.
+    if (beresp.status != 200 && beresp.status != 304) {
+        set beresp.ttl = 0s;
+    }
+
+    if (beresp.http.cache-control ~ "no-cache") {
+        set beresp.ttl = 0s;
+    }
+
+    if (beresp.ttl == 0s) {
+        unset beresp.http.expires;
+        set beresp.http.cache-control = "no-cache";
+        return(hit_for_pass);
+    }
+
+    # we don't care haproxy's cookie.
+    if (beresp.http.Set-Cookie && beresp.http.Set-Cookie !~ "^SERVERID=[^;]+; path=/$") {
+        return(hit_for_pass);
+    }
+
+    if (req.url ~ "\.(css|js|ico)$") {
+        unset beresp.http.set-cookie;
+        set beresp.http.cache-control = regsub(beresp.http.cache-control, "^", "public,");
+        set beresp.http.cache-control = regsub(beresp.http.cache-control, ",$", "");
+    }
+
+    # remove some headers added by caching policy manager to avoid
+    # '304 Not Modified' in case of login <-> logout switching.
+    if (beresp.http.content-type ~ "^text/html") {
+       unset beresp.http.last-modified;
+    }
+
+    set beresp.grace = 1w;
     /* Never send request to backend even if client ask refreshed content */
     if (beresp.ttl > 0s) {
-       /* Setup grace period for 30days for all cacheable contents */
-      #set req.grace = 30d;
-      set beresp.grace = 30d;
-      }
-   return(deliver);
-   }
+        /* Setup grace period for 30days for all cacheable contents */
+        set beresp.grace = 30d;
+        /* Remove Expires from backend, it's not long enough */
+        unset beresp.http.expires;
+        # /* Set the clients TTL on this object */
+        # set beresp.http.cache-control = "max-age = 300";
+        /* Set how long Varnish will keep it */
+        set beresp.ttl = 1w;
+        /* marker for vcl_deliver to reset Age: */
+        set beresp.http.magicmarker = "1";
+    }
 
+    return(deliver);
+}
 
+# Called before a cached object is delivered to the client
 sub vcl_deliver {
-   if (obj.hits > 0) {
-     set resp.http.X-Cache = obj.hits;
-   } else {
-     set resp.http.X-Cache = "MISS";
-   }
-   #if (obj.hash) {
-   #  set resp.http.X-Hash = obj.hash;
-   #} else {
-   #  set resp.http.X-Hash = "No hash";
-   #}
-
-   return(deliver);
+    if (resp.http.magicmarker) {
+        /* Remove the magic marker */
+        unset resp.http.magicmarker;
+        /* By definition we have a fresh object */
+        set resp.http.age = "0";
+    }
+    if (obj.hits > 0) {
+        set resp.http.X-Cache = obj.hits;
+    } else {
+        set resp.http.X-Cache = "MISS";
+    }
+    return(deliver);
 }
-- 
2.30.9