From 59844c4f2e0cad9c31929de824d065a6b3eb524c Mon Sep 17 00:00:00 2001
From: Tim Peters <tim.one@comcast.net>
Date: Tue, 26 Jul 2005 21:02:55 +0000
Subject: [PATCH] Merge rev 37435 from 3.4 branch.

More words; update examples w/ new output.
---
 doc/ZEO/trace.txt | 77 ++++++++++++++++++++++++++++++-----------------
 src/ZEO/simul.py  |  4 +--
 2 files changed, 52 insertions(+), 29 deletions(-)

diff --git a/doc/ZEO/trace.txt b/doc/ZEO/trace.txt
index 93ce38d8..32eef308 100644
--- a/doc/ZEO/trace.txt
+++ b/doc/ZEO/trace.txt
@@ -46,11 +46,11 @@ around 70% are good.  90% is excellent.  If you see a hit rate under 60% you
 can probably improve the cache performance (and hence your Zope application
 server's performance) by increasing the ZEO cache size.  This is normally
 configured using key ``cache_size`` in the ``zeoclient`` section of your
-configuration file.  The default cache size is 20 MB, which is very small.
+configuration file.  The default cache size is 20 MB, which is small.
 
 The stats.py tool shows its command line syntax when invoked without
 arguments.  The tracefile argument can be a gzipped file if it has a .gz
-extension.  It will read from stdin (assuming uncompressed data) if the
+extension.  The trace is read from stdin (assuming uncompressed data) if the
 tracefile argument is '-'.
 
 Simulating Different Cache Sizes
@@ -66,36 +66,59 @@ statistics is added at the end.
 
 Example, assuming the trace file is in /tmp/cachetrace.log::
 
-    $ python simul.py -s 100 /tmp/cachetrace.log
-      START TIME  DURATION    LOADS     HITS INVALS WRITES  FLIPS HITRATE
-    Sep  4 11:59     38:01    59833    40473    257     20      2  67.6%
-    $
+    $ python simul.py -s 4 /tmp/cachetrace.log
+    CircularCacheSimulation, cache size 4,194,304 bytes
+      START TIME  DURATION    LOADS     HITS INVALS WRITES HITRATE  EVICTS   INUSE
+    Jul 22 22:22     39:09  3218856  1429329  24046  41517   44.4%   40776    99.8
+
+This shows that with a 4 MB cache size, the cache hit rate is 44.4%, i.e.
+1429329 (number of cache hits) as a percentage of 3218856 (number of load
+requests).  The cache simulated 40776 evictions to make room for new object
+states.  At the end, 99.8% of the bytes reserved for the cache file were in
+use to hold object state (the remaining 0.2% consists of "holes", bytes freed
+by object eviction and not yet reused to hold another object's state).
 
-This shows that with a 100 MB cache size, the cache hit rate is
-67.6%.  So let's try this again with a 200 MB cache size::
+Let's try this again with an 8 MB cache::
 
-    $ python simul.py -s 200 /tmp/cachetrace.log
-      START TIME  DURATION    LOADS     HITS INVALS WRITES  FLIPS HITRATE
-    Sep  4 11:59     38:01    59833    40921    258     20      1  68.4%
-    $
+    $ python simul.py -s 8 /tmp/cachetrace.log
+    CircularCacheSimulation, cache size 8,388,608 bytes
+      START TIME  DURATION    LOADS     HITS INVALS WRITES HITRATE  EVICTS   INUSE
+    Jul 22 22:22     39:09  3218856  2182722  31315  41517   67.8%   40016   100.0
 
-This showed hardly any improvement.  So let's try a 300 MB cache
-size::
+That's a huge improvement in hit rate, which isn't surprising since these are
+very small cache sizes.  The default cache size is 20 MB, which is still on
+the small side::
 
-    $ python2.0 simul.py -s 300 /tmp/cachetrace.log
-    ZEOCacheSimulation, cache size 300,000,000 bytes
-      START TIME  DURATION    LOADS     HITS INVALS WRITES  FLIPS HITRATE
-    Sep  4 11:59     38:01    59833    40921    258     20      0  68.4%
-    $
+    $ python simul.py /tmp/cachetrace.log
+    CircularCacheSimulation, cache size 20,971,520 bytes
+      START TIME  DURATION    LOADS     HITS INVALS WRITES HITRATE  EVICTS   INUSE
+    Jul 22 22:22     39:09  3218856  2982589  37922  41517   92.7%   37761    99.9
 
-This shows that for this particular trace file, the maximum attainable
-hit rate is 68.4%.  This is probably caused by the fact that nearly a
-third of the objects mentioned in the trace were loaded only once --
-the cache only helps if an object is loaded more than once.
+Again a very nice improvement in hit rate, and there's not a lot of room left
+for improvement.  Let's try 100 MB::
 
-The simul.py tool also supports simulating different cache
-strategies.  Since none of these are implemented, these are not
-further documented here.
+    $ python simul.py -s 100 /tmp/cachetrace.log
+    CircularCacheSimulation, cache size 104,857,600 bytes
+      START TIME  DURATION    LOADS     HITS INVALS WRITES HITRATE  EVICTS   INUSE
+    Jul 22 22:22     39:09  3218856  3218741  39572  41517  100.0%   22778   100.0
+
+It's very unusual to see a hit rate so high.  The application here frequently
+modified a very large BTree, so given enough cache space to hold the entire
+BTree it rarely needed to ask the ZEO server for data:  this application
+reused the same objects over and over.
+
+More typical is that a substantial number of objects will be referenced only
+once.  Whenever an object turns out to be loaded only once, it's a pure loss
+for the cache:  the first (and only) load is a cache miss; storing the object
+evicts other objects, possibly causing more cache misses; and the object is
+never loaded again.  If, for example, a third of the objects are loaded only
+once, it's quite possible for the theoretical maximum hit rate to be 67%, no
+matter how large the cache.
+
+The simul.py script also contains code to simulate different cache
+strategies.  Since none of these are implemented by ZEO itself, and only the
+default cache strategy's code has been updated to be aware of MVCC, these are
+not further documented here.
 
 Simulation Limitations
 ----------------------
@@ -118,4 +141,4 @@ exact simulation:
   requests for O will continue to be simulated cache misses, although in a
   real cache they'll likely be cache hits.  On the other hand, the
   simulated cache doesn't need to evict any objects to make room for O, so it
-  may enjoy further cache hits on objects a real cache would need to evict.
+  may enjoy further cache hits on objects a real cache would have evicted.
diff --git a/src/ZEO/simul.py b/src/ZEO/simul.py
index 2ca5b494..07c44f17 100644
--- a/src/ZEO/simul.py
+++ b/src/ZEO/simul.py
@@ -299,7 +299,7 @@ class CircularCacheEntry(object):
 from ZEO.cache import ZEC3_HEADER_SIZE
 
 class CircularCacheSimulation(Simulation):
-    """Simulate the ZEO 3.0a cache."""
+    """Simulate the ZEO 3.0 cache."""
 
     # The cache is managed as a single file with a pointer that
     # goes around the file, circularly, forever.  New objects
@@ -572,7 +572,7 @@ class CircularCacheSimulation(Simulation):
 # CAUTION:  It's most likely that none of the simulators below this
 # point work anymore.  A great many changes were needed to teach
 # CircularCacheSimulation (above) about MVCC, including method signature
-# changes and changes in cache file format, and none of the others simulator
+# changes and changes in cache file format, and none of the other simulator
 # classes were changed.
 #############################################################################
 
-- 
2.30.9