From 59844c4f2e0cad9c31929de824d065a6b3eb524c Mon Sep 17 00:00:00 2001 From: Tim Peters <tim.one@comcast.net> Date: Tue, 26 Jul 2005 21:02:55 +0000 Subject: [PATCH] Merge rev 37435 from 3.4 branch. More words; update examples w/ new output. --- doc/ZEO/trace.txt | 77 ++++++++++++++++++++++++++++++----------------- src/ZEO/simul.py | 4 +-- 2 files changed, 52 insertions(+), 29 deletions(-) diff --git a/doc/ZEO/trace.txt b/doc/ZEO/trace.txt index 93ce38d8..32eef308 100644 --- a/doc/ZEO/trace.txt +++ b/doc/ZEO/trace.txt @@ -46,11 +46,11 @@ around 70% are good. 90% is excellent. If you see a hit rate under 60% you can probably improve the cache performance (and hence your Zope application server's performance) by increasing the ZEO cache size. This is normally configured using key ``cache_size`` in the ``zeoclient`` section of your -configuration file. The default cache size is 20 MB, which is very small. +configuration file. The default cache size is 20 MB, which is small. The stats.py tool shows its command line syntax when invoked without arguments. The tracefile argument can be a gzipped file if it has a .gz -extension. It will read from stdin (assuming uncompressed data) if the +extension. It will be read from stdin (assuming uncompressed data) if the tracefile argument is '-'. Simulating Different Cache Sizes @@ -66,36 +66,59 @@ statistics is added at the end. Example, assuming the trace file is in /tmp/cachetrace.log:: - $ python simul.py -s 100 /tmp/cachetrace.log - START TIME DURATION LOADS HITS INVALS WRITES FLIPS HITRATE - Sep 4 11:59 38:01 59833 40473 257 20 2 67.6% - $ + $ python simul.py -s 4 /tmp/cachetrace.log + CircularCacheSimulation, cache size 4,194,304 bytes + START TIME DURATION LOADS HITS INVALS WRITES HITRATE EVICTS INUSE + Jul 22 22:22 39:09 3218856 1429329 24046 41517 44.4% 40776 99.8 + +This shows that with a 4 MB cache size, the cache hit rate is 44.4%, the +percentage 1429329 (number of cache hits) is of 3218856 (number of load +requests). The cache simulated 40776 evictions, to make room for new object +states. At the end, 99.8% of the bytes reserved for the cache file were in +use to hold object state (the remaining 0.2% consists of "holes", bytes freed +by object eviction and not yet reused to hold another object's state). -This shows that with a 100 MB cache size, the cache hit rate is -67.6%. So let's try this again with a 200 MB cache size:: +Let's try this again with an 8 MB cache:: - $ python simul.py -s 200 /tmp/cachetrace.log - START TIME DURATION LOADS HITS INVALS WRITES FLIPS HITRATE - Sep 4 11:59 38:01 59833 40921 258 20 1 68.4% - $ + $ python simul.py -s 8 /tmp/cachetrace.log + CircularCacheSimulation, cache size 8,388,608 bytes + START TIME DURATION LOADS HITS INVALS WRITES HITRATE EVICTS INUSE + Jul 22 22:22 39:09 3218856 2182722 31315 41517 67.8% 40016 100.0 -This showed hardly any improvement. So let's try a 300 MB cache -size:: +That's a huge improvement in hit rate, which isn't surprising since these are +very small cache sizes. The default cache size is 20 MB, which is still on +the small side:: - $ python2.0 simul.py -s 300 /tmp/cachetrace.log - ZEOCacheSimulation, cache size 300,000,000 bytes - START TIME DURATION LOADS HITS INVALS WRITES FLIPS HITRATE - Sep 4 11:59 38:01 59833 40921 258 20 0 68.4% - $ + $ python simul.py /tmp/cachetrace.log + CircularCacheSimulation, cache size 20,971,520 bytes + START TIME DURATION LOADS HITS INVALS WRITES HITRATE EVICTS INUSE + Jul 22 22:22 39:09 3218856 2982589 37922 41517 92.7% 37761 99.9 -This shows that for this particular trace file, the maximum attainable -hit rate is 68.4%. This is probably caused by the fact that nearly a -third of the objects mentioned in the trace were loaded only once -- -the cache only helps if an object is loaded more than once. +Again a very nice improvement in hit rate, and there's not a lot of room left +for improvement. Let's try 100 MB:: -The simul.py tool also supports simulating different cache -strategies. Since none of these are implemented, these are not -further documented here. + $ python simul.py -s 100 /tmp/cachetrace.log + CircularCacheSimulation, cache size 104,857,600 bytes + START TIME DURATION LOADS HITS INVALS WRITES HITRATE EVICTS INUSE + Jul 22 22:22 39:09 3218856 3218741 39572 41517 100.0% 22778 100.0 + +It's very unusual to see a hit rate so high. The application here frequently +modified a very large BTree, so given enough cache space to hold the entire +BTree it rarely needed to ask the ZEO server for data: this application +reused the same objects over and over. + +More typical is that a substantial number of objects will be referenced only +once. Whenever an object turns out to be loaded only once, it's a pure loss +for the cache: the first (and only) load is a cache miss; storing the object +evicts other objects, possibly causing more cache misses; and the object is +never loaded again. If, for example, a third of the objects are loaded only +once, it's quite possible for the theoretical maximum hit rate to be 67%, no +matter how large the cache. + +The simul.py script also contains code to simulate different cache +strategies. Since none of these are implemented, and only the default cache +strategy's code has been updated to be aware of MVCC, these are not further +documented here. Simulation Limitations ---------------------- @@ -118,4 +141,4 @@ exact simulation: requests for O will continue to be simulated cache misses, although in a real cache they'll likely be cache hits. On the other hand, the simulated cache doesn't need to evict any objects to make room for O, so it - may enjoy further cache hits on objects a real cache would need to evict. + may enjoy further cache hits on objects a real cache would have evicted. diff --git a/src/ZEO/simul.py b/src/ZEO/simul.py index 2ca5b494..07c44f17 100644 --- a/src/ZEO/simul.py +++ b/src/ZEO/simul.py @@ -299,7 +299,7 @@ class CircularCacheEntry(object): from ZEO.cache import ZEC3_HEADER_SIZE class CircularCacheSimulation(Simulation): - """Simulate the ZEO 3.0a cache.""" + """Simulate the ZEO 3.0 cache.""" # The cache is managed as a single file with a pointer that # goes around the file, circularly, forever. New objects @@ -572,7 +572,7 @@ class CircularCacheSimulation(Simulation): # CAUTION: It's most likely that none of the simulators below this # point work anymore. A great many changes were needed to teach # CircularCacheSimulation (above) about MVCC, including method signature -# changes and changes in cache file format, and none of the others simulator +# changes and changes in cache file format, and none of the other simulator # classes were changed. ############################################################################# -- 2.30.9