Commit 94a7da6f authored by Tim Peters

Many little bugfixes and improvements in stats.py.

This has survived several 100 MB of trace files
I generated over the last few days, so it's solid
now if not necessarily perfect.

Replaced simul.py with the much broader-ranging code
Jeremy and I were working on a couple years ago,
although it can't work with the current trace file
format (no real loss there -- the simul.py it's
replacing can't work with the current format either).
parent 2f0cf584
...@@ -24,7 +24,7 @@ application server) must be restarted.
The trace file can grow pretty quickly; on a moderately loaded server, we
observed it growing by 5 MB per hour. The file consists of binary records,
-each 26 bytes long if 8-byte oids are in use; a detailed description of the
each 34 bytes long if 8-byte oids are in use; a detailed description of the
record lay-out is given in stats.py. No sensitive data is logged: data
record sizes and binary object and transaction ids are logged, but no
information about object types or names, user names, version names,
...
#! /usr/bin/env python
##############################################################################
#
# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
...@@ -13,7 +14,7 @@
##############################################################################
"""Cache simulation.
-Usage: simul.py [-bflyz] [-X] [-s size] tracefile
Usage: simul.py [-bflyz] [-s size] tracefile
Use one of -b, -f, -l, -y or -z to select the cache simulator:
-b: buddy system allocator
...@@ -24,8 +25,6 @@ Use one of -b, -f, -l, -y or -z to select the cache simulator:
Options:
-s size: cache size in MB (default 20 MB)
--X: enable heuristic checking for misaligned records: oids > 2**32
-will be rejected; this requires the tracefile to be seekable
Note: the buddy system allocator rounds the cache size up to a power of 2
"""
...@@ -34,6 +33,9 @@ import sys
import time
import getopt
import struct
import math
from sets import Set
def usage(msg):
print >>sys.stderr, msg
...@@ -44,9 +46,9 @@ def main():
MB = 1000*1000
cachelimit = 20*MB
simclass = ZEOCacheSimulation
-heuristic = 0
theta = omicron = None
try:
-opts, args = getopt.getopt(sys.argv[1:], "bflyzs:X")
opts, args = getopt.getopt(sys.argv[1:], "bflyz2cOaTUs:o:t:")
except getopt.error, msg:
usage(msg)
return 2
...@@ -63,13 +65,32 @@ def main():
simclass = ZEOCacheSimulation
if o == '-s':
cachelimit = int(float(a)*MB)
-if o == '-X':
-heuristic = 1
if o == '-2':
simclass = TwoQSimluation
if o == '-c':
simclass = CircularCacheSimulation
if o == '-o':
omicron = float(a)
if o == '-t':
theta = float(a)
if o == '-O':
simclass = OracleSimulation
if o == '-a':
simclass = ARCCacheSimulation
if o == '-T':
simclass = ThorSimulation
if o == '-U':
simclass = UnboundedSimulation
if len(args) != 1:
usage("exactly one file argument required")
return 2
filename = args[0]
if omicron is not None and simclass != CircularCacheSimulation:
usage("-o flag only useful with -c (CircularCacheSimulation)")
return 2
# Open file
if filename.endswith(".gz"):
# Open gzipped file
...@@ -95,7 +116,12 @@ def main():
return 1
# Create simulation object
-sim = simclass(cachelimit)
if omicron is not None or theta is not None:
sim = simclass(cachelimit, omicron, theta)
elif simclass is OracleSimulation:
sim = simclass(cachelimit, filename)
else:
sim = simclass(cachelimit)
# Print output header
sim.printheader()
...@@ -107,21 +133,21 @@ def main():
struct_unpack = struct.unpack
while 1:
# Read a record and decode it
-r = f_read(10)
r = f_read(8)
-if len(r) < 10:
if len(r) < 8:
break
-offset += 10
offset += 8
-ts, code, lenoid = struct_unpack(">iiH", r)
ts, code = struct_unpack(">ii", r)
if ts == 0:
# Must be a misaligned record caused by a crash
##print "Skipping 8 bytes at offset", offset-8
continue
-r = f_read(8 + lenoid)
r = f_read(16)
-if len(r) < 8 + lenoid:
if len(r) < 16:
break
-offset += 8 + lenoid
offset += 16
records += 1
-serial, oid = struct_unpack(">8s%ds" % lenoid, r)
oid, serial = struct_unpack(">8s8s", r)
# Decode the code
dlen, version, code, current = (code & 0x7fffff00,
code & 0x80,
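# --- Editorial sketch, not part of the original file: the fixed-size
# record layout this loop now expects is timestamp:4 code:4 oid:8
# serial:8, i.e. 24 bytes per record.  All values below are made up
# for illustration.
import struct, time
ts = int(time.time())
code = 0x22                # stands in for a load code (code & 0x70 == 0x20)
oid = "\0" * 7 + "\1"      # hypothetical 8-byte oid
serial = "\0" * 8          # hypothetical 8-byte serial
record = struct.pack(">ii", ts, code) + oid + serial
assert len(record) == 24
ts2, code2 = struct.unpack(">ii", record[:8])
oid2, serial2 = struct.unpack(">8s8s", record[8:])
assert (ts2, code2, oid2, serial2) == (ts, code, oid, serial)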
...@@ -153,17 +179,20 @@ class Simulation:
# Initialize global statistics
self.epoch = None
self.total_loads = 0
self.total_hits = 0 # subclass must increment
self.total_invals = 0 # subclass must increment
self.total_writes = 0
if not hasattr(self, "extras"):
self.extras = (self.extraname,)
self.format = self.format + " %7s" * len(self.extras)
# Reset per-run statistics and set up simulation data
self.restart()
def restart(self):
# Reset per-run statistics
self.loads = 0
self.hits = 0 # subclass must increment
self.invals = 0 # subclass must increment
self.writes = 0
self.ts0 = None
...@@ -201,12 +230,14 @@ class Simulation:
pass
def load(self, oid, size):
# Must increment .hits and .total_hits as appropriate.
pass
def inval(self, oid):
# Must increment .invals and .total_invals as appropriate.
pass
-format = "%12s %9s %8s %8s %6s %6s %6s %6s"
format = "%12s %9s %8s %8s %6s %6s %7s"
# Subclass should override extraname to name known instance variables;
# if extraname is 'foo', both self.foo and self.total_foo must exist:
...@@ -215,34 +246,46 @@ class Simulation:
def printheader(self):
print "%s, cache size %s bytes" % (self.__class__.__name__,
addcommas(self.cachelimit))
-print self.format % (
-"START TIME", "DURATION", "LOADS", "HITS",
-"INVALS", "WRITES", self.extraname.upper(), "HITRATE")
self.extraheader()
extranames = tuple([s.upper() for s in self.extras])
args = ("START TIME", "DURATION", "LOADS", "HITS",
"INVALS", "WRITES", "HITRATE") + extranames
print self.format % args
def extraheader(self):
pass
nreports = 0
-def report(self):
def report(self, extratext=''):
if self.loads:
self.nreports += 1
-print self.format % (
-time.ctime(self.ts0)[4:-8],
-duration(self.ts1 - self.ts0),
-self.loads, self.hits, self.invals, self.writes,
-getattr(self, self.extraname),
-hitrate(self.loads, self.hits))
args = (time.ctime(self.ts0)[4:-8],
duration(self.ts1 - self.ts0),
self.loads, self.hits, self.invals, self.writes,
hitrate(self.loads, self.hits))
args += tuple([getattr(self, name) for name in self.extras])
print self.format % args, extratext
def finish(self):
-self.report()
-if self.nreports > 1:
-print (self.format + " OVERALL") % (
# Make sure that the last line of output ends with "OVERALL". This
# makes it much easier for another program parsing the output to
# find summary statistics.
if self.nreports < 2:
self.report('OVERALL')
else:
self.report()
args = (
time.ctime(self.epoch)[4:-8],
duration(self.ts1 - self.epoch),
self.total_loads,
self.total_hits,
self.total_invals,
self.total_writes,
-getattr(self, "total_" + self.extraname),
hitrate(self.total_loads, self.total_hits))
args += tuple([getattr(self, "total_" + name)
for name in self.extras])
print (self.format + " OVERALL") % args
class ZEOCacheSimulation(Simulation):
...@@ -433,6 +476,710 @@ class Node:
self.next = next
prev.next = next.prev = self
am = object()
a1in = object()
a1out = object()
class Node2Q(Node):
__slots__ = ["kind", "hits"]
def __init__(self, oid, size, kind=None):
Node.__init__(self, oid, size)
self.kind = kind
self.hits = 0
def linkbefore(self, next):
if next.kind != self.kind:
self.kind = next.kind
Node.linkbefore(self, next)
class TwoQSimluation(Simulation):
# The original 2Q algorithm is page based and the authors offer
# tuning guidelines based on a page-based cache. Our cache is
# object based, so, for example, it's hard to compute the number
# of oids to store in a1out based on the size of a1in.
extras = "evicts", "hothit", "am_add"
NodeClass = Node2Q
def __init__(self, cachelimit, outlen=10000, threshold=0):
Simulation.__init__(self, cachelimit)
# The promotion threshold: If a hit occurs in a1out, it is
# promoted to am if the number of hits on the object while it
# was in a1in is at least threshold. The standard 2Q scheme
# uses a threshold of 0.
self.threshold = threshold
self.am_limit = 3 * self.cachelimit / 4
self.a1in_limit = self.cachelimit / 4
self.cache = {}
self.am_size = 0
self.a1in_size = 0
self.a1out_size = 0
self.total_evicts = 0
self.total_hothit = 0
self.total_am_add = 0
self.a1out_limit = outlen
# An LRU queue of hot objects
self.am = self.NodeClass(None, None, am)
self.am.linkbefore(self.am)
# A FIFO queue of recently referenced objects. Its purpose
# is to absorb references to objects that are accessed a few
# times in short order, then forgotten about.
self.a1in = self.NodeClass(None, None, a1in)
self.a1in.linkbefore(self.a1in)
# A FIFO queue of recently referenced oids.
# This queue only stores the oids, not any data. If we get a
# hit in this queue, promote the object to am.
self.a1out = self.NodeClass(None, None, a1out)
self.a1out.linkbefore(self.a1out)
def makespace(self, size):
for space in 0, size:
if self.enoughspace(size):
return
self.evict_a1in(space)
if self.enoughspace(size):
return
self.evict_am(space)
def enoughspace(self, size):
totalsize = self.a1in_size + self.am_size
return totalsize + size < self.cachelimit
def evict_a1in(self, extra):
while self.a1in_size + extra > self.a1in_limit:
if self.a1in.next is self.a1in:
return
assert self.a1in.next is not None
node = self.a1in.next
self.evicts += 1
self.total_evicts += 1
node.linkbefore(self.a1out)
self.a1out_size += 1
self.a1in_size -= node.size
if self.a1out_size > self.a1out_limit:
assert self.a1out.next is not None
node = self.a1out.next
node.unlink()
del self.cache[node.oid]
self.a1out_size -= 1
def evict_am(self, extra):
while self.am_size + extra > self.am_limit:
if self.am.next is self.am:
return
assert self.am.next is not None
node = self.am.next
self.evicts += 1
self.total_evicts += 1
# This node hasn't been accessed in a while, so just
# forget about it.
node.unlink()
del self.cache[node.oid]
self.am_size -= node.size
def write(self, oid, size):
# A write always follows a read (ZODB doesn't allow blind writes).
# So this write must have followed a recent read of the object.
# Don't change its position, but do update the size.
# XXX For now, don't evict pages if the new version of the object
# is big enough to require eviction.
node = self.cache.get(oid)
if node is None or node.kind is a1out:
return
if node.kind is am:
self.am_size = self.am_size - node.size + size
node.size = size
else:
self.a1in_size = self.a1in_size - node.size + size
node.size = size
def load(self, oid, size):
node = self.cache.get(oid)
if node is not None:
if node.kind is am:
self.hits += 1
self.total_hits += 1
self.hothit += 1
self.total_hothit += 1
node.hits += 1
node.linkbefore(self.am)
elif node.kind is a1in:
self.hits += 1
self.total_hits += 1
node.hits += 1
elif node.kind is a1out:
self.a1out_size -= 1
if node.hits >= self.threshold:
self.makespace(node.size)
self.am_size += node.size
node.linkbefore(self.am)
self.cache[oid] = node
self.am_add += 1
self.total_am_add += 1
else:
node.unlink()
self.insert(oid, size)
else:
self.insert(oid, size)
def insert(self, oid, size):
# New objects enter the cache via a1in. If they
# are frequently used over a long enough time, they
# will be promoted to am -- but only via a1out.
self.makespace(size)
node = self.NodeClass(oid, size, a1in)
node.linkbefore(self.a1in)
self.cache[oid] = node
self.a1in_size += node.size
def inval(self, oid):
# The original 2Q algorithm didn't have to deal with
# invalidations. My own solution: Move it to the head of
# a1out.
node = self.cache.get(oid)
if node is None:
return
self.invals += 1
self.total_invals += 1
# XXX Should an invalidation to a1out count?
if node.kind is a1out:
return
node.linkbefore(self.a1out)
if node.kind is am:
self.am_size -= node.size
else:
self.a1in_size -= node.size
def restart(self):
Simulation.restart(self)
self.evicts = 0
self.hothit = 0
self.am_add = 0
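# --- Editorial sketch, not part of the original file: driving the 2Q
# simulator directly through the load/inval interface that the trace
# loop in main() ultimately calls.  Oids and sizes are small made-up
# ints here, not real 8-byte trace oids.
sim = TwoQSimluation(1000*1000)     # 1 MB cache
for oid in range(1, 101):
    sim.load(oid, 5000)             # cold misses; objects enter a1in
for oid in range(1, 11):
    sim.load(oid, 5000)             # re-references hit in a1in
sim.inval(3)                        # invalidation pushes oid 3 to a1out
print sim.hits, sim.invals, sim.total_evicts   # expect: 10 1 0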
lruT = object()
lruB = object()
fifoT = object()
fifoB = object()
class ARCCacheSimulation(Simulation):
# Based on the paper ARC: A Self-Tuning, Low Overhead Replacement
# Cache by Nimrod Megiddo and Dharmendra S. Modha, USENIX FAST
# 2003. The paper describes a block-based cache. A lot of the
# details need to be fiddled to work with an object-based cache.
# For size issues, the key insight ended up being conditions
# A.1-A.5 rather than the details of the algorithm in Fig. 4.
extras = "lruThits", "evicts", "p"
def __init__(self, cachelimit):
Simulation.__init__(self, cachelimit)
# There are two pairs of linked lists. Each pair has a top and
# bottom half. The bottom half contains metadata, but not actual
# objects.
# LRU list of frequently used objects
self.lruT = Node2Q(None, None, lruT)
self.lruT.linkbefore(self.lruT)
self.lruT_len = 0
self.lruT_size = 0
self.lruB = Node2Q(None, None, lruB)
self.lruB.linkbefore(self.lruB)
self.lruB_len = 0
self.lruB_size = 0
# FIFO list of objects seen once
self.fifoT = Node2Q(None, None, fifoT)
self.fifoT.linkbefore(self.fifoT)
self.fifoT_len = 0
self.fifoT_size = 0
self.fifoB = Node2Q(None, None, fifoB)
self.fifoB.linkbefore(self.fifoB)
self.fifoB_len = 0
self.fifoB_size = 0
# maps oid to node
self.cache = {}
# The paper says that p should be adjusted by 1 as the minimum:
# "The compound effect of such small increments and decrements
# to p is quite profound as we will demonstrate in the next
# section." Not really, as far as I can tell. In my traces
# with a very small cache, it was taking far too long to adjust
# towards favoring some FIFO component. I would guess that the
# chief difference is that our caches are much bigger than the
# ones they experimented with. Their biggest cache had 512K
# entries, while our smallest cache will have 40 times that many
# entries.
self.p = 0
# XXX multiply computed adjustments to p by walk_factor
self.walk_factor = 500
# statistics
self.total_hits = 0
self.total_lruThits = 0
self.total_fifoThits = 0
self.total_evicts = 0
def restart(self):
Simulation.restart(self)
self.hits = 0
self.lruThits = 0
self.fifoThits = 0
self.evicts = 0
def write(self, oid, size):
pass
def replace(self, lruB=False):
self.evicts += 1
self.total_evicts += 1
if self.fifoT_size > self.p or (lruB and self.fifoT_size == self.p):
node = self.fifoT.next
if node is self.fifoT:
return 0
assert node is not self.fifoT, self.stats()
node.linkbefore(self.fifoB)
self.fifoT_len -= 1
self.fifoT_size -= node.size
self.fifoB_len += 1
self.fifoB_size += node.size
else:
node = self.lruT.next
if node is self.lruT:
return 0
assert node is not self.lruT, self.stats()
node.linkbefore(self.lruB)
self.lruT_len -= 1
self.lruT_size -= node.size
self.lruB_len += 1
self.lruB_size += node.size
return node.size
def stats(self):
self.totalsize = self.lruT_size + self.fifoT_size
self.allsize = self.totalsize + self.lruB_size + self.fifoB_size
print "cachelimit = %s totalsize=%s allsize=%s" % (
addcommas(self.cachelimit),
addcommas(self.totalsize),
addcommas(self.allsize))
fmt = (
"p=%(p)d\n"
"lruT = %(lruT_len)5d / %(lruT_size)8d / %(lruThits)d\n"
"fifoT = %(fifoT_len)5d / %(fifoT_size)8d / %(fifoThits)d\n"
"lruB = %(lruB_len)5d / %(lruB_size)8d\n"
"fifoB = %(fifoB_len)5d / %(fifoB_size)8d\n"
"loads=%(loads)d hits=%(hits)d evicts=%(evicts)d\n"
)
print fmt % self.__dict__
def report(self):
self.total_p = self.p
Simulation.report(self)
## self.stats()
def load(self, oid, size):
## maybe(self.stats, p=0.002)
node = self.cache.get(oid)
if node is None:
# cache miss: We're going to insert a new object in fifoT.
# If fifo is full, we'll need to evict something to make
# room for it.
prev = need = size
while need > 0:
if size + self.fifoT_size + self.fifoB_size >= self.cachelimit:
if need + self.fifoT_size >= self.cachelimit:
node = self.fifoB.next
assert node is not self.fifoB, self.stats()
node.unlink()
del self.cache[node.oid]
self.fifoB_size -= node.size
self.fifoB_len -= 1
self.evicts += 1
self.total_evicts += 1
else:
node = self.fifoB.next
assert node is not self.fifoB, self.stats()
node.unlink()
del self.cache[node.oid]
self.fifoB_size -= node.size
self.fifoB_len -= 1
if self.fifoT_size + self.lruT_size > self.cachelimit:
need -= self.replace()
else:
incache_size = self.fifoT_size + self.lruT_size + need
total_size = (incache_size + self.fifoB_size
+ self.lruB_size)
if total_size >= self.cachelimit * 2:
node = self.lruB.next
if node is self.lruB:
break
assert node is not self.lruB
node.unlink()
del self.cache[node.oid]
self.lruB_size -= node.size
self.lruB_len -= 1
elif incache_size > self.cachelimit:
need -= self.replace()
else:
break
if need == prev:
# XXX hack, apparently we can't get rid of anything else
break
prev = need
node = Node2Q(oid, size)
node.linkbefore(self.fifoT)
self.fifoT_len += 1
self.fifoT_size += size
self.cache[oid] = node
else:
# a cache hit, but possibly in a bottom list that doesn't
# actually hold the object
if node.kind is lruT:
node.linkbefore(self.lruT)
self.hits += 1
self.total_hits += 1
self.lruThits += 1
self.total_lruThits += 1
elif node.kind is fifoT:
node.linkbefore(self.lruT)
self.fifoT_len -= 1
self.lruT_len += 1
self.fifoT_size -= node.size
self.lruT_size += node.size
self.hits += 1
self.total_hits += 1
self.fifoThits += 1
self.total_fifoThits += 1
elif node.kind is fifoB:
node.linkbefore(self.lruT)
self.fifoB_len -= 1
self.lruT_len += 1
self.fifoB_size -= node.size
self.lruT_size += node.size
# XXX need a better min than 1?
## print "adapt+", max(1, self.lruB_size // self.fifoB_size)
delta = max(1, self.lruB_size / max(1, self.fifoB_size))
self.p += delta * self.walk_factor
if self.p > self.cachelimit:
self.p = self.cachelimit
need = node.size
if self.lruT_size + self.fifoT_size + need > self.cachelimit:
while need > 0:
r = self.replace()
if not r:
break
need -= r
elif node.kind is lruB:
node.linkbefore(self.lruT)
self.lruB_len -= 1
self.lruT_len += 1
self.lruB_size -= node.size
self.lruT_size += node.size
# XXX need a better min than 1?
## print "adapt-", max(1, self.fifoB_size // self.lruB_size)
delta = max(1, self.fifoB_size / max(1, self.lruB_size))
self.p -= delta * self.walk_factor
if self.p < 0:
self.p = 0
need = node.size
if self.lruT_size + self.fifoT_size + need > self.cachelimit:
while need > 0:
r = self.replace(lruB=True)
if not r:
break
need -= r
def inval(self, oid):
pass
def extraheader(self):
pass
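# --- Editorial sketch, not part of the original file: the arithmetic
# behind ARC's adaptation of p, as used in load() above.  A hit in
# fifoB grows p, a hit in lruB shrinks it, with the step scaled by the
# relative sizes of the two bottom lists and by walk_factor; p is
# clamped to [0, cachelimit].  The sizes below are made up.
walk_factor = 500
cachelimit = 20 * 1000 * 1000
p = 0
lruB_size, fifoB_size = 300000, 100000
p = min(cachelimit, p + max(1, lruB_size / max(1, fifoB_size)) * walk_factor)
print p   # hit in fifoB: 3 * 500 = 1500
p = max(0, p - max(1, fifoB_size / max(1, lruB_size)) * walk_factor)
print p   # hit in lruB: 1500 - 1 * 500 = 1000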
class OracleSimulation(LRUCacheSimulation):
# Not sure how to implement this yet. This is a cache where I
# cheat to see how good we could actually do. The cache
# replacement problem for multi-size caches is NP-hard, so we're
# not going to have an optimal solution.
# At the moment, the oracle is mostly blind. It knows which
# objects will be referenced more than once, so that it can
# ignore objects referenced only once. In most traces, these
# objects account for about 20% of references.
def __init__(self, cachelimit, filename):
LRUCacheSimulation.__init__(self, cachelimit)
self.count = {}
self.scan(filename)
def load(self, oid, size):
node = self.cache.get(oid)
if node is not None:
self.hits += 1
self.total_hits += 1
node.linkbefore(self.head)
else:
if oid in self.count:
self.write(oid, size)
def scan(self, filename):
# scan the file in advance to figure out which objects will
# be referenced more than once.
f = open(filename, "rb")
struct_unpack = struct.unpack
f_read = f.read
offset = 0
while 1:
# Read a record and decode it
r = f_read(8)
if len(r) < 8:
break
offset += 8
ts, code = struct_unpack(">ii", r)
if ts == 0:
# Must be a misaligned record caused by a crash
##print "Skipping 8 bytes at offset", offset-8
continue
r = f_read(16)
if len(r) < 16:
break
offset += 16
oid, serial = struct_unpack(">8s8s", r)
if code & 0x70 == 0x20:
# only look at loads
self.count[oid] = self.count.get(oid, 0) + 1
all = len(self.count)
# Now remove everything with count == 1
once = [oid for oid, count in self.count.iteritems()
if count == 1]
for oid in once:
del self.count[oid]
print "Scanned file, %d unique oids, %d repeats" % (
all, len(self.count))
class CircularCacheSimulation(Simulation):
# The cache is managed as a single file with a pointer that
# goes around the file, circularly, forever. New objects
# are written at the current pointer, evicting whatever was
# there previously.
# For each cache hit, there is some distance between the current
# pointer offset and the offset of the cached data record. The
# cache can be configured to copy objects to the current offset
# depending on how far away they are now. The omicron parameter
# specifies that distance threshold as a fraction of the cache size.
extras = "evicts", "copies", "inuse", "skips"
def __init__(self, cachelimit, omicron=None, skip=None):
Simulation.__init__(self, cachelimit)
self.omicron = omicron or 0
self.skip = skip or 0
self.total_evicts = 0
self.total_copies = 0
self.total_skips = 0
# Current offset in file
self.offset = 0
# Map offset in file to tuple of size, oid
self.filemap = {0: (self.cachelimit, None)}
# Map oid to offset, node
self.cache = {}
# LRU list of oids
self.head = Node(None, None)
self.head.linkbefore(self.head)
def extraheader(self):
print "omicron = %s, theta = %s" % (self.omicron, self.skip)
def restart(self):
Simulation.restart(self)
self.evicts = 0
self.copies = 0
self.skips = 0
def load(self, oid, size):
p = self.cache.get(oid)
if p is None:
self.add(oid, size)
else:
pos, node = p
self.hits += 1
self.total_hits += 1
node.linkbefore(self.head)
self.copy(oid, size, pos)
def check(self):
d = dict(self.filemap)
done = {}
while d:
pos, (size, oid) = d.popitem()
next = pos + size
if not (next in d or next in done or next == self.cachelimit):
print "check", next, pos, size, repr(oid)
self.dump()
raise RuntimeError
done[pos] = pos
def dump(self):
print len(self.filemap)
L = list(self.filemap)
L.sort()
for k in L:
v = self.filemap[k]
print k, v[0], repr(v[1])
def add(self, oid, size):
avail = self.makeroom(size)
assert oid not in self.cache
self.filemap[self.offset] = size, oid
node = Node(oid, size)
node.linkbefore(self.head)
self.cache[oid] = self.offset, node
self.offset += size
# All the space made available must be accounted for in filemap.
excess = avail - size
if excess:
self.filemap[self.offset] = excess, None
def makeroom(self, need):
if self.offset + need > self.cachelimit:
self.offset = 0
pos = self.offset
# Evict enough objects to make the necessary space available.
self.compute_closeenough()
evicted = False
while need > 0:
if pos == self.cachelimit:
print "wrap makeroom", need
pos = 0
try:
size, oid = self.filemap[pos]
except:
self.dump()
raise
if not evicted and self.skip and oid and self.closeenough(oid):
self.skips += 1
self.total_skips += 1
self.offset += size
pos += size
continue
evicted = True
del self.filemap[pos]
if oid is not None:
self.evicts += 1
self.total_evicts += 1
pos, node = self.cache.pop(oid)
node.unlink()
need -= size
pos += size
return pos - self.offset
def compute_closeenough(self):
self.lru = {}
n = int(len(self.cache) * self.skip)
node = self.head.prev
while n > 0:
self.lru[node.oid] = True
node = node.prev
n -= 1
def closeenough(self, oid):
# If oid is in the top portion of the most recently used
# elements, skip it.
return oid in self.lru
def copy(self, oid, size, pos):
# Copy only if the distance is less than omicron (as a fraction
# of the cache size).
dist = self.offset - pos
if dist < 0:
dist += self.cachelimit
if dist < self.omicron * self.cachelimit:
self.copies += 1
self.total_copies += 1
self.filemap[pos] = size, None
pos, node = self.cache.pop(oid)
node.unlink()
self.add(oid, size)
def inval(self, oid):
p = self.cache.get(oid)
if p is None:
return
pos, node = p
self.invals += 1
self.total_invals += 1
size, _oid = self.filemap[pos]
assert oid == _oid
self.filemap[pos] = size, None
pos, node = self.cache.pop(oid)
node.unlink()
def write(self, oid, size):
p = self.cache.get(oid)
if p is None:
return
pos, node = p
oldsize, _oid = self.filemap[pos]
assert oid == _oid
if size == oldsize:
return
if size < oldsize:
excess = oldsize - size
self.filemap[pos] = size, oid
self.filemap[pos + size] = excess, None
else:
self.filemap[pos] = oldsize, None
pos, node = self.cache.pop(oid)
node.unlink()
self.add(oid, size)
def report(self):
free = used = total = 0
for size, oid in self.filemap.itervalues():
total += size
if oid:
used += size
else:
free += size
self.inuse = round(100.0 * used / total, 1)
self.total_inuse = self.inuse
Simulation.report(self)
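# --- Editorial sketch, not part of the original file: the wraparound
# distance computed by copy() above.  dist measures how far the write
# pointer has moved past the record's position, modulo the cache size.
cachelimit = 100
for offset, pos in [(30, 10), (10, 90)]:
    dist = offset - pos
    if dist < 0:
        dist += cachelimit
    print offset, pos, dist   # prints: 30 10 20, then 10 90 20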
class BuddyCacheSimulation(LRUCacheSimulation):
def __init__(self, cachelimit):
...@@ -753,5 +1500,218 @@ def addcommas(n):
i -= 3
return sign + s
import random
def maybe(f, p=0.5):
if random.random() < p:
f()
#############################################################################
# Thor-like eviction scheme.
#
# The cache keeps a circular list of all objects, and uses a travelling
# pointer to decay the worth of objects over time.
class ThorNode(Node):
__slots__ = ['worth']
def __init__(self, oid, size, worth=None):
Node.__init__(self, oid, size)
self.worth = worth
class ThorListHead(Node):
def __init__(self):
Node.__init__(self, 0, 0)
self.next = self.prev = self
class ThorSimulation(Simulation):
extras = "evicts", "trips"
def __init__(self, cachelimit):
Simulation.__init__(self, cachelimit)
# Maximum total of object sizes we keep in cache.
self.maxsize = cachelimit
# Current total of object sizes in cache.
self.currentsize = 0
# A worth byte maps to a set of all objects with that worth.
# This is cheap to keep updated, and makes finding low-worth
# objects for eviction trivial (just march over the worthsets
# list, in order).
self.worthsets = [Set() for dummy in range(256)]
# We keep a circular list of all objects in cache. currentobj
# walks around it forever. Each time _tick() is called, the
# worth of currentobj is decreased, basically by shifting
# right 1, and currentobj moves on to the next object. When
# an object is first inserted, it enters the list right before
# currentobj. When an object is accessed, its worth is
# increased by or'ing in 0x80. This scheme comes from the
# Thor system, and is an inexpensive way to account for both
# recency and frequency of access: recency is reflected in
# the leftmost bit set, and frequency by how many bits are
# set.
#
# Note: because evictions are interleaved with ticks,
# unlinking an object is tricky, lest we evict currentobj. The
# class _unlink method takes care of this properly.
self.listhead = ThorListHead()
self.currentobj = self.listhead
# Map an object.oid to its ThorNode.
self.oid2object = {}
self.total_evicts = self.total_trips = 0
# Unlink object from the circular list, taking care not to lose
# track of the current object. Always call this instead of
# invoking obj.unlink() directly.
def _unlink(self, obj):
assert obj is not self.listhead
if obj is self.currentobj:
self.currentobj = obj.next
obj.unlink()
# Change obj.worth to newworth, maintaining invariants.
def _change_worth(self, obj, newworth):
if obj.worth != newworth:
self.worthsets[obj.worth].remove(obj)
obj.worth = newworth
self.worthsets[newworth].add(obj)
def add(self, object):
assert object.oid not in self.oid2object
self.oid2object[object.oid] = object
newsize = self.currentsize + object.size
if newsize > self.maxsize:
self._evictbytes(newsize - self.maxsize)
self.currentsize += object.size
object.linkbefore(self.currentobj)
if object.worth is None:
# Give smaller objects higher initial worth. This favors kicking
# out unreferenced large objects before kicking out unreferenced
# small objects. On real life traces, this is a significant
# win for the hit rate.
object.worth = 32 - int(round(math.log(object.size, 2)))
self.worthsets[object.worth].add(object)
# Decrease the worth of the current object, and advance the
# current object.
def _tick(self):
c = self.currentobj
if c is self.listhead:
c = c.next
if c is self.listhead: # list is empty
return
self.total_trips += 1
self.trips += 1
self._change_worth(c, (c.worth + 1) >> 1)
self.currentobj = c.next
def access(self, oid):
self._tick()
obj = self.oid2object.get(oid)
if obj is None:
return None
self._change_worth(obj, obj.worth | 0x80)
return obj
# Evict objects of least worth first, until at least nbytes bytes
# have been freed.
def _evictbytes(self, nbytes):
for s in self.worthsets:
while s:
if nbytes <= 0:
return
obj = s.pop()
nbytes -= obj.size
self._evictobj(obj)
def _evictobj(self, obj):
self.currentsize -= obj.size
self.worthsets[obj.worth].discard(obj)
del self.oid2object[obj.oid]
self._unlink(obj)
self.evicts += 1
self.total_evicts += 1
def _evict_without_bumping_evict_stats(self, obj):
self._evictobj(obj)
self.evicts -= 1
self.total_evicts -= 1
# Simulator overrides from here on.
def restart(self):
# Reset base class
Simulation.restart(self)
# Reset additional per-run statistics
self.evicts = self.trips = 0
def write(self, oid, size):
obj = self.oid2object.get(oid)
worth = None
if obj is not None:
worth = obj.worth
self._evict_without_bumping_evict_stats(obj)
self.add(ThorNode(oid, size, worth))
def load(self, oid, size):
if self.access(oid) is not None:
self.hits += 1
self.total_hits += 1
else:
self.write(oid, size)
def inval(self, oid):
obj = self.oid2object.get(oid)
if obj is not None:
self.invals += 1
self.total_invals += 1
self._evict_without_bumping_evict_stats(obj)
# Take the "x" off to see additional stats after each restart period.
def xreport(self):
Simulation.report(self)
print 'non-empty worth sets', sum(map(bool, self.worthsets)),
print 'objects', len(self.oid2object),
print 'size', self.currentsize
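# --- Editorial sketch, not part of the original file: the worth
# arithmetic used by ThorSimulation.  An access ORs in 0x80; each tick
# roughly halves the worth, so recency lives in the high bits and
# frequency in how many bits are set.  The 4096-byte size is made up.
import math
worth = 32 - int(round(math.log(4096, 2)))   # initial worth: 32 - 12 = 20
worth |= 0x80                                # accessed: 148
worth = (worth + 1) >> 1                     # one tick later: 74
worth = (worth + 1) >> 1                     # two ticks later: 37
print worth                                  # 37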
#############################################################################
# Perfection: What if the cache were unbounded, and never forgot anything?
# This simulator answers that question directly; the cache size parameter
# isn't used.
class UnboundedSimulation(Simulation):
extraname = 'evicts' # for some reason we *have* to define >= 1 extra
def __init__(self, cachelimit):
Simulation.__init__(self, cachelimit)
self.oids = Set()
self.evicts = self.total_evicts = 0
def write(self, oid, size):
self.oids.add(oid)
def load(self, oid, size):
if oid in self.oids:
self.hits += 1
self.total_hits += 1
else:
self.oids.add(oid)
def inval(self, oid):
if oid in self.oids:
self.invals += 1
self.total_invals += 1
self.oids.remove(oid)
if __name__ == "__main__":
sys.exit(main())
...@@ -26,7 +26,7 @@ Usage: stats.py [-h] [-i interval] [-q] [-s] [-S] [-v] [-X] tracefile
"""File format:
-Each record is 18 bytes, plus a variable number of bytes to store an oid,
Each record is 26 bytes, plus a variable number of bytes to store an oid,
with the following layout. Numbers are big-endian integers.
Offset Size Contents
...@@ -35,8 +35,9 @@ Offset Size Contents
4 3 data size, in 256-byte increments, rounded up
7 1 code (see below)
8 2 object id length
-10 8 serial number
-18 variable object id
10 8 start tid
18 8 end tid
26 variable object id
The code at offset 7 packs three fields:
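(Editorial note, not part of the original docstring: the fixed header works
out to 4 (timestamp) + 4 (code) + 2 (oid length) + 8 (start tid) + 8 (end tid)
= 26 bytes; with the usual 8-byte oid appended, each record is 34 bytes,
matching the updated doc text above.)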
...@@ -131,74 +132,66 @@ def main():
print >> sys.stderr, "can't open %s: %s" % (filename, msg)
return 1
-# Read file, gathering statistics, and printing each record if verbose
rt0 = time.time()
-# bycode -- map code to count of occurrences
-bycode = {}
-# records -- number of records
-records = 0
-# version -- number of records with versions
-versions = 0
-t0 = te = None
-# datarecords -- number of records with dlen set
-datarecords = 0
-datasize = 0L
-# oids -- maps oid to number of times it was loaded
-oids = {}
-# bysize -- maps data size to number of loads
-bysize = {}
-# bysize -- maps data size to number of writes
-bysizew = {}
bycode = {}     # map code to count of occurrences
byinterval = {} # map code to count in current interval
records = 0     # number of trace records read
versions = 0    # number of trace records with versions
datarecords = 0 # number of records with dlen set
datasize = 0L   # sum of dlen across records with dlen set
oids = {}       # map oid to number of times it was loaded
bysize = {}     # map data size to number of loads
bysizew = {}    # map data size to number of writes
total_loads = 0
-byinterval = {}
-thisinterval = None
-h0 = he = None
-offset = 0
t0 = None           # first timestamp seen
te = None           # most recent timestamp seen
h0 = None           # timestamp at start of current interval
he = None           # timestamp at end of current interval
thisinterval = None # generally te//interval
f_read = f.read
struct_unpack = struct.unpack
# Read file, gathering statistics, and printing each record if verbose.
try:
while 1:
r = f_read(8) # timestamp:4 code:4
if len(r) < 8:
break
-offset += 8
ts, code = struct_unpack(">ii", r)
if ts == 0:
# Must be a misaligned record caused by a crash.
if not quiet:
-print "Skipping 8 bytes at offset", offset-8
print "Skipping 8 bytes at offset", f.tell() - 8
continue
r = f_read(18) # oidlen:2 starttid:8 endtid:8
-if len(r) < 10:
if len(r) < 18:
break
-offset += 10
-records += 1
oidlen, start_tid, end_tid = struct_unpack(">H8s8s", r)
oid = f_read(oidlen)
-if len(oid) != oidlen:
if len(oid) < oidlen:
break
-offset += oidlen
records += 1
if t0 is None:
t0 = ts
thisinterval = t0 // interval
h0 = he = ts
te = ts
if ts // interval != thisinterval:
if not quiet:
dumpbyinterval(byinterval, h0, he)
byinterval = {}
thisinterval = ts // interval
h0 = ts
he = ts
dlen, code = code & 0x7fffff00, code & 0xff
if dlen:
datarecords += 1
datasize += dlen
-version = '-'
if code & 0x80:
version = 'V'
versions += 1
-code = code & 0x7e
else:
version = '-'
code &= 0x7e
bycode[code] = bycode.get(code, 0) + 1
byinterval[code] = byinterval.get(code, 0) + 1
if dlen:
...@@ -220,11 +213,11 @@ def main():
if code & 0x70 == 0x20:
oids[oid] = oids.get(oid, 0) + 1
total_loads += 1
if code == 0x00: # restart
if not quiet:
dumpbyinterval(byinterval, h0, he)
byinterval = {}
thisinterval = ts // interval
h0 = he = ts
if not quiet:
print time.ctime(ts)[4:-5],
...@@ -232,6 +225,7 @@ def main():
except KeyboardInterrupt:
print "\nInterrupted. Stats so far:\n"
end_pos = f.tell()
f.close()
rte = time.time()
if not quiet:
...@@ -245,8 +239,8 @@ def main():
# Print statistics
if dostats:
print
-print "Read %s records (%s bytes) in %.1f seconds" % (
-addcommas(records), addcommas(records*24), rte-rt0)
print "Read %s trace records (%s bytes) in %.1f seconds" % (
addcommas(records), addcommas(end_pos), rte-rt0)
print "Versions: %s records used a version" % addcommas(versions)
print "First time: %s" % time.ctime(t0)
print "Last time: %s" % time.ctime(te)
...@@ -309,9 +303,8 @@ def dumpbysize(bysize, how, how2):
loads)
def dumpbyinterval(byinterval, h0, he):
-loads = 0
-hits = 0
-for code in byinterval.keys():
loads = hits = 0
for code in byinterval:
if code & 0x70 == 0x20:
n = byinterval[code]
loads += n
...@@ -328,8 +321,7 @@ def dumpbyinterval(byinterval, h0, he):
addcommas(loads), addcommas(hits), hr)
def hitrate(bycode):
-loads = 0
-hits = 0
loads = hits = 0
for code in bycode:
if code & 0x70 == 0x20:
n = bycode[code]
...@@ -389,7 +381,6 @@ explain = {
0x50: "store (version)",
0x52: "store (current, non-version)",
0x54: "store (non-current)",
}
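# --- Editorial sketch, not part of the original file: decoding a raw
# code field with the masks used in main() above.  0xa2 is a made-up
# value; what it means in explain is an assumption, hence the .get().
raw = 0xa2
version = raw & 0x80 and 'V' or '-'   # high bit is the version flag
code = raw & 0x7e                     # even event code, keyed in explain
print version, hex(code), explain.get(code, "unknown code")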
if __name__ == "__main__":
...