Merge rev 37385 from 3.4 branch.

Many little bugfixes and improvements in stats.py. This has survived several 100 MB of trace files I generated over the last few days, so it's solid now if not necessarily perfect. Replaced simul.py with the much broader-ranging code Jeremy and I were working on a couple years ago, although it can't work with the current trace file format (no real loss there -- the simul.py it's replacing can't work with the current format either).

Merge rev 37385 from 3.4 branch.
Many little bugfixes and improvements in stats.py. This has survived several 100 MB of trace files I generated over the last few days, so it's solid now if not necessarily perfect. Replaced simul.py with the much broader-ranging code Jeremy and I were working on a couple years ago, although it can't work with the current trace file format (no real loss there -- the simul.py it's replacing can't work with the current format either).
437ec8a6 · Tim Peters · 6b4c92e0 · 437ec8a6 · 437ec8a6 · 437ec8a6
Commit 437ec8a6 authored Jul 22, 2005 by Tim Peters
Show whitespace changes
Inline Side-by-side

Showing with 1037 additions and 86 deletions

doc/ZEO/trace.txt doc/ZEO/trace.txt +1 -1

src/ZEO/simul.py src/ZEO/simul.py +996 -36

src/ZEO/stats.py src/ZEO/stats.py +40 -49

No files found.
--- a/doc/ZEO/trace.txt
+++ b/doc/ZEO/trace.txt
@@ -24,7 +24,7 @@ application server) must be restarted.

 The trace file can grow pretty quickly; on a moderately loaded server, we
 observed it growing by 5 MB per hour.  The file consists of binary records,
-each 26 bytes long if 8-byte oids are in use; a detailed description of the
+each 34 bytes long if 8-byte oids are in use; a detailed description of the
 record lay-out is given in stats.py.  No sensitive data is logged:  data
 record sizes and binary object and transaction ids are logged, but no
 information about object types or names, user names, version names,

--- a/src/ZEO/simul.py
+++ b/src/ZEO/simul.py
+#! /usr/bin/env python
 ##############################################################################
 #
 # Copyright (c) 2001, 2002 Zope Corporation and Contributors.
 # All Rights Reserved.
 #
 # This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
+# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
 # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
 # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
@@ -13,7 +14,7 @@
 ##############################################################################
 """Cache simulation.

-Usage: simul.py [-bflyz] [-X] [-s size] tracefile
+Usage: simul.py [-bflyz] [-s size] tracefile

 Use one of -b, -f, -l, -y or -z select the cache simulator:
 -b: buddy system allocator
@@ -24,8 +25,6 @@ Use one of -b, -f, -l, -y or -z select the cache simulator:

 Options:
 -s size: cache size in MB (default 20 MB)
-X: enable heuristic checking for misaligned records: oids > 2**32
-    will be rejected; this requires the tracefile to be seekable

 Note: the buddy system allocator rounds the cache size up to a power of 2
 """
@@ -34,6 +33,9 @@ import sys
 import time
 import getopt
 import struct
+import math
+
+from sets import Set

 def usage(msg):
    print >>sys.stderr, msg
@@ -44,9 +46,9 @@ def main():
    MB = 1000*1000
    cachelimit = 20*MB
    simclass = ZEOCacheSimulation
-    heuristic = 0
+    theta = omicron = None
    try:
-        opts, args = getopt.getopt(sys.argv[1:], "bflyzs:X")
+        opts, args = getopt.getopt(sys.argv[1:], "bflyz2cOaTUs:o:t:")
    except getopt.error, msg:
        usage(msg)
        return 2
@@ -63,13 +65,32 @@ def main():
            simclass = ZEOCacheSimulation
        if o == '-s':
            cachelimit = int(float(a)*MB)
-        if o == '-X':
-            heuristic = 1
+        if o == '-2':
+            simclass = TwoQSimluation
+        if o == '-c':
+            simclass = CircularCacheSimulation
+        if o == '-o':
+            omicron = float(a)
+        if o == '-t':
+            theta = float(a)
+        if o == '-O':
+            simclass = OracleSimulation
+        if o == '-a':
+            simclass = ARCCacheSimulation
+        if o == '-T':
+            simclass = ThorSimulation
+        if o == '-U':
+            simclass = UnboundedSimulation
+
    if len(args) != 1:
        usage("exactly one file argument required")
        return 2
    filename = args[0]

+    if omicron is not None and simclass != CircularCacheSimulation:
+        usage("-o flag only useful with -c (CircularCacheSimulation)")
+        return 2
+
    # Open file
    if filename.endswith(".gz"):
        # Open gzipped file
@@ -95,6 +116,11 @@ def main():
            return 1

    # Create simulation object
+    if omicron is not None or theta is not None:
+        sim = simclass(cachelimit, omicron, theta)
+    elif simclass is OracleSimulation:
+        sim = simclass(cachelimit, filename)
+    else:
        sim = simclass(cachelimit)

    # Print output header
@@ -107,21 +133,21 @@ def main():
    struct_unpack = struct.unpack
    while 1:
        # Read a record and decode it
-        r = f_read(10)
-        if len(r) < 10:
+        r = f_read(8)
+        if len(r) < 8:
            break
-        offset += 10
-        ts, code, lenoid = struct_unpack(">iiH", r)
+        offset += 8
+        ts, code = struct_unpack(">ii", r)
        if ts == 0:
            # Must be a misaligned record caused by a crash
            ##print "Skipping 8 bytes at offset", offset-8
            continue
-        r = f_read(8 + lenoid)
-        if len(r) < 8 + lenoid:
+        r = f_read(16)
+        if len(r) < 16:
            break
-        offset += 8 + lenoid
+        offset += 16
        records += 1
-        serial, oid = struct_unpack(">8s%ds" % lenoid, r)
+        oid, serial = struct_unpack(">8s8s", r)
        # Decode the code
        dlen, version, code, current = (code & 0x7fffff00,
                                        code & 0x80,
@@ -153,17 +179,20 @@ class Simulation:
        # Initialize global statistics
        self.epoch = None
        self.total_loads = 0
-        self.total_hits = 0 # Subclass must increment
-        self.total_invals = 0
+        self.total_hits = 0       # subclass must increment
+        self.total_invals = 0     # subclass must increment
        self.total_writes = 0
+        if not hasattr(self, "extras"):
+            self.extras = (self.extraname,)
+        self.format = self.format + " %7s" * len(self.extras)
        # Reset per-run statistics and set up simulation data
        self.restart()

    def restart(self):
        # Reset per-run statistics
        self.loads = 0
-        self.hits = 0 # Subclass must increment
-        self.invals = 0
+        self.hits = 0       # subclass must increment
+        self.invals = 0     # subclass must increment
        self.writes = 0
        self.ts0 = None

@@ -201,12 +230,14 @@ class Simulation:
        pass

    def load(self, oid, size):
+        # Must increment .hits and .total_hits as appropriate.
        pass

    def inval(self, oid):
+        # Must increment .invals and .total_invals as appropriate.
        pass

-    format = "%12s %9s %8s %8s %6s %6s %6s %6s"
+    format = "%12s %9s %8s %8s %6s %6s %7s"

    # Subclass should override extraname to name known instance variables;
    # if extraname is 'foo', both self.foo and self.total_foo must exist:
@@ -215,34 +246,46 @@ class Simulation:
    def printheader(self):
        print "%s, cache size %s bytes" % (self.__class__.__name__,
                                           addcommas(self.cachelimit))
-        print self.format % (
-            "START TIME", "DURATION", "LOADS", "HITS",
-            "INVALS", "WRITES", self.extraname.upper(), "HITRATE")
+        self.extraheader()
+        extranames = tuple([s.upper() for s in self.extras])
+        args = ("START TIME", "DURATION", "LOADS", "HITS",
+                "INVALS", "WRITES", "HITRATE") + extranames
+        print self.format % args
+
+    def extraheader(self):
+        pass

    nreports = 0

-    def report(self):
+    def report(self, extratext=''):
        if self.loads:
            self.nreports += 1
-            print self.format % (
-                time.ctime(self.ts0)[4:-8],
+            args = (time.ctime(self.ts0)[4:-8],
                    duration(self.ts1 - self.ts0),
                    self.loads, self.hits, self.invals, self.writes,
-                getattr(self, self.extraname),
                    hitrate(self.loads, self.hits))
+            args += tuple([getattr(self, name) for name in self.extras])
+            print self.format % args, extratext

    def finish(self):
+        # Make sure that the last line of output ends with "OVERALL".  This
+        # makes it much easier for another program parsing the output to
+        # find summary statistics.
+        if self.nreports < 2:
+            self.report('OVERALL')
+        else:
            self.report()
-        if self.nreports > 1:
-            print (self.format + " OVERALL") % (
+            args = (
                time.ctime(self.epoch)[4:-8],
                duration(self.ts1 - self.epoch),
                self.total_loads,
                self.total_hits,
                self.total_invals,
                self.total_writes,
-                getattr(self, "total_" + self.extraname),
                hitrate(self.total_loads, self.total_hits))
+            args += tuple([getattr(self, "total_" + name)
+                           for name in self.extras])
+            print (self.format + " OVERALL") % args

 class ZEOCacheSimulation(Simulation):

@@ -433,6 +476,710 @@ class Node:
        self.next = next
        prev.next = next.prev = self

+am = object()
+a1in = object()
+a1out = object()
+
+class Node2Q(Node):
+
+    __slots__ = ["kind", "hits"]
+
+    def __init__(self, oid, size, kind=None):
+        Node.__init__(self, oid, size)
+        self.kind = kind
+        self.hits = 0
+
+    def linkbefore(self, next):
+        if next.kind != self.kind:
+            self.kind = next.kind
+        Node.linkbefore(self, next)
+
+class TwoQSimluation(Simulation):
+
+    # The original 2Q algorithm is page based and the authors offer
+    # tuning guidlines based on a page-based cache.  Our cache is
+    # object based, so, for example, it's hard to compute the number
+    # of oids to store in a1out based on the size of a1in.
+
+    extras = "evicts", "hothit", "am_add"
+
+    NodeClass = Node2Q
+
+    def __init__(self, cachelimit, outlen=10000, threshold=0):
+        Simulation.__init__(self, cachelimit)
+
+        # The promotion threshold: If a hit occurs in a1out, it is
+        # promoted to am if the number of hits on the object while it
+        # was in a1in is at least threshold.  The standard 2Q scheme
+        # uses a threshold of 0.
+        self.threshold = threshold
+        self.am_limit = 3 * self.cachelimit / 4
+        self.a1in_limit = self.cachelimit / 4
+
+        self.cache = {}
+        self.am_size = 0
+        self.a1in_size = 0
+        self.a1out_size = 0
+
+        self.total_evicts = 0
+        self.total_hothit = 0
+        self.total_am_add = 0
+        self.a1out_limit = outlen
+
+        # An LRU queue of hot objects
+        self.am = self.NodeClass(None, None, am)
+        self.am.linkbefore(self.am)
+        # A FIFO queue of recently referenced objects.  It's purpose
+        # is to absorb references to objects that are accessed a few
+        # times in short order, then forgotten about.
+        self.a1in = self.NodeClass(None, None, a1in)
+        self.a1in.linkbefore(self.a1in)
+        # A FIFO queue of recently reference oids.
+        # This queue only stores the oids, not any data.  If we get a
+        # hit in this queue, promote the object to am.
+        self.a1out = self.NodeClass(None, None, a1out)
+        self.a1out.linkbefore(self.a1out)
+
+    def makespace(self, size):
+        for space in 0, size:
+            if self.enoughspace(size):
+                return
+            self.evict_a1in(space)
+            if self.enoughspace(size):
+                return
+            self.evict_am(space)
+
+    def enoughspace(self, size):
+        totalsize = self.a1in_size + self.am_size
+        return totalsize + size < self.cachelimit
+
+    def evict_a1in(self, extra):
+        while self.a1in_size + extra > self.a1in_limit:
+            if self.a1in.next is self.a1in:
+                return
+            assert self.a1in.next is not None
+            node = self.a1in.next
+            self.evicts += 1
+            self.total_evicts += 1
+            node.linkbefore(self.a1out)
+            self.a1out_size += 1
+            self.a1in_size -= node.size
+            if self.a1out_size > self.a1out_limit:
+                assert self.a1out.next is not None
+                node = self.a1out.next
+                node.unlink()
+                del self.cache[node.oid]
+                self.a1out_size -= 1
+
+    def evict_am(self, extra):
+        while self.am_size + extra > self.am_limit:
+            if self.am.next is self.am:
+                return
+            assert self.am.next is not None
+            node = self.am.next
+            self.evicts += 1
+            self.total_evicts += 1
+            # This node hasn't been accessed in a while, so just
+            # forget about it.
+            node.unlink()
+            del self.cache[node.oid]
+            self.am_size -= node.size
+
+    def write(self, oid, size):
+        # A write always follows a read (ZODB doesn't allow blind writes).
+        # So this write must have followed a recent read of the object.
+        # Don't change it's position, but do update the size.
+
+        # XXX For now, don't evict pages if the new version of the object
+        # is big enough to require eviction.
+        node = self.cache.get(oid)
+        if node is None or node.kind is a1out:
+            return
+        if node.kind is am:
+            self.am_size = self.am_size - node.size + size
+            node.size = size
+        else:
+            self.a1in_size = self.a1in_size - node.size + size
+            node.size = size
+
+    def load(self, oid, size):
+        node = self.cache.get(oid)
+        if node is not None:
+            if node.kind is am:
+                self.hits += 1
+                self.total_hits += 1
+                self.hothit += 1
+                self.total_hothit += 1
+                node.hits += 1
+                node.linkbefore(self.am)
+            elif node.kind is a1in:
+                self.hits += 1
+                self.total_hits += 1
+                node.hits += 1
+            elif node.kind is a1out:
+                self.a1out_size -= 1
+                if node.hits >= self.threshold:
+                    self.makespace(node.size)
+                    self.am_size += node.size
+                    node.linkbefore(self.am)
+                    self.cache[oid] = node
+                    self.am_add += 1
+                    self.total_am_add += 1
+                else:
+                    node.unlink()
+                    self.insert(oid, size)
+        else:
+            self.insert(oid, size)
+
+    def insert(self, oid, size):
+        # New objects enter the cache via a1in.  If they
+        # are frequently used over a long enough time, they
+        # will be promoted to am -- but only via a1out.
+        self.makespace(size)
+        node = self.NodeClass(oid, size, a1in)
+        node.linkbefore(self.a1in)
+        self.cache[oid] = node
+        self.a1in_size += node.size
+
+    def inval(self, oid):
+        # The original 2Q algorithm didn't have to deal with
+        # invalidations.  My own solution: Move it to the head of
+        # a1out.
+        node = self.cache.get(oid)
+        if node is None:
+            return
+        self.invals += 1
+        self.total_invals += 1
+        # XXX Should an invalidation to a1out count?
+        if node.kind is a1out:
+            return
+        node.linkbefore(self.a1out)
+        if node.kind is am:
+            self.am_size -= node.size
+        else:
+            self.a1in_size -= node.size
+
+    def restart(self):
+        Simulation.restart(self)
+
+        self.evicts = 0
+        self.hothit = 0
+        self.am_add = 0
+
+lruT = object()
+lruB = object()
+fifoT = object()
+fifoB = object()
+
+class ARCCacheSimulation(Simulation):
+
+    # Based on the paper ARC: A Self-Tuning, Low Overhead Replacement
+    # Cache by Nimrod Megiddo and Dharmendra S. Modha, USENIX FAST
+    # 2003.  The paper describes a block-based cache.  A lot of the
+    # details need to be fiddled to work with an object-based cache.
+    # For size issues, the key insight ended up being conditions
+    # A.1-A.5 rather than the details of the algorithm in Fig. 4.
+
+    extras = "lruThits", "evicts", "p"
+
+    def __init__(self, cachelimit):
+        Simulation.__init__(self, cachelimit)
+        # There are two pairs of linked lists.  Each pair has a top and
+        # bottom half.  The bottom half contains metadata, but not actual
+        # objects.
+
+        # LRU list of frequently used objects
+        self.lruT = Node2Q(None, None, lruT)
+        self.lruT.linkbefore(self.lruT)
+        self.lruT_len = 0
+        self.lruT_size = 0
+
+        self.lruB = Node2Q(None, None, lruB)
+        self.lruB.linkbefore(self.lruB)
+        self.lruB_len = 0
+        self.lruB_size = 0
+
+        # FIFO list of objects seen once
+        self.fifoT = Node2Q(None, None, fifoT)
+        self.fifoT.linkbefore(self.fifoT)
+        self.fifoT_len = 0
+        self.fifoT_size = 0
+
+        self.fifoB = Node2Q(None, None, fifoB)
+        self.fifoB.linkbefore(self.fifoB)
+        self.fifoB_len = 0
+        self.fifoB_size = 0
+
+        # maps oid to node
+        self.cache = {}
+
+        # The paper says that p should be adjust be 1 as the minimum:
+        # "The compound effect of such small increments and decrements
+        # to p s quite profound as we will demonstrated in the next
+        # section."  Not really, as far as I can tell.  In my traces
+        # with a very small cache, it was taking far too long to adjust
+        # towards favoring some FIFO component.  I would guess that the
+        # chief difference is that our caches are much bigger than the
+        # ones they experimented with.  Their biggest cache had 512K
+        # entries, while our smallest cache will have 40 times that many
+        # entries.
+
+        self.p = 0
+        # XXX multiply computed adjustments to p by walk_factor
+        self.walk_factor = 500
+
+        # statistics
+        self.total_hits = 0
+        self.total_lruThits = 0
+        self.total_fifoThits = 0
+        self.total_evicts = 0
+
+    def restart(self):
+        Simulation.restart(self)
+        self.hits = 0
+        self.lruThits = 0
+        self.fifoThits = 0
+        self.evicts = 0
+
+    def write(self, oid, size):
+        pass
+
+    def replace(self, lruB=False):
+        self.evicts += 1
+        self.total_evicts += 1
+        if self.fifoT_size > self.p or (lruB and self.fifoT_size == self.p):
+            node = self.fifoT.next
+            if node is self.fifoT:
+                return 0
+            assert node is not self.fifoT, self.stats()
+            node.linkbefore(self.fifoB)
+            self.fifoT_len -= 1
+            self.fifoT_size -= node.size
+            self.fifoB_len += 1
+            self.fifoB_size += node.size
+        else:
+            node = self.lruT.next
+            if node is self.lruT:
+                return 0
+            assert node is not self.lruT, self.stats()
+            node.linkbefore(self.lruB)
+            self.lruT_len -= 1
+            self.lruT_size -= node.size
+            self.lruB_len += 1
+            self.lruB_size += node.size
+        return node.size
+
+    def stats(self):
+        self.totalsize = self.lruT_size + self.fifoT_size
+        self.allsize = self.totalsize + self.lruB_size + self.fifoB_size
+        print "cachelimit = %s totalsize=%s allsize=%s" % (
+            addcommas(self.cachelimit),
+            addcommas(self.totalsize),
+            addcommas(self.allsize))
+
+        fmt = (
+            "p=%(p)d\n"
+            "lruT  = %(lruT_len)5d / %(lruT_size)8d / %(lruThits)d\n"
+            "fifoT = %(fifoT_len)5d / %(fifoT_size)8d / %(fifoThits)d\n"
+            "lruB  = %(lruB_len)5d / %(lruB_size)8d\n"
+            "fifoB = %(fifoB_len)5d / %(fifoB_size)8d\n"
+            "loads=%(loads)d hits=%(hits)d evicts=%(evicts)d\n"
+            )
+        print fmt % self.__dict__
+
+    def report(self):
+        self.total_p = self.p
+        Simulation.report(self)
+##        self.stats()
+
+    def load(self, oid, size):
+##        maybe(self.stats, p=0.002)
+        node = self.cache.get(oid)
+        if node is None:
+            # cache miss: We're going to insert a new object in fifoT.
+            # If fifo is full, we'll need to evict something to make
+            # room for it.
+
+            prev = need = size
+            while need > 0:
+                if size + self.fifoT_size + self.fifoB_size >= self.cachelimit:
+                    if need + self.fifoT_size >= self.cachelimit:
+                        node = self.fifoB.next
+                        assert node is not self.fifoB, self.stats()
+                        node.unlink()
+                        del self.cache[node.oid]
+                        self.fifoB_size -= node.size
+                        self.fifoB_len -= 1
+                        self.evicts += 1
+                        self.total_evicts += 1
+                    else:
+                        node = self.fifoB.next
+                        assert node is not self.fifoB, self.stats()
+                        node.unlink()
+                        del self.cache[node.oid]
+                        self.fifoB_size -= node.size
+                        self.fifoB_len -= 1
+                        if self.fifoT_size + self.lruT_size > self.cachelimit:
+                            need -= self.replace()
+                else:
+                    incache_size = self.fifoT_size + self.lruT_size + need
+                    total_size = (incache_size + self.fifoB_size
+                                  + self.lruB_size)
+                    if total_size >= self.cachelimit * 2:
+                        node = self.lruB.next
+                        if node is self.lruB:
+                            break
+                        assert node is not self.lruB
+                        node.unlink()
+                        del self.cache[node.oid]
+                        self.lruB_size -= node.size
+                        self.lruB_len -= 1
+                    elif incache_size > self.cachelimit:
+                        need -= self.replace()
+                    else:
+                        break
+                if need == prev:
+                    # XXX hack, apparently we can't get rid of anything else
+                    break
+                prev = need
+
+            node = Node2Q(oid, size)
+            node.linkbefore(self.fifoT)
+            self.fifoT_len += 1
+            self.fifoT_size += size
+            self.cache[oid] = node
+        else:
+            # a cache hit, but possibly in a bottom list that doesn't
+            # actually hold the object
+            if node.kind is lruT:
+                node.linkbefore(self.lruT)
+
+                self.hits += 1
+                self.total_hits += 1
+                self.lruThits += 1
+                self.total_lruThits += 1
+
+            elif node.kind is fifoT:
+                node.linkbefore(self.lruT)
+                self.fifoT_len -= 1
+                self.lruT_len += 1
+                self.fifoT_size -= node.size
+                self.lruT_size += node.size
+
+                self.hits += 1
+                self.total_hits += 1
+                self.fifoThits += 1
+                self.total_fifoThits += 1
+
+            elif node.kind is fifoB:
+                node.linkbefore(self.lruT)
+                self.fifoB_len -= 1
+                self.lruT_len += 1
+                self.fifoB_size -= node.size
+                self.lruT_size += node.size
+
+                # XXX need a better min than 1?
+##                print "adapt+", max(1, self.lruB_size // self.fifoB_size)
+                delta = max(1, self.lruB_size / max(1, self.fifoB_size))
+                self.p += delta * self.walk_factor
+                if self.p > self.cachelimit:
+                    self.p = self.cachelimit
+
+                need = node.size
+                if self.lruT_size + self.fifoT_size + need > self.cachelimit:
+                    while need > 0:
+                        r = self.replace()
+                        if not r:
+                            break
+                        need -= r
+
+            elif node.kind is lruB:
+                node.linkbefore(self.lruT)
+                self.lruB_len -= 1
+                self.lruT_len += 1
+                self.lruB_size -= node.size
+                self.lruT_size += node.size
+
+                # XXX need a better min than 1?
+##                print "adapt-", max(1, self.fifoB_size // self.lruB_size)
+                delta = max(1, self.fifoB_size / max(1, self.lruB_size))
+                self.p -= delta * self.walk_factor
+                if self.p < 0:
+                    self.p = 0
+
+                need = node.size
+                if self.lruT_size + self.fifoT_size + need > self.cachelimit:
+                    while need > 0:
+                        r = self.replace(lruB=True)
+                        if not r:
+                            break
+                        need -= r
+
+    def inval(self, oid):
+        pass
+
+    def extraheader(self):
+        pass
+
+class OracleSimulation(LRUCacheSimulation):
+
+    # Not sure how to implement this yet.  This is a cache where I
+    # cheat to see how good we could actually do.  The cache
+    # replacement problem for multi-size caches is NP-hard, so we're
+    # not going to have an optimal solution.
+
+    # At the moment, the oracle is mostly blind.  It knows which
+    # objects will be referenced more than once, so that it can
+    # ignore objects referenced only once.  In most traces, these
+    # objects account for about 20% of references.
+
+    def __init__(self, cachelimit, filename):
+        LRUCacheSimulation.__init__(self, cachelimit)
+        self.count = {}
+        self.scan(filename)
+
+    def load(self, oid, size):
+        node = self.cache.get(oid)
+        if node is not None:
+            self.hits += 1
+            self.total_hits += 1
+            node.linkbefore(self.head)
+        else:
+            if oid in self.count:
+                self.write(oid, size)
+
+    def scan(self, filename):
+        # scan the file in advance to figure out which objects will
+        # be referenced more than once.
+        f = open(filename, "rb")
+        struct_unpack = struct.unpack
+        f_read = f.read
+        offset = 0
+        while 1:
+            # Read a record and decode it
+            r = f_read(8)
+            if len(r) < 8:
+                break
+            offset += 8
+            ts, code = struct_unpack(">ii", r)
+            if ts == 0:
+                # Must be a misaligned record caused by a crash
+                ##print "Skipping 8 bytes at offset", offset-8
+                continue
+            r = f_read(16)
+            if len(r) < 16:
+                break
+            offset += 16
+            oid, serial = struct_unpack(">8s8s", r)
+            if code & 0x70 == 0x20:
+                # only look at loads
+                self.count[oid] = self.count.get(oid, 0) + 1
+
+        all = len(self.count)
+
+        # Now remove everything with count == 1
+        once = [oid for oid, count in self.count.iteritems()
+                if count == 1]
+        for oid in once:
+            del self.count[oid]
+
+        print "Scanned file, %d unique oids, %d repeats" % (
+            all, len(self.count))
+
+class CircularCacheSimulation(Simulation):
+
+    # The cache is managed as a single file with a pointer that
+    # goes around the file, circularly, forever.  New objects
+    # are written at the current pointer, evicting whatever was
+    # there previously.
+
+    # For each cache hit, there is some distance between the current
+    # pointer offset and the offset of the cached data record.  The
+    # cache can be configured to copy objects to the current offset
+    # depending on how far away they are now.  The omicron parameter
+    # specifies a percentage
+
+    extras = "evicts", "copies", "inuse", "skips"
+
+    def __init__(self, cachelimit, omicron=None, skip=None):
+        Simulation.__init__(self, cachelimit)
+        self.omicron = omicron or 0
+        self.skip = skip or 0
+        self.total_evicts = 0
+        self.total_copies = 0
+        self.total_skips = 0
+        # Current offset in file
+        self.offset = 0
+        # Map offset in file to tuple of size, oid
+        self.filemap = {0: (self.cachelimit, None)}
+        # Map oid to offset, node
+        self.cache = {}
+        # LRU list of oids
+        self.head = Node(None, None)
+        self.head.linkbefore(self.head)
+
+    def extraheader(self):
+        print "omicron = %s, theta = %s" % (self.omicron, self.skip)
+
+    def restart(self):
+        Simulation.restart(self)
+        self.evicts = 0
+        self.copies = 0
+        self.skips = 0
+
+    def load(self, oid, size):
+        p = self.cache.get(oid)
+        if p is None:
+            self.add(oid, size)
+        else:
+            pos, node = p
+            self.hits += 1
+            self.total_hits += 1
+            node.linkbefore(self.head)
+            self.copy(oid, size, pos)
+
+    def check(self):
+        d = dict(self.filemap)
+        done = {}
+        while d:
+            pos, (size, oid) = d.popitem()
+            next = pos + size
+            if not (next in d or next in done or next == self.cachelimit):
+                print "check", next, pos, size, repr(oid)
+                self.dump()
+                raise RuntimeError
+            done[pos] = pos
+
+    def dump(self):
+        print len(self.filemap)
+        L = list(self.filemap)
+        L.sort()
+        for k in L:
+            v = self.filemap[k]
+            print k, v[0], repr(v[1])
+
+    def add(self, oid, size):
+        avail = self.makeroom(size)
+        assert oid not in self.cache
+        self.filemap[self.offset] = size, oid
+        node = Node(oid, size)
+        node.linkbefore(self.head)
+        self.cache[oid] = self.offset, node
+        self.offset += size
+        # All the space made available must be accounted for in filemap.
+        excess = avail - size
+        if excess:
+            self.filemap[self.offset] = excess, None
+
+    def makeroom(self, need):
+        if self.offset + need > self.cachelimit:
+            self.offset = 0
+        pos = self.offset
+        # Evict enough objects to make the necessary space available.
+        self.compute_closeenough()
+        evicted = False
+        while need > 0:
+            if pos == self.cachelimit:
+                print "wrap makeroom", need
+                pos = 0
+            try:
+                size, oid = self.filemap[pos]
+            except:
+                self.dump()
+                raise
+
+            if not evicted and self.skip and oid and self.closeenough(oid):
+                self.skips += 1
+                self.total_skips += 1
+                self.offset += size
+                pos += size
+                continue
+
+            evicted = True
+            del self.filemap[pos]
+
+            if oid is not None:
+                self.evicts += 1
+                self.total_evicts += 1
+                pos, node = self.cache.pop(oid)
+                node.unlink()
+            need -= size
+            pos += size
+
+        return pos - self.offset
+
+    def compute_closeenough(self):
+        self.lru = {}
+        n = int(len(self.cache) * self.skip)
+        node = self.head.prev
+        while n > 0:
+            self.lru[node.oid] = True
+            node = node.prev
+            n -= 1
+
+    def closeenough(self, oid):
+        # If oid is in the top portion of the most recently used
+        # elements, skip it.
+        return oid in self.lru
+
+    def copy(self, oid, size, pos):
+        # Copy only if the distance is greater than omicron.
+        dist = self.offset - pos
+        if dist < 0:
+            dist += self.cachelimit
+        if dist < self.omicron * self.cachelimit:
+            self.copies += 1
+            self.total_copies += 1
+            self.filemap[pos] = size, None
+            pos, node =  self.cache.pop(oid)
+            node.unlink()
+            self.add(oid, size)
+
+    def inval(self, oid):
+        p = self.cache.get(oid)
+        if p is None:
+            return
+        pos, node = p
+        self.invals += 1
+        self.total_invals += 1
+        size, _oid = self.filemap[pos]
+        assert oid == _oid
+        self.filemap[pos] = size, None
+        pos, node = self.cache.pop(oid)
+        node.unlink()
+
+    def write(self, oid, size):
+        p = self.cache.get(oid)
+        if p is None:
+            return
+        pos, node = p
+        oldsize, _oid = self.filemap[pos]
+        assert oid == _oid
+        if size == oldsize:
+            return
+        if size < oldsize:
+            excess = oldsize - size
+            self.filemap[pos] = size, oid
+            self.filemap[pos + size] = excess, None
+        else:
+            self.filemap[pos] = oldsize, None
+            pos, node = self.cache.pop(oid)
+            node.unlink()
+            self.add(oid, size)
+
+    def report(self):
+        free = used = total = 0
+        for size, oid in self.filemap.itervalues():
+            total += size
+            if oid:
+                used += size
+            else:
+                free += size
+
+        self.inuse = round(100.0 * used / total, 1)
+        self.total_inuse = self.inuse
+        Simulation.report(self)
+
 class BuddyCacheSimulation(LRUCacheSimulation):

    def __init__(self, cachelimit):
@@ -753,5 +1500,218 @@ def addcommas(n):
        i -= 3
    return sign + s

+import random
+
+def maybe(f, p=0.5):
+    if random.random() < p:
+        f()
+
+#############################################################################
+# Thor-like eviction scheme.
+#
+# The cache keeps such a list of all objects, and uses a travelling pointer
+# to decay the worth of objects over time.
+
+class ThorNode(Node):
+
+    __slots__ = ['worth']
+
+    def __init__(self, oid, size, worth=None):
+        Node.__init__(self, oid, size)
+        self.worth = worth
+
+class ThorListHead(Node):
+    def __init__(self):
+        Node.__init__(self, 0, 0)
+        self.next = self.prev = self
+
+class ThorSimulation(Simulation):
+
+    extras = "evicts", "trips"
+
+    def __init__(self, cachelimit):
+        Simulation.__init__(self, cachelimit)
+
+        # Maximum total of object sizes we keep in cache.
+        self.maxsize = cachelimit
+        # Current total of object sizes in cache.
+        self.currentsize = 0
+
+        # A worth byte maps to a set of all objects with that worth.
+        # This is cheap to keep updated, and makes finding low-worth
+        # objects for eviction trivial (just march over the worthsets
+        # list, in order).
+        self.worthsets = [Set() for dummy in range(256)]
+
+        # We keep a circular list of all objects in cache.  currentobj
+        # walks around it forever.  Each time _tick() is called, the
+        # worth of currentobj is decreased, basically by shifting
+        # right 1, and currentobj moves on to the next object.  When
+        # an object is first inserted, it enters the list right before
+        # currentobj.  When an object is accessed, its worth is
+        # increased by or'ing in 0x80.  This scheme comes from the
+        # Thor system, and is an inexpensive way to account for both
+        # recency and frequency of access:  recency is reflected in
+        # the leftmost bit set, and frequency by how many bits are
+        # set.
+        #
+        # Note:  because evictions are interleaved with ticks,
+        # unlinking an object is tricky, lest we evict currentobj.  The
+        # class _unlink method takes care of this properly.
+        self.listhead = ThorListHead()
+        self.currentobj = self.listhead
+
+        # Map an object.oid to its ThorNode.
+        self.oid2object = {}
+
+        self.total_evicts = self.total_trips = 0
+
+    # Unlink object from the circular list, taking care not to lose
+    # track of the current object.  Always call this instead of
+    # invoking obj.unlink() directly.
+    def _unlink(self, obj):
+        assert obj is not self.listhead
+        if obj is self.currentobj:
+            self.currentobj = obj.next
+        obj.unlink()
+
+    # Change obj.worth to newworth, maintaining invariants.
+    def _change_worth(self, obj, newworth):
+        if obj.worth != newworth:
+            self.worthsets[obj.worth].remove(obj)
+            obj.worth = newworth
+            self.worthsets[newworth].add(obj)
+
+    def add(self, object):
+        assert object.oid not in self.oid2object
+        self.oid2object[object.oid] = object
+
+        newsize = self.currentsize + object.size
+        if newsize > self.maxsize:
+            self._evictbytes(newsize - self.maxsize)
+        self.currentsize += object.size
+        object.linkbefore(self.currentobj)
+
+        if object.worth is None:
+            # Give smaller objects higher initial worth.  This favors kicking
+            # out unreferenced large objects before kicking out unreferenced
+            # small objects.  On real life traces, this is a significant
+            # win for the hit rate.
+            object.worth = 32 - int(round(math.log(object.size, 2)))
+        self.worthsets[object.worth].add(object)
+
+    # Decrease the worth of the current object, and advance the
+    # current object.
+    def _tick(self):
+        c = self.currentobj
+        if c is self.listhead:
+            c = c.next
+            if c is self.listhead:  # list is empty
+                return
+            self.total_trips += 1
+            self.trips += 1
+        self._change_worth(c, (c.worth + 1) >> 1)
+        self.currentobj = c.next
+
+    def access(self, oid):
+        self._tick()
+        obj = self.oid2object.get(oid)
+        if obj is None:
+            return None
+        self._change_worth(obj, obj.worth | 0x80)
+        return obj
+
+    # Evict objects of least worth first, until at least nbytes bytes
+    # have been freed.
+    def _evictbytes(self, nbytes):
+        for s in self.worthsets:
+            while s:
+                if nbytes <= 0:
+                    return
+                obj = s.pop()
+                nbytes -= obj.size
+                self._evictobj(obj)
+
+    def _evictobj(self, obj):
+        self.currentsize -= obj.size
+        self.worthsets[obj.worth].discard(obj)
+        del self.oid2object[obj.oid]
+        self._unlink(obj)
+
+        self.evicts += 1
+        self.total_evicts += 1
+
+    def _evict_without_bumping_evict_stats(self, obj):
+        self._evictobj(obj)
+        self.evicts -= 1
+        self.total_evicts -= 1
+
+    # Simulator overrides from here on.
+
+    def restart(self):
+        # Reset base class
+        Simulation.restart(self)
+        # Reset additional per-run statistics
+        self.evicts = self.trips = 0
+
+    def write(self, oid, size):
+        obj = self.oid2object.get(oid)
+        worth = None
+        if obj is not None:
+            worth = obj.worth
+            self._evict_without_bumping_evict_stats(obj)
+        self.add(ThorNode(oid, size, worth))
+
+    def load(self, oid, size):
+        if self.access(oid) is not None:
+            self.hits += 1
+            self.total_hits += 1
+        else:
+            self.write(oid, size)
+
+    def inval(self, oid):
+        obj = self.oid2object.get(oid)
+        if obj is not None:
+            self.invals += 1
+            self.total_invals += 1
+            self._evict_without_bumping_evict_stats(obj)
+
+    # Take the "x" off to see additional stats after each restart period.
+    def xreport(self):
+        Simulation.report(self)
+        print 'non-empty worth sets', sum(map(bool, self.worthsets)),
+        print 'objects', len(self.oid2object),
+        print 'size', self.currentsize
+
+#############################################################################
+# Perfection:  What if the cache were unbounded, and never forgot anything?
+# This simulator answers that question directly; the cache size parameter
+# isn't used.
+
+class UnboundedSimulation(Simulation):
+
+    extraname = 'evicts'   # for some reason we *have* to define >= 1 extra
+
+    def __init__(self, cachelimit):
+        Simulation.__init__(self, cachelimit)
+        self.oids = Set()
+        self.evicts = self.total_evicts = 0
+
+    def write(self, oid, size):
+        self.oids.add(oid)
+
+    def load(self, oid, size):
+        if oid in self.oids:
+            self.hits += 1
+            self.total_hits += 1
+        else:
+            self.oids.add(oid)
+
+    def inval(self, oid):
+        if oid in self.oids:
+            self.invals += 1
+            self.total_invals += 1
+            self.oids.remove(oid)
+
 if __name__ == "__main__":
    sys.exit(main())
--- a/src/ZEO/stats.py
+++ b/src/ZEO/stats.py
@@ -26,7 +26,7 @@ Usage: stats.py [-h] [-i interval] [-q] [-s] [-S] [-v] [-X] tracefile

 """File format:

-Each record is 18 bytes, plus a variable number of bytes to store an oid,
+Each record is 26 bytes, plus a variable number of bytes to store an oid,
 with the following layout.  Numbers are big-endian integers.

 Offset  Size  Contents
@@ -35,8 +35,9 @@ Offset  Size  Contents
 4       3     data size, in 256-byte increments, rounded up
 7       1     code (see below)
 8       2     object id length
-10      8     serial number
-18  variable  object id
+10      8     start tid
+18      8     end tid
+26  variable  object id

 The code at offset 7 packs three fields:

@@ -131,74 +132,66 @@ def main():
            print >> sys.stderr, "can't open %s: %s" % (filename, msg)
            return 1

-    # Read file, gathering statistics, and printing each record if verbose
    rt0 = time.time()
-    # bycode -- map code to count of occurrences
-    bycode = {}
-    # records -- number of records
-    records = 0
-    # version -- number of records with versions
-    versions = 0
-    t0 = te = None
-    # datarecords -- number of records with dlen set
-    datarecords = 0
-    datasize = 0L
-    # oids -- maps oid to number of times it was loaded
-    oids = {}
-    # bysize -- maps data size to number of loads
-    bysize = {}
-    # bysize -- maps data size to number of writes
-    bysizew = {}
+    bycode = {}     # map code to count of occurrences
+    byinterval = {} # map code to count in current interval
+    records = 0     # number of trace records read
+    versions = 0    # number of trace records with versions
+    datarecords = 0 # number of records with dlen set
+    datasize = 0L   # sum of dlen across records with dlen set
+    oids = {}       # map oid to number of times it was loaded
+    bysize = {}     # map data size to number of loads
+    bysizew = {}    # map data size to number of writes
    total_loads = 0
-    byinterval = {}
-    thisinterval = None
-    h0 = he = None
-    offset = 0
+    t0 = None       # first timestamp seen
+    te = None       # most recent timestamp seen
+    h0 = None       # timestamp at start of current interval
+    he = None       # timestamp at end of current interval
+    thisinterval = None  # generally te//interval
    f_read = f.read
    struct_unpack = struct.unpack
+    # Read file, gathering statistics, and printing each record if verbose.
    try:
        while 1:
-            r = f_read(8)
+            r = f_read(8) # timestamp:4 code:4
            if len(r) < 8:
                break
-            offset += 8
            ts, code = struct_unpack(">ii", r)
            if ts == 0:
-                # Must be a misaligned record caused by a crash
+                # Must be a misaligned record caused by a crash.
                if not quiet:
-                    print "Skipping 8 bytes at offset", offset-8
+                    print "Skipping 8 bytes at offset", f.tell() - 8
                continue
-            r = f_read(18)
-            if len(r) < 10:
+            r = f_read(18) # oidlen:2 starttid:8 endtid:8
+            if len(r) < 18:
                break
-            offset += 10
-            records += 1
            oidlen, start_tid, end_tid = struct_unpack(">H8s8s", r)
            oid = f_read(oidlen)
-            if len(oid) != oidlen:
+            if len(oid) < oidlen:
                break
-            offset += oidlen
+            records += 1
            if t0 is None:
                t0 = ts
-                thisinterval = t0 / interval
+                thisinterval = t0 // interval
                h0 = he = ts
            te = ts
-            if ts / interval != thisinterval:
+            if ts // interval != thisinterval:
                if not quiet:
                    dumpbyinterval(byinterval, h0, he)
                byinterval = {}
-                thisinterval = ts / interval
+                thisinterval = ts // interval
                h0 = ts
            he = ts
            dlen, code = code & 0x7fffff00, code & 0xff
            if dlen:
                datarecords += 1
                datasize += dlen
-            version = '-'
            if code & 0x80:
                version = 'V'
                versions += 1
-            code = code & 0x7e
+            else:
+                version = '-'
+            code &= 0x7e
            bycode[code] = bycode.get(code, 0) + 1
            byinterval[code] = byinterval.get(code, 0) + 1
            if dlen:
@@ -220,11 +213,11 @@ def main():
            if code & 0x70 == 0x20:
                oids[oid] = oids.get(oid, 0) + 1
                total_loads += 1
-            if code == 0x00:
+            if code == 0x00:    # restart
                if not quiet:
                    dumpbyinterval(byinterval, h0, he)
                byinterval = {}
-                thisinterval = ts / interval
+                thisinterval = ts // interval
                h0 = he = ts
                if not quiet:
                    print time.ctime(ts)[4:-5],
@@ -232,6 +225,7 @@ def main():
    except KeyboardInterrupt:
        print "\nInterrupted.  Stats so far:\n"

+    end_pos = f.tell()
    f.close()
    rte = time.time()
    if not quiet:
@@ -245,8 +239,8 @@ def main():
    # Print statistics
    if dostats:
        print
-        print "Read %s records (%s bytes) in %.1f seconds" % (
-            addcommas(records), addcommas(records*24), rte-rt0)
+        print "Read %s trace records (%s bytes) in %.1f seconds" % (
+            addcommas(records), addcommas(end_pos), rte-rt0)
        print "Versions:   %s records used a version" % addcommas(versions)
        print "First time: %s" % time.ctime(t0)
        print "Last time:  %s" % time.ctime(te)
@@ -309,9 +303,8 @@ def dumpbysize(bysize, how, how2):
                                loads)

 def dumpbyinterval(byinterval, h0, he):
-    loads = 0
-    hits = 0
-    for code in byinterval.keys():
+    loads = hits = 0
+    for code in byinterval:
        if code & 0x70 == 0x20:
            n = byinterval[code]
            loads += n
@@ -328,8 +321,7 @@ def dumpbyinterval(byinterval, h0, he):
        addcommas(loads), addcommas(hits), hr)

 def hitrate(bycode):
-    loads = 0
-    hits = 0
+    loads = hits = 0
    for code in bycode:
        if code & 0x70 == 0x20:
            n = bycode[code]
@@ -389,7 +381,6 @@ explain = {
    0x50: "store (version)",
    0x52: "store (current, non-version)",
    0x54: "store (non-current)",
-
    }

 if __name__ == "__main__":