From e03ff27ffa4f089a3ace2de188dcae745ada8c41 Mon Sep 17 00:00:00 2001
From: Christian Theune <ct@gocept.com>
Date: Thu, 13 Dec 2007 10:15:38 +0000
Subject: [PATCH] Completed file cache tests. Added a fuzzing test suite for
 the filecache API to exercise it under more stressful conditions.

---
 src/ZEO/tests/filecache.txt | 212 ++++++++++++++++++++++++++++++++----
 src/ZEO/tests/test_cache.py | 109 +++++++++++++++++-
 2 files changed, 299 insertions(+), 22 deletions(-)

diff --git a/src/ZEO/tests/filecache.txt b/src/ZEO/tests/filecache.txt
index 2b5a1ac3..c1713127 100644
--- a/src/ZEO/tests/filecache.txt
+++ b/src/ZEO/tests/filecache.txt
@@ -9,9 +9,8 @@ are not tested.
 As the FileCache calls back to the client cache we'll use a dummy to monitor
 those calls:
 
-  >>> class ClientCacheDummy(object):
-  ...     def _evicted(self, o):
-  ...         pass
+  >>> from ZEO.tests.test_cache import ClientCacheDummy, oid
+  >>> tid = oid
   >>> cache_dummy = ClientCacheDummy()
 
 We'll instanciate a FileCache with 200 bytes of space:
@@ -28,14 +27,6 @@ Initially the cache is empty:
   >>> fc.getStats()
   (0, 0, 0, 0, 0)
 
-We'll use a helper function to allow writing OIDs and TIDs as simple integers
-in this test:
-
-  >>> from ZODB.utils import repr_to_oid
-  >>> def oid(o):
-  ...     repr = '%016x' % o
-  ...     return repr_to_oid(repr)
-  >>> tid = oid
 
 Basic usage
 ===========
@@ -80,6 +71,12 @@ stored:
   >>> obj1_1_copy is obj1_1
   False
 
+The cache allows us to iterate over all entries in it:
+
+  >>> list(fc)  # doctest: +ELLIPSIS
+  [<ZEO.cache.Entry object at 0x...>]
+
+
 When an object gets superseded we can update it. This only modifies the header,
 not the actual data. This is useful when invalidations tell us about the
 `end_tid` of an object:
@@ -118,13 +115,14 @@ from the cache and another free record the reaches to the end of the file.
 
 The first record has a size of 143 bytes:
 
-  1 ('f') + 4 (size) + 8 (OID) + 8 (TID) + 8 (end_tid) + 2 (version length) + 4 (data length) + 100 (old data) + 8 (OID) == 143
+  143 = 1 ('f') + 4 (size) + 8 (OID) + 8 (TID) + 8 (end_tid) + 2 (version length) +
+  4 (data length) + 100 (old data) + 8 (OID)
 
 The second record has a size of 45 bytes:
 
-  1 ('f') + 4 (size) + 40 (free space)
+  45 = 1 ('f') + 4 (size) + 40 (free space)
 
-Note that the last byt is an 'x' because the initialisation of the cache file
+Note that the last byte is an 'x' because the initialisation of the cache file
 forced the absolute size of the file by seeking to byte 200 and writing an 'x'.
 
   >>> from ZEO.tests.test_cache import hexprint
@@ -143,18 +141,192 @@ forced the absolute size of the file by seeking to byte 200 and writing an 'x'.
   000000b0  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
   000000c0  00 00 00 00 00 00 00 78                           |.......x        |
 
+Case 1: Allocating a new block that fits after the last used one
+
+  >>> obj2_1 = Object(key=(oid(2), tid(1)), version='', data='**',
+  ...                 start_tid=tid(1), end_tid=None)
+  >>> fc.add(obj2_1)
+
+The new block fits exactly in the remaining 45 bytes (43 bytes header + 2
+bytes payload) so the beginning of the data is the same except for the last 45
+bytes:
+
+  >>> hexprint(fc.f)  # doctest: +REPORT_NDIFF
+  00000000  5a 45 43 33 00 00 00 00  00 00 00 00 66 00 00 00  |ZEC3........f...|
+  00000010  8f 00 00 00 00 00 00 00  01 00 00 00 00 00 00 00  |................|
+  00000020  01 00 00 00 00 00 00 00  02 00 00 00 00 00 64 23  |..............d#|
+  00000030  23 23 23 23 23 23 23 23  23 23 23 23 23 23 23 23  |################|
+  00000040  23 23 23 23 23 23 23 23  23 23 23 23 23 23 23 23  |################|
+  00000050  23 23 23 23 23 23 23 23  23 23 23 23 23 23 23 23  |################|
+  00000060  23 23 23 23 23 23 23 23  23 23 23 23 23 23 23 23  |################|
+  00000070  23 23 23 23 23 23 23 23  23 23 23 23 23 23 23 23  |################|
+  00000080  23 23 23 23 23 23 23 23  23 23 23 23 23 23 23 23  |################|
+  00000090  23 23 23 00 00 00 00 00  00 00 01 61 00 00 00 2d  |###........a...-|
+  000000a0  00 00 00 00 00 00 00 02  00 00 00 00 00 00 00 01  |................|
+  000000b0  00 00 00 00 00 00 00 00  00 00 00 00 00 02 2a 2a  |..............**|
+  000000c0  00 00 00 00 00 00 00 02                           |........        |
+
+Case 2: Allocating a block that wraps around and frees *exactly* one block
+
+  >>> obj3_1 = Object(key=(oid(3), tid(1)), version='', data='@'*100,
+  ...                 start_tid=tid(1), end_tid=None)
+  >>> fc.add(obj3_1)
+
+  >>> hexprint(fc.f)  # doctest: +REPORT_NDIFF
+  00000000  5a 45 43 33 00 00 00 00  00 00 00 00 61 00 00 00  |ZEC3........a...|
+  00000010  8f 00 00 00 00 00 00 00  03 00 00 00 00 00 00 00  |................|
+  00000020  01 00 00 00 00 00 00 00  00 00 00 00 00 00 64 40  |..............d@|
+  00000030  40 40 40 40 40 40 40 40  40 40 40 40 40 40 40 40  |@@@@@@@@@@@@@@@@|
+  00000040  40 40 40 40 40 40 40 40  40 40 40 40 40 40 40 40  |@@@@@@@@@@@@@@@@|
+  00000050  40 40 40 40 40 40 40 40  40 40 40 40 40 40 40 40  |@@@@@@@@@@@@@@@@|
+  00000060  40 40 40 40 40 40 40 40  40 40 40 40 40 40 40 40  |@@@@@@@@@@@@@@@@|
+  00000070  40 40 40 40 40 40 40 40  40 40 40 40 40 40 40 40  |@@@@@@@@@@@@@@@@|
+  00000080  40 40 40 40 40 40 40 40  40 40 40 40 40 40 40 40  |@@@@@@@@@@@@@@@@|
+  00000090  40 40 40 00 00 00 00 00  00 00 03 61 00 00 00 2d  |@@@........a...-|
+  000000a0  00 00 00 00 00 00 00 02  00 00 00 00 00 00 00 01  |................|
+  000000b0  00 00 00 00 00 00 00 00  00 00 00 00 00 02 2a 2a  |..............**|
+  000000c0  00 00 00 00 00 00 00 02                           |........        |
+
+Case 3: Allocating a block that requires 1 byte less than the next block
+
+  >>> obj4_1 = Object(key=(oid(4), tid(1)), version='', data='~',
+  ...                 start_tid=tid(1), end_tid=None)
+  >>> fc.add(obj4_1)
+
+  >>> hexprint(fc.f)  # doctest: +REPORT_NDIFF
+  00000000  5a 45 43 33 00 00 00 00  00 00 00 00 61 00 00 00  |ZEC3........a...|
+  00000010  8f 00 00 00 00 00 00 00  03 00 00 00 00 00 00 00  |................|
+  00000020  01 00 00 00 00 00 00 00  00 00 00 00 00 00 64 40  |..............d@|
+  00000030  40 40 40 40 40 40 40 40  40 40 40 40 40 40 40 40  |@@@@@@@@@@@@@@@@|
+  00000040  40 40 40 40 40 40 40 40  40 40 40 40 40 40 40 40  |@@@@@@@@@@@@@@@@|
+  00000050  40 40 40 40 40 40 40 40  40 40 40 40 40 40 40 40  |@@@@@@@@@@@@@@@@|
+  00000060  40 40 40 40 40 40 40 40  40 40 40 40 40 40 40 40  |@@@@@@@@@@@@@@@@|
+  00000070  40 40 40 40 40 40 40 40  40 40 40 40 40 40 40 40  |@@@@@@@@@@@@@@@@|
+  00000080  40 40 40 40 40 40 40 40  40 40 40 40 40 40 40 40  |@@@@@@@@@@@@@@@@|
+  00000090  40 40 40 00 00 00 00 00  00 00 03 61 00 00 00 2c  |@@@........a...,|
+  000000a0  00 00 00 00 00 00 00 04  00 00 00 00 00 00 00 01  |................|
+  000000b0  00 00 00 00 00 00 00 00  00 00 00 00 00 01 7e 00  |..............~.|
+  000000c0  00 00 00 00 00 00 04 31                           |.......1        |
+
+Case 4: Allocating a block that requires 2 bytes less than the next block
+
+  >>> obj5_1 = Object(key=(oid(5), tid(1)), version='', data='^'*98,
+  ...                 start_tid=tid(1), end_tid=None)
+  >>> fc.add(obj5_1)
+
+  >>> hexprint(fc.f)  # doctest: +REPORT_NDIFF
+  00000000  5a 45 43 33 00 00 00 00  00 00 00 00 61 00 00 00  |ZEC3........a...|
+  00000010  8d 00 00 00 00 00 00 00  05 00 00 00 00 00 00 00  |................|
+  00000020  01 00 00 00 00 00 00 00  00 00 00 00 00 00 62 5e  |..............b^|
+  00000030  5e 5e 5e 5e 5e 5e 5e 5e  5e 5e 5e 5e 5e 5e 5e 5e  |^^^^^^^^^^^^^^^^|
+  00000040  5e 5e 5e 5e 5e 5e 5e 5e  5e 5e 5e 5e 5e 5e 5e 5e  |^^^^^^^^^^^^^^^^|
+  00000050  5e 5e 5e 5e 5e 5e 5e 5e  5e 5e 5e 5e 5e 5e 5e 5e  |^^^^^^^^^^^^^^^^|
+  00000060  5e 5e 5e 5e 5e 5e 5e 5e  5e 5e 5e 5e 5e 5e 5e 5e  |^^^^^^^^^^^^^^^^|
+  00000070  5e 5e 5e 5e 5e 5e 5e 5e  5e 5e 5e 5e 5e 5e 5e 5e  |^^^^^^^^^^^^^^^^|
+  00000080  5e 5e 5e 5e 5e 5e 5e 5e  5e 5e 5e 5e 5e 5e 5e 5e  |^^^^^^^^^^^^^^^^|
+  00000090  5e 00 00 00 00 00 00 00  05 32 03 61 00 00 00 2c  |^........2.a...,|
+  000000a0  00 00 00 00 00 00 00 04  00 00 00 00 00 00 00 01  |................|
+  000000b0  00 00 00 00 00 00 00 00  00 00 00 00 00 01 7e 00  |..............~.|
+  000000c0  00 00 00 00 00 00 04 31                           |.......1        |
+
+Case 5: Allocating a block that requires 3 bytes less than the next block
+
+The end of the file is already a bit crowded and would create a rather complex
+situation to work on. We create an entry with 95 bytes of data which will
+be inserted at the beginning of the file, leaving a 3 byte free space after
+it.
+
+  >>> obj6_1 = Object(key=(oid(6), tid(1)), version='', data='+'*95,
+  ...                 start_tid=tid(1), end_tid=None)
+  >>> fc.add(obj6_1)
+
+  >>> hexprint(fc.f)  # doctest: +REPORT_NDIFF
+  00000000  5a 45 43 33 00 00 00 00  00 00 00 00 61 00 00 00  |ZEC3........a...|
+  00000010  8a 00 00 00 00 00 00 00  06 00 00 00 00 00 00 00  |................|
+  00000020  01 00 00 00 00 00 00 00  00 00 00 00 00 00 5f 2b  |.............._+|
+  00000030  2b 2b 2b 2b 2b 2b 2b 2b  2b 2b 2b 2b 2b 2b 2b 2b  |++++++++++++++++|
+  00000040  2b 2b 2b 2b 2b 2b 2b 2b  2b 2b 2b 2b 2b 2b 2b 2b  |++++++++++++++++|
+  00000050  2b 2b 2b 2b 2b 2b 2b 2b  2b 2b 2b 2b 2b 2b 2b 2b  |++++++++++++++++|
+  00000060  2b 2b 2b 2b 2b 2b 2b 2b  2b 2b 2b 2b 2b 2b 2b 2b  |++++++++++++++++|
+  00000070  2b 2b 2b 2b 2b 2b 2b 2b  2b 2b 2b 2b 2b 2b 2b 2b  |++++++++++++++++|
+  00000080  2b 2b 2b 2b 2b 2b 2b 2b  2b 2b 2b 2b 2b 2b 00 00  |++++++++++++++..|
+  00000090  00 00 00 00 00 06 33 00  05 32 03 61 00 00 00 2c  |......3..2.a...,|
+  000000a0  00 00 00 00 00 00 00 04  00 00 00 00 00 00 00 01  |................|
+  000000b0  00 00 00 00 00 00 00 00  00 00 00 00 00 01 7e 00  |..............~.|
+  000000c0  00 00 00 00 00 00 04 31                           |.......1        |
+
+Case 6: Allocating a block that requires 4 bytes less than the next block
+
+As in our previous case, we'll write a block that only fits in the first
+block's place to avoid dealing with the cluttering at the end of the cache
+file.
+
+  >>> obj7_1 = Object(key=(oid(7), tid(1)), version='', data='-'*91,
+  ...                 start_tid=tid(1), end_tid=None)
+  >>> fc.add(obj7_1)
+
+  >>> hexprint(fc.f)  # doctest: +REPORT_NDIFF
+  00000000  5a 45 43 33 00 00 00 00  00 00 00 00 61 00 00 00  |ZEC3........a...|
+  00000010  86 00 00 00 00 00 00 00  07 00 00 00 00 00 00 00  |................|
+  00000020  01 00 00 00 00 00 00 00  00 00 00 00 00 00 5b 2d  |..............[-|
+  00000030  2d 2d 2d 2d 2d 2d 2d 2d  2d 2d 2d 2d 2d 2d 2d 2d  |----------------|
+  00000040  2d 2d 2d 2d 2d 2d 2d 2d  2d 2d 2d 2d 2d 2d 2d 2d  |----------------|
+  00000050  2d 2d 2d 2d 2d 2d 2d 2d  2d 2d 2d 2d 2d 2d 2d 2d  |----------------|
+  00000060  2d 2d 2d 2d 2d 2d 2d 2d  2d 2d 2d 2d 2d 2d 2d 2d  |----------------|
+  00000070  2d 2d 2d 2d 2d 2d 2d 2d  2d 2d 2d 2d 2d 2d 2d 2d  |----------------|
+  00000080  2d 2d 2d 2d 2d 2d 2d 2d  2d 2d 00 00 00 00 00 00  |----------......|
+  00000090  00 07 34 00 00 06 33 00  05 32 03 61 00 00 00 2c  |..4...3..2.a...,|
+  000000a0  00 00 00 00 00 00 00 04  00 00 00 00 00 00 00 01  |................|
+  000000b0  00 00 00 00 00 00 00 00  00 00 00 00 00 01 7e 00  |..............~.|
+  000000c0  00 00 00 00 00 00 04 31                           |.......1        |
+
+Case 7: Allocating a block that requires >= 5 bytes less than the next block
+
+Again, we replace the block at the beginning of the cache.
+
+  >>> obj8_1 = Object(key=(oid(8), tid(1)), version='', data='='*86,
+  ...                 start_tid=tid(1), end_tid=None)
+  >>> fc.add(obj8_1)
+
+  >>> hexprint(fc.f)  # doctest: +REPORT_NDIFF
+  00000000  5a 45 43 33 00 00 00 00  00 00 00 00 61 00 00 00  |ZEC3........a...|
+  00000010  81 00 00 00 00 00 00 00  08 00 00 00 00 00 00 00  |................|
+  00000020  01 00 00 00 00 00 00 00  00 00 00 00 00 00 56 3d  |..............V=|
+  00000030  3d 3d 3d 3d 3d 3d 3d 3d  3d 3d 3d 3d 3d 3d 3d 3d  |================|
+  00000040  3d 3d 3d 3d 3d 3d 3d 3d  3d 3d 3d 3d 3d 3d 3d 3d  |================|
+  00000050  3d 3d 3d 3d 3d 3d 3d 3d  3d 3d 3d 3d 3d 3d 3d 3d  |================|
+  00000060  3d 3d 3d 3d 3d 3d 3d 3d  3d 3d 3d 3d 3d 3d 3d 3d  |================|
+  00000070  3d 3d 3d 3d 3d 3d 3d 3d  3d 3d 3d 3d 3d 3d 3d 3d  |================|
+  00000080  3d 3d 3d 3d 3d 00 00 00  00 00 00 00 08 66 00 00  |=====........f..|
+  00000090  00 05 34 00 00 06 33 00  05 32 03 61 00 00 00 2c  |..4...3..2.a...,|
+  000000a0  00 00 00 00 00 00 00 04  00 00 00 00 00 00 00 01  |................|
+  000000b0  00 00 00 00 00 00 00 00  00 00 00 00 00 01 7e 00  |..............~.|
+  000000c0  00 00 00 00 00 00 04 31                           |.......1        |
+
+
 Statistic functions
 ===================
 
-clearStats
-getStats
-__len__
-__iter__
-__contains__
-__close__
+The `getStats` method reports the number of added objects, added bytes, evicted
+objects, evicted bytes and accesses to the cache:
+
+  >>> fc.getStats()
+  (8, 917, 5, 601, 2)
+
+We can reset the stats by calling the `clearStats` method:
+
+  >>> fc.clearStats()
+  >>> fc.getStats()
+  (0, 0, 0, 0, 0)
 
 
 Cleanup
 =======
 
+As the cache is non-persistent, its file will be gone from disk after closing
+the cache:
+
+  >>> fc.f  # doctest: +ELLIPSIS
+  <open file '<fdopen>', mode 'w+b' at 0x...>
   >>> fc.close()
+  >>> fc.f
diff --git a/src/ZEO/tests/test_cache.py b/src/ZEO/tests/test_cache.py
index 29bc1f08..859418f4 100644
--- a/src/ZEO/tests/test_cache.py
+++ b/src/ZEO/tests/test_cache.py
@@ -14,13 +14,15 @@
 """Basic unit tests for a multi-version client cache."""
 
 import os
+import random
 import tempfile
 import unittest
 import doctest
 import string
+import sys
 
 import ZEO.cache
-from ZODB.utils import p64
+from ZODB.utils import p64, repr_to_oid
 
 
 n1 = p64(1)
@@ -39,7 +41,7 @@ def hexprint(file):
         printable = ""
         hex = ""
         for character in line:
-            if character in string.printable:
+            if character in string.printable and not ord(character) in [12,13,9]:
                 printable += character
             else:
                 printable += '.'
@@ -51,6 +53,38 @@ def hexprint(file):
         offset += 16
 
 
+class ClientCacheDummy(object):
+
+    def __init__(self):
+        self.objects = {}
+
+    def _evicted(self, o):
+        if o.key in self.objects:
+            del self.objects[o.key]
+
+
+def oid(o):
+    repr = '%016x' % o
+    return repr_to_oid(repr)
+tid = oid
+
+
+class FileCacheFuzzing(unittest.TestCase):
+
+    def testFileCacheFuzzing(self):
+        cache_dummy = ClientCacheDummy()
+        fc = ZEO.cache.FileCache(maxsize=5000, fpath=None,
+                                      parent=cache_dummy)
+        for i in xrange(10000):
+            size = random.randint(0,5500)
+            obj = ZEO.cache.Object(key=(oid(i), oid(1)), version='',
+                                   data='*'*size, start_tid=oid(1),
+                                   end_tid=None)
+            fc.add(obj)
+        hexprint(fc.f)
+        fc.close()
+
+
 class CacheTests(unittest.TestCase):
 
     def setUp(self):
@@ -151,6 +185,76 @@ class CacheTests(unittest.TestCase):
         # TODO:  Need to make sure eviction of non-current data
         # and of version data are handled correctly.
 
+    def _run_fuzzing(self):
+        current_tid = 1
+        current_oid = 1
+        def log(*args):
+            #print args
+            pass
+        cache = self.fuzzy_cache
+        objects = self.fuzzy_cache_client.objects
+        for operation in xrange(10000):
+            op = random.choice(['add', 'access', 'remove', 'update', 'settid'])
+            if not objects:
+                op = 'add'
+            log(op)
+            if op == 'add':
+                current_oid += 1
+                key = (oid(current_oid), tid(current_tid))
+                object = ZEO.cache.Object(
+                    key=key, version='', data='*'*random.randint(1,60*1024),
+                    start_tid=tid(current_tid), end_tid=None)
+                assert key not in objects
+                log(key, len(object.data), current_tid)
+                cache.add(object)
+                if (object.size + ZEO.cache.OBJECT_HEADER_SIZE >
+                    cache.maxsize - ZEO.cache.ZEC3_HEADER_SIZE):
+                    assert key not in cache
+                else:
+                    objects[key] = object
+                    assert key in cache, key
+            elif op == 'access':
+                key = random.choice(objects.keys())
+                log(key)
+                object = objects[key]
+                found = cache.access(key)
+                assert object.data == found.data
+                assert object.key == found.key
+                assert object.size == found.size == (len(object.data)+object.TOTAL_FIXED_SIZE)
+            elif op == 'remove':
+                key = random.choice(objects.keys())
+                log(key)
+                cache.remove(key)
+                assert key not in cache
+                assert key not in objects
+            elif op == 'update':
+                key = random.choice(objects.keys())
+                object = objects[key]
+                log(key, object.key)
+                if not object.end_tid:
+                    object.end_tid = tid(current_tid)
+                    log(key, current_tid)
+                    cache.update(object)
+            elif op == 'settid':
+                current_tid += 1
+                log(current_tid)
+                cache.settid(tid(current_tid))
+        cache.close()
+
+    def testFuzzing(self):
+        random.seed()
+        seed = random.randint(0, sys.maxint)
+        random.seed(seed)
+        self.fuzzy_cache_client = ClientCacheDummy()
+        self.fuzzy_cache = ZEO.cache.FileCache(
+            random.randint(100, 50*1024), None, self.fuzzy_cache_client)
+        try:
+            self._run_fuzzing()
+        except:
+            print "Error in fuzzing with seed", seed
+            hexprint(self.fuzzy_cache.f)
+            raise
+
     def testSerialization(self):
         self.cache.store(n1, "", n2, None, "data for n1")
         self.cache.store(n2, "version", n2, None, "version data for n2")
@@ -181,5 +285,6 @@ class CacheTests(unittest.TestCase):
 def test_suite():
     suite = unittest.TestSuite()
     suite.addTest(unittest.makeSuite(CacheTests))
+    suite.addTest(unittest.makeSuite(FileCacheFuzzing))
     suite.addTest(doctest.DocFileSuite('filecache.txt'))
     return suite
-- 
2.30.9