From e03ff27ffa4f089a3ace2de188dcae745ada8c41 Mon Sep 17 00:00:00 2001 From: Christian Theune <ct@gocept.com> Date: Thu, 13 Dec 2007 10:15:38 +0000 Subject: [PATCH] Completed file cache tests. Added a fuzzing test suite for the filecache API to exercise it under more stressful conditions. --- src/ZEO/tests/filecache.txt | 212 ++++++++++++++++++++++++++++++++---- src/ZEO/tests/test_cache.py | 109 +++++++++++++++++- 2 files changed, 299 insertions(+), 22 deletions(-) diff --git a/src/ZEO/tests/filecache.txt b/src/ZEO/tests/filecache.txt index 2b5a1ac3..c1713127 100644 --- a/src/ZEO/tests/filecache.txt +++ b/src/ZEO/tests/filecache.txt @@ -9,9 +9,8 @@ are not tested. As the FileCache calls back to the client cache we'll use a dummy to monitor those calls: - >>> class ClientCacheDummy(object): - ... def _evicted(self, o): - ... pass + >>> from ZEO.tests.test_cache import ClientCacheDummy, oid + >>> tid = oid >>> cache_dummy = ClientCacheDummy() We'll instanciate a FileCache with 200 bytes of space: @@ -28,14 +27,6 @@ Initially the cache is empty: >>> fc.getStats() (0, 0, 0, 0, 0) -We'll use a helper function to allow writing OIDs and TIDs as simple integers -in this test: - - >>> from ZODB.utils import repr_to_oid - >>> def oid(o): - ... repr = '%016x' % o - ... return repr_to_oid(repr) - >>> tid = oid Basic usage =========== @@ -80,6 +71,12 @@ stored: >>> obj1_1_copy is obj1_1 False +The cache allows us to iterate over all entries in it: + + >>> list(fc) # doctest: +ELLIPSIS + [<ZEO.cache.Entry object at 0x...>] + + When an object gets superseded we can update it. This only modifies the header, not the actual data. This is useful when invalidations tell us about the `end_tid` of an object: @@ -118,13 +115,14 @@ from the cache and another free record the reaches to the end of the file. The first record has a size of 143 bytes: - 1 ('f') + 4 (size) + 8 (OID) + 8 (TID) + 8 (end_tid) + 2 (version length) + 4 (data length) + 100 (old data) + 8 (OID) == 143 + 143 = 1 ('f') + 4 (size) + 8 (OID) + 8 (TID) + 8 (end_tid) + 2 (version length) + + 4 (data length) + 100 (old data) + 8 (OID) The second record has a size of 45 bytes: - 1 ('f') + 4 (size) + 40 (free space) + 45 = 1 ('f') + 4 (size) + 40 (free space) -Note that the last byt is an 'x' because the initialisation of the cache file +Note that the last byte is an 'x' because the initialisation of the cache file forced the absolute size of the file by seeking to byte 200 and writing an 'x'. >>> from ZEO.tests.test_cache import hexprint @@ -143,18 +141,192 @@ forced the absolute size of the file by seeking to byte 200 and writing an 'x'. 000000b0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| 000000c0 00 00 00 00 00 00 00 78 |.......x | +Case 1: Allocating a new block that fits after the last used one + + >>> obj2_1 = Object(key=(oid(2), tid(1)), version='', data='**', + ... start_tid=tid(1), end_tid=None) + >>> fc.add(obj2_1) + +The new block fits exactly in the remaining 45 bytes (43 bytes header + 2 +bytes payload) so the beginning of the data is the same except for the last 45 +bytes: + + >>> hexprint(fc.f) # doctest: +REPORT_NDIFF + 00000000 5a 45 43 33 00 00 00 00 00 00 00 00 66 00 00 00 |ZEC3........f...| + 00000010 8f 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 |................| + 00000020 01 00 00 00 00 00 00 00 02 00 00 00 00 00 64 23 |..............d#| + 00000030 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 |################| + 00000040 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 |################| + 00000050 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 |################| + 00000060 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 |################| + 00000070 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 |################| + 00000080 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 23 |################| + 00000090 23 23 23 00 00 00 00 00 00 00 01 61 00 00 00 2d |###........a...-| + 000000a0 00 00 00 00 00 00 00 02 00 00 00 00 00 00 00 01 |................| + 000000b0 00 00 00 00 00 00 00 00 00 00 00 00 00 02 2a 2a |..............**| + 000000c0 00 00 00 00 00 00 00 02 |........ | + +Case 2: Allocating a block that wraps around and frees *exactly* one block + + >>> obj3_1 = Object(key=(oid(3), tid(1)), version='', data='@'*100, + ... start_tid=tid(1), end_tid=None) + >>> fc.add(obj3_1) + + >>> hexprint(fc.f) # doctest: +REPORT_NDIFF + 00000000 5a 45 43 33 00 00 00 00 00 00 00 00 61 00 00 00 |ZEC3........a...| + 00000010 8f 00 00 00 00 00 00 00 03 00 00 00 00 00 00 00 |................| + 00000020 01 00 00 00 00 00 00 00 00 00 00 00 00 00 64 40 |..............d@| + 00000030 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 |@@@@@@@@@@@@@@@@| + 00000040 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 |@@@@@@@@@@@@@@@@| + 00000050 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 |@@@@@@@@@@@@@@@@| + 00000060 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 |@@@@@@@@@@@@@@@@| + 00000070 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 |@@@@@@@@@@@@@@@@| + 00000080 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 |@@@@@@@@@@@@@@@@| + 00000090 40 40 40 00 00 00 00 00 00 00 03 61 00 00 00 2d |@@@........a...-| + 000000a0 00 00 00 00 00 00 00 02 00 00 00 00 00 00 00 01 |................| + 000000b0 00 00 00 00 00 00 00 00 00 00 00 00 00 02 2a 2a |..............**| + 000000c0 00 00 00 00 00 00 00 02 |........ | + +Case 3: Allocating a block that requires 1 byte less than the next block + + >>> obj4_1 = Object(key=(oid(4), tid(1)), version='', data='~', + ... start_tid=tid(1), end_tid=None) + >>> fc.add(obj4_1) + + >>> hexprint(fc.f) # doctest: +REPORT_NDIFF + 00000000 5a 45 43 33 00 00 00 00 00 00 00 00 61 00 00 00 |ZEC3........a...| + 00000010 8f 00 00 00 00 00 00 00 03 00 00 00 00 00 00 00 |................| + 00000020 01 00 00 00 00 00 00 00 00 00 00 00 00 00 64 40 |..............d@| + 00000030 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 |@@@@@@@@@@@@@@@@| + 00000040 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 |@@@@@@@@@@@@@@@@| + 00000050 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 |@@@@@@@@@@@@@@@@| + 00000060 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 |@@@@@@@@@@@@@@@@| + 00000070 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 |@@@@@@@@@@@@@@@@| + 00000080 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 |@@@@@@@@@@@@@@@@| + 00000090 40 40 40 00 00 00 00 00 00 00 03 61 00 00 00 2c |@@@........a...,| + 000000a0 00 00 00 00 00 00 00 04 00 00 00 00 00 00 00 01 |................| + 000000b0 00 00 00 00 00 00 00 00 00 00 00 00 00 01 7e 00 |..............~.| + 000000c0 00 00 00 00 00 00 04 31 |.......1 | + +Case 4: Allocating a block that requires 2 bytes less than the next block + + >>> obj4_1 = Object(key=(oid(5), tid(1)), version='', data='^'*98, + ... start_tid=tid(1), end_tid=None) + >>> fc.add(obj4_1) + + >>> hexprint(fc.f) # doctest: +REPORT_NDIFF + 00000000 5a 45 43 33 00 00 00 00 00 00 00 00 61 00 00 00 |ZEC3........a...| + 00000010 8d 00 00 00 00 00 00 00 05 00 00 00 00 00 00 00 |................| + 00000020 01 00 00 00 00 00 00 00 00 00 00 00 00 00 62 5e |..............b^| + 00000030 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e |^^^^^^^^^^^^^^^^| + 00000040 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e |^^^^^^^^^^^^^^^^| + 00000050 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e |^^^^^^^^^^^^^^^^| + 00000060 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e |^^^^^^^^^^^^^^^^| + 00000070 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e |^^^^^^^^^^^^^^^^| + 00000080 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e 5e |^^^^^^^^^^^^^^^^| + 00000090 5e 00 00 00 00 00 00 00 05 32 03 61 00 00 00 2c |^........2.a...,| + 000000a0 00 00 00 00 00 00 00 04 00 00 00 00 00 00 00 01 |................| + 000000b0 00 00 00 00 00 00 00 00 00 00 00 00 00 01 7e 00 |..............~.| + 000000c0 00 00 00 00 00 00 04 31 |.......1 | + +Case 5: Allocating a block that requires 3 bytes less than the next block + +The end of the file is already a bit crowded and would create a rather complex +situation to work on. We create an entry with the size of 95 byte which will +be inserted at the beginning of the file, leaving a 3 byte free space after +it. + + >>> obj4_1 = Object(key=(oid(6), tid(1)), version='', data='+'*95, + ... start_tid=tid(1), end_tid=None) + >>> fc.add(obj4_1) + + >>> hexprint(fc.f) # doctest: +REPORT_NDIFF + 00000000 5a 45 43 33 00 00 00 00 00 00 00 00 61 00 00 00 |ZEC3........a...| + 00000010 8a 00 00 00 00 00 00 00 06 00 00 00 00 00 00 00 |................| + 00000020 01 00 00 00 00 00 00 00 00 00 00 00 00 00 5f 2b |.............._+| + 00000030 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b |++++++++++++++++| + 00000040 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b |++++++++++++++++| + 00000050 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b |++++++++++++++++| + 00000060 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b |++++++++++++++++| + 00000070 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b |++++++++++++++++| + 00000080 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 2b 00 00 |++++++++++++++..| + 00000090 00 00 00 00 00 06 33 00 05 32 03 61 00 00 00 2c |......3..2.a...,| + 000000a0 00 00 00 00 00 00 00 04 00 00 00 00 00 00 00 01 |................| + 000000b0 00 00 00 00 00 00 00 00 00 00 00 00 00 01 7e 00 |..............~.| + 000000c0 00 00 00 00 00 00 04 31 |.......1 | + +Case 6: Allocating a block that requires 4 bytes less than the next block + +As in our previous case, we'll write a block that only fits in the first +block's place to avoid dealing with the cluttering at the end of the cache +file. + + >>> obj4_1 = Object(key=(oid(7), tid(1)), version='', data='-'*91, + ... start_tid=tid(1), end_tid=None) + >>> fc.add(obj4_1) + + >>> hexprint(fc.f) # doctest: +REPORT_NDIFF + 00000000 5a 45 43 33 00 00 00 00 00 00 00 00 61 00 00 00 |ZEC3........a...| + 00000010 86 00 00 00 00 00 00 00 07 00 00 00 00 00 00 00 |................| + 00000020 01 00 00 00 00 00 00 00 00 00 00 00 00 00 5b 2d |..............[-| + 00000030 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d |----------------| + 00000040 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d |----------------| + 00000050 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d |----------------| + 00000060 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d |----------------| + 00000070 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d |----------------| + 00000080 2d 2d 2d 2d 2d 2d 2d 2d 2d 2d 00 00 00 00 00 00 |----------......| + 00000090 00 07 34 00 00 06 33 00 05 32 03 61 00 00 00 2c |..4...3..2.a...,| + 000000a0 00 00 00 00 00 00 00 04 00 00 00 00 00 00 00 01 |................| + 000000b0 00 00 00 00 00 00 00 00 00 00 00 00 00 01 7e 00 |..............~.| + 000000c0 00 00 00 00 00 00 04 31 |.......1 | + +Case 7: Allocating a block that requires >= 5 bytes less than the next block + +Again, we replace the block at the beginning of the cache. + + >>> obj4_1 = Object(key=(oid(8), tid(1)), version='', data='='*86, + ... start_tid=tid(1), end_tid=None) + >>> fc.add(obj4_1) + + >>> hexprint(fc.f) # doctest: +REPORT_NDIFF + 00000000 5a 45 43 33 00 00 00 00 00 00 00 00 61 00 00 00 |ZEC3........a...| + 00000010 81 00 00 00 00 00 00 00 08 00 00 00 00 00 00 00 |................| + 00000020 01 00 00 00 00 00 00 00 00 00 00 00 00 00 56 3d |..............V=| + 00000030 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d |================| + 00000040 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d |================| + 00000050 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d |================| + 00000060 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d |================| + 00000070 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d 3d |================| + 00000080 3d 3d 3d 3d 3d 00 00 00 00 00 00 00 08 66 00 00 |=====........f..| + 00000090 00 05 34 00 00 06 33 00 05 32 03 61 00 00 00 2c |..4...3..2.a...,| + 000000a0 00 00 00 00 00 00 00 04 00 00 00 00 00 00 00 01 |................| + 000000b0 00 00 00 00 00 00 00 00 00 00 00 00 00 01 7e 00 |..............~.| + 000000c0 00 00 00 00 00 00 04 31 |.......1 | + + Statistic functions =================== -clearStats -getStats -__len__ -__iter__ -__contains__ -__close__ +The `getStats` method talks about the added objects, added bytes, evicted +objects, evicted bytes and accesses to the cache: + + >>> fc.getStats() + (8, 917, 5, 601, 2) + +We can reset the stats by calling the `clearStats` method: + + >>> fc.clearStats() + >>> fc.getStats() + (0, 0, 0, 0, 0) Cleanup ======= +As the cache is non-persistent, its file will be gone from disk after closing +the cache: + + >>> fc.f # doctest: +ELLIPSIS + <open file '<fdopen>', mode 'w+b' at 0x...> >>> fc.close() + >>> fc.f diff --git a/src/ZEO/tests/test_cache.py b/src/ZEO/tests/test_cache.py index 29bc1f08..859418f4 100644 --- a/src/ZEO/tests/test_cache.py +++ b/src/ZEO/tests/test_cache.py @@ -14,13 +14,15 @@ """Basic unit tests for a multi-version client cache.""" import os +import random import tempfile import unittest import doctest import string +import sys import ZEO.cache -from ZODB.utils import p64 +from ZODB.utils import p64, repr_to_oid n1 = p64(1) @@ -39,7 +41,7 @@ def hexprint(file): printable = "" hex = "" for character in line: - if character in string.printable: + if character in string.printable and not ord(character) in [12,13,9]: printable += character else: printable += '.' @@ -51,6 +53,38 @@ def hexprint(file): offset += 16 +class ClientCacheDummy(object): + + def __init__(self): + self.objects = {} + + def _evicted(self, o): + if o.key in self.objects: + del self.objects[o.key] + + +def oid(o): + repr = '%016x' % o + return repr_to_oid(repr) +tid = oid + + +class FileCacheFuzzing(unittest.TestCase): + + def testFileCacheFuzzing(self): + cache_dummy = ClientCacheDummy() + fc = ZEO.cache.FileCache(maxsize=5000, fpath=None, + parent=cache_dummy) + for i in xrange(10000): + size = random.randint(0,5500) + obj = ZEO.cache.Object(key=(oid(i), oid(1)), version='', + data='*'*size, start_tid=oid(1), + end_tid=None) + fc.add(obj) + hexprint(fc.f) + fc.close() + + class CacheTests(unittest.TestCase): def setUp(self): @@ -151,6 +185,76 @@ class CacheTests(unittest.TestCase): # TODO: Need to make sure eviction of non-current data # and of version data are handled correctly. + def _run_fuzzing(self): + current_tid = 1 + current_oid = 1 + def log(*args): + #print args + pass + cache = self.fuzzy_cache + objects = self.fuzzy_cache_client.objects + for operation in xrange(10000): + op = random.choice(['add', 'access', 'remove', 'update', 'settid']) + if not objects: + op = 'add' + log(op) + if op == 'add': + current_oid += 1 + key = (oid(current_oid), tid(current_tid)) + object = ZEO.cache.Object( + key=key, version='', data='*'*random.randint(1,60*1024), + start_tid=tid(current_tid), end_tid=None) + assert key not in objects + log(key, len(object.data), current_tid) + cache.add(object) + if (object.size + ZEO.cache.OBJECT_HEADER_SIZE > + cache.maxsize - ZEO.cache.ZEC3_HEADER_SIZE): + assert key not in cache + else: + objects[key] = object + assert key in cache, key + elif op == 'access': + key = random.choice(objects.keys()) + log(key) + object = objects[key] + found = cache.access(key) + assert object.data == found.data + assert object.key == found.key + assert object.size == found.size == (len(object.data)+object.TOTAL_FIXED_SIZE) + elif op == 'remove': + key = random.choice(objects.keys()) + log(key) + cache.remove(key) + assert key not in cache + assert key not in objects + elif op == 'update': + key = random.choice(objects.keys()) + object = objects[key] + log(key, object.key) + if not object.end_tid: + object.end_tid = tid(current_tid) + log(key, current_tid) + cache.update(object) + elif op == 'settid': + current_tid += 1 + log(current_tid) + cache.settid(tid(current_tid)) + cache.close() + + def testFuzzing(self): + random.seed() + seed = random.randint(0, sys.maxint) + random.seed(seed) + self.fuzzy_cache_client = ClientCacheDummy() + self.fuzzy_cache = ZEO.cache.FileCache( + random.randint(100, 50*1024), None, self.fuzzy_cache_client) + try: + self._run_fuzzing() + except: + print "Error in fuzzing with seed", seed + hexprint(self.fuzzy_cache.f) + raise + def testSerialization(self): self.cache.store(n1, "", n2, None, "data for n1") self.cache.store(n2, "version", n2, None, "version data for n2") @@ -181,5 +285,6 @@ class CacheTests(unittest.TestCase): def test_suite(): suite = unittest.TestSuite() suite.addTest(unittest.makeSuite(CacheTests)) + suite.addTest(unittest.makeSuite(FileCacheFuzzing)) suite.addTest(doctest.DocFileSuite('filecache.txt')) return suite -- 2.30.9