Add a record iteration protocol to FileStorage. You can use the record...

Add a record iteration protocol to FileStorage. You can use the record iterator to iterate over all current revisions of data pickles in the storage. In order to support calling via ZEO, we don't implement this as an actual iterator. An example of using the record iterator protocol is as follows:

    storage = FileStorage('anexisting.fs')
    next_oid = None
    while 1:
        oid, tid, data, next_oid = storage.record_iternext(next_oid)
        # do something with oid, tid and data
        if next_oid is None:
            break

The behavior of the iteration protocol is currently to iterate over all current records in the database in ascending oid order, although this is not a promise to do so in the future.

Add a record iteration protocol to FileStorage. You can use the record...
Add a record iteration protocol to FileStorage. You can use the record iterator to iterate over all current revisions of data pickles in the storage. In order to support calling via ZEO, we don't implement this as an actual iterator. An example of using the record iterator protocol is as follows:

    storage = FileStorage('anexisting.fs')
    next_oid = None
    while 1:
        oid, tid, data, next_oid = storage.record_iternext(next_oid)
        # do something with oid, tid and data
        if next_oid is None:
            break

The behavior of the iteration protocol is currently to iterate over all current records in the database in ascending oid order, although this is not a promise to do so in the future.
7f220b80 · Chris McDonough · ac5116e4 · 7f220b80 · 7f220b80 · 7f220b80
Commit 7f220b80 authored Mar 20, 2005 by Chris McDonough
4 changed files
--- a/src/ZODB/FileStorage/FileStorage.py
+++ b/src/ZODB/FileStorage/FileStorage.py
@@ -371,8 +371,13 @@ class FileStorage(BaseStorage.BaseStorage,
            return None
        pos = long(pos)

-        if isinstance(index, DictType):
-            # Convert to fsIndex.
+        if (
+            isinstance(index, DictType) or
+            (isinstance(index, fsIndex) and isinstance(index._data, DictType))
+             ):
+            # Convert dictionary indexes to fsIndexes *or* convert fsIndexes
+            # which have a DictType `_data` attribute to a new fsIndex (newer
+            # fsIndexes have an OOBTree as `_data`)
            newindex = fsIndex()
            newindex.update(index)
            index = newindex
@@ -1397,6 +1402,19 @@ class FileStorage(BaseStorage.BaseStorage,
                if e.errno != errno.ENOENT:
                    raise

+    def record_iternext(self, next=None):
+        """Return one step of the record iteration protocol.
+
+        `next` is None to start at the smallest oid in the index, or
+        the `next_oid` value returned by the previous call.
+
+        Returns a tuple (oid, tid, data, next_oid).  `oid` is the
+        smallest indexed oid that is >= `next`; `data` and `tid` are
+        what self.load() returns for it.  `next_oid` is the following
+        indexed oid, or None when `oid` is the last one.
+
+        Raises ValueError (from the index's minKey) when the index has
+        no oid >= `next`, e.g. when the storage is empty.
+        """
+        # Imported locally so this method does not depend on how the
+        # rest of the module imports struct.
+        import struct
+
+        index = self._index
+        oid = index.minKey(next)
+
+        # Find the successor by plain 64-bit arithmetic rather than via
+        # self.new_oid(oid):  new_oid() is the oid allocator, so using it
+        # here ties a read-only lookup to allocator behavior.
+        # NOTE(review): new_oid() is defined outside this view; it is
+        # believed to refuse read-only storages and to mishandle a
+        # trailing '\377' byte -- confirm against BaseStorage.
+        if oid == '\377' * 8:
+            # Largest possible oid:  nothing can follow it.
+            next_oid = None
+        else:
+            successor = struct.pack(">Q", struct.unpack(">Q", oid)[0] + 1)
+            try:
+                next_oid = index.minKey(successor)
+            except ValueError: # "empty tree" error
+                next_oid = None
+
+        data, tid = self.load(oid, None) # ignore versions
+        return oid, tid, data, next_oid
+        
+

 def shift_transactions_forward(index, vindex, tindex, file, pos, opos):
    """Copy transactions forward in the data file

--- a/src/ZODB/fsIndex.py
+++ b/src/ZODB/fsIndex.py
@@ -39,6 +39,7 @@
 import struct

 from BTrees._fsBTree import fsBucket
+from BTrees.OOBTree import OOBTree

 # convert between numbers and six-byte strings

@@ -48,10 +49,18 @@ def num2str(n):
 def str2num(s):
    return struct.unpack(">Q", "\000\000" + s)[0]

+def prefix_plus_one(s):
+    # Decode the prefix string `s` to a number, add one, and encode the
+    # result back to a string.
+    return num2str(str2num(s) + 1)
+
+def prefix_minus_one(s):
+    # Decode the prefix string `s` to a number, subtract one, and encode
+    # the result back to a string.
+    return num2str(str2num(s) - 1)
+
 class fsIndex(object):

    def __init__(self):
-        self._data = {}
+        self._data = OOBTree()

    def __getitem__(self, key):
        return str2num(self._data[key[:6]][key[6:]])
@@ -126,31 +135,61 @@ class fsIndex(object):
    def values(self):
        return list(self.itervalues())

-    def maxKey(self):
-        # This is less general than the BTree method of the same name:  we
-        # only care about the largest key in the entire tree.  By
-        # construction, that's the largest oid in use in the associated
-        # FileStorage.
-
-        keys = self._data.keys()
-        if not keys:
-            # This is the same exception a BTree maxKey() raises when the
-            # tree is empty.
-            raise ValueError("empty tree")
-
-        # We expect that keys is small, since each fsBTree in _data.values()
-        # can hold as many as 2**16 = 64K entries.  So this max() should go
-        # fast too.  Regardless, there's no faster way to find the largest
-        # prefix.
-        biggest_prefix = max(keys)
+    # Comment below applies for the following minKey and maxKey methods
+    #
+    # Obscure:  what if `tree` is actually empty?  We're relying here on
+    # the fact that this class doesn't implement __delitem__:  once a key
+    # gets into an fsIndex, the only way it can go away is by invoking
+    # clear().  Therefore nothing in _data.values() is ever empty.
+    #
+    # Note that because `tree` is an fsBTree, its minKey()/maxKey() methods are
+    # very efficient.
+
+    def minKey(self, key=None):
+        """Return the smallest key in the index that is >= `key`.
+
+        With `key` None, return the smallest key overall.  Keys are
+        split into a six-byte prefix (the key into self._data) and the
+        remaining suffix (the key into that prefix's tree).
+
+        Raises ValueError, propagated from the underlying trees'
+        minKey(), when no key qualifies; this includes an empty index.
+        """
+        if key is None:
+            smallest_prefix = self._data.minKey()
+        else:
+            smallest_prefix = self._data.minKey(key[:6])
+
+        tree = self._data[smallest_prefix]
+
+        assert tree
+
+        if key is None or smallest_prefix != key[:6]:
+            # Either there is no lower bound, or the first populated
+            # prefix already lies beyond the bound's prefix.  In both
+            # cases every suffix in `tree` qualifies, so the bound's
+            # suffix must not be applied to it.
+            return smallest_prefix + tree.minKey()
+
+        try:
+            smallest_suffix = tree.minKey(key[6:])
+        except ValueError: # 'empty tree' (no suffix >= arg)
+            if smallest_prefix == '\377' * 6:
+                # No larger prefix can exist.  Re-raise instead of
+                # relying on how prefix_plus_one() handles overflow.
+                raise
+            next_prefix = prefix_plus_one(smallest_prefix)
+            smallest_prefix = self._data.minKey(next_prefix)
+            tree = self._data[smallest_prefix]
+            assert tree
+            smallest_suffix = tree.minKey()
+
+        return smallest_prefix + smallest_suffix
+
+    def maxKey(self, key=None):
+        """Return the largest key in the index that is <= `key`.
+
+        With `key` None, return the largest key overall.  Keys are
+        split into a six-byte prefix (the key into self._data) and the
+        remaining suffix (the key into that prefix's tree).
+
+        Raises ValueError, propagated from the underlying trees'
+        maxKey(), when no key qualifies; this includes an empty index.
+        """
+        if key is None:
+            biggest_prefix = self._data.maxKey()
+        else:
+            biggest_prefix = self._data.maxKey(key[:6])
+
        tree = self._data[biggest_prefix]

-        # Obscure:  what if tree is actually empty?  We're relying here on
-        # that this class doesn't implement __delitem__:  once a key gets
-        # into an fsIndex, the only way it can go away is by invoking
-        # clear().  Therefore nothing in _data.values() is ever empty.
-        #
-        # Note that because `tree` is an fsBTree, its maxKey() method is very
-        # efficient.
        assert tree
-        return biggest_prefix + tree.maxKey()
+
+        if key is None or biggest_prefix != key[:6]:
+            # Either there is no upper bound, or the last populated
+            # prefix already lies below the bound's prefix.  In both
+            # cases every suffix in `tree` qualifies, so the bound's
+            # suffix must not be applied to it.
+            return biggest_prefix + tree.maxKey()
+
+        try:
+            biggest_suffix = tree.maxKey(key[6:])
+        except ValueError: # 'empty tree' (no suffix <= arg)
+            if biggest_prefix == '\000' * 6:
+                # No smaller prefix can exist.  Re-raise instead of
+                # relying on how prefix_minus_one() handles underflow.
+                raise
+            next_prefix = prefix_minus_one(biggest_prefix)
+            biggest_prefix = self._data.maxKey(next_prefix)
+            tree = self._data[biggest_prefix]
+            assert tree
+            biggest_suffix = tree.maxKey()
+
+        return biggest_prefix + biggest_suffix
+
--- a/src/ZODB/tests/testFileStorage.py
+++ b/src/ZODB/tests/testFileStorage.py
@@ -137,6 +137,41 @@ class FileStorageTests(
        # Python dict.
        self.check_conversion_to_fsIndex(read_only=True)

+    def check_conversion_from_dict_to_btree_data_in_fsIndex(self):
+        # To support efficient range searches on its keys as part of
+        # implementing a record iteration protocol in FileStorage, we
+        # converted the fsIndex class from using a dictionary as its
+        # self._data attribute to using an OOBTree in its stead.
+        #
+        # This test simulates an index saved in the old layout (an
+        # fsIndex whose _data is a plain dict) and checks that reopening
+        # the storage yields an fsIndex backed by an OOBTree with the
+        # same content.
+
+        from ZODB.fsIndex import fsIndex
+        from BTrees.OOBTree import OOBTree
+
+        # Create some data, and remember the index.
+        for i in range(10):
+            self._dostore()
+        data_dict = dict(self._storage._index._data)
+
+        # Replace the OOBTree with a dictionary and commit it.
+        self._storage._index._data = data_dict
+        get_transaction().commit()
+
+        # Save the index.
+        self._storage.close()
+
+        # Verify it's converted to fsIndex in memory upon open, and that
+        # the fsIndex is backed by an OOBTree again.
+        self.open()
+        self.assert_(isinstance(self._storage._index, fsIndex))
+        self.assert_(isinstance(self._storage._index._data, OOBTree))
+
+        # Verify it has the right content.
+        new_data_dict = dict(self._storage._index._data)
+        self.assertEqual(len(data_dict), len(new_data_dict))
+
+        # The values of _data are themselves mappings, so compare their
+        # items rather than the mapping objects.
+        for k in data_dict:
+            old_tree = data_dict[k]
+            new_tree = new_data_dict[k]
+            self.assertEqual(list(old_tree.items()), list(new_tree.items()))
+
    def check_save_after_load_with_no_index(self):
        for i in range(10):
            self._dostore()
@@ -288,6 +323,35 @@ class FileStorageTests(
        else:
            self.fail("expected CorruptedError")

+    def check_record_iternext(self):
+        # Walk the whole storage with record_iternext() and check that
+        # each step returns the expected oid, the same data and tid as
+        # load(), and a next_oid that is None only on the last record.
+
+        from ZODB.DB import DB
+
+        db = DB(self._storage)
+        conn = db.open()
+        conn.root()['abc'] = MinPO('abc')
+        conn.root()['xyz'] = MinPO('xyz')
+        get_transaction().commit()
+
+        # Ensure it's all on disk.
+        db.close()
+        self._storage.close()
+
+        self.open()
+
+        # Start with key None and feed each returned next_oid back in.
+        # Three records with oids 0, 1 and 2 are expected -- presumably
+        # the root object plus the two MinPO instances stored above.
+        key = None
+        for x in ('\000', '\001', '\002'):
+            oid, tid, data, next_oid = self._storage.record_iternext(key)
+            self.assertEqual(oid, ('\000' * 7) + x)
+            key = next_oid
+            expected_data, expected_tid = self._storage.load(oid, '')
+            self.assertEqual(expected_data, data)
+            self.assertEqual(expected_tid, tid)
+            # Only the last record may report that nothing follows.
+            if x == '\002':
+                self.assertEqual(next_oid, None)
+            else:
+                self.assertNotEqual(next_oid, None)
+
+
 class FileStorageRecoveryTest(
    StorageTestBase.StorageTestBase,
    RecoveryStorage.RecoveryStorage,

--- a/src/ZODB/tests/testfsIndex.py
+++ b/src/ZODB/tests/testfsIndex.py
@@ -15,7 +15,7 @@ import unittest
 import random

 from ZODB.fsIndex import fsIndex
-from ZODB.utils import p64
+from ZODB.utils import p64, z64


 class Test(unittest.TestCase):
@@ -130,6 +130,44 @@ class Test(unittest.TestCase):
            index_max = index.maxKey()
            self.assertEqual(index_max, correct_max)

+        index.clear()
+        a = '\000\000\000\000\000\001\000\000'
+        b = '\000\000\000\000\000\002\000\000'
+        c = '\000\000\000\000\000\003\000\000'
+        d = '\000\000\000\000\000\004\000\000'
+        index[a] = 1
+        index[c] = 2
+        self.assertEqual(index.maxKey(b), a)
+        self.assertEqual(index.maxKey(d), c)
+        self.assertRaises(ValueError, index.maxKey, z64)
+
+    def testMinKey(self):
+        index = self.index
+        index.clear()
+
+        # An empty index should complain.
+        self.assertRaises(ValueError, index.minKey)
+
+        # Now build up a tree with random values, and check minKey at each
+        # step.
+        correct_min = "\xff" * 8   # bigger than anything we'll add
+        for i in range(1000):
+            key = p64(random.randrange(100000000))
+            index[key] = i
+            correct_min = min(correct_min, key)
+            index_min = index.minKey()
+            self.assertEqual(index_min, correct_min)
+
+        # Check minKey with a lower bound.  Only a and c are stored:  b's
+        # prefix is absent, so the smallest key >= b is c, and nothing is
+        # stored at or above d.
+        # NOTE(review): every suffix used here is '\000\000', so a bound
+        # with a non-zero suffix whose prefix is absent is not exercised.
+        index.clear()
+        a = '\000\000\000\000\000\001\000\000'
+        b = '\000\000\000\000\000\002\000\000'
+        c = '\000\000\000\000\000\003\000\000'
+        d = '\000\000\000\000\000\004\000\000'
+        index[a] = 1
+        index[c] = 2
+        self.assertEqual(index.minKey(b), c)
+        self.assertRaises(ValueError, index.minKey, d)
+
 def test_suite():
    loader=unittest.TestLoader()
    return loader.loadTestsFromTestCase(Test)