Commit 55983988 authored by Chris McDonough's avatar Chris McDonough

Add a record iteration protocol to FileStorage. You can use the record...

Add a record iteration protocol to FileStorage.  You can use the record iterator to iterate over all current revisions
of data pickles in the storage.

In order to support calling via ZEO, we don't implement this as an actual iterator.  An example of using the record iterator
protocol is as follows:

storage = FileStorage('anexisting.fs')
next_oid = None
while 1:
    oid, tid, data, next_oid = storage.record_iternext(next_oid)
    # do something with oid, tid and data
    if next_oid is None:
        break

The behavior of the iteration protocol is now to iterate over all current records in the database in ascending oid order, 
although this is not a promise to do so in the future.

parent ecaa428c
......@@ -371,8 +371,13 @@ class FileStorage(BaseStorage.BaseStorage,
return None
pos = long(pos)
if isinstance(index, DictType):
# Convert to fsIndex.
if (
isinstance(index, DictType) or
(isinstance(index, fsIndex) and isinstance(index._data, DictType))
):
# Convert dictionary indexes to fsIndexes *or* convert fsIndexes
# which have a DictType `_data` attribute to a new fsIndex (newer
# fsIndexes have an OOBTree as `_data`)
newindex = fsIndex()
newindex.update(index)
index = newindex
......@@ -1397,6 +1402,19 @@ class FileStorage(BaseStorage.BaseStorage,
if e.errno != errno.ENOENT:
raise
def record_iternext(self, next=None):
index = self._index
oid = index.minKey(next)
try:
next_oid = index.minKey(self.new_oid(oid))
except ValueError: # "empty tree" error
next_oid = None
data, tid = self.load(oid, None) # ignore versions
return oid, tid, data, next_oid
def shift_transactions_forward(index, vindex, tindex, file, pos, opos):
"""Copy transactions forward in the data file
......
......@@ -39,6 +39,7 @@
import struct
from BTrees._fsBTree import fsBucket
from BTrees.OOBTree import OOBTree
# convert between numbers and six-byte strings
......@@ -48,10 +49,18 @@ def num2str(n):
def str2num(s):
return struct.unpack(">Q", "\000\000" + s)[0]
def prefix_plus_one(s):
num = str2num(s)
return num2str(num + 1)
def prefix_minus_one(s):
num = str2num(s)
return num2str(num - 1)
class fsIndex(object):
def __init__(self):
self._data = {}
self._data = OOBTree()
def __getitem__(self, key):
return str2num(self._data[key[:6]][key[6:]])
......@@ -126,31 +135,61 @@ class fsIndex(object):
def values(self):
return list(self.itervalues())
def maxKey(self):
# This is less general than the BTree method of the same name: we
# only care about the largest key in the entire tree. By
# construction, that's the largest oid in use in the associated
# FileStorage.
keys = self._data.keys()
if not keys:
# This is the same exception a BTree maxKey() raises when the
# tree is empty.
raise ValueError("empty tree")
# We expect that keys is small, since each fsBTree in _data.values()
# can hold as many as 2**16 = 64K entries. So this max() should go
# fast too. Regardless, there's no faster way to find the largest
# prefix.
biggest_prefix = max(keys)
# Comment below applies for the following minKey and maxKey methods
#
# Obscure: what if `tree` is actually empty? We're relying here on
# that this class doesn't implement __delitem__: once a key gets
# into an fsIndex, the only way it can go away is by invoking
# clear(). Therefore nothing in _data.values() is ever empty.
#
# Note that because `tree` is an fsBTree, its minKey()/maxKey() methods are
# very efficient.
def minKey(self, key=None):
if key is None:
smallest_prefix = self._data.minKey()
else:
smallest_prefix = self._data.minKey(key[:6])
tree = self._data[smallest_prefix]
assert tree
if key is None:
smallest_suffix = tree.minKey()
else:
try:
smallest_suffix = tree.minKey(key[6:])
except ValueError: # 'empty tree' (no suffix >= arg)
next_prefix = prefix_plus_one(smallest_prefix)
smallest_prefix = self._data.minKey(next_prefix)
tree = self._data[smallest_prefix]
assert tree
smallest_suffix = tree.minKey()
return smallest_prefix + smallest_suffix
def maxKey(self, key=None):
if key is None:
biggest_prefix = self._data.maxKey()
else:
biggest_prefix = self._data.maxKey(key[:6])
tree = self._data[biggest_prefix]
# Obscure: what if tree is actually empty? We're relying here on
# that this class doesn't implement __delitem__: once a key gets
# into an fsIndex, the only way it can go away is by invoking
# clear(). Therefore nothing in _data.values() is ever empty.
#
# Note that because `tree` is an fsBTree, its maxKey() method is very
# efficient.
assert tree
return biggest_prefix + tree.maxKey()
if key is None:
biggest_suffix = tree.maxKey()
else:
try:
biggest_suffix = tree.maxKey(key[6:])
except ValueError: # 'empty tree' (no suffix <= arg)
next_prefix = prefix_minus_one(biggest_prefix)
biggest_prefix = self._data.maxKey(next_prefix)
tree = self._data[biggest_prefix]
assert tree
biggest_suffix = tree.maxKey()
return biggest_prefix + biggest_suffix
......@@ -137,6 +137,41 @@ class FileStorageTests(
# Python dict.
self.check_conversion_to_fsIndex(read_only=True)
def check_conversion_from_dict_to_btree_data_in_fsIndex(self):
# To support efficient range searches on its keys as part of
# implementing a record iteration protocol in FileStorage, we
# converted the fsIndex class from using a dictionary as its
# self._data attribute to using an OOBTree in its stead.
from ZODB.fsIndex import fsIndex
from BTrees.OOBTree import OOBTree
# Create some data, and remember the index.
for i in range(10):
self._dostore()
data_dict = dict(self._storage._index._data)
# Replace the OOBTree with a dictionary and commit it.
self._storage._index._data = data_dict
get_transaction().commit()
# Save the index.
self._storage.close()
# Verify it's converted to fsIndex in memory upon open.
self.open()
self.assert_(isinstance(self._storage._index, fsIndex))
self.assert_(isinstance(self._storage._index._data, OOBTree))
# Verify it has the right content.
new_data_dict = dict(self._storage._index._data)
self.assertEqual(len(data_dict), len(new_data_dict))
for k in data_dict:
old_tree = data_dict[k]
new_tree = new_data_dict[k]
self.assertEqual(list(old_tree.items()), list(new_tree.items()))
def check_save_after_load_with_no_index(self):
for i in range(10):
self._dostore()
......@@ -288,6 +323,35 @@ class FileStorageTests(
else:
self.fail("expected CorruptedError")
def check_record_iternext(self):
from ZODB.DB import DB
db = DB(self._storage)
conn = db.open()
conn.root()['abc'] = MinPO('abc')
conn.root()['xyz'] = MinPO('xyz')
get_transaction().commit()
# Ensure it's all on disk.
db.close()
self._storage.close()
self.open()
key = None
for x in ('\000', '\001', '\002'):
oid, tid, data, next_oid = self._storage.record_iternext(key)
self.assertEqual(oid, ('\000' * 7) + x)
key = next_oid
expected_data, expected_tid = self._storage.load(oid, '')
self.assertEqual(expected_data, data)
self.assertEqual(expected_tid, tid)
if x == '\002':
self.assertEqual(next_oid, None)
else:
self.assertNotEqual(next_oid, None)
class FileStorageRecoveryTest(
StorageTestBase.StorageTestBase,
RecoveryStorage.RecoveryStorage,
......
......@@ -15,7 +15,7 @@ import unittest
import random
from ZODB.fsIndex import fsIndex
from ZODB.utils import p64
from ZODB.utils import p64, z64
class Test(unittest.TestCase):
......@@ -130,6 +130,44 @@ class Test(unittest.TestCase):
index_max = index.maxKey()
self.assertEqual(index_max, correct_max)
index.clear()
a = '\000\000\000\000\000\001\000\000'
b = '\000\000\000\000\000\002\000\000'
c = '\000\000\000\000\000\003\000\000'
d = '\000\000\000\000\000\004\000\000'
index[a] = 1
index[c] = 2
self.assertEqual(index.maxKey(b), a)
self.assertEqual(index.maxKey(d), c)
self.assertRaises(ValueError, index.maxKey, z64)
def testMinKey(self):
index = self.index
index.clear()
# An empty index should complain.
self.assertRaises(ValueError, index.minKey)
# Now build up a tree with random values, and check maxKey at each
# step.
correct_min = "\xff" * 8 # bigger than anything we'll add
for i in range(1000):
key = p64(random.randrange(100000000))
index[key] = i
correct_min = min(correct_min, key)
index_min = index.minKey()
self.assertEqual(index_min, correct_min)
index.clear()
a = '\000\000\000\000\000\001\000\000'
b = '\000\000\000\000\000\002\000\000'
c = '\000\000\000\000\000\003\000\000'
d = '\000\000\000\000\000\004\000\000'
index[a] = 1
index[c] = 2
self.assertEqual(index.minKey(b), c)
self.assertRaises(ValueError, index.minKey, d)
def test_suite():
loader=unittest.TestLoader()
return loader.loadTestsFromTestCase(Test)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment