Commit f97e6be8 authored by Tim Peters

Many small comment and code improvements.

parent 2a65e10b
...@@ -13,13 +13,13 @@ ...@@ -13,13 +13,13 @@
############################################################################## ##############################################################################
"""Disk-based client cache for ZEO. """Disk-based client cache for ZEO.
ClientCache exposes an API used by the ZEO client storage. FileCache ClientCache exposes an API used by the ZEO client storage. FileCache stores
stores objects one disk using a 2-tuple of oid and tid as key. objects on disk using a 2-tuple of oid and tid as key.
The upper cache's API is similar to a storage API with methods like The upper cache's API is similar to a storage API with methods like load(),
load(), store(), and invalidate(). It manages in-memory data store(), and invalidate(). It manages in-memory data structures that allow
structures that allow it to map this richer API onto the simple it to map this richer API onto the simple key-based API of the lower-level
key-based API of the lower-level cache. cache.
""" """
import bisect import bisect
...@@ -55,8 +55,8 @@ from ZODB.utils import z64, u64 ...@@ -55,8 +55,8 @@ from ZODB.utils import z64, u64
# <p> # <p>
# When the client is connected to the server, it receives # When the client is connected to the server, it receives
# invalidations every time an object is modified. When the client is # invalidations every time an object is modified. When the client is
# disconnected, it must perform cache verification to make sure its # disconnected and then reconnects, it must perform cache verification to make
# cached data is synchronized with the storage's current state. # sure its cached data is synchronized with the storage's current state.
# <p> # <p>
# quick verification # quick verification
# full verification # full verification
...@@ -422,79 +422,118 @@ class ClientCache: ...@@ -422,79 +422,118 @@ class ClientCache:
# in the header used by the cache's storage format. # in the header used by the cache's storage format.
class Object(object): class Object(object):
__slots__ = (# pair, object id, txn id -- something usable as a dict key __slots__ = (# pair (object id, txn id) -- something usable as a dict key;
# the second part of the part is equal to start_tid below # the second part of the pair is equal to start_tid
"key", "key",
"start_tid", # string, id of txn that wrote the data # string, tid of txn that wrote the data
"end_tid", # string, id of txn that wrote next revision "start_tid",
# or None
"version", # string, name of version
"data", # string, the actual data record for the object
"size", # total size of serialized object # string, tid of txn that wrote next revision, or None
# if the data is current; if not None, end_tid is strictly
# greater than start_tid
"end_tid",
# string, name of version
"version",
# string, the actual data record for the object
"data",
# total size of serialized object; this includes the
# data, version, and all overhead (header) bytes.
"size",
) )
# A serialized Object on disk looks like:
#
# offset # bytes value
# ------ ------- -----
# 0 8 end_tid; string
# 8 2 len(version); 2-byte signed int
# 10 4 len(data); 4-byte signed int
# 14 len(version) version; string
# 14+len(version) len(data) the object pickle; string
# 14+len(version)+
# len(data) 8 oid; string
# The serialization format uses an end tid of "\0" * 8 (z64), the least
# 8-byte string, to represent None. It isn't possible for an end_tid
# to be 0, because it must always be strictly greater than the start_tid.
fmt = ">8shi" # end_tid, len(self.version), len(self.data)
FIXED_HEADER_SIZE = struct.calcsize(fmt)
assert FIXED_HEADER_SIZE == 14
TOTAL_FIXED_SIZE = FIXED_HEADER_SIZE + 8 # +8 for the oid at the end
def __init__(self, key, version, data, start_tid, end_tid): def __init__(self, key, version, data, start_tid, end_tid):
self.key = key self.key = key
self.version = version self.version = version
self.data = data self.data = data
self.start_tid = start_tid self.start_tid = start_tid
self.end_tid = end_tid self.end_tid = end_tid
# The size of a the serialized object on disk, include the # The size of the serialized object on disk, including the
# 14-byte header, the length of data and version, and a # 14-byte header, the lengths of data and version, and a
# copy of the 8-byte oid. # copy of the 8-byte oid.
if data is not None: if data is not None:
self.size = 22 + len(data) + len(version) self.size = self.TOTAL_FIXED_SIZE + len(data) + len(version)
# The serialization format uses an end tid of "\0" * 8, the least
# 8-byte string, to represent None. It isn't possible for an
# end_tid to be 0, because it must always be strictly greater
# than the start_tid.
fmt = ">8shi" def get_header(self):
# Return just the fixed-size serialization header.
return struct.pack(self.fmt,
self.end_tid or z64,
len(self.version),
len(self.data))
def serialize(self, f): def serialize(self, f):
# Write standard form of Object to file, f. # Write standard form of Object to file f.
self.serialize_header(f) f.writelines([self.get_header(),
f.write(self.data) self.version,
f.write(self.key[0]) self.data,
self.key[0]])
def serialize_header(self, f): def serialize_header(self, f):
s = struct.pack(self.fmt, self.end_tid or "\0" * 8, # Write the fixed-sized serialization header, + the version.
len(self.version), len(self.data)) # Why is the version part of this?
f.write(s) f.writelines([self.get_header(), self.version])
f.write(self.version)
# fromFile is a class constructor, unserializing an Object from the
# current position in file f. Exclusive access to f for the duration
# is assumed. The key is an (oid, start_tid) pair, and the oid must
# match the serialized oid. If header_only is true, .data is left
# None in the Object returned.
def fromFile(cls, f, key, header_only=False): def fromFile(cls, f, key, header_only=False):
s = f.read(struct.calcsize(cls.fmt)) s = f.read(cls.FIXED_HEADER_SIZE)
if not s: if not s:
return None return None
oid, start_tid = key oid, start_tid = key
end_tid, vlen, dlen = struct.unpack(cls.fmt, s) end_tid, vlen, dlen = struct.unpack(cls.fmt, s)
if end_tid == z64: if end_tid == z64:
end_tid = None end_tid = None
version = f.read(vlen) version = f.read(vlen)
if vlen != len(version): if vlen != len(version):
raise ValueError("corrupted record, version") raise ValueError("corrupted record, version")
if header_only: if header_only:
data = None data = None
f.seek(dlen, 1)
else: else:
data = f.read(dlen) data = f.read(dlen)
if dlen != len(data): if dlen != len(data):
raise ValueError("corrupted record, data") raise ValueError("corrupted record, data")
s = f.read(8)
if s != oid: s = f.read(8)
raise ValueError("corrupted record, oid") if s != oid:
raise ValueError("corrupted record, oid")
return cls((oid, start_tid), version, data, start_tid, end_tid) return cls((oid, start_tid), version, data, start_tid, end_tid)
fromFile = classmethod(fromFile) fromFile = classmethod(fromFile)
def sync(f):
f.flush()
if hasattr(os, 'fsync'):
os.fsync(f.fileno())
# Entry just associates a key with a file offset. It's used by FileCache.
class Entry(object): class Entry(object):
__slots__ = (# object key -- something usable as a dict key. __slots__ = (# object key -- something usable as a dict key.
'key', 'key',
...@@ -513,9 +552,6 @@ class Entry(object): ...@@ -513,9 +552,6 @@ class Entry(object):
self.offset = offset self.offset = offset
magic = "ZEC3"
OBJECT_HEADER_SIZE = 1 + 4 + 16
## ##
# FileCache stores a cache in a single on-disk file. # FileCache stores a cache in a single on-disk file.
...@@ -525,9 +561,12 @@ OBJECT_HEADER_SIZE = 1 + 4 + 16 ...@@ -525,9 +561,12 @@ OBJECT_HEADER_SIZE = 1 + 4 + 16
# The file begins with a 12-byte header. The first four bytes are the # The file begins with a 12-byte header. The first four bytes are the
# file's magic number - ZEC3 - indicating zeo cache version 3. The # file's magic number - ZEC3 - indicating zeo cache version 3. The
# next eight bytes are the last transaction id. # next eight bytes are the last transaction id.
#
# The file is a contiguous sequence of blocks. All blocks begin with magic = "ZEC3"
# a one-byte status indicator: ZEC3_HEADER_SIZE = 12
# After the header, the file contains a contiguous sequence of blocks. All
# blocks begin with a one-byte status indicator:
# #
# 'a' # 'a'
# Allocated. The block holds an object; the next 4 bytes are >I # Allocated. The block holds an object; the next 4 bytes are >I
...@@ -540,10 +579,6 @@ OBJECT_HEADER_SIZE = 1 + 4 + 16 ...@@ -540,10 +579,6 @@ OBJECT_HEADER_SIZE = 1 + 4 + 16
# '1', '2', '3', '4' # '1', '2', '3', '4'
# The block is free, and consists of 1, 2, 3 or 4 bytes total. # The block is free, and consists of 1, 2, 3 or 4 bytes total.
# #
# 'Z'
# File header. The file starts with a magic number, currently
# 'ZEC3' and an 8-byte transaction id.
#
# "Total" includes the status byte, and size bytes. There are no # "Total" includes the status byte, and size bytes. There are no
# empty (size 0) blocks. # empty (size 0) blocks.
...@@ -556,6 +591,8 @@ OBJECT_HEADER_SIZE = 1 + 4 + 16 ...@@ -556,6 +591,8 @@ OBJECT_HEADER_SIZE = 1 + 4 + 16
# 16 bytes oid + tid, string. # 16 bytes oid + tid, string.
# size-OBJECT_HEADER_SIZE bytes, the object pickle. # size-OBJECT_HEADER_SIZE bytes, the object pickle.
OBJECT_HEADER_SIZE = 1 + 4 + 16
# The cache's currentofs goes around the file, circularly, forever. # The cache's currentofs goes around the file, circularly, forever.
# It's always the starting offset of some block. # It's always the starting offset of some block.
# #
...@@ -564,10 +601,14 @@ OBJECT_HEADER_SIZE = 1 + 4 + 16 ...@@ -564,10 +601,14 @@ OBJECT_HEADER_SIZE = 1 + 4 + 16
# blocks needed to make enough room for the new object are evicted, # blocks needed to make enough room for the new object are evicted,
# starting at currentofs. Exception: if currentofs is close enough # starting at currentofs. Exception: if currentofs is close enough
# to the end of the file that the new object can't fit in one # to the end of the file that the new object can't fit in one
# contiguous chunk, currentofs is reset to 0 first. # contiguous chunk, currentofs is reset to ZEC3_HEADER_SIZE first.
# Do all possible to ensure that the bytes we wrote are really on # Do all possible to ensure that the bytes we wrote to file f are really on
# disk. # disk.
def sync(f):
f.flush()
if hasattr(os, 'fsync'):
os.fsync(f.fileno())
class FileCache(object): class FileCache(object):
...@@ -598,13 +639,13 @@ class FileCache(object): ...@@ -598,13 +639,13 @@ class FileCache(object):
# Always the offset into the file of the start of a block. # Always the offset into the file of the start of a block.
# New and relocated objects are always written starting at # New and relocated objects are always written starting at
# currentofs. # currentofs.
self.currentofs = 12 self.currentofs = ZEC3_HEADER_SIZE
self.fpath = fpath self.fpath = fpath
if not reuse or not fpath or not os.path.exists(fpath): if not reuse or not fpath or not os.path.exists(fpath):
self.new = True self.new = True
if fpath: if fpath:
self.f = file(fpath, 'wb+') self.f = open(fpath, 'wb+')
else: else:
self.f = tempfile.TemporaryFile() self.f = tempfile.TemporaryFile()
# Make sure the OS really saves enough bytes for the file. # Make sure the OS really saves enough bytes for the file.
...@@ -616,9 +657,11 @@ class FileCache(object): ...@@ -616,9 +657,11 @@ class FileCache(object):
self.f.write(magic) self.f.write(magic)
self.f.write(z64) self.f.write(z64)
# and one free block. # and one free block.
self.f.write('f' + struct.pack(">I", self.maxsize - 12)) self.f.write('f' + struct.pack(">I", self.maxsize -
ZEC3_HEADER_SIZE))
self.sync() self.sync()
self.filemap[12] = self.maxsize - 12, None self.filemap[ZEC3_HEADER_SIZE] = (self.maxsize - ZEC3_HEADER_SIZE,
None)
else: else:
self.new = False self.new = False
self.f = None self.f = None
...@@ -635,7 +678,7 @@ class FileCache(object): ...@@ -635,7 +678,7 @@ class FileCache(object):
if self.new: if self.new:
return return
fsize = os.path.getsize(self.fpath) fsize = os.path.getsize(self.fpath)
self.f = file(self.fpath, 'rb+') self.f = open(self.fpath, 'rb+')
_magic = self.f.read(4) _magic = self.f.read(4)
if _magic != magic: if _magic != magic:
raise ValueError("unexpected magic number: %r" % _magic) raise ValueError("unexpected magic number: %r" % _magic)
...@@ -643,7 +686,7 @@ class FileCache(object): ...@@ -643,7 +686,7 @@ class FileCache(object):
# Remember the largest free block. That seems a # Remember the largest free block. That seems a
# decent place to start currentofs. # decent place to start currentofs.
max_free_size = max_free_offset = 0 max_free_size = max_free_offset = 0
ofs = 12 ofs = ZEC3_HEADER_SIZE
while ofs < fsize: while ofs < fsize:
self.f.seek(ofs) self.f.seek(ofs)
ent = None ent = None
...@@ -717,7 +760,7 @@ class FileCache(object): ...@@ -717,7 +760,7 @@ class FileCache(object):
def _makeroom(self, nbytes): def _makeroom(self, nbytes):
assert 0 < nbytes <= self.maxsize assert 0 < nbytes <= self.maxsize
if self.currentofs + nbytes > self.maxsize: if self.currentofs + nbytes > self.maxsize:
self.currentofs = 12 self.currentofs = ZEC3_HEADER_SIZE
ofs = self.currentofs ofs = self.currentofs
while nbytes > 0: while nbytes > 0:
size, e = self.filemap.pop(ofs) size, e = self.filemap.pop(ofs)
...@@ -780,7 +823,7 @@ class FileCache(object): ...@@ -780,7 +823,7 @@ class FileCache(object):
self._writeobj(object, available) self._writeobj(object, available)
def _verify_filemap(self, display=False): def _verify_filemap(self, display=False):
a = 12 a = ZEC3_HEADER_SIZE
f = self.f f = self.f
while a < self.maxsize: while a < self.maxsize:
f.seek(a) f.seek(a)
...@@ -859,7 +902,6 @@ class FileCache(object): ...@@ -859,7 +902,6 @@ class FileCache(object):
# This method should be called when the object header is modified. # This method should be called when the object header is modified.
def update(self, obj): def update(self, obj):
e = self.key2entry[obj.key] e = self.key2entry[obj.key]
self.f.seek(e.offset + OBJECT_HEADER_SIZE) self.f.seek(e.offset + OBJECT_HEADER_SIZE)
obj.serialize_header(self.f) obj.serialize_header(self.f)
...@@ -869,6 +911,7 @@ class FileCache(object): ...@@ -869,6 +911,7 @@ class FileCache(object):
raise ValueError("new last tid (%s) must be greater than " raise ValueError("new last tid (%s) must be greater than "
"previous one (%s)" % (u64(tid), "previous one (%s)" % (u64(tid),
u64(self.tid))) u64(self.tid)))
assert isinstance(tid, str) and len(tid) == 8
self.tid = tid self.tid = tid
self.f.seek(4) self.f.seek(4)
self.f.write(tid) self.f.write(tid)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment