Commit 476fc8eb authored by Tim Peters's avatar Tim Peters

More code and comment cleanups. Notable changes:

FileCache:  removed the currentsize attribute.  It was initialized
to 0 but never updated.  Some tests referenced it, but since it was
always 0 those tests weren't getting any good from it.  Don't
recall (or never knew) what its intended purpose may have been.

FileCache.remove():  Repaired major bug.  This mistakenly stored
the "this disk block is free now" status byte into the start of
the serialized Object record's end_tid field (incorrect seek
offset).  The in-memory structures were correct then, but got out
of synch with the disk file; the latter then still claimed to have
info for a "live" object revision, but that revision was actually
dead, and the info on disk had a corrupt end_tid value.
parent f97e6be8
...@@ -2,6 +2,12 @@ What's new in ZODB3 3.3.1? ...@@ -2,6 +2,12 @@ What's new in ZODB3 3.3.1?
========================== ==========================
Release date: xx-xxx-2004 Release date: xx-xxx-2004
ZEO client cache
----------------
- Fixed a bug wherein an object removed from the client cache didn't
properly mark the file slice it occupied as being available for reuse.
persistent persistent
---------- ----------
......
...@@ -447,15 +447,15 @@ class Object(object): ...@@ -447,15 +447,15 @@ class Object(object):
# A serialized Object on disk looks like: # A serialized Object on disk looks like:
# #
# offset # bytes value # offset # bytes value
# ------ ------- ----- # ------ ------- -----
# 0 8 end_tid; string # 0 8 end_tid; string
# 8 2 len(version); 2-byte signed int # 8 2 len(version); 2-byte signed int
# 10 4 len(data); 4-byte signed int # 10 4 len(data); 4-byte signed int
# 14 len(version) version; string # 14 len(version) version; string
# 14+len(version) len(data) the object pickle; string # 14+len(version) len(data) the object pickle; string
# 14+len(version)+ # 14+len(version)+
# len(data) 8 oid; string # len(data) 8 oid; string
# The serialization format uses an end tid of "\0" * 8 (z64), the least # The serialization format uses an end tid of "\0" * 8 (z64), the least
# 8-byte string, to represent None. It isn't possible for an end_tid # 8-byte string, to represent None. It isn't possible for an end_tid
...@@ -556,7 +556,7 @@ class Entry(object): ...@@ -556,7 +556,7 @@ class Entry(object):
## ##
# FileCache stores a cache in a single on-disk file. # FileCache stores a cache in a single on-disk file.
# #
# On-disk cache structure # On-disk cache structure.
# #
# The file begins with a 12-byte header. The first four bytes are the # The file begins with a 12-byte header. The first four bytes are the
# file's magic number - ZEC3 - indicating zeo cache version 3. The # file's magic number - ZEC3 - indicating zeo cache version 3. The
...@@ -583,13 +583,13 @@ ZEC3_HEADER_SIZE = 12 ...@@ -583,13 +583,13 @@ ZEC3_HEADER_SIZE = 12
# empty (size 0) blocks. # empty (size 0) blocks.
# XXX This needs a lot more hair. # Allocated blocks have more structure:
# The structure of an allocated block is more complicated:
# #
# 1 byte allocation status ('a'). # 1 byte allocation status ('a').
# 4 bytes block size, >I format. # 4 bytes block size, >I format.
# 16 bytes oid + tid, string. # 16 bytes oid + tid, string.
# size-OBJECT_HEADER_SIZE bytes, the object pickle. # size-OBJECT_HEADER_SIZE bytes, the serialization of an Object (see
# class Object for details).
OBJECT_HEADER_SIZE = 1 + 4 + 16 OBJECT_HEADER_SIZE = 1 + 4 + 16
...@@ -613,11 +613,19 @@ def sync(f): ...@@ -613,11 +613,19 @@ def sync(f):
class FileCache(object): class FileCache(object):
def __init__(self, maxsize, fpath, parent, reuse=True): def __init__(self, maxsize, fpath, parent, reuse=True):
# Maximum total of object sizes we keep in cache. # - `maxsize`: total size of the cache file, in bytes
# - `fpath`: filepath for the cache file, or None; see `reuse`
# - `parent`: the ClientCache this FileCache is part of
# - `reuse`: If true, and fpath is not None, and fpath names a
# file that exists, that pre-existing file is used (persistent
# cache). In all other cases a new file is created: a temp
# file if fpath is None, else with path fpath.
assert maxsize >= 1000 # although 1000 is still absurdly low
self.maxsize = maxsize self.maxsize = maxsize
# Current total of object sizes in cache.
self.currentsize = 0
self.parent = parent self.parent = parent
# tid for the most recent transaction we know about. This is also
# stored near the start of the file.
self.tid = None self.tid = None
# Map offset in file to pair (data record size, Entry). # Map offset in file to pair (data record size, Entry).
...@@ -625,10 +633,12 @@ class FileCache(object): ...@@ -625,10 +633,12 @@ class FileCache(object):
# filemap always contains a complete account of what's in the # filemap always contains a complete account of what's in the
# file -- study method _verify_filemap for executable checking # file -- study method _verify_filemap for executable checking
# of the relevant invariants. An offset is at the start of a # of the relevant invariants. An offset is at the start of a
# block iff it's a key in filemap. # block iff it's a key in filemap. The data record size is
# stored in the file too, so we could just seek to the offset
# and read it up; keeping it in memory too is an optimization.
self.filemap = {} self.filemap = {}
# Map key to Entry. There's one entry for each object in the # Map key to Entry. There's one Entry for each object in the
# cache file. After # cache file. After
# obj = key2entry[key] # obj = key2entry[key]
# then # then
...@@ -641,6 +651,12 @@ class FileCache(object): ...@@ -641,6 +651,12 @@ class FileCache(object):
# currentofs. # currentofs.
self.currentofs = ZEC3_HEADER_SIZE self.currentofs = ZEC3_HEADER_SIZE
# self.new is false iff we're reusing an existing file.
# self.f is the open file object.
# When we're not reusing an existing file, self.f is left None
# here -- the scan() method must be called then to open the file
# (and it sets self.f).
self.fpath = fpath self.fpath = fpath
if not reuse or not fpath or not os.path.exists(fpath): if not reuse or not fpath or not os.path.exists(fpath):
self.new = True self.new = True
...@@ -663,17 +679,19 @@ class FileCache(object): ...@@ -663,17 +679,19 @@ class FileCache(object):
self.filemap[ZEC3_HEADER_SIZE] = (self.maxsize - ZEC3_HEADER_SIZE, self.filemap[ZEC3_HEADER_SIZE] = (self.maxsize - ZEC3_HEADER_SIZE,
None) None)
else: else:
# Reuse an existing file. scan() will open & read it.
self.new = False self.new = False
assert fpath
self.f = None self.f = None
# Statistics: _n_adds, _n_added_bytes, # Statistics: _n_adds, _n_added_bytes,
# _n_evicts, _n_evicted_bytes # _n_evicts, _n_evicted_bytes
self.clearStats() self.clearStats()
##
# Scan the current contents of the cache file, calling install # Scan the current contents of the cache file, calling install
# for each object found in the cache. This method should only # for each object found in the cache. This method should only
# be called once to initialize the cache from disk. # be called once to initialize the cache from disk.
def scan(self, install): def scan(self, install):
if self.new: if self.new:
return return
...@@ -683,6 +701,8 @@ class FileCache(object): ...@@ -683,6 +701,8 @@ class FileCache(object):
if _magic != magic: if _magic != magic:
raise ValueError("unexpected magic number: %r" % _magic) raise ValueError("unexpected magic number: %r" % _magic)
self.tid = self.f.read(8) self.tid = self.f.read(8)
if len(self.tid) != 8:
raise ValueError("cache file too small -- no tid at start")
# Remember the largest free block. That seems a # Remember the largest free block. That seems a
# decent place to start currentofs. # decent place to start currentofs.
max_free_size = max_free_offset = 0 max_free_size = max_free_offset = 0
...@@ -702,7 +722,8 @@ class FileCache(object): ...@@ -702,7 +722,8 @@ class FileCache(object):
elif status in '1234': elif status in '1234':
size = int(status) size = int(status)
else: else:
assert 0, hex(ord(status)) raise ValueError("unknown status byte value %s in client "
"cache file" % 0, hex(ord(status)))
self.filemap[ofs] = size, ent self.filemap[ofs] = size, ent
if ent is None and size > max_free_size: if ent is None and size > max_free_size:
...@@ -710,7 +731,9 @@ class FileCache(object): ...@@ -710,7 +731,9 @@ class FileCache(object):
ofs += size ofs += size
assert ofs == fsize if ofs != fsize:
raise ValueError("final offset %s != file size %s in client "
"cache file" % (ofs, fsize))
if __debug__: if __debug__:
self._verify_filemap() self._verify_filemap()
self.currentofs = max_free_offset self.currentofs = max_free_offset
...@@ -728,35 +751,47 @@ class FileCache(object): ...@@ -728,35 +751,47 @@ class FileCache(object):
self._n_accesses self._n_accesses
) )
##
# The number of objects currently in the cache.
def __len__(self): def __len__(self):
return len(self.key2entry) return len(self.key2entry)
##
# Iterate over the objects in the cache, producing an Entry for each.
def __iter__(self): def __iter__(self):
return self.key2entry.itervalues() return self.key2entry.itervalues()
##
# Test whether an (oid, tid) pair is in the cache.
def __contains__(self, key): def __contains__(self, key):
return key in self.key2entry return key in self.key2entry
##
# Do all possible to ensure all bytes written to the file so far are
# actually on disk.
def sync(self): def sync(self):
sync(self.f) sync(self.f)
##
# Close the underlying file. No methods accessing the cache should be
# used after this.
def close(self): def close(self):
if self.f: if self.f:
self.sync() self.sync()
self.f.close() self.f.close()
self.f = None self.f = None
##
# Evict objects as necessary to free up at least nbytes bytes, # Evict objects as necessary to free up at least nbytes bytes,
# starting at currentofs. If currentofs is closer than nbytes to # starting at currentofs. If currentofs is closer than nbytes to
# the end of the file, currentofs is reset to 0. The number of # the end of the file, currentofs is reset to ZEC3_HEADER_SIZE first.
# bytes actually freed may be (and probably will be) greater than # The number of bytes actually freed may be (and probably will be)
# nbytes, and is _makeroom's return value. The file is not # greater than nbytes, and is _makeroom's return value. The file is not
# altered by _makeroom. filemap is updated to reflect the # altered by _makeroom. filemap is updated to reflect the
# evictions, and it's the caller's responsibilty both to fiddle # evictions, and it's the caller's responsibilty both to fiddle
# the file, and to update filemap, to account for all the space # the file, and to update filemap, to account for all the space
# freed (starting at currentofs when _makeroom returns, and # freed (starting at currentofs when _makeroom returns, and
# spanning the number of bytes retured by _makeroom). # spanning the number of bytes retured by _makeroom).
def _makeroom(self, nbytes): def _makeroom(self, nbytes):
assert 0 < nbytes <= self.maxsize assert 0 < nbytes <= self.maxsize
if self.currentofs + nbytes > self.maxsize: if self.currentofs + nbytes > self.maxsize:
...@@ -770,11 +805,11 @@ class FileCache(object): ...@@ -770,11 +805,11 @@ class FileCache(object):
nbytes -= size nbytes -= size
return ofs - self.currentofs return ofs - self.currentofs
##
# Write Object obj, with data, to file starting at currentofs. # Write Object obj, with data, to file starting at currentofs.
# nfreebytes are already available for overwriting, and it's # nfreebytes are already available for overwriting, and it's
# guranteed that's enough. obj.offset is changed to reflect the # guranteed that's enough. obj.offset is changed to reflect the
# new data record position, and filemap is updated to match. # new data record position, and filemap is updated to match.
def _writeobj(self, obj, nfreebytes): def _writeobj(self, obj, nfreebytes):
size = OBJECT_HEADER_SIZE + obj.size size = OBJECT_HEADER_SIZE + obj.size
assert size <= nfreebytes assert size <= nfreebytes
...@@ -806,12 +841,20 @@ class FileCache(object): ...@@ -806,12 +841,20 @@ class FileCache(object):
# written. # written.
self.filemap[self.currentofs] = excess, None self.filemap[self.currentofs] = excess, None
##
# Add Object object to the cache. This may evict existing objects, to
# make room (and almost certainly will, in steady state once the cache
# is first full). The object must not already be in the cache.
def add(self, object): def add(self, object):
size = OBJECT_HEADER_SIZE + object.size size = OBJECT_HEADER_SIZE + object.size
if size > self.maxsize: # A number of cache simulation experiments all concluded that the
# 2nd-level ZEO cache got a much higher hit rate if "very large"
# objects simply weren't cached. For now, we ignore the request
# only if the entire cache file is too small to hold the object.
if size > self.maxsize - ZEC3_HEADER_SIZE:
return return
assert size <= self.maxsize
assert size <= self.maxsize
assert object.key not in self.key2entry assert object.key not in self.key2entry
assert len(object.key[0]) == 8 assert len(object.key[0]) == 8
assert len(object.key[1]) == 8 assert len(object.key[1]) == 8
...@@ -822,31 +865,12 @@ class FileCache(object): ...@@ -822,31 +865,12 @@ class FileCache(object):
available = self._makeroom(size) available = self._makeroom(size)
self._writeobj(object, available) self._writeobj(object, available)
def _verify_filemap(self, display=False): ##
a = ZEC3_HEADER_SIZE # Evict the object represented by Entry `e` from the cache, freeing
f = self.f # `size` bytes in the file for reuse. `size` is used only for summary
while a < self.maxsize: # statistics. This does not alter the file, or self.filemap or
f.seek(a) # self.key2entry (those are the caller's responsibilities). It does
status = f.read(1) # invoke _evicted(Object) on our parent.
if status in 'af':
size, = struct.unpack(">I", f.read(4))
else:
size = int(status)
if display:
if a == self.currentofs:
print '*****',
print "%c%d" % (status, size),
size2, obj = self.filemap[a]
assert size == size2
assert (obj is not None) == (status == 'a')
if obj is not None:
assert obj.offset == a
assert self.key2entry[obj.key] is obj
a += size
if display:
print
assert a == self.maxsize
def _evictobj(self, e, size): def _evictobj(self, e, size):
self._n_evicts += 1 self._n_evicts += 1
self._n_evicted_bytes += size self._n_evicted_bytes += size
...@@ -857,8 +881,7 @@ class FileCache(object): ...@@ -857,8 +881,7 @@ class FileCache(object):
self.parent._evicted(o) self.parent._evicted(o)
## ##
# Return object for key or None if not in cache. # Return Object for key, or None if not in cache.
def access(self, key): def access(self, key):
self._n_accesses += 1 self._n_accesses += 1
e = self.key2entry.get(key) e = self.key2entry.get(key)
...@@ -872,8 +895,7 @@ class FileCache(object): ...@@ -872,8 +895,7 @@ class FileCache(object):
return Object.fromFile(self.f, key) return Object.fromFile(self.f, key)
## ##
# Remove object for key from cache, if present. # Remove Object for key from cache, if present.
def remove(self, key): def remove(self, key):
# If an object is being explicitly removed, we need to load # If an object is being explicitly removed, we need to load
# its header into memory and write a free block marker to the # its header into memory and write a free block marker to the
...@@ -888,24 +910,33 @@ class FileCache(object): ...@@ -888,24 +910,33 @@ class FileCache(object):
return return
offset = e.offset offset = e.offset
size, e2 = self.filemap[offset] size, e2 = self.filemap[offset]
assert size >= 5 # only free blocks are tiny
self.f.seek(offset + OBJECT_HEADER_SIZE) self.f.seek(offset + OBJECT_HEADER_SIZE)
o = Object.fromFile(self.f, key, header_only=True) o = Object.fromFile(self.f, key, header_only=True)
self.f.seek(offset + OBJECT_HEADER_SIZE) # Because `size` >= 5, we can change an allocated block to a free
# block just by overwriting the 'a' status byte with 'f' -- the
# size field stays the same.
self.f.seek(offset)
self.f.write('f') self.f.write('f')
self.f.flush() self.f.flush()
self.parent._evicted(o) self.parent._evicted(o)
self.filemap[offset] = size, None self.filemap[offset] = size, None
## ##
# Update on-disk representation of obj. # Update on-disk representation of Object obj.
# #
# This method should be called when the object header is modified. # This method should be called when the object header is modified.
# obj must be in the cache.
def update(self, obj): def update(self, obj):
e = self.key2entry[obj.key] e = self.key2entry[obj.key]
self.f.seek(e.offset + OBJECT_HEADER_SIZE) self.f.seek(e.offset + OBJECT_HEADER_SIZE)
obj.serialize_header(self.f) obj.serialize_header(self.f)
##
# Update our idea of the most recent tid. This is stored in the
# instance, and also written out near the start of the cache file. The
# new tid must be strictly greater than our current idea of the most
# recent tid.
def settid(self, tid): def settid(self, tid):
if self.tid is not None and tid <= self.tid: if self.tid is not None and tid <= self.tid:
raise ValueError("new last tid (%s) must be greater than " raise ValueError("new last tid (%s) must be greater than "
...@@ -913,6 +944,34 @@ class FileCache(object): ...@@ -913,6 +944,34 @@ class FileCache(object):
u64(self.tid))) u64(self.tid)))
assert isinstance(tid, str) and len(tid) == 8 assert isinstance(tid, str) and len(tid) == 8
self.tid = tid self.tid = tid
self.f.seek(4) self.f.seek(len(magic))
self.f.write(tid) self.f.write(tid)
self.f.flush() self.f.flush()
##
# This debug method marches over the entire cache file, verifying that
# the current contents match the info in self.filemap and self.key2entry.
def _verify_filemap(self, display=False):
a = ZEC3_HEADER_SIZE
f = self.f
while a < self.maxsize:
f.seek(a)
status = f.read(1)
if status in 'af':
size, = struct.unpack(">I", f.read(4))
else:
size = int(status)
if display:
if a == self.currentofs:
print '*****',
print "%c%d" % (status, size),
size2, obj = self.filemap[a]
assert size == size2
assert (obj is not None) == (status == 'a')
if obj is not None:
assert obj.offset == a
assert self.key2entry[obj.key] is obj
a += size
if display:
print
assert a == self.maxsize
...@@ -114,13 +114,11 @@ class CacheTests(unittest.TestCase): ...@@ -114,13 +114,11 @@ class CacheTests(unittest.TestCase):
n = p64(i) n = p64(i)
self.cache.store(n, "", n, None, data[i]) self.cache.store(n, "", n, None, data[i])
self.assertEquals(len(self.cache), i + 1) self.assertEquals(len(self.cache), i + 1)
self.assert_(self.cache.fc.currentsize < maxsize)
# The cache now uses 1225 bytes. The next insert # The cache now uses 1225 bytes. The next insert
# should delete some objects. # should delete some objects.
n = p64(50) n = p64(50)
self.cache.store(n, "", n, None, data[51]) self.cache.store(n, "", n, None, data[51])
self.assert_(len(self.cache) < 51) self.assert_(len(self.cache) < 51)
self.assert_(self.cache.fc.currentsize <= maxsize)
# XXX Need to make sure eviction of non-current data # XXX Need to make sure eviction of non-current data
# and of version data are handled correctly. # and of version data are handled correctly.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment