Commit cfe7eae1 authored by Chris McDonough's avatar Chris McDonough

- Get rid of streamsize attribute as well as redundant next method

  on BlobFile.

- Implement __del__ on BlobFile, which attempts to close the filehandle.

- Use a weak reference to the blobfile in the BlobDataManager so as to
  not keep the blobfile alive for longer than necessary (mainly to support
  the idiom of opening a blobfile without assigning it to a name).
  It is no longer necessary to explicitly close a blobfile.

- Raise IOError if an invalid mode is passed in to Blob.open.

- Add some comments about why things are the way they are.

 
parent ca575add
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
import os import os
import time import time
import tempfile import tempfile
import weakref
from zope.interface import implements from zope.interface import implements
...@@ -22,7 +23,17 @@ class Blob(Persistent): ...@@ -22,7 +23,17 @@ class Blob(Persistent):
_p_blob_data = None _p_blob_data = None
def open(self, mode="r"): def open(self, mode="r"):
"""Returns a file(-like) object for handling the blob data.""" """ Returns a file(-like) object representing blob data. This
method will either return the file object, raise a BlobError
or an IOError. A file may be open for exclusive read any
number of times, but may not be opened simultaneously for read
and write during the course of a single transaction and may
not be opened for simultaneous writes during the course of a
single transaction. Additionally, the file handle which
results from this method call is unconditionally closed at
transaction boundaries and so may not be used across
transactions. """
result = None result = None
if (mode.startswith("r") or mode=="U"): if (mode.startswith("r") or mode=="U"):
...@@ -35,7 +46,7 @@ class Blob(Persistent): ...@@ -35,7 +46,7 @@ class Blob(Persistent):
self._p_blob_readers += 1 self._p_blob_readers += 1
result = BlobFile(self._current_filename(), mode, self) result = BlobFile(self._current_filename(), mode, self)
if mode.startswith("w"): elif mode.startswith("w"):
if self._p_blob_readers != 0: if self._p_blob_readers != 0:
raise BlobError, "Already opened for reading." raise BlobError, "Already opened for reading."
...@@ -45,7 +56,7 @@ class Blob(Persistent): ...@@ -45,7 +56,7 @@ class Blob(Persistent):
self._p_blob_writers += 1 self._p_blob_writers += 1
result = BlobFile(self._p_blob_uncommitted, mode, self) result = BlobFile(self._p_blob_uncommitted, mode, self)
if mode.startswith("a"): elif mode.startswith("a"):
if self._p_blob_readers != 0: if self._p_blob_readers != 0:
raise BlobError, "Already opened for reading." raise BlobError, "Already opened for reading."
...@@ -53,6 +64,7 @@ class Blob(Persistent): ...@@ -53,6 +64,7 @@ class Blob(Persistent):
# Create a new working copy # Create a new working copy
self._p_blob_uncommitted = utils.mktmp() self._p_blob_uncommitted = utils.mktmp()
uncommitted = BlobFile(self._p_blob_uncommitted, mode, self) uncommitted = BlobFile(self._p_blob_uncommitted, mode, self)
# NOTE: _p_blob data appears by virtue of Connection._setstate
utils.cp(file(self._p_blob_data), uncommitted) utils.cp(file(self._p_blob_data), uncommitted)
uncommitted.seek(0) uncommitted.seek(0)
else: else:
...@@ -62,6 +74,9 @@ class Blob(Persistent): ...@@ -62,6 +74,9 @@ class Blob(Persistent):
self._p_blob_writers +=1 self._p_blob_writers +=1
result = uncommitted result = uncommitted
else:
raise IOError, 'invalid mode: %s ' % mode
if result is not None: if result is not None:
# we register ourselves as a data manager with the # we register ourselves as a data manager with the
...@@ -79,6 +94,8 @@ class Blob(Persistent): ...@@ -79,6 +94,8 @@ class Blob(Persistent):
# utility methods # utility methods
def _current_filename(self): def _current_filename(self):
# NOTE: _p_blob_data and _p_blob_uncommitted appear by virtue of
# Connection._setstate
return self._p_blob_uncommitted or self._p_blob_data return self._p_blob_uncommitted or self._p_blob_data
def _change(self): def _change(self):
...@@ -103,7 +120,7 @@ class Blob(Persistent): ...@@ -103,7 +120,7 @@ class Blob(Persistent):
class BlobDataManager: class BlobDataManager:
"""Special data manager to handle transaction boundaries for blobs. """Special data manager to handle transaction boundaries for blobs.
Blobs need some special care taking on transaction boundaries. As Blobs need some special care-taking on transaction boundaries. As
a) the ghost objects might get reused, the _p_ reader and writer a) the ghost objects might get reused, the _p_ reader and writer
refcount attributes must be set to a consistent state refcount attributes must be set to a consistent state
...@@ -118,7 +135,10 @@ class BlobDataManager: ...@@ -118,7 +135,10 @@ class BlobDataManager:
def __init__(self, blob, filehandle): def __init__(self, blob, filehandle):
self.blob = blob self.blob = blob
self.filehandle = filehandle # we keep a weakref to the file handle because we don't want to
# keep it alive if all other references to it die (e.g. in the
# case it's opened without assigning it to a name).
self.fhref = weakref.ref(filehandle)
self.subtransaction = False self.subtransaction = False
self.sortkey = time.time() self.sortkey = time.time()
...@@ -143,12 +163,16 @@ class BlobDataManager: ...@@ -143,12 +163,16 @@ class BlobDataManager:
def commit(self, object, transaction): def commit(self, object, transaction):
if not self.subtransaction: if not self.subtransaction:
self.blob._rc_clear() # clear all blob refcounts self.blob._rc_clear() # clear all blob refcounts
self.filehandle.close() filehandle = self.fhref()
if filehandle is not None:
filehandle.close()
def abort(self, object, transaction): def abort(self, object, transaction):
if not self.subtransaction: if not self.subtransaction:
self.blob._rc_clear() self.blob._rc_clear()
self.filehandle.close() filehandle = self.fhref()
if filehandle is not None:
filehandle.close()
def sortKey(self): def sortKey(self):
return self.sortkey return self.sortkey
...@@ -160,16 +184,20 @@ class BlobDataManager: ...@@ -160,16 +184,20 @@ class BlobDataManager:
pass pass
class BlobFile(file): class BlobFile(file):
""" A BlobFile is a file that can be used within a transaction boundary """
""" A BlobFile is a file that can be used within a transaction
boundary; a BlobFile is just a Python file object, we only
override methods which cause a change to blob data in order to
call methods on our 'parent' persistent blob object signifying
that the change happened. """
# XXX those files should be created in the same partition as # XXX these files should be created in the same partition as
# the storage later puts them to avoid copying them ... # the storage later puts them to avoid copying them ...
def __init__(self, name, mode, blob): def __init__(self, name, mode, blob):
super(BlobFile, self).__init__(name, mode) super(BlobFile, self).__init__(name, mode)
self.blob = blob self.blob = blob
self.streamsize = 1<<16 self.close_called = False
def write(self, data): def write(self, data):
super(BlobFile, self).write(data) super(BlobFile, self).write(data)
...@@ -182,14 +210,19 @@ class BlobFile(file): ...@@ -182,14 +210,19 @@ class BlobFile(file):
def truncate(self, size=0): def truncate(self, size=0):
super(BlobFile, self).truncate(size) super(BlobFile, self).truncate(size)
self.blob._change() self.blob._change()
def close(self):
self.blob._rc_decref(self.mode)
super(BlobFile, self).close()
def next(self):
data = self.read(self.streamsize)
if not data:
raise StopIteration
return data
def close(self):
# we don't want to decref twice
if not self.close_called:
self.blob._rc_decref(self.mode)
self.close_called = True
super(BlobFile, self).close()
def __del__(self):
# XXX we need to ensure that the file is closed at object
# expiration or our blob's refcount won't be decremented.
# This probably needs some work; I don't know if the names
# 'BlobFile' or 'super' will be available at program exit, but
# we'll assume they will be for now in the name of not
# muddying the code needlessly.
self.close()
...@@ -83,23 +83,16 @@ Now we can read it: ...@@ -83,23 +83,16 @@ Now we can read it:
'Hi, Blob!\nBlob is fine.' 'Hi, Blob!\nBlob is fine.'
>>> f4a.close() >>> f4a.close()
Please, always remember closing an opened blob, otherwise you might get You shouldn't need to explicitly close a blob unless you hold a reference
blocked later on. Therefore you should avoid using the result of open() to it via a name. If the first line in the following test kept a reference
without binding it to a name: around via a name, the second call to open it in a writable mode would fail
with a BlobError, but it doesn't.
>>> myblob.open("r").read() >>> myblob.open("r+").read()
'Hi, Blob!\nBlob is fine.' 'Hi, Blob!\nBlob is fine.'
>>> f4b = myblob.open("a") >>> f4b = myblob.open("a")
Traceback (most recent call last): >>> f4b.close()
...
BlobError: Already opened for reading.
To clean that up, we have to commit or abort the current transaction, so the reference
counters for opened blob files get to a valid state again:
>>> import transaction
>>> transaction.commit()
We can read lines out of the blob too: We can read lines out of the blob too:
>>> f5 = myblob.open("r") >>> f5 = myblob.open("r")
...@@ -125,6 +118,7 @@ We can use the object returned by a blob open call as an iterable: ...@@ -125,6 +118,7 @@ We can use the object returned by a blob open call as an iterable:
>>> for line in f7: >>> for line in f7:
... print line ... print line
Hi, Blob! Hi, Blob!
<BLANKLINE>
Blob is fine. Blob is fine.
>>> f7.close() >>> f7.close()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment