- Get rid of streamsize attribute as well as redundant next method

on BlobFile. - Implement __del__ on BlobFile, which attempts to close the filehandle. - Use a weak reference to the blobfile in the BlobDataManager so as to not keep the blobfile alive for longer than necessary (mainly to support the idiom of opening a blobfile without assigning it to a name). It is no longer necessary to explicitly close a blobfile. - Raise IOError if an invalid mode is passed in to Blob.open. - Add some comments about why things are the way they are.

- Get rid of streamsize attribute as well as redundant next method
on BlobFile. - Implement __del__ on BlobFile, which attempts to close the filehandle. - Use a weak reference to the blobfile in the BlobDataManager so as to not keep the blobfile alive for longer than necessary (mainly to support the idiom of opening a blobfile without assigning it to a name). It is no longer necessary to explicitly close a blobfile. - Raise IOError if an invalid mode is passed in to Blob.open. - Add some comments about why things are the way they are.
cfe7eae1 · Chris McDonough · ca575add · cfe7eae1 · cfe7eae1
Commit cfe7eae1 authored Mar 25, 2005 by Chris McDonough
2 changed files
--- a/branches/ctheune-blobsupport/src/ZODB/Blobs/Blob.py
+++ b/branches/ctheune-blobsupport/src/ZODB/Blobs/Blob.py
@@ -2,6 +2,7 @@
 import os
 import time
 import tempfile
+import weakref

 from zope.interface import implements

@@ -22,7 +23,17 @@ class Blob(Persistent):
    _p_blob_data = None

    def open(self, mode="r"):
-        """Returns a file(-like) object for handling the blob data."""
+        """ Returns a file(-like) object representing blob data.  This
+        method will either return the file object, raise a BlobError
+        or an IOError.  A file may be open for exclusive read any
+        number of times, but may not be opened simultaneously for read
+        and write during the course of a single transaction and may
+        not be opened for simultaneous writes during the course of a
+        single transaction. Additionally, the file handle which
+        results from this method call is unconditionally closed at
+        transaction boundaries and so may not be used across
+        transactions.  """
+        
        result = None

        if (mode.startswith("r") or mode=="U"):
@@ -35,7 +46,7 @@ class Blob(Persistent):
            self._p_blob_readers += 1
            result = BlobFile(self._current_filename(), mode, self)

-        if mode.startswith("w"):
+        elif mode.startswith("w"):
            if self._p_blob_readers != 0:
                raise BlobError, "Already opened for reading."

@@ -45,7 +56,7 @@ class Blob(Persistent):
            self._p_blob_writers += 1
            result = BlobFile(self._p_blob_uncommitted, mode, self)

-        if mode.startswith("a"):
+        elif mode.startswith("a"):
            if self._p_blob_readers != 0:
                raise BlobError, "Already opened for reading."

@@ -53,6 +64,7 @@ class Blob(Persistent):
                # Create a new working copy
                self._p_blob_uncommitted = utils.mktmp()
                uncommitted = BlobFile(self._p_blob_uncommitted, mode, self)
+                # NOTE: _p_blob data appears by virtue of Connection._setstate
                utils.cp(file(self._p_blob_data), uncommitted)
                uncommitted.seek(0)
            else:
@@ -62,6 +74,9 @@ class Blob(Persistent):
            self._p_blob_writers +=1
            result = uncommitted

+        else:
+            raise IOError, 'invalid mode: %s ' % mode
+
        if result is not None:

            # we register ourselves as a data manager with the
@@ -79,6 +94,8 @@ class Blob(Persistent):
    # utility methods

    def _current_filename(self):
+        # NOTE: _p_blob_data and _p_blob_uncommitted appear by virtue of
+        # Connection._setstate
        return self._p_blob_uncommitted or self._p_blob_data

    def _change(self):
@@ -103,7 +120,7 @@ class Blob(Persistent):
 class BlobDataManager:
    """Special data manager to handle transaction boundaries for blobs.

-    Blobs need some special care taking on transaction boundaries. As
+    Blobs need some special care-taking on transaction boundaries. As

    a) the ghost objects might get reused, the _p_ reader and writer
       refcount attributes must be set to a consistent state
@@ -118,7 +135,10 @@ class BlobDataManager:

    def __init__(self, blob, filehandle):
        self.blob = blob
-        self.filehandle = filehandle
+        # we keep a weakref to the file handle because we don't want to
+        # keep it alive if all other references to it die (e.g. in the
+        # case it's opened without assigning it to a name).
+        self.fhref = weakref.ref(filehandle)
        self.subtransaction = False
        self.sortkey = time.time()

@@ -143,12 +163,16 @@ class BlobDataManager:
    def commit(self, object, transaction):
        if not self.subtransaction:
            self.blob._rc_clear() # clear all blob refcounts
-            self.filehandle.close()
+            filehandle = self.fhref()
+            if filehandle is not None:
+                filehandle.close()

    def abort(self, object, transaction):
        if not self.subtransaction:
            self.blob._rc_clear()
-            self.filehandle.close()
+            filehandle = self.fhref()
+            if filehandle is not None:
+                filehandle.close()

    def sortKey(self):
        return self.sortkey
@@ -160,16 +184,20 @@ class BlobDataManager:
        pass

 class BlobFile(file):
-    """ A BlobFile is a file that can be used within a transaction boundary """
    
+    """ A BlobFile is a file that can be used within a transaction
+    boundary; a BlobFile is just a Python file object, we only
+    override methods which cause a change to blob data in order to
+    call methods on our 'parent' persistent blob object signifying
+    that the change happened. """

-    # XXX those files should be created in the same partition as
+    # XXX these files should be created in the same partition as
    # the storage later puts them to avoid copying them ...

    def __init__(self, name, mode, blob):
        super(BlobFile, self).__init__(name, mode)
        self.blob = blob
-        self.streamsize = 1<<16
+        self.close_called = False

    def write(self, data):
        super(BlobFile, self).write(data)
@@ -184,12 +212,17 @@ class BlobFile(file):
        self.blob._change()

    def close(self):
+        # we don't want to decref twice
+        if not self.close_called:
            self.blob._rc_decref(self.mode)
+            self.close_called = True
            super(BlobFile, self).close()

-    def next(self):
-        data = self.read(self.streamsize)
-        if not data:
-            raise StopIteration
-        return data
-
+    def __del__(self):
+        # XXX we need to ensure that the file is closed at object
+        # expiration or our blob's refcount won't be decremented.
+        # This probably needs some work; I don't know if the names
+        # 'BlobFile' or 'super' will be available at program exit, but
+        # we'll assume they will be for now in the name of not
+        # muddying the code needlessly.
+        self.close()
--- a/branches/ctheune-blobsupport/src/ZODB/Blobs/Blob.txt
+++ b/branches/ctheune-blobsupport/src/ZODB/Blobs/Blob.txt
@@ -83,22 +83,15 @@ Now we can read it:
    'Hi, Blob!\nBlob is fine.'
    >>> f4a.close()

-Please, always remember closing an opened blob, otherwise you might get
-blocked later on. Therefore you should avoid using the result of open()
-without binding it to a name:
+You shouldn't need to explicitly close a blob unless you hold a reference
+to it via a name.  If the first line in the following test kept a reference
+around via a name, the second call to open it in a writable mode would fail
+with a BlobError, but it doesn't.

-    >>> myblob.open("r").read()
+    >>> myblob.open("r+").read()
    'Hi, Blob!\nBlob is fine.'
    >>> f4b = myblob.open("a")
-    Traceback (most recent call last):
-        ...
-    BlobError: Already opened for reading.
-    
-To clean that up, we have to commit or abort the current transaction, so the reference
-counters for opened blob files get to a valid state again:
-
-    >>> import transaction
-    >>> transaction.commit()
+    >>> f4b.close()
    
 We can read lines out of the blob too:

@@ -125,6 +118,7 @@ We can use the object returned by a blob open call as an iterable:
    >>> for line in f7:
    ...     print line
    Hi, Blob!
+    <BLANKLINE>
    Blob is fine.
    >>> f7.close()