Commit b8fbeb93 authored by Jason Madden's avatar Jason Madden

Fix the cleanup of uncommitted blob data under PyPy.

parent 255e256e
...@@ -57,6 +57,15 @@ valid_modes = 'r', 'w', 'r+', 'a', 'c' ...@@ -57,6 +57,15 @@ valid_modes = 'r', 'w', 'r+', 'a', 'c'
# This introduces a threading issue, since a blob file may be destroyed # This introduces a threading issue, since a blob file may be destroyed
# via GC in any thread. # via GC in any thread.
# PyPy 2.5 doesn't properly call the cleanup function
# of a weakref when the weakref object dies at the same time
# as the object it refers to. In other words, this doesn't work:
# self._ref = weakref.ref(self, lambda ref: ...)
# because the function never gets called. The Blob class used
# to use that pattern to clean up uncommitted files; now we use this
# module-level global (but still keep a reference in the Blob in case
# we need premature cleanup)
_blob_close_refs = []
@zope.interface.implementer(ZODB.interfaces.IBlob) @zope.interface.implementer(ZODB.interfaces.IBlob)
class Blob(persistent.Persistent): class Blob(persistent.Persistent):
...@@ -65,6 +74,7 @@ class Blob(persistent.Persistent): ...@@ -65,6 +74,7 @@ class Blob(persistent.Persistent):
_p_blob_uncommitted = None # Filename of the uncommitted (dirty) data _p_blob_uncommitted = None # Filename of the uncommitted (dirty) data
_p_blob_committed = None # Filename of the committed data _p_blob_committed = None # Filename of the committed data
_p_blob_ref = None # weakreference to self; also in _blob_close_refs
readers = writers = None readers = writers = None
...@@ -283,8 +293,13 @@ class Blob(persistent.Persistent): ...@@ -283,8 +293,13 @@ class Blob(persistent.Persistent):
def cleanup(ref): def cleanup(ref):
if os.path.exists(filename): if os.path.exists(filename):
os.remove(filename) os.remove(filename)
try:
_blob_close_refs.remove(ref)
except ValueError:
pass
self._p_blob_ref = weakref.ref(self, cleanup) self._p_blob_ref = weakref.ref(self, cleanup)
_blob_close_refs.append(self._p_blob_ref)
return filename return filename
def _uncommitted(self): def _uncommitted(self):
...@@ -293,6 +308,10 @@ class Blob(persistent.Persistent): ...@@ -293,6 +308,10 @@ class Blob(persistent.Persistent):
filename = self._p_blob_uncommitted filename = self._p_blob_uncommitted
if filename is None and self._p_blob_committed is None: if filename is None and self._p_blob_committed is None:
filename = self._create_uncommitted_file() filename = self._create_uncommitted_file()
try:
_blob_close_refs.remove(self._p_blob_ref)
except ValueError:
pass
self._p_blob_uncommitted = self._p_blob_ref = None self._p_blob_uncommitted = self._p_blob_ref = None
return filename return filename
......
...@@ -31,7 +31,7 @@ Aborting a blob add leaves the blob unchanged: ...@@ -31,7 +31,7 @@ Aborting a blob add leaves the blob unchanged:
... fp.read() ... fp.read()
'this is blob 1' 'this is blob 1'
It doesn't clear the file because there is no previously committed version: It doesn't clear the file because there is no previously committed version:
>>> fname = blob1._p_blob_uncommitted >>> fname = blob1._p_blob_uncommitted
>>> import os >>> import os
...@@ -79,7 +79,7 @@ resulting filehandle is accomplished via the filehandle's read method:: ...@@ -79,7 +79,7 @@ resulting filehandle is accomplished via the filehandle's read method::
>>> blob1afh1.read() >>> blob1afh1.read()
'this is blob 1' 'this is blob 1'
Let's make another filehandle for read only to blob1a. Aach file Let's make another filehandle for read only to blob1a. Each file
handle has a reference to the (same) underlying blob:: handle has a reference to the (same) underlying blob::
>>> blob1afh2 = blob1a.open("r") >>> blob1afh2 = blob1a.open("r")
......
...@@ -345,6 +345,18 @@ def gc_blob_removes_uncommitted_data(): ...@@ -345,6 +345,18 @@ def gc_blob_removes_uncommitted_data():
>>> os.path.exists(fname) >>> os.path.exists(fname)
True True
>>> file = blob = None >>> file = blob = None
PyPy not being reference counted actually needs GC to be
explicitly requested. In experiments, it finds the weakref
on the first collection, but only does the cleanup on the second
collection:
>>> import gc
>>> _ = gc.collect()
>>> _ = gc.collect()
Now the file is gone on all platforms:
>>> os.path.exists(fname) >>> os.path.exists(fname)
False False
""" """
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment