Improved zeo blob cache clean up to make it a bit more robust and to

avoid spurious test failures.

Improved zeo blob cache clean up to make it a bit more robust and to
avoid spurious test failures.
d628565b · Jim Fulton · bee97cf1 · d628565b · d628565b · d628565b
Commit d628565b authored Jun 05, 2009 by Jim Fulton
Showing with 101 additions and 70 deletions

src/CHANGES.txt src/CHANGES.txt +2 -0

src/ZEO/ClientStorage.py src/ZEO/ClientStorage.py +75 -48

src/ZEO/tests/zeo_blob_cache.test src/ZEO/tests/zeo_blob_cache.test +24 -22

No files found.
--- a/src/CHANGES.txt
+++ b/src/CHANGES.txt
@@ -14,6 +14,8 @@ Bugs Fixed
 - Fixed analyze.py and added test.
+- ZEO client blob cache size management is a little bit more robust.
 3.9.0b1 (2009-05-04)
 ====================

--- a/src/ZEO/ClientStorage.py
+++ b/src/ZEO/ClientStorage.py
@@ -38,6 +38,7 @@ import socket
 import stat
 import sys
 import tempfile
+import thread
 import threading
 import time
 import types
@@ -398,6 +399,7 @@ class ClientStorage(object):
        self._blob_cache_size = blob_cache_size
        self._blob_data_bytes_loaded = 0
        if blob_cache_size is not None:
+            assert blob_cache_size_check < 100
            self._blob_cache_size_check = (
                blob_cache_size * blob_cache_size_check / 100)
            self._check_blob_size()
@@ -477,7 +479,7 @@ class ClientStorage(object):
        check_blob_size_thread = threading.Thread(
            target=_check_blob_cache_size,
-            args=(self.blob_dir, self._blob_cache_size),
+            args=(self.blob_dir, target),
            )
        check_blob_size_thread.setDaemon(True)
        check_blob_size_thread.start()
@@ -1620,7 +1622,6 @@ cache_file_name = re.compile(r'\d+$').match
 def _check_blob_cache_size(blob_dir, target):
    logger = logging.getLogger(__name__+'.check_blob_cache')
-    logger.info("Checking blob cache size")
    layout = open(os.path.join(blob_dir, ZODB.blob.LAYOUT_MARKER)
                  ).read().strip()
@@ -1628,63 +1629,89 @@ def _check_blob_cache_size(blob_dir, target):
        logger.critical("Invalid blob directory layout %s", layout)
        raise ValueError("Invalid blob directory layout", layout)
+    attempt_path = os.path.join(blob_dir, 'check_size.attempt')
    try:
        check_lock = zc.lockfile.LockFile(
            os.path.join(blob_dir, 'check_size.lock'))
    except zc.lockfile.LockError:
-        # Someone is already cleaning up, so don't bother
+        try:
-        logger.info("Another thread is checking the blob cache size")
+            time.sleep(1)
-        return
+            check_lock = zc.lockfile.LockFile(
+                os.path.join(blob_dir, 'check_size.lock'))
+        except zc.lockfile.LockError:
+            # Someone is already cleaning up, so don't bother
+            logger.debug("%s Another thread is checking the blob cache size.",
+                         thread.get_ident())
+            open(attempt_path, 'w').close() # Mark that we tried
+            return
+    logger.debug("%s Checking blob cache size. (target: %s)",
+                 thread.get_ident(), target)
    try:
-        size = 0
+        while 1:
-        blob_suffix = ZODB.blob.BLOB_SUFFIX
+            size = 0
-        files_by_atime = BTrees.IOBTree.BTree()
+            blob_suffix = ZODB.blob.BLOB_SUFFIX
+            files_by_atime = BTrees.OOBTree.BTree()
-        for dirname in os.listdir(blob_dir):
-            if not cache_file_name(dirname):
+            for dirname in os.listdir(blob_dir):
-                continue
+                if not cache_file_name(dirname):
-            base = os.path.join(blob_dir, dirname)
-            if not os.path.isdir(base):
-                continue
-            for file_name in os.listdir(base):
-                if not file_name.endswith(blob_suffix):
                    continue
-                file_name = os.path.join(base, file_name)
+                base = os.path.join(blob_dir, dirname)
-                if not os.path.isfile(file_name):
+                if not os.path.isdir(base):
                    continue
-                stat = os.stat(file_name)
+                for file_name in os.listdir(base):
-                size += stat.st_size
+                    if not file_name.endswith(blob_suffix):
-                t = int(stat.st_atime)
+                        continue
-                if t not in files_by_atime:
+                    file_path = os.path.join(base, file_name)
-                    files_by_atime[t] = []
+                    if not os.path.isfile(file_path):
-                files_by_atime[t].append(file_name)
+                        continue
+                    stat = os.stat(file_path)
-        logger.info("blob cache size: %s", size)
+                    size += stat.st_size
+                    t = stat.st_atime
-        while size > target and files_by_atime:
+                    if t not in files_by_atime:
-            for file_name in files_by_atime.pop(files_by_atime.minKey()):
+                        files_by_atime[t] = []
-                lockfilename = os.path.join(os.path.dirname(file_name),
+                    files_by_atime[t].append(os.path.join(dirname, file_name))
-                                            '.lock')
-                try:
+            logger.debug("%s   blob cache size: %s", thread.get_ident(), size)
-                    lock = zc.lockfile.LockFile(lockfilename)
-                except zc.lockfile.LockError:
+            if size <= target:
-                    logger.info("Skipping locked %s",
+                if os.path.isfile(attempt_path):
-                                os.path.basename(file_name))
+                    os.remove(attempt_path)
-                    continue  # In use, skip
+                    continue
+                logger.debug("%s   -->", thread.get_ident())
+                break
-                try:
+            while size > target and files_by_atime:
-                    fsize = os.stat(file_name).st_size
+                for file_name in files_by_atime.pop(files_by_atime.minKey()):
+                    file_name = os.path.join(blob_dir, file_name)
+                    lockfilename = os.path.join(os.path.dirname(file_name),
+                                                '.lock')
                    try:
-                        ZODB.blob.remove_committed(file_name)
+                        lock = zc.lockfile.LockFile(lockfilename)
-                    except OSError, v:
+                    except zc.lockfile.LockError:
-                        pass # probably open on windows
+                        logger.debug("%s Skipping locked %s",
-                    else:
+                                     thread.get_ident(),
-                        size -= fsize
+                                     os.path.basename(file_name))
-                finally:
+                        continue  # In use, skip
-                    lock.close()
-        logger.info("reduced blob cache size: %s", size)
+                    try:
+                        fsize = os.stat(file_name).st_size
+                        try:
+                            ZODB.blob.remove_committed(file_name)
+                        except OSError, v:
+                            pass # probably open on windows
+                        else:
+                            size -= fsize
+                    finally:
+                        lock.close()
+                    if size <= target:
+                        break
+            logger.debug("%s   reduced blob cache size: %s",
+                         thread.get_ident(), size)
    finally:
        check_lock.close()

--- a/src/ZEO/tests/zeo_blob_cache.test
+++ b/src/ZEO/tests/zeo_blob_cache.test
@@ -33,6 +33,11 @@ from the server before the cache size is checked. The
 blob_cache_size_check option defaults to 100. We passed 10, to check
 after writing 10% of the target size.
+.. We're going to wait for any threads we started to finish, so...
+   >>> import threading
+   >>> old_threads = list(threading.enumerate())
 We want to check for name collections in the blob cache dir. We'll try
 to provoke name collections by reducing the number of cache directory
 subdirectories.
@@ -66,11 +71,14 @@ directory.
    ...                      if os.path.exists(os.path.join(base, f)):
    ...                          raise
    ...     return size
-    >>> db.storage._check_blob_size_thread.join()
-    >>> cache_size('blobs') < 5000
+    >>> def check():
-    True
+    ...     return cache_size('blobs') < 5000
+    >>> def onfail():
+    ...     return cache_size('blobs')
+    >>> from ZEO.tests.forker import wait_until
+    >>> wait_until("size is reduced", check, 99, onfail)
 If we read all of the blobs, data will be downloaded again, as
 necessary, but the cache size will remain not much bigger than the
@@ -81,37 +89,26 @@ target:
    ...     if data != chr(i)*100:
    ...         print 'bad data', `chr(i)`, `data`
-    >>> db.storage._check_blob_size_thread.join()
+    >>> wait_until("size is reduced", check, 99, onfail)
-    >>> cache_size('blobs') < 5000
-    True
    >>> for i in range(1, 101):
    ...     data = conn.root()[i].open().read()
    ...     if data != chr(i)*100:
    ...         print 'bad data', `chr(i)`, `data`
-    >>> db.storage._check_blob_size_thread.join()
    >>> for i in range(1, 101):
    ...     data = conn.root()[i].open('c').read()
    ...     if data != chr(i)*100:
    ...         print 'bad data', `chr(i)`, `data`
-    >>> db.storage._check_blob_size_thread.join()
+    >>> wait_until("size is reduced", check, 99, onfail)
-    >>> cache_size('blobs') < 5000
-    True
    >>> for i in range(1, 101):
    ...     data = open(conn.root()[i].committed(), 'rb').read()
    ...     if data != chr(i)*100:
    ...         print 'bad data', `chr(i)`, `data`
-    >>> db.storage._check_blob_size_thread.join()
+    >>> wait_until("size is reduced", check, 99, onfail)
-    >>> cache_size('blobs') < 5000
-    True
 Now let see if we can stress things a bit.  We'll create many clients
 and get them to pound on the blobs all at once to see if we can
@@ -131,7 +128,6 @@ provoke problems:
    ...         data = conn.root()[i].open('c').read()
    ...         if data != chr(i)*100:
    ...             print 'bad data', `chr(i)`, `data`
-    ...     db._storage._check_blob_size_thread.join()
    ...     db.close()
    >>> threads = [threading.Thread(target=run) for i in range(10)]
@@ -140,12 +136,18 @@ provoke problems:
    >>> for thread in threads:
    ...     thread.start()
    >>> for thread in threads:
-    ...     thread.join()
+    ...     thread.join(99)
+    ...     if thread.isAlive():
+    ...        print "Can't join thread."
-    >>> cache_size('blobs') < 5000
+    >>> wait_until("size is reduced", check, 99, onfail)
-    True
 .. cleanup
+    >>> for thread in threading.enumerate():
+    ...     if thread not in old_threads:
+    ...        thread.join(33)
    >>> db.close()
    >>> ZEO.ClientStorage.BlobCacheLayout.size = orig_blob_cache_layout_size