##############################################################################
#
# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""Database objects

$Id$"""

import cPickle, cStringIO, sys
import threading
from time import time, ctime
import logging

from ZODB.broken import find_global
from ZODB.utils import z64
from ZODB.Connection import Connection
from ZODB.serialize import referencesf
from ZODB.utils import WeakSet

from zope.interface import implements
from ZODB.interfaces import IDatabase

import transaction

logger = logging.getLogger('ZODB.DB')


def _qualified_class_name(ob):
    """Return "module.ClassName" for ob's class.

    The module prefix is omitted when the class has no __module__
    attribute or when that attribute is empty/None.
    """
    klass = ob.__class__
    module = getattr(klass, '__module__', '')
    if module:
        return "%s.%s" % (module, klass.__name__)
    return "%s" % klass.__name__


class _ConnectionPool(object):
    """Manage a pool of connections.

    CAUTION:  Methods should be called under the protection of a lock.
    This class does no locking of its own.

    There's no limit on the number of connections this can keep track of,
    but a warning is logged if there are more than pool_size active
    connections, and a critical problem if more than twice pool_size.

    New connections are registered via push().  This will log a message if
    "too many" connections are active.

    When a connection is explicitly closed, tell the pool via repush().
    That adds the connection to a stack of connections available for
    reuse, and throws away the oldest stack entries if the stack is too
    large.  pop() pops this stack.

    When a connection is obtained via pop(), the pool holds only a weak
    reference to it thereafter.  It's not necessary to inform the pool
    if the connection goes away.  A connection handed out by pop() counts
    against pool_size only so long as it exists, and provided it isn't
    repush()'ed.  A weak reference is retained so that DB methods like
    connectionDebugInfo() can still gather statistics.
    """

    def __init__(self, pool_size):
        # The largest # of connections we expect to see alive simultaneously.
        self.pool_size = pool_size

        # A weak set of all connections we've seen.  A connection vanishes
        # from this set if pop() hands it out, it's not reregistered via
        # repush(), and it becomes unreachable.
        self.all = WeakSet()

        # A stack of connections available to hand out.  This is a subset
        # of self.all.  push() and repush() add to this, and may remove
        # the oldest available connections if the pool is too large.
        # pop() pops this stack.  There are never more than pool_size entries
        # in this stack (unless pool_size is 0, in which case a single
        # just-pushed connection may sit here until it is popped).
        # In Python 2.4, a collections.deque would make more sense than
        # a list (we push only "on the right", but may pop from both ends).
        self.available = []

    # Change our belief about the expected maximum # of live connections.
    # If the pool_size is smaller than the current value, this may discard
    # the oldest available connections.
    def set_pool_size(self, pool_size):
        self.pool_size = pool_size
        self._reduce_size()

    # Register a new available connection.  We must not know about c already.
    # c will be pushed onto the available stack even if we're over the
    # pool size limit.
    def push(self, c):
        assert c not in self.all
        assert c not in self.available
        # Make room first, so that appending c leaves at most pool_size
        # entries on the stack.
        self._reduce_size(strictly_less=True)
        self.all.add(c)
        self.available.append(c)
        n, limit = len(self.all), self.pool_size
        if n > limit:
            # logger.warning, not the deprecated logger.warn alias.
            reporter = logger.warning
            if n > 2 * limit:
                reporter = logger.critical
            reporter("DB.open() has %s open connections with a pool_size "
                     "of %s", n, limit)

    # Reregister an available connection formerly obtained via pop().  This
    # pushes it on the stack of available connections, and may discard
    # older available connections.
    def repush(self, c):
        assert c in self.all
        assert c not in self.available
        self._reduce_size(strictly_less=True)
        self.available.append(c)

    # Throw away the oldest available connections until the stack holds
    # no more than pool_size entries (strictly_less=False, the default),
    # or strictly fewer than pool_size entries (strictly_less=True, used
    # just before a connection is appended to the stack).
    def _reduce_size(self, strictly_less=False):
        target = self.pool_size - bool(strictly_less)
        # Never aim below an empty stack:  with pool_size == 0 and
        # strictly_less=True the raw target is -1, and popping from an
        # empty list would raise IndexError.
        if target < 0:
            target = 0
        while len(self.available) > target:
            c = self.available.pop(0)
            self.all.remove(c)
            # While application code may still hold a reference to `c`,
            # there's little useful that can be done with this Connection
            # anymore.  Its cache may be holding on to limited resources,
            # and we replace the cache with an empty one now so that we
            # don't have to wait for gc to reclaim it.  Note that it's not
            # possible for DB.open() to return `c` again:  `c` can never
            # be in an open state again.
            # TODO:  Perhaps it would be better to break the reference
            # cycles between `c` and `c._cache`, so that refcounting reclaims
            # both right now.  But if user code _does_ have a strong
            # reference to `c` now, breaking the cycle would not reclaim `c`
            # now, and `c` would be left in a user-visible crazy state.
            c._resetCache()

    # Pop an available connection and return it, or return None if none are
    # available.  In the latter case, the caller should create a new
    # connection, register it via push(), and call pop() again.  The
    # caller is responsible for serializing this sequence.
    def pop(self):
        result = None
        if self.available:
            result = self.available.pop()
            # Leave it in self.all, so we can still get at it for statistics
            # while it's alive.
            assert result in self.all
        return result

    # For every live connection c, invoke f(c).
    def map(self, f):
        self.all.map(f)


class DB(object):
    """The Object Database
    -------------------

    The DB class coordinates the activities of multiple database
    Connection instances.  Most of the work is done by the
    Connections created via the open method.

    The DB instance manages a pool of connections.  If a connection is
    closed, it is returned to the pool and its object cache is
    preserved.  A subsequent call to open() will reuse the connection.
    There is no hard limit on the pool size.  If more than `pool_size`
    connections are opened, a warning is logged, and if more than twice
    that many, a critical problem is logged.

    The class variable 'klass' is used by open() to create database
    connections.  It is set to Connection, but a subclass could override
    it to provide a different connection implementation.

    The database provides a few methods intended for application code
    -- open, close, undo, and pack -- and a large collection of
    methods for inspecting the database and its connections' caches.

    :Cvariables:
      - `klass`: Class used by L{open} to create database connections

    :Groups:
      - `User Methods`: __init__, open, close, undo, pack, classFactory
      - `Inspection Methods`: getName, getSize, objectCount,
        getActivityMonitor, setActivityMonitor
      - `Connection Pool Methods`: getPoolSize, getVersionPoolSize,
        removeVersionPool, setPoolSize, setVersionPoolSize
      - `Transaction Methods`: invalidate
      - `Other Methods`: lastTransaction, connectionDebugInfo
      - `Version Methods`: modifiedInVersion, abortVersion, commitVersion,
        versionEmpty
      - `Cache Inspection Methods`: cacheDetail, cacheExtremeDetail,
        cacheFullSweep, cacheLastGCTime, cacheMinimize, cacheSize,
        cacheDetailSize, getCacheSize, getVersionCacheSize,
        setCacheSize, setVersionCacheSize
    """
    implements(IDatabase)

    klass = Connection  # Class to use for connections
    _activity_monitor = None

    def __init__(self, storage,
                 pool_size=7,
                 cache_size=400,
                 version_pool_size=3,
                 version_cache_size=100,
                 database_name='unnamed',
                 databases=None,
                 ):
        """Create an object database.

        :Parameters:
          - `storage`: the storage used by the database, e.g. FileStorage
          - `pool_size`: expected maximum number of open connections
          - `cache_size`: target size of Connection object cache
          - `version_pool_size`: expected maximum number of connections (per
            version)
          - `version_cache_size`: target size of Connection object cache for
            version connections
          - `database_name`: key under which this DB registers itself in
            `databases`
          - `databases`: dict mapping database names to DB objects; a new
            dict is created when None is passed

        Raises ValueError if `database_name` is already a key of
        `databases`.
        """
        # Allocate lock.  It's reentrant, so methods holding it may call
        # other methods that acquire it too.
        x = threading.RLock()
        self._a = x.acquire
        self._r = x.release

        # Setup connection pools and cache info
        # _pools maps a version string to a _ConnectionPool object.
        self._pools = {}
        self._pool_size = pool_size
        self._cache_size = cache_size
        self._version_pool_size = version_pool_size
        self._version_cache_size = version_cache_size

        self._miv_cache = {}

        # Setup storage
        self._storage = storage
        storage.registerDB(self, None)
        if not hasattr(storage, 'tpc_vote'):
            # Give the storage a do-nothing vote so callers can always
            # invoke tpc_vote.
            storage.tpc_vote = lambda *args: None
        try:
            storage.load(z64, '')
        except KeyError:
            # Create the database's root in the storage if it doesn't exist
            from persistent.mapping import PersistentMapping
            root = PersistentMapping()
            # Manually create a pickle for the root to put in the storage.
            # The pickle must be in the special ZODB format.
            buf = cStringIO.StringIO()
            p = cPickle.Pickler(buf, 1)
            p.dump((root.__class__, None))
            p.dump(root.__getstate__())
            t = transaction.Transaction()
            t.description = 'initial database creation'
            storage.tpc_begin(t)
            storage.store(z64, None, buf.getvalue(), '', t)
            storage.tpc_vote(t)
            storage.tpc_finish(t)

        # Multi-database setup.
        if databases is None:
            databases = {}
        self.databases = databases
        self.database_name = database_name
        if database_name in databases:
            raise ValueError("database_name %r already in databases" %
                             database_name)
        databases[database_name] = self

        # Pass through methods:
        for m in ['history', 'supportsUndo', 'supportsVersions', 'undoLog',
                  'versionEmpty', 'versions']:
            setattr(self, m, getattr(storage, m))

        if hasattr(storage, 'undoInfo'):
            self.undoInfo = storage.undoInfo

    # This is called by Connection.close().
    def _returnToPool(self, connection):
        """Return a connection to the pool.

        connection._db must be self on entry.
        """

        self._a()
        try:
            assert connection._db is self
            connection._opened = None

            am = self._activity_monitor
            if am is not None:
                am.closedConnection(connection)

            version = connection._version
            try:
                pool = self._pools[version]
            except KeyError:
                # No such version. We must have deleted the pool.
                # Just let the connection go.

                # We need to break circular refs to make it really go.
                # TODO:  Figure out exactly which objects are involved in the
                # cycle.
                connection.__dict__.clear()
                return
            pool.repush(connection)

        finally:
            self._r()

    # Call f(c) for all connections c in all pools in all versions.
    def _connectionMap(self, f):
        self._a()
        try:
            for pool in self._pools.values():
                pool.map(f)
        finally:
            self._r()

    def abortVersion(self, version, txn=None):
        """Register an AbortVersion resource for `version` with `txn`.

        The current transaction is used when `txn` is None.
        """
        if txn is None:
            txn = transaction.get()
        txn.register(AbortVersion(self, version))

    def cacheDetail(self):
        """Return information on objects in the various caches

        Organized by class.  The result is a sorted list of
        (qualified class name, object count) pairs.
        """

        counts = {}

        def f(con):
            for oid, ob in con._cache.items():
                name = _qualified_class_name(ob)
                counts[name] = counts.get(name, 0) + 1

        self._connectionMap(f)
        detail = counts.items()
        detail.sort()
        return detail

    def cacheExtremeDetail(self):
        """Return a list with one dict per object in every connection cache.

        Each dict has the keys 'conn_no', 'oid', 'id', 'klass', 'rc' and
        'state'.
        """
        detail = []
        conn_no = [0]  # A mutable reference to a counter
        rc = sys.getrefcount

        def f(con):
            conn_no[0] += 1
            cn = conn_no[0]
            for oid, ob in con._cache_items():
                # Prefer an 'id' instance attribute, then '__name__'.
                ob_id = ''
                if hasattr(ob, '__dict__'):
                    d = ob.__dict__
                    if 'id' in d:
                        ob_id = d['id']
                    elif '__name__' in d:
                        ob_id = d['__name__']

                # What refcount ('rc') should we return?  The intent is
                # that we return the true Python refcount, but as if the
                # cache didn't exist.  This routine adds 3 to the true
                # refcount:  1 for binding to name 'ob', another because
                # ob lives in the con._cache_items() list we're iterating
                # over, and calling sys.getrefcount(ob) boosts ob's
                # count by 1 too.  So the true refcount is 3 less than
                # sys.getrefcount(ob) returns.  But, in addition to that,
                # the cache holds an extra reference on non-ghost objects,
                # and we also want to pretend that doesn't exist.
                detail.append({
                    'conn_no': cn,
                    'oid': oid,
                    'id': ob_id,
                    'klass': _qualified_class_name(ob),
                    'rc': rc(ob) - 3 - (ob._p_changed is not None),
                    'state': ob._p_changed,
                    #'references': con.references(oid),
                    })

        self._connectionMap(f)
        return detail

    def cacheFullSweep(self):
        """Call full_sweep() on the cache of every connection."""
        self._connectionMap(lambda c: c._cache.full_sweep())

    def cacheLastGCTime(self):
        """Return the largest cache_last_gc_time over all connection caches.

        Returns 0 when no cache reports a larger value.
        """
        m = [0]

        def f(con):
            t = con._cache.cache_last_gc_time
            if t > m[0]:
                m[0] = t

        self._connectionMap(f)
        return m[0]

    def cacheMinimize(self):
        """Call minimize() on the cache of every connection."""
        self._connectionMap(lambda c: c._cache.minimize())

    def cacheSize(self):
        """Return the sum of cache_non_ghost_count over all connections."""
        m = [0]

        def f(con):
            m[0] += con._cache.cache_non_ghost_count

        self._connectionMap(f)
        return m[0]

    def cacheDetailSize(self):
        """Return a sorted list of per-connection cache size dicts.

        Each dict has the keys 'connection' (repr of the connection),
        'ngsize' (the cache's cache_non_ghost_count) and 'size'
        (len() of the cache).
        """
        m = []

        def f(con):
            m.append({'connection': repr(con),
                      'ngsize': con._cache.cache_non_ghost_count,
                      'size': len(con._cache)})

        self._connectionMap(f)
        m.sort()
        return m

    def close(self):
        """Close the database and its underlying storage.

        It is important to close the database, because the storage may
        flush in-memory data structures to disk when it is closed.
        Leaving the storage open when the process exits can cause the
        next open to be slow.

        What effect does closing the database have on existing
        connections?  Technically, they remain open, but their storage
        is closed, so they stop behaving usefully.  Perhaps close()
        should also close all the Connections.
        """
        self._storage.close()

    def commitVersion(self, source, destination='', txn=None):
        """Register a CommitVersion resource (source -> destination).

        The current transaction is used when `txn` is None.
        """
        if txn is None:
            txn = transaction.get()
        txn.register(CommitVersion(self, source, destination))

    def getCacheSize(self):
        return self._cache_size

    def lastTransaction(self):
        return self._storage.lastTransaction()

    def getName(self):
        return self._storage.getName()

    def getPoolSize(self):
        return self._pool_size

    def getSize(self):
        return self._storage.getSize()

    def getVersionCacheSize(self):
        return self._version_cache_size

    def getVersionPoolSize(self):
        return self._version_pool_size

    def invalidate(self, tid, oids, connection=None, version=''):
        """Invalidate references to a given oid.

        This is used to indicate that one of the connections has committed a
        change to the object.  The connection committing the change should be
        passed in to prevent useless (but harmless) messages to the
        connection.
        """
        if connection is not None:
            version = connection._version
        # Update modified in version cache
        for oid in oids.keys():
            h = hash(oid) % 131
            o = self._miv_cache.get(h, None)
            if o is not None and o[0] == oid:
                del self._miv_cache[h]

        # Notify connections.
        def inval(c):
            if (c is not connection and
                  (not version or c._version == version)):
                c.invalidate(tid, oids)
        self._connectionMap(inval)

    def modifiedInVersion(self, oid):
        """Return the storage's modifiedInVersion(oid), via a small cache.

        The cache has 131 slots keyed by hash(oid) % 131; a slot holds
        the (oid, version) pair most recently looked up for that slot.
        """
        h = hash(oid) % 131
        cache = self._miv_cache
        o = cache.get(h, None)
        if o and o[0] == oid:
            return o[1]
        v = self._storage.modifiedInVersion(oid)
        cache[h] = oid, v
        return v

    def objectCount(self):
        return len(self._storage)

    def open(self, version='', mvcc=True,
             transaction_manager=None, synch=True):
        """Return a database Connection for use by application code.

        The optional `version` argument can be used to specify that a
        version connection is desired.

        Note that the connection pool is managed as a stack, to
        increase the likelihood that the connection's cache will
        include useful objects.

        :Parameters:
          - `version`: the "version" that all changes will be made
             in, defaults to no version.
          - `mvcc`: boolean indicating whether MVCC is enabled
          - `transaction_manager`: transaction manager to use.  None means
             use the default transaction manager.
          - `synch`: boolean indicating whether Connection should
             register for afterCompletion() calls.
        """

        self._a()
        try:
            # pool <- the _ConnectionPool for this version
            pool = self._pools.get(version)
            if pool is None:
                if version:
                    size = self._version_pool_size
                else:
                    size = self._pool_size
                self._pools[version] = pool = _ConnectionPool(size)
            assert pool is not None

            # result <- a connection
            result = pool.pop()
            if result is None:
                if version:
                    size = self._version_cache_size
                else:
                    size = self._cache_size
                c = self.klass(self, version, size)
                pool.push(c)
                result = pool.pop()
            assert result is not None

            # Tell the connection it belongs to self.
            result.open(transaction_manager, mvcc, synch)

            # A good time to do some cache cleanup.
            self._connectionMap(lambda c: c.cacheGC())

            return result

        finally:
            self._r()

    def removeVersionPool(self, version):
        """Forget the connection pool for `version`, if there is one."""
        # _pools is shared state; mutate it under the DB lock like every
        # other method that touches it.
        self._a()
        try:
            try:
                del self._pools[version]
            except KeyError:
                pass
        finally:
            self._r()

    def connectionDebugInfo(self):
        """Return a list with one debug-info dict per live connection.

        Each dict has the keys 'opened' (a formatted open time and age,
        or the falsy connection._opened value), 'info' and 'version'.
        """
        result = []
        t = time()

        def get_info(c):
            # `result`, `t` and `version` are lexically inherited.
            o = c._opened
            d = c.getDebugInfo()
            if d:
                if len(d) == 1:
                    d = d[0]
            else:
                d = ''
            d = "%s (%s)" % (d, len(c._cache))

            result.append({
                'opened': o and ("%s (%.2fs)" % (ctime(o), t-o)),
                'info': d,
                'version': version,
                })

        # _ConnectionPool does no locking of its own, so hold the DB lock
        # while walking the pools.
        self._a()
        try:
            for version, pool in self._pools.items():
                pool.map(get_info)
        finally:
            self._r()
        return result

    def getActivityMonitor(self):
        return self._activity_monitor

    def pack(self, t=None, days=0):
        """Pack the storage, deleting unused object revisions.

        A pack is always performed relative to a particular time, by
        default the current time.  All object revisions that are not
        reachable as of the pack time are deleted from the storage.

        The cost of this operation varies by storage, but it is
        usually an expensive operation.

        There are two optional arguments that can be used to set the
        pack time: t, pack time in seconds since the epoch, and days,
        the number of days to subtract from t or from the current
        time if t is not specified.
        """
        if t is None:
            t = time()
        t -= days * 86400
        try:
            self._storage.pack(t, referencesf)
        except:
            # Deliberately catch everything:  log the failure with its
            # traceback, then let the original exception propagate.
            logger.error("packing", exc_info=True)
            raise

    def setActivityMonitor(self, am):
        self._activity_monitor = am

    def classFactory(self, connection, modulename, globalname):
        # Zope will rebind this method to arbitrary user code at runtime.
        return find_global(modulename, globalname)

    def setCacheSize(self, size):
        """Set the target cache size for non-version connections.

        Existing connections in the '' (non-version) pool are updated.
        """
        self._a()
        try:
            self._cache_size = size
            pool = self._pools.get('')
            if pool is not None:
                def setsize(c):
                    c._cache.cache_size = size
                pool.map(setsize)
        finally:
            self._r()

    def setVersionCacheSize(self, size):
        """Set the target cache size for version connections.

        Existing connections in every non-'' version pool are updated.
        """
        self._a()
        try:
            self._version_cache_size = size

            def setsize(c):
                c._cache.cache_size = size

            for version, pool in self._pools.items():
                if version:
                    pool.map(setsize)
        finally:
            self._r()

    def setPoolSize(self, size):
        self._pool_size = size
        self._reset_pool_sizes(size, for_versions=False)

    def setVersionPoolSize(self, size):
        self._version_pool_size = size
        self._reset_pool_sizes(size, for_versions=True)

    def _reset_pool_sizes(self, size, for_versions=False):
        # Apply `size` to the version pools (for_versions true) or to the
        # non-version pool (for_versions false).
        self._a()
        try:
            for version, pool in self._pools.items():
                if (version != '') == for_versions:
                    pool.set_pool_size(size)
        finally:
            self._r()

    def undo(self, id, txn=None):
        """Undo a transaction identified by id.

        A transaction can be undone if all of the objects involved in
        the transaction were not modified subsequently, if any
        modifications can be resolved by conflict resolution, or if
        subsequent changes resulted in the same object state.

        The value of id should be generated by calling undoLog()
        or undoInfo().  The value of id is not the same as a
        transaction id used by other methods; it is unique to undo().

        :Parameters:
          - `id`: a storage-specific transaction identifier
          - `txn`: transaction context to use for undo().
            By default, uses the current transaction.
        """
        if txn is None:
            txn = transaction.get()
        txn.register(TransactionalUndo(self, id))

    def versionEmpty(self, version):
        return self._storage.versionEmpty(version)


resource_counter_lock = threading.Lock()
resource_counter = 0


class ResourceManager(object):
    """Transaction participation for a version or undo resource."""

    def __init__(self, db):
        self._db = db
        # Delegate the actual 2PC methods to the storage
        self.tpc_vote = self._db._storage.tpc_vote
        self.tpc_finish = self._db._storage.tpc_finish
        self.tpc_abort = self._db._storage.tpc_abort

        # Get a number from a simple thread-safe counter, then
        # increment it, for the purpose of sorting ResourceManagers by
        # creation order.  This ensures that multiple ResourceManagers
        # within a transaction commit in a predictable sequence.
        resource_counter_lock.acquire()
        try:
            global resource_counter
            self._count = resource_counter
            resource_counter += 1
        finally:
            resource_counter_lock.release()

    def sortKey(self):
        return "%s:%016x" % (self._db._storage.sortKey(), self._count)

    def tpc_begin(self, txn, sub=False):
        if sub:
            raise ValueError("doesn't support sub-transactions")
        self._db._storage.tpc_begin(txn)

    # The object registers itself with the txn manager, so the ob
    # argument to the methods below is self.

    def abort(self, obj, txn):
        pass

    def commit(self, obj, txn):
        pass


class CommitVersion(ResourceManager):
    """Resource manager that commits one version into another."""

    def __init__(self, db, version, dest=''):
        super(CommitVersion, self).__init__(db)
        self._version = version
        self._dest = dest

    def commit(self, ob, t):
        dest = self._dest
        tid, oids = self._db._storage.commitVersion(self._version,
                                                    dest,
                                                    t)
        oids = dict.fromkeys(oids, 1)
        self._db.invalidate(tid, oids, version=dest)
        if dest:
            # the code above just invalidated the dest version.
            # now we need to invalidate the source!
            self._db.invalidate(tid, oids, version=self._version)


class AbortVersion(ResourceManager):
    """Resource manager that aborts a version."""

    def __init__(self, db, version):
        super(AbortVersion, self).__init__(db)
        self._version = version

    def commit(self, ob, t):
        tid, oids = self._db._storage.abortVersion(self._version, t)
        self._db.invalidate(tid,
                            dict.fromkeys(oids, 1),
                            version=self._version)


class TransactionalUndo(ResourceManager):
    """Resource manager that undoes a transaction."""

    def __init__(self, db, tid):
        super(TransactionalUndo, self).__init__(db)
        self._tid = tid

    def commit(self, ob, t):
        tid, oids = self._db._storage.undo(self._tid, t)
        self._db.invalidate(tid, dict.fromkeys(oids, 1))