Commit c3ce017f authored by Jim Fulton's avatar Jim Fulton

Fleshed out the storage interfaces.

parent a600d02d
...@@ -18,6 +18,7 @@ $Id$ ...@@ -18,6 +18,7 @@ $Id$
from zope.interface import Interface, Attribute from zope.interface import Interface, Attribute
class IConnection(Interface): class IConnection(Interface):
"""Connection to ZODB for loading and storing objects. """Connection to ZODB for loading and storing objects.
...@@ -289,7 +290,6 @@ class IConnection(Interface): ...@@ -289,7 +290,6 @@ class IConnection(Interface):
This invalidates *all* objects in the cache. If the connection This invalidates *all* objects in the cache. If the connection
is open, subsequent reads will fail until a new transaction is open, subsequent reads will fail until a new transaction
begins or until the connection is reopened. begins or until the connection is reopened.
""" """
class IStorageDB(Interface): class IStorageDB(Interface):
...@@ -345,37 +345,6 @@ class IDatabase(IStorageDB): ...@@ -345,37 +345,6 @@ class IDatabase(IStorageDB):
TODO: This interface is incomplete. TODO: This interface is incomplete.
""" """
## __init__ methods don't belong in interfaces:
##
## def __init__(storage,
## pool_size=7,
## cache_size=400,
## version_pool_size=3,
## version_cache_size=100,
## database_name='unnamed',
## databases=None,
## ):
## """Create an object database.
## storage: the storage used by the database, e.g. FileStorage
## pool_size: expected maximum number of open connections
## cache_size: target size of Connection object cache, in number of
## objects
## version_pool_size: expected maximum number of connections (per
## version)
## version_cache_size: target size of Connection object cache for
## version connections, in number of objects
## database_name: when using a multi-database, the name of this DB
## within the database group. It's a (detected) error if databases
## is specified too and database_name is already a key in it.
## This becomes the value of the DB's database_name attribute.
## databases: when using a multi-database, a mapping to use as the
## binding of this DB's .databases attribute. It's intended
## that the second and following DB's added to a multi-database
## pass the .databases attribute set on the first DB added to the
## collection.
## """
databases = Attribute("""\ databases = Attribute("""\
A mapping from database name to DB (database) object. A mapping from database name to DB (database) object.
...@@ -386,119 +355,456 @@ class IDatabase(IStorageDB): ...@@ -386,119 +355,456 @@ class IDatabase(IStorageDB):
entry. entry.
""") """)
def invalidateCache(): def open(version='',
"""Invalidate all objects in the database object caches mvcc=True,
transaction_manager=None,
synch=True
):
"""Return an IConnection object for use by application code.
version: the "version" that all changes will be made
in, defaults to no version.
mvcc: boolean indicating whether MVCC is enabled
transaction_manager: transaction manager to use. None means
use the default transaction manager.
synch: boolean indicating whether Connection should
register for afterCompletion() calls.
Note that the connection pool is managed as a stack, to
increase the likelihood that the connection's cache will
include useful objects.
"""
invalidateCache will be called on each of the database's connections. # TODO: Should this method be moved into some subinterface?
def pack(t=None, days=0):
"""Pack the storage, deleting unused object revisions.
A pack is always performed relative to a particular time, by
default the current time. All object revisions that are not
reachable as of the pack time are deleted from the storage.
The cost of this operation varies by storage, but it is
usually an expensive operation.
There are two optional arguments that can be used to set the
pack time: t, pack time in seconds since the epoch, and days,
the number of days to subtract from t or from the current
time if t is not specified.
"""
# TODO: Should this method be moved into some subinterface?
def undo(id, txn=None):
"""Undo a transaction identified by id.
A transaction can be undone if all of the objects involved in
the transaction were not modified subsequently, if any
modifications can be resolved by conflict resolution, or if
subsequent changes resulted in the same object state.
The value of id should be generated by calling undoLog()
or undoInfo(). The value of id is not the same as a
transaction id used by other methods; it is unique to undo().
id: a storage-specific transaction identifier
txn: transaction context to use for undo().
By default, uses the current transaction.
"""
def close():
"""Close the database and its underlying storage.
It is important to close the database, because the storage may
flush in-memory data structures to disk when it is closed.
Leaving the storage open when the process exits can cause the
next open to be slow.
What effect does closing the database have on existing
connections? Technically, they remain open, but their storage
is closed, so they stop behaving usefully. Perhaps close()
should also close all the Connections.
""" """
class IStorage(Interface): class IStorage(Interface):
"""A storage is responsible for storing and retrieving data of objects. """A storage is responsible for storing and retrieving data of objects.
""" """
## What follows is the union of methods found across various storage def close():
## implementations. Exactly what "the storage API" is and means has """Close the storage.
## become fuzzy over time. Methods should be uncommented here, or """
## even deleted, as the storage API regains a crisp definition.
def getName():
## def load(oid, version): """The name of the storage
## """TODO"""
## The format and interpretation of this name is storage
## def close(): dependent. It could be a file name, a database name, etc.
## """TODO"""
## This is used solely for informational purposes.
## def cleanup(): """
## """TODO"""
## def getSize():
## def lastSerial(): """An approximate size of the database, in bytes.
## """TODO"""
## This is used solely for informational purposes.
## def lastTransaction(): """
## """TODO"""
## def history(oid, version, size=1):
## def lastTid(oid): """Return a sequence of history information dictionaries.
## """Return last serialno committed for object oid."""
## Up to size objects (including no objects) may be returned.
## def loadSerial(oid, serial):
## """TODO""" The information provides a log of the changes made to the
## object. Data are reported in reverse chronological order.
## def loadBefore(oid, tid):
## """TODO""" Each dictionary has the following keys:
##
## def iterator(start=None, stop=None): time
## """TODO""" UTC seconds since the epoch (as in time.time) that the
## object revision was committed.
## def sortKey(): tid
## """TODO""" The transaction identifier of the transaction that
## committed the version.
## def getName(): version
## """TODO""" The version that the revision is in. If the storage
## doesn't support versions, then this must be an empty
## def getSize(): string.
## """TODO""" user_name
## The user identifier, if any (or an empty string) of the
## def history(oid, version, length=1, filter=None): user on whose behalf the revision was committed.
## """TODO""" description
## The transaction description for the transaction that
## def new_oid(): committed the revision.
## """TODO""" size
## The size of the revision data record.
## def set_max_oid(possible_new_max_oid):
## """TODO""" If the transaction had extension items, then these items are
## also included if they don't conflict with the keys above.
## def registerDB(db): """
## """TODO"""
## def isReadOnly():
## def isReadOnly(): """Test whether a storage allows committing new transactions
## """TODO"""
## For a given storage instance, this method always returns the
## def supportsUndo(): same value. Read-only-ness is a static property of a storage.
## """TODO""" """
##
## def supportsVersions(): def lastTransaction():
## """TODO""" """Return the id of the last committed transaction
## """
## def tpc_abort(transaction):
## """TODO""" def __len__():
## """The approximate number of objects in the storage
## def tpc_begin(transaction):
## """TODO""" This is used solely for informational purposes.
## """
## def tpc_vote(transaction):
## """TODO""" def load(oid, version):
## """Load data for an object id and version
## def tpc_finish(transaction, f=None):
## """TODO""" A data record and serial are returned. The serial is a
## transaction identifier of the transaction that wrote the data
## def getSerial(oid): record.
## """TODO"""
## A POSKeyError is raised if there is no record for the object
## def loadSerial(oid, serial): id and version.
## """TODO"""
## Storages that don't support versions must ignore the version
## def loadBefore(oid, tid): argument.
## """TODO""" """
##
## def getExtensionMethods(): def loadBefore(oid, tid):
## """TODO""" """Load the object data written before a transaction id
##
## def copyTransactionsFrom(): If there isn't data before the object before the given
## """TODO""" transaction, then None is returned, otherwise three values are
## returned:
## def store(oid, oldserial, data, version, transaction):
## """ - The data record
##
## may return the new serial or not - The transaction id of the data record
## """
- The transaction id of the following revision, if any, or None.
"""
def loadSerial(oid, serial):
"""Load the object record for the given transaction id
If a matching data record can be found, it is returned,
otherwise, POSKeyError is raised.
"""
def new_oid():
"""Allocate a new object id.
The object id returned is reserved at least as long as the
storage is opened.
The return value is a string.
"""
def pack(pack_time, referencesf):
"""Pack the storage
It is up to the storage to interpret this call, however, the
general idea is that the storage frees space by:
- discarding object revisions that were old and not current as of the
given pack time.
- garbage collecting objects that aren't reachable from the
root object via revisions remaining after discarding
revisions that were not current as of the pack time.
The pack time is given as a UTC time in seconds since the
epoch.
The second argument is a function that should be used to
extract object references from database records. This is
needed to determine which objects are referenced from object
revisions.
"""
def registerDB(db):
"""Register an IStorageDB.
Note that, for historical reasons, an implementation may
require a second argument, however, if required, None will
be passed as the second argument.
"""
def sortKey():
"""Sort key used to order distributed transactions
When a transaction involves multiple storages, 2-phase commit
operations are applied in sort-key order. This must be unique
among storages used in a transaction. Obviously, the storage
can't assure this, but it should construct the sort key so it
has a reasonable chance of being unique.
"""
def store(oid, serial, data, version, transaction):
"""Store data for the object id, oid.
Arguments:
oid
The object identifier. This is either a string
consisting of 8 nulls or a string previously returned by
new_oid.
serial
The serial of the data that was read when the object was
loaded from the database. If the object was created in
the current transaction this will be a string consisting
of 8 nulls.
data
The data record. This is opaque to the storage.
version
The version to store the data in. If the storage doesn't
support versions, this should be an empty string and the
storage is allowed to ignore it.
transaction
A transaction object. This should match the current
transaction for the storage, set by tpc_begin.
The new serial for the object is returned, but not necessarily
immediately. It may be returned directly, or in a subsequent
store or tpc_vote call.
The return value may be:
- None
- A new serial (string) for the object, or
- An iterable of object-id and serial pairs giving new serials
for objects.
"""
def tpc_abort(transaction):
"""Abort the transaction.
Any changes made by the transaction are discarded.
This call is ignored if the storage is not participating in
two-phase commit or if the given transaction is not the same
as the transaction the storage is committing.
"""
def tpc_begin(transaction):
"""Begin the two-phase commit process.
If storage is already participating in a two-phase commit
using the same transaction, the call is ignored.
If the storage is already participating in a two-phase commit
using a different transaction, the call blocks until the
current transaction ends (commits or aborts).
"""
def tpc_finish(transaction, func = lambda: None):
"""Finish the transaction, making any transaction changes permanent.
Changes must be made permanent at this point.
This call is ignored if the storage isn't participating in
two-phase commit or if it is committing a different
transaction. Failure of this method is extremely serious.
"""
def tpc_vote(transaction):
"""Provide a storage with an opportunity to veto a transaction
This call is ignored if the storage isn't participating in
two-phase commit or if it is committing a different
transaction. Failure of this method is extremely serious.
If a transaction can be committed by a storage, then the
method should return. If a transaction cannot be committed,
then an exception should be raised. If this method returns
without an error, then there must not be an error if
tpc_finish or tpc_abort is called subsequently.
The return value can be either None or a sequence of object-id
and serial pairs giving new serials for objects whose ids were
passed to previous store calls in the same transaction.
After the tpc_vote call, new serials must have been returned,
either from tpc_vote or store for objects passed to store.
"""
class IStorageRestoreable(IStorage):
def tpc_begin(transaction, tid=None):
"""Begin the two-phase commit process.
If storage is already participating in a two-phase commit
using the same transaction, the call is ignored.
If the storage is already participating in a two-phase commit
using a different transaction, the call blocks until the
current transaction ends (commits or aborts).
If a transaction id is given, then the transaction will use
the given id rather than generating a new id. This is used
when copying already committed transactions from another
storage.
"""
# Note that the current implementation also accepts a status.
# This is an artifact of:
# - Earlier use of an undo status to undo revisions in place,
# and,
# - Incorrect pack garbage-collection algorithms (possibly
# including the existing FileStorage implementation), that
# failed to take into account records after the pack time.
def restore(oid, serial, data, version, prev_txn, transaction):
"""Write data already committed in a separate database
The restore method is used when copying data from one database
to a replica of the database. It differs from store in that
the data have already been committed, so there is no check for
conflicts and no new transaction is used for the data.
Arguments:
oid
The object id for the record
serial
The transaction identifier that originally committed this object.
data
The record data. This will be None if the transaction
undid the creation of the object.
version
The version identifier for the record
prev_txn
The identifier of a previous transaction that held the
object data. The target storage can sometimes use this
as a hint to save space.
transaction
The current transaction.
Nothing is returned.
"""
class IStorageRecordInformation(Interface):
"""Provide information about a single storage record
"""
oid = Attribute("The object id")
version = Attribute("The version")
data = Attribute("The data record")
class IStorageTransactionInformation(Interface):
"""Provide information about a storage transaction
"""
tid = Attribute("Transaction id")
status = Attribute("Transaction Status") # XXX what are valid values?
user = Attribute("Transaction user")
description = Attribute("Transaction Description")
extension = Attribute("Transaction extension data")
def __iter__():
"""Return an iterable of IStorageRecordInformation
"""
class IStorageIteration(Interface):
"""API for iterating over the contents of a storage
Note that this is a future API. Some storages now provide an
approximation of this.
"""
def iterator(start=None, stop=None):
"""Return an IStorageTransactionInformation iterator.
An IStorageTransactionInformation iterator is returned for
iterating over the transactions in the storage.
If the start argument is not None, then iteration will start
with the first transaction whose identifier is greater than or
equal to start.
If the stop argument is not None, then iteration will end with
the last transaction whose identifier is less than or equal to
stop.
"""
class IStorageUndoable(IStorage): class IStorageUndoable(IStorage):
"""A storage supporting transactional undo. """A storage supporting transactional undo.
""" """
def undo(transaction_id, txn): def supportsUndo():
"""TODO""" """Return True, indicating that the storage supports undo.
"""
def undo(transaction_id, transaction):
"""Undo the transaction corresponding to the given transaction id.
The transaction id is a value returned from undoInfo or
undoLog, which may not be a stored transaction identifier as
used elsewhere in the storage APIs.
This method must only be called in the first phase of
two-phase commit (after tpc_begin but before tpc_vote). It
returns a serial (transaction id) and a sequence of object ids
for objects affected by the transaction.
"""
# Used by DB (Actually, by TransactionalUndo)
def undoLog(first, last, filter=(lambda desc: True)): def undoLog(first, last, filter=None):
"""Return a sequence of descriptions for undoable transactions. """Return a sequence of descriptions for undoable transactions.
Application code should call undoLog() on a DB instance instead of on Application code should call undoLog() on a DB instance instead of on
...@@ -551,8 +857,9 @@ class IStorageUndoable(IStorage): ...@@ -551,8 +857,9 @@ class IStorageUndoable(IStorage):
could be gotten by passing the positive first-last for could be gotten by passing the positive first-last for
`last` instead. `last` instead.
""" """
# DB pass through
def undoInfo(first, last, specification=None): def undoInfo(first=0, last=-20, specification=None):
"""Return a sequence of descriptions for undoable transactions. """Return a sequence of descriptions for undoable transactions.
This is like `undoLog()`, except for the `specification` argument. This is like `undoLog()`, except for the `specification` argument.
...@@ -567,30 +874,39 @@ class IStorageUndoable(IStorage): ...@@ -567,30 +874,39 @@ class IStorageUndoable(IStorage):
ZEO client to its ZEO server (while a ZEO client ignores any `filter` ZEO client to its ZEO server (while a ZEO client ignores any `filter`
argument passed to `undoLog()`). argument passed to `undoLog()`).
""" """
# DB pass-through
class IStoragePackable(Interface):
def pack(t, referencesf): def pack(t, referencesf):
"""TODO""" """Pack the storage
class IStorageVersioning(IStorage): Pack and/or garbage-collect the storage. If the storage does
"""A storage supporting versions. not support undo, then t is ignored. All records for objects
""" that are not reachable from the system root object as of time
t, or as of the current time, if undo is not supported, are
## What follows is the union of methods found across various version storage removed from the storage.
## implementations. Exactly what "the storage API" is and means has
## become fuzzy over time. Methods should be uncommented here, or A storage implementation may treat this method as a no-op. A
## even deleted, as the storage API regains a crisp definition. storage implementation may also delay packing and return
immediately. Storage documentation should define the behavior
## def abortVersion(src, transaction): of this method.
## """TODO""" """
## # Called by DB
## def commitVersion(src, dest, transaction):
## """TODO""" class IStorageCurrentRecordIteration(IStorage):
##
## def modifiedInVersion(oid): def record_iternext(next=None):
## """TODO""" """Iterate over the records in a storage
##
## def versionEmpty(version): Use like this:
## """TODO"""
## >>> next = None
## def versions(max=None): >>> while 1:
## """TODO""" ... oid, tid, data, next = storage.record_iternext(next)
... # do things with oid, tid, and data
... if next is None:
... break
"""
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment