Commit c3ce017f authored by Jim Fulton's avatar Jim Fulton

Fleshed out the storage interfaces.

parent a600d02d
......@@ -18,6 +18,7 @@ $Id$
from zope.interface import Interface, Attribute
class IConnection(Interface):
"""Connection to ZODB for loading and storing objects.
......@@ -289,7 +290,6 @@ class IConnection(Interface):
This invalidates *all* objects in the cache. If the connection
is open, subsequent reads will fail until a new transaction
begins or until the connection is reopened.
"""
class IStorageDB(Interface):
......@@ -345,37 +345,6 @@ class IDatabase(IStorageDB):
TODO: This interface is incomplete.
"""
## __init__ methods don't belong in interfaces:
##
## def __init__(storage,
## pool_size=7,
## cache_size=400,
## version_pool_size=3,
## version_cache_size=100,
## database_name='unnamed',
## databases=None,
## ):
## """Create an object database.
## storage: the storage used by the database, e.g. FileStorage
## pool_size: expected maximum number of open connections
## cache_size: target size of Connection object cache, in number of
## objects
## version_pool_size: expected maximum number of connections (per
## version)
## version_cache_size: target size of Connection object cache for
## version connections, in number of objects
## database_name: when using a multi-database, the name of this DB
## within the database group. It's a (detected) error if databases
## is specified too and database_name is already a key in it.
## This becomes the value of the DB's database_name attribute.
## databases: when using a multi-database, a mapping to use as the
## binding of this DB's .databases attribute. It's intended
## that the second and following DB's added to a multi-database
## pass the .databases attribute set on the first DB added to the
## collection.
## """
databases = Attribute("""\
A mapping from database name to DB (database) object.
......@@ -386,119 +355,456 @@ class IDatabase(IStorageDB):
entry.
""")
def invalidateCache():
"""Invalidate all objects in the database object caches
def open(version='',
mvcc=True,
transaction_manager=None,
synch=True
):
"""Return an IConnection object for use by application code.

version: the "version" that all changes will be made
in, defaults to no version.
mvcc: boolean indicating whether MVCC is enabled
transaction_manager: transaction manager to use. None means
use the default transaction manager.
synch: boolean indicating whether Connection should
register for afterCompletion() calls.

Note that the connection pool is managed as a stack, to
increase the likelihood that the connection's cache will
include useful objects.
"""
invalidateCache will be called on each of the database's connections.
# TODO: Should this method be moved into some subinterface?
def pack(t=None, days=0):
"""Pack the storage, deleting unused object revisions.

A pack is always performed relative to a particular time, by
default the current time. All object revisions that are not
reachable as of the pack time are deleted from the storage.

The cost of this operation varies by storage, but it is
usually an expensive operation.

There are two optional arguments that can be used to set the
pack time: t, pack time in seconds since the epoch, and days,
the number of days to subtract from t or from the current
time if t is not specified.
"""
# TODO: Should this method be moved into some subinterface?
def undo(id, txn=None):
"""Undo a transaction identified by id.

A transaction can be undone if none of the objects involved in
the transaction were modified subsequently, if any subsequent
modifications can be resolved by conflict resolution, or if
subsequent changes resulted in the same object state.

The value of id should be generated by calling undoLog()
or undoInfo(). The value of id is not the same as a
transaction id used by other methods; it is unique to undo().

id: a storage-specific transaction identifier
txn: transaction context to use for undo().
By default, uses the current transaction.
"""
def close():
"""Close the database and its underlying storage.

It is important to close the database, because the storage may
flush in-memory data structures to disk when it is closed.
Leaving the storage open when the process exits can cause the
next open to be slow.

What effect does closing the database have on existing
connections? Technically, they remain open, but their storage
is closed, so they stop behaving usefully. Perhaps close()
should also close all the Connections.
"""
class IStorage(Interface):
    """A storage is responsible for storing and retrieving data of objects.
    """

    # Historical note: this interface used to carry a commented-out union
    # of the methods found across the various storage implementations,
    # to be uncommented or deleted as the storage API regained a crisp
    # definition.  The declarations below supersede those stubs.  Stubs
    # that have no declaration in this interface were: cleanup,
    # lastSerial, lastTid, iterator, set_max_oid, supportsUndo,
    # supportsVersions, getSerial, getExtensionMethods and
    # copyTransactionsFrom.  The old history stub was sketched as
    # history(oid, version, length=1, filter=None) and the old tpc_finish
    # stub as tpc_finish(transaction, f=None).

    def close():
        """Close the storage.
        """

    def getName():
        """The name of the storage

        The format and interpretation of this name is storage
        dependent. It could be a file name, a database name, etc.

        This is used solely for informational purposes.
        """

    def getSize():
        """An approximate size of the database, in bytes.

        This is used solely for informational purposes.
        """

    def history(oid, version, size=1):
        """Return a sequence of history information dictionaries.

        Up to size objects (including no objects) may be returned.

        The information provides a log of the changes made to the
        object. Data are reported in reverse chronological order.

        Each dictionary has the following keys:

        time
            UTC seconds since the epoch (as in time.time) that the
            object revision was committed.

        tid
            The transaction identifier of the transaction that
            committed the revision.

        version
            The version that the revision is in.  If the storage
            doesn't support versions, then this must be an empty
            string.

        user_name
            The user identifier, if any (or an empty string) of the
            user on whose behalf the revision was committed.

        description
            The transaction description for the transaction that
            committed the revision.

        size
            The size of the revision data record.

        If the transaction had extension items, then these items are
        also included if they don't conflict with the keys above.
        """

    def isReadOnly():
        """Test whether a storage allows committing new transactions

        For a given storage instance, this method always returns the
        same value.  Read-only-ness is a static property of a storage.
        """

    def lastTransaction():
        """Return the id of the last committed transaction
        """

    def __len__():
        """The approximate number of objects in the storage

        This is used solely for informational purposes.
        """

    def load(oid, version):
        """Load data for an object id and version

        A data record and serial are returned.  The serial is a
        transaction identifier of the transaction that wrote the data
        record.

        A POSKeyError is raised if there is no record for the object
        id and version.

        Storages that don't support versions must ignore the version
        argument.
        """

    def loadBefore(oid, tid):
        """Load the object data written before a transaction id

        If there isn't data for the object before the given
        transaction, then None is returned, otherwise three values are
        returned:

        - The data record

        - The transaction id of the data record

        - The transaction id of the following revision, if any, or None.
        """

    def loadSerial(oid, serial):
        """Load the object record for the given transaction id

        If a matching data record can be found, it is returned,
        otherwise, POSKeyError is raised.
        """

    def new_oid():
        """Allocate a new object id.

        The object id returned is reserved at least as long as the
        storage is opened.

        The return value is a string.
        """

    def pack(pack_time, referencesf):
        """Pack the storage

        It is up to the storage to interpret this call, however, the
        general idea is that the storage frees space by:

        - discarding object revisions that were old and not current as of the
          given pack time.

        - garbage collecting objects that aren't reachable from the
          root object via revisions remaining after discarding
          revisions that were not current as of the pack time.

        The pack time is given as a UTC time in seconds since the
        epoch.

        The second argument is a function that should be used to
        extract object references from database records.  This is
        needed to determine which objects are referenced from object
        revisions.
        """

    def registerDB(db):
        """Register an IStorageDB.

        Note that, for historical reasons, an implementation may
        require a second argument, however, if required, None will
        be passed as the second argument.
        """

    def sortKey():
        """Sort key used to order distributed transactions

        When a transaction involves multiple storages, 2-phase commit
        operations are applied in sort-key order.  This must be unique
        among storages used in a transaction.  Obviously, the storage
        can't assure this, but it should construct the sort key so it
        has a reasonable chance of being unique.
        """

    def store(oid, serial, data, version, transaction):
        """Store data for the object id, oid.

        Arguments:

        oid
            The object identifier.  This is either a string
            consisting of 8 nulls or a string previously returned by
            new_oid.

        serial
            The serial of the data that was read when the object was
            loaded from the database.  If the object was created in
            the current transaction this will be a string consisting
            of 8 nulls.

        data
            The data record.  This is opaque to the storage.

        version
            The version to store the data in.  If the storage doesn't
            support versions, this should be an empty string and the
            storage is allowed to ignore it.

        transaction
            A transaction object.  This should match the current
            transaction for the storage, set by tpc_begin.

        The new serial for the object is returned, but not necessarily
        immediately.  It may be returned directly, or in a subsequent
        store or tpc_vote call.

        The return value may be:

        - None

        - A new serial (string) for the object, or

        - An iterable of object-id and serial pairs giving new serials
          for objects.
        """

    def tpc_abort(transaction):
        """Abort the transaction.

        Any changes made by the transaction are discarded.

        This call is ignored if the storage is not participating in
        two-phase commit or if the given transaction is not the same
        as the transaction the storage is committing.
        """

    def tpc_begin(transaction):
        """Begin the two-phase commit process.

        If storage is already participating in a two-phase commit
        using the same transaction, the call is ignored.

        If the storage is already participating in a two-phase commit
        using a different transaction, the call blocks until the
        current transaction ends (commits or aborts).
        """

    # NOTE(review): the purpose of ``func`` is not described anywhere in
    # this interface.  It is presumably a callback invoked by the storage
    # while finishing the transaction -- confirm against the storage
    # implementations before documenting it in the docstring.
    def tpc_finish(transaction, func=lambda: None):
        """Finish the transaction, making any transaction changes permanent.

        Changes must be made permanent at this point.

        This call is ignored if the storage isn't participating in
        two-phase commit or if it is committing a different
        transaction.  Failure of this method is extremely serious.
        """

    # NOTE(review): the sentence "Failure of this method is extremely
    # serious." below duplicates the tpc_finish wording, yet the same
    # docstring says an exception should be raised to veto a commit.
    # It looks like a copy-paste from tpc_finish -- confirm before
    # removing it.
    def tpc_vote(transaction):
        """Provide a storage with an opportunity to veto a transaction

        This call is ignored if the storage isn't participating in
        two-phase commit or if it is committing a different
        transaction.  Failure of this method is extremely serious.

        If a transaction can be committed by a storage, then the
        method should return.  If a transaction cannot be committed,
        then an exception should be raised.  If this method returns
        without an error, then there must not be an error if
        tpc_finish or tpc_abort is called subsequently.

        The return value can be either None or a sequence of object-id
        and serial pairs giving new serials for objects whose ids were
        passed to previous store calls in the same transaction.
        After the tpc_vote call, new serials must have been returned,
        either from tpc_vote or store for objects passed to store.
        """
class IStorageRestoreable(IStorage):
    """A storage into which already committed data can be copied.

    It extends IStorage with a tpc_begin that accepts an explicit
    transaction id and with a restore method that writes records
    without conflict checking.
    """

    def tpc_begin(transaction, tid=None):
        """Begin the two-phase commit process.

        If storage is already participating in a two-phase commit
        using the same transaction, the call is ignored.

        If the storage is already participating in a two-phase commit
        using a different transaction, the call blocks until the
        current transaction ends (commits or aborts).

        If a transaction id is given, then the transaction will use
        the given id rather than generating a new id.  This is used
        when copying already committed transactions from another
        storage.
        """

    # Note that the current implementation also accepts a status.
    # This is an artifact of:
    #
    # - Earlier use of an undo status to undo revisions in place,
    #   and,
    #
    # - Incorrect pack garbage-collection algorithms (possibly
    #   including the existing FileStorage implementation), that
    #   failed to take into account records after the pack time.

    def restore(oid, serial, data, version, prev_txn, transaction):
        """Write data already committed in a separate database

        The restore method is used when copying data from one database
        to a replica of the database.  It differs from store in that
        the data have already been committed, so there is no check for
        conflicts and no new transaction id is used for the data.

        Arguments:

        oid
            The object id for the record

        serial
            The transaction identifier that originally committed this object.

        data
            The record data.  This will be None if the transaction
            undid the creation of the object.

        version
            The version identifier for the record

        prev_txn
            The identifier of a previous transaction that held the
            object data.  The target storage can sometimes use this
            as a hint to save space.

        transaction
            The current transaction.

        Nothing is returned.
        """
class IStorageRecordInformation(Interface):
    """Describe one data record held by a storage."""

    # Identifier of the object the record belongs to.
    oid = Attribute("The object id")

    # Version the record was written in.
    version = Attribute("The version")

    # The stored data record itself.
    data = Attribute("The data record")
class IStorageTransactionInformation(Interface):
    """Provide information about a storage transaction
    """

    tid = Attribute("Transaction id")
    status = Attribute("Transaction Status")  # XXX what are valid values?
    user = Attribute("Transaction user")
    description = Attribute("Transaction Description")
    extension = Attribute("Transaction extension data")

    def __iter__():
        """Return an iterable of IStorageRecordInformation

        Iterating over a transaction yields the data records that the
        transaction wrote, one IStorageRecordInformation per record.
        """
class IStorageIteration(Interface):
    """API for iterating over the contents of a storage

    Note that this is a future API.  Some storages now provide an
    approximation of this.
    """

    def iterator(start=None, stop=None):
        """Return an IStorageTransactionInformation iterator.

        An IStorageTransactionInformation iterator is returned for
        iterating over the transactions in the storage.

        If the start argument is not None, then iteration will start
        with the first transaction whose identifier is greater than or
        equal to start.

        If the stop argument is not None, then iteration will end with
        the last transaction whose identifier is less than or equal to
        stop.
        """
class IStorageUndoable(IStorage):
"""A storage supporting transactional undo.
"""
def undo(transaction_id, txn):
"""TODO"""
def supportsUndo():
"""Return True, indicating that the storage supports undo.

The result is constant for storages providing this interface.
"""
def undo(transaction_id, transaction):
"""Undo the transaction corresponding to the given transaction id.

The transaction id is a value returned from undoInfo or
undoLog, which may not be a stored transaction identifier as
used elsewhere in the storage APIs.

This method must only be called in the first phase of
two-phase commit (after tpc_begin but before tpc_vote). It
returns a serial (transaction id) and a sequence of object ids
for objects affected by the transaction.

NOTE(review): the transaction argument is presumably the
transaction that was passed to tpc_begin -- confirm.
"""
# Used by DB (Actually, by TransactionalUndo)
def undoLog(first, last, filter=(lambda desc: True)):
def undoLog(first, last, filter=None):
"""Return a sequence of descriptions for undoable transactions.
Application code should call undoLog() on a DB instance instead of on
......@@ -551,8 +857,9 @@ class IStorageUndoable(IStorage):
could be gotten by passing the positive first-last for
`last` instead.
"""
# DB pass through
def undoInfo(first, last, specification=None):
def undoInfo(first=0, last=-20, specification=None):
"""Return a sequence of descriptions for undoable transactions.
This is like `undoLog()`, except for the `specification` argument.
......@@ -567,30 +874,39 @@ class IStorageUndoable(IStorage):
ZEO client to its ZEO server (while a ZEO client ignores any `filter`
argument passed to `undoLog()`).
"""
# DB pass-through
class IStoragePackable(Interface):
def pack(t, referencesf):
"""Pack the storage.

TODO: document this method fully.

NOTE(review): by analogy with IStorage.pack(pack_time, referencesf),
t is presumably the pack time and referencesf a function that
extracts object references from data records -- confirm.
"""
class IStorageVersioning(IStorage):
"""A storage supporting versions.
"""
## What follows is the union of methods found across various version storage
## implementations. Exactly what "the storage API" is and means has
## become fuzzy over time. Methods should be uncommented here, or
## even deleted, as the storage API regains a crisp definition.
##
## NOTE: every method stub below is commented out, so this interface
## currently declares no methods beyond those inherited from IStorage.
## def abortVersion(src, transaction):
## """TODO"""
##
## def commitVersion(src, dest, transaction):
## """TODO"""
##
## def modifiedInVersion(oid):
## """TODO"""
##
## def versionEmpty(version):
## """TODO"""
##
## def versions(max=None):
## """TODO"""
"""Pack the storage
Pack and/or garbage-collect the storage. If the storage does
not support undo, then t is ignored. All records for objects
that are not reachable from the system root object as of time
t, or as of the current time, if undo is not supported, are
removed from the storage.
A storage implementation may treat this method as a no-op. A
storage implementation may also delay packing and return
immediately. Storage documentation should define the behavior
of this method.
"""
# Called by DB
class IStorageCurrentRecordIteration(IStorage):
    """A storage whose records can be walked one call at a time."""

    def record_iternext(next=None):
        """Step through the records in a storage

        Each call hands back an ``(oid, tid, data, next)`` group.  Feed
        the returned ``next`` into the following call; a ``next`` of
        None means there is nothing further to fetch.

        Typical driving loop:

            >>> next = None
            >>> while 1:
            ...     oid, tid, data, next = storage.record_iternext(next)
            ...     # do things with oid, tid, and data
            ...     if next is None:
            ...         break

        """
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment