Commit f192110f authored by Jim Fulton's avatar Jim Fulton

Merge remote-tracking branch 'zodbdocs/merge-zodb' into merge-zodbdocs

parents becda891 868ac93f
# Makefile for Sphinx documentation
#
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = bin/sphinx-build
PAPER =

# Internal variables.
# $(PAPEROPT_$(PAPER)) picks the option matching PAPER=a4 or PAPER=letter and
# expands to nothing when PAPER is left empty.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS = -d build/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .

# None of the targets in this file names a file it creates, so all of them,
# including json, are declared phony; otherwise a stray file with the same
# name would make the target look up to date and skip the build.
.PHONY: help clean html web pickle json htmlhelp latex changes linkcheck
# Print the list of documented targets. The whole listing is emitted by one
# shell invocation; each echo only runs if the previous one succeeded.
help:
	@echo "Please use \`make <target>' where <target> is one of" && \
	echo " html to make standalone HTML files" && \
	echo " pickle to make pickle files" && \
	echo " json to make JSON files" && \
	echo " htmlhelp to make HTML files and a HTML help project" && \
	echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" && \
	echo " changes to make an overview over all changed/added/deprecated items" && \
	echo " linkcheck to check all external links for integrity"
# Remove everything below build/. The leading '-' keeps make going even if
# the removal reports an error. $(RM) is make's built-in "rm -f".
clean:
	-$(RM) -r build/*
# Standalone HTML pages. $@ is the target name, which is also the builder
# name and the output sub-directory.
html:
	mkdir -p build/$@ build/doctrees
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) build/$@
	@echo
	@echo "Build finished. The HTML pages are in build/html."
# Pickle output. $@ doubles as builder name and output sub-directory.
pickle:
	mkdir -p build/$@ build/doctrees
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) build/$@
	@echo
	@echo "Build finished; now you can process the pickle files."

# "web" is an alias that simply builds the pickle target.
web: pickle
# JSON output. $@ doubles as builder name and output sub-directory.
json:
	mkdir -p build/$@ build/doctrees
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) build/$@
	@echo
	@echo "Build finished; now you can process the JSON files."
# HTML Help project. $@ doubles as builder name and output sub-directory.
htmlhelp:
	mkdir -p build/$@ build/doctrees
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) build/$@
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in build/htmlhelp."
# LaTeX sources. Paper size comes from PAPER via ALLSPHINXOPTS.
# $@ doubles as builder name and output sub-directory.
latex:
	mkdir -p build/$@ build/doctrees
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) build/$@
	@echo
	@echo "Build finished; the LaTeX files are in build/latex."
	@echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \
	      "run these through (pdf)latex."
# Overview of changed/added/deprecated items.
# $@ doubles as builder name and output sub-directory.
changes:
	mkdir -p build/$@ build/doctrees
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) build/$@
	@echo
	@echo "The overview file is in build/changes."
# External link check. $@ doubles as builder name and output sub-directory.
linkcheck:
	mkdir -p build/$@ build/doctrees
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) build/$@
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	      "or in build/linkcheck/output.txt."
==================
ZODB documentation
==================
``zodbdocs`` is the source documentation for the website http://zodb.org. It
contains all ZODB relevant documentation like "ZODB/ZEO Programming Guide",
some ZODB articles and links to the ZODB release notes.
Building the documentation
--------------------------
All documentation is formatted as reStructuredText. To generate HTML using
Sphinx, use the following::
python bootstrap.py
./bin/buildout
make html
# -*- coding: utf-8 -*-
#
# ZODB documentation and articles documentation build configuration file, created by
# sphinx-quickstart on Sat Feb 21 09:17:33 2009.
#
# This file is execfile()d with the current directory set to its containing dir.
#
# The contents of this file are pickled, so don't put values in the namespace
# that aren't pickleable (module imports are okay, they're removed automatically).
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
# If your extensions are in another directory, add it here. If the directory
# is relative to the documentation root, use os.path.abspath to make it
# absolute, like shown here.
#sys.path.append(os.path.abspath('.'))
# General configuration
# ---------------------

# Sphinx extensions to load, listed as dotted module names. Extensions that
# ship with Sphinx are named 'sphinx.ext.*'; the others are custom ones.
extensions = [
    'sphinx.ext.autodoc',
    'j1m.sphinxautointerface',
    'j1m.sphinxautozconfig',
]

# Directories (relative to this one) that are searched for templates.
templates_path = ['.templates']

# Filename suffix of the source documents.
source_suffix = '.rst'

# Encoding of the source files.
#source_encoding = 'utf-8'

# Name of the document that holds the master toctree.
master_doc = 'index'

# General information about the project.
project = u'ZODB'
copyright = u'2009-2016, Zope Foundation'

# Version information for the documented project. It replaces |version| and
# |release| and is used in various other places throughout the built documents.
#
# The short X.Y version.
version = '5.0'
# The full version, including alpha/beta/rc tags.
#release = '3.10.3'

# Language for content autogenerated by Sphinx. See the Sphinx documentation
# for the list of supported languages.
#language = None

# |today| can be replaced in two ways. Either set today to a non-false value,
# which is then used verbatim:
#today = ''
# or leave it unset, in which case today_fmt is passed to strftime:
#today_fmt = '%B %d, %Y'

# Patterns for paths that are excluded from the build.
exclude_patterns = [
    'README.rst',
    'eggs/**',
]

# Default reST role (applied to markup written as `text`) for all documents.
#default_role = None

# If true, '()' is appended to :func: etc. cross-reference text.
#add_function_parentheses = True

# If true, the current module name is prepended to all description
# unit titles (such as .. function::).
#add_module_names = True

# If true, sectionauthor and moduleauthor directives are shown in the
# output. They are ignored by default.
#show_authors = False

# Name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
# Options for HTML output
# -----------------------

# Style sheet for HTML and HTML Help pages. A file of that name must exist
# either in Sphinx' static/ path or in one of the custom paths listed in
# html_static_path.
#html_style = 'default.css'

# Name of this set of Sphinx documents. None means
# "<project> v<release> documentation".
#html_title = None

# Shorter title for the navigation bar. Defaults to html_title.
#html_short_title = None

# Image file (relative to this directory) shown at the top of the sidebar.
html_logo = 'zodb.png'

# Image file (within the static path) used as the favicon of the docs. It
# should be a Windows icon file (.ico) of 16x16 or 32x32 pixels.
html_favicon = 'zodb.ico'

# Directories (relative to this one) holding custom static files such as
# style sheets. They are copied after the builtin static files, so a file
# named "default.css" here overwrites the builtin "default.css".
html_static_path = ['.static']

# If not '', a 'Last updated on:' timestamp in the given strftime format is
# inserted at the bottom of every page.
#html_last_updated_fmt = '%b %d, %Y'

# If true, SmartyPants converts quotes and dashes to typographically
# correct entities.
#html_use_smartypants = True

# Custom sidebar templates: maps document names to template names.
#html_sidebars = {}

# Additional templates to render to pages: maps page names to template names.
#html_additional_pages = {}

# If false, no module index is generated.
#html_use_modindex = True

# If false, no index is generated.
#html_use_index = True

# If true, the index is split into one page per letter.
#html_split_index = False

# If true, the reST sources are included in the HTML build as _sources/<name>.
#html_copy_source = True

# If true, an OpenSearch description file is output and every page carries a
# <link> tag referring to it. The value must be the base URL from which the
# finished HTML is served.
#html_use_opensearch = ''

# If nonempty, the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = ''

# Output file base name for the HTML help builder.
htmlhelp_basename = 'ZODBdocumentationandarticlesdoc'
# Options for LaTeX output
# ------------------------

# The paper size ('letter' or 'a4').
#latex_paper_size = 'letter'

# The font size ('10pt', '11pt' or '12pt').
#latex_font_size = '10pt'

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, document class [howto/manual]).
#
# The title and author use plain u'' literals: the ur'' prefix is rejected
# as a syntax error by Python 3, and neither string contains a backslash, so
# the raw flag made no difference to the value.
latex_documents = [
    ('index', 'ZODBdocumentationandarticles.tex',
     u'ZODB documentation and articles',
     u'Zope Developer Community', 'manual'),
]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None

# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False

# Additional stuff for the LaTeX preamble.
#latex_preamble = ''

# Documents to append as an appendix to all manuals.
#latex_appendices = []

# If false, no module index is generated.
#latex_use_modindex = True
An overview of the ZODB (by Laurence Rowe)
==========================================
ZODB in comparison to relational databases, transactions, scalability and best
practice. Originally delivered to the Plone Conference 2007, Naples.
Comparison to other database types
----------------------------------
**Relational Databases** are great at handling large quantities of homogenous
data. If you’re building a ledger system a Relational Database is a great fit.
But Relational Databases only support hierarchical data structures to a
limited degree. A foreign-key relationship must refer to a single table,
so only a single type can be contained.
**Hierarchical databases** (such as LDAP or a filesystem) are much more
suitable for modelling the flexible containment hierarchies required for
content management applications. But most of these systems do not support
transactional semantics. ORMs such as `SQLAlchemy
<http://www.sqlalchemy.org>`_ make working with Relational Databases in an
object orientated manner much more pleasant. But they don’t overcome the
restrictions inherent in a relational model.
The **ZODB** is an (almost) transparent python object persistence system,
heavily influenced by Smalltalk. As an Object-Orientated Database it gives you
the flexibility to build a data model that fits your application. For the most
part you don’t have to worry about persistency - you only work with python
objects and it just happens in the background.
Of course this power comes at a price. While changing the methods your classes
provide is not a problem, changing attributes can necessitate writing a
migration script, as you would with a relational schema change. With ZODB
objects, though, explicit schema migrations are not enforced, which can bite
you later.
Transactions
------------
The ZODB has transactional support at its core. Transactions provide
concurrency control and atomicity. Transactions are executed as if they have
exclusive access to the data, so as an application developer you don’t have to
worry about threading. Of course there is nothing to prevent two simultaneous
conflicting requests, so checks are made at transaction commit time to ensure
consistency.
Since Zope 2.8 ZODB has implemented **Multi Version Concurrency Control**.
This means no more ReadConflictErrors: each transaction is guaranteed to be
able to load any object as it was when the transaction began.
You may still see (Write) **ConflictErrors**. These can be minimised using
data structures that support conflict resolution, primarily B-Trees in the
BTrees library. These scalable data structures are used in Large Plone Folders
and many parts of Zope. One downside is that they don’t support user definable
ordering.
The hot points for ConflictErrors are the catalogue indexes. Some of the
indexes do not support conflict resolution and you will see ConflictErrors
under write-intensive loads. One solution is to defer catalogue updates using
`QueueCatalog <http://pypi.python.org/pypi/Products.QueueCatalog>`_
(`PloneQueueCatalog
<http://pypi.python.org/pypi/Products.PloneQueueCatalog>`_), which allows
indexing operations to be serialized using a separate ZEO client. This can
bring big performance benefits as request retries are reduced, but the
downside is that index updates are no longer reflected immediately in the
application. Another alternative is to offload text indexing to a dedicated
search engine using `collective.solr
<http://pypi.python.org/pypi/collective.solr>`_.
This brings us to **Atomicity**, the other key feature of ZODB transactions. A
transaction will either succeed or fail, your data is never left in an
inconsistent state if an error occurs. This makes Zope a forgiving system to
work with.
You must though be careful with interactions with external systems. If a
ConflictError occurs Zope will attempt to replay a transaction up to three
times. Interactions with an external system should be made through a Data
Manager that participates in the transaction. If you’re talking to a database
use a Zope DA or a SQLAlchemy wrapper like `zope.sqlalchemy
<http://pypi.python.org/pypi/zope.sqlalchemy>`_.
Unfortunately the default MailHost implementation used by Plone is not
transaction aware. With it you can see duplicate emails sent. If this is a
problem use TransactionalMailHost.

Scalability
-----------

Python is limited to a single CPU by the Global Interpreter Lock,
but that’s ok, ZEO lets us run multiple Zope Application servers sharing a
single database. You should run one Zope client for each processor on your
server. ZEO also lets you connect a debug session to your database at the same
time as your Zope web server, invaluable for debugging.
ZEO tends to be IO bound, so the GIL is not an issue.
ZODB also supports **partitioning**, allowing you to spread data over multiple
storages. However you should be careful about cross database references
(especially when copying and pasting between two databases) as they can be
problematic.
Another common reason to use partitioning is because the ZODB in memory cache
settings are made per database. Separating the catalogue into another storage
lets you set a higher target cache size for catalogue objects than for your
content objects. As much of the Plone interface is catalogue driven this can
have a significant performance benefit, especially on a large site.
.. image:: images/zeo-diagram.png
Storage Options
---------------
**FileStorage** is the default. Everything in one big Data.fs file, which is
essentially a transaction log. Use this unless you have a very good reason not
to.
**DirectoryStorage** (`site <http://dirstorage.sourceforge.net>`_) stores one
file per object revision. Does not require the Data.fs.index to be rebuilt on
an unclean shutdown (which can take a significant time for a large database).
Small number of users.
**RelStorage** (`pypi <http://pypi.python.org/pypi/RelStorage>`_) stores
pickles in a relational database. PostgreSQL, MySQL and Oracle are supported
and no ZEO server is required. You benefit from the faster network layers of
these database adapters. However, conflict resolution is moved to the
application server, which can be bad for worst case performance when you have
high network latency.
BDBStorage, OracleStorage, PGStorage and APE have now fallen by the wayside.
Other features
--------------
**Savepoints** (previously sub-transactions) allow fine grained error control
and objects to be garbage collected during a transaction, saving memory.
Versions are deprecated (and will be removed in ZODB 3.9). The application
layer is responsible for versioning, e.g. CMFEditions / ZopeVersionControl.
**Undo**, don’t rely on it! If your object is indexed it may prove impossible
to undo the transaction (independently) if a later transaction has changed the
same index. Undo is only performed on a single database, so if you have
separated out your catalogue it will get out of sync. Fine for undoing in
portal_skins/custom though.
**BLOBs** are new in ZODB 3.8 / Zope 2.11, bringing efficient large file
support. Great for document management applications.
**Packing** removes old revisions of objects. Similar to `Routine Vacuuming
<http://www.postgresql.org/docs/8.3/static/routine-vacuuming.html>`_ in
PostgreSQL.
Some best practice
------------------
**Don’t write on read**. Your Data.fs should not grow on a read. Beware of
setDefault and avoid inplace migration.
**Keep your code on the filesystem**. Too much stuff in the custom folder will
just lead to pain further down the track. Though this can be very convenient
for getting things done when they are needed yesterday...
**Use scalable data structures** such as BTrees. Keep your content objects
simple, add functionality with adapters and views.
This diff is collapsed.
This diff is collapsed.
ZODB articles
=============
Contents
--------
.. toctree::
:maxdepth: 2
ZODB-overview.rst
ZODB1.rst
ZODB2.rst
old-guide/index
multi-zodb-gc.rst
Other ZODB Resources
--------------------
- IBM developerWorks `Example-driven ZODB
<http://www.ibm.com/developerworks/aix/library/au-zodb/>`_
- `How To Love ZODB and Forget RDBMS
<http://zope.org/Members/adytumsolutions/HowToLoveZODB_PartI>`_
- `Very old ZODB wiki <http://www.zope.org/Members/jim/ZODB/FrontPage>`_
Using zc.zodbdgc (fix PosKeyError's)
====================================
*This article was written by Hanno Schlichting*
The `zc.zodbdgc <http://pypi.python.org/pypi/zc.zodbdgc>`_ library contains two
useful features. On the one hand it supports advanced ZODB packing and garbage
collection approaches and on the other hand it includes the ability to create a
database of all persistent references.
The second feature allows us to debug and repair PosKeyErrors by finding the
persistent object(s) that point to the lost object.
Note: This documentation applies to ZODB 3.9 and later. Earlier versions of the
ZODB are not supported, as they lack the fast storage iteration APIs required
by `zc.zodbdgc`.
This documentation does not apply to
`RelStorage <http://pypi.python.org/pypi/RelStorage>`_ which has the same
features built-in, but accessible in different ways. Look at the options for
the `zodbpack` script. The `--prepack` option creates a table containing the
same information as we are creating in the reference database.
Setup
-----
We'll assume you are familiar with a buildout setup. A typical config might
look like this::
[buildout]
parts =
zeo
zeopy
zeo-conf
zodbdgc
refdb-conf
[zeo]
recipe = plone.recipe.zeoserver
zeo-address = 127.0.0.1:8100
blob-storage = ${buildout:directory}/var/blobstorage
pack-gc = false
pack-keep-old = false
[zeopy]
recipe = zc.recipe.egg
eggs =
ZODB3
zc.zodbdgc
interpreter = zeopy
scripts = zeopy
[zeo-conf]
recipe = collective.recipe.template
input = inline:
<zodb main>
<zeoclient>
blob-dir ${buildout:directory}/var/blobstorage
shared-blob-dir yes
server ${zeo:zeo-address}
storage 1
name zeostorage
var ${buildout:directory}/var
</zeoclient>
</zodb>
output = ${buildout:directory}/etc/zeo.conf
[zodbdgc]
recipe = zc.recipe.egg
eggs = zc.zodbdgc
[refdb-conf]
recipe = collective.recipe.template
input = inline:
<zodb main>
<filestorage 1>
path ${buildout:directory}/var/refdb.fs
</filestorage>
</zodb>
output = ${buildout:directory}/etc/refdb.conf
Garbage collection
------------------
We configured the ZEO server to skip garbage collection as part of the normal
pack in the above config (`pack-gc = false`). Instead we use explicit garbage
collection via a different job::
bin/multi-zodb-gc etc/zeo.conf
On larger databases garbage collection can take a couple of hours. We can run
this only once a week or even less frequently. All explicitly deleted objects
will still be packed away by the normal pack, so the database doesn't grow
out of bounds. We can also run the analysis against a database copy, taking
load away from the live database, and only write the resulting deletions to the
production database.
Packing
-------
We can do regular packing every day while the ZEO server is running, via::
bin/zeopack
Packing without garbage collection is much faster.
Reference analysis and POSKeyErrors
-----------------------------------
If our database has any POSKeyErrors, we can find and repair those.
Either we already have the oids of lost objects, or we can check the entire
database for any errors. To check everything we run the following command::
$ bin/multi-zodb-check-refs etc/zeo.conf
This can take about 15 to 30 minutes on moderately sized databases of up to
10 GB, depending on disk speed. We'll write down the reported errors, as we'll
need them later on to analyze them.
If there are any lost objects, we can create a reference database to make it
easier to debug and find those lost objects::
$ bin/multi-zodb-check-refs -r var/refdb.fs etc/zeo.conf
This is significantly slower and can take several hours to complete. Once this
is complete we can open the generated database via our interpreter::
$ bin/zeopy
>>> import ZODB.config
>>> db = ZODB.config.databaseFromFile(open('./etc/refdb.conf'))
>>> conn = db.open()
>>> refs = conn.root()['references']
If we've gotten this error report::
!!! main 13184375 ?
POSKeyError: 0xc92d77
We can look up the persistent oid it was referenced from via::
>>> parent = list(refs['main'][13184375])
>>> parent
[13178389]
We can also get the hex representation::
>>> from ZODB.utils import p64
>>> p64(parent[0])
'\x00\x00\x00\x00\x00\xc9\x16\x15'
With this information, we should get back to our actual database and look
up this object. We'll leave the ref db open, as we might need to recursively
look up some more objects, until we get one we can identify and work on.
We could load the parent. In a debug prompt we could do something like::
>>> app._p_jar.get('\x00\x00\x00\x00\x00\xc9\x16\x15')
2010-04-28 14:28:28 ERROR ZODB.Connection Couldn't load state for 0xc91615
Traceback (most recent call last):
...
ZODB.POSException.POSKeyError: 0xc92d77
Gah, this gives us the POSKeyError of course. But we can load the actual data
of the parent, to get an idea of what this is::
>>> app._p_jar.db()._storage.load('\x00\x00\x00\x00\x00\xc9\x16\x15', '')
('cBTrees.IOBTree
IOBucket
q\x01.((J$KT\x02ccopy_reg
_reconstructor
q\x02(cfive.intid.keyreference
KeyReferenceToPersistent
...
Now we can be real evil and create a new fake object in place of the missing
one::
>>> import transaction
>>> transaction.begin()
The persistent oid that was reported missing was ``13184375``::
>>> from ZODB.utils import p64
>>> p64(13184375)
'\x00\x00\x00\x00\x00\xc9-w'
>>> from persistent import Persistent
>>> a = Persistent()
>>> a._p_oid = '\x00\x00\x00\x00\x00\xc9-w'
We cannot use the ``add`` method of the connection, as this would assign the
object a new persistent oid. So we replicate its internals here::
>>> a._p_jar = app._p_jar
>>> app._p_jar._register(a)
>>> app._p_jar._added[a._p_oid] = a
>>> transaction.commit()
Both getting the object as well as its parent will work now::
>>> app._p_jar.get('\x00\x00\x00\x00\x00\xc9-w')
<persistent.Persistent object at 0xa3e348c>
>>> app._p_jar.get('\x00\x00\x00\x00\x00\xc9\x16\x15')
BTrees.IOBTree.IOBucket([(39078692, <five.intid.keyreference...
Once we are finished we should be nice and close all databases::
>>> conn.close()
>>> db.close()
Depending on the class of object that went missing, we might need to use a
different persistent class, like a persistent mapping or a BTree bucket.
In general it's best to remove the parent object and thus our fake object from
the database and rebuild the data structure again via the proper application
level APIs.
This directory contains Andrew Kuchling's programmer's guide to ZODB
and ZEO. The tex source was not being updated in the ZODB docs directory.
It was originally taken from Andrew's zodb.sf.net project on
SourceForge. Because the original version is no longer updated, this
version [in the zodb docs dir] is best viewed as an independent fork now.
Write section on __setstate__
Continue working on it
Suppress the full GFDL text in the PDF/PS versions
import sys, time, os, random
import transaction
from persistent import Persistent
from ZEO import ClientStorage
import ZODB
from ZODB.POSException import ConflictError
from BTrees import OOBTree
class ChatSession(Persistent):
    """Class for a chat session.

    Messages are stored in a B-tree, indexed by the time the message
    was created.  (Eventually we'd want to throw old messages out; this
    class never deletes any.)

    Public methods:

    add_message(message) -- add a message to the channel
    new_messages()       -- return new messages since the last call to
                            this method
    """

    def __init__(self, name):
        """Initialize new chat session.

        name -- the channel's name
        """
        self.name = name

        # Internal attribute: _messages holds all the chat messages,
        # keyed by the time.time() value at which each was added.
        self._messages = OOBTree.OOBTree()

    def new_messages(self):
        """Return the messages added since the previous call.

        The result is a list of message values in ascending key
        (timestamp) order.  The first call on an object that has no
        _v_last_time yet returns every stored message.
        """
        # self._v_last_time is the time of the most recent message
        # returned to the user of this class.
        if not hasattr(self, '_v_last_time'):
            self._v_last_time = 0

        last_seen = self._v_last_time
        fresh = []
        # Start the scan at the last delivered key instead of walking the
        # whole tree.  The lower bound of items() is inclusive, so the
        # strict comparison is still needed to skip that key itself.
        for stamp, message in self._messages.items(last_seen):
            if stamp > last_seen:
                fresh.append(message)
                self._v_last_time = stamp

        return fresh

    def add_message(self, message):
        """Add a message to the channel.

        message -- text of the message to be added

        Retries until the transaction commits: after a ConflictError the
        transaction is aborted and the insert is attempted again 0.2
        seconds later.
        """
        while True:
            try:
                now = time.time()
                # Two messages added within one clock tick would share a
                # key and the later one would silently replace the
                # earlier.  Nudge the timestamp until the key is free.
                while now in self._messages:
                    now += 1e-6
                self._messages[now] = message
                transaction.commit()
            except ConflictError:
                # Conflict occurred; this process should abort,
                # wait for a little bit, then try again.
                transaction.abort()
                time.sleep(.2)
            else:
                # No ConflictError exception raised, so leave the
                # retry loop.
                break
def get_chat_session(conn, channelname):
    """Return the chat session for a given channel, creating the session
    if required.

    conn        -- an open connection; only its root() method is used here
    channelname -- key under which the session is stored

    Each creation step (the 'chat_sessions' container, then the session
    itself) is committed immediately and announced on standard output.
    """
    # We'll keep a B-tree of sessions, mapping channel names to
    # session objects.  The B-tree is stored at the ZODB's root under
    # the key 'chat_sessions'.
    root = conn.root()
    # "in" and single-argument print() behave the same on Python 2 and 3,
    # unlike has_key() and the print statement.
    if 'chat_sessions' not in root:
        print('Creating chat_sessions B-tree')
        root['chat_sessions'] = OOBTree.OOBTree()
        transaction.commit()

    sessions = root['chat_sessions']

    # Get a session object corresponding to the channel name, creating
    # it if necessary.
    if channelname not in sessions:
        # Formatted into one string so the output matches the old
        # "print a, b" form on both Python 2 and Python 3.
        print('Creating new session: %s' % (channelname,))
        sessions[channelname] = ChatSession(channelname)
        transaction.commit()

    return sessions[channelname]
if __name__ == '__main__':
    # Demo driver: connect to the storage server on localhost:9672, join
    # the channel named on the command line, then loop forever posting a
    # random canned message and printing whatever new messages the
    # session reports.
    if len(sys.argv) != 2:
        print('Usage: %s <channelname>' % sys.argv[0])
        sys.exit(0)

    storage = ClientStorage.ClientStorage(('localhost', 9672))
    db = ZODB.DB(storage)
    conn = db.open()

    session = get_chat_session(conn, sys.argv[1])

    messages = ['Hi.', 'Hello', 'Me too', "I'M 3L33T!!!!"]

    while True:
        # Send a random message, tagged with this process's pid.
        msg = random.choice(messages)
        session.add_message('%s: pid %i' % (msg, os.getpid()))

        # Display new messages
        for msg in session.new_messages():
            print(msg)

        # Wait for a few seconds (1 to 4, inclusive)
        pause = random.randint(1, 4)
        time.sleep(pause)
# Use the python docs converter to convert to rst
# Requires http://svn.python.org/projects/doctools/converter
from converter import restwriter, convert_file
import sys
import os
if __name__ == '__main__':
    # Usage: convert.py docrootdir destdir
    # Converts every .tex file found directly in docrootdir to a .rst file
    # of the same base name in destdir.
    try:
        rootdir = sys.argv[1]
        # Resolve destdir before the chdir below so that a relative path
        # is interpreted against the directory the script was started in.
        destdir = os.path.abspath(sys.argv[2])
    except IndexError:
        print("usage: convert.py docrootdir destdir")
        sys.exit()

    os.chdir(rootdir)

    class IncludeRewrite:
        """Minimal mapping stand-in installed as restwriter.includes_mapping.

        Only get() is provided; it maps an include name to its .rst
        counterpart when a matching .tex file exists in the current
        directory.
        """

        def get(self, a, b=None):
            # b (the usual "default" argument of a mapping's get) is
            # accepted but ignored: unknown names are reported and
            # returned unchanged.
            if os.path.exists(a + '.tex'):
                return a + '.rst'
            print("UNKNOWN FILE %s" % a)
            return a

    restwriter.includes_mapping = IncludeRewrite()

    for infile in os.listdir('.'):
        if infile.endswith('.tex'):
            # foo.tex -> <destdir>/foo.rst
            target = os.path.splitext(infile)[0] + '.rst'
            convert_file(infile, os.path.join(destdir, target))
This diff is collapsed.
===============================
Very old ZODB programming guide
===============================
This guide is based heavily on the work of A. M. Kuchling who wrote the
original guide back in 2002 and which was published under the GNU Free
Documentation License, Version 1.1. See the appendix entitled "GNU Free
Documentation License" for more information.
.. toctree::
:maxdepth: 2
introduction.rst
prog-zodb.rst
zeo.rst
transactions.rst
modules.rst
links.rst
gfdl.rst
.. % Introduction
.. % What is ZODB?
.. % What is ZEO?
.. % OODBs vs. Relational DBs
.. % Other OODBs
Introduction
============
This guide explains how to write Python programs that use the Z Object Database
(ZODB) and Zope Enterprise Objects (ZEO). The latest version of the guide is
always available at `<http://www.zope.org/Wikis/ZODB/guide/index.html>`_.
What is the ZODB?
-----------------
The ZODB is a persistence system for Python objects. Persistent programming
languages provide facilities that automatically write objects to disk and read
them in again when they're required by a running program. By installing the
ZODB, you add such facilities to Python.
It's certainly possible to build your own system for making Python objects
persistent. The usual starting points are the :mod:`pickle` module, for
converting objects into a string representation, and various database modules,
such as the :mod:`gdbm` or :mod:`bsddb` modules, that provide ways to write
strings to disk and read them back. It's straightforward to combine the
:mod:`pickle` module and a database module to store and retrieve objects, and in
fact the :mod:`shelve` module, included in Python's standard library, does this.
The downside is that the programmer has to explicitly manage objects, reading an
object when it's needed and writing it out to disk when the object is no longer
required. The ZODB manages objects for you, keeping them in a cache, writing
them out to disk when they are modified, and dropping them from the cache if
they haven't been used in a while.
OODBs vs. Relational DBs
------------------------
Another way to look at it is that the ZODB is a Python-specific object-oriented
database (OODB). Commercial object databases for C++ or Java often require that
you jump through some hoops, such as using a special preprocessor or avoiding
certain data types. As we'll see, the ZODB has some hoops of its own to jump
through, but in comparison the naturalness of the ZODB is astonishing.
Relational databases (RDBs) are far more common than OODBs. Relational databases
store information in tables; a table consists of any number of rows, each row
containing several columns of information. (Tables are more formally called
relations, which is where the term "relational database" originates.)
Let's look at a concrete example. The example comes from my day job working for
the MEMS Exchange, in a greatly simplified version. The job is to track process
runs, which are lists of manufacturing steps to be performed in a semiconductor
fab. A run is owned by a particular user, and has a name and assigned ID
number. Runs consist of a number of operations; an operation is a single step
to be performed, such as depositing something on a wafer or etching something
off it.
Operations may have parameters, which are additional information required to
perform an operation. For example, if you're depositing something on a wafer,
you need to know two things: 1) what you're depositing, and 2) how much should
be deposited. You might deposit 100 microns of silicon oxide, or 1 micron of
copper.
Mapping these structures to a relational database is straightforward::
CREATE TABLE runs (
int run_id,
varchar owner,
varchar title,
int acct_num,
primary key(run_id)
);
CREATE TABLE operations (
int run_id,
int step_num,
varchar process_id,
PRIMARY KEY(run_id, step_num),
FOREIGN KEY(run_id) REFERENCES runs(run_id),
);
CREATE TABLE parameters (
int run_id,
int step_num,
varchar param_name,
varchar param_value,
PRIMARY KEY(run_id, step_num, param_name)
FOREIGN KEY(run_id, step_num)
REFERENCES operations(run_id, step_num),
);
In Python, you would write three classes named :class:`Run`, :class:`Operation`,
and :class:`Parameter`. I won't present code for defining these classes, since
that code is uninteresting at this point. Each class would contain a single
method to begin with, an :meth:`__init__` method that assigns default values,
such as 0 or ``None``, to each attribute of the class.
It's not difficult to write Python code that will create a :class:`Run` instance
and populate it with the data from the relational tables; with a little more
effort, you can build a straightforward tool, usually called an object-
relational mapper, to do this automatically. (See
`<http://www.amk.ca/python/unmaintained/ordb.html>`_ for a quick hack at a
Python object-relational mapper, and
`<http://www.python.org/workshops/1997-10/proceedings/shprentz.html>`_ for Joel
Shprentz's more successful implementation of the same idea; unlike mine,
Shprentz's system has been used for actual work.)
However, it is difficult to make an object-relational mapper reasonably quick; a
simple-minded implementation like mine is quite slow because it has to do
several queries to access all of an object's data. Higher performance object-
relational mappers cache objects to improve performance, only performing SQL
queries when they actually need to.
That helps if you want to access run number 123 all of a sudden. But what if
you want to find all runs where a step has a parameter named 'thickness' with a
value of 2.0? In the relational version, you have two unappealing choices:
#. Write a specialized SQL query for this case: ``SELECT run_id FROM parameters
WHERE param_name = 'thickness' AND param_value = 2.0``
If such queries are common, you can end up with lots of specialized queries.
When the database tables get rearranged, all these queries will need to be
modified.
#. An object-relational mapper doesn't help much. Scanning through the runs
means that the mapper will perform the required SQL queries to read run #1,
and then a simple Python loop can check whether any of its steps have the
parameter you're looking for. Repeat for run #2, 3, and so forth. This does a
vast number of SQL queries, and therefore is incredibly slow.
An object database such as ZODB simply stores internal pointers from object to
object, so reading in a single object is much faster than doing a bunch of SQL
queries and assembling the results. Scanning all runs, therefore, is still
inefficient, but not grossly inefficient.
What is ZEO?
------------
The ZODB comes with a few different classes that implement the :class:`Storage`
interface. Such classes handle the job of writing out Python objects to a
physical storage medium, which can be a disk file (the :class:`FileStorage`
class), a BerkeleyDB file (:class:`BDBFullStorage`), a relational database
(:class:`DCOracleStorage`), or some other medium. ZEO adds
:class:`ClientStorage`, a new :class:`Storage` that doesn't write to physical
media but just forwards all requests across a network to a server. The server,
which is running an instance of the :class:`StorageServer` class, simply acts as
a front-end for some physical :class:`Storage` class. It's a fairly simple
idea, but as we'll see later on in this document, it opens up many
possibilities.
About this guide
----------------
The primary author of this guide works on a project which uses the ZODB and ZEO
as its primary storage technology. We use the ZODB to store process runs and
operations, a catalog of available processes, user information, accounting
information, and other data. Part of the goal of writing this document is to
make our experience more widely available. A few times we've spent hours or
even days trying to figure out a problem, and this guide is an attempt to gather
up the knowledge we've gained so that others don't have to make the same
mistakes we did while learning.
The author's ZODB project is described in a paper available here,
`<http://www.amk.ca/python/writing/mx-architecture/>`_
This document will always be a work in progress. If you wish to suggest
clarifications or additional topics, please send your comments to the
`ZODB-dev mailing list <https://groups.google.com/forum/#!forum/zodb>`_.
Acknowledgements
----------------
Andrew Kuchling wrote the original version of this guide, which provided some of
the first ZODB documentation for Python programmers. His initial version has
been updated over time by Jeremy Hylton and Tim Peters.
I'd like to thank the people who've pointed out inaccuracies and bugs, offered
suggestions on the text, or proposed new topics that should be covered: Jeff
Bauer, Willem Broekema, Thomas Guettler, Chris McDonough, George Runyan.
.. % links.tex
.. % Collection of relevant links
Resources
=========
Introduction to the Zope Object Database, by Jim Fulton: --- Goes into much
greater detail, explaining advanced uses of the ZODB and how it's actually
implemented. A definitive reference, and highly recommended. ---
`<http://www.python.org/workshops/2000-01/proceedings/papers/fulton/zodb3.html>`_
Persistent Programming with ZODB, by Jeremy Hylton and Barry Warsaw: --- Slides
for a tutorial presented at the 10th Python conference. Covers much of the same
ground as this guide, with more details in some areas and less in others. ---
`<http://www.zope.org/Members/bwarsaw/ipc10-slides>`_
This diff is collapsed.
This diff is collapsed.
.. % Transactions and Versioning
.. % Committing and Aborting
.. % Subtransactions
.. % Undoing
.. % Versions
.. % Multithreaded ZODB Programs
Transactions and Versioning
===========================
Committing and Aborting
-----------------------
Changes made during a transaction don't appear in the database until the
transaction commits. This is done by calling the :meth:`commit` method of the
current :class:`Transaction` object, where the latter is obtained from the
:meth:`get` method of the current transaction manager. If the default thread
transaction manager is being used, then ``transaction.commit()`` suffices.
Similarly, a transaction can be explicitly aborted (all changes within the
transaction thrown away) by invoking the :meth:`abort` method of the current
:class:`Transaction` object, or simply ``transaction.abort()`` if using the
default thread transaction manager.
Prior to ZODB 3.3, if a commit failed (meaning the ``commit()`` call raised an
exception), the transaction was implicitly aborted and a new transaction was
implicitly started. This could be very surprising if the exception was
suppressed, and especially if the failing commit was one in a sequence of
subtransaction commits.
So, starting with ZODB 3.3, if a commit fails, all further attempts to commit,
join, or register with the transaction raise
:exc:`ZODB.POSException.TransactionFailedError`. You must explicitly start a
new transaction then, either by calling the :meth:`abort` method of the current
transaction, or by calling the :meth:`begin` method of the current transaction's
transaction manager.
Subtransactions
---------------
Subtransactions can be created within a transaction. Each subtransaction can be
individually committed and aborted, but the changes within a subtransaction are
not truly committed until the containing transaction is committed.
The primary purpose of subtransactions is to decrease the memory usage of
transactions that touch a very large number of objects. Consider a transaction
during which 200,000 objects are modified. All the objects that are modified in
a single transaction have to remain in memory until the transaction is
committed, because the ZODB can't discard them from the object cache. This can
potentially make the memory usage quite large. With subtransactions, a commit
can be performed at intervals, say, every 10,000 objects. Those 10,000
objects are then written to permanent storage and can be purged from the cache
to free more space.
To commit a subtransaction instead of a full transaction, pass a true value to
the :meth:`commit` or :meth:`abort` method of the :class:`Transaction` object.
::
# Commit a subtransaction
transaction.commit(True)
# Abort a subtransaction
transaction.abort(True)
A new subtransaction is automatically started upon successfully committing or
aborting the previous subtransaction.
Undoing Changes
---------------
Some types of :class:`Storage` support undoing a transaction even after it's
been committed. You can tell if this is the case by calling the
:meth:`supportsUndo` method of the :class:`DB` instance, which returns true if
the underlying storage supports undo. Alternatively you can call the
:meth:`supportsUndo` method on the underlying storage instance.
If a database supports undo, then the :meth:`undoLog(start, end[, func])` method
on the :class:`DB` instance returns the log of past transactions, returning
transactions between the times *start* and *end*, measured in seconds from the
epoch. If present, *func* is a function that acts as a filter on the
transactions to be returned; it's passed a dictionary representing each
transaction, and only transactions for which *func* returns true will be
included in the list of transactions returned to the caller of :meth:`undoLog`.
The dictionary contains keys for various properties of the transaction. The
most important keys are ``id``, for the transaction ID, and ``time``, for the
time at which the transaction was committed. ::
>>> print storage.undoLog(0, sys.maxint)
[{'description': '',
'id': 'AzpGEGqU/0QAAAAAAAAGMA',
'time': 981126744.98,
'user_name': ''},
{'description': '',
'id': 'AzpGC/hUOKoAAAAAAAAFDQ',
'time': 981126478.202,
'user_name': ''}
...
To store a description and a user name on a commit, get the current transaction
and call the :meth:`note(text)` method to store a description, and the
:meth:`setUser(user_name)` method to store the user name. While :meth:`setUser`
overwrites the current user name and replaces it with the new value, the
:meth:`note` method always adds the text to the transaction's description, so it
can be called several times to log several different changes made in the course
of a single transaction. ::
transaction.get().setUser('amk')
transaction.get().note('Change ownership')
To undo a transaction, call the :meth:`DB.undo(id)` method, passing it the ID of
the transaction to undo. If the transaction can't be undone, a
:exc:`ZODB.POSException.UndoError` exception will be raised, with the message
"non-undoable transaction". Usually this will happen because later transactions
modified the objects affected by the transaction you're trying to undo.
After you call :meth:`undo` you must commit the transaction for the undo to
actually be applied. [#]_ There is one glitch in the undo process. The thread
that calls undo may not see the changes to the object until it calls
:meth:`Connection.sync` or commits another transaction.
Versions
--------
.. warning::
Versions should be avoided. They're going to be deprecated, replaced by better
approaches to long-running transactions.
While many subtransactions can be contained within a single regular transaction,
it's also possible to contain many regular transactions within a long-running
transaction, called a version in ZODB terminology. Inside a version, any number
of transactions can be created and committed or rolled back, but the changes
within a version are not made visible to other connections to the same ZODB.
Not all storages support versions, but you can test for versioning ability by
calling the :meth:`supportsVersions` method of the :class:`DB` instance, which
returns true if the underlying storage supports versioning.
A version can be selected when creating the :class:`Connection` instance using
the :meth:`DB.open([*version*])` method. The *version* argument must be a string
that will be used as the name of the version. ::
vers_conn = db.open(version='Working version')
Transactions can then be committed and aborted using this versioned connection.
Other connections that don't specify a version, or provide a different version
name, will not see changes committed within the version named ``Working
version``. To commit or abort a version, which will either make the changes
visible to all clients or roll them back, call the :meth:`DB.commitVersion` or
:meth:`DB.abortVersion` methods. XXX what are the source and dest arguments for?
The ZODB makes no attempt to reconcile changes between different versions.
Instead, the first version which modifies an object will gain a lock on that
object. Attempting to modify the object from a different version or from an
unversioned connection will cause a :exc:`ZODB.POSException.VersionLockError` to
be raised::
from ZODB.POSException import VersionLockError
try:
transaction.commit()
except VersionLockError, (obj_id, version):
print ('Cannot commit; object %s '
'locked by version %s' % (obj_id, version))
The exception provides the ID of the locked object, and the name of the version
having a lock on it.
Multithreaded ZODB Programs
---------------------------
ZODB databases can be accessed from multithreaded Python programs. The
:class:`Storage` and :class:`DB` instances can be shared among several threads,
as long as individual :class:`Connection` instances are created for each thread.
.. rubric:: Footnotes
.. [#] There are actually two different ways a storage can implement the undo feature.
Most of the storages that ship with ZODB use the transactional form of undo
described in the main text. Some storages may use a non-transactional undo
that makes changes visible immediately.
This diff is collapsed.
======================
ZODB programming guide
======================
This guide consists of a collection of topics that should be of
interest to most developers. They're provided in order of importance,
which is also an order from least to most advanced, but they can be
read in any order.
If you haven't yet, you should read the :ref:`Tutorial <tutorial-label>`.
.. toctree::
:maxdepth: 2
install-and-run
writing-persistent-objects.rst
.. todo:
transaction.rst
storages.rst
configuration.rst
threading.rst
packing-and-garbage-collection.rst
blobs.rst
multi-databases.rst
===========================
Installing and running ZODB
===========================
This topic discusses some boring nitty-gritty details needed to
actually run ZODB.
Installation
============
Installation of ZODB is pretty straightforward using Python's
packaging system. For example, using pip::
pip install ZODB
You may need additional optional packages, such as `ZEO
<https://pypi.python.org/pypi/ZEO>`_ or `RelStorage
<https://pypi.python.org/pypi/RelStorage>`_, depending on your deployment
choices.
Configuration
=============
You can set up ZODB in your application using either Python, or
ZODB's configuration language. For simple database setup, and
especially for exploration, the Python APIs are sufficient.
For more complex configurations, you'll probably find ZODB's
configuration language easier to use.
To understand database setup, it's important to understand ZODB's
architecture. ZODB separates database functionality
from storage concerns. When you create a database object,
you specify a storage object for it to use, as in::
import ZODB, ZODB.FileStorage
storage = ZODB.FileStorage.FileStorage('mydata.fs')
db = ZODB.DB(storage)
So when you define a database, you'll also define a storage. In the
example above, we define a :class:`file storage
<ZODB.FileStorage.FileStorage.FileStorage>` and then use it to define
a database.
Sometimes, storages are created through composition. For example, if
we want to save space, we could layer a ``ZlibStorage``
[#zlibstoragefn]_ over the file storage::
import ZODB, ZODB.FileStorage, zc.zlibstorage
storage = ZODB.FileStorage.FileStorage('mydata.fs')
compressed_storage = zc.zlibstorage.ZlibStorage(storage)
db = ZODB.DB(compressed_storage)
`ZlibStorage <https://pypi.python.org/pypi/zc.zlibstorage>`_
compresses database records [#zlib]_.
Python configuration
--------------------
To set up a database with Python, you'll construct a storage using the
:ref:`storage APIs <included-storages-label>`, and then pass the
storage to the :class:`~ZODB.DB` class to create a database, as shown
in the examples in the previous section.
The :class:`~ZODB.DB` class also accepts a string path name as its
storage argument to automatically create a file storage. You can also
pass ``None`` as the storage to automatically use a
:class:`~ZODB.MappingStorage.MappingStorage`, which is convenient when
exploring ZODB::
db = ZODB.DB(None) # Create an in-memory database.
Text configuration
------------------
ZODB supports a text-based configuration language. It uses a syntax
similar to Apache configuration files. The syntax was chosen to be
familiar to site administrators.
ZODB's text configuration uses `ZConfig
<https://pypi.python.org/pypi/ZConfig>`_. You can use ZConfig to
create your application's configuration, but it's more common to
include ZODB configuration strings in their own files or embedded in
simpler configuration files, such as `configparser
<https://docs.python.org/3/library/configparser.html#module-configparser>`_
files.
A database configuration string has a ``zodb`` section wrapping a
storage section, as in::
<zodb>
cache-size-bytes 100MB
<mappingstorage>
</mappingstorage>
</zodb>
.. -> snippet
In the example above, the :ref:`mappingstorage
<mappingstorage-text-configuration>` section defines the storage used
by the database.
To create a database from a string, use
:func:`ZODB.config.databaseFromString`::
>>> import ZODB.config
>>> db = ZODB.config.databaseFromString(snippet)
To load databases from file names or URLs, use
:func:`ZODB.config.databaseFromURL`.
URI-based configuration
-----------------------
Another database configuration option is provided by the `zodburi
<https://pypi.python.org/pypi/zodburi>`_ package. See:
http://docs.pylonsproject.org/projects/zodburi. It's less powerful
than the Python or text configuration options, but allows
configuration to be reduced to a single URI and handles most cases.
Using databases: connections
============================
Once you have a database, you need to get a database connection to do
much of anything. Connections take care of loading and saving objects
and manage object caches. Each connection has its own cache
[#caches-are-expensive]_.
Getting connections
-------------------
Amongst [#amongst]_ the common ways of getting a connection:
db.open()
The database :meth:`~ZODB.DB.open` method opens a
connection, returning a connection object::
>>> conn = db.open()
It's up to the application to call
:meth:`~ZODB.Connection.Connection.close` when the application is
done using the connection.
If changes are made, the application :ref:`commits transactions
<commit-transactions>` to make them permanent.
db.transaction()
The database :meth:`~ZODB.DB.transaction` method
returns a context manager that can be used with the `python with
statement
<https://docs.python.org/3/reference/compound_stmts.html#grammar-token-with_stmt>`_
to execute a block of code in a transaction::
with db.transaction() as connection:
connection.root.foo = 1
.. -> src
>>> exec(src)
>>> with db.transaction() as connection:
... print connection.root.foo
1
>>> _ = conn.transaction_manager.begin() # get updates on conn
In the example above, we used ``as connection`` to get the database
connection used in the variable ``connection``.
some_object._p_jar
For code that's already running in the context of an open
connection, you can get the current connection as the ``_p_jar``
attribute of some persistent object that was accessed via the
connection.
Getting objects
---------------
Once you have a connection, you access objects by traversing the
object graph from the root object.
The database root object is a mapping object that holds the top level
objects in the database. There should only be a small number of
top-level objects (often only one). You can get the root object by calling a
connection's ``root`` attribute::
>>> root = conn.root()
>>> root
{'foo': 1}
>>> root['foo']
1
For convenience [#root-convenience]_, you can also get top-level
objects by accessing attributes of the connection root object:
>>> conn.root.foo
1
Once you have a top-level object, you use its methods, attributes, or
operations to access other objects and so on to get the objects you
need. Often indexing data structures like BTrees_ are used to
make it possible to search objects in large collections.
.. [#zlibstoragefn] `zc.zlibstorage
<https://pypi.python.org/pypi/zc.zlibstorage>`_ is an optional
package that you need to install separately.
.. [#zlib] ZlibStorage uses the :mod:`zlib` standard module, which
uses the `zlib library <http://www.zlib.net/>`_.
.. [#caches-are-expensive] ZODB can be very efficient at caching data
in memory, especially if your `working set
<https://en.wikipedia.org/wiki/Working_set>`_ is small enough to
fit in memory, because the cache is simply an object tree and
accessing a cached object typically requires no database
interaction. Because each connection has its own cache,
connections can be expensive, depending on their cache sizes. For
this reason, you'll generally want to limit the number of open
connections you have at any one time. Connections are pooled, so
opening a connection is inexpensive.
.. [#amongst] https://www.youtube.com/watch?v=7WJXHY2OXGE
.. [#root-convenience] The ability to access top-level objects of the
database as root attributes is a recent convenience. Originally,
the ``root()`` method was used to access the root object which was
then accessed as a mapping. It's still potentially useful to
access top-level objects using the mapping interface if their names
aren't valid attribute names.
.. _BTrees: https://pythonhosted.org/BTrees/
This diff is collapsed.
=======================
Reference Documentation
=======================
.. toctree::
:maxdepth: 2
zodb.rst
storages.rst
=============
Storage APIs
=============
.. contents::
Storage interfaces
==================
There are various storage implementations that implement standard
storage interfaces. They differ primarily in their constructors.
Application code rarely calls storage methods, and those it calls are
generally called indirectly through databases. There are
interface-defined methods that are called internally by ZODB. These
aren't shown below.
IStorage
--------
.. autointerface:: ZODB.interfaces.IStorage
:members: close, getName, getSize, history, isReadOnly, lastTransaction,
__len__, pack, sortKey
IStorageIteration
-----------------
.. autointerface:: ZODB.interfaces.IStorageIteration
IStorageUndoable
----------------
.. autointerface:: ZODB.interfaces.IStorageUndoable
:members: undoLog, undoInfo
IStorageCurrentRecordIteration
------------------------------
.. autointerface:: ZODB.interfaces.IStorageCurrentRecordIteration
IBlobStorage
------------
.. autointerface:: ZODB.interfaces.IBlobStorage
:members: temporaryDirectory
IStorageRecordInformation
-------------------------
.. autointerface:: ZODB.interfaces.IStorageRecordInformation
IStorageTransactionInformation
------------------------------
.. autointerface:: ZODB.interfaces.IStorageTransactionInformation
.. _included-storages-label:
Included storages
=================
FileStorage
-----------
.. autoclass:: ZODB.FileStorage.FileStorage.FileStorage
:members: __init__
.. autointerface:: ZODB.FileStorage.interfaces.IFileStoragePacker
FileStorage text configuration
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
File storages are configured using the ``filestorage`` section::
<filestorage>
path Data.fs
</filestorage>
which accepts the following options:
.. zconfigsectionkeys:: ZODB component.xml filestorage
MappingStorage
--------------
.. autoclass:: ZODB.MappingStorage.MappingStorage
:members: __init__
.. _mappingstorage-text-configuration:
MappingStorage text configuration
---------------------------------
Mapping storages are configured using the ``mappingstorage`` section::
<mappingstorage>
</mappingstorage>
Options:
.. zconfigsectionkeys:: ZODB component.xml mappingstorage
DemoStorage
-----------
.. autoclass:: ZODB.DemoStorage.DemoStorage
:members: __init__, push, pop
DemoStorage text configuration
------------------------------
Demo storages are configured using the ``demostorage`` section::
<demostorage>
<filestorage base>
path base.fs
</filestorage>
<mappingstorage changes>
name Changes
</mappingstorage>
</demostorage>
.. -> src
>>> import ZODB.config
>>> storage = ZODB.config.storageFromString(src)
>>> storage.base.getName()
'base.fs'
>>> storage.changes.getName()
'Changes'
``demostorage`` sections can contain up to 2 storage subsections,
named ``base`` and ``changes``, specifying the demo storage's base and
changes storages. See :meth:`ZODB.DemoStorage.DemoStorage.__init__`
for more on the base and changes storages.
Options:
.. zconfigsectionkeys:: ZODB component.xml demostorage
Noteworthy non-included storages
================================
A number of important ZODB storages are distributed separately, including:
RelStorage
`RelStorage <http://relstorage.readthedocs.io/en/latest/>`_
stores data in relational databases. This is especially
useful when you have requirements or existing infrastructure for
storing data in relational databases. Unlike the included storages,
multiple processes can share the same database.
For more information, see http://relstorage.readthedocs.io/en/latest/.
ZEO
`ZEO <https://github.com/zopefoundation/ZEO>`_ is a client-server
database implementation for ZODB. To use ZEO, you run a ZEO server,
and use ZEO clients in your application. Unlike the included
storages, multiple processes can share the same database.
For more information, see https://github.com/zopefoundation/ZEO.
ZRS
`ZRS <https://github.com/zc/zrs>`_
provides replication from one database to another. It's most
commonly used with ZEO. With ZRS, you create a ZRS primary database
around a :class:`~ZODB.FileStorage.FileStorage.FileStorage` and in a
separate process, you create a ZRS secondary storage around any
:interface:`storage <ZODB.interfaces.IStorage>`. As transactions are
committed on the primary, they're copied asynchronously to
secondaries.
For more information, see https://github.com/zc/zrs.
zlibstorage
`zlibstorage <https://pypi.python.org/pypi/zc.zlibstorage>`_
compresses database records using the compression
algorithm used by `gzip <http://www.gzip.org/>`_.
For more information, see https://pypi.python.org/pypi/zc.zlibstorage.
beforestorage
`beforestorage <https://pypi.python.org/pypi/zc.beforestorage>`_
provides a point-in-time view of a database that might
be changing. This can be useful to provide a non-changing view of a
production database for use with a :class:`~ZODB.DemoStorage.DemoStorage`.
For more information, see https://pypi.python.org/pypi/zc.beforestorage.
cipher.encryptingstorage
`cipher.encryptingstorage
<https://pypi.python.org/pypi/cipher.encryptingstorage/>`_ provides
compression and encryption of database records.
For more information, see
https://pypi.python.org/pypi/cipher.encryptingstorage/.
=========
ZODB APIs
=========
.. contents::
ZODB module functions
=====================
.. method:: DB(storage, *args, **kw)
Create a database. See :py:class:`ZODB.DB`.
.. autofunction:: ZODB.connection
Databases
=========
.. autoclass:: ZODB.DB
:members: __init__, open, close, pack,
cacheDetail, cacheExtremeDetail, cacheMinimize,
cacheSize, cacheDetailSize, getCacheSize, getCacheSizeBytes,
lastTransaction, getName, getPoolSize, getSize,
getHistoricalCacheSize, getHistoricalCacheSizeBytes,
getHistoricalPoolSize, getHistoricalTimeout,
objectCount, connectionDebugInfo,
setCacheSize, setCacheSizeBytes,
setHistoricalCacheSize, setHistoricalCacheSizeBytes,
setPoolSize, setHistoricalPoolSize, setHistoricalTimeout,
history,
supportsUndo, undoLog, undoInfo, undoMultiple, undo,
transaction, storage
.. _database-text-configuration:
Database text configuration
---------------------------
Databases are configured with ``zodb`` sections::
<zodb>
cache-size-bytes 100MB
<mappingstorage>
</mappingstorage>
</zodb>
A ``zodb`` section must have a storage sub-section specifying a
storage and any of the following options:
.. zconfigsectionkeys:: ZODB component.xml zodb
.. _multidatabase-text-configuration:
For a multi-database configuration, use multiple ``zodb`` sections and
give the sections names::
<zodb first>
cache-size-bytes 100MB
<mappingstorage>
</mappingstorage>
</zodb>
<zodb second>
<mappingstorage>
</mappingstorage>
</zodb>
.. -> src
>>> import ZODB.config
>>> db = ZODB.config.databaseFromString(src)
>>> sorted(db.databases)
['first', 'second']
>>> db._cache_size_bytes
104857600
When the configuration is loaded, a single database will be returned,
but all of the databases will be available through the returned
database's ``databases`` attribute.
Connections
===========
.. autoclass:: ZODB.Connection.Connection
:members: add, cacheGC, cacheMinimize, close, db, get,
getDebugInfo, get_connection, isReadOnly, oldstate,
onCloseCallback, root, setDebugInfo, sync
TimeStamp (transaction ids)
===========================
.. class:: ZODB.TimeStamp.TimeStamp(year, month, day, hour, minute, seconds)
Create a time-stamp object. Time stamps facilitate the computation
of transaction ids, which are based on times. The arguments are
integers, except for seconds, which may be a floating-point
number. Time stamps have microsecond precision. Time stamps are
implicitly UTC based.
Time stamps are orderable and hashable.
.. method:: day()
Return the time stamp's day.
.. method:: hour()
Return the time stamp's hour.
.. method:: laterThan(other)
Return a timestamp instance which is later than 'other'.
If self already qualifies, return self.
Otherwise, return a new instance one moment later than 'other'.
.. method:: minute()
Return the time stamp's minute.
.. method:: month()
Return the time stamp's month.
.. method:: raw()
Get an 8-byte representation of the time stamp for use in APIs
that require a time stamp.
.. method:: second()
Return the time stamp's second.
.. method:: timeTime()
Return the time stamp as seconds since the epoch, as used by the
``time`` module.
.. method:: year()
Return the time stamp's year.
Loading configuration
=====================
.. automodule:: ZODB.config
:members: databaseFromString, databaseFromFile, databaseFromURL,
storageFromString, storageFromFile, storageFromURL
.. _tutorial-label:
========
Tutorial
========
This tutorial is intended to guide developers with a step-by-step introduction
of how to develop an application which stores its data in the ZODB.
Introduction
============
To save application data in ZODB, you'll generally define classes that
subclass ``persistent.Persistent``::
# account.py
import persistent
class Account(persistent.Persistent):
def __init__(self):
self.balance = 0.0
def deposit(self, amount):
self.balance += amount
def cash(self, amount):
assert amount < self.balance
self.balance -= amount
This code defines a simple class that holds the balance of a bank
account and provides two methods to manipulate the balance: deposit
and cash.
Subclassing ``Persistent`` provides a number of features:
- The database will automatically track object changes made by setting
attributes [#changed]_.
- Data will be saved in its own database record.
You can save data that doesn't subclass ``Persistent``, but it will be
stored in the database record of whatever persistent object
references it.
- Objects will have unique persistent identity.
Multiple objects can refer to the same persistent object and they'll
continue to refer to the same object even after being saved
and loaded from the database.
Non-persistent objects are essentially owned by their containing
persistent object and if multiple persistent objects refer to the
same non-persistent subobject, they'll (eventually) get their own
copies.
Note that we put the class in a named module. Classes aren't stored
in the ZODB [#persistentclasses]_. They exist on the file system and
their names, consisting of their class and module names, are stored in
the database. It's sometimes tempting to create persistent classes in
scripts or in interactive sessions, but if you do, then their module
name will be ``'__main__'`` and you'll always have to define them that
way.
Installation
============
Before being able to use ZODB we have to install it. A common way to
do this is with pip::
$ pip install ZODB
Creating Databases
==================
When a program wants to use the ZODB it has to establish a connection,
like any other database. For the ZODB we need 3 different parts: a
storage, a database and finally a connection::
import ZODB, ZODB.FileStorage
storage = ZODB.FileStorage.FileStorage('mydata.fs')
db = ZODB.DB(storage)
connection = db.open()
root = connection.root
ZODB has a pluggable storage framework. This means there are a
variety of storage implementations to meet different needs, from
in-memory databases, to databases stored in local files, to databases
on remote database servers, and specialized databases for compression,
encryption, and so on. In the example above, we created a database
that stores its data in a local file, using the ``FileStorage``
class.
Having a storage, we then use it to instantiate a database, which we
then connect to by calling ``open()``. A process with multiple
threads will often have multiple connections to the same database,
with different threads having different connections.
There are a number of convenient shortcuts you can use for some of the
commonly used storages:
- You can pass a file name to the ``DB`` constructor to have it construct
a FileStorage for you::
db = ZODB.DB('mydata.fs')
You can pass None to create an in-memory database::
memory_db = ZODB.DB(None)
- If you're only going to use one connection, you can call the
``connection`` function::
connection = ZODB.connection('mydata.fs')
memory_connection = ZODB.connection(None)
Storing objects
===============
To store an object in the ZODB we simply attach it to any other object
that already lives in the database. Hence, the root object functions
as a boot-strapping point. The root object is meant to serve as a
namespace for top-level objects in your database. We could store
account objects directly on the root object::
import account
# Probably a bad idea:
root.account1 = account.Account()
But if you're going to store many objects, you'll want to use a
collection object [#root]_::
import account, BTrees.OOBTree
root.accounts = BTrees.OOBTree.BTree()
root.accounts['account-1'] = account.Account()
Another common practice is to store a persistent object in the root of
the database that provides an application-specific root::
root.accounts = AccountManagementApplication()
That can facilitate encapsulation of an application that shares a
database with other applications. This is a little bit like using
modules to avoid namespace collisions in Python programs.
Containers and search
=====================
BTrees provide the core scalable containers and indexing facility for
ZODB. There are different families of BTrees. The most general are
OOBTrees, which have object keys and values. There are specialized
BTrees that support integer keys and values. Integers can be stored
more efficiently, and compared more quickly than objects and they're
often used as application-level object identifiers. It's critical,
when using BTrees, to make sure that their keys have a stable ordering.
ZODB doesn't provide a query engine. The primary way to access
objects in ZODB is by traversing (accessing attributes or items, or
calling methods) other objects. Object traversal is typically much
faster than search.
You can use BTrees to build indexes for efficient search, when
necessary. If your application is search centric, or if you prefer to
approach data access that way, then ZODB might not be the best
technology for you.
Transactions
============
You now have objects in your root object and in your database.
However, they are not permanently stored yet. The ZODB uses
transactions and to make your changes permanent, you have to commit
the transaction::
import transaction
transaction.commit()
Now you can stop and start your application and look at the root object again,
and you will find the data you saved.
If your application makes changes during a transaction and finds that it does
not want to commit those changes, then you can abort the transaction and have
the changes rolled back [#rollback]_ for you::
transaction.abort()
Transactions are a very powerful way to protect the integrity of a
database. Transactions have the property that all of the changes made
in a transaction are saved, or none of them are. If in the midst of a
program, there's an error after making changes, you can simply abort
the transaction (or not commit it) and all of the intermediate changes
you make are automatically discarded.
Memory Management
=================
ZODB manages moving objects in and out of memory for you. The unit of
storage is the persistent object. When you access attributes of a
persistent object, it's loaded from the database automatically, if
necessary. If too many objects are in memory, then objects used least
recently are evicted [#eviction]_. The maximum number of objects or
bytes in memory is configurable.
Summary
=======
You have seen how to install ZODB and how to open a database in your
application and to start storing objects in it. We also touched on the
two simple transaction commands: ``commit`` and ``abort``. The
reference documentation contains sections with more information on the
individual topics.
.. [#changed]
You can manually mark an object as changed by setting its
``_p_changed`` attribute to ``True``. You might do this if you
update a subobject, such as a standard Python ``list`` or ``set``,
that doesn't subclass ``Persistent``.
.. [#persistentclasses]
Actually, there is semi-experimental support for storing classes in
the database, but applications rarely do this.
.. [#root]
The root object is a fairly simple persistent object that's stored
in a single database record. If you stored many objects in it,
its database record would become very large, causing updates to be
inefficient and causing memory to be used inefficiently.
Another reason not to store items directly in the root object is
that doing so would make adding a second collection of objects
later awkward.
.. [#rollback]
A caveat is that ZODB can only roll back changes to objects that
have been stored and committed to the database. Objects not
previously committed can't be rolled back because there's no
previous state to roll back to.
.. [#eviction]
Objects aren't actually evicted, but their state is released, so
they take up much less memory and any objects they referenced can
be removed from memory.
This diff is collapsed.
Sphinx
docutils
ZODB
j1m.sphinxautointerface
j1m.sphinxautozconfig
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg width="89px" height="87px" viewBox="0 0 89 87" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:sketch="http://www.bohemiancoding.com/sketch/ns">
<!-- Generator: Sketch 3.3.3 (12072) - http://www.bohemiancoding.com/sketch -->
<title>ZODB_logo</title>
<desc>Created with Sketch.</desc>
<defs></defs>
<g id="Page-1" stroke="none" stroke-width="1" fill="none" fill-rule="evenodd" sketch:type="MSPage">
<g id="ZODB_logo" sketch:type="MSLayerGroup" transform="translate(1.000000, 1.000000)" stroke="#1F5913" stroke-width="2">
<path d="M43.5,85 C67.5243866,85 87,65.9721019 87,42.5 C87,19.0278981 67.5243866,0 43.5,0 C19.4756134,0 0,19.0278981 0,42.5 C0,65.9721019 19.4756134,85 43.5,85 Z M22.567182,62.9328646 C11.1442877,51.5100624 11.1442877,32.9900388 22.567092,21.5671466 C33.9898964,10.1443445 52.5100136,10.1442545 63.932818,21.5670566 C75.3557123,32.9899488 75.3557123,51.5099724 63.932908,62.9328646 C52.5101036,74.3556668 33.9899864,74.3557568 22.567182,62.9328646 Z" id="Oval-1" fill="#42982F" sketch:type="MSShapeGroup"></path>
<path d="M9.83640265,14.5898623 L11.8733061,24.310227 L16.8264662,25.5598626 L22.8161699,18.1609813" id="shapePath4-Copy-2" sketch:type="MSShapeGroup" transform="translate(16.326286, 20.074862) rotate(27.000000) translate(-16.326286, -20.074862) "></path>
</g>
</g>
</svg>
\ No newline at end of file
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment