Commit 22d13d0b authored by Tim Peters's avatar Tim Peters

Move get_pickle_metadata() into utils.py.

Try to make more sense of the ZODB pickle format "docs".
parent a56b571c
from cPickle import Unpickler
from cStringIO import StringIO
##############################################################################
#
# Copyright (c) 2003 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
import md5
import struct
......@@ -7,48 +18,9 @@ from ZODB.FileStorage import FileIterator
from ZODB.FileStorage.format \
import TRANS_HDR, TRANS_HDR_LEN, DATA_HDR, DATA_HDR_LEN
from ZODB.TimeStamp import TimeStamp
from ZODB.utils import u64
from ZODB.utils import u64, get_pickle_metadata
from ZODB.tests.StorageTestBase import zodb_unpickle
def get_pickle_metadata(data):
# ZODB's data records contain two pickles. The first is the class
# of the object, the second is the object. We're only trying to
# pick apart the first here, to extract the module and class names.
if data.startswith('(c'): # pickle MARK GLOBAL opcode sequence
global_prefix = 2
elif data.startswith('c'): # pickle GLOBAL opcode
global_prefix = 1
else:
global_prefix = 0
if global_prefix:
# Don't actually unpickle a class, because it will attempt to
# load the class. Just break open the pickle and get the
# module and class from it. The module and the class names are
# given by newline-terminated strings following the GLOBAL opcode.
modname, classname, rest = data.split('\n', 2)
modname = modname[global_prefix:] # strip GLOBAL opcode
return modname, classname
# Else there are a bunch of other possible formats.
f = StringIO(data)
u = Unpickler(f)
try:
class_info = u.load()
except Exception, err:
print "Error", err
return '', ''
if isinstance(class_info, tuple):
if isinstance(class_info[0], tuple):
modname, classname = class_info[0]
else:
modname, classname = class_info
else:
# XXX not sure what to do here
modname = repr(class_info)
classname = ''
return modname, classname
def fsdump(path, file=None, with_offset=1):
i = 0
iter = FileIterator(path)
......
......@@ -34,34 +34,42 @@ The class description can be in a variety of formats, in part to
provide backwards compatibility with earlier versions of Zope. The
two current formats for class description are:
- type(obj)
- type(obj), obj.__getnewargs__()
1. type(obj)
2. type(obj), obj.__getnewargs__()
The second of these options is used if the object has a
__getnewargs__() method. It is intended to support objects like
persistent classes that have custom C layouts that are determined by
arguments to __new__().
The second of these options is used if the object has a __getnewargs__()
method. It is intended to support objects like persistent classes that have
custom C layouts that are determined by arguments to __new__().
The type object is usually stored using the standard pickle mechanism,
which uses a string containing the class's module and name. The type
may itself be a persistent object, in which case a persistent
reference (see below) is used.
The type object is usually stored using the standard pickle mechanism, which
involves the pickle GLOBAL opcode (giving the type's module and name as
strings). The type may itself be a persistent object, in which case a
persistent reference (see below) is used.
Earlier versions of Zope supported several other kinds of class
descriptions. The current serialization code reads these
descriptions, but does not write them.
It's unclear what "usually" means in the last paragraph. There are two
useful places to concentrate confusion about exactly which formats exist:
- BaseObjectReader.getClassName() below returns a dotted "module.class"
string, via actually loading a pickle. This requires that the
implementation of application objects be available.
The four formats are:
- ZODB/utils.py's get_pickle_metadata() tries to return the module and
class names (as strings) without importing any application modules or
classes, via analyzing the pickle.
1. (module name, class name), None
2. (module name, class name), __getinitargs__()
3. class, None
4. class, __getinitargs__()
Earlier versions of Zope supported several other kinds of class
descriptions. The current serialization code reads these descriptions, but
does not write them. The four earlier formats are:
Formats 2 and 4 are used only if the class defines an
__getinitargs__() method. Formats 3 and 4 are used if the class does
not have an __module__ attribute. (I'm not sure when this applies,
but I think it occurs for some but not all ZClasses.)
3. (module name, class name), None
4. (module name, class name), __getinitargs__()
5. class, None
6. class, __getinitargs__()
Formats 4 and 6 are used only if the class defines a __getinitargs__()
method. Formats 5 and 6 are used if the class does not have a __module__
attribute (I'm not sure when this applies, but I think it occurs for some
but not all ZClasses).
Persistent references
......@@ -79,7 +87,6 @@ possible to change the class of a persistent object. If a transaction
changed the class of an object, a new record with new class metadata
would be written but all the old references would still include the
old class.
"""
import cPickle
......
......@@ -16,6 +16,8 @@ import sys
import time
from struct import pack, unpack
from binascii import hexlify
import cPickle as pickle
from cStringIO import StringIO
from persistent.TimeStamp import TimeStamp
......@@ -109,3 +111,52 @@ def positive_id(obj):
result += 1L << 64
assert result >= 0 # else addresses are fatter than 64 bits
return result
# Given a ZODB pickle, return pair of strings (module_name, class_name).
# Do this without importing the module or class object.
# See ZODB/serialize.py's module docstring for the only docs that exist about
# ZODB pickle format. If the code here gets smarter, please update those
# docs to be at least as smart. The code here doesn't appear to make sense
# for what serialize.py calls formats 5 and 6.
def get_pickle_metadata(data):
# ZODB's data records contain two pickles. The first is the class
# of the object, the second is the object. We're only trying to
# pick apart the first here, to extract the module and class names.
if data.startswith('(c'): # pickle MARK GLOBAL opcode sequence
global_prefix = 2
elif data.startswith('c'): # pickle GLOBAL opcode
global_prefix = 1
else:
global_prefix = 0
if global_prefix:
# Formats 1 and 2.
# Don't actually unpickle a class, because it will attempt to
# load the class. Just break open the pickle and get the
# module and class from it. The module and class names are given by
# newline-terminated strings following the GLOBAL opcode.
modname, classname, rest = data.split('\n', 2)
modname = modname[global_prefix:] # strip GLOBAL opcode
return modname, classname
# Else there are a bunch of other possible formats.
f = StringIO(data)
u = pickle.Unpickler(f)
try:
class_info = u.load()
except Exception, err:
print "Error", err
return '', ''
if isinstance(class_info, tuple):
if isinstance(class_info[0], tuple):
# Formats 3 and 4.
modname, classname = class_info[0]
else:
# Formats 5 and 6 (probably) end up here.
modname, classname = class_info
else:
# This isn't a known format.
modname = repr(class_info)
classname = ''
return modname, classname
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment