Commit 22d13d0b authored by Tim Peters

Move get_pickle_metadata() into utils.py.

Try to make more sense of the ZODB pickle format "docs".
parent a56b571c
from cPickle import Unpickler ##############################################################################
from cStringIO import StringIO #
# Copyright (c) 2003 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
import md5 import md5
import struct import struct
...@@ -7,48 +18,9 @@ from ZODB.FileStorage import FileIterator ...@@ -7,48 +18,9 @@ from ZODB.FileStorage import FileIterator
from ZODB.FileStorage.format \ from ZODB.FileStorage.format \
import TRANS_HDR, TRANS_HDR_LEN, DATA_HDR, DATA_HDR_LEN import TRANS_HDR, TRANS_HDR_LEN, DATA_HDR, DATA_HDR_LEN
from ZODB.TimeStamp import TimeStamp from ZODB.TimeStamp import TimeStamp
from ZODB.utils import u64 from ZODB.utils import u64, get_pickle_metadata
from ZODB.tests.StorageTestBase import zodb_unpickle from ZODB.tests.StorageTestBase import zodb_unpickle
def get_pickle_metadata(data):
    """Return a (module_name, class_name) pair for a ZODB data record.

    data is the raw record string.  Only the leading class-description
    pickle is examined.

    Records that begin with the pickle GLOBAL opcode (optionally preceded
    by MARK) are taken apart textually, so nothing is imported for them.
    Anything else is handed to a real unpickler.

    ('', '') is returned when the record cannot be interpreted: a
    truncated GLOBAL header, an unpickling failure (reported on stderr),
    or an unpickled tuple that does not hold exactly two items.  If the
    unpickled value is not a tuple at all, (repr(value), '') is returned.
    """
    # ZODB's data records contain two pickles.  The first is the class
    # of the object, the second is the object.  We're only trying to
    # pick apart the first here, to extract the module and class names.
    if data.startswith('(c'):   # pickle MARK GLOBAL opcode sequence
        global_prefix = 2
    elif data.startswith('c'):  # pickle GLOBAL opcode
        global_prefix = 1
    else:
        global_prefix = 0

    if global_prefix:
        # Don't actually unpickle a class, because it will attempt to
        # load the class.  Just break open the pickle and get the
        # module and class from it.  The module and the class names are
        # given by newline-terminated strings following the GLOBAL opcode.
        pieces = data.split('\n', 2)
        if len(pieces) < 3:
            # Fewer than two newline terminators: the header is truncated,
            # so there is no complete module/class pair to report.
            return '', ''
        modname = pieces[0][global_prefix:]   # strip GLOBAL opcode
        classname = pieces[1]
        return modname, classname

    # Else there are a bunch of other possible formats.
    # NOTE(review): this runs a real unpickler over the record, so a pickle
    # that contains a nested GLOBAL can still import application code.
    # Only feed this trusted storage data.
    f = StringIO(data)
    u = Unpickler(f)
    try:
        class_info = u.load()
    except Exception:
        # sys.exc_info() avoids the version-specific "except X, err" form,
        # and the diagnostic goes to stderr so it cannot get mixed into
        # whatever the caller is writing to stdout.
        import sys
        sys.stderr.write("Error %s\n" % (sys.exc_info()[1],))
        return '', ''

    if not isinstance(class_info, tuple):
        # XXX not sure what to do here
        return repr(class_info), ''

    if class_info and isinstance(class_info[0], tuple):
        names = class_info[0]
    else:
        names = class_info
    if len(names) != 2:
        # Not a (module, class) pair; unpacking it would raise.
        return '', ''
    modname, classname = names
    return modname, classname
def fsdump(path, file=None, with_offset=1): def fsdump(path, file=None, with_offset=1):
i = 0 i = 0
iter = FileIterator(path) iter = FileIterator(path)
......
...@@ -34,34 +34,42 @@ The class description can be in a variety of formats, in part to ...@@ -34,34 +34,42 @@ The class description can be in a variety of formats, in part to
provide backwards compatibility with earlier versions of Zope. The provide backwards compatibility with earlier versions of Zope. The
two current formats for class description are: two current formats for class description are:
- type(obj) 1. type(obj)
- type(obj), obj.__getnewargs__() 2. type(obj), obj.__getnewargs__()
The second of these options is used if the object has a The second of these options is used if the object has a __getnewargs__()
__getnewargs__() method. It is intended to support objects like method. It is intended to support objects like persistent classes that have
persistent classes that have custom C layouts that are determined by custom C layouts that are determined by arguments to __new__().
arguments to __new__().
The type object is usually stored using the standard pickle mechanism, The type object is usually stored using the standard pickle mechanism, which
which uses a string containing the class's module and name. The type involves the pickle GLOBAL opcode (giving the type's module and name as
may itself be a persistent object, in which case a persistent strings). The type may itself be a persistent object, in which case a
reference (see below) is used. persistent reference (see below) is used.
Earlier versions of Zope supported several other kinds of class It's unclear what "usually" means in the last paragraph. There are two
descriptions. The current serialization code reads these useful places to concentrate confusion about exactly which formats exist:
descriptions, but does not write them.
- BaseObjectReader.getClassName() below returns a dotted "module.class"
string, via actually loading a pickle. This requires that the
implementation of application objects be available.
The four formats are: - ZODB/utils.py's get_pickle_metadata() tries to return the module and
class names (as strings) without importing any application modules or
classes, via analyzing the pickle.
1. (module name, class name), None Earlier versions of Zope supported several other kinds of class
2. (module name, class name), __getinitargs__() descriptions. The current serialization code reads these descriptions, but
3. class, None does not write them. The four earlier formats are:
4. class, __getinitargs__()
Formats 2 and 4 are used only if the class defines an 3. (module name, class name), None
__getinitargs__() method. Formats 3 and 4 are used if the class does 4. (module name, class name), __getinitargs__()
not have an __module__ attribute. (I'm not sure when this applies, 5. class, None
but I think it occurs for some but not all ZClasses.) 6. class, __getinitargs__()
Formats 4 and 6 are used only if the class defines a __getinitargs__()
method. Formats 5 and 6 are used if the class does not have a __module__
attribute (I'm not sure when this applies, but I think it occurs for some
but not all ZClasses).
Persistent references Persistent references
...@@ -79,7 +87,6 @@ possible to change the class of a persistent object. If a transaction ...@@ -79,7 +87,6 @@ possible to change the class of a persistent object. If a transaction
changed the class of an object, a new record with new class metadata changed the class of an object, a new record with new class metadata
would be written but all the old references would still include the would be written but all the old references would still include the
old class. old class.
""" """
import cPickle import cPickle
......
...@@ -16,6 +16,8 @@ import sys ...@@ -16,6 +16,8 @@ import sys
import time import time
from struct import pack, unpack from struct import pack, unpack
from binascii import hexlify from binascii import hexlify
import cPickle as pickle
from cStringIO import StringIO
from persistent.TimeStamp import TimeStamp from persistent.TimeStamp import TimeStamp
...@@ -109,3 +111,52 @@ def positive_id(obj): ...@@ -109,3 +111,52 @@ def positive_id(obj):
result += 1L << 64 result += 1L << 64
assert result >= 0 # else addresses are fatter than 64 bits assert result >= 0 # else addresses are fatter than 64 bits
return result return result
# Given a ZODB pickle, return pair of strings (module_name, class_name).
# Do this without importing the module or class object.
# See ZODB/serialize.py's module docstring for the only docs that exist about
# ZODB pickle format.  If the code here gets smarter, please update those
# docs to be at least as smart.  The code here doesn't appear to make sense
# for what serialize.py calls formats 5 and 6.
def get_pickle_metadata(data):
    """Return a (module_name, class_name) pair for a ZODB data record.

    data is the raw record string.  Only the leading class-description
    pickle is examined.

    Records that begin with the pickle GLOBAL opcode (optionally preceded
    by MARK) are taken apart textually, so nothing is imported for them.
    Anything else is handed to a real unpickler.

    ('', '') is returned when the record cannot be interpreted: a
    truncated GLOBAL header, an unpickling failure (reported on stderr),
    or an unpickled tuple that does not hold exactly two items.  If the
    unpickled value is not a tuple at all, (repr(value), '') is returned.
    """
    # ZODB's data records contain two pickles.  The first is the class
    # of the object, the second is the object.  We're only trying to
    # pick apart the first here, to extract the module and class names.
    if data.startswith('(c'):   # pickle MARK GLOBAL opcode sequence
        global_prefix = 2
    elif data.startswith('c'):  # pickle GLOBAL opcode
        global_prefix = 1
    else:
        global_prefix = 0

    if global_prefix:
        # Formats 1 and 2.
        # Don't actually unpickle a class, because it will attempt to
        # load the class.  Just break open the pickle and get the
        # module and class from it.  The module and class names are given by
        # newline-terminated strings following the GLOBAL opcode.
        pieces = data.split('\n', 2)
        if len(pieces) < 3:
            # Fewer than two newline terminators: the header is truncated,
            # so there is no complete module/class pair to report.
            return '', ''
        modname = pieces[0][global_prefix:]   # strip GLOBAL opcode
        classname = pieces[1]
        return modname, classname

    # Else there are a bunch of other possible formats.
    # NOTE(review): this runs a real unpickler over the record, so a pickle
    # that contains a nested GLOBAL can still import application code.
    # Only feed this trusted storage data.
    f = StringIO(data)
    u = pickle.Unpickler(f)
    try:
        class_info = u.load()
    except Exception:
        # sys.exc_info() avoids the version-specific "except X, err" form,
        # and the diagnostic goes to stderr so it cannot get mixed into
        # whatever the caller is writing to stdout.
        import sys
        sys.stderr.write("Error %s\n" % (sys.exc_info()[1],))
        return '', ''

    if not isinstance(class_info, tuple):
        # This isn't a known format.
        return repr(class_info), ''

    if class_info and isinstance(class_info[0], tuple):
        # Formats 3 and 4.
        names = class_info[0]
    else:
        # Formats 5 and 6 (probably) end up here.
        names = class_info
    if len(names) != 2:
        # Not a (module, class) pair; unpacking it would raise.
        return '', ''
    modname, classname = names
    return modname, classname
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment