Commit d57a9c16 authored by Jérome Perrin's avatar Jérome Perrin

XMLExportImport: support pickle protocol 3 and use it internally on py3

When exporting business templates, we need to build a list of referenced
persistent objects to export them separately in the XML, this is is done
using noload, in a way which does not support pickle protocol 1 (the
persistent ids are None and the assertion in
https://github.com/zopefoundation/ZODB/blob/d698507bb89eeb38c6e655199bc9f54c909dbf4d/src/ZODB/serialize.py#L669
fails), so we need to use pickle protocol 3, at least internally when
building this list in Products.ERP5Type.XMLExportImport.reorderPickle.

In the long term, we probably want to use pickle protocol 3 also
when exporting business templates, although this commit also brings the
full support of protocol 3, this is left for later, so that we keep a
stable format of business template export. Later, once we are ready to
switch to protocol 3 and re-export business templates, it should be
enough to change Products.ERP5Type.XMLExportImport.DEFAULT_PICKLE_PROTOCOL
to 3.

While adding test coverage, it was found that the ad-hoc handling of
boolean in protocol 1 was not implemented correctly and they were
serialized as integers (0 for False and 1 for True), this is also fixed.
parent 733c35fc
Pipeline #35974 failed with stage
in 0 seconds
...@@ -35,6 +35,7 @@ from io import BytesIO ...@@ -35,6 +35,7 @@ from io import BytesIO
from six import StringIO from six import StringIO
from Products.ERP5Type.XMLExportImport import importXML, ppml from Products.ERP5Type.XMLExportImport import importXML, ppml
import six import six
import lxml.etree
class DummyClass: class DummyClass:
...@@ -46,11 +47,21 @@ class DummyClass: ...@@ -46,11 +47,21 @@ class DummyClass:
self.data = [] self.data = []
class DummyPersistentClass:
def __init__(self, v, oid):
self.v = v
self._p_oid = oid
class XMLPickleTestCase(unittest.TestCase): class XMLPickleTestCase(unittest.TestCase):
_pickle_protocol = 3 _pickle_protocol = 3
def dump_to_xml(self, obj): def dump_to_xml(self, obj, persistent_id=None):
pickled_string = pickle.dumps(obj, protocol=self._pickle_protocol) f = BytesIO()
f = BytesIO(pickled_string) pickler = pickle.Pickler(f, protocol=self._pickle_protocol)
if persistent_id:
pickler.persistent_id = persistent_id
pickler.dump(obj)
f.seek(0)
xml = ppml.ToXMLUnpickler(f).load().__str__() xml = ppml.ToXMLUnpickler(f).load().__str__()
self.assertIsInstance(xml, str) self.assertIsInstance(xml, str)
return xml return xml
...@@ -150,6 +161,8 @@ class TestXMLPickle(XMLPickleTestCase): ...@@ -150,6 +161,8 @@ class TestXMLPickle(XMLPickleTestCase):
def test_bytes(self): def test_bytes(self):
self.check_and_load(b"bytes") self.check_and_load(b"bytes")
self.check_and_load(b"long bytes" * 100) self.check_and_load(b"long bytes" * 100)
if six.PY3 or self._pickle_protocol > 1:
# protocol 1 does not keep bytes
self.check_and_load(zodbpickle.binary(b"bytes")) self.check_and_load(zodbpickle.binary(b"bytes"))
self.check_and_load(zodbpickle.binary(b"")) self.check_and_load(zodbpickle.binary(b""))
...@@ -218,6 +231,47 @@ class TestXMLPickle(XMLPickleTestCase): ...@@ -218,6 +231,47 @@ class TestXMLPickle(XMLPickleTestCase):
self.assertEqual(reconstructed, [ref, ref, ref]) self.assertEqual(reconstructed, [ref, ref, ref])
self.assertIs(reconstructed[0], reconstructed[1]) self.assertIs(reconstructed[0], reconstructed[1])
def test_persistent(self):
p1 = DummyPersistentClass(1, b'1')
p2 = DummyPersistentClass(2, b'2')
persistent_ids = []
def persistent_id(obj):
if isinstance(obj, DummyPersistentClass):
persistent_ids.append(obj._p_oid)
return obj._p_oid
xml = self.dump_to_xml(
{'p1': p1, 'p2': p2, 'not p': 'not p'},
persistent_id=persistent_id)
self.assertEqual(sorted(persistent_ids), [b'1', b'2'])
def persistent_load(oid):
persistent_ids.remove(oid)
return oid
obj = self.load_from_xml(xml, persistent_load)
self.assertEqual(obj,
{'p1': b'1', 'p2': b'2', 'not p': 'not p'})
self.assertEqual(persistent_ids, [])
def test_renamed_class(self):
if six.PY2:
from UserList import UserList
else:
from collections import UserList
l = UserList([1, 2])
xml = self.dump_to_xml(l)
if self._pickle_protocol == 1:
self.assertEqual(
lxml.etree.fromstring(xml).xpath('//global[@name="UserList"]/@module'),
["UserList"],
)
self.check_and_load(l)
class TestXMLPickleProtocol1(TestXMLPickle):
_pickle_protocol = 1
class TestXMLPickleStringEncoding(XMLPickleTestCase): class TestXMLPickleStringEncoding(XMLPickleTestCase):
def test_string_base64(self): def test_string_base64(self):
......
...@@ -67,6 +67,10 @@ MARSHALLER_NAMESPACE_URI = 'http://www.erp5.org/namespaces/marshaller' ...@@ -67,6 +67,10 @@ MARSHALLER_NAMESPACE_URI = 'http://www.erp5.org/namespaces/marshaller'
marshaller = Marshaller(namespace_uri=MARSHALLER_NAMESPACE_URI, marshaller = Marshaller(namespace_uri=MARSHALLER_NAMESPACE_URI,
as_tree=True).dumps as_tree=True).dumps
DEFAULT_PICKLE_PROTOCOL = 1
HIGHEST_PICKLE_PROTOCOL = 1 if six.PY2 else 3
class OrderedPickler(Pickler): class OrderedPickler(Pickler):
"""Pickler producing consistent output by saving dicts in order """Pickler producing consistent output by saving dicts in order
""" """
...@@ -250,7 +254,8 @@ from . import ppml ...@@ -250,7 +254,8 @@ from . import ppml
magic=b'<?xm' # importXML(jar, file, clue)} magic=b'<?xm' # importXML(jar, file, clue)}
def reorderPickle(jar, p):
def reorderPickle(jar, p, pickle_protocol):
try: try:
from ZODB._compat import Unpickler, Pickler from ZODB._compat import Unpickler, Pickler
except ImportError: # BBB: ZODB 3.10 except ImportError: # BBB: ZODB 3.10
...@@ -284,7 +289,7 @@ def reorderPickle(jar, p): ...@@ -284,7 +289,7 @@ def reorderPickle(jar, p):
unpickler.persistent_load=persistent_load unpickler.persistent_load=persistent_load
newp=BytesIO() newp=BytesIO()
pickler = OrderedPickler(newp, 3) pickler = OrderedPickler(newp, pickle_protocol)
pickler.persistent_id=persistent_id pickler.persistent_id=persistent_id
classdef = unpickler.load() classdef = unpickler.load()
...@@ -294,7 +299,7 @@ def reorderPickle(jar, p): ...@@ -294,7 +299,7 @@ def reorderPickle(jar, p):
if 0: # debug if 0: # debug
debugp = BytesIO() debugp = BytesIO()
debugpickler = OrderedPickler(debugp, 3) debugpickler = OrderedPickler(debugp, pickle_protocol)
debugpickler.persistent_id = persistent_id debugpickler.persistent_id = persistent_id
debugpickler.dump(obj) debugpickler.dump(obj)
import pickletools import pickletools
...@@ -323,7 +328,8 @@ def XMLrecord(oid, plen, p, id_mapping): ...@@ -323,7 +328,8 @@ def XMLrecord(oid, plen, p, id_mapping):
String=' <record id="%s" aka="%s">\n%s </record>\n' % (id, bytes2str(aka), p) String=' <record id="%s" aka="%s">\n%s </record>\n' % (id, bytes2str(aka), p)
return String return String
def exportXML(jar, oid, file=None):
def exportXML(jar, oid, file=None, pickle_protocol=DEFAULT_PICKLE_PROTOCOL):
# For performance reasons, exportXML does not use 'XMLrecord' anymore to map # For performance reasons, exportXML does not use 'XMLrecord' anymore to map
# oids. This requires to initialize MinimalMapping.marked_reference before # oids. This requires to initialize MinimalMapping.marked_reference before
# any string output, i.e. in ppml.Reference.__init__ # any string output, i.e. in ppml.Reference.__init__
...@@ -339,7 +345,7 @@ def exportXML(jar, oid, file=None): ...@@ -339,7 +345,7 @@ def exportXML(jar, oid, file=None):
p = pickle_dict[oid] p = pickle_dict[oid]
if p is None: if p is None:
p = load(oid)[0] p = load(oid)[0]
p = reorderPickle(jar, p)[1] p = reorderPickle(jar, p, HIGHEST_PICKLE_PROTOCOL)[1]
if len(p) < max_cache[0]: if len(p) < max_cache[0]:
max_cache[0] -= len(p) max_cache[0] -= len(p)
pickle_dict[oid] = p pickle_dict[oid] = p
......
...@@ -525,6 +525,21 @@ class ToXMLUnpickler(Unpickler): ...@@ -525,6 +525,21 @@ class ToXMLUnpickler(Unpickler):
dispatch[NONE] = load_none dispatch[NONE] = load_none
dispatch[NONE[0]] = load_none dispatch[NONE[0]] = load_none
def load_int(self):
line = self.readline()[:-1]
# on protocol 1, bool are saved as int
# https://github.com/python/cpython/blob/b455a5a55cb1fd5bb6178a969e8ebd0e6e91b610/Lib/pickletools.py#L1173-L1179
if line == b'00':
val = Bool(False, self.id_mapping)
elif line == b'01':
val = Bool(True, self.id_mapping)
else:
val = Int(int(line), self.id_mapping)
self.append(val)
if six.PY2:
dispatch[INT] = load_int
dispatch[INT[0]] = load_int
def load_binint(self): def load_binint(self):
self.append(Int(mloads(b'i' + self.read(4)), self.id_mapping)) self.append(Int(mloads(b'i' + self.read(4)), self.id_mapping))
if six.PY2: if six.PY2:
...@@ -543,6 +558,17 @@ class ToXMLUnpickler(Unpickler): ...@@ -543,6 +558,17 @@ class ToXMLUnpickler(Unpickler):
dispatch[BININT2] = load_binint2 dispatch[BININT2] = load_binint2
dispatch[BININT2[0]] = load_binint2 dispatch[BININT2[0]] = load_binint2
def load_long(self):
val = self.readline()[:-1]
if six.PY3:
val = val.decode('ascii')
if val and val[-1] == 'L':
val = val[:-1]
self.append(Long(long_(val, 0), self.id_mapping))
if six.PY2:
dispatch[LONG] = load_long
dispatch[LONG[0]] = load_long
def load_long1(self): def load_long1(self):
n = ord(self.read(1)) n = ord(self.read(1))
data = self.read(n) data = self.read(n)
...@@ -752,12 +778,6 @@ class ToXMLUnpickler(Unpickler): ...@@ -752,12 +778,6 @@ class ToXMLUnpickler(Unpickler):
dispatch[LONG_BINGET] = load_long_binget dispatch[LONG_BINGET] = load_long_binget
dispatch[LONG_BINGET[0]] = load_long_binget dispatch[LONG_BINGET[0]] = load_long_binget
def load_put(self):
self.stack[-1].id=self.idprefix+self.readline()[:-1]
if six.PY2:
dispatch[PUT] = load_put
dispatch[PUT[0]] = load_put
def load_binput(self): def load_binput(self):
i = mloads(b'i' + self.read(1) + b'\000\000\000') i = mloads(b'i' + self.read(1) + b'\000\000\000')
self.stack[-1].id=self.idprefix+repr(i) self.stack[-1].id=self.idprefix+repr(i)
...@@ -772,11 +792,6 @@ class ToXMLUnpickler(Unpickler): ...@@ -772,11 +792,6 @@ class ToXMLUnpickler(Unpickler):
dispatch[LONG_BINPUT] = load_long_binput dispatch[LONG_BINPUT] = load_long_binput
dispatch[LONG_BINPUT[0]] = load_long_binput dispatch[LONG_BINPUT[0]] = load_long_binput
for code in PERSID, INT, LONG, FLOAT, STRING, UNICODE, GET, PUT:
if six.PY2:
dispatch[code] = unsupported_opcode(code)
dispatch[code[0]] = unsupported_opcode(code)
class LogCall: class LogCall:
def __init__(self, func): def __init__(self, func):
self.func = func self.func = func
...@@ -788,6 +803,17 @@ class ToXMLUnpickler(Unpickler): ...@@ -788,6 +803,17 @@ class ToXMLUnpickler(Unpickler):
# for code in dispatch.keys(): # for code in dispatch.keys():
# dispatch[code] = LogCall(dispatch[code]) # dispatch[code] = LogCall(dispatch[code])
for opcode, name in (
(STRING, 'STRING'),
(UNICODE, 'UNICODE'),
(GET, 'GET'),
(PUT, 'PUT'),
):
if six.PY2:
dispatch[opcode] = unsupported_opcode(name)
dispatch[opcode[0]] = unsupported_opcode(name)
def ToXMLload(file): def ToXMLload(file):
return ToXMLUnpickler(file).load() return ToXMLUnpickler(file).load()
......
...@@ -223,11 +223,6 @@ class CodingStyleTestCase(ERP5TypeTestCase): ...@@ -223,11 +223,6 @@ class CodingStyleTestCase(ERP5TypeTestCase):
if log_directory and diff_line_list: if log_directory and diff_line_list:
with open(os.path.join(log_directory, '%s.diff' % self.id()), 'w') as f: with open(os.path.join(log_directory, '%s.diff' % self.id()), 'w') as f:
f.writelines(diff_line_list) f.writelines(diff_line_list)
if diff_files and six.PY3: # TODO zope4py3
warnings.warn(
"Ignoring test_rebuild_business_template until we re-export "
"business templates with protocol 3.")
return
self.assertEqual(diff_files, []) self.assertEqual(diff_files, [])
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment