Commit c33f8b52 authored by Kirill Smelkov

zodbdump: Fix pickle disassembly if state part of zpickle refers to class part

I've tried to run `zodb dump --pretty=zpickledis` on wendelin.core test
data in WCFS(*) and hit the following failure:

    (z-dev) kirr@deca:~/src/wendelin/wendelin.core/wcfs/internal/zdata/testdata$ zodb dump --pretty=zpickledis zblk.fs
    ...
    obj 0000000000000005 685 sha1:865171b709f575b355afd2cc9e1f32b9781c6510
    Traceback (most recent call last):
      File "/home/kirr/src/wendelin/venv/z-dev/bin/zodb", line 11, in <module>
        load_entry_point('zodbtools', 'console_scripts', 'zodb')()
      File "/home/kirr/src/wendelin/z/zodbtools/zodbtools/zodb.py", line 129, in main
        return command_module.main(argv)
      File "<decorator-gen-3>", line 2, in main
      File "/home/kirr/src/wendelin/venv/z-dev/lib/python2.7/site-packages/golang/__init__.py", line 103, in _
        return f(*argv, **kw)
      File "/home/kirr/src/wendelin/z/zodbtools/zodbtools/zodbdump.py", line 341, in main
        zodbdump(stor, tidmin, tidmax, hashonly, pretty)
      File "/home/kirr/src/wendelin/z/zodbtools/zodbtools/zodbdump.py", line 167, in zodbdump
        pickletools.dis(dataf, disf) # state
      File "/usr/lib/python2.7/pickletools.py", line 2005, in dis
        raise ValueError(errormsg)
    ValueError: memo key 1 has never been stored into

The problem turned out to be that the state part of the zpickle was
referring to another object with the same class as already saved
in the class part of the zpickle, so that class was being referred to via
a GET matching the corresponding PUT done in the class part. However, our
zpickledis handler did not share the memo between those two parts, and so
the GET became unmatched.

In more detail, the problem is illustrated by the following zpickle that
corresponds to Object.value referring to the same Object. The first part
of the zpickle contains the class part and refers to the __main__.Object
global, putting it into memo[1]. The second part of the zpickle contains
the state part and refers to that object by `(Object, 7) PERSID` where
Object is retrieved via memo[1] GET:

    obj 0000000000000007 41 sha1:7108c96ccb9cbeaab1164d533174c300e51309f9
          0: \x80 PROTO      2
          2: c    GLOBAL     '__main__ Object'
         19: q    BINPUT     1                   <-- NOTE
         21: .    STOP
      highest protocol among opcodes = 2
         22: \x80 PROTO      2
         24: U    SHORT_BINSTRING '\x00\x00\x00\x00\x00\x00\x00\x07'
         34: q    BINPUT     2
         36: h    BINGET     1                   <-- NOTE
         38: \x86 TUPLE2
         39: Q    BINPERSID
         40: .    STOP
      highest protocol among opcodes = 2

To handle such zpickles well, we need to share the memo when dumping the
class and state disassemblies, similarly to how ZODB does it in its
ObjectWriter._dump:

https://github.com/zopefoundation/ZODB/blob/5.8.1-0-g72cebe6bc/src/ZODB/serialize.py#L436-L443

pickletools.dis has explicit support for using a shared memo - originally
added in https://github.com/python/cpython/commit/62235e701e37 and
likely motivated by the ZODB use case.

(*) https://lab.nexedi.com/nexedi/wendelin.core/-/blob/07087ec8/wcfs/internal/zdata/testdata/zblk.fs
    generated by nexedi/wendelin.core@2c152d41

/cc @jerome
parent 513c296c
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2017-2023 Nexedi SA and Contributors. # Copyright (C) 2017-2024 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
...@@ -230,6 +230,14 @@ def _gen_testdb(outfs_path, zext): ...@@ -230,6 +230,14 @@ def _gen_testdb(outfs_path, zext):
break break
# create a cyclic object -> object reference
# pretty=zpickledis used not to handle this well because in a ZODB pickle the reference
# refers to the referred type by a GET that is prepared by a PUT in the class part of the pickle.
name = random.choice(list(root.keys()))
obj = root[name]
obj.value = obj
commit(u"user", u"cyclic reference", ext("cycle"))
# delete an object # delete an object
name = random.choice(list(root.keys())) name = random.choice(list(root.keys()))
obj = root[name] obj = root[name]
......
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2019-2023 Nexedi SA and Contributors. # Copyright (C) 2019-2024 Nexedi SA and Contributors.
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your # it under the terms of the GNU General Public License version 3, or (at your
...@@ -40,7 +40,7 @@ def test_zodbanalyze(tmpdir, capsys): ...@@ -40,7 +40,7 @@ def test_zodbanalyze(tmpdir, capsys):
csv=False, csv=False,
) )
captured = capsys.readouterr() captured = capsys.readouterr()
assert "Processed 68 records in 65 transactions" in captured.out assert "Processed 70 records in 67 transactions" in captured.out
assert captured.err == "" assert captured.err == ""
# csv output # csv output
...@@ -57,8 +57,8 @@ def test_zodbanalyze(tmpdir, capsys): ...@@ -57,8 +57,8 @@ def test_zodbanalyze(tmpdir, capsys):
captured = capsys.readouterr() captured = capsys.readouterr()
assert ( assert (
"""Class Name,T.Count,T.Bytes,Pct,AvgSize,C.Count,C.Bytes,O.Count,O.Bytes """Class Name,T.Count,T.Bytes,Pct,AvgSize,C.Count,C.Bytes,O.Count,O.Bytes
persistent.mapping.PersistentMapping,3,639,23.194192%,213.000000,1,213,2,426 persistent.mapping.PersistentMapping,3,639,22.468354%,213.000000,1,213,2,426
__main__.Object,63,2116,76.805808%,33.587302,9,303,54,1813 __main__.Object,65,2205,77.531646%,33.923077,9,310,56,1895
""" """
== captured.out == captured.out
) )
......
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2016-2022 Nexedi SA and Contributors. # Copyright (C) 2016-2024 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# Jérome Perrin <jerome@nexedi.com> # Jérome Perrin <jerome@nexedi.com>
# #
...@@ -161,10 +161,12 @@ def zodbdump(stor, tidmin, tidmax, hashonly=False, pretty='raw', out=asbinstream ...@@ -161,10 +161,12 @@ def zodbdump(stor, tidmin, tidmax, hashonly=False, pretty='raw', out=asbinstream
out.write(obj.data) out.write(obj.data)
elif pretty == 'zpickledis': elif pretty == 'zpickledis':
# https://github.com/zopefoundation/ZODB/blob/5.6.0-55-g1226c9d35/src/ZODB/serialize.py#L24-L29 # https://github.com/zopefoundation/ZODB/blob/5.6.0-55-g1226c9d35/src/ZODB/serialize.py#L24-L29
# https://github.com/zopefoundation/ZODB/blob/5.8.1-0-g72cebe6bc/src/ZODB/serialize.py#L436-L443
dataf = BytesIO(obj.data) dataf = BytesIO(obj.data)
disf = StringIO() disf = StringIO()
pickletools.dis(dataf, disf) # class memo = {} # memo is shared in between class and state
pickletools.dis(dataf, disf) # state pickletools.dis(dataf, disf, memo) # class
pickletools.dis(dataf, disf, memo) # state
out.write(b(indent(disf.getvalue(), " "))) out.write(b(indent(disf.getvalue(), " ")))
extra = dataf.read() extra = dataf.read()
if len(extra) > 0: if len(extra) > 0:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment