Commit 80559a94 authored by Jérome Perrin, committed by Kirill Smelkov

zodbdump: support --pretty option with a format to show pickles disassembly

Showing pickle disassembly can sometimes be useful to analyse details of
the pickle content. For example, we realized that in some data structures
used in ERP5 the same string was saved multiple times in the same pickle;
by using the exact same string object (i.e. one for which `s1 is s2` is
True), the pickle stores the string only once and pickles are a bit smaller.
For more reference, the context was
nexedi/erp5!1560 (comment 154825)

This introduces a new --pretty option that we will be able to extend
later with more output formats.
Co-authored-by: Kirill Smelkov <kirr@nexedi.com>
Reviewed-on: nexedi/zodbtools!22
parent aa7e1966
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2017-2020 Nexedi SA and Contributors. # Copyright (C) 2017-2022 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# Jérome Perrin <jerome@nexedi.com> # Jérome Perrin <jerome@nexedi.com>
# #
...@@ -31,19 +31,20 @@ from io import BytesIO ...@@ -31,19 +31,20 @@ from io import BytesIO
from os.path import dirname from os.path import dirname
from zodbtools.test.testutil import zext_supported from zodbtools.test.testutil import zext_supported
from pytest import raises, xfail from pytest import mark, raises, xfail
# verify zodbdump output against golden # verify zodbdump output against golden
def test_zodbdump(zext): @mark.parametrize('pretty', ('raw', 'zpickledis'))
def test_zodbdump(zext, pretty):
tdir = dirname(__file__) tdir = dirname(__file__)
zkind = '_!zext' if zext.disabled else '' zkind = '_!zext' if zext.disabled else ''
stor = FileStorage('%s/testdata/1%s.fs' % (tdir, zkind), read_only=True) stor = FileStorage('%s/testdata/1%s.fs' % (tdir, zkind), read_only=True)
with open('%s/testdata/1%s.zdump.ok' % (tdir, zkind), 'rb') as f: with open('%s/testdata/1%s.zdump.%s.ok' % (tdir, zkind, pretty), 'rb') as f:
dumpok = f.read() dumpok = f.read()
out = BytesIO() out = BytesIO()
zodbdump(stor, None, None, out=out) zodbdump(stor, None, None, pretty=pretty, out=out)
assert out.getvalue() == dumpok assert out.getvalue() == dumpok
......
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2021 Nexedi SA and Contributors. # Copyright (C) 2021-2022 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
...@@ -40,7 +40,7 @@ def test_zodbrestore(zext): ...@@ -40,7 +40,7 @@ def test_zodbrestore(zext):
tdata = dirname(__file__) + "/testdata" tdata = dirname(__file__) + "/testdata"
@func @func
def _(): def _():
zdump = open("%s/1%s.zdump.ok" % (tdata, zkind), 'rb') zdump = open("%s/1%s.zdump.raw.ok" % (tdata, zkind), 'rb')
defer(zdump.close) defer(zdump.close)
stor = storageFromURL('%s/2.fs' % tmpd) stor = storageFromURL('%s/2.fs' % tmpd)
......
This diff is collapsed.
This diff is collapsed.
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2016-2021 Nexedi SA and Contributors. # Copyright (C) 2016-2022 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# Jérome Perrin <jerome@nexedi.com> # Jérome Perrin <jerome@nexedi.com>
# #
...@@ -32,7 +32,15 @@ format where object data is output as raw binary and everything else is text. ...@@ -32,7 +32,15 @@ format where object data is output as raw binary and everything else is text.
There is also shortened mode activated via --hashonly where only hash of object There is also shortened mode activated via --hashonly where only hash of object
data is printed without content. data is printed without content.
Dump format: Alternatively, the dump can be produced in other "pretty" formats, that zodb
restore will not be able to restore, but that are more suitable for analysis.
The output format can be selected with --pretty "format" option. The following
formats are available:
raw default zodb dump format
zpickledis display the disassembled pickles, using pickletools.dis.
Raw dump format:
txn <tid> <status|quote> txn <tid> <status|quote>
user <user|quote> user <user|quote>
...@@ -59,7 +67,7 @@ from zodbtools.util import ashex, fromhex, sha1, txnobjv, parse_tidrange, TidRan ...@@ -59,7 +67,7 @@ from zodbtools.util import ashex, fromhex, sha1, txnobjv, parse_tidrange, TidRan
storageFromURL, hashRegistry, asbinstream storageFromURL, hashRegistry, asbinstream
from ZODB._compat import loads, _protocol, BytesIO from ZODB._compat import loads, _protocol, BytesIO
from zodbpickle.slowpickle import Pickler as pyPickler from zodbpickle.slowpickle import Pickler as pyPickler
#import pickletools import pickletools
from ZODB.interfaces import IStorageTransactionInformation from ZODB.interfaces import IStorageTransactionInformation
from zope.interface import implementer from zope.interface import implementer
...@@ -92,15 +100,36 @@ _already_warned_notxnraw = set() ...@@ -92,15 +100,36 @@ _already_warned_notxnraw = set()
# zodbdump dumps content of a ZODB storage to a file. # zodbdump dumps content of a ZODB storage to a file.
# please see module doc-string for dump format and details # please see module doc-string for dump format and details
def zodbdump(stor, tidmin, tidmax, hashonly=False, out=asbinstream(sys.stdout)): def zodbdump(stor, tidmin, tidmax, hashonly=False, pretty='raw', out=asbinstream(sys.stdout)):
def badpretty():
raise ValueError("invalid pretty format %s" % pretty)
for txn in stor.iterator(tidmin, tidmax): for txn in stor.iterator(tidmin, tidmax):
# XXX .status not covered by IStorageTransactionInformation # XXX .status not covered by IStorageTransactionInformation
# XXX but covered by BaseStorage.TransactionRecord # XXX but covered by BaseStorage.TransactionRecord
out.write(b"txn %s %s\nuser %s\ndescription %s\nextension %s\n" % ( out.write(b"txn %s %s\nuser %s\ndescription %s\n" % (
ashex(txn.tid), qq(txn.status), ashex(txn.tid), qq(txn.status),
qq(txn.user), qq(txn.user),
qq(txn.description), qq(txn.description) ))
qq(txn_raw_extension(stor, txn)) ))
# extension is saved by ZODB as either empty or as pickle dump of an object
rawext = txn_raw_extension(stor, txn)
if pretty == 'raw':
out.write(b"extension %s\n" % qq(rawext))
elif pretty == 'zpickledis':
if len(rawext) == 0:
out.write(b'extension ""\n')
else:
out.write(b"extension\n")
extf = BytesIO(rawext)
disf = BytesIO()
pickletools.dis(extf, disf)
out.write(indent(disf.getvalue(), " "))
extra = extf.read()
if len(extra) > 0:
out.write(b" + extra data %s\n" % qq(extra))
else:
badpretty()
objv = txnobjv(txn) objv = txnobjv(txn)
...@@ -127,7 +156,20 @@ def zodbdump(stor, tidmin, tidmax, hashonly=False, out=asbinstream(sys.stdout)): ...@@ -127,7 +156,20 @@ def zodbdump(stor, tidmin, tidmax, hashonly=False, out=asbinstream(sys.stdout)):
out.write(b" -") out.write(b" -")
else: else:
out.write(b"\n") out.write(b"\n")
if pretty == 'raw':
out.write(obj.data) out.write(obj.data)
elif pretty == 'zpickledis':
# https://github.com/zopefoundation/ZODB/blob/5.6.0-55-g1226c9d35/src/ZODB/serialize.py#L24-L29
dataf = BytesIO(obj.data)
disf = BytesIO()
pickletools.dis(dataf, disf) # class
pickletools.dis(dataf, disf) # state
out.write(indent(disf.getvalue(), " "))
extra = dataf.read()
if len(extra) > 0:
out.write(b" + extra data %s\n" % qq(extra))
else:
badpretty()
out.write(b"\n") out.write(b"\n")
...@@ -224,6 +266,14 @@ def serializeext(ext): ...@@ -224,6 +266,14 @@ def serializeext(ext):
assert loads(out) == ext assert loads(out) == ext
return out return out
# indent returns text with each line of it indented with prefix.
#
# text may be either bytes or a unicode string; the result has the same type
# as text. Line endings are preserved as-is, and empty lines are prefixed too.
#
# zodbdump calls indent with BytesIO.getvalue() (bytes) and a native-string
# prefix, so prefix is converted to the type of text when the two differ.
def indent(text, prefix): # -> text
    text_is_bytes = isinstance(text, bytes)
    if text_is_bytes != isinstance(prefix, bytes):
        # bring prefix to the same kind of string as text, so that
        # concatenation below works for both bytes and unicode input.
        if text_is_bytes:
            prefix = prefix.encode('utf-8')
        else:
            prefix = prefix.decode('utf-8')

    # text[:0] is an empty string of the same type as text (b'' or ''); a
    # hard-coded '' joiner would fail on bytes lines under Python 3.
    empty = text[:0]
    return empty.join(prefix + line for line in text.splitlines(True))
# ---------------------------------------- # ----------------------------------------
import sys, getopt import sys, getopt
...@@ -239,6 +289,8 @@ Dump content of a ZODB database. ...@@ -239,6 +289,8 @@ Dump content of a ZODB database.
Options: Options:
--pretty=<format> output in a given format, where <format> can be one
of raw, zpickledis
--hashonly dump only hashes of objects without content --hashonly dump only hashes of objects without content
-h --help show this help -h --help show this help
""", file=out) """, file=out)
...@@ -246,20 +298,26 @@ Options: ...@@ -246,20 +298,26 @@ Options:
@func @func
def main(argv): def main(argv):
hashonly = False hashonly = False
pretty = 'raw'; prettyok = {'raw', 'zpickledis'}
try: try:
optv, argv = getopt.getopt(argv[1:], "h", ["help", "hashonly"]) optv, argv = getopt.getopt(argv[1:], "h", ["help", "hashonly", "pretty="])
except getopt.GetoptError as e: except getopt.GetoptError as e:
print(e, file=sys.stderr) print(e, file=sys.stderr)
usage(sys.stderr) usage(sys.stderr)
sys.exit(2) sys.exit(2)
for opt, _ in optv: for opt, arg in optv:
if opt in ("-h", "--help"): if opt in ("-h", "--help"):
usage(sys.stdout) usage(sys.stdout)
sys.exit(0) sys.exit(0)
if opt in ("--hashonly"): if opt in ("--hashonly"):
hashonly = True hashonly = True
if opt in ("--pretty"):
pretty = arg
if pretty not in prettyok:
print("E: unsupported pretty format: %s" % pretty, file=sys.stderr)
sys.exit(2)
try: try:
storurl = argv[0] storurl = argv[0]
...@@ -279,7 +337,7 @@ def main(argv): ...@@ -279,7 +337,7 @@ def main(argv):
stor = storageFromURL(storurl, read_only=True) stor = storageFromURL(storurl, read_only=True)
defer(stor.close) defer(stor.close)
zodbdump(stor, tidmin, tidmax, hashonly) zodbdump(stor, tidmin, tidmax, hashonly, pretty)
# ---------------------------------------- # ----------------------------------------
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment