Commit 9861c136 authored by Kirill Smelkov

*: Fix working on py3 by using bstr bytestring instead of raw bytes

e.g. for ObjectData .hashfunc:

In many contexts we need .hashfunc to behave like a string, e.g. for
accessing hashRegistry by key. In many other contexts - e.g. when
zodbdump input is parsed or emitted - it is handier to handle it as
raw bytes.

If we let .hashfunc be of type str, it breaks the second mode. If it is
of type bytes, it breaks the first mode.

Also, in many places it is cumbersome to constantly encode/decode between
str and bytes, especially where an object is sometimes used in a string
context and sometimes in a binary context.

-> Fix it all in one go by using bytestring type from pygolang,
which provides both unicode string and binary semantics simultaneously.

This needs bstr from pygolang (see pygolang@c9648c44),
but even if pygolang comes without bstr, with this patch zodbtools
continues to work fine on py2 - only py3 mode will not work.

The list of test failures before this patch is provided below:

    _______________________________ test_zodbanalyze _______________________________

    tmpdir = local('/tmp/pytest-of-kirr/pytest-22/test_zodbanalyze0')
    capsys = <_pytest.capture.CaptureFixture object at 0x7f3de6835c70>

        def test_zodbanalyze(tmpdir, capsys):
            tfs1 = fs1_testdata_py23(tmpdir,
                            os.path.join(os.path.dirname(__file__), "testdata", "1.fs"))

            for use_dbm in (False, True):
    >           report(
                    analyze(
                        tfs1,
                        use_dbm=use_dbm,
                        delta_fs=False,
                        tidmin=None,
                        tidmax=None,
                    ),
                    csv=False,
                )

    zodbtools/test/test_analyze.py:30:
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

    rep = <zodbtools.zodbanalyze.Report object at 0x7f3de5e16b20>, csv = False

        def report(rep, csv=False):
            ...
                    print (fmtp % (t_display, rep.TYPEMAP[t], rep.TYPESIZE[t],
                                   pct, rep.TYPESIZE[t] * 1.0 / rep.TYPEMAP[t],
    >                              rep.COIDSMAP[t], rep.CBYTESMAP[t],
                                   rep.FOIDSMAP.get(t, 0), rep.FBYTESMAP.get(t, 0)))
    E               KeyError: b'persistent.mapping.PersistentMapping'

    zodbtools/zodbanalyze.py:147: KeyError

    ____________________________ test_zodbcommit[!zext] ____________________________

    zext = <function zext.<locals>._ at 0x7f3deb5c3e50>

        @func
        def test_zodbcommit(zext):
            tmpd = mkdtemp('', 'zodbcommit.')
            defer(lambda: rmtree(tmpd))

            stor = storageFromURL('%s/2.fs' % tmpd)
            defer(stor.close)

            head = stor.lastTransaction()

            # commit some transactions via zodbcommit and verify if storage dump gives
            # what is expected.
            t1 = Transaction(z64, ' ', b'user name', b'description ...', zext(dumps({'a': 'b'}, _protocol)), [
                ObjectData(p64(1), b'data1', 'sha1', sha1(b'data1')),
                ObjectData(p64(2), b'data2', 'sha1', sha1(b'data2'))])

            t1.tid = zodbcommit(stor, head, t1)

            t2 = Transaction(z64, ' ', b'user2', b'desc2', b'', [
                ObjectDelete(p64(2))])

            t2.tid = zodbcommit(stor, t1.tid, t2)

            buf = BytesIO()
            zodbdump(stor, p64(u64(head)+1), None, out=buf)
            dumped = buf.getvalue()

    >       assert dumped == b''.join([_.zdump() for _ in (t1, t2)])

    zodbtools/test/test_commit.py:61:
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
    zodbtools/test/test_commit.py:61: in <listcomp>
        assert dumped == b''.join([_.zdump() for _ in (t1, t2)])
    zodbtools/zodbdump.py:521: in zdump
        z += obj.zdump()
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

    self = <zodbtools.zodbdump.ObjectData object at 0x7f3de5d26d90>

        def zdump(self):
            data = self.data
            hashonly = isinstance(data, HashOnly)
            if hashonly:
                size = data.size
            else:
                size = len(data)
    >       z = b'obj %s %d %s:%s' % (ashex(self.oid), size, self.hashfunc, ashex(self.hash_))
    E       TypeError: %b requires a bytes-like object, or an object that implements __bytes__, not 'str'

    zodbtools/zodbdump.py:569: TypeError

    _______________________________ test_dumpreader ________________________________

        def test_dumpreader():
            in_ = b"""\
        txn 0123456789abcdef " "
        user "my name"
        description "o la-la..."
        extension "zzz123 def"
        obj 0000000000000001 delete
        obj 0000000000000002 from 0123456789abcdee
        obj 0000000000000003 54 adler32:01234567 -
        obj 0000000000000004 4 sha1:9865d483bc5a94f2e30056fc256ed3066af54d04
        ZZZZ
        obj 0000000000000005 9 crc32:52fdeac5
        ABC

        DEF!

        txn 0123456789abcdf0 " "
        user "author2"
        description "zzz"
        extension "qqq"

        """

            r = DumpReader(BytesIO(in_))
    >       t1 = r.readtxn()

    zodbtools/test/test_dump.py:78:
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
    zodbtools/zodbdump.py:443: in readtxn
        self._badline('unknown hash function %s' % qq(hashfunc))
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

    self = <zodbtools.zodbdump.DumpReader object at 0x7f3de5d69cd0>
    msg = 'unknown hash function "adler32"'

        def _badline(self, msg):
    >       raise RuntimeError("%s+%d: invalid line: %s (%s)" % (_ioname(self._r), self.lineno, msg, qq(self._line)))
    E       RuntimeError: +7: invalid line: unknown hash function "adler32" ("obj 0000000000000003 54 adler32:01234567 -")

    zodbtools/zodbdump.py:382: RuntimeError

    ___________________________ test_zodbrestore[!zext] ____________________________

    tmpdir = local('/tmp/pytest-of-kirr/pytest-22/test_zodbrestore__zext_0')
    zext = <function zext.<locals>._ at 0x7f3de5d6ddc0>

        @func
        def test_zodbrestore(tmpdir, zext):
            zkind = '_!zext' if zext.disabled else ''

            # restore from testdata/1.zdump.ok and verify it gives result that is
            # bit-to-bit identical to testdata/1.fs
            tdata = dirname(__file__) + "/testdata"
            @func
            def _():
                zdump = open("%s/1%s.zdump.raw.ok" % (tdata, zkind), 'rb')
                defer(zdump.close)

                stor = storageFromURL('%s/2.fs' % tmpdir)
                defer(stor.close)

                zodbrestore(stor, zdump)
    >       _()

    zodbtools/test/test_restore.py:49:
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
    ../../venv/py3.venv/lib/python3.9/site-packages/decorator.py:232: in fun
        return caller(func, *(extras + args), **kw)
    ../../../tools/go/pygolang/golang/__init__.py:103: in _
        return f(*argv, **kw)
    zodbtools/test/test_restore.py:48: in _
        zodbrestore(stor, zdump)
    zodbtools/zodbrestore.py:39: in zodbrestore
        txn = zr.readtxn()
    zodbtools/zodbdump.py:443: in readtxn
        self._badline('unknown hash function %s' % qq(hashfunc))
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

    self = <zodbtools.zodbdump.DumpReader object at 0x7f3de5d79e20>
    msg = 'unknown hash function "sha1"'

        def _badline(self, msg):
    >       raise RuntimeError("%s+%d: invalid line: %s (%s)" % (_ioname(self._r), self.lineno, msg, qq(self._line)))
    E       RuntimeError: /home/kirr/src/wendelin/z/zodbtools/zodbtools/test/testdata/1_!zext.zdump.raw.ok+5: invalid line: unknown hash function "sha1" ("obj 0000000000000000 61 sha1:664e6de0f153d8eaeda638d616a320c6e3c5feb1")

    zodbtools/zodbdump.py:382: RuntimeError
parent b21fbe23
......@@ -20,6 +20,7 @@
from zodbtools.zodbanalyze import analyze, report
from zodbtools.test.testutil import fs1_testdata_py23
import os.path
from golang import b
def test_zodbanalyze(tmpdir, capsys):
......@@ -74,5 +75,5 @@ __main__.Object,56,1880,54.366686%,33.571429,9,303,47,1577
csv=False,
)
captured = capsys.readouterr()
assert "# ø\nNo transactions processed\n" == captured.out.encode('utf-8')
assert "# ø\nNo transactions processed\n" == b(captured.out)
assert captured.err == ""
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2020 Nexedi SA and Contributors.
# Copyright (C) 2018-2022 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
# Jérome Perrin <jerome@nexedi.com>
#
......@@ -27,7 +27,7 @@ from ZODB._compat import BytesIO, dumps, _protocol # XXX can't yet commit with
from tempfile import mkdtemp
from shutil import rmtree
from golang import func, defer
from golang import func, defer, b
# verify zodbcommit.
@func
......@@ -43,8 +43,8 @@ def test_zodbcommit(zext):
# commit some transactions via zodbcommit and verify if storage dump gives
# what is expected.
t1 = Transaction(z64, ' ', b'user name', b'description ...', zext(dumps({'a': 'b'}, _protocol)), [
ObjectData(p64(1), b'data1', 'sha1', sha1(b'data1')),
ObjectData(p64(2), b'data2', 'sha1', sha1(b'data2'))])
ObjectData(p64(1), b'data1', b('sha1'), sha1(b'data1')),
ObjectData(p64(2), b'data2', b('sha1'), sha1(b'data2'))])
t1.tid = zodbcommit(stor, head, t1)
......
......@@ -27,9 +27,12 @@ from zlib import crc32, adler32
from ZODB.TimeStamp import TimeStamp
import dateparser
from golang import b
def ashex(s):
# type: (bytes) -> bytes
return codecs.encode(s, 'hex')
# type: (bytes) -> bstr
return b(codecs.encode(s, 'hex'))
def fromhex(s):
# type: (Union[str,bytes]) -> bytes
......
......@@ -16,7 +16,7 @@ from ZODB.FileStorage import FileIterator, packed_version
from ZODB.FileStorage.format import FileStorageFormatter
from ZODB.utils import get_pickle_metadata
from zodbtools.util import storageFromURL, parse_tidrange, ashex
from golang import func, defer
from golang import func, defer, b
class DeltaFileStorage(
FileStorageFormatter,
......@@ -225,7 +225,7 @@ def analyze_rec(report, record):
report.COIDSMAP[type] = report.COIDSMAP.get(type, 0) + 1
report.CBYTESMAP[type] = report.CBYTESMAP.get(type, 0) + size
else:
type = report.OIDMAP[oid]
type = b(report.OIDMAP[oid])
if report.use_dbm:
fsize = int(report.USEDMAP[oid])
report.USEDMAP[oid] = str(size)
......
......@@ -45,7 +45,7 @@ from ZODB.interfaces import IStorageRestoreable
from ZODB.utils import p64, u64, z64
from ZODB.POSException import POSKeyError
from ZODB._compat import BytesIO
from golang import func, defer, panic
from golang import func, defer, panic, b
import warnings
......@@ -217,7 +217,7 @@ def main(argv):
defer(stor.close)
# artificial transaction header with tid=0 to request regular commit
zin = b'txn 0000000000000000 " "\n'
zin = b('txn 0000000000000000 " "\n')
zin += asbinstream(sys.stdin).read()
zin = BytesIO(zin)
......
......@@ -433,7 +433,7 @@ class DumpReader(object):
else:
size = int(m.group('size'))
hashfunc = m.group('hashfunc')
hashfunc = b(m.group('hashfunc'))
hashok = fromhex(m.group('hash'))
hashonly = m.group('hashonly') is not None
data = None # see vvv
......@@ -551,7 +551,7 @@ class ObjectCopy(Object):
# ObjectData represents record with object data.
class ObjectData(Object):
# .data HashOnly | bytes
# .hashfunc str hash function used for integrity
# .hashfunc bstr hash function used for integrity
# .hash_ bytes hash of the object's data
def __init__(self, oid, data, hashfunc, hash_):
super(ObjectData, self).__init__(oid)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment