Commit dbb63f65 authored by Kirill Smelkov's avatar Kirill Smelkov

go/zodb/zodbtools: Dump

Add `zodb dump` command to dump an arbitrary ZODB database in a generic
format. The dump protocol used here is the same as the one in
zodbtools/py with

	zodbtools!3

applied. (the MR there is OK and is just waiting for upstream ZODB to
negotiate a way to retrieve transaction extension data in raw form).
parent c6457cf7
// Copyright (C) 2016-2017 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// You can also Link and Combine this program with other software covered by
// the terms of any of the Free Software licenses or any of the Open Source
// Initiative approved licenses and Convey the resulting work. Corresponding
// source of such a combination shall include the source code for all other
// software used.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
// See https://www.nexedi.com/licensing for rationale and options.
/*
Zodbdump - Tool to dump content of a ZODB database
This program dumps content of a ZODB database.
It uses the ZODB storage iteration API to get the list of transactions and, for
every transaction, prints the transaction's header and information about changed objects.
The information dumped is complete raw information as stored in ZODB storage
and should be suitable for restoring the database from the dump file bit-to-bit
identical to its original. It is dumped in semi text-binary format where
object data is output as raw binary and everything else is text.
There is also a shortened mode, activated via -hashonly, where only the hash of
object data is printed, without the content.
Dump format:
txn <tid> <status|quote>
user <user|quote>
description <description|quote>
extension <extension|quote>
obj <oid> (delete | from <tid> | <size> <hashfunc>:<hash> (-|LF <raw-content>)) LF
obj ...
...
obj ...
LF
txn ...
quote: the string enclosed in double quotes (") with non-printable and control characters \-escaped
hashfunc: one of sha1, sha256, sha512 ...
TODO also protect txn record by hash.
*/
package zodbtools
import (
"context"
"crypto/sha1"
"flag"
"fmt"
"io"
"os"
"lab.nexedi.com/kirr/go123/prog"
"lab.nexedi.com/kirr/go123/xfmt"
"lab.nexedi.com/kirr/neo/go/zodb"
)
// dumper holds the state needed to dump zodb records to a writer.
type dumper struct {
	// W is where the dump is written to.
	W io.Writer

	// HashOnly, when set, makes the dumper emit only hashes of object
	// data, without the data content itself.
	HashOnly bool

	// afterFirst becomes true once the first transaction has been dumped.
	afterFirst bool

	// buf is a formatting buffer reused in between data records.
	buf xfmt.Buffer
}
// _LF is a single line feed; it is written after every data record.
var _LF = []byte("\n")
// DumpData dumps one data record.
//
// The record is written to d.W as an "obj <oid> ..." line, optionally followed
// by raw object data, and is always terminated by LF. Three forms are emitted:
//
//   - "delete"                    when datai.Data is nil,
//   - "from <tid>"                when datai.DataTidHint is non-zero,
//   - "<size> sha1:<hash> ..."    otherwise; the hash is followed by " -" in
//     HashOnly mode, or by LF and the raw data.
//
// Any write error is returned prefixed with the object id.
func (d *dumper) DumpData(datai *zodb.DataInfo) error {
	// XXX do we need this context ?
	// see for rationale in similar place in DumpTxn
	withOid := func(err error) error {
		return fmt.Errorf("%v: %v", datai.Oid, err)
	}

	line := &d.buf
	line.Reset()
	line.S("obj ").V(&datai.Oid).Cb(' ')

	// payload stays nil unless raw object data has to follow the header line
	var payload []byte
	if datai.Data == nil {
		line.S("delete")
	} else if datai.DataTidHint != 0 {
		line.S("from ").V(&datai.DataTidHint)
	} else {
		// XXX sha1 is hardcoded for now. Dump format allows other hashes.
		sum := sha1.Sum(datai.Data)
		line.D(len(datai.Data)).S(" sha1:").Xb(sum[:])

		if d.HashOnly {
			line.S(" -")
		} else {
			line.Cb('\n')
			payload = datai.Data
		}
	}

	// TODO use writev(buf, data, "\n") via net.Buffers (it is already available)
	if _, err := d.W.Write(line.Bytes()); err != nil {
		return withOid(err)
	}
	if payload != nil {
		if _, err := d.W.Write(payload); err != nil {
			return withOid(err)
		}
	}
	if _, err := d.W.Write(_LF); err != nil {
		return withOid(err)
	}
	return nil
}
// DumpTxn dumps one transaction record
//
// It writes the transaction header (txn, user, description and extension
// lines) to d.W and then passes every data record obtained from dataIter to
// DumpData. An empty line is written before every transaction except the
// first one dumped by d.
//
// io.EOF from dataIter.NextData ends the transaction normally. Any other
// error - from the header write, from NextData or from DumpData - is returned
// prefixed with the transaction id.
func (d *dumper) DumpTxn(ctx context.Context, txni *zodb.TxnInfo, dataIter zodb.IDataIterator) error {
var datai *zodb.DataInfo
// LF in-between txn records
vskip := "\n"
if !d.afterFirst {
vskip = ""
d.afterFirst = true
}
// status, user, description and extension are printed with %q, tid with %s
_, err := fmt.Fprintf(d.W, "%stxn %s %q\nuser %q\ndescription %q\nextension %q\n",
vskip, txni.Tid, string(txni.Status), txni.User, txni.Description, txni.Extension)
if err != nil {
goto out
  • There is the defer feature of Go which would produce the same effect but looked more up-to-date. Why did you choose goto instead? (Just asking out of curiosity)

  • I frankly don't remember. Probably gotos were left there because initially DumpTxn and DumpData were all one function and it was then needed to distinguish which error prefix to add via jumping to corresponding label. But since txn/data handling was split, as you rightly say, the gotos are not really needed there. So I've made the following patch to say goodbye to them:

    5bf40022

    Thanks for asking,
    Kirill

Please register or sign in to reply
}
// data records
for {
datai, err = dataIter.NextData(ctx)
if err != nil {
// end of data records for this transaction is not an error
if err == io.EOF {
err = nil // XXX -> okEOF ?
}
break
}
err = d.DumpData(datai)
if err != nil {
break
}
}
out:
// XXX do we need this context ?
// rationale: dataIter.NextData() if error in db - will include db context
// if error is in writer - it will include its own context
if err != nil {
return fmt.Errorf("%v: %v", txni.Tid, err)
}
return nil
}
// Dump dumps transaction records in between tidMin..tidMax
//
// Transactions are taken from stor.Iterate(ctx, tidMin, tidMax) and passed,
// one by one, to DumpTxn. io.EOF from the iterator ends the dump normally;
// any other error is returned prefixed with the storage URL and the dumped
// range.
func (d *dumper) Dump(ctx context.Context, stor zodb.IStorage, tidMin, tidMax zodb.Tid) error {
	// errctx adds "<url>: dump <tidMin>..<tidMax>" context to a failure
	errctx := func(err error) error {
		return fmt.Errorf("%s: dump %v..%v: %v", stor.URL(), tidMin, tidMax, err)
	}

	txnIter := stor.Iterate(ctx, tidMin, tidMax)

	// transactions
	for {
		txn, objIter, err := txnIter.NextTxn(ctx)
		if err == io.EOF {
			return nil // XXX -> okEOF ?
		}
		if err != nil {
			return errctx(err)
		}

		if err := d.DumpTxn(ctx, txn, objIter); err != nil {
			return errctx(err)
		}
	}
}
// Dump dumps contents of a storage in between tidMin..tidMax range to a writer.
//
// With hashOnly set only hashes of object data are dumped, without the data
// content.
//
// see top-level documentation for the dump format.
func Dump(ctx context.Context, w io.Writer, stor zodb.IStorage, tidMin, tidMax zodb.Tid, hashOnly bool) error {
	zd := &dumper{
		W:        w,
		HashOnly: hashOnly,
	}
	return zd.Dump(ctx, stor, tidMin, tidMax)
}
// ----------------------------------------
// dumpSummary is the one-line summary of the dump command.
const dumpSummary = "dump content of a ZODB database"
// dumpUsage prints the usage text of `zodb dump` to w.
func dumpUsage(w io.Writer) {
	const usageText = `Usage: zodb dump [OPTIONS] <storage> [tidmin..tidmax]
Dump content of a ZODB database.
<storage> is an URL (see 'zodb help zurl') of a ZODB-storage.
Options:
-h --help this help text.
-hashonly dump only hashes of objects without content.
`
	// the text has no formatting verbs, so it is emitted verbatim
	fmt.Fprint(w, usageText)
}
// dumpMain is the entry point of the `zodb dump` command.
//
// argv[0] is skipped and the rest is parsed as
//
//	[-hashonly] <storage> [tidmin..tidmax]
//
// The storage is opened read-only and its content is dumped to stdout.
// Without a storage argument the usage is printed to stderr and prog.Exit(2)
// is called; range parsing, open and dump errors go to prog.Fatal.
func dumpMain(argv []string) {
	var hashOnly bool

	flags := flag.FlagSet{Usage: func() { dumpUsage(os.Stderr) }}
	flags.Init("", flag.ExitOnError)
	flags.BoolVar(&hashOnly, "hashonly", false, "dump only hashes of objects")
	flags.Parse(argv[1:])

	args := flags.Args()
	if len(args) < 1 {
		flags.Usage()
		prog.Exit(2)
	}
	zurl := args[0]

	// the whole database is dumped unless a range was given explicitly
	tidRange := ".." // [0, +inf]
	if len(args) > 1 {
		tidRange = args[1]
	}

	tidMin, tidMax, err := zodb.ParseTidRange(tidRange)
	if err != nil {
		prog.Fatal(err)
	}

	ctx := context.Background()
	openOpts := &zodb.OpenOptions{ReadOnly: true}
	stor, err := zodb.OpenStorage(ctx, zurl, openOpts)
	if err != nil {
		prog.Fatal(err)
	}
	// TODO defer stor.Close()

	if err := Dump(ctx, os.Stdout, stor, tidMin, tidMax, hashOnly); err != nil {
		prog.Fatal(err)
	}
}
// Copyright (C) 2016-2017 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// You can also Link and Combine this program with other software covered by
// the terms of any of the Free Software licenses or any of the Open Source
// Initiative approved licenses and Convey the resulting work. Corresponding
// source of such a combination shall include the source code for all other
// software used.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
// See https://www.nexedi.com/licensing for rationale and options.
package zodbtools
//go:generate sh -c "python2 -m zodbtools.zodb dump ../../zodb/storage/fs1/testdata/1.fs >testdata/1.zdump.pyok"
//go:generate sh -c "python2 -m zodbtools.zodb dump ../../zodb/storage/fs1/testdata/empty.fs >testdata/empty.zdump.pyok"
import (
"bytes"
"context"
"fmt"
"io/ioutil"
"regexp"
"testing"
"lab.nexedi.com/kirr/neo/go/zodb"
_ "lab.nexedi.com/kirr/neo/go/zodb/wks"
"github.com/kylelemons/godebug/diff"
"lab.nexedi.com/kirr/go123/exc"
)
// loadZdumpPy reads the zdump file at path and rewrites python-style escapes
// into the form go uses for the same characters.
//
// The test is failed via t.Fatal if the file cannot be read.
func loadZdumpPy(t *testing.T, path string) string {
	raw, err := ioutil.ReadFile(path)
	if err != nil {
		t.Fatal(err)
	}

	// python quotes "\v" as "\x0b", go as "\v"; same for "\f", "\a", "\b".
	//
	// XXX this is a bit hacky. Comparing quoted strings in decoded form
	// would need a zdump format parser, which could have bugs of its own.
	// The goal is to compare the output ideally bit-to-bit, and only the
	// \v vs \x0b kind of differences stand in the way, hence the textual
	// substitution below.
	fixups := []struct {
		pyEscape *regexp.Regexp // escape as written by python
		goEscape []byte         // the same character as written by go
	}{
		{regexp.MustCompile(`\\x07`), []byte(`\a`)},
		{regexp.MustCompile(`\\x08`), []byte(`\b`)},
		{regexp.MustCompile(`\\x0b`), []byte(`\v`)},
		{regexp.MustCompile(`\\x0c`), []byte(`\f`)},
	}

	for i := range fixups {
		raw = fixups[i].pyEscape.ReplaceAllLiteral(raw, fixups[i].goEscape)
	}

	return string(raw)
}
// withTestdataFs opens ../../zodb/storage/fs1/testdata/<db>.fs read-only and
// calls f with the opened storage.
//
// The test is failed via t.Fatal if the storage cannot be opened. zstor.Close
// is run through exc.XRun when f returns.
func withTestdataFs(t testing.TB, db string, f func(zstor zodb.IStorage)) {
	fsPath := fmt.Sprintf("../../zodb/storage/fs1/testdata/%s.fs", db)
	openOpts := &zodb.OpenOptions{ReadOnly: true}

	zstor, err := zodb.OpenStorage(context.Background(), fsPath, openOpts)
	if err != nil {
		t.Fatal(err)
	}
	defer exc.XRun(zstor.Close)

	f(zstor)
}
// TestZodbDump verifies that, for every test database, the output of Dump is
// the same as the reference dump stored in testdata/<db>.zdump.pyok (after
// escape normalization done by loadZdumpPy).
func TestZodbDump(t *testing.T) {
	for _, db := range []string{"1", "empty"} {
		t.Run("db="+db, func(t *testing.T) {
			withTestdataFs(t, db, func(zstor zodb.IStorage) {
				var out bytes.Buffer

				if err := Dump(context.Background(), &out, zstor, 0, zodb.TidMax, false); err != nil {
					t.Fatal(err)
				}

				want := loadZdumpPy(t, fmt.Sprintf("testdata/%s.zdump.pyok", db))
				have := out.String()
				if want != have {
					t.Errorf("dump different:\n%v", diff.Diff(want, have))
				}
			})
		})
	}
}
// BenchmarkZodbDump measures dumping the whole testdata/1.fs database with
// the output discarded.
func BenchmarkZodbDump(b *testing.B) {
	// FIXME small testdata/1.fs is not representative for benchmarking
	withTestdataFs(b, "1", func(zstor zodb.IStorage) {
		ctx := context.Background()

		// opening the storage must not be accounted in the measurement
		b.ResetTimer()
		for n := 0; n < b.N; n++ {
			if err := Dump(ctx, ioutil.Discard, zstor, 0, zodb.TidMax, false); err != nil {
				b.Fatal(err)
			}
		}
		b.StopTimer()
	})
}
......@@ -25,6 +25,7 @@ import "lab.nexedi.com/kirr/go123/prog"
// registry of all zodbtools commands
var commands = prog.CommandRegistry{
// NOTE the order commands are listed here is the order how they will appear in help
// NOTE(review): entry layout is presumably {name, summary, usage printer, main function} - confirm against prog.CommandRegistry
{"dump", dumpSummary, dumpUsage, dumpMain},
}
// main zodbtools driver
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment