go/neo/t/neotest: bench-cpu += unzlib for wczblk1 and prod1 objects

NEO uses zlib compression for data, so the client has to spend time decompressing it. Benchmark how much time zlib decompression takes. With the stdlib zlib decompressor out of the box it looks like:

    name                 time/op
    unzlib/py/wczdata    20.7µs ± 2%
    unzlib/go/wczdata    70.6µs ± 0%
    unzlib/py/prod1-avg  4.02µs ± 1%
    unzlib/go/prod1-avg  15.2µs ± 0%

i.e. very much not in favour of Go. We'll be fixing that in the following patches.

go/neo/t/neotest: bench-cpu += unzlib for wczblk1 and prod1 objects
NEO uses zlib compression for data, so the client has to spend time decompressing it. Benchmark how much time zlib decompression takes. With the stdlib zlib decompressor out of the box it looks like:

    name                 time/op
    unzlib/py/wczdata    20.7µs ± 2%
    unzlib/go/wczdata    70.6µs ± 0%
    unzlib/py/prod1-avg  4.02µs ± 1%
    unzlib/go/prod1-avg  15.2µs ± 0%

i.e. very much not in favour of Go. We'll be fixing that in the following patches.
91a8afa8 · Kirill Smelkov · 646a94b5 · 91a8afa8 · 91a8afa8 · 91a8afa8
Commit 91a8afa8 authored Jul 11, 2018 by Kirill Smelkov
12 changed files
--- a/go/internal/xzlib/xzlib.go
+++ b/go/internal/xzlib/xzlib.go
+// Copyright (C) 2017-2018  Nexedi SA and Contributors.
+//                          Kirill Smelkov <kirr@nexedi.com>
+//
+// This program is free software: you can Use, Study, Modify and Redistribute
+// it under the terms of the GNU General Public License version 3, or (at your
+// option) any later version, as published by the Free Software Foundation.
+//
+// You can also Link and Combine this program with other software covered by
+// the terms of any of the Free Software licenses or any of the Open Source
+// Initiative approved licenses and Convey the resulting work. Corresponding
+// source of such a combination shall include the source code for all other
+// software used.
+//
+// This program is distributed WITHOUT ANY WARRANTY; without even the implied
+// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+// See COPYING file for full licensing terms.
+// See https://www.nexedi.com/licensing for rationale and options.
+
+// Package xzlib provides convenience utilities to compress/decompress zlib data.
+package xzlib
+
+import (
+	"bytes"
+	"compress/zlib"
+	"io"
+)
+
+// Compress compresses data according to zlib encoding.
+//
+// default level and dictionary are used.
+func Compress(data []byte) (zdata []byte) {
+	var b bytes.Buffer
+	w := zlib.NewWriter(&b)
+	_, err := w.Write(data)
+	if err != nil {
+		panic(err) // bytes.Buffer.Write never return error
+	}
+	err = w.Close()
+	if err != nil {
+		panic(err) // ----//----
+	}
+	return b.Bytes()
+}
+
+// Decompress inflates zlib-encoded zdata.
+//
+// Decompressed bytes are placed into out's backing array, starting from its
+// beginning, when out has sufficient capacity; otherwise a new buffer is
+// allocated and used instead.
+//
+// On success the buffer holding the full decompressed data is returned.
+// On error the returned data is nil.
+func Decompress(zdata []byte, out []byte) (data []byte, err error) {
+	zr, err := zlib.NewReader(bytes.NewReader(zdata))
+	if err != nil {
+		return nil, err
+	}
+
+	// a Close failure is reported only if nothing has failed before it.
+	defer func() {
+		if cerr := zr.Close(); cerr != nil && err == nil {
+			data, err = nil, cerr
+		}
+	}()
+
+	dst := bytes.NewBuffer(out[:0])
+	if _, err = io.Copy(dst, zr); err != nil {
+		return nil, err
+	}
+	return dst.Bytes(), nil
+}
--- a/go/internal/xzlib/xzlib_test.go
+++ b/go/internal/xzlib/xzlib_test.go
+// Copyright (C) 2017  Nexedi SA and Contributors.
+//                     Kirill Smelkov <kirr@nexedi.com>
+//
+// This program is free software: you can Use, Study, Modify and Redistribute
+// it under the terms of the GNU General Public License version 3, or (at your
+// option) any later version, as published by the Free Software Foundation.
+//
+// You can also Link and Combine this program with other software covered by
+// the terms of any of the Free Software licenses or any of the Open Source
+// Initiative approved licenses and Convey the resulting work. Corresponding
+// source of such a combination shall include the source code for all other
+// software used.
+//
+// This program is distributed WITHOUT ANY WARRANTY; without even the implied
+// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+// See COPYING file for full licensing terms.
+// See https://www.nexedi.com/licensing for rationale and options.
+
+package xzlib
+
+import (
+	"testing"
+
+	"github.com/kylelemons/godebug/pretty"
+)
+
+// ztestv is the table of decompression test vectors: `in` is zlib-compressed
+// data and `out` is what it must decompress to.
+var ztestv = []struct{ in, out string }{
+	{
+		in:  "x\x9c\xf3H\xcd\xc9\xc9W\x08\xcf/\xcaIQ\x04\x00\x1cI\x04>",
+		out: "Hello World!",
+	},
+	{
+		in:  "x\x9cK.H-*\xce,.I\xcd+\xd1\xcbM,(\xc8\xccK\xe7\n\x80\x0b\xf9BE\n\x19\xf5j\x0b\x99BYR\x12K\x12\x0b\x99k\x0bYB\xd9\x8b3\xd3\xf3\x12s\xca\nY5B9\x18 \x80\xb1\x90-\xb9<5/%5'3O/)3=\xb1\xa8(\xb1R\x0fL\xc6W\xe5\xa7$qE9e\xa6;\x82\xb8\\\x85\xec%\x81\xc5\xc5z\x00\xb0d)\xef",
+		out: "cpersistent.mapping\nPersistentMapping\nq\x01.}q\x02U\x04dataq\x03}q\x04U\x07signalvq\x05(U\x08\x00\x00\x00\x00\x00\x00\x00\x01q\x06cwendelin.bigarray.array_zodb\nZBigArray\nq\x07tQss.",
+	},
+}
+
+// TestDecompress verifies that Decompress turns every ztestv input into its
+// expected output.
+func TestDecompress(t *testing.T) {
+	for _, tt := range ztestv {
+		got, err := Decompress([]byte(tt.in), nil)
+		if err != nil {
+			// report the error itself - without it the failure says
+			// nothing about what went wrong.
+			t.Errorf("decompress %q: %v", tt.in, err)
+			continue
+		}
+		gots := string(got)
+		if gots != tt.out {
+			t.Errorf("decompress output mismatch:\n%s\n",
+				pretty.Compare(tt.out, gots))
+		}
+	}
+}
--- a/go/neo/t/gen-testdata
+++ b/go/neo/t/gen-testdata
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (C) 2018  Nexedi SA and Contributors.
+#                     Kirill Smelkov <kirr@nexedi.com>
+#
+# This program is free software: you can Use, Study, Modify and Redistribute
+# it under the terms of the GNU General Public License version 3, or (at your
+# option) any later version, as published by the Free Software Foundation.
+#
+# You can also Link and Combine this program with other software covered by
+# the terms of any of the Free Software licenses or any of the Open Source
+# Initiative approved licenses and Convey the resulting work. Corresponding
+# source of such a combination shall include the source code for all other
+# software used.
+#
+# This program is distributed WITHOUT ANY WARRANTY; without even the implied
+# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See COPYING file for full licensing terms.
+# See https://www.nexedi.com/licensing for rationale and options.
+"""generate testdata/ files"""
+
+import zlib
+import zodbtools.util as zutil
+from tcpu import fmtsize
+
+# size units
+K = 1024
+M = 1024*K
+
+# sizes of the all-zero payloads that main writes as testdata/zlib/null-*
+sizev = (1*K, 4*K, 2*M)
+
+def writefile(path, data):
+    with open(path, 'w') as f:
+        f.write(data)
+
+
+def zcompress(data):
+    zdata = zlib.compress(data)
+    #print '%d -> %d  (%.1f%%)' % (len(data), len(zdata), 100. * len(zdata) / len(data))
+    return zdata
+
+def main():
+    # zlib/null
+    for size in sizev:
+        data = '\0' * size
+        zdata = zcompress(data)
+        writefile('testdata/zlib/null-%s' % fmtsize(size), zdata)
+
+
+    # representative ZODB objects
+    # (to regenerate this requires `neotest zbench-local` to be already run once)
+
+    # wendelin.core's ZData
+    zdatav = []
+    def update_zdata(objdata):
+        if 'ZData' in objdata:  # XXX hack
+            zdatav.append(objdata)
+    iter_zobjects('var/wczblk1-8/fs1/data.fs', update_zdata)
+    writeobjects('testdata/zlib/wczdata', zdatav)
+
+    # min avg max from prod1
+    prod1_objv = []
+    def update_prod1(objdata):
+        prod1_objv.append(objdata)
+
+    iter_zobjects('var/prod1-1024/fs1/data.fs', update_prod1)
+    writeobjects('testdata/zlib/prod1', prod1_objv)
+
+
+# writeobjects writes to prefix compressed objects with average and maximum uncompressed sizes.
+def writeobjects(prefix, objv):
+    objv.sort(key = lambda obj: len(obj))
+    lavg = sum(len(_) for _ in objv) // len(objv)
+
+    lo, hi = 0, len(objv)
+    while lo < hi:
+        #print lo, hi
+        i = (lo + hi) // 2
+        l = len(objv[i])
+        if l < lavg:
+            lo = i+1
+        else:
+            hi = i
+
+    objavg = objv[lo]
+    objmax = objv[-1]
+    #print '[%d,%d] -> avgi=%d, avglen=%d maxlen=%d' % (0, len(objv), lo, len(objavg), len(objmax))
+
+    if len(objavg) == len(objmax):  # it is so for wczdata
+        writefile('%s' % prefix, zcompress(objavg))
+    else:
+        writefile('%s-avg' % prefix, zcompress(objavg))
+        writefile('%s-max' % prefix, zcompress(objv[-1]))
+
+# iter_zobjects iterates throuh all non-nil object data from fs1@path.
+#
+# for every object f is called, and if it returns !false iteration is stopped.
+def iter_zobjects(path, f):
+    stor = zutil.storageFromURL(path, read_only=True)
+
+    for txn in stor.iterator():
+        for obj in txn:
+            if obj.data is not None:
+                if f(obj.data):
+                    return
+
+# generate testdata only when run as a script, not when imported.
+if __name__ == '__main__':
+    main()
--- a/go/neo/t/neotest
+++ b/go/neo/t/neotest
@@ -959,6 +959,14 @@ bench_cpu() {
 			nrun tcpu_go $bench $size
 		done
 	done
+
+	datav="wczdata prod1-avg"	# null-1K null-4K null-2M prod1-max
+	for data in $datav; do
+		nrun tcpu.py unzlib $data
+		nrun tcpu_go unzlib $data
+	done
+
+	# TODO bench compress
 }

 # bench_disk	- benchmark direct (uncached) and cached random reads

--- a/go/neo/t/tcpu.go
+++ b/go/neo/t/tcpu.go
@@ -18,6 +18,7 @@
 // See https://www.nexedi.com/licensing for rationale and options.

 // +build ignore
+//go:generate ./gen-testdata

 // tcpu - cpu-related benchmarks
 package main
@@ -29,11 +30,16 @@ import (
 	"hash"
 	"hash/adler32"
 	"hash/crc32"
+	"io/ioutil"
 	"log"
 	"os"
+	"path/filepath"
 	"strconv"
 	"testing"
 	"time"
+
+	"lab.nexedi.com/kirr/go123/my"
+	"lab.nexedi.com/kirr/neo/go/internal/xzlib"
 )

 func dieusage() {
@@ -94,11 +100,37 @@ func BenchmarkAdler32(b *testing.B, arg string) { benchHash(b, adler32.New(), ar
 func BenchmarkCrc32(b *testing.B, arg string)   { benchHash(b, crc32.NewIEEE(), arg) }
 func BenchmarkSha1(b *testing.B, arg string)    { benchHash(b, sha1.New(), arg) }

+// xreadfile returns the whole content of the file at path.
+//
+// If reading fails the error is passed to log.Fatal.
+func xreadfile(path string) []byte {
+	content, err := ioutil.ReadFile(path)
+	if err != nil {
+		log.Fatal(err)
+	}
+	return content
+}
+
+// __dir__ is the directory against which testdata/ files are resolved.
+// NOTE(review): presumably my.File() returns the path of this source file -
+// confirm against go123/my.
+var __dir__ = filepath.Dir(my.File())
+
+// BenchmarkUnzlib measures zlib decompression of testdata/zlib/<zfile>.
+//
+// The compressed file is loaded before the timer is reset, so only the
+// xzlib.Decompress calls are timed. The buffer returned by one iteration is
+// passed to the next one as the output buffer.
+func BenchmarkUnzlib(b *testing.B, zfile string) {
+	zdata := xreadfile(filepath.Join(__dir__, "testdata", "zlib", zfile))
+
+	b.ResetTimer()
+
+	var (
+		data []byte
+		err  error
+	)
+	for i := 0; i < b.N; i++ {
+		data, err = xzlib.Decompress(zdata, data)
+		if err != nil {
+			log.Fatal(err)
+		}
+	}
+}
+

 var benchv = map[string]func(*testing.B, string) {
 	"adler32":	BenchmarkAdler32,
 	"crc32":	BenchmarkCrc32,
 	"sha1":		BenchmarkSha1,
+	"unzlib":	BenchmarkUnzlib,
 }



--- a/go/neo/t/tcpu.py
+++ b/go/neo/t/tcpu.py
@@ -24,7 +24,9 @@ from __future__ import print_function

 import sys
 import hashlib
+import zlib
 from zlib import crc32, adler32
+from os.path import dirname

 from golang import testing

@@ -106,6 +108,24 @@ def bench_crc32(b, blksize):    _bench_hasher(b, CRC32Hasher(), blksize)
 def bench_sha1(b, blksize):     _bench_hasher(b, hashlib.sha1(), blksize)


+def readfile(path):
+    with open(path, 'r') as f:
+        return f.read()
+
+
+# directory part of this script's path; testdata/ is looked up relative to it.
+# NOTE: it is '' when the script is started by bare file name from its own directory.
+__dir__ = dirname(__file__)
+
+def bench_unzlib(b, zfile):
+    zdata = readfile('%s/testdata/zlib/%s' % (__dir__, zfile))
+    b.reset_timer()
+
+    n = b.N
+    i = 0
+    while i < n:
+        zlib.decompress(zdata)
+        i += 1
+
+
 def main():
    bench    = sys.argv[1]
    bencharg = sys.argv[2]

--- a/go/neo/t/testdata/zlib/null-1K
+++ b/go/neo/t/testdata/zlib/null-1K
--- a/go/neo/t/testdata/zlib/null-2M
+++ b/go/neo/t/testdata/zlib/null-2M
--- a/go/neo/t/testdata/zlib/null-4K
+++ b/go/neo/t/testdata/zlib/null-4K
--- a/go/neo/t/testdata/zlib/prod1-avg
+++ b/go/neo/t/testdata/zlib/prod1-avg
--- a/go/neo/t/testdata/zlib/prod1-max
+++ b/go/neo/t/testdata/zlib/prod1-max
--- a/go/neo/t/testdata/zlib/wczdata
+++ b/go/neo/t/testdata/zlib/wczdata