Commit 91a8afa8 authored by Kirill Smelkov's avatar Kirill Smelkov

go/neo/t/neotest: bench-cpu += unzlib for wczblk1 and prod1 objects

NEO uses zlib compression for data, so the client has to spend
time decompressing it. Benchmark how much time zlib decompression takes.
With stdlib zlib decompressor out of the box it looks like:

	name                 time/op
	unzlib/py/wczdata    20.7µs ± 2%
	unzlib/go/wczdata    70.6µs ± 0%
	unzlib/py/prod1-avg  4.02µs ± 1%
	unzlib/go/prod1-avg  15.2µs ± 0%

i.e. very much not in Go's favour.

We'll be fixing that in the following patches.
parent 646a94b5
// Copyright (C) 2017-2018 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// You can also Link and Combine this program with other software covered by
// the terms of any of the Free Software licenses or any of the Open Source
// Initiative approved licenses and Convey the resulting work. Corresponding
// source of such a combination shall include the source code for all other
// software used.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
// See https://www.nexedi.com/licensing for rationale and options.
// Package xzlib provides convenience utilities to compress/decompress zlib data.
package xzlib
import (
"bytes"
"compress/zlib"
"io"
)
// Compress returns data encoded in zlib format.
//
// The default compression level is used and no preset dictionary is set.
func Compress(data []byte) (zdata []byte) {
	buf := new(bytes.Buffer)
	zw := zlib.NewWriter(buf)

	// The zlib writer outputs into a bytes.Buffer, whose Write never returns
	// an error, so a failure here is treated as a programming error.
	if _, err := zw.Write(data); err != nil {
		panic(err)
	}
	if err := zw.Close(); err != nil {
		panic(err)
	}

	return buf.Bytes()
}
// Decompress decodes zlib-encoded zdata and returns the decompressed bytes.
//
// out provides storage for the result: decompression starts writing at
// out[:0], so previous contents of out are overwritten. When the capacity of
// out is not sufficient a larger buffer is allocated, and the returned slice
// then no longer aliases out. out may be nil.
//
// On any error, including one reported while closing the zlib reader, the
// returned data is nil.
func Decompress(zdata []byte, out []byte) (data []byte, err error) {
	zr, err := zlib.NewReader(bytes.NewReader(zdata))
	if err != nil {
		return nil, err
	}

	// Close the reader on the way out; its error is reported only when
	// nothing else has failed before.
	defer func() {
		if cerr := zr.Close(); cerr != nil && err == nil {
			data, err = nil, cerr
		}
	}()

	dst := bytes.NewBuffer(out[:0])
	if _, err = io.Copy(dst, zr); err != nil {
		return nil, err
	}
	return dst.Bytes(), nil
}
// Copyright (C) 2017 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// You can also Link and Combine this program with other software covered by
// the terms of any of the Free Software licenses or any of the Open Source
// Initiative approved licenses and Convey the resulting work. Corresponding
// source of such a combination shall include the source code for all other
// software used.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
// See https://www.nexedi.com/licensing for rationale and options.
package xzlib
import (
"testing"
"github.com/kylelemons/godebug/pretty"
)
// ztestv lists decompression test vectors: in is zlib-encoded input and out
// is the data Decompress is expected to produce for it.
var ztestv = []struct{ in, out string }{
{
in: "x\x9c\xf3H\xcd\xc9\xc9W\x08\xcf/\xcaIQ\x04\x00\x1cI\x04>",
out: "Hello World!",
},
{
in: "x\x9cK.H-*\xce,.I\xcd+\xd1\xcbM,(\xc8\xccK\xe7\n\x80\x0b\xf9BE\n\x19\xf5j\x0b\x99BYR\x12K\x12\x0b\x99k\x0bYB\xd9\x8b3\xd3\xf3\x12s\xca\nY5B9\x18 \x80\xb1\x90-\xb9<5/%5'3O/)3=\xb1\xa8(\xb1R\x0fL\xc6W\xe5\xa7$qE9e\xa6;\x82\xb8\\\x85\xec%\x81\xc5\xc5z\x00\xb0d)\xef",
out: "cpersistent.mapping\nPersistentMapping\nq\x01.}q\x02U\x04dataq\x03}q\x04U\x07signalvq\x05(U\x08\x00\x00\x00\x00\x00\x00\x00\x01q\x06cwendelin.bigarray.array_zodb\nZBigArray\nq\x07tQss.",
},
}
// TestDecompress verifies that Decompress restores the expected data for
// every vector in ztestv.
func TestDecompress(t *testing.T) {
	for _, tt := range ztestv {
		got, err := Decompress([]byte(tt.in), nil)
		if err != nil {
			// report the error itself, not only the input that triggered it
			t.Errorf("decompress err: %q: %v", tt.in, err)
			continue
		}

		gots := string(got)
		if gots != tt.out {
			t.Errorf("decompress output mismatch:\n%s\n",
				pretty.Compare(tt.out, gots))
		}
	}
}
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2018 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
# option) any later version, as published by the Free Software Foundation.
#
# You can also Link and Combine this program with other software covered by
# the terms of any of the Free Software licenses or any of the Open Source
# Initiative approved licenses and Convey the resulting work. Corresponding
# source of such a combination shall include the source code for all other
# software used.
#
# This program is distributed WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options.
"""generate testdata/ files"""
import zlib
import zodbtools.util as zutil
from tcpu import fmtsize
# size units
K = 1024
M = 1024*K
# lengths of the all-zero inputs for which main generates testdata/zlib/null-* files
sizev = (1*K, 4*K, 2*M)
def writefile(path, data):
    """Write data to the file at path, replacing any previous content.

    The file is opened in binary mode: callers pass zlib-compressed data,
    which must be stored without any text-mode translation.
    """
    with open(path, 'wb') as f:
        f.write(data)
def zcompress(data):
    """Return data compressed by zlib.compress with its default settings."""
    return zlib.compress(data)
def main():
    """Generate the zlib test data files under testdata/zlib/."""
    # zlib/null
    for size in sizev:
        zdata = zcompress('\0' * size)
        writefile('testdata/zlib/null-%s' % fmtsize(size), zdata)

    # representative ZODB objects
    # (to regenerate this requires `neotest zbench-local` to be already run once)

    # wendelin.core's ZData
    zdatav = []
    def collect_zdata(objdata):
        if 'ZData' in objdata:  # XXX hack
            zdatav.append(objdata)
    iter_zobjects('var/wczblk1-8/fs1/data.fs', collect_zdata)
    writeobjects('testdata/zlib/wczdata', zdatav)

    # min avg max from prod1
    # (list.append returns None, so iteration is never stopped early)
    prod1_objv = []
    iter_zobjects('var/prod1-1024/fs1/data.fs', prod1_objv.append)
    writeobjects('testdata/zlib/prod1', prod1_objv)
# writeobjects writes, compressed, the objects of average and maximum
# uncompressed size from objv to files named after prefix.
#
# objv is sorted in place by object length. If the average-sized and the
# largest object have the same length only one file, named prefix, is written;
# otherwise prefix-avg and prefix-max are written.
def writeobjects(prefix, objv):
    objv.sort(key=len)
    lavg = sum(map(len, objv)) // len(objv)

    # objv is sorted by length, so the first object that is at least lavg
    # long is the one a lower-bound binary search would select.
    objavg = next(obj for obj in objv if len(obj) >= lavg)
    objmax = objv[-1]

    if len(objavg) == len(objmax):  # it is so for wczdata
        writefile('%s' % prefix, zcompress(objavg))
    else:
        writefile('%s-avg' % prefix, zcompress(objavg))
        writefile('%s-max' % prefix, zcompress(objmax))
# iter_zobjects iterates through all non-nil object data from fs1@path.
#
# for every object f is called, and if it returns !false iteration is stopped.
# the storage opened for path is closed before returning, whether iteration
# ran to the end, was stopped by f, or raised.
def iter_zobjects(path, f):
    stor = zutil.storageFromURL(path, read_only=True)
    try:
        for txn in stor.iterator():
            for obj in txn:
                if obj.data is not None and f(obj.data):
                    return
    finally:
        # NOTE(review): relies on the storage providing close(), as ZODB
        # storages do - confirm for what storageFromURL returns.
        stor.close()
# run the generator only when this file is executed as a script
if __name__ == '__main__':
main()
......@@ -959,6 +959,14 @@ bench_cpu() {
nrun tcpu_go $bench $size
done
done
datav="wczdata prod1-avg" # null-1K null-4K null-2M prod1-max
for data in $datav; do
nrun tcpu.py unzlib $data
nrun tcpu_go unzlib $data
done
# TODO bench compress
}
# bench_disk - benchmark direct (uncached) and cached random reads
......
......@@ -18,6 +18,7 @@
// See https://www.nexedi.com/licensing for rationale and options.
// +build ignore
//go:generate ./gen-testdata
// tcpu - cpu-related benchmarks
package main
......@@ -29,11 +30,16 @@ import (
"hash"
"hash/adler32"
"hash/crc32"
"io/ioutil"
"log"
"os"
"path/filepath"
"strconv"
"testing"
"time"
"lab.nexedi.com/kirr/go123/my"
"lab.nexedi.com/kirr/neo/go/internal/xzlib"
)
func dieusage() {
......@@ -94,11 +100,37 @@ func BenchmarkAdler32(b *testing.B, arg string) { benchHash(b, adler32.New(), ar
func BenchmarkCrc32(b *testing.B, arg string) { benchHash(b, crc32.NewIEEE(), arg) }
func BenchmarkSha1(b *testing.B, arg string) { benchHash(b, sha1.New(), arg) }
// xreadfile returns the whole content of the file at path.
//
// If the file cannot be read the error is reported via log.Fatal, which
// terminates the program.
func xreadfile(path string) []byte {
	content, err := ioutil.ReadFile(path)
	if err == nil {
		return content
	}

	log.Fatal(err)
	panic("unreachable") // log.Fatal does not return
}
// __dir__ is the directory part of the path returned by my.File()
// (presumably the path of this source file - TODO confirm).
var __dir__ = filepath.Dir(my.File())
// BenchmarkUnzlib measures xzlib.Decompress on the zlib-compressed file
// testdata/zlib/<zfile> under __dir__.
//
// The file is loaded before the timer is reset, so only decompression is
// timed. The buffer returned by one iteration is passed as the destination to
// the next one, letting Decompress reuse it when it is large enough.
//
// A decompression error is reported via log.Fatal.
func BenchmarkUnzlib(b *testing.B, zfile string) {
	// filepath.Join instead of manual "/" formatting builds an OS-correct path
	zdata := xreadfile(filepath.Join(__dir__, "testdata", "zlib", zfile))

	b.ResetTimer()

	var data []byte
	var err error
	for i := 0; i < b.N; i++ {
		data, err = xzlib.Decompress(zdata, data)
		if err != nil {
			log.Fatal(err)
		}
	}
}
// benchv maps a benchmark name to the function that implements it.
// Every benchmark function receives *testing.B and one string argument.
var benchv = map[string]func(*testing.B, string) {
"adler32": BenchmarkAdler32,
"crc32": BenchmarkCrc32,
"sha1": BenchmarkSha1,
"unzlib": BenchmarkUnzlib,
}
......
......@@ -24,7 +24,9 @@ from __future__ import print_function
import sys
import hashlib
import zlib
from zlib import crc32, adler32
from os.path import dirname
from golang import testing
......@@ -106,6 +108,24 @@ def bench_crc32(b, blksize): _bench_hasher(b, CRC32Hasher(), blksize)
def bench_sha1(b, blksize): _bench_hasher(b, hashlib.sha1(), blksize)
def readfile(path):
    """Return the whole content of the file at path as raw bytes.

    The file is opened in binary mode: it is used to load zlib-compressed
    data, which must not go through text decoding or newline translation.
    """
    with open(path, 'rb') as f:
        return f.read()
# directory part of this script's path, as given by __file__
__dir__ = dirname(__file__)
def bench_unzlib(b, zfile):
    """Benchmark zlib.decompress on the file testdata/zlib/<zfile> under __dir__.

    The file is loaded before the timer is reset, so only decompression is
    timed. zlib.decompress is called b.N times on the same input.
    """
    from os.path import join

    # join, unlike '%s/...' formatting, keeps the path relative when __dir__
    # is '' (script started by bare file name from its own directory) instead
    # of turning it into the absolute '/testdata/...'.
    zdata = readfile(join(__dir__, 'testdata', 'zlib', zfile))
    b.reset_timer()

    n = b.N
    i = 0
    while i < n:
        zlib.decompress(zdata)
        i += 1
def main():
bench = sys.argv[1]
bencharg = sys.argv[2]
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment