Commit 3f631932 authored by Kirill Smelkov's avatar Kirill Smelkov

fixup! fixup! ZBigFile: Add ZBlk format option 'h' (heuristic) (4)

Take suggestions from Levin into account (nexedi/wendelin.core!20 (comment 198330)) :

    1. appending can be False, even though we are appending (misleading name).
    2. A big append uses ZBlk0 due to an if clause 25 lines later (logic is a bit far).
    3. in the previous version it could happen that if a block was filled up
       with small appends (ZBlk1), it wasn't transformed to ZBlk0 in case
       the next block would be filled up with only one big append.
    4. Regarding the actual algorithm, I wonder, why do we only use ZBlk0
       for big appends in case it's the first append of a new ZBlk? Couldn't
       we generally say it's ok to use ZBlk0 in case of big appends?

All these notes are valid. The problem comes from misleading semantics
attached to 'appending' name. From the name it indicates only appending,
but sometimes we want to attach 'small' meaning to it and we were not
doing it universally.

-> Fix the problem by splitting 'appending' and 'small' into separate
   flags so that there is no room for confusion.

-> Rework the flow of code so that all cases that relate to appending
   are under one branch.

-> Also optimize ndelta computation - when done in plain python just
   this part was taking a lot of time as timing for initial writeup
   showed:

     writeup with ZBlk0: ~20-25s
     writeup with ZBlk1: ~20-30s
     writeup with auto:  was ~ 120s

   now, after switching to numpy for ndelta computation, whole runtime
   with 'auto' is taking ~ 35s. The whole runtime, if I observe
   benchmark execution correctly, is dominated by database writeup.
parent 4eb85a12
...@@ -87,7 +87,7 @@ users can explicitly indicate via environment variable that their workload is ...@@ -87,7 +87,7 @@ users can explicitly indicate via environment variable that their workload is
either "big changes", if they prefer to prioritize access speed, or "small either "big changes", if they prefer to prioritize access speed, or "small
changes" if they prefer to prioritize database size over access speed. There is changes" if they prefer to prioritize database size over access speed. There is
also "auto" mode that tries to heuristically use both ZBlk0 and ZBlk1 depending also "auto" mode that tries to heuristically use both ZBlk0 and ZBlk1 depending
on change pattern and works relatively good regrding both access speed and on change pattern and works relatively good regarding both access speed and
database size for append-like workloads:: database size for append-like workloads::
$WENDELIN_CORE_ZBLK_FMT $WENDELIN_CORE_ZBLK_FMT
...@@ -164,7 +164,7 @@ will be our future approach after we teach NEO about object deduplication. ...@@ -164,7 +164,7 @@ will be our future approach after we teach NEO about object deduplication.
from wendelin.bigfile import WRITEOUT_STORE, WRITEOUT_MARKSTORED from wendelin.bigfile import WRITEOUT_STORE, WRITEOUT_MARKSTORED
from wendelin.bigfile._file_zodb import _ZBigFile from wendelin.bigfile._file_zodb import _ZBigFile
from wendelin.lib.mem import bzero, memcpy from wendelin.lib.mem import bzero, memcpy, memdelta
from wendelin.lib.zodb import LivePersistent, deactivate_btree from wendelin.lib.zodb import LivePersistent, deactivate_btree
from transaction.interfaces import IDataManager, ISynchronizer from transaction.interfaces import IDataManager, ISynchronizer
...@@ -588,34 +588,33 @@ class ZBigFile(LivePersistent): ...@@ -588,34 +588,33 @@ class ZBigFile(LivePersistent):
# fast reads and small database size growth ..." # fast reads and small database size growth ..."
def _zblk_fmt_heuristic(self, zblk, blk, buf): def _zblk_fmt_heuristic(self, zblk, blk, buf):
# see if we are doing a "small append" like change # see if we are doing a "small append" like change
# load previous data along the way # load previous data and compute the difference along the way
new_data = bytes(buf).rstrip(b'\0') new_data = bytes(buf).rstrip(b'\0')
old_data = b''
if zblk is None: if zblk is None:
appending = (len(new_data) < 0.5*self.blksize) old_data = b''
else: else:
old_data = bytes(zblk.loadblkdata()).rstrip(b'\0') old_data = bytes(zblk.loadblkdata()).rstrip(b'\0')
appending = new_data[:len(old_data)] == old_data ndelta = memdelta(old_data, new_data)
# append - use ZBlk1 and migrate previously filled-up block to ZBlk0 for fast reads append = (new_data[:len(old_data)] == old_data)
if appending: small = (ndelta < 0.5*self.blksize)
# append - migrate previously filled-up block to ZBlk0 for fast reads
# - for current block use ZBlk1 if the append is small and ZBlk0 otherwise
if append:
if not zblk and blk > 0: # is new zblk? if not zblk and blk > 0: # is new zblk?
zblk_prev = self.blktab.get(blk-1) zblk_prev = self.blktab.get(blk-1)
if zblk_prev is not None and type(zblk_prev) is not ZBlk0: if zblk_prev is not None and type(zblk_prev) is not ZBlk0:
self._setzblk(blk-1, zblk_prev, zblk_prev.loadblkdata(), ZBlk0) self._setzblk(blk-1, zblk_prev, zblk_prev.loadblkdata(), ZBlk0)
return ZBlk1 return ZBlk1 if small else ZBlk0
# arbitrary change - use ZBlk1 if the change is small and ZBlk0 otherwise # all other changes - use ZBlk1 if the change is small and ZBlk0 otherwise
else: else:
# TODO(kirr): "to support sporadic small changes over initial big fillup [...] if small:
# we could introduce e.g. a ZBlkδ object, which would refer to base # TODO(kirr): "to support sporadic small changes over initial big fillup [...]
# underlying ZBlk object and add "patch" information on top of that [...]." # we could introduce e.g. a ZBlkδ object, which would refer to base
# See https://lab.nexedi.com/nexedi/wendelin.core/merge_requests/20#note_196084 # underlying ZBlk object and add "patch" information on top of that [...]."
ndelta = 0 # See https://lab.nexedi.com/nexedi/wendelin.core/merge_requests/20#note_196084
for i in range(self.blksize):
if new_data[i:i+1] != old_data[i:i+1]:
ndelta += 1
if ndelta < 0.5*self.blksize:
return ZBlk1 return ZBlk1
else: else:
return ZBlk0 return ZBlk0
......
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Wendelin. Memory helpers # Wendelin. Memory helpers
# Copyright (C) 2014-2015 Nexedi SA and Contributors. # Copyright (C) 2014-2024 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
# #
# See COPYING file for full licensing terms. # See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options. # See https://www.nexedi.com/licensing for rationale and options.
from numpy import ndarray, uint8, copyto from numpy import ndarray, uint8, copyto, count_nonzero
# zero buffer memory # zero buffer memory
...@@ -42,3 +42,15 @@ def memcpy(dst, src): ...@@ -42,3 +42,15 @@ def memcpy(dst, src):
adst = ndarray(l, buffer=dst, dtype=uint8) adst = ndarray(l, buffer=dst, dtype=uint8)
asrc = ndarray(l, buffer=src, dtype=uint8) asrc = ndarray(l, buffer=src, dtype=uint8)
copyto(adst, asrc) copyto(adst, asrc)
# memdelta returns how many bytes are different in between buf1 and buf2.
def memdelta(buf1, buf2):
    n1, n2 = len(buf1), len(buf2)
    ncommon = min(n1, n2)
    # every byte past the end of the shorter buffer counts as different
    ntail = max(n1, n2) - ncommon
    # compare the common prefix at C speed via zero-copy uint8 views
    v1 = ndarray(ncommon, buffer=buf1, dtype=uint8)
    v2 = ndarray(ncommon, buffer=buf2, dtype=uint8)
    return ntail + count_nonzero(v1 != v2)
# Copyright (C) 2024 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
# option) any later version, as published by the Free Software Foundation.
#
# You can also Link and Combine this program with other software covered by
# the terms of any of the Free Software licenses or any of the Open Source
# Initiative approved licenses and Convey the resulting work. Corresponding
# source of such a combination shall include the source code for all other
# software used.
#
# This program is distributed WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options.
from wendelin.lib.mem import memdelta
def test_memdelta():
    # (buf1, buf2, expected number of differing bytes)
    cases = [
        (b'', b'', 0),
        (b'', b'123', 3),
        (b'ab', b'', 2),
        (b'abc', b'abc', 0),
        (b'aXc', b'aYc', 1),
        (b'aXcZ', b'aYc', 2),
        (b'aXcZ', b'aYcZ', 1),
        (b'aXcZ', b'aYcQ', 2),
        (b'aXcZ', b'aYcQR', 3),
        (b'aXcZE', b'aYcQR', 3),
        (b'aXcZEF', b'aYcQR', 4),
        (b'aXcZEF', b'aYcQRS', 4),
        (b'aXcdEF', b'aYcdRS', 3),
        (b'aXcdeF', b'aYcdeS', 2),
        (b'aXcdef', b'aYcdef', 1),
        (b'abcdef', b'abcdef', 0),
    ]
    for buf1, buf2, ndelta in cases:
        assert memdelta(buf1, buf2) == ndelta
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment