Commit 4eb85a12 authored by Kirill Smelkov

fixup! ZBigFile: Add ZBlk format option 'h' (heuristic) (4)

Try to keep documented promise that

	The heuristic 'auto' should behave as good as ZBlk0 in case of wide changes

Previously the heuristic code would use ZBlk0 only in the append case, after
switching the previously filled-up block to ZBlk0, and the case of arbitrary
changes was unconditionally using ZBlk1.

Now we see if the append is small and use ZBlk1 only then. If the append
is big, or if the change itself is big - we use ZBlk0.

This should restore documented behaviour that heuristic behaves as good
as ZBlk0 in case of wide changes.

TODO update benchmark/test to cover this case.
parent e3570c83
...@@ -587,20 +587,38 @@ class ZBigFile(LivePersistent): ...@@ -587,20 +587,38 @@ class ZBigFile(LivePersistent):
# servers, wendelin.core cannot provide at the same time both # servers, wendelin.core cannot provide at the same time both
# fast reads and small database size growth ..." # fast reads and small database size growth ..."
def _zblk_fmt_heuristic(self, zblk, blk, buf): def _zblk_fmt_heuristic(self, zblk, blk, buf):
if _is_appending(zblk, buf): # see if we are doing a "small append" like change
# load previous data along the way
new_data = bytes(buf).rstrip(b'\0')
old_data = b''
if zblk is None:
appending = (len(new_data) < 0.5*self.blksize)
else:
old_data = bytes(zblk.loadblkdata()).rstrip(b'\0')
appending = new_data[:len(old_data)] == old_data
# append - use ZBlk1 and migrate previously filled-up block to ZBlk0 for fast reads
if appending:
if not zblk and blk > 0: # is new zblk? if not zblk and blk > 0: # is new zblk?
# Set previous filled-up ZBlk to ZBlk0 for fast reads zblk_prev = self.blktab.get(blk-1)
previous_blk = blk - 1 if zblk_prev is not None and type(zblk_prev) is not ZBlk0:
previous_zblk = self.blktab.get(previous_blk) self._setzblk(blk-1, zblk_prev, zblk_prev.loadblkdata(), ZBlk0)
if previous_zblk is not None:
self._setzblk(previous_blk, previous_zblk, previous_zblk.loadblkdata(), ZBlk0)
return ZBlk1 return ZBlk1
else: # it's changing
# kirr: "to support sporadic small changes over initial big fillup [...] # arbitrary change - use ZBlk1 if the change is small and ZBlk0 otherwise
else:
# TODO(kirr): "to support sporadic small changes over initial big fillup [...]
# we could introduce e.g. a ZBlkδ object, which would refer to base # we could introduce e.g. a ZBlkδ object, which would refer to base
# underlying ZBlk object and add "patch" information on top of that [...]." # underlying ZBlk object and add "patch" information on top of that [...]."
# See https://lab.nexedi.com/nexedi/wendelin.core/merge_requests/20#note_196084 # See https://lab.nexedi.com/nexedi/wendelin.core/merge_requests/20#note_196084
ndelta = 0
for i in range(self.blksize):
if new_data[i:i+1] != old_data[i:i+1]:
ndelta += 1
if ndelta < 0.5*self.blksize:
return ZBlk1 return ZBlk1
else:
return ZBlk0
# invalidate data .blktab[blk] invalidated -> invalidate page # invalidate data .blktab[blk] invalidated -> invalidate page
...@@ -865,11 +883,3 @@ class _ZBigFileH(object): ...@@ -865,11 +883,3 @@ class _ZBigFileH(object):
# and also more right - tpc_finish is there assumed as non-failing by # and also more right - tpc_finish is there assumed as non-failing by
# ZODB design) # ZODB design)
self.abort(txn) self.abort(txn)
# Utility functions for heuristic
def _is_appending(zblk, buf):
    """Tell whether writing `buf` over `zblk` looks like an append.

    zblk -- previously stored block object, or a false value (e.g. None)
            when there is no previous block; when present it must provide
            `loadblkdata()` returning bytes-like data.
    buf  -- bytes-like object with the new block content.

    Returns True when there is no previous block, or when the previous
    data is a prefix of the new data. Trailing zero bytes are ignored on
    both sides of the comparison.
    """
    if not zblk:
        # no previous block -> nothing is being overwritten
        return True

    # Strip zero padding from *both* old and new data. Stripping only the
    # new data would make any old block that carries trailing zeros fail
    # the prefix check even when its meaningful content is unchanged.
    old_data = bytes(zblk.loadblkdata()).rstrip(b'\0')
    new_data = bytes(buf).rstrip(b'\0')
    return new_data.startswith(old_data)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment