Commit 542917d1 authored by Kirill Smelkov

bigfile/zodb/ZBlk1: Don't miss to deactivate/free internal .chunktab buckets in loadblkdata()

13c0c17c (bigfile/zodb: Format #1 which is optimized for small changes)
used BTree to organize ZBlk1 block's chunks and for loadblkdata() added
"TODO we are missing to free internal BTree structures on data load".

#3, among other things, showed that even when we deactivate ZData objects,
they are still kept as ghosts occupying memory, and the same holds for
IOBucket objects.

This all happens because there is no proper way to deactivate a whole
btree, including its internal bucket objects. And since internal buckets
are not deactivated, they stay in the picklecache and thus hold references
to ZData objects; the ZData objects in turn, even if explicitly
deactivated, stay in memory.

We can fix all of this by implementing a whole-btree deactivation procedure.

To do so we need to iterate over all btree buckets recursively, but
unfortunately there is no BTree API to access/iterate a btree's buckets.
We can however still get references to the top-level buckets via
gc.get_referents(btree) and then scan the buckets further without hacks.

gc.get_referents(btree) is a hack, but

- it works in O(1)  (we only get pointers from btree, not scanning all
  gcable objects and deducing them)
- it works reliably if we filter out uninteresting objects.

So in the end it works.

Before the patch, loading more and more ZBlk1 data under objgraph
instrumentation looked like

    #                                    Nobj        δ
    wendelin.bigfile.file_zodb.ZData     7168      +512
    BTrees.IOBTree.IOBucket               238       +17
    BTrees.IOBTree.IOBTree                 14        +1

and after this patch we now have

    BTrees.IOBTree.IOBTree                 14        +1

We cannot remove that "IOBTree +1", since ZBlk1 holds a direct
reference to it (via .chunktab) and we have to keep ZBlk1 live with
._v_zfile and ._v_zblk set for invalidation to work. The "+1 IOBTree" is
however small - 144 bytes per 2M (= 0.006%) - so we can neglect it the
same way we neglect keeping a live ZBlk1 for each block.
parent f7803634
...@@ -78,6 +78,7 @@ natural to also use "2" here. ...@@ -78,6 +78,7 @@ natural to also use "2" here.
from wendelin.bigfile import BigFile, WRITEOUT_STORE, WRITEOUT_MARKSTORED from wendelin.bigfile import BigFile, WRITEOUT_STORE, WRITEOUT_MARKSTORED
from wendelin.lib.mem import bzero, memcpy from wendelin.lib.mem import bzero, memcpy
from wendelin.lib.zodb import deactivate_btree
from transaction.interfaces import IDataManager, ISynchronizer from transaction.interfaces import IDataManager, ISynchronizer
from persistent import Persistent, PickleCache, GHOST from persistent import Persistent, PickleCache, GHOST
...@@ -292,12 +293,8 @@ class ZBlk1(ZBlkBase): ...@@ -292,12 +293,8 @@ class ZBlk1(ZBlkBase):
stop = start+len(chunk.data) stop = start+len(chunk.data)
blkdata[start:stop] = chunk.data blkdata[start:stop] = chunk.data
# deactivate .chunktab to not waste memory # deactivate whole .chunktab not to waste memory
# (see comments about why in ZBlk0.loadblkdata()) deactivate_btree(self.chunktab)
for chunk in self.chunktab.values():
chunk._p_deactivate()
self.chunktab._p_deactivate()
# TODO deactivate all chunktab buckets - XXX how?
return blkdata return blkdata
......
# Wendelin.core.bigfile | Tests for ZODB utilities
# Copyright (C) 2014-2016 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
# option) any later version, as published by the Free Software Foundation.
#
# You can also Link and Combine this program with other software covered by
# the terms of any of the Open Source Initiative approved licenses and Convey
# the resulting work. Corresponding source of such a combination shall include
# the source code for all other software used.
#
# This program is distributed WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING file for full licensing terms.
from wendelin.lib.zodb import deactivate_btree
from wendelin.lib.testing import getTestDB
from persistent import Persistent, UPTODATE, GHOST
from BTrees.IOBTree import IOBTree
import transaction
import gc
# test database shared by the functions of this module; assigned in setup_module()
testdb = None
def dbopen():
    """Open the module-wide test database and return what its dbopen() gives."""
    root = testdb.dbopen()
    return root
def setup_module():
    """Module setup hook: obtain the test database and run its setup()."""
    global testdb

    # the global is bound first, so it is already set while setup() runs
    testdb = getTestDB()
    testdb.setup()
def teardown_module():
    """Module teardown hook: run teardown() of the module-wide test database."""
    testdb.teardown()
class XInt(Persistent):
    """Persistent object holding a single value in attribute `i`.

    Used by the tests in this module as the leaf objects stored in a btree.
    """

    def __init__(self, i):
        self.i = i
def objscachedv(jar):
    """Return the list of objects reported by `jar._cache.lru_items()`.

    lru_items() yields (oid, obj) pairs; only the objects are kept, in the
    order they are yielded.
    """
    objv = []
    for _oid, obj in jar._cache.lru_items():
        objv.append(obj)
    return objv
def test_deactivate_btree():
    """Check that deactivate_btree() ghostifies a btree and all its leaf objects.

    The check is done twice: once with the btree itself activated right
    before the call, and once with it explicitly deactivated beforehand.
    """
    root = dbopen()

    # populate a btree with many leaf objects and commit it
    tree = IOBTree()
    root['btree'] = tree
    leaves = []
    for key in range(10000):
        leaf = XInt(key)
        tree[key] = leaf
        leaves.append(leaf)
    transaction.commit()

    # everything whose state is inspected below: the btree plus every leaf
    nodes = [tree] + leaves

    for tree_activated_first in (True, False):
        # touch every element so that they become live
        for value in tree.values():
            value._p_activate()

        # at this point the btree and/or some leaves should be up-to-date
        # and present in the connection cache
        in_cache = objscachedv(root._p_jar)
        live_count = 0
        for node in nodes:
            if node._p_state == UPTODATE:
                assert node in in_cache
                live_count += 1
        assert live_count > 0

        # exercise deactivate_btree() from both initial states of the btree
        if tree_activated_first:
            tree._p_activate()
        else:
            tree._p_deactivate()

        # afterwards the btree and all leaves must be ghosts and out of the cache
        deactivate_btree(tree)
        in_cache = objscachedv(root._p_jar)
        for node in nodes:
            assert node._p_state == GHOST
            assert node not in in_cache
...@@ -18,6 +18,8 @@ ...@@ -18,6 +18,8 @@
from ZODB.FileStorage import FileStorage from ZODB.FileStorage import FileStorage
from ZODB import DB from ZODB import DB
from persistent import Persistent
import gc
# open db storage by uri # open db storage by uri
...@@ -62,3 +64,32 @@ def dbclose(root): ...@@ -62,3 +64,32 @@ def dbclose(root):
conn.close() conn.close()
db.close() db.close()
stor.close() stor.close()
# deactivate a btree, including all internal buckets and leaf nodes
def deactivate_btree(btree):
    """Deactivate `btree` together with its internal nodes, buckets and leaf values.

    There is no public BTree API to iterate over a tree's buckets, so the
    direct children are discovered via gc.get_referents(btree) and filtered
    by type:

    - referents of type `btree._bucket_type` are handed to _deactivate_bucket();
    - referents of the same type as `btree` are treated as interior tree
      nodes and deactivated recursively, so that trees deeper than one level
      are fully covered too (NOTE(review): relies on interior nodes being
      instances of the tree's own type - confirm against the BTrees
      implementation in use).

    Any other referent (jar, cache, ...) is ignored.
    """
    # first activate btree, to make sure its children are loaded at all.
    #
    # we have to do this because btree could be automatically deactivated
    # before by cache (the usual way) and then in its ghost state it does not
    # contain pointers to its buckets and thus we won't be able to start
    # deactivation traversal.
    btree._p_activate()

    bucket_type = btree._bucket_type
    node_type = type(btree)

    # `referents` keeps every referent alive for the duration of the loop, so
    # id() values are unique here and can be used to skip an object that is
    # reported more than once (presumably the first bucket can show up both
    # as a child and as the head of the bucket chain). Processing it again
    # would only reload the just-deactivated bucket for nothing.
    referents = gc.get_referents(btree)
    handled = set()
    for ref in referents:
        if id(ref) in handled:
            continue
        handled.add(id(ref))

        if type(ref) is bucket_type:
            _deactivate_bucket(ref)
        elif type(ref) is node_type:
            deactivate_btree(ref)

    btree._p_deactivate()
def _deactivate_bucket(bucket):
    """Deactivate `bucket` and the persistent objects stored as its values.

    A value that is itself of the same type as `bucket` is processed
    recursively; any other value that is a Persistent instance is
    deactivated directly; everything else is left alone. The bucket itself
    is deactivated last.
    """
    # TODO also support objects in keys, when we need it
    bucket_type = type(bucket)
    for value in bucket.values():
        if type(value) == bucket_type:
            _deactivate_bucket(value)
            continue
        if isinstance(value, Persistent):
            value._p_deactivate()

    bucket._p_deactivate()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment