Commit 313d2d78 authored by Kirill Smelkov

go/zodb/btree: Expose access to BTree/Bucket entries as public API

Traditionally BTrees in ZODB/py expose point query and iteration APIs.
However they don't allow a BTree to be scanned through concurrently.

For example in wendelin.core each ZBlk1 consists of an IOBTree with 512
chunks

	https://lab.nexedi.com/nexedi/wendelin.core/blob/v0.12-6-g318efce/bigfile/file_zodb.py#L267

and loading those chunks from ZODB one-by-one serially is very slow.

Expose a way to retrieve all children of a B⁺ tree node. This way
loading them all could be started in parallel thus significantly
reducing overall latency if a range or whole BTree needs to be fetched.
parent 8d21e8cc
......@@ -22,6 +22,8 @@
//
// node.Get(key) performs point-query.
//
// node.Entryv() returns [] of (key, child/value).
//
// --------
//
// (*) https://github.com/zopefoundation/ZODB/blob/3.10.7-4-gb8d7a8567/src/BTrees/Development.txt#L211
......
......@@ -31,6 +31,8 @@ import (
// BTree is a non-leaf node of a B⁺ tree.
//
// It contains []Entry in ↑ key order.
//
// It mimics BTree from btree/py.
type BTree struct {
zodb.Persistent
......@@ -49,19 +51,23 @@ type BTree struct {
// order. data[0].key is unused. For i in 0 .. len-1, all keys reachable
// from data[i].child are >= data[i].key and < data[i+1].key, at the
// endpoints pretending that data[0].key is -∞ and data[len].key is +∞.
data []_BTreeItem
data []Entry
}
// _BTreeItem mimics BTreeItem from btree/py.
// Entry is one BTree node entry.
//
// It contains key and child, which is either BTree or Bucket.
//
// XXX export for BTree.Children?
type _BTreeItem struct {
// Key limits child's keys - see BTree.Entryv for details.
type Entry struct {
// key is the lower bound of the keys reachable through child; for the
// first entry of a node the stored key is unused.
// KEY is a placeholder that the generation script replaces with a
// concrete key type.
key KEY
// child is the node this entry points to.
child interface{} // BTree or Bucket
}
// Bucket is a leaf node of a B⁺ tree.
//
// It contains []BucketEntry in ↑ key order.
//
// It mimics Bucket from btree/py.
type Bucket struct {
zodb.Persistent
......@@ -79,6 +85,54 @@ type Bucket struct {
values []interface{} // 'len' corresponding values
}
// BucketEntry is one Bucket node entry.
//
// It contains key and value.
//
// Field order matters: Bucket.Entryv builds entries with a positional
// composite literal.
type BucketEntry struct {
// key is the entry key. KEY is a placeholder that the generation script
// replaces with a concrete key type.
key KEY
// value is the value stored alongside key.
value interface{}
}
// Key reports the key stored in a BTree entry.
func (e *Entry) Key() KEY {
	return e.key
}
// Child reports the child node a BTree entry points to.
//
// The child is either a BTree or a Bucket.
func (e *Entry) Child() interface{} {
	return e.child
}
// Entryv returns the entries of a BTree node.
//
// Entry keys bound the keys of everything reachable through an entry's child:
//
//	[i].Key ≤ [i].Child.*.Key < [i+1].Key    i ∈ [0, len([]))
//
//	[0].Key       = -∞	; to be assumed so (the stored data[0].key is unused)
//	[len(ev)].Key = +∞	; should be assumed so
//
// Children of all entries are guaranteed to be of the same kind - either
// all BTree, or all Bucket.
//
// The returned slice is the node's own storage, not a copy: the caller
// must not modify it.
func (t *BTree) Entryv() []Entry {
	entryv := t.data
	return entryv
}
// Key reports the key stored in a Bucket entry.
func (e *BucketEntry) Key() KEY {
	return e.key
}
// Value reports the value stored in a Bucket entry.
func (e *BucketEntry) Value() interface{} {
	return e.value
}
// Entryv returns the entries of a Bucket node.
//
// Entries are produced in the order of the bucket's keys; entry i pairs
// keys[i] with values[i]. The returned slice is freshly allocated on
// every call.
func (b *Bucket) Entryv() []BucketEntry {
	n := len(b.keys)
	entries := make([]BucketEntry, 0, n)
	for i := 0; i < n; i++ {
		entries = append(entries, BucketEntry{key: b.keys[i], value: b.values[i]})
	}
	return entries
}
// Get searches BTree by key.
//
// It loads intermediate BTree nodes from database on demand as needed.
......@@ -310,7 +364,7 @@ func (bt *btreeState) PySetState(pystate interface{}) (err error) {
}
bt.firstbucket = bucket
bt.data = []_BTreeItem{{key: 0, child: bucket}}
bt.data = []Entry{{key: 0, child: bucket}}
return nil
}
......@@ -329,7 +383,7 @@ func (bt *btreeState) PySetState(pystate interface{}) (err error) {
}
n := (len(t) + 1) / 2
bt.data = make([]_BTreeItem, 0, n)
bt.data = make([]Entry, 0, n)
var kprev int64
var childrenKind int // 1 - BTree, 2 - Bucket
for i, idx := 0, 0; i < n; i++ {
......@@ -374,7 +428,7 @@ func (bt *btreeState) PySetState(pystate interface{}) (err error) {
fmt.Errorf("data: [%d]: children must be of the same type", i)
}
bt.data = append(bt.data, _BTreeItem{key: kkey, child: child})
bt.data = append(bt.data, Entry{key: kkey, child: child})
}
return nil
......
......@@ -177,5 +177,7 @@ func TestBTree(t *testing.T) {
return firstbucket
}
// XXX verify Entryv ?
verifyFirstBucket(B3)
}
......@@ -39,8 +39,9 @@ sed \
-e "s/KEY/$KEY/g" \
-e "s/<Key>/$Key/g" \
-e "s/\bBTree\b/${KIND}BTree/g" \
-e "s/\b_BTreeItem\b/_${KIND}BTreeItem/g" \
-e "s/\bEntry\b/${KIND}Entry/g" \
-e "s/\bBucket\b/${KIND}Bucket/g" \
-e "s/\bBucketEntry\b/${KIND}BucketEntry/g" \
-e "s/\bbtreeState\b/${kind}btreeState/g" \
-e "s/\bbucketState\b/${kind}bucketState/g" \
$input >>$out
......@@ -33,6 +33,8 @@ import (
// IOBTree is a non-leaf node of a B⁺ tree.
//
// It contains []IOEntry in ↑ key order.
//
// It mimics IOBTree from btree/py.
type IOBTree struct {
zodb.Persistent
......@@ -51,19 +53,23 @@ type IOBTree struct {
// order. data[0].key is unused. For i in 0 .. len-1, all keys reachable
// from data[i].child are >= data[i].key and < data[i+1].key, at the
// endpoints pretending that data[0].key is -∞ and data[len].key is +∞.
data []_IOBTreeItem
data []IOEntry
}
// _IOBTreeItem mimics BTreeItem from btree/py.
// IOEntry is one IOBTree node entry.
//
// It contains key and child, which is either IOBTree or IOBucket.
//
// XXX export for IOBTree.Children?
type _IOBTreeItem struct {
// Key limits child's keys - see IOBTree.Entryv for details.
type IOEntry struct {
// key is the lower bound of the keys reachable through child; for the
// first entry of a node the stored key is unused.
key int32
// child is the node this entry points to.
child interface{} // IOBTree or IOBucket
}
// IOBucket is a leaf node of a B⁺ tree.
//
// It contains []IOBucketEntry in ↑ key order.
//
// It mimics IOBucket from btree/py.
type IOBucket struct {
zodb.Persistent
......@@ -81,6 +87,54 @@ type IOBucket struct {
values []interface{} // 'len' corresponding values
}
// IOBucketEntry is one IOBucket node entry.
//
// It contains key and value.
//
// Field order matters: IOBucket.Entryv builds entries with a positional
// composite literal.
type IOBucketEntry struct {
// key is the entry key.
key int32
// value is the value stored alongside key.
value interface{}
}
// Key reports the key stored in an IOBTree entry.
func (e *IOEntry) Key() int32 {
	return e.key
}
// Child reports the child node an IOBTree entry points to.
//
// The child is either an IOBTree or an IOBucket.
func (e *IOEntry) Child() interface{} {
	return e.child
}
// Entryv returns the entries of an IOBTree node.
//
// Entry keys bound the keys of everything reachable through an entry's child:
//
//	[i].Key ≤ [i].Child.*.Key < [i+1].Key    i ∈ [0, len([]))
//
//	[0].Key       = -∞	; to be assumed so (the stored data[0].key is unused)
//	[len(ev)].Key = +∞	; should be assumed so
//
// Children of all entries are guaranteed to be of the same kind - either
// all IOBTree, or all IOBucket.
//
// The returned slice is the node's own storage, not a copy: the caller
// must not modify it.
func (t *IOBTree) Entryv() []IOEntry {
	entryv := t.data
	return entryv
}
// Key reports the key stored in an IOBucket entry.
func (e *IOBucketEntry) Key() int32 {
	return e.key
}
// Value reports the value stored in an IOBucket entry.
func (e *IOBucketEntry) Value() interface{} {
	return e.value
}
// Entryv returns the entries of an IOBucket node.
//
// Entries are produced in the order of the bucket's keys; entry i pairs
// keys[i] with values[i]. The returned slice is freshly allocated on
// every call.
func (b *IOBucket) Entryv() []IOBucketEntry {
	n := len(b.keys)
	entries := make([]IOBucketEntry, 0, n)
	for i := 0; i < n; i++ {
		entries = append(entries, IOBucketEntry{key: b.keys[i], value: b.values[i]})
	}
	return entries
}
// Get searches IOBTree by key.
//
// It loads intermediate IOBTree nodes from database on demand as needed.
......@@ -312,7 +366,7 @@ func (bt *iobtreeState) PySetState(pystate interface{}) (err error) {
}
bt.firstbucket = bucket
bt.data = []_IOBTreeItem{{key: 0, child: bucket}}
bt.data = []IOEntry{{key: 0, child: bucket}}
return nil
}
......@@ -331,7 +385,7 @@ func (bt *iobtreeState) PySetState(pystate interface{}) (err error) {
}
n := (len(t) + 1) / 2
bt.data = make([]_IOBTreeItem, 0, n)
bt.data = make([]IOEntry, 0, n)
var kprev int64
var childrenKind int // 1 - IOBTree, 2 - IOBucket
for i, idx := 0, 0; i < n; i++ {
......@@ -376,7 +430,7 @@ func (bt *iobtreeState) PySetState(pystate interface{}) (err error) {
fmt.Errorf("data: [%d]: children must be of the same type", i)
}
bt.data = append(bt.data, _IOBTreeItem{key: kkey, child: child})
bt.data = append(bt.data, IOEntry{key: kkey, child: child})
}
return nil
......
......@@ -33,6 +33,8 @@ import (
// LOBTree is a non-leaf node of a B⁺ tree.
//
// It contains []LOEntry in ↑ key order.
//
// It mimics LOBTree from btree/py.
type LOBTree struct {
zodb.Persistent
......@@ -51,19 +53,23 @@ type LOBTree struct {
// order. data[0].key is unused. For i in 0 .. len-1, all keys reachable
// from data[i].child are >= data[i].key and < data[i+1].key, at the
// endpoints pretending that data[0].key is -∞ and data[len].key is +∞.
data []_LOBTreeItem
data []LOEntry
}
// _LOBTreeItem mimics BTreeItem from btree/py.
// LOEntry is one LOBTree node entry.
//
// It contains key and child, which is either LOBTree or LOBucket.
//
// XXX export for LOBTree.Children?
type _LOBTreeItem struct {
// Key limits child's keys - see LOBTree.Entryv for details.
type LOEntry struct {
// key is the lower bound of the keys reachable through child; for the
// first entry of a node the stored key is unused.
key int64
// child is the node this entry points to.
child interface{} // LOBTree or LOBucket
}
// LOBucket is a leaf node of a B⁺ tree.
//
// It contains []LOBucketEntry in ↑ key order.
//
// It mimics LOBucket from btree/py.
type LOBucket struct {
zodb.Persistent
......@@ -81,6 +87,54 @@ type LOBucket struct {
values []interface{} // 'len' corresponding values
}
// LOBucketEntry is one LOBucket node entry.
//
// It contains key and value.
//
// Field order matters: LOBucket.Entryv builds entries with a positional
// composite literal.
type LOBucketEntry struct {
// key is the entry key.
key int64
// value is the value stored alongside key.
value interface{}
}
// Key reports the key stored in an LOBTree entry.
func (e *LOEntry) Key() int64 {
	return e.key
}
// Child reports the child node an LOBTree entry points to.
//
// The child is either an LOBTree or an LOBucket.
func (e *LOEntry) Child() interface{} {
	return e.child
}
// Entryv returns the entries of an LOBTree node.
//
// Entry keys bound the keys of everything reachable through an entry's child:
//
//	[i].Key ≤ [i].Child.*.Key < [i+1].Key    i ∈ [0, len([]))
//
//	[0].Key       = -∞	; to be assumed so (the stored data[0].key is unused)
//	[len(ev)].Key = +∞	; should be assumed so
//
// Children of all entries are guaranteed to be of the same kind - either
// all LOBTree, or all LOBucket.
//
// The returned slice is the node's own storage, not a copy: the caller
// must not modify it.
func (t *LOBTree) Entryv() []LOEntry {
	entryv := t.data
	return entryv
}
// Key reports the key stored in an LOBucket entry.
func (e *LOBucketEntry) Key() int64 {
	return e.key
}
// Value reports the value stored in an LOBucket entry.
func (e *LOBucketEntry) Value() interface{} {
	return e.value
}
// Entryv returns the entries of an LOBucket node.
//
// Entries are produced in the order of the bucket's keys; entry i pairs
// keys[i] with values[i]. The returned slice is freshly allocated on
// every call.
func (b *LOBucket) Entryv() []LOBucketEntry {
	n := len(b.keys)
	entries := make([]LOBucketEntry, 0, n)
	for i := 0; i < n; i++ {
		entries = append(entries, LOBucketEntry{key: b.keys[i], value: b.values[i]})
	}
	return entries
}
// Get searches LOBTree by key.
//
// It loads intermediate LOBTree nodes from database on demand as needed.
......@@ -312,7 +366,7 @@ func (bt *lobtreeState) PySetState(pystate interface{}) (err error) {
}
bt.firstbucket = bucket
bt.data = []_LOBTreeItem{{key: 0, child: bucket}}
bt.data = []LOEntry{{key: 0, child: bucket}}
return nil
}
......@@ -331,7 +385,7 @@ func (bt *lobtreeState) PySetState(pystate interface{}) (err error) {
}
n := (len(t) + 1) / 2
bt.data = make([]_LOBTreeItem, 0, n)
bt.data = make([]LOEntry, 0, n)
var kprev int64
var childrenKind int // 1 - LOBTree, 2 - LOBucket
for i, idx := 0, 0; i < n; i++ {
......@@ -376,7 +430,7 @@ func (bt *lobtreeState) PySetState(pystate interface{}) (err error) {
fmt.Errorf("data: [%d]: children must be of the same type", i)
}
bt.data = append(bt.data, _LOBTreeItem{key: kkey, child: child})
bt.data = append(bt.data, LOEntry{key: kkey, child: child})
}
return nil
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment