Commit 313d2d78 authored by Kirill Smelkov

go/zodb/btree: Expose access to BTree/Bucket entries as public API

Traditionally BTrees in ZODB/py expose point query and iteration APIs.
However they don't allow a BTree to be scanned through concurrently.

For example, in wendelin.core each ZBlk1 consists of an IOBTree with 512
chunks

	https://lab.nexedi.com/nexedi/wendelin.core/blob/v0.12-6-g318efce/bigfile/file_zodb.py#L267

and loading those chunks from ZODB one-by-one serially is very slow.

Expose a way to retrieve all children of a B⁺ tree node. This way,
loading them all can be started in parallel, significantly reducing
overall latency when a range or the whole BTree needs to be fetched.
parent 8d21e8cc
...@@ -22,6 +22,8 @@ ...@@ -22,6 +22,8 @@
// //
// node.Get(key) performs point-query. // node.Get(key) performs point-query.
// //
// node.Entryv() returns [] of (key, child/value).
//
// -------- // --------
// //
// (*) https://github.com/zopefoundation/ZODB/blob/3.10.7-4-gb8d7a8567/src/BTrees/Development.txt#L211 // (*) https://github.com/zopefoundation/ZODB/blob/3.10.7-4-gb8d7a8567/src/BTrees/Development.txt#L211
......
...@@ -31,6 +31,8 @@ import ( ...@@ -31,6 +31,8 @@ import (
// BTree is a non-leaf node of a B⁺ tree. // BTree is a non-leaf node of a B⁺ tree.
// //
// It contains []Entry in ↑ key order.
//
// It mimics BTree from btree/py. // It mimics BTree from btree/py.
type BTree struct { type BTree struct {
zodb.Persistent zodb.Persistent
...@@ -49,19 +51,23 @@ type BTree struct { ...@@ -49,19 +51,23 @@ type BTree struct {
// order. data[0].key is unused. For i in 0 .. len-1, all keys reachable // order. data[0].key is unused. For i in 0 .. len-1, all keys reachable
// from data[i].child are >= data[i].key and < data[i+1].key, at the // from data[i].child are >= data[i].key and < data[i+1].key, at the
// endpoints pretending that data[0].key is -∞ and data[len].key is +∞. // endpoints pretending that data[0].key is -∞ and data[len].key is +∞.
data []_BTreeItem data []Entry
} }
// _BTreeItem mimics BTreeItem from btree/py. // Entry is one BTree node entry.
//
// It contains key and child, who is either BTree or Bucket.
// //
// XXX export for BTree.Children? // Key limits child's keys - see BTree.Entryv for details.
type _BTreeItem struct { type Entry struct {
key KEY key KEY
child interface{} // BTree or Bucket child interface{} // BTree or Bucket
} }
// Bucket is a leaf node of a B⁺ tree. // Bucket is a leaf node of a B⁺ tree.
// //
// It contains []BucketEntry in ↑ key order.
//
// It mimics Bucket from btree/py. // It mimics Bucket from btree/py.
type Bucket struct { type Bucket struct {
zodb.Persistent zodb.Persistent
...@@ -79,6 +85,54 @@ type Bucket struct { ...@@ -79,6 +85,54 @@ type Bucket struct {
values []interface{} // 'len' corresponding values values []interface{} // 'len' corresponding values
} }
// BucketEntry is one Bucket node entry.
//
// It contains key and value.
//
// Bucket.Entryv builds entries positionally as BucketEntry{key, value},
// so the field order below must stay as is.
type BucketEntry struct {
	key   KEY         // key of the entry; exposed via Key
	value interface{} // value stored under key; exposed via Value
}
// Key reports the key stored in a BTree entry.
func (e *Entry) Key() KEY {
	return e.key
}
// Child reports the child node referenced by a BTree entry.
func (e *Entry) Child() interface{} {
	return e.child
}
// Entryv returns the entries of a BTree node.
//
// The key of every entry bounds the keys of all children reachable
// through that entry:
//
//	[i].Key ≤ [i].Child.*.Key < [i+1].Key    i ∈ [0, len([]))
//
//	[0].Key       = -∞	; always returned so
//	[len(ev)].Key = +∞	; should be assumed so
//
// Children of all entries are guaranteed to be of the same kind - either
// all BTree, or all Bucket.
//
// The result is the node's own entry slice, not a copy: the caller must
// not modify the returned array.
func (t *BTree) Entryv() []Entry {
	entryv := t.data
	return entryv
}
// Key reports the key stored in a Bucket entry.
func (e *BucketEntry) Key() KEY {
	return e.key
}
// Value reports the value stored in a Bucket entry.
func (e *BucketEntry) Value() interface{} {
	return e.value
}
// Entryv returns the entries of a Bucket node.
//
// The result is a freshly allocated slice in which entry i pairs
// keys[i] with values[i].
func (b *Bucket) Entryv() []BucketEntry {
	n := len(b.keys)
	entryv := make([]BucketEntry, 0, n)
	for i := 0; i < n; i++ {
		entryv = append(entryv, BucketEntry{key: b.keys[i], value: b.values[i]})
	}
	return entryv
}
// Get searches BTree by key. // Get searches BTree by key.
// //
// It loads intermediate BTree nodes from database on demand as needed. // It loads intermediate BTree nodes from database on demand as needed.
...@@ -310,7 +364,7 @@ func (bt *btreeState) PySetState(pystate interface{}) (err error) { ...@@ -310,7 +364,7 @@ func (bt *btreeState) PySetState(pystate interface{}) (err error) {
} }
bt.firstbucket = bucket bt.firstbucket = bucket
bt.data = []_BTreeItem{{key: 0, child: bucket}} bt.data = []Entry{{key: 0, child: bucket}}
return nil return nil
} }
...@@ -329,7 +383,7 @@ func (bt *btreeState) PySetState(pystate interface{}) (err error) { ...@@ -329,7 +383,7 @@ func (bt *btreeState) PySetState(pystate interface{}) (err error) {
} }
n := (len(t) + 1) / 2 n := (len(t) + 1) / 2
bt.data = make([]_BTreeItem, 0, n) bt.data = make([]Entry, 0, n)
var kprev int64 var kprev int64
var childrenKind int // 1 - BTree, 2 - Bucket var childrenKind int // 1 - BTree, 2 - Bucket
for i, idx := 0, 0; i < n; i++ { for i, idx := 0, 0; i < n; i++ {
...@@ -374,7 +428,7 @@ func (bt *btreeState) PySetState(pystate interface{}) (err error) { ...@@ -374,7 +428,7 @@ func (bt *btreeState) PySetState(pystate interface{}) (err error) {
fmt.Errorf("data: [%d]: children must be of the same type", i) fmt.Errorf("data: [%d]: children must be of the same type", i)
} }
bt.data = append(bt.data, _BTreeItem{key: kkey, child: child}) bt.data = append(bt.data, Entry{key: kkey, child: child})
} }
return nil return nil
......
...@@ -177,5 +177,7 @@ func TestBTree(t *testing.T) { ...@@ -177,5 +177,7 @@ func TestBTree(t *testing.T) {
return firstbucket return firstbucket
} }
// XXX verify Entryv ?
verifyFirstBucket(B3) verifyFirstBucket(B3)
} }
...@@ -39,8 +39,9 @@ sed \ ...@@ -39,8 +39,9 @@ sed \
-e "s/KEY/$KEY/g" \ -e "s/KEY/$KEY/g" \
-e "s/<Key>/$Key/g" \ -e "s/<Key>/$Key/g" \
-e "s/\bBTree\b/${KIND}BTree/g" \ -e "s/\bBTree\b/${KIND}BTree/g" \
-e "s/\b_BTreeItem\b/_${KIND}BTreeItem/g" \ -e "s/\bEntry\b/${KIND}Entry/g" \
-e "s/\bBucket\b/${KIND}Bucket/g" \ -e "s/\bBucket\b/${KIND}Bucket/g" \
-e "s/\bBucketEntry\b/${KIND}BucketEntry/g" \
-e "s/\bbtreeState\b/${kind}btreeState/g" \ -e "s/\bbtreeState\b/${kind}btreeState/g" \
-e "s/\bbucketState\b/${kind}bucketState/g" \ -e "s/\bbucketState\b/${kind}bucketState/g" \
$input >>$out $input >>$out
...@@ -33,6 +33,8 @@ import ( ...@@ -33,6 +33,8 @@ import (
// IOBTree is a non-leaf node of a B⁺ tree. // IOBTree is a non-leaf node of a B⁺ tree.
// //
// It contains []IOEntry in ↑ key order.
//
// It mimics IOBTree from btree/py. // It mimics IOBTree from btree/py.
type IOBTree struct { type IOBTree struct {
zodb.Persistent zodb.Persistent
...@@ -51,19 +53,23 @@ type IOBTree struct { ...@@ -51,19 +53,23 @@ type IOBTree struct {
// order. data[0].key is unused. For i in 0 .. len-1, all keys reachable // order. data[0].key is unused. For i in 0 .. len-1, all keys reachable
// from data[i].child are >= data[i].key and < data[i+1].key, at the // from data[i].child are >= data[i].key and < data[i+1].key, at the
// endpoints pretending that data[0].key is -∞ and data[len].key is +∞. // endpoints pretending that data[0].key is -∞ and data[len].key is +∞.
data []_IOBTreeItem data []IOEntry
} }
// _IOBTreeItem mimics BTreeItem from btree/py. // IOEntry is one IOBTree node entry.
//
// It contains key and child, who is either IOBTree or IOBucket.
// //
// XXX export for IOBTree.Children? // Key limits child's keys - see IOBTree.Entryv for details.
type _IOBTreeItem struct { type IOEntry struct {
key int32 key int32
child interface{} // IOBTree or IOBucket child interface{} // IOBTree or IOBucket
} }
// IOBucket is a leaf node of a B⁺ tree. // IOBucket is a leaf node of a B⁺ tree.
// //
// It contains []IOBucketEntry in ↑ key order.
//
// It mimics IOBucket from btree/py. // It mimics IOBucket from btree/py.
type IOBucket struct { type IOBucket struct {
zodb.Persistent zodb.Persistent
...@@ -81,6 +87,54 @@ type IOBucket struct { ...@@ -81,6 +87,54 @@ type IOBucket struct {
values []interface{} // 'len' corresponding values values []interface{} // 'len' corresponding values
} }
// IOBucketEntry is one IOBucket node entry.
//
// It contains key and value.
//
// IOBucket.Entryv builds entries positionally as IOBucketEntry{key, value},
// so the field order below must stay as is.
type IOBucketEntry struct {
	key   int32       // key of the entry; exposed via Key
	value interface{} // value stored under key; exposed via Value
}
// Key reports the key stored in an IOBTree entry.
func (e *IOEntry) Key() int32 {
	return e.key
}
// Child reports the child node referenced by an IOBTree entry.
func (e *IOEntry) Child() interface{} {
	return e.child
}
// Entryv returns the entries of an IOBTree node.
//
// The key of every entry bounds the keys of all children reachable
// through that entry:
//
//	[i].Key ≤ [i].Child.*.Key < [i+1].Key    i ∈ [0, len([]))
//
//	[0].Key       = -∞	; always returned so
//	[len(ev)].Key = +∞	; should be assumed so
//
// Children of all entries are guaranteed to be of the same kind - either
// all IOBTree, or all IOBucket.
//
// The result is the node's own entry slice, not a copy: the caller must
// not modify the returned array.
func (t *IOBTree) Entryv() []IOEntry {
	entryv := t.data
	return entryv
}
// Key reports the key stored in an IOBucket entry.
func (e *IOBucketEntry) Key() int32 {
	return e.key
}
// Value reports the value stored in an IOBucket entry.
func (e *IOBucketEntry) Value() interface{} {
	return e.value
}
// Entryv returns the entries of an IOBucket node.
//
// The result is a freshly allocated slice in which entry i pairs
// keys[i] with values[i].
func (b *IOBucket) Entryv() []IOBucketEntry {
	n := len(b.keys)
	entryv := make([]IOBucketEntry, 0, n)
	for i := 0; i < n; i++ {
		entryv = append(entryv, IOBucketEntry{key: b.keys[i], value: b.values[i]})
	}
	return entryv
}
// Get searches IOBTree by key. // Get searches IOBTree by key.
// //
// It loads intermediate IOBTree nodes from database on demand as needed. // It loads intermediate IOBTree nodes from database on demand as needed.
...@@ -312,7 +366,7 @@ func (bt *iobtreeState) PySetState(pystate interface{}) (err error) { ...@@ -312,7 +366,7 @@ func (bt *iobtreeState) PySetState(pystate interface{}) (err error) {
} }
bt.firstbucket = bucket bt.firstbucket = bucket
bt.data = []_IOBTreeItem{{key: 0, child: bucket}} bt.data = []IOEntry{{key: 0, child: bucket}}
return nil return nil
} }
...@@ -331,7 +385,7 @@ func (bt *iobtreeState) PySetState(pystate interface{}) (err error) { ...@@ -331,7 +385,7 @@ func (bt *iobtreeState) PySetState(pystate interface{}) (err error) {
} }
n := (len(t) + 1) / 2 n := (len(t) + 1) / 2
bt.data = make([]_IOBTreeItem, 0, n) bt.data = make([]IOEntry, 0, n)
var kprev int64 var kprev int64
var childrenKind int // 1 - IOBTree, 2 - IOBucket var childrenKind int // 1 - IOBTree, 2 - IOBucket
for i, idx := 0, 0; i < n; i++ { for i, idx := 0, 0; i < n; i++ {
...@@ -376,7 +430,7 @@ func (bt *iobtreeState) PySetState(pystate interface{}) (err error) { ...@@ -376,7 +430,7 @@ func (bt *iobtreeState) PySetState(pystate interface{}) (err error) {
fmt.Errorf("data: [%d]: children must be of the same type", i) fmt.Errorf("data: [%d]: children must be of the same type", i)
} }
bt.data = append(bt.data, _IOBTreeItem{key: kkey, child: child}) bt.data = append(bt.data, IOEntry{key: kkey, child: child})
} }
return nil return nil
......
...@@ -33,6 +33,8 @@ import ( ...@@ -33,6 +33,8 @@ import (
// LOBTree is a non-leaf node of a B⁺ tree. // LOBTree is a non-leaf node of a B⁺ tree.
// //
// It contains []LOEntry in ↑ key order.
//
// It mimics LOBTree from btree/py. // It mimics LOBTree from btree/py.
type LOBTree struct { type LOBTree struct {
zodb.Persistent zodb.Persistent
...@@ -51,19 +53,23 @@ type LOBTree struct { ...@@ -51,19 +53,23 @@ type LOBTree struct {
// order. data[0].key is unused. For i in 0 .. len-1, all keys reachable // order. data[0].key is unused. For i in 0 .. len-1, all keys reachable
// from data[i].child are >= data[i].key and < data[i+1].key, at the // from data[i].child are >= data[i].key and < data[i+1].key, at the
// endpoints pretending that data[0].key is -∞ and data[len].key is +∞. // endpoints pretending that data[0].key is -∞ and data[len].key is +∞.
data []_LOBTreeItem data []LOEntry
} }
// _LOBTreeItem mimics BTreeItem from btree/py. // LOEntry is one LOBTree node entry.
//
// It contains key and child, who is either LOBTree or LOBucket.
// //
// XXX export for LOBTree.Children? // Key limits child's keys - see LOBTree.Entryv for details.
type _LOBTreeItem struct { type LOEntry struct {
key int64 key int64
child interface{} // LOBTree or LOBucket child interface{} // LOBTree or LOBucket
} }
// LOBucket is a leaf node of a B⁺ tree. // LOBucket is a leaf node of a B⁺ tree.
// //
// It contains []LOBucketEntry in ↑ key order.
//
// It mimics LOBucket from btree/py. // It mimics LOBucket from btree/py.
type LOBucket struct { type LOBucket struct {
zodb.Persistent zodb.Persistent
...@@ -81,6 +87,54 @@ type LOBucket struct { ...@@ -81,6 +87,54 @@ type LOBucket struct {
values []interface{} // 'len' corresponding values values []interface{} // 'len' corresponding values
} }
// LOBucketEntry is one LOBucket node entry.
//
// It contains key and value.
//
// LOBucket.Entryv builds entries positionally as LOBucketEntry{key, value},
// so the field order below must stay as is.
type LOBucketEntry struct {
	key   int64       // key of the entry; exposed via Key
	value interface{} // value stored under key; exposed via Value
}
// Key reports the key stored in an LOBTree entry.
func (e *LOEntry) Key() int64 {
	return e.key
}
// Child reports the child node referenced by an LOBTree entry.
func (e *LOEntry) Child() interface{} {
	return e.child
}
// Entryv returns the entries of an LOBTree node.
//
// The key of every entry bounds the keys of all children reachable
// through that entry:
//
//	[i].Key ≤ [i].Child.*.Key < [i+1].Key    i ∈ [0, len([]))
//
//	[0].Key       = -∞	; always returned so
//	[len(ev)].Key = +∞	; should be assumed so
//
// Children of all entries are guaranteed to be of the same kind - either
// all LOBTree, or all LOBucket.
//
// The result is the node's own entry slice, not a copy: the caller must
// not modify the returned array.
func (t *LOBTree) Entryv() []LOEntry {
	entryv := t.data
	return entryv
}
// Key reports the key stored in an LOBucket entry.
func (e *LOBucketEntry) Key() int64 {
	return e.key
}
// Value reports the value stored in an LOBucket entry.
func (e *LOBucketEntry) Value() interface{} {
	return e.value
}
// Entryv returns the entries of an LOBucket node.
//
// The result is a freshly allocated slice in which entry i pairs
// keys[i] with values[i].
func (b *LOBucket) Entryv() []LOBucketEntry {
	n := len(b.keys)
	entryv := make([]LOBucketEntry, 0, n)
	for i := 0; i < n; i++ {
		entryv = append(entryv, LOBucketEntry{key: b.keys[i], value: b.values[i]})
	}
	return entryv
}
// Get searches LOBTree by key. // Get searches LOBTree by key.
// //
// It loads intermediate LOBTree nodes from database on demand as needed. // It loads intermediate LOBTree nodes from database on demand as needed.
...@@ -312,7 +366,7 @@ func (bt *lobtreeState) PySetState(pystate interface{}) (err error) { ...@@ -312,7 +366,7 @@ func (bt *lobtreeState) PySetState(pystate interface{}) (err error) {
} }
bt.firstbucket = bucket bt.firstbucket = bucket
bt.data = []_LOBTreeItem{{key: 0, child: bucket}} bt.data = []LOEntry{{key: 0, child: bucket}}
return nil return nil
} }
...@@ -331,7 +385,7 @@ func (bt *lobtreeState) PySetState(pystate interface{}) (err error) { ...@@ -331,7 +385,7 @@ func (bt *lobtreeState) PySetState(pystate interface{}) (err error) {
} }
n := (len(t) + 1) / 2 n := (len(t) + 1) / 2
bt.data = make([]_LOBTreeItem, 0, n) bt.data = make([]LOEntry, 0, n)
var kprev int64 var kprev int64
var childrenKind int // 1 - LOBTree, 2 - LOBucket var childrenKind int // 1 - LOBTree, 2 - LOBucket
for i, idx := 0, 0; i < n; i++ { for i, idx := 0, 0; i < n; i++ {
...@@ -376,7 +430,7 @@ func (bt *lobtreeState) PySetState(pystate interface{}) (err error) { ...@@ -376,7 +430,7 @@ func (bt *lobtreeState) PySetState(pystate interface{}) (err error) {
fmt.Errorf("data: [%d]: children must be of the same type", i) fmt.Errorf("data: [%d]: children must be of the same type", i)
} }
bt.data = append(bt.data, _LOBTreeItem{key: kkey, child: child}) bt.data = append(bt.data, LOEntry{key: kkey, child: child})
} }
return nil return nil
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment