Commit f25573e6 authored by John Esmet, committed by John Esmet

FT-93 Add a class for pivot bounds, remove the assumption that pivot
keys must come from a DBT stored in the ftnode by adding
ftnode_pivot_keys::fill_dbt()
parent 113e1704
......@@ -209,7 +209,7 @@ toku_pin_ftnode_for_query(
uint32_t fullhash,
UNLOCKERS unlockers,
ANCESTORS ancestors,
const PIVOT_BOUNDS bounds,
const pivot_bounds &bounds,
FTNODE_FETCH_EXTRA bfe,
bool apply_ancestor_messages, // this bool is probably temporary, for #3972, once we know how range query estimates work, will revisit this
FTNODE *node_p,
......
......@@ -147,7 +147,7 @@ toku_pin_ftnode_for_query(
uint32_t fullhash,
UNLOCKERS unlockers,
ANCESTORS ancestors,
const PIVOT_BOUNDS pbounds,
const pivot_bounds &bounds,
FTNODE_FETCH_EXTRA bfe,
bool apply_ancestor_messages, // this bool is probably temporary, for #3972, once we know how range query estimates work, will revisit this
FTNODE *node_p,
......
......@@ -468,7 +468,7 @@ ct_maybe_merge_child(struct flusher_advice *fa,
ctme.is_last_child = false;
pivot_to_save = childnum;
}
toku_clone_dbt(&ctme.target_key, *parent->pivotkeys.get_pivot(pivot_to_save));
toku_clone_dbt(&ctme.target_key, parent->pivotkeys.get_pivot(pivot_to_save));
// at this point, ctme is properly setup, now we can do the merge
struct flusher_advice new_fa;
......@@ -580,7 +580,7 @@ handle_split_of_child(
if (toku_ft_debug_mode) {
printf("%s:%d Child %d splitting on %s\n", __FILE__, __LINE__, childnum, (char*)splitk->data);
printf("%s:%d oldsplitkeys:", __FILE__, __LINE__);
for(int i = 0; i < node->n_children - 1; i++) printf(" %s", (char *) node->pivotkeys.get_pivot(i)->data);
for(int i = 0; i < node->n_children - 1; i++) printf(" %s", (char *) node->pivotkeys.get_pivot(i).data);
printf("\n");
}
)
......@@ -631,7 +631,7 @@ handle_split_of_child(
WHEN_NOT_GCOV(
if (toku_ft_debug_mode) {
printf("%s:%d splitkeys:", __FILE__, __LINE__);
for (int i = 0; i < node->n_children - 2; i++) printf(" %s", (char *) node->pivotkeys.get_pivot(i)->data);
for (int i = 0; i < node->n_children - 2; i++) printf(" %s", (char *) node->pivotkeys.get_pivot(i).data);
printf("\n");
}
)
......@@ -937,7 +937,7 @@ ftleaf_split(
int split_idx = num_left_bns - (split_on_boundary ? 0 : 1);
node->pivotkeys.split_at(split_idx, &B->pivotkeys);
if (split_on_boundary && num_left_bns < node->n_children && splitk) {
toku_copyref_dbt(splitk, *node->pivotkeys.get_pivot(num_left_bns - 1));
toku_copyref_dbt(splitk, node->pivotkeys.get_pivot(num_left_bns - 1));
} else if (splitk) {
bn_data* bd = BLB_DATA(node, num_left_bns - 1);
uint32_t keylen;
......@@ -997,7 +997,7 @@ ft_nonleaf_split(
// the split key for our parent is the rightmost pivot key in node
node->pivotkeys.split_at(n_children_in_a, &B->pivotkeys);
toku_clone_dbt(splitk, *node->pivotkeys.get_pivot(n_children_in_a - 1));
toku_clone_dbt(splitk, node->pivotkeys.get_pivot(n_children_in_a - 1));
node->pivotkeys.delete_at(n_children_in_a - 1);
node->n_children = n_children_in_a;
......@@ -1408,8 +1408,8 @@ ft_merge_child(
{
DBT splitk;
toku_init_dbt(&splitk);
const DBT *old_split_key = node->pivotkeys.get_pivot(childnuma);
maybe_merge_pinned_nodes(node, old_split_key, childa, childb, &did_merge, &did_rebalance, &splitk, ft->h->nodesize);
const DBT old_split_key = node->pivotkeys.get_pivot(childnuma);
maybe_merge_pinned_nodes(node, &old_split_key, childa, childb, &did_merge, &did_rebalance, &splitk, ft->h->nodesize);
//toku_verify_estimates(t,childa);
// the tree did react if a merge (did_merge) or rebalance (new split key) occurred
*did_react = (bool)(did_merge || did_rebalance);
......
......@@ -199,7 +199,7 @@ hot_update_flusher_keys(FTNODE parent,
// child node.
if (childnum < (parent->n_children - 1)) {
toku_destroy_dbt(&flusher->max_current_key);
toku_clone_dbt(&flusher->max_current_key, *parent->pivotkeys.get_pivot(childnum));
toku_clone_dbt(&flusher->max_current_key, parent->pivotkeys.get_pivot(childnum));
}
}
......
......@@ -461,15 +461,26 @@ void fill_bfe_for_prefetch(struct ftnode_fetch_extra *bfe, FT ft, struct ft_curs
void destroy_bfe_for_prefetch(struct ftnode_fetch_extra *bfe);
struct pivot_bounds {
const DBT * const lower_bound_exclusive;
const DBT * const upper_bound_inclusive; // NULL to indicate negative or positive infinity (which are in practice exclusive since there are now transfinite keys in messages).
};
typedef struct pivot_bounds const * const PIVOT_BOUNDS;
class pivot_bounds {
public:
pivot_bounds(const DBT &lbe_dbt, const DBT &ubi_dbt);
pivot_bounds next_bounds(FTNODE node, int childnum) const;
const DBT *lbe() const;
const DBT *ubi() const;
const DBT *prepivotkey (FTNODE node, int childnum, const DBT * const lower_bound_exclusive);
const DBT *postpivotkey (FTNODE node, int childnum, const DBT * const upper_bound_inclusive);
struct pivot_bounds next_pivot_keys (FTNODE node, int childnum, struct pivot_bounds const * const old_pb);
static pivot_bounds infinite_bounds();
private:
DBT _prepivotkey(FTNODE node, int childnum, const DBT &lbe_dbt) const;
DBT _postpivotkey(FTNODE node, int childnum, const DBT &ubi_dbt) const;
// if toku_dbt_is_empty() is true for either bound, then it represents
// negative or positive infinity (which are exclusive in practice)
const DBT _lower_bound_exclusive;
const DBT _upper_bound_inclusive;
};
bool
toku_bfe_wants_child_available (struct ftnode_fetch_extra* bfe, int childnum);
......
This diff is collapsed.
......@@ -158,7 +158,8 @@ get_ith_key_dbt (BASEMENTNODE bn, int i) {
#define VERIFY_ASSERTION(predicate, i, string) ({ \
if(!(predicate)) { \
if (verbose) { \
(void) verbose; \
if (true) { \
fprintf(stderr, "%s:%d: Looking at child %d of block %" PRId64 ": %s\n", __FILE__, __LINE__, i, blocknum.b, string); \
} \
result = TOKUDB_NEEDS_REPAIR; \
......@@ -398,24 +399,27 @@ toku_verify_ftnode_internal(FT_HANDLE ft_handle,
}
// Verify that all the pivot keys are in order.
for (int i = 0; i < node->n_children-2; i++) {
int compare = compare_pairs(ft_handle, node->pivotkeys.get_pivot(i), node->pivotkeys.get_pivot(i + 1));
DBT x, y;
int compare = compare_pairs(ft_handle, node->pivotkeys.fill_pivot(i, &x), node->pivotkeys.fill_pivot(i + 1, &y));
VERIFY_ASSERTION(compare < 0, i, "Value is >= the next value");
}
// Verify that all the pivot keys are lesser_pivot < pivot <= greatereq_pivot
for (int i = 0; i < node->n_children-1; i++) {
DBT x;
if (lesser_pivot) {
int compare = compare_pairs(ft_handle, lesser_pivot, node->pivotkeys.get_pivot(i));
int compare = compare_pairs(ft_handle, lesser_pivot, node->pivotkeys.fill_pivot(i, &x));
VERIFY_ASSERTION(compare < 0, i, "Pivot is >= the lower-bound pivot");
}
if (greatereq_pivot) {
int compare = compare_pairs(ft_handle, greatereq_pivot, node->pivotkeys.get_pivot(i));
int compare = compare_pairs(ft_handle, greatereq_pivot, node->pivotkeys.fill_pivot(i, &x));
VERIFY_ASSERTION(compare >= 0, i, "Pivot is < the upper-bound pivot");
}
}
for (int i = 0; i < node->n_children; i++) {
const DBT *curr_less_pivot = (i==0) ? lesser_pivot : node->pivotkeys.get_pivot(i - 1);
const DBT *curr_geq_pivot = (i==node->n_children-1) ? greatereq_pivot : node->pivotkeys.get_pivot(i);
DBT x, y;
const DBT *curr_less_pivot = (i==0) ? lesser_pivot : node->pivotkeys.fill_pivot(i - 1, &x);
const DBT *curr_geq_pivot = (i==node->n_children-1) ? greatereq_pivot : node->pivotkeys.fill_pivot(i, &y);
if (node->height > 0) {
NONLEAF_CHILDINFO bnc = BNC(node, i);
// Verify that messages in the buffers are in the right place.
......@@ -518,14 +522,15 @@ toku_verify_ftnode (FT_HANDLE ft_handle,
for (int i = 0; i < node->n_children; i++) {
FTNODE child_node;
toku_get_node_for_verify(BP_BLOCKNUM(node, i), ft_handle, &child_node);
DBT x, y;
int r = toku_verify_ftnode(ft_handle, rootmsn,
(toku_bnc_n_entries(BNC(node, i)) > 0
? this_msn
: parentmsn_with_messages),
messages_exist_above || toku_bnc_n_entries(BNC(node, i)) > 0,
child_node, node->height-1,
(i==0) ? lesser_pivot : node->pivotkeys.get_pivot(i - 1),
(i==node->n_children-1) ? greatereq_pivot : node->pivotkeys.get_pivot(i),
(i==0) ? lesser_pivot : node->pivotkeys.fill_pivot(i - 1, &x),
(i==node->n_children-1) ? greatereq_pivot : node->pivotkeys.fill_pivot(i, &y),
progress_callback, progress_extra,
recurse, verbose, keep_going_on_failure);
if (r) {
......
This diff is collapsed.
......@@ -106,7 +106,7 @@ public:
void create_empty();
// effect: create pivot keys by copying the given DBT array
void create_from_dbts(const DBT *keys, int num_pivots);
void create_from_dbts(const DBT *keys, int n);
// effect: create pivot keys as a clone of an existing set of pivotkeys
void create_from_pivot_keys(const ftnode_pivot_keys &pivotkeys);
......@@ -114,10 +114,14 @@ public:
void destroy();
// effect: deserialize pivot keys previously serialized by serialize_to_wbuf()
void deserialize_from_rbuf(struct rbuf *rb, int num_pivots);
void deserialize_from_rbuf(struct rbuf *rb, int n);
// returns: unowned DBT representing the i'th pivot key
const DBT *get_pivot(int i) const;
DBT get_pivot(int i) const;
// effect: fills a DBT with the i'th pivot key
// returns: the given dbt
DBT *fill_pivot(int i, DBT *dbt) const;
// effect: insert a pivot into the i'th position, shifting others to the right
void insert_at(const DBT *key, int i);
......@@ -136,21 +140,59 @@ public:
// requires: *other is empty (size == 0)
void split_at(int i, ftnode_pivot_keys *other);
// effect: serialize pivot keys to a wbuf
// requires: wbuf has at least ftnode_pivot_keys::total_size() bytes available
void serialize_to_wbuf(struct wbuf *wb) const;
int num_pivots() const;
// returns: the sum of the key sizes of all pivots
size_t total_size() const;
// effect: serialize pivot keys to a wbuf
// requires: wbuf has at least ftnode_pivot_keys::total_size() bytes available
void serialize_to_wbuf(struct wbuf *wb) const;
private:
// adds/destroys keys at a certain index, maintaining _total_size, but not _num_pivots
void _add_key(const DBT *key, int i);
void _destroy_key(int i);
// effect: create pivot keys, in fixed key format, by copying the given key array
void _create_from_fixed_keys(const char *fixedkeys, size_t fixed_keylen, int n);
// returns: pointer to the start of the i'th key inside the packed key
//          array (_fixed_keys). keys are laid out back to back, each
//          _fixed_keylen bytes long, so key i begins at byte offset
//          i * _fixed_keylen. no bounds checking is done here.
char *_fixed_key(int i) const {
    const size_t offset = i * _fixed_keylen;
    return _fixed_keys + offset;
}
bool _fixed_format() const {
return _fixed_keys != nullptr;
}
void sanity_check() const;
void _insert_at_dbt(const DBT *key, int i);
void _append_dbt(const ftnode_pivot_keys &pivotkeys);
void _replace_at_dbt(const DBT *key, int i);
void _delete_at_dbt(int i);
void _split_at_dbt(int i, ftnode_pivot_keys *other);
void _insert_at_fixed(const DBT *key, int i);
void _append_fixed(const ftnode_pivot_keys &pivotkeys);
void _replace_at_fixed(const DBT *key, int i);
void _delete_at_fixed(int i);
void _split_at_fixed(int i, ftnode_pivot_keys *other);
// adds/destroys keys at a certain index (in dbt format),
// maintaining _total_size, but not _num_pivots
void _add_key_dbt(const DBT *key, int i);
void _destroy_key_dbt(int i);
// conversions to and from packed key array format
void _convert_to_dbt_format();
void _convert_to_fixed_format();
// If every key is _fixed_keylen long, then _fixed_keys is a
// packed array of keys..
char *_fixed_keys;
size_t _fixed_keylen;
// ..otherwise _fixed_keys is null and we store an array of dbts,
// each representing a key. this is simpler but less cache-efficient.
DBT *_dbt_keys;
DBT *_keys;
int _num_pivots;
size_t _total_size;
};
......@@ -482,12 +524,13 @@ void toku_ft_bnc_move_messages_to_stale(FT ft, NONLEAF_CHILDINFO bnc);
void toku_move_ftnode_messages_to_stale(FT ft, FTNODE node);
// TODO: Should ft_handle just be FT?
class pivot_bounds;
void toku_apply_ancestors_messages_to_node(FT_HANDLE t, FTNODE node, ANCESTORS ancestors,
struct pivot_bounds const *const bounds,
const pivot_bounds &bounds,
bool *msgs_applied, int child_to_read);
bool toku_ft_leaf_needs_ancestors_messages(FT ft, FTNODE node, ANCESTORS ancestors,
struct pivot_bounds const *const bounds,
const pivot_bounds &bounds,
MSN *const max_msn_in_path, int child_to_read);
void toku_ft_bn_update_max_msn(FTNODE node, MSN max_msn_applied, int child_to_read);
......
......@@ -360,7 +360,7 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) {
assert(leafentry_memsize(curr_le) == leafentry_memsize(elts[last_i].le));
assert(memcmp(curr_le, elts[last_i].le, leafentry_memsize(curr_le)) == 0);
if (bn < npartitions-1) {
assert(strcmp((char*)dn->pivotkeys.get_pivot(bn)->data, elts[last_i].keyp) <= 0);
assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, elts[last_i].keyp) <= 0);
}
// TODO for later, get a key comparison here as well
last_i++;
......@@ -495,7 +495,7 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone
assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le));
assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0);
if (bn < npartitions-1) {
assert(strcmp((char*)dn->pivotkeys.get_pivot(bn)->data, les[last_i].keyp) <= 0);
assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, les[last_i].keyp) <= 0);
}
// TODO for later, get a key comparison here as well
last_i++;
......@@ -618,7 +618,7 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) {
assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le));
assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0);
if (bn < npartitions-1) {
uint32_t *CAST_FROM_VOIDP(pivot, dn->pivotkeys.get_pivot(bn)->data);
uint32_t *CAST_FROM_VOIDP(pivot, dn->pivotkeys.get_pivot(bn).data);
void* tmp = les[last_i].keyp;
uint32_t *CAST_FROM_VOIDP(item, tmp);
assert(*pivot >= *item);
......@@ -759,7 +759,7 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone)
assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le));
assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0);
if (bn < npartitions-1) {
assert(strcmp((char*)dn->pivotkeys.get_pivot(bn)->data, (char*)(les[last_i].keyp)) <= 0);
assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, (char*)(les[last_i].keyp)) <= 0);
}
// TODO for later, get a key comparison here as well
last_i++;
......@@ -888,7 +888,7 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool
assert(leafentry_memsize(curr_le) == leafentry_memsize(elts[last_i].le));
assert(memcmp(curr_le, elts[last_i].le, leafentry_memsize(curr_le)) == 0);
if (bn < npartitions-1) {
assert(strcmp((char*)dn->pivotkeys.get_pivot(bn)->data, (char*)(elts[last_i].keyp)) <= 0);
assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, (char*)(elts[last_i].keyp)) <= 0);
}
// TODO for later, get a key comparison here as well
last_i++;
......@@ -1107,8 +1107,8 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) {
assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION);
assert(dn->height == 1);
assert(dn->n_children==2);
assert(strcmp((char*)dn->pivotkeys.get_pivot(0)->data, "hello")==0);
assert(dn->pivotkeys.get_pivot(0)->size==6);
assert(strcmp((char*)dn->pivotkeys.get_pivot(0).data, "hello")==0);
assert(dn->pivotkeys.get_pivot(0).size==6);
assert(BP_BLOCKNUM(dn,0).b==30);
assert(BP_BLOCKNUM(dn,1).b==35);
......
......@@ -737,9 +737,8 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) {
BP_STATE(parentnode, 0) = PT_AVAIL;
parentnode->max_msn_applied_to_node_on_disk = max_parent_msn;
struct ancestors ancestors = { .node = parentnode, .childnum = 0, .next = NULL };
const struct pivot_bounds infinite_bounds = { .lower_bound_exclusive = NULL, .upper_bound_inclusive = NULL };
bool msgs_applied;
toku_apply_ancestors_messages_to_node(t, child, &ancestors, &infinite_bounds, &msgs_applied, -1);
toku_apply_ancestors_messages_to_node(t, child, &ancestors, pivot_bounds::infinite_bounds(), &msgs_applied, -1);
struct checkit_fn {
int operator()(FT_MSG UU(msg), bool is_fresh) {
......@@ -962,12 +961,11 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) {
parentnode->max_msn_applied_to_node_on_disk = max_parent_msn;
struct ancestors ancestors = { .node = parentnode, .childnum = 0, .next = NULL };
DBT lbe, ubi;
const struct pivot_bounds bounds = {
.lower_bound_exclusive = toku_init_dbt(&lbe),
.upper_bound_inclusive = toku_clone_dbt(&ubi, childkeys[7])
};
toku_init_dbt(&lbe);
toku_clone_dbt(&ubi, childkeys[7]);
const pivot_bounds bounds(lbe, ubi);
bool msgs_applied;
toku_apply_ancestors_messages_to_node(t, child, &ancestors, &bounds, &msgs_applied, -1);
toku_apply_ancestors_messages_to_node(t, child, &ancestors, bounds, &msgs_applied, -1);
struct checkit_fn {
DBT *childkeys;
......@@ -1162,9 +1160,8 @@ compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) {
BP_STATE(parentnode, 0) = PT_AVAIL;
parentnode->max_msn_applied_to_node_on_disk = max_parent_msn;
struct ancestors ancestors = { .node = parentnode, .childnum = 0, .next = NULL };
const struct pivot_bounds infinite_bounds = { .lower_bound_exclusive = NULL, .upper_bound_inclusive = NULL };
bool msgs_applied;
toku_apply_ancestors_messages_to_node(t, child2, &ancestors, &infinite_bounds, &msgs_applied, -1);
toku_apply_ancestors_messages_to_node(t, child2, &ancestors, pivot_bounds::infinite_bounds(), &msgs_applied, -1);
struct checkit_fn {
int operator()(FT_MSG UU(msg), bool is_fresh) {
......
......@@ -260,11 +260,11 @@ static void dump_node(int fd, BLOCKNUM blocknum, FT ft) {
printf(" pivots:\n");
for (int i=0; i<n->n_children-1; i++) {
const DBT *piv = n->pivotkeys.get_pivot(i);
const DBT piv = n->pivotkeys.get_pivot(i);
printf(" pivot %2d:", i);
if (n->flags)
printf(" flags=%x ", n->flags);
print_item(piv->data, piv->size);
print_item(piv.data, piv.size);
printf("\n");
}
printf(" children:\n");
......
......@@ -317,6 +317,12 @@ bool toku_dbt_is_infinite(const DBT *dbt) {
return dbt == toku_dbt_positive_infinity() || dbt == toku_dbt_negative_infinity();
}
// returns: true if the given dbt carries no data, i.e. its data pointer
//          is null. a dbt with a null data pointer is also expected to
//          have size zero; that expectation is checked through
//          paranoid_invariant().
bool toku_dbt_is_empty(const DBT *dbt) {
    // a null data field is only legal together with a zero size
    paranoid_invariant(dbt->data != nullptr || dbt->size == 0);
    const bool has_no_data = (dbt->data == nullptr);
    return has_no_data;
}
int toku_dbt_infinite_compare(const DBT *a, const DBT *b) {
if (a == b) {
return 0;
......
......@@ -129,6 +129,9 @@ const DBT *toku_dbt_negative_infinity(void);
// returns: true if the given dbt is either positive or negative infinity
bool toku_dbt_is_infinite(const DBT *dbt);
// returns: true if the given dbt has no data (ie: dbt->data == nullptr)
bool toku_dbt_is_empty(const DBT *dbt);
// effect: compares two potentially infinity-valued dbts
// requires: at least one is infinite (assert otherwise)
int toku_dbt_infinite_compare(const DBT *a, const DBT *b);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment