Commit 34abc6f4 authored by Leif Walsh, committed by Yoni Fogel

[t:3884] fixed the problem in brtleaf_split, added back the assert in move_leafentries, and added a test (test3884.c).  this required exporting brtleaf_split in brt-internal.h

git-svn-id: file:///svn/toku/tokudb@34127 c7de825b-a66e-492c-adef-691d508d4ae1
parent 1b1c3dac
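
The heart of the fix, in plain terms: when brtleaf_get_split_loc picks the very last entry of a basement node as the split point, nothing needs to be carved out of that basement node, so the right half of the split should receive one fewer basement node, and move_leafentries must never be asked to move an empty range. The standalone toy below is not TokuDB code; the variable names mirror brtleaf_split, but bn_size and the concrete numbers are made up for illustration. It only walks through that counting.

```c
/* Toy sketch of the child-count arithmetic this commit fixes (not TokuDB code). */
#include <stdio.h>
#include <stdbool.h>

int main(void) {
    int n_children = 8;        // basement nodes in the leaf being split
    int split_node = 3;        // basement node that contains the split point
    int bn_size = 4;           // entries in that basement node (made-up value)
    int split_at_in_node = 3;  // split lands on its last entry -> boundary case

    bool split_on_boundary = (split_at_in_node == bn_size - 1);

    int num_children_in_node = split_node + 1;
    // Before the fix, B was always given n_children - split_node basement nodes;
    // on a boundary split nothing is carved out of split_node, so B really needs
    // one fewer.
    int old_num_children_in_b = n_children - split_node;
    int new_num_children_in_b = n_children - split_node - (split_on_boundary ? 1 : 0);

    printf("left node keeps %d basement nodes\n", num_children_in_node);
    printf("right node: old count %d, corrected count %d\n",
           old_num_children_in_b, new_num_children_in_b);
    // With the old count, B ended up with an extra, empty basement node, and
    // move_leafentries could be asked to move an empty range -- which is what
    // the restored assert(lbi < ube) catches.
    return 0;
}
```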
@@ -759,6 +759,9 @@ typedef struct brt_status {
 void toku_brt_get_status(BRT_STATUS);
+
+void
+brtleaf_split (BRT t, BRTNODE node, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk, BOOL create_new_node);
 void
 brt_leaf_apply_cmd_once (
     BASEMENTNODE bn,
...
@@ -1217,6 +1217,7 @@ move_leafentries(
     )
 //Effect: move leafentries in the range [lbi, upe) from src_omt to newly created dest_omt
 {
+    assert(lbi < ube);
     OMTVALUE *MALLOC_N(ube-lbi, new_le);
     u_int32_t i = 0;
     *num_bytes_moved = 0;
@@ -1245,7 +1246,7 @@ move_leafentries(
     }
 }
-static void
+void
 brtleaf_split (BRT t, BRTNODE node, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk, BOOL create_new_node)
 // Effect: Split a leaf node.
 {
@@ -1261,122 +1262,133 @@ brtleaf_split (BRT t, BRTNODE node, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk,
     //printf("%s:%d B is at %lld nodesize=%d\n", __FILE__, __LINE__, B->thisnodename, B->nodesize);
     // variables that say where we will do the split. We do it in the basement node indexed at
     // at split_node, and at the index split_at_in_node within that basement node.
     int split_node = 0;
     int split_at_in_node = 0;
     {
         {
             // TODO: (Zardosht) see if we can/should make this faster, we iterate over the rows twice
             u_int64_t sumlesizes=0;
             sumlesizes = brtleaf_disk_size(node);
             // TODO: (Zardosht) #3537, figure out serial insertion optimization again later
             // split in half
             brtleaf_get_split_loc(
                 node,
                 sumlesizes,
                 &split_node,
                 &split_at_in_node
                 );
         }
+        // did we split right on the boundary between basement nodes?
+        BOOL split_on_boundary = (split_at_in_node == ((int) toku_omt_size(BLB_BUFFER(node, split_node)) - 1));
         // Now we know where we are going to break it
         // the two nodes will have a total of n_children+1 basement nodes
         // and n_children-1 pivots
         // the left node, node, will have split_node+1 basement nodes
         // the right node, B, will have n_children-split_node basement nodes
         // the pivots of node will be the first split_node pivots that originally exist
         // the pivots of B will be the last (n_children - 1 - split_node) pivots that originally exist
 
         //set up the basement nodes in the new node
         int num_children_in_node = split_node + 1;
-        int num_children_in_b = node->n_children - split_node;
+        int num_children_in_b = node->n_children - split_node - (split_on_boundary ? 1 : 0);
         if (create_new_node) {
             toku_create_new_brtnode(
                 t,
                 &B,
                 0,
                 num_children_in_b
                 );
             assert(B->nodesize>0);
         }
         else {
             B = *nodeb;
             REALLOC_N(num_children_in_b-1, B->childkeys);
             REALLOC_N(num_children_in_b, B->bp);
             B->n_children = num_children_in_b;
             for (int i = 0; i < num_children_in_b; i++) {
                 BP_STATE(B,i) = PT_AVAIL;
                 BP_OFFSET(B,i) = 0;
                 BP_BLOCKNUM(B,i).b = 0;
                 BP_SUBTREE_EST(B,i)= zero_estimates;
                 BP_WORKDONE(B,i) = 0;
                 set_BLB(B, i, toku_create_empty_bn());
             }
         }
 
         //
         // first move all the data
         //
-        // handle the move of a subset of data in split_node from node to B
-        BP_STATE(B,0) = PT_AVAIL;
-        struct subtree_estimates se_diff = zero_estimates;
-        u_int32_t diff_size = 0;
-        destroy_basement_node (BLB(B, 0)); // Destroy B's empty OMT, so I can rebuild it from an array
-        set_BNULL(B, 0);
-        set_BLB(B, 0, toku_create_empty_bn_no_buffer());
-        move_leafentries(
-            &BLB_BUFFER(B, 0),
-            BLB_BUFFER(node, split_node),
-            split_at_in_node+1,
-            toku_omt_size(BLB_BUFFER(node, split_node)),
-            &se_diff,
-            &diff_size
-            );
-        BLB_NBYTESINBUF(node, split_node) -= diff_size;
-        BLB_NBYTESINBUF(B, 0) += diff_size;
-        subtract_estimates(&BP_SUBTREE_EST(node,split_node), &se_diff);
-        add_estimates(&BP_SUBTREE_EST(B,0), &se_diff);
-
-        // move the rest of the basement nodes
-        int curr_dest_bn_index = 1;
-        for (int i = num_children_in_node; i < node->n_children; i++, curr_dest_bn_index++) {
-            destroy_basement_node(BLB(B, curr_dest_bn_index));
-            set_BNULL(B, curr_dest_bn_index);
-            B->bp[curr_dest_bn_index] = node->bp[i];
-        }
-        node->n_children = num_children_in_node;
-
-        //
-        // now handle the pivots
-        //
-
-        // make pivots in B
-        for (int i=0; i < num_children_in_b-1; i++) {
-            B->childkeys[i] = node->childkeys[i+split_node];
-            B->totalchildkeylens += toku_brt_pivot_key_len(node->childkeys[i+split_node]);
-            node->totalchildkeylens -= toku_brt_pivot_key_len(node->childkeys[i+split_node]);
-            node->childkeys[i+split_node] = NULL;
-        }
-        REALLOC_N(num_children_in_node, node->bp);
-        REALLOC_N(num_children_in_node-1, node->childkeys);
-        toku_brt_leaf_reset_calc_leaf_stats(node);
-        toku_brt_leaf_reset_calc_leaf_stats(B);
+        int curr_src_bn_index = split_node;
+        int curr_dest_bn_index = 0;
+
+        // handle the move of a subset of data in split_node from node to B
+        if (!split_on_boundary) {
+            BP_STATE(B,curr_dest_bn_index) = PT_AVAIL;
+            struct subtree_estimates se_diff = zero_estimates;
+            u_int32_t diff_size = 0;
+            destroy_basement_node (BLB(B, curr_dest_bn_index)); // Destroy B's empty OMT, so I can rebuild it from an array
+            set_BNULL(B, curr_dest_bn_index);
+            set_BLB(B, curr_dest_bn_index, toku_create_empty_bn_no_buffer());
+            move_leafentries(
+                &BLB_BUFFER(B, curr_dest_bn_index),
+                BLB_BUFFER(node, curr_src_bn_index),
+                split_at_in_node+1,
+                toku_omt_size(BLB_BUFFER(node, curr_src_bn_index)),
+                &se_diff,
+                &diff_size
+                );
+            BLB_NBYTESINBUF(node, curr_src_bn_index) -= diff_size;
+            BLB_NBYTESINBUF(B, curr_dest_bn_index) += diff_size;
+            subtract_estimates(&BP_SUBTREE_EST(node,curr_src_bn_index), &se_diff);
+            add_estimates(&BP_SUBTREE_EST(B,curr_dest_bn_index), &se_diff);
+            curr_src_bn_index++;
+            curr_dest_bn_index++;
+        } else {
+            curr_src_bn_index++;
+        }
+
+        // move the rest of the basement nodes
+        for ( ; curr_src_bn_index < node->n_children; curr_src_bn_index++, curr_dest_bn_index++) {
+            destroy_basement_node(BLB(B, curr_dest_bn_index));
+            set_BNULL(B, curr_dest_bn_index);
+            B->bp[curr_dest_bn_index] = node->bp[curr_src_bn_index];
+        }
+        node->n_children = num_children_in_node;
+
+        //
+        // now handle the pivots
+        //
+
+        // the child index in the original node that corresponds to the
+        // first node in the right node of the split
+        int base_index = (split_on_boundary ? split_node + 1 : split_node);
+        // make pivots in B
+        for (int i=0; i < num_children_in_b-1; i++) {
+            B->childkeys[i] = node->childkeys[i+base_index];
+            B->totalchildkeylens += toku_brt_pivot_key_len(node->childkeys[i+base_index]);
+            node->totalchildkeylens -= toku_brt_pivot_key_len(node->childkeys[i+base_index]);
+            node->childkeys[i+base_index] = NULL;
+        }
+        REALLOC_N(num_children_in_node, node->bp);
+        REALLOC_N(num_children_in_node-1, node->childkeys);
+        toku_brt_leaf_reset_calc_leaf_stats(node);
+        toku_brt_leaf_reset_calc_leaf_stats(B);
     }
     if (splitk) {
         memset(splitk, 0, sizeof *splitk);
         OMTVALUE lev = 0;
         int r=toku_omt_fetch(BLB_BUFFER(node, split_node), toku_omt_size(BLB_BUFFER(node, split_node))-1, &lev);
         assert_zero(r); // that fetch should have worked.
         LEAFENTRY le=lev;
         splitk->size = le_keylen(le);
         splitk->data = kv_pair_malloc(le_key(le), le_keylen(le), 0, 0);
         splitk->flags=0;
     }
-    node->max_msn_applied_to_node_on_disk= max_msn_applied_to_node;
-    B ->max_msn_applied_to_node_on_disk = max_msn_applied_to_node;
+    node->max_msn_applied_to_node_on_disk = max_msn_applied_to_node;
+    B->max_msn_applied_to_node_on_disk = max_msn_applied_to_node;
     node->dirty = 1;
     B->dirty = 1;
...
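
As a quick aside on the pivot handling above, here is a standalone sketch (again not TokuDB code; n_children and split_node are made-up example values) of which original basement nodes and pivot keys the right node B ends up with in the boundary and non-boundary cases, following the base_index arithmetic in the diff.

```c
/* Standalone sketch of the base_index / pivot selection (not TokuDB code). */
#include <stdio.h>
#include <stdbool.h>

static void show(int n_children, int split_node, bool split_on_boundary) {
    int num_children_in_b = n_children - split_node - (split_on_boundary ? 1 : 0);
    // first original child index that belongs wholly to B
    int base_index = split_on_boundary ? split_node + 1 : split_node;
    printf("%sboundary split: B gets %d basement nodes, pivots %d..%d of the original node\n",
           split_on_boundary ? "" : "non-",
           num_children_in_b, base_index, base_index + num_children_in_b - 2);
}

int main(void) {
    show(8, 3, false); // split inside basement 3: B[0] is its carved-off tail
    show(8, 3, true);  // split at the end of basement 3: B starts at basement 4
    return 0;
}
```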
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "$Id: test3856.c 33984 2011-08-17 03:03:54Z leifwalsh $"
#ident "Copyright (c) 2007-2011 Tokutek Inc. All rights reserved."
// regression test for #3884: split a leaf node whose split point falls on a
// basement node boundary; if brtleaf_split mishandles this case, we hit the
// assert at the top of move_leafentries (see the comment above the
// brtleaf_split call below)
#include "test.h"
#include "includes.h"
static TOKUTXN const null_txn = 0;
static DB * const null_db = 0;
static const char fname[]= __FILE__ ".brt";
static int omt_long_cmp(OMTVALUE p, void *q)
{
LEAFENTRY a = p, b = q;
void *ak, *bk;
u_int32_t al, bl;
ak = le_key_and_len(a, &al);
bk = le_key_and_len(b, &bl);
assert(al == sizeof(long) && bl == sizeof(long));
long *ai = (long *) ak;
long *bi = (long *) bk;
return (*ai > *bi) - (*ai < *bi);
}
static LEAFENTRY
le_fastmalloc(char *key, int keylen, char *val, int vallen)
{
LEAFENTRY r = toku_malloc(sizeof(r->type) + sizeof(r->keylen) + sizeof(r->u.clean.vallen) +
keylen + vallen);
resource_assert(r);
r->type = LE_CLEAN;
r->keylen = keylen;
r->u.clean.vallen = vallen;
memcpy(&r->u.clean.key_val[0], key, keylen);
memcpy(&r->u.clean.key_val[keylen], val, vallen);
return r;
}
int
test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
toku_memory_check = 1;
const int nodesize = 1024, eltsize = 64, bnsize = 256;
const int keylen = sizeof(long), vallen = eltsize - keylen - (sizeof(((LEAFENTRY)NULL)->type) // overhead from LE_CLEAN_MEMSIZE
+sizeof(((LEAFENTRY)NULL)->keylen)
+sizeof(((LEAFENTRY)NULL)->u.clean.vallen));
const int eltsperbn = bnsize / eltsize;
struct brtnode sn;
int fd = open(__FILE__ ".brt", O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
int r;
sn.max_msn_applied_to_node_on_disk.msn = 0;
sn.nodesize = nodesize;
sn.flags = 0x11223344;
sn.thisnodename.b = 20;
sn.layout_version = BRT_LAYOUT_VERSION;
sn.layout_version_original = BRT_LAYOUT_VERSION;
sn.height = 0;
const int nelts = 2 * nodesize / eltsize;
sn.n_children = nelts * eltsize / bnsize;
sn.dirty = 1;
LEAFENTRY elts[nelts];
MALLOC_N(sn.n_children, sn.bp);
MALLOC_N(sn.n_children - 1, sn.childkeys);
sn.totalchildkeylens = 0;
for (int bn = 0; bn < sn.n_children; ++bn) {
BP_SUBTREE_EST(&sn,bn).ndata = random() + (((long long)random())<<32);
BP_SUBTREE_EST(&sn,bn).nkeys = random() + (((long long)random())<<32);
BP_SUBTREE_EST(&sn,bn).dsize = random() + (((long long)random())<<32);
BP_SUBTREE_EST(&sn,bn).exact = (BOOL)(random()%2 != 0);
BP_STATE(&sn,bn) = PT_AVAIL;
set_BLB(&sn, bn, toku_create_empty_bn());
BLB_NBYTESINBUF(&sn,bn) = 0;
BLB_OPTIMIZEDFORUPGRADE(&sn, bn) = BRT_LAYOUT_VERSION;
long k;
for (int i = 0; i < eltsperbn; ++i) {
k = bn * eltsperbn + i;
char val[vallen];
memset(val, k, sizeof val);
elts[k] = le_fastmalloc((char *) &k, keylen, val, vallen);
r = toku_omt_insert(BLB_BUFFER(&sn, bn), elts[k], omt_long_cmp, elts[k], NULL); assert(r == 0);
BLB_NBYTESINBUF(&sn, bn) += OMT_ITEM_OVERHEAD + leafentry_disksize(elts[k]);
}
if (bn < sn.n_children - 1) {
sn.childkeys[bn] = kv_pair_malloc(&k, sizeof k, 0, 0);
sn.totalchildkeylens += (sizeof k);
}
}
CACHETABLE ct;
r = toku_brt_create_cachetable(&ct, 0, ZERO_LSN, NULL_LOGGER); assert(r==0);
BRT brt;
r = toku_open_brt(fname, 1, &brt, nodesize, bnsize, ct, null_txn, toku_builtin_compare_fun, null_db); assert(r==0);
BRTNODE nodea, nodeb;
DBT splitk;
// if we haven't done it right, we should hit the assert in the top of move_leafentries
brtleaf_split(brt, &sn, &nodea, &nodeb, &splitk, TRUE);
return 0;
}
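
For what it's worth, the constants in the test appear chosen so that the halfway point of the leaf coincides with a basement-node boundary, which is exactly the case the fix targets. The element that brtleaf_get_split_loc actually picks depends on its rounding, which is not shown here, but the comment above the brtleaf_split call states the intent: trip the move_leafentries assert if the boundary case is mishandled. A throwaway arithmetic check of the geometry:

```c
/* Throwaway check of the leaf geometry implied by the constants in test3884.c. */
#include <stdio.h>

int main(void) {
    const int nodesize = 1024, eltsize = 64, bnsize = 256;
    const int eltsperbn = bnsize / eltsize;          // 4 elements per basement node
    const int nelts = 2 * nodesize / eltsize;        // 32 elements in the leaf
    const int n_children = nelts * eltsize / bnsize; // 8 basement nodes

    // The halfway element (index 15) is the last element of basement node 3,
    // so a split-in-half sits on a basement-node boundary.
    int split_elt = nelts / 2 - 1;
    printf("halfway element %d lives in basement node %d at index %d (of %d)\n",
           split_elt, split_elt / eltsperbn, split_elt % eltsperbn, eltsperbn);
    printf("n_children = %d\n", n_children);
    return 0;
}
```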