Commit 5b5c016b authored by Rich Prohaska, committed by Yoni Fogel

#3588 merge tokudb.3588 to main refs[t:3588]

git-svn-id: file:///svn/toku/tokudb@32669 c7de825b-a66e-492c-adef-691d508d4ae1
parent d7ec24ee
...@@ -32,6 +32,9 @@ db-benchmark-test-cxx.dir: cxx.dir ...@@ -32,6 +32,9 @@ db-benchmark-test-cxx.dir: cxx.dir
build: $(patsubst %,%.dir, $(BUILDDIRS)) build: $(patsubst %,%.dir, $(BUILDDIRS))
%.build:
cd $(patsubst %.build, %,$@) && $(MAKE) build
%.local: %.local:
cd $(patsubst %.local, %,$@) && $(MAKE) local cd $(patsubst %.local, %,$@) && $(MAKE) local
......
...@@ -435,10 +435,19 @@ toku_brt_search_which_child( ...@@ -435,10 +435,19 @@ toku_brt_search_which_child(
BRTNODE node, BRTNODE node,
brt_search_t *search brt_search_t *search
); );
u_int8_t u_int8_t
toku_brtnode_partition_state (struct brtnode_fetch_extra* bfe, int childnum); toku_brtnode_partition_state (struct brtnode_fetch_extra* bfe, int childnum);
// logs the memory allocation, but not the creation of the new node
// allocate a block number
// allocate and initialize a brtnode
// put the brtnode into the cache table
void toku_create_new_brtnode (BRT t, BRTNODE *result, int height, int n_children); void toku_create_new_brtnode (BRT t, BRTNODE *result, int height, int n_children);
// Effect: Fill in N as an empty brtnode.
void toku_initialize_empty_brtnode (BRTNODE n, BLOCKNUM nodename, int height, int num_children,
int layout_version, unsigned int nodesize, unsigned int flags);
int toku_pin_brtnode (BRT brt, BLOCKNUM blocknum, u_int32_t fullhash, int toku_pin_brtnode (BRT brt, BLOCKNUM blocknum, u_int32_t fullhash,
UNLOCKERS unlockers, UNLOCKERS unlockers,
ANCESTORS ancestors, struct pivot_bounds const * const pbounds, ANCESTORS ancestors, struct pivot_bounds const * const pbounds,
......
...@@ -602,11 +602,6 @@ toku_serialize_brtnode_to_memory (BRTNODE node, ...@@ -602,11 +602,6 @@ toku_serialize_brtnode_to_memory (BRTNODE node,
/*out*/ size_t *n_bytes_to_write, /*out*/ size_t *n_bytes_to_write,
/*out*/ char **bytes_to_write) /*out*/ char **bytes_to_write)
{ {
char* data = NULL;
char* curr_ptr = NULL;
toku_assert_entire_node_in_memory(node); toku_assert_entire_node_in_memory(node);
if (node->height == 0) { if (node->height == 0) {
...@@ -658,8 +653,8 @@ toku_serialize_brtnode_to_memory (BRTNODE node, ...@@ -658,8 +653,8 @@ toku_serialize_brtnode_to_memory (BRTNODE node,
// //
node->bp_offset = serialize_node_header_size(node) + sb_node_info.compressed_size + 4; node->bp_offset = serialize_node_header_size(node) + sb_node_info.compressed_size + 4;
data = toku_xmalloc(total_node_size); char *data = toku_xmalloc(total_node_size);
curr_ptr = data; char *curr_ptr = data;
// now create the final serialized node // now create the final serialized node
// write the header // write the header
......
...@@ -881,33 +881,32 @@ toku_brtheader_free (struct brt_header *h) { ...@@ -881,33 +881,32 @@ toku_brtheader_free (struct brt_header *h) {
brtheader_free(h); brtheader_free(h);
} }
static void void
initialize_empty_brtnode (BRT t, BRTNODE n, BLOCKNUM nodename, int height, int num_children) toku_initialize_empty_brtnode (BRTNODE n, BLOCKNUM nodename, int height, int num_children, int layout_version, unsigned int nodesize, unsigned int flags)
// Effect: Fill in N as an empty brtnode. // Effect: Fill in N as an empty brtnode.
{ {
n->max_msn_applied_to_node_on_disk = MIN_MSN; // correct value for root node, harmless for others assert(layout_version != 0);
assert(height >= 0);
n->max_msn_applied_to_node_on_disk = MIN_MSN; // correct value for root node, harmless for others
n->max_msn_applied_to_node_in_memory = MIN_MSN; // correct value for root node, harmless for others n->max_msn_applied_to_node_in_memory = MIN_MSN; // correct value for root node, harmless for others
n->nodesize = t->h->nodesize; n->nodesize = nodesize;
n->flags = t->flags; n->flags = flags;
n->thisnodename = nodename; n->thisnodename = nodename;
assert(t->h->layout_version != 0); n->layout_version = layout_version;
n->layout_version = t->h->layout_version; n->layout_version_original = layout_version;
n->layout_version_original = t->h->layout_version; n->layout_version_read_from_disk = layout_version;
n->layout_version_read_from_disk = t->h->layout_version; n->height = height;
n->height = height;
n->dirty = 1; n->dirty = 1;
assert(height>=0);
n->totalchildkeylens = 0; n->totalchildkeylens = 0;
n->childkeys=0; n->childkeys = 0;
n->bp = 0; n->bp = 0;
n->n_children = num_children; n->n_children = num_children;
n->bp_offset = 0; n->bp_offset = 0;
if (num_children > 0) { if (num_children > 0) {
MALLOC_N(num_children-1, n->childkeys); XMALLOC_N(num_children-1, n->childkeys);
assert(n->childkeys); XMALLOC_N(num_children, n->bp);
MALLOC_N(num_children, n->bp);
assert(n->bp);
for (int i = 0; i < num_children; i++) { for (int i = 0; i < num_children; i++) {
BP_FULLHASH(n,i)=0; BP_FULLHASH(n,i)=0;
BP_HAVE_FULLHASH(n,i)=FALSE; BP_HAVE_FULLHASH(n,i)=FALSE;
...@@ -921,11 +920,11 @@ initialize_empty_brtnode (BRT t, BRTNODE n, BLOCKNUM nodename, int height, int n ...@@ -921,11 +920,11 @@ initialize_empty_brtnode (BRT t, BRTNODE n, BLOCKNUM nodename, int height, int n
n->bp[i].ptr = toku_malloc(sizeof(struct brtnode_nonleaf_childinfo)); n->bp[i].ptr = toku_malloc(sizeof(struct brtnode_nonleaf_childinfo));
memset(n->bp[i].ptr, 0, sizeof(struct brtnode_nonleaf_childinfo)); memset(n->bp[i].ptr, 0, sizeof(struct brtnode_nonleaf_childinfo));
int r = toku_fifo_create(&BNC_BUFFER(n,i)); int r = toku_fifo_create(&BNC_BUFFER(n,i));
assert(r==0); assert_zero(r);
BNC_NBYTESINBUF(n,i) = 0; BNC_NBYTESINBUF(n,i) = 0;
} }
else { else {
n->bp[i].ptr = toku_malloc(sizeof(struct brtnode_leaf_basement_node)); n->bp[i].ptr = toku_xmalloc(sizeof(struct brtnode_leaf_basement_node));
BASEMENTNODE bn = (BASEMENTNODE)n->bp[i].ptr; BASEMENTNODE bn = (BASEMENTNODE)n->bp[i].ptr;
memset(bn, 0, sizeof(struct brtnode_leaf_basement_node)); memset(bn, 0, sizeof(struct brtnode_leaf_basement_node));
toku_setup_empty_bn(bn); toku_setup_empty_bn(bn);
...@@ -941,14 +940,14 @@ brt_init_new_root(BRT brt, BRTNODE nodea, BRTNODE nodeb, DBT splitk, CACHEKEY *r ...@@ -941,14 +940,14 @@ brt_init_new_root(BRT brt, BRTNODE nodea, BRTNODE nodeb, DBT splitk, CACHEKEY *r
// Unpin nodea and nodeb. // Unpin nodea and nodeb.
// Leave the new root pinned. // Leave the new root pinned.
{ {
BRTNODE MALLOC(newroot); BRTNODE XMALLOC(newroot);
int new_height = nodea->height+1; int new_height = nodea->height+1;
BLOCKNUM newroot_diskoff; BLOCKNUM newroot_diskoff;
toku_allocate_blocknum(brt->h->blocktable, &newroot_diskoff, brt->h); toku_allocate_blocknum(brt->h->blocktable, &newroot_diskoff, brt->h);
assert(newroot); assert(newroot);
*rootp=newroot_diskoff; *rootp=newroot_diskoff;
assert(new_height > 0); assert(new_height > 0);
initialize_empty_brtnode (brt, newroot, newroot_diskoff, new_height, 2); toku_initialize_empty_brtnode (newroot, newroot_diskoff, new_height, 2, brt->h->layout_version, brt->h->nodesize, brt->flags);
//printf("new_root %lld %d %lld %lld\n", newroot_diskoff, newroot->height, nodea->thisnodename, nodeb->thisnodename); //printf("new_root %lld %d %lld %lld\n", newroot_diskoff, newroot->height, nodea->thisnodename, nodeb->thisnodename);
//printf("%s:%d Splitkey=%p %s\n", __FILE__, __LINE__, splitkey, splitkey); //printf("%s:%d Splitkey=%p %s\n", __FILE__, __LINE__, splitkey, splitkey);
newroot->childkeys[0] = splitk.data; newroot->childkeys[0] = splitk.data;
...@@ -978,26 +977,27 @@ brt_init_new_root(BRT brt, BRTNODE nodea, BRTNODE nodeb, DBT splitk, CACHEKEY *r ...@@ -978,26 +977,27 @@ brt_init_new_root(BRT brt, BRTNODE nodea, BRTNODE nodeb, DBT splitk, CACHEKEY *r
*newrootp = newroot; *newrootp = newroot;
} }
// logs the memory allocation, but not the creation of the new node void
void toku_create_new_brtnode (BRT t, BRTNODE *result, int height, int n_children) { toku_create_new_brtnode (BRT t, BRTNODE *result, int height, int n_children) {
BRTNODE MALLOC(n); assert(t->h->nodesize > 0);
int r; if (height == 0)
assert(n_children > 0);
BLOCKNUM name; BLOCKNUM name;
toku_allocate_blocknum(t->h->blocktable, &name, t->h); toku_allocate_blocknum(t->h->blocktable, &name, t->h);
if (height == 0) { assert(n_children > 0); }
assert(n); BRTNODE XMALLOC(n);
assert(t->h->nodesize>0); toku_initialize_empty_brtnode(n, name, height, n_children, t->h->layout_version, t->h->nodesize, t->flags);
initialize_empty_brtnode(t, n, name, height, n_children); assert(n->nodesize > 0);
*result = n;
assert(n->nodesize>0);
// n->brt = t;
//printf("%s:%d putting %p (%lld)\n", __FILE__, __LINE__, n, n->thisnodename);
u_int32_t fullhash = toku_cachetable_hash(t->cf, n->thisnodename); u_int32_t fullhash = toku_cachetable_hash(t->cf, n->thisnodename);
n->fullhash = fullhash; n->fullhash = fullhash;
r=toku_cachetable_put(t->cf, n->thisnodename, fullhash, int r = toku_cachetable_put(t->cf, n->thisnodename, fullhash,
n, brtnode_memory_size(n), n, brtnode_memory_size(n),
toku_brtnode_flush_callback, toku_brtnode_pe_callback, t->h); toku_brtnode_flush_callback, toku_brtnode_pe_callback, t->h);
assert_zero(r); assert_zero(r);
*result = n;
} }
static void static void
...@@ -3391,23 +3391,20 @@ int toku_open_brt (const char *fname, int is_create, BRT *newbrt, int nodesize, ...@@ -3391,23 +3391,20 @@ int toku_open_brt (const char *fname, int is_create, BRT *newbrt, int nodesize,
} }
static int setup_initial_brt_root_node (BRT t, BLOCKNUM blocknum) { static int setup_initial_brt_root_node (BRT t, BLOCKNUM blocknum) {
int r; BRTNODE XMALLOC(node);
BRTNODE MALLOC(node); toku_initialize_empty_brtnode(node, blocknum, 0, 1, t->h->layout_version, t->h->nodesize, t->flags);
assert(node);
//printf("%s:%d\n", __FILE__, __LINE__);
initialize_empty_brtnode(t, node, blocknum, 0, 1);
BP_STATE(node,0) = PT_AVAIL; BP_STATE(node,0) = PT_AVAIL;
u_int32_t fullhash = toku_cachetable_hash(t->cf, blocknum); u_int32_t fullhash = toku_cachetable_hash(t->cf, blocknum);
node->fullhash = fullhash; node->fullhash = fullhash;
r=toku_cachetable_put(t->cf, blocknum, fullhash, int r = toku_cachetable_put(t->cf, blocknum, fullhash,
node, brtnode_memory_size(node), node, brtnode_memory_size(node),
toku_brtnode_flush_callback, toku_brtnode_pe_callback, t->h); toku_brtnode_flush_callback, toku_brtnode_pe_callback, t->h);
if (r!=0) { if (r != 0)
toku_free(node); toku_free(node);
return r; else
} toku_unpin_brtnode(t, node);
toku_unpin_brtnode(t, node); return r;
return 0;
} }
// open a file for use by the brt // open a file for use by the brt
......
...@@ -143,24 +143,23 @@ dump_node (int f, BLOCKNUM blocknum, struct brt_header *h) { ...@@ -143,24 +143,23 @@ dump_node (int f, BLOCKNUM blocknum, struct brt_header *h) {
printf(" n_children=%d\n", n->n_children); printf(" n_children=%d\n", n->n_children);
printf(" total_childkeylens=%u\n", n->totalchildkeylens); printf(" total_childkeylens=%u\n", n->totalchildkeylens);
int i;
printf(" subleafentry_estimates={"); printf(" subleafentry_estimates={");
for (i=0; i<n->n_children; i++) { for (int i=0; i<n->n_children; i++) {
if (i>0) printf(" "); if (i>0) printf(" ");
struct subtree_estimates *est = &BP_SUBTREE_EST(n,i); struct subtree_estimates *est = &BP_SUBTREE_EST(n,i);
printf("{nkey=%" PRIu64 " ndata=%" PRIu64 " dsize=%" PRIu64 " %s }", est->nkeys, est->ndata, est->dsize, est->exact ? "T" : "F"); printf("{nkey=%" PRIu64 " ndata=%" PRIu64 " dsize=%" PRIu64 " %s }", est->nkeys, est->ndata, est->dsize, est->exact ? "T" : "F");
} }
printf("}\n"); printf("}\n");
printf(" pivots:\n"); printf(" pivots:\n");
for (i=0; i<n->n_children-1; i++) { for (int i=0; i<n->n_children-1; i++) {
struct kv_pair *piv = n->childkeys[i]; struct kv_pair *piv = n->childkeys[i];
printf(" pivot %d:", i); printf(" pivot %2d:", i);
assert(n->flags == 0); assert(n->flags == 0);
print_item(kv_pair_key_const(piv), kv_pair_keylen(piv)); print_item(kv_pair_key_const(piv), kv_pair_keylen(piv));
printf("\n"); printf("\n");
} }
printf(" children:\n"); printf(" children:\n");
for (i=0; i<n->n_children; i++) { for (int i=0; i<n->n_children; i++) {
if (n->height > 0) { if (n->height > 0) {
printf(" child %d: %" PRId64 "\n", i, BP_BLOCKNUM(n, i).b); printf(" child %d: %" PRId64 "\n", i, BP_BLOCKNUM(n, i).b);
unsigned int n_bytes = BNC_NBYTESINBUF(n, i); unsigned int n_bytes = BNC_NBYTESINBUF(n, i);
...@@ -203,9 +202,9 @@ dump_node (int f, BLOCKNUM blocknum, struct brt_header *h) { ...@@ -203,9 +202,9 @@ dump_node (int f, BLOCKNUM blocknum, struct brt_header *h) {
); );
} }
} else { } else {
printf(" optimized_for_upgrade=%u\n", BLB_OPTIMIZEDFORUPGRADE(n, i)); printf(" bn %2d: optimized_for_upgrade=%u", i, BLB_OPTIMIZEDFORUPGRADE(n, i));
printf(" n_bytes_in_buffer=%u\n", BLB_NBYTESINBUF(n, i)); printf(" n_bytes_in_buffer=%u", BLB_NBYTESINBUF(n, i));
printf(" items_in_buffer =%u\n", toku_omt_size(BLB_BUFFER(n, i))); printf(" items_in_buffer=%u\n", toku_omt_size(BLB_BUFFER(n, i)));
if (dump_data) toku_omt_iterate(BLB_BUFFER(n, i), print_le, 0); if (dump_data) toku_omt_iterate(BLB_BUFFER(n, i), print_le, 0);
} }
} }
......
...@@ -246,7 +246,7 @@ int toku_brt_loader_internal_init (/* out */ BRTLOADER *blp, ...@@ -246,7 +246,7 @@ int toku_brt_loader_internal_init (/* out */ BRTLOADER *blp,
void toku_brtloader_internal_destroy (BRTLOADER bl, BOOL is_error); void toku_brtloader_internal_destroy (BRTLOADER bl, BOOL is_error);
enum { disksize_row_overhead = 9 }; // how much overhead for a row in the fractal tree enum { disksize_row_overhead = 9 }; // how much overhead for a row in the fractal tree (#3588, 9 = cmd + keylen + vallen?)
// For test purposes only. (In production, the rowset size is determined by negotiation with the cachetable for some memory. See #2613.) // For test purposes only. (In production, the rowset size is determined by negotiation with the cachetable for some memory. See #2613.)
uint64_t toku_brtloader_get_rowset_budget_for_testing (void); uint64_t toku_brtloader_get_rowset_budget_for_testing (void);
......
...@@ -2025,11 +2025,10 @@ struct dbuf { ...@@ -2025,11 +2025,10 @@ struct dbuf {
}; };
struct leaf_buf { struct leaf_buf {
int64_t blocknum; BLOCKNUM blocknum;
struct dbuf dbuf;
int nkeys, ndata, dsize, n_in_buf;
int nkeys_p, ndata_p, dsize_p, partitions_p, n_in_buf_p;
TXNID xid; TXNID xid;
int nkeys, ndata, dsize;
BRTNODE node;
}; };
...@@ -2160,61 +2159,22 @@ static void putbuf_int64 (struct dbuf *dbuf, unsigned long long v) { ...@@ -2160,61 +2159,22 @@ static void putbuf_int64 (struct dbuf *dbuf, unsigned long long v) {
putbuf_int32(dbuf, v&0xFFFFFFFF); putbuf_int32(dbuf, v&0xFFFFFFFF);
} }
static void putbuf_int32_at(struct dbuf *dbuf, int off, int v) {
const int nbytes = 4;
if (off+nbytes > dbuf->buflen) {
unsigned char *oldbuf = dbuf->buf;
int oldbuflen = dbuf->buflen;
dbuf->buflen += dbuf->off + nbytes;
dbuf->buflen *= 2;
REALLOC_N(dbuf->buflen, dbuf->buf);
if (dbuf->buf == NULL) {
dbuf->error = errno;
dbuf->buf = oldbuf;
dbuf->buflen = oldbuflen;
}
}
if (!dbuf->error)
memcpy(dbuf->buf + off, &v, 4);
}
static void putbuf_int64_at(struct dbuf *dbuf, int off, unsigned long long v) {
unsigned int a = v>>32;
unsigned int b = v&0xFFFFFFFF;
putbuf_int32_at(dbuf, off, a);
putbuf_int32_at(dbuf, off+4, b);
}
static struct leaf_buf *start_leaf (struct dbout *out, const DESCRIPTOR UU(desc), int64_t lblocknum, TXNID xid, uint32_t target_nodesize) { static struct leaf_buf *start_leaf (struct dbout *out, const DESCRIPTOR UU(desc), int64_t lblocknum, TXNID xid, uint32_t target_nodesize) {
invariant(lblocknum < out->n_translations_limit); invariant(lblocknum < out->n_translations_limit);
struct leaf_buf *XMALLOC(lbuf); struct leaf_buf *XMALLOC(lbuf);
lbuf->blocknum = lblocknum; lbuf->blocknum.b = lblocknum;
dbuf_init(&lbuf->dbuf); lbuf->xid = xid;
int height=0;
int flags=0;
int layout_version=BRT_LAYOUT_VERSION;
putbuf_bytes(&lbuf->dbuf, "tokuleaf", 8);
putbuf_int32(&lbuf->dbuf, layout_version);
putbuf_int32(&lbuf->dbuf, layout_version); // layout_version original
putbuf_int32(&lbuf->dbuf, BUILD_ID); // build_id (svn rev number) of software that wrote this node to disk
putbuf_int32(&lbuf->dbuf, target_nodesize);
putbuf_int32(&lbuf->dbuf, flags);
putbuf_int32(&lbuf->dbuf, height);
lbuf->nkeys = lbuf->ndata = lbuf->dsize = 0; lbuf->nkeys = lbuf->ndata = lbuf->dsize = 0;
lbuf->n_in_buf = 0;
// leave these uninitialized for now.
lbuf->nkeys_p = lbuf->dbuf.off; lbuf->dbuf.off+=8;
lbuf->ndata_p = lbuf->dbuf.off; lbuf->dbuf.off+=8;
lbuf->dsize_p = lbuf->dbuf.off; lbuf->dbuf.off+=8;
putbuf_int32(&lbuf->dbuf, 0); // optimized_for_upgrade BRTNODE XMALLOC(node);
lbuf->node = node;
lbuf->partitions_p = lbuf->dbuf.off; lbuf->dbuf.off+=4; lbuf->dbuf.off += stored_sub_block_map_size; // RFP partition map int height = 0;
lbuf->n_in_buf_p = lbuf->dbuf.off; lbuf->dbuf.off+=4; int n_bn = 1;
toku_initialize_empty_brtnode(node, lbuf->blocknum, height, n_bn, BRT_LAYOUT_VERSION, target_nodesize, 0);
BP_STATE(node, 0) = PT_AVAIL;
lbuf->xid = xid;
return lbuf; return lbuf;
} }
...@@ -2361,7 +2321,7 @@ static int toku_loader_write_brt_from_q (BRTLOADER bl, ...@@ -2361,7 +2321,7 @@ static int toku_loader_write_brt_from_q (BRTLOADER bl,
DBT val = make_dbt(output_rowset->data+output_rowset->rows[i].off + output_rowset->rows[i].klen, output_rowset->rows[i].vlen); DBT val = make_dbt(output_rowset->data+output_rowset->rows[i].off + output_rowset->rows[i].klen, output_rowset->rows[i].vlen);
used_estimate += key.size + val.size + disksize_row_overhead; used_estimate += key.size + val.size + disksize_row_overhead;
#if 0
// Spawn off a node if // Spawn off a node if
// a) there is at least one row in it, and // a) there is at least one row in it, and
// b) this item would make the nodesize too big, or // b) this item would make the nodesize too big, or
...@@ -2372,7 +2332,19 @@ static int toku_loader_write_brt_from_q (BRTLOADER bl, ...@@ -2372,7 +2332,19 @@ static int toku_loader_write_brt_from_q (BRTLOADER bl,
int used_here_with_next_key = used_here + key.size + val.size + disksize_row_overhead; int used_here_with_next_key = used_here + key.size + val.size + disksize_row_overhead;
if (lbuf->n_in_buf > 0 && if (lbuf->n_in_buf > 0 &&
((used_here_with_next_key >= target_size) || (used_here + remaining_amount >= target_size && lbuf->dbuf.off > remaining_amount))) { ((used_here_with_next_key >= target_size) || (used_here + remaining_amount >= target_size && lbuf->dbuf.off > remaining_amount))) {
#else
// Spawn off a node if
// a) there is at least one row in it, and
// b) this item would make the nodesize too big, or
// c) the remaining amount won't fit in the current node and the current node's data is more than the remaining amount
int remaining_amount = total_disksize_estimate - used_estimate;
int off = lbuf->dsize + lbuf->nkeys * disksize_row_overhead;
int used_here = off + 1000; // leave 1000 for various overheads.
int target_size = (target_nodesize*7L)/8; // use only 7/8 of the node.
int used_here_with_next_key = used_here + key.size + val.size + disksize_row_overhead;
if (lbuf->nkeys > 0 &&
((used_here_with_next_key >= target_size) || (used_here + remaining_amount >= target_size && off > remaining_amount))) {
#endif
//if (used_here_with_next_key < target_size) { //if (used_here_with_next_key < target_size) {
// printf("%s:%d Runt avoidance: used_here=%d, remaining_amount=%d target_size=%d dbuf.off=%d\n", __FILE__, __LINE__, used_here, remaining_amount, target_size, lbuf->dbuf.off); // printf("%s:%d Runt avoidance: used_here=%d, remaining_amount=%d target_size=%d dbuf.off=%d\n", __FILE__, __LINE__, used_here, remaining_amount, target_size, lbuf->dbuf.off);
//} //}
...@@ -2775,28 +2747,19 @@ int toku_brt_loader_get_error(BRTLOADER bl, int *error) { ...@@ -2775,28 +2747,19 @@ int toku_brt_loader_get_error(BRTLOADER bl, int *error) {
} }
static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int keylen, unsigned char *val, int vallen) { static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int keylen, unsigned char *val, int vallen) {
lbuf->n_in_buf++;
lbuf->nkeys++; // assume NODUP lbuf->nkeys++; // assume NODUP
lbuf->ndata++; lbuf->ndata++;
lbuf->dsize+= keylen + vallen; lbuf->dsize += keylen + vallen;
int le_off = lbuf->dbuf.off; // append this key val pair to the leafnode
int le_len; // #3588 TODO just make a clean ule and append it to the omt
if (lbuf->xid == TXNID_NONE) { // #3588 TODO can do the rebalancing here and avoid a lot of work later
le_clean(key, keylen, val, vallen, putbuf_bytes, &lbuf->dbuf); BRTNODE leafnode = lbuf->node;
le_len = LE_CLEAN_MEMSIZE(keylen, vallen); uint32_t idx = toku_omt_size(BLB_BUFFER(leafnode, 0));
} DBT thekey = { .data = key, .size = keylen };
else { DBT theval = { .data = val, .size = vallen };
le_committed_mvcc(key, keylen, val, vallen, lbuf->xid, putbuf_bytes, &lbuf->dbuf); BRT_MSG_S cmd = { BRT_INSERT, ZERO_MSN, xids_get_root_xids(), .u.id = { &thekey, &theval } };
le_len = LE_MVCC_COMMITTED_MEMSIZE(keylen, vallen); brt_leaf_apply_cmd_once((BASEMENTNODE)leafnode->bp[0].ptr, &BP_SUBTREE_EST(leafnode,0), &cmd, idx, NULL, NULL);
}
if (!lbuf->dbuf.error) {
invariant(le_off + le_len == lbuf->dbuf.off);
u_int32_t this_x = x1764_memory(lbuf->dbuf.buf + le_off, le_len);
if (0) {
printf("%s:%d x1764(buf+%d, %d)=%8x\n", __FILE__, __LINE__, le_off, le_len, this_x);
}
}
} }
static int write_literal(struct dbout *out, void*data, size_t len) { static int write_literal(struct dbout *out, void*data, size_t len) {
...@@ -2811,114 +2774,33 @@ CILK_BEGIN ...@@ -2811,114 +2774,33 @@ CILK_BEGIN
static void finish_leafnode (struct dbout *out, struct leaf_buf *lbuf, int progress_allocation, BRTLOADER bl) { static void finish_leafnode (struct dbout *out, struct leaf_buf *lbuf, int progress_allocation, BRTLOADER bl) {
int result = 0; int result = 0;
//printf(" finishing leaf node progress=%d fin at %d\n", bl->progress, bl->progress+progress_allocation); // serialize leaf to buffer
putbuf_int64_at(&lbuf->dbuf, lbuf->nkeys_p, lbuf->nkeys); size_t serialized_leaf_size = 0;
putbuf_int64_at(&lbuf->dbuf, lbuf->ndata_p, lbuf->ndata); char *serialized_leaf = NULL;
putbuf_int64_at(&lbuf->dbuf, lbuf->dsize_p, lbuf->dsize); result = toku_serialize_brtnode_to_memory(lbuf->node, &serialized_leaf_size, &serialized_leaf);
// RFP abstract this
const int32_t n_partitions = 1;
struct sub_block_map partition_map;
sub_block_map_init(&partition_map, 0, 0, 0);
putbuf_int32_at(&lbuf->dbuf, lbuf->partitions_p, n_partitions);
putbuf_int32_at(&lbuf->dbuf, lbuf->partitions_p+4, partition_map.idx);
putbuf_int32_at(&lbuf->dbuf, lbuf->partitions_p+8, partition_map.offset);
putbuf_int32_at(&lbuf->dbuf, lbuf->partitions_p+12, partition_map.size);
putbuf_int32_at(&lbuf->dbuf, lbuf->n_in_buf_p, lbuf->n_in_buf);
u_int32_t xsum = x1764_memory(lbuf->dbuf.buf, lbuf->dbuf.off);
putbuf_int32(&lbuf->dbuf, xsum);
result = lbuf->dbuf.error;
if (result == 0) {
//print_bytestring(lbuf->dbuf.buf, lbuf->dbuf.off, 200);
int n_uncompressed_bytes_at_beginning = (8 // tokuleaf
+4 // layout version
+4 // layout version original
+4 // build_id
);
int uncompressed_len = lbuf->dbuf.off - n_uncompressed_bytes_at_beginning;
// choose sub block size and number
int sub_block_size, n_sub_blocks;
choose_sub_block_size(uncompressed_len, max_sub_blocks, &sub_block_size, &n_sub_blocks);
int header_len = n_uncompressed_bytes_at_beginning + sub_block_header_size(n_sub_blocks) + sizeof (uint32_t);
// initialize the sub blocks
// struct sub_block sub_block[n_sub_blocks]; RFP cilk++ dynamic array bug, use malloc instead
struct sub_block *XMALLOC_N(n_sub_blocks, sub_block);
for (int i = 0; i < n_sub_blocks; i++)
sub_block_init(&sub_block[i]);
set_all_sub_block_sizes(uncompressed_len, sub_block_size, n_sub_blocks, sub_block);
// allocate space for the compressed bufer
int bound = get_sum_compressed_size_bound(n_sub_blocks, sub_block);
unsigned char *MALLOC_N(header_len + bound, compressed_buf);
if (compressed_buf == NULL) {
result = errno;
} else {
// compress and checksum the sub blocks
int compressed_len = compress_all_sub_blocks(n_sub_blocks, sub_block,
(char *) (lbuf->dbuf.buf + n_uncompressed_bytes_at_beginning),
(char *) (compressed_buf + header_len), 1, NULL);
// cppy the uncompressed header to the compressed buffer // write it out
memcpy(compressed_buf, lbuf->dbuf.buf, n_uncompressed_bytes_at_beginning); if (result == 0) {
dbout_lock(out);
int uncompressed_header_size = n_uncompressed_bytes_at_beginning + sizeof(n_sub_blocks); long long off_of_leaf = out->current_off;
result = write_literal(out, serialized_leaf, serialized_leaf_size);
// serialize the sub block header if (result == 0) {
memcpy(compressed_buf+n_uncompressed_bytes_at_beginning, &n_sub_blocks, 4); out->translation[lbuf->blocknum.b].off = off_of_leaf;
for (int i = 0; i < n_sub_blocks; i++) { out->translation[lbuf->blocknum.b].size = serialized_leaf_size;
memcpy(compressed_buf+uncompressed_header_size+12*i+0, &sub_block[i].compressed_size, 4); seek_align_locked(out);
memcpy(compressed_buf+uncompressed_header_size+12*i+4, &sub_block[i].uncompressed_size, 4);
memcpy(compressed_buf+uncompressed_header_size+12*i+8, &sub_block[i].xsum, 4);
}
// compute the header checksum and serialize it
u_int32_t header_xsum = x1764_memory(compressed_buf, header_len - sizeof (u_int32_t));
memcpy(compressed_buf + header_len - sizeof (u_int32_t), &header_xsum, 4);
dbout_lock(out);
long long off_of_leaf = out->current_off;
int size = header_len + compressed_len;
if (0) {
fprintf(stderr, "uncompressed buf size=%d (amount of data compressed)\n", uncompressed_len);
fprintf(stderr, "compressed buf size=%d, off=%lld\n", compressed_len, off_of_leaf);
fprintf(stderr, "compressed bytes are:");
//for (int i=0; i<compressed_len; i++) {
// unsigned char c = compressed_buf[28+i];
// if (isprint(c)) fprintf(stderr, "%c", c);
// else fprintf(stderr, "\\%03o", compressed_buf[28+i]);
//}
fprintf(stderr, "\ntotal bytes written = %d, last byte is \\%o\n", size, compressed_buf[size-1]);
}
result = write_literal(out, compressed_buf, size);
if (result == 0) {
//printf("translation[%lld].off = %lld\n", lbuf->blocknum, off_of_leaf);
out->translation[lbuf->blocknum].off = off_of_leaf;
out->translation[lbuf->blocknum].size = size;
seek_align_locked(out);
}
dbout_unlock(out);
} }
dbout_unlock(out);
toku_free(sub_block); // RFP cilk++ bug
toku_free(compressed_buf);
} }
dbuf_destroy(&lbuf->dbuf); // free the node
if (serialized_leaf)
toku_free(serialized_leaf);
toku_brtnode_free(&lbuf->node);
toku_free(lbuf); toku_free(lbuf);
//printf("Nodewrite %d (%.1f%%):", progress_allocation, 100.0*progress_allocation/PROGRESS_MAX); //printf("Nodewrite %d (%.1f%%):", progress_allocation, 100.0*progress_allocation/PROGRESS_MAX);
if (result == 0) { if (result == 0)
result = update_progress(progress_allocation, bl, "wrote node"); result = update_progress(progress_allocation, bl, "wrote node");
}
if (result) if (result)
brt_loader_set_panic(bl, result, TRUE); brt_loader_set_panic(bl, result, TRUE);
...@@ -3094,25 +2976,13 @@ static void write_nonleaf_node (BRTLOADER bl, struct dbout *out, int64_t blocknu ...@@ -3094,25 +2976,13 @@ static void write_nonleaf_node (BRTLOADER bl, struct dbout *out, int64_t blocknu
struct subtree_info *subtree_info, int height, const DESCRIPTOR UU(desc), uint32_t target_nodesize) struct subtree_info *subtree_info, int height, const DESCRIPTOR UU(desc), uint32_t target_nodesize)
{ {
//Nodes do not currently touch descriptors //Nodes do not currently touch descriptors
invariant(height>0); invariant(height > 0);
int result = 0; int result = 0;
BRTNODE XMALLOC(node); BRTNODE XMALLOC(node);
node->nodesize = target_nodesize; toku_initialize_empty_brtnode(node, make_blocknum(blocknum_of_new_node), height, n_children,
node->thisnodename = make_blocknum(blocknum_of_new_node); BRT_LAYOUT_VERSION, target_nodesize, 0);
node->layout_version = BRT_LAYOUT_VERSION;
node->layout_version_original = BRT_LAYOUT_VERSION;
node->build_id = BUILD_ID;
node->max_msn_applied_to_node_on_disk = MIN_MSN;
node->max_msn_applied_to_node_in_memory = MIN_MSN;
node->height=height;
node->n_children = n_children;
node->flags = 0;
XMALLOC_N(n_children-1, node->childkeys);
for (int i=0; i<n_children-1; i++)
node->childkeys[i] = NULL;
unsigned int totalchildkeylens = 0; unsigned int totalchildkeylens = 0;
for (int i=0; i<n_children-1; i++) { for (int i=0; i<n_children-1; i++) {
struct kv_pair *childkey = kv_pair_malloc(pivots[i].data, pivots[i].size, NULL, 0); struct kv_pair *childkey = kv_pair_malloc(pivots[i].data, pivots[i].size, NULL, 0);
...@@ -3124,18 +2994,11 @@ static void write_nonleaf_node (BRTLOADER bl, struct dbout *out, int64_t blocknu ...@@ -3124,18 +2994,11 @@ static void write_nonleaf_node (BRTLOADER bl, struct dbout *out, int64_t blocknu
totalchildkeylens += kv_pair_keylen(childkey); totalchildkeylens += kv_pair_keylen(childkey);
} }
node->totalchildkeylens = totalchildkeylens; node->totalchildkeylens = totalchildkeylens;
XMALLOC_N(n_children, node->bp);
for (int i=0; i<n_children; i++) { for (int i = 0; i < n_children; i++) {
node->bp[i].ptr = toku_xmalloc(sizeof(struct brtnode_nonleaf_childinfo)); BP_SUBTREE_EST(node, i) = subtree_info[i].subtree_estimates;
BP_BLOCKNUM(node,i)= make_blocknum(subtree_info[i].block); BP_BLOCKNUM(node, i) = make_blocknum(subtree_info[i].block);
BP_SUBTREE_EST(node,i) = subtree_info[i].subtree_estimates; BP_STATE(node, i) = PT_AVAIL;
BP_HAVE_FULLHASH(node,i) = FALSE;
BP_FULLHASH(node,i) = 0;
BP_STATE(node,i) = PT_AVAIL;
int r = toku_fifo_create(&BNC_BUFFER(node,i));
if (r != 0)
result = r;
BNC_NBYTESINBUF(node,i)= 0;
} }
if (result == 0) { if (result == 0) {
......
...@@ -195,25 +195,25 @@ int test_main (int argc, const char *argv[]) { ...@@ -195,25 +195,25 @@ int test_main (int argc, const char *argv[]) {
const char *progname=argv[0]; const char *progname=argv[0];
argc--; argv++; argc--; argv++;
while (argc>0) { while (argc>0) {
if (strcmp(argv[0],"-h")==0) { if (strcmp(argv[0], "-h") == 0 || strcmp(argv[0], "--help") == 0) {
return usage(progname); return usage(progname);
} else if (strcmp(argv[0],"-v")==0) { } else if (strcmp(argv[0], "-v") == 0 || strcmp(argv[0], "--verbose") == 0) {
verbose=1; verbose=1;
} else if (strcmp(argv[0],"-q")==0) { } else if (strcmp(argv[0], "-q") == 0) {
verbose=0; verbose=0;
} else if (strcmp(argv[0],"-r") == 0) { } else if (strcmp(argv[0], "-r") == 0) {
argc--; argv++; argc--; argv++;
nrows = atoi(argv[0]); nrows = atoi(argv[0]);
} else if (strcmp(argv[0],"-s") == 0) { } else if (strcmp(argv[0], "-s") == 0) {
toku_brtloader_set_size_factor(1); toku_brtloader_set_size_factor(1);
} else if (argc!=1) { } else if (argv[0][0] == '-' || argc != 1) {
return usage(progname); return usage(progname);
} else { } else {
break; break;
} }
argc--; argv++; argc--; argv++;
} }
assert(argc==1); // argv[1] is the directory in which to do the test. assert(argc == 1); // argv[1] is the directory in which to do the test.
const char* directory = argv[0]; const char* directory = argv[0];
char unlink_all[strlen(directory)+20]; char unlink_all[strlen(directory)+20];
snprintf(unlink_all, strlen(directory)+20, "rm -rf %s", directory); snprintf(unlink_all, strlen(directory)+20, "rm -rf %s", directory);
...@@ -234,9 +234,10 @@ int test_main (int argc, const char *argv[]) { ...@@ -234,9 +234,10 @@ int test_main (int argc, const char *argv[]) {
test_write_dbfile(template, nrows, output_name); test_write_dbfile(template, nrows, output_name);
#if 0
r = system(unlink_all); r = system(unlink_all);
CKERR(r); CKERR(r);
#endif
return 0; return 0;
} }
......
...@@ -20,8 +20,8 @@ static void ...@@ -20,8 +20,8 @@ static void
append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) { append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) {
assert(leafnode->height == 0); assert(leafnode->height == 0);
DBT thekey; toku_fill_dbt(&thekey, key, keylen); DBT thekey = { .data = key, .size = keylen };
DBT theval; toku_fill_dbt(&theval, val, vallen); DBT theval = { .data = val, .size = vallen };
// get an index that we can use to create a new leaf entry // get an index that we can use to create a new leaf entry
uint32_t idx = toku_omt_size(BLB_BUFFER(leafnode, 0)); uint32_t idx = toku_omt_size(BLB_BUFFER(leafnode, 0));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment