Commit c1931e7b authored by Bradley C. Kuszmaul

Many things:

 Make txn->abort work without bombing out in one case ({{{test_txn_abort5}}}). Fixes #431.
 Start writing {{{test_txn_abort6}}}.   Addresses #443.
 Try to write a test for #399 and #400, but haven't managed to reproduce the problem.  Addresses #399, #400.
 Write a test for #332.  Addresses #332.


git-svn-id: file:///svn/tokudb@2406 c7de825b-a66e-492c-adef-691d508d4ae1
parent 636bb009
...@@ -52,11 +52,14 @@ REGRESSION_TESTS = \ ...@@ -52,11 +52,14 @@ REGRESSION_TESTS = \
cachetable-test \ cachetable-test \
cachetable-test2 \ cachetable-test2 \
fifo-test \ fifo-test \
test-brt-delete-both \
brt-test \ brt-test \
test_oexcl \ test_oexcl \
test-assert \ test-assert \
test-primes \ test-primes \
list-test \ list-test \
test-inc-split \
test-del-inorder \
# This line intentionally kept commented so I can have a \ on the end of the previous line # This line intentionally kept commented so I can have a \ on the end of the previous line
BINS = $(REGRESSION_TESTS) \ BINS = $(REGRESSION_TESTS) \
...@@ -91,6 +94,7 @@ CHECKS = \ ...@@ -91,6 +94,7 @@ CHECKS = \
cachetable-test \ cachetable-test \
cachetable-test2 \ cachetable-test2 \
brt-serialize-test \ brt-serialize-test \
test-brt-delete-both \
brt-test \ brt-test \
fifo-test \ fifo-test \
test_toku_malloc_plain_free \ test_toku_malloc_plain_free \
...@@ -122,12 +126,15 @@ check-fanout: ...@@ -122,12 +126,15 @@ check-fanout:
let BRT_FANOUT=BRT_FANOUT+1; \ let BRT_FANOUT=BRT_FANOUT+1; \
done done
pma-test benchmark-test brt-test brt-serialize-test brtdump: LDFLAGS+=-lz pma-test benchmark-test brt-test test-brt-delete-both brt-serialize-test brtdump test-inc-split test-del-inorder: LDFLAGS+=-lz
# pma: PROF_FLAGS=-fprofile-arcs -ftest-coverage # pma: PROF_FLAGS=-fprofile-arcs -ftest-coverage
BRT_INTERNAL_H_INCLUDES = brt-internal.h cachetable.h fifo.h pma.h brt.h brttypes.h yerror.h ybt.h log.h ../include/db.h kv-pair.h memory.h crc.h BRT_INTERNAL_H_INCLUDES = brt-internal.h cachetable.h fifo.h pma.h brt.h brttypes.h yerror.h ybt.h log.h ../include/db.h kv-pair.h memory.h crc.h
key.o: brttypes.h key.h key.o: brttypes.h key.h
list-test: list-test.o toku_assert.o list-test: list-test.o toku_assert.o
test-brt-delete-both: ybt.o brt.o fifo.o pma.o memory.o brt-serialize.o cachetable.o ybt.o key.o primes.o toku_assert.o log.o mempool.o brt-verify.o fingerprint.o log_code.o roll.o
test-inc-split: test-inc-split.o brt.o toku_assert.o key.o ybt.o cachetable.o memory.o log.o pma.o log_code.o fifo.o fingerprint.o brt-serialize.o brt-verify.o roll.o primes.o mempool.o
test-del-inorder: test-del-inorder.o brt.o toku_assert.o key.o ybt.o cachetable.o memory.o log.o pma.o log_code.o fifo.o fingerprint.o brt-serialize.o brt-verify.o roll.o primes.o mempool.o
pma-test.o: $(BRT_INTERNAL_H_INCLUDES) pma-internal.h pma.h list.h mempool.h pma-test.o: $(BRT_INTERNAL_H_INCLUDES) pma-internal.h pma.h list.h mempool.h
pma-test: pma.o memory.o key.o ybt.o log.o mempool.o fingerprint.o brt-serialize.o fifo.o primes.o toku_assert.o log_code.o roll.o brt.o cachetable.o brt-verify.o pma-test: pma.o memory.o key.o ybt.o log.o mempool.o fingerprint.o brt-serialize.o fifo.o primes.o toku_assert.o log_code.o roll.o brt.o cachetable.o brt-verify.o
pma.o: pma.h yerror.h pma-internal.h memory.h key.h ybt.h brttypes.h log.h ../include/db.h log_header.h pma.o: pma.h yerror.h pma-internal.h memory.h key.h ybt.h brttypes.h log.h ../include/db.h log_header.h
......
...@@ -29,12 +29,6 @@ enum { BUFFER_HEADER_SIZE = (4 // height// ...@@ -29,12 +29,6 @@ enum { BUFFER_HEADER_SIZE = (4 // height//
+ TREE_FANOUT * 8 // children + TREE_FANOUT * 8 // children
) }; ) };
struct brtnode_nonleaf_pivotinfo {
struct kv_pair *pivotkey; /* For DUPSORT keys, the keys are whole key-value pairs.
* For nonduplicate and DUPSORT keys we have
* Child 0's keys <= pivotkey[0] < Child 1's keys <= pivotkey[1] < ... pivotkey[N-1] < child N's keys <= pivotkey[N] ...
*/
};
struct brtnode_nonleaf_childinfo { struct brtnode_nonleaf_childinfo {
u_int32_t subtree_fingerprint; u_int32_t subtree_fingerprint;
DISKOFF diskoff; DISKOFF diskoff;
...@@ -203,4 +197,12 @@ struct brt_cursor { ...@@ -203,4 +197,12 @@ struct brt_cursor {
DBT val; DBT val;
}; };
/* Stuff for testing */
int toku_testsetup_leaf(BRT brt, DISKOFF *diskoff);
int toku_testsetup_nonleaf (BRT brt, int height, DISKOFF *diskoff, int n_children, DISKOFF *children, u_int32_t *subtree_fingerprints, char **keys, int *keylens);
int toku_testsetup_root(BRT brt, DISKOFF diskoff);
int toku_testsetup_get_sersize(BRT brt, DISKOFF diskoff); // Return the size on disk.
int toku_testsetup_insert_to_leaf (BRT brt, DISKOFF diskoff, char *key, int keylen, char *val, int vallen, u_int32_t *leaf_fingerprint);
int toku_testsetup_insert_to_nonleaf (BRT brt, DISKOFF diskoff, enum brt_cmd_type, char *key, int keylen, char *val, int vallen, u_int32_t *subtree_fingerprint);
#endif #endif
...@@ -502,6 +502,15 @@ static int push_brt_cmd_down_only_if_it_wont_push_more_else_put_here (BRT t, BRT ...@@ -502,6 +502,15 @@ static int push_brt_cmd_down_only_if_it_wont_push_more_else_put_here (BRT t, BRT
DBT *v = cmd->u.id.val; DBT *v = cmd->u.id.val;
unsigned int newsize = toku_serialize_brtnode_size(child) + k->size + v->size + KEY_VALUE_OVERHEAD; unsigned int newsize = toku_serialize_brtnode_size(child) + k->size + v->size + KEY_VALUE_OVERHEAD;
newsize += (child->height > 0) ? BRT_CMD_OVERHEAD : PMA_ITEM_OVERHEAD; newsize += (child->height > 0) ? BRT_CMD_OVERHEAD : PMA_ITEM_OVERHEAD;
#if 0
// This stuff is wrong. And we don't have a test to differentiate this from the previous line of code.
unsigned int additionaloverhead = (child->height > 0) ? BRT_CMD_OVERHEAD : PMA_ITEM_OVERHEAD;
newsize += additionaloverhead; // PMA_ITEM_OVERHEAD; // Was this
printf("pbcdofiwpmeph newsize=%d\n", newsize);
if (newsize<=node->nodesize && newsize+additionaloverhead-PMA_ITEM_OVERHEAD>node->nodesize) {
printf("%s:%d\n", __FILE__, __LINE__);
}
#endif
int to_child = newsize <= child->nodesize; int to_child = newsize <= child->nodesize;
if (toku_brt_debug_mode) { if (toku_brt_debug_mode) {
printf("%s:%d pushing %s to %s %d", __FILE__, __LINE__, (char*)k->data, to_child? "child" : "hash", childnum_of_node); printf("%s:%d pushing %s to %s %d", __FILE__, __LINE__, (char*)k->data, to_child? "child" : "hash", childnum_of_node);
...@@ -2339,3 +2348,145 @@ int toku_brt_cursor_delete(BRT_CURSOR cursor, int flags, TOKUTXN txn) { ...@@ -2339,3 +2348,145 @@ int toku_brt_cursor_delete(BRT_CURSOR cursor, int flags, TOKUTXN txn) {
return r; return r;
} }
/* For an open brt, store the height of the current root node in *height.
 * Returns 0 on success.  If pinning the header or the root node fails, the
 * nonzero error code is returned and *height is left untouched. */
int toku_brt_height_of_root(BRT brt, int *height) {
    int r = toku_read_and_pin_brt_header(brt->cf, &brt->h);
    if (r != 0) return r;

    CACHEKEY *root_offset = toku_calculate_root_offset_pointer(brt);
    void *pinned;
    r = toku_cachetable_get_and_pin(brt->cf, *root_offset, &pinned, NULL,
                                    toku_brtnode_flush_callback, toku_brtnode_fetch_callback, brt);
    if (r != 0) {
        // The header was pinned above; release it before reporting the failure.
        toku_unpin_brt_header(brt);
        return r;
    }

    BRTNODE root = pinned;
    *height = root->height;

    // Release in the reverse order of pinning: node first, then header.
    r = unpin_brtnode(brt, root);
    assert(r==0);
    r = toku_unpin_brt_header(brt);
    assert(r==0);
    return 0;
}
/* Testing hook: create a new, empty node of height 0 in brt and report its
 * node name through *diskoff.
 * Returns 0 on success, otherwise the first nonzero code from pinning the
 * header, unpinning the node, or unpinning the header. */
int toku_testsetup_leaf(BRT brt, DISKOFF *diskoff) {
    BRTNODE leaf;
    int r = toku_read_and_pin_brt_header(brt->cf, &brt->h);
    if (r != 0) return r;

    create_new_brtnode(brt, &leaf, 0, (TOKULOGGER)0);
    *diskoff = leaf->thisnodename;

    r = unpin_brtnode(brt, leaf);
    if (r == 0) {
        r = toku_unpin_brt_header(brt);
    }
    return r;
}
/* Testing hook: create a nonleaf node of the given height with n_children
 * children and report its node name through *diskoff.
 *   children[i], subtree_fingerprints[i]  describe child i (0 <= i < n_children)
 *   keys[i], keylens[i]                   give pivot i     (0 <= i < n_children-1)
 * Every child starts with an empty buffer.
 * This is test scaffolding, so nothing is cleaned up on an error path
 * (e.g., memory that was already allocated is simply left allocated). */
int toku_testsetup_nonleaf (BRT brt, int height, DISKOFF *diskoff, int n_children, DISKOFF *children, u_int32_t *subtree_fingerprints, char **keys, int *keylens) {
    BRTNODE node;
    assert(n_children<=BRT_FANOUT);

    int r = toku_read_and_pin_brt_header(brt->cf, &brt->h);
    if (r != 0) return r;

    create_new_brtnode(brt, &node, height, (TOKULOGGER)0);
    node->u.n.n_children         = n_children;
    node->u.n.totalchildkeylens  = 0;
    node->u.n.n_bytes_in_buffers = 0;

    // Per-child bookkeeping plus an empty message buffer for each child.
    int child;
    for (child = 0; child < n_children; child++) {
        struct brtnode_nonleaf_childinfo info = { .subtree_fingerprint = subtree_fingerprints[child],
                                                  .diskoff             = children[child],
                                                  .n_bytes_in_buffer   = 0 };
        node->u.n.childinfos[child] = info;
        r = toku_fifo_create(&BNC_BUFFER(node,child));
        if (r != 0) return r;
    }

    // There is one fewer pivot than there are children.
    int pivot;
    for (pivot = 0; pivot + 1 < n_children; pivot++) {
        node->u.n.childkeys[pivot] = kv_pair_malloc(keys[pivot], keylens[pivot], 0, 0);
        node->u.n.totalchildkeylens += keylens[pivot];
    }

    *diskoff = node->thisnodename;

    r = unpin_brtnode(brt, node);
    if (r == 0) {
        r = toku_unpin_brt_header(brt);
    }
    return r;
}
/* Testing hook: make diskoff the unnamed root recorded in brt's header.
 * Returns the error from pinning the header if that fails, otherwise the
 * result of unpinning it. */
int toku_testsetup_root(BRT brt, DISKOFF diskoff) {
    int r = toku_read_and_pin_brt_header(brt->cf, &brt->h);
    if (r == 0) {
        brt->h->unnamed_root = diskoff;
        r = toku_unpin_brt_header(brt);
    }
    return r;
}
/* Testing hook: return the size on disk (as computed by
 * toku_serialize_brtnode_size) of the node stored at diskoff.
 * Failure to pin or unpin the node trips an assertion. */
int toku_testsetup_get_sersize(BRT brt, DISKOFF diskoff)
{
    void *pinned;
    int r = toku_cachetable_get_and_pin(brt->cf, diskoff, &pinned, NULL,
                                        toku_brtnode_flush_callback, toku_brtnode_fetch_callback, brt);
    assert(r==0);

    const int sersize = toku_serialize_brtnode_size(pinned);

    r = unpin_brtnode(brt, pinned);
    assert(r==0);
    return sersize;
}
/* Testing hook: insert (or replace) one key/value pair directly in the leaf
 * stored at diskoff, bypassing the normal top-down insertion path.
 * The leaf's byte count is adjusted, the node is marked dirty, and the
 * leaf's resulting local fingerprint is stored in *subtree_fingerprint.
 * Returns nonzero if the node cannot be pinned or unpinned; a failing PMA
 * insert, or a node that is not of height 0, trips an assertion. */
int toku_testsetup_insert_to_leaf (BRT brt, DISKOFF diskoff, char *key, int keylen, char *val, int vallen, u_int32_t *subtree_fingerprint) {
    void *pinned;
    int r = toku_cachetable_get_and_pin(brt->cf, diskoff, &pinned, NULL,
                                        toku_brtnode_flush_callback, toku_brtnode_fetch_callback, brt);
    if (r != 0) return r;

    BRTNODE leaf = pinned;
    assert(leaf->height==0);

    DBT kdbt, vdbt;
    DBT *kp = toku_fill_dbt(&kdbt, key, keylen);
    DBT *vp = toku_fill_dbt(&vdbt, val, vallen);

    int old_vallen;
    enum pma_errors status =
        toku_pma_insert_or_replace(leaf->u.l.buffer,
                                   kp, vp,
                                   &old_vallen,
                                   (TOKULOGGER)0, (TXNID)0,
                                   toku_cachefile_filenum(brt->cf),
                                   leaf->thisnodename, leaf->rand4fingerprint,
                                   &leaf->local_fingerprint,
                                   &leaf->log_lsn);
    assert(status==BRT_OK);

    if (old_vallen < 0) {
        // Negative: treated as a brand-new pair, so charge for the whole item.
        leaf->u.l.n_bytes_in_buffer += kdbt.size + vdbt.size + KEY_VALUE_OVERHEAD + PMA_ITEM_OVERHEAD;
    } else {
        // Nonnegative: an existing value was replaced, so only the value size changes.
        leaf->u.l.n_bytes_in_buffer += vdbt.size - old_vallen;
    }

    leaf->dirty = 1;
    *subtree_fingerprint = leaf->local_fingerprint;

    return unpin_brtnode(brt, pinned);
}
/* Testing hook: enqueue one command of type cmdtype directly into a buffer of
 * the nonleaf node stored at diskoff.  The target child is whichever one
 * brtnode_right_child selects for the key/value pair.
 * The node's local fingerprint and byte counts are updated, the node is
 * marked dirty, and the same fingerprint delta is added to
 * *subtree_fingerprint.
 * Returns nonzero if the node cannot be pinned or unpinned; a failing
 * enqueue, or a node of height 0, trips an assertion. */
int toku_testsetup_insert_to_nonleaf (BRT brt, DISKOFF diskoff, enum brt_cmd_type cmdtype, char *key, int keylen, char *val, int vallen, u_int32_t *subtree_fingerprint) {
    void *pinned;
    int r = toku_cachetable_get_and_pin(brt->cf, diskoff, &pinned, NULL,
                                        toku_brtnode_flush_callback, toku_brtnode_fetch_callback, brt);
    if (r != 0) return r;

    BRTNODE parent = pinned;
    assert(parent->height>0);

    DBT kdbt, vdbt;
    DBT *kp = toku_fill_dbt(&kdbt, key, keylen);
    DBT *vp = toku_fill_dbt(&vdbt, val, vallen);
    const int target = brtnode_right_child(parent, kp, vp, brt);

    r = toku_fifo_enq(BNC_BUFFER(parent, target), key, keylen, val, vallen, cmdtype, (TXNID)0);
    assert(r==0);

    // Fold the new command into both the node's own and the caller's fingerprint.
    const u_int32_t delta = parent->rand4fingerprint * toku_calccrc32_cmd(cmdtype, (TXNID)0, key, keylen, val, vallen);
    parent->local_fingerprint += delta;
    *subtree_fingerprint      += delta;

    // Account for the bytes the command occupies, per node and per child buffer.
    const int added = keylen + vallen + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
    parent->u.n.n_bytes_in_buffers  += added;
    BNC_NBYTESINBUF(parent, target) += added;

    parent->dirty = 1;
    return unpin_brtnode(brt, pinned);
}
...@@ -28,7 +28,7 @@ int toku_brt_remove_subdb(BRT brt, const char *dbname, u_int32_t flags); ...@@ -28,7 +28,7 @@ int toku_brt_remove_subdb(BRT brt, const char *dbname, u_int32_t flags);
int toku_brt_insert (BRT, DBT *, DBT *, TOKUTXN); int toku_brt_insert (BRT, DBT *, DBT *, TOKUTXN);
int toku_brt_lookup (BRT brt, DBT *k, DBT *v); int toku_brt_lookup (BRT brt, DBT *k, DBT *v);
int toku_brt_delete (BRT brt, DBT *k, TOKUTXN); int toku_brt_delete (BRT brt, DBT *k, TOKUTXN);
int toku_brt_delete_both (BRT brt, DBT *k, DBT *v, TOKUTXN); int toku_brt_delete_both (BRT brt, DBT *k, DBT *v, TOKUTXN); // Delete a pair only if both k and v are equal according to the comparison function.
int toku_close_brt (BRT); int toku_close_brt (BRT);
int toku_dump_brt (BRT brt); int toku_dump_brt (BRT brt);
void brt_fsync (BRT); /* fsync, but don't clear the caches. */ void brt_fsync (BRT); /* fsync, but don't clear the caches. */
...@@ -64,4 +64,6 @@ int toku_brt_dbt_set_value (BRT, DBT*, bytevec val, ITEMLEN vallen); ...@@ -64,4 +64,6 @@ int toku_brt_dbt_set_value (BRT, DBT*, bytevec val, ITEMLEN vallen);
int toku_brt_get_fd(BRT, int *); int toku_brt_get_fd(BRT, int *);
int toku_brt_height_of_root(BRT, int *height); // for an open brt, return the current height.
#endif #endif
...@@ -50,6 +50,7 @@ void dump_node (int f, DISKOFF off, struct brt_header *h) { ...@@ -50,6 +50,7 @@ void dump_node (int f, DISKOFF off, struct brt_header *h) {
assert(r==0); assert(r==0);
printf("brtnode\n"); printf("brtnode\n");
printf(" nodesize =%u\n", n->nodesize); printf(" nodesize =%u\n", n->nodesize);
printf(" sizeonddisk =%d\n", toku_serialize_brtnode_size(n));
printf(" flags =%u\n", n->flags); printf(" flags =%u\n", n->flags);
printf(" thisnodename=%lld\n", n->thisnodename); printf(" thisnodename=%lld\n", n->thisnodename);
printf(" disk_lsn =%" PRId64 "\n", n->disk_lsn.lsn); printf(" disk_lsn =%" PRId64 "\n", n->disk_lsn.lsn);
...@@ -97,6 +98,7 @@ void dump_node (int f, DISKOFF off, struct brt_header *h) { ...@@ -97,6 +98,7 @@ void dump_node (int f, DISKOFF off, struct brt_header *h) {
printf(" "); printf(" ");
print_item(data, datalen); print_item(data, datalen);
} }
printf("\n");
}) })
); );
} }
......
...@@ -608,7 +608,7 @@ int toku_abort_logentry_commit (struct logtype_commit *le __attribute__((__unuse ...@@ -608,7 +608,7 @@ int toku_abort_logentry_commit (struct logtype_commit *le __attribute__((__unuse
int toku_logger_abort(TOKUTXN txn) { int toku_logger_abort(TOKUTXN txn) {
// Must undo everything. Must undo it all in reverse order. // Must undo everything. Must undo it all in reverse order.
// Build the reverse list // Build the reverse list
struct log_entry *item=txn->oldest_logentry; struct log_entry *item;
while ((item=txn->newest_logentry)) { while ((item=txn->newest_logentry)) {
txn->newest_logentry = item->prev; txn->newest_logentry = item->prev;
int r; int r;
......
...@@ -129,13 +129,17 @@ const struct logtype logtypes[] = { ...@@ -129,13 +129,17 @@ const struct logtype logtypes[] = {
{"u_int32_t", "oldfingerprint", "%08x"}, {"u_int32_t", "oldfingerprint", "%08x"},
{"u_int32_t", "newfingerprint", "%08x"}, {"u_int32_t", "newfingerprint", "%08x"},
NULLFIELD}}, NULLFIELD}},
{"insertinleaf", 'I'+GEN_ROLLBACK, FA{{"TXNID", "txnid", 0}, {"insertinleaf", 'I', FA{{"TXNID", "txnid", 0},
{"FILENUM", "filenum", 0}, {"FILENUM", "filenum", 0},
{"DISKOFF", "diskoff", 0}, {"DISKOFF", "diskoff", 0},
{"u_int32_t", "pmaidx", 0}, {"u_int32_t", "pmaidx", 0},
{"BYTESTRING", "key", 0}, {"BYTESTRING", "key", 0},
{"BYTESTRING", "data", 0}, {"BYTESTRING", "data", 0},
NULLFIELD}}, NULLFIELD}},
{"tl_delete", 'K'GEN_ROLLBACK, FA{{"FILENUM", "filenum", 0}, // Note a delete for rollback.
{"BYTESTRING", "key", 0},
{"BYTESTRING", "data", 0},
NULLFIELD}},
{"deleteinleaf", 'd', FA{{"TXNID", "txnid", 0}, {"deleteinleaf", 'd', FA{{"TXNID", "txnid", 0},
{"FILENUM", "filenum", 0}, {"FILENUM", "filenum", 0},
{"DISKOFF", "diskoff", 0}, {"DISKOFF", "diskoff", 0},
...@@ -301,7 +305,6 @@ void generate_log_writer (void) { ...@@ -301,7 +305,6 @@ void generate_log_writer (void) {
fprintf(cf, " return r;\n"); fprintf(cf, " return r;\n");
fprintf(cf, "}\n\n"); fprintf(cf, "}\n\n");
})); }));
} }
void generate_log_reader (void) { void generate_log_reader (void) {
......
...@@ -725,11 +725,7 @@ int toku_pma_insert (PMA pma, DBT *k, DBT *v, TOKULOGGER logger, TXNID xid, FILE ...@@ -725,11 +725,7 @@ int toku_pma_insert (PMA pma, DBT *k, DBT *v, TOKULOGGER logger, TXNID xid, FILE
if (r!=0) goto freeit; if (r!=0) goto freeit;
if (0!=toku_txnid2txn(logger, xid, &txn)) goto freeit; if (0!=toku_txnid2txn(logger, xid, &txn)) goto freeit;
/* if no txn then, the txn is completed, so we don't bother with rollback. /* Don't save rollback info, instead we'll reinsert the command at the root, if the insert fails. */
* In particular, if the txn committed, we don't rollback.
* If the txn aborted, then we already inserted a delete command when we rolled it back.
*/
r = toku_logger_save_rollback_insertinleaf(txn, xid, pma->filenum, diskoff, idx, key, data);
if (0) { freeit: toku_free(key.data); toku_free(data.data); } if (0) { freeit: toku_free(key.data); toku_free(data.data); }
return r; return r;
} }
...@@ -776,7 +772,6 @@ static int pma_delete_nodup (PMA pma, DBT *k, DBT *v, u_int32_t rand4sem, u_int3 ...@@ -776,7 +772,6 @@ static int pma_delete_nodup (PMA pma, DBT *k, DBT *v, u_int32_t rand4sem, u_int3
} }
int toku_pma_delete (PMA pma, DBT *k, DBT *v, u_int32_t rand4sem, u_int32_t *fingerprint, u_int32_t *deleted_size) { int toku_pma_delete (PMA pma, DBT *k, DBT *v, u_int32_t rand4sem, u_int32_t *fingerprint, u_int32_t *deleted_size) {
v = v;
u_int32_t my_deleted_size; u_int32_t my_deleted_size;
if (!deleted_size) if (!deleted_size)
deleted_size = &my_deleted_size; deleted_size = &my_deleted_size;
...@@ -863,12 +858,26 @@ int toku_pma_insert_or_replace (PMA pma, DBT *k, DBT *v, ...@@ -863,12 +858,26 @@ int toku_pma_insert_or_replace (PMA pma, DBT *k, DBT *v,
struct kv_pair *kv = pma->pairs[idx]; struct kv_pair *kv = pma->pairs[idx];
*replaced_v_size = kv->vallen; *replaced_v_size = kv->vallen;
*fingerprint -= rand4fingerprint*toku_calccrc32_kvpair(kv_pair_key_const(kv), kv_pair_keylen(kv), kv_pair_val_const(kv), kv_pair_vallen(kv)); *fingerprint -= rand4fingerprint*toku_calccrc32_kvpair(kv_pair_key_const(kv), kv_pair_keylen(kv), kv_pair_val_const(kv), kv_pair_vallen(kv));
{
{ {
const BYTESTRING deletedkey = { kv->keylen, kv_pair_key(kv) }; const BYTESTRING deletedkey = { kv->keylen, kv_pair_key(kv) };
const BYTESTRING deleteddata = { kv->vallen, kv_pair_val(kv) }; const BYTESTRING deleteddata = { kv->vallen, kv_pair_val(kv) };
r=toku_log_deleteinleaf(logger, xid, pma->filenum, diskoff, idx, deletedkey, deleteddata); r=toku_log_deleteinleaf(logger, xid, pma->filenum, diskoff, idx, deletedkey, deleteddata);
if (r!=0) return r; if (r!=0) return r;
} }
if (logger) {
const BYTESTRING deletedkey = { kv->keylen, toku_memdup(kv_pair_key(kv), kv->keylen) };
const BYTESTRING deleteddata = { kv->vallen, toku_memdup(kv_pair_val(kv), kv->vallen) };
TOKUTXN txn;
if (0!=toku_txnid2txn(logger, xid, &txn)) return -1;
r=toku_logger_save_rollback_tl_delete(txn, pma->filenum, deletedkey, deleteddata);
if (r!=0) {
toku_free(deletedkey.data);
toku_free(deleteddata.data);
return r;
}
}
}
if (logger && node_lsn) *node_lsn = toku_logger_last_lsn(logger); if (logger && node_lsn) *node_lsn = toku_logger_last_lsn(logger);
if (v->size == (unsigned int) kv_pair_vallen(kv)) { if (v->size == (unsigned int) kv_pair_vallen(kv)) {
memcpy(kv_pair_val(kv), v->data, v->size); memcpy(kv_pair_val(kv), v->data, v->size);
...@@ -895,20 +904,13 @@ int toku_pma_insert_or_replace (PMA pma, DBT *k, DBT *v, ...@@ -895,20 +904,13 @@ int toku_pma_insert_or_replace (PMA pma, DBT *k, DBT *v,
//printf("%s:%d txn=%p\n", __FILE__, __LINE__, txn); //printf("%s:%d txn=%p\n", __FILE__, __LINE__, txn);
logit_and_update_fingerprint: logit_and_update_fingerprint:
{ {
const struct kv_pair *pair = pma->pairs[idx]; struct kv_pair *pair = pma->pairs[idx];
const BYTESTRING key = { pair->keylen, toku_memdup(kv_pair_key_const(pair), pair->keylen) }; BYTESTRING key = { pair->keylen, kv_pair_key(pair) };
const BYTESTRING data = { pair->vallen, toku_memdup(kv_pair_val_const(pair), pair->vallen) }; BYTESTRING data = { pair->vallen, kv_pair_val(pair) };
r = toku_log_insertinleaf (logger, xid, pma->filenum, diskoff, idx, key, data); r = toku_log_insertinleaf (logger, xid, pma->filenum, diskoff, idx, key, data);
if (logger && node_lsn) *node_lsn = toku_logger_last_lsn(logger); if (logger && node_lsn) *node_lsn = toku_logger_last_lsn(logger);
if (r!=0) goto freeit; if (r!=0) return r;
TOKUTXN txn; /* We don't record the insert here for rollback. The insert should have been logged at the top-level. */
if (0!=toku_txnid2txn(logger, xid, &txn)) goto freeit;
/* the txn is completed, so we don't bother with rollback.
* In particular, if the txn committed, we don't rollback.
* If the txn aborted, then we already inserted a delete command when we rolled it back.
*/
r = toku_logger_save_rollback_insertinleaf(txn, xid, pma->filenum, diskoff, idx, key, data);
if (0) { freeit: toku_free(key.data); toku_free(data.data); }
} }
*fingerprint += rand4fingerprint*toku_calccrc32_kvpair(k->data, k->size, v->data, v->size); *fingerprint += rand4fingerprint*toku_calccrc32_kvpair(k->data, k->size, v->data, v->size);
return r; return r;
......
...@@ -53,10 +53,14 @@ enum pma_errors toku_pma_insert (PMA, DBT*, DBT*, TOKULOGGER, TXNID, FILENUM, DI ...@@ -53,10 +53,14 @@ enum pma_errors toku_pma_insert (PMA, DBT*, DBT*, TOKULOGGER, TXNID, FILENUM, DI
/* This returns an error if the key is NOT present. */ /* This returns an error if the key is NOT present. */
int pma_replace (PMA, bytevec key, ITEMLEN keylen, bytevec data, ITEMLEN datalen); int pma_replace (PMA, bytevec key, ITEMLEN keylen, bytevec data, ITEMLEN datalen);
/* delete pairs from the pma /* Delete pairs from the pma.
if val is 0 then delete all pairs from the pma that match the key * If val is 0 then delete all pairs from the pma that match the key.
if val is not 0 then only delete the pair that matches both the key and the val */ * If val is not 0 then only delete the pair that matches both the key and the val.
* (This even works if there is no such pair (in which case DB_NOTFOUND is returned, and
* no changes are made.)
* The case where val!=0 should work for both DUP and NODUP dictionaries.
* For NODUP dictionaries, the value is deleted only if both the key and the value match.
*/
int toku_pma_delete (PMA, DBT */*key*/, DBT */*val*/, u_int32_t /*random for fingerprint*/, u_int32_t */*fingerprint*/, u_int32_t *deleted_size); int toku_pma_delete (PMA, DBT */*key*/, DBT */*val*/, u_int32_t /*random for fingerprint*/, u_int32_t */*fingerprint*/, u_int32_t *deleted_size);
int toku_pma_insert_or_replace (PMA /*pma*/, DBT */*k*/, DBT */*v*/, int toku_pma_insert_or_replace (PMA /*pma*/, DBT */*k*/, DBT */*v*/,
......
...@@ -442,23 +442,23 @@ void toku_recover_deleteinleaf (LSN lsn, TXNID UU(txnid), FILENUM filenum, DISKO ...@@ -442,23 +442,23 @@ void toku_recover_deleteinleaf (LSN lsn, TXNID UU(txnid), FILENUM filenum, DISKO
toku_free_BYTESTRING(databs); toku_free_BYTESTRING(databs);
} }
int toku_rollback_deleteinleaf (struct logtype_deleteinleaf *c, TOKUTXN txn) { void toku_recover_tl_delete (LSN lsn __attribute__((__unused__)), FILENUM filenum __attribute__((__unused__)), BYTESTRING key __attribute__((__unused__)), BYTESTRING data __attribute__((__unused__))) {
return; // tl_delete should not appear in the log.
}
int toku_rollback_tl_delete (FILENUM filenum,
BYTESTRING key,BYTESTRING data,TOKUTXN txn) {
CACHEFILE cf; CACHEFILE cf;
BRT brt; BRT brt;
void *node_v; int r = toku_cachefile_of_filenum(txn->logger->ct, filenum, &cf, &brt);
int r = toku_cachefile_of_filenum(txn->logger->ct, c->filenum, &cf, &brt);
assert(r==0); assert(r==0);
r = toku_cachetable_get_and_pin(cf, c->diskoff, &node_v, NULL, toku_brtnode_flush_callback, toku_brtnode_fetch_callback, brt); DBT key_dbt,data_dbt;
if (r!=0) return r; r = toku_brt_insert(brt,
BRTNODE node = node_v; toku_fill_dbt(&key_dbt, key.data, key.len),
DBT key,data; toku_fill_dbt(&data_dbt, data.data, data.len),
r = toku_pma_set_at_index(node->u.l.buffer, c->pmaidx, toku_fill_dbt(&key, c->key.data, c->key.len), toku_fill_dbt(&data, c->data.data, c->data.len)); txn);
if (r!=0) return r;
node->local_fingerprint += node->rand4fingerprint*toku_calccrc32_kvpair(c->key.data, c->key.len,c->data.data, c->data.len);
node->u.l.n_bytes_in_buffer += PMA_ITEM_OVERHEAD + KEY_VALUE_OVERHEAD + c->key.len + c->data.len;
VERIFY_COUNTS(node);
node->log_lsn = c->lsn;
r = toku_cachetable_unpin(cf, c->diskoff, 1, toku_serialize_brtnode_size(node));
return r; return r;
} }
......
/* The goal of this test: make sure that inserts stay behind deletes. */
#include "brt.h"
#include "key.h"
#include "toku_assert.h"
#include "brt-internal.h"
#include <stdio.h>
#include <string.h>
#include <unistd.h>
// Null placeholders passed where the test needs no transaction or DB handle.
static TOKUTXN const null_txn = 0;
static DB * const null_db = 0;
// NODESIZE is the node size handed to toku_open_brt.
// KSIZE and PSIZE are not referenced by the visible part of this test.
enum { NODESIZE = 1024, KSIZE=NODESIZE-100, PSIZE=20 };
// Cachetable and tree handle shared by the test; set up and torn down in doit().
CACHETABLE ct;
BRT t;
// Name (and buffer length) of the tree's file, derived from __FILE__ in doit().
int fnamelen;
char *fname;
/* Hand-build a two-level tree (one empty leaf under one nonleaf root), park a
 * BRT_DELETE for "hello" in the root's buffer, then insert "hello" -> "there"
 * through the normal interface and check that a lookup of "hello" succeeds. */
void doit (void) {
    DISKOFF leaf_off, parent_off;
    u_int32_t leaf_fingerprint = 0;
    u_int32_t parent_fingerprint = 0;
    DBT key, val;
    int r;

    // The tree lives in a file named after this source file.
    fnamelen = strlen(__FILE__) + 20;
    fname = malloc(fnamelen);
    assert(fname!=0);
    snprintf(fname, fnamelen, "%s.brt", __FILE__);

    r = toku_brt_create_cachetable(&ct, 16*1024, ZERO_LSN, NULL_LOGGER);
    assert(r==0);
    unlink(fname);
    r = toku_open_brt(fname, 0, 1, &t, NODESIZE, ct, null_txn, toku_default_compare_fun, null_db);
    assert(r==0);

    // One leaf, and a height-1 node whose only child is that leaf.
    r = toku_testsetup_leaf(t, &leaf_off);
    assert(r==0);
    r = toku_testsetup_nonleaf(t, 1, &parent_off, 1, &leaf_off, &leaf_fingerprint, 0, 0);
    assert(r==0);

    // The delete goes into the nonleaf's buffer before the node becomes the root.
    r = toku_testsetup_insert_to_nonleaf(t, parent_off, BRT_DELETE, "hello", 6, 0, 0, &parent_fingerprint);
    assert(r==0);
    r = toku_testsetup_root(t, parent_off);
    assert(r==0);

    // This insert arrives after the buffered delete.
    DBT *kp = toku_fill_dbt(&key, "hello", 6);
    DBT *vp = toku_fill_dbt(&val, "there", 6);
    r = toku_brt_insert(t, kp, vp, null_txn);
    assert(r==0);

    memset(&val, 0, sizeof(val));
    r = toku_brt_lookup(t, &key, &val);
    assert(r==0);

    r = toku_close_brt(t);
    assert(r==0);
    r = toku_cachetable_close(&ct);
    assert(r==0);
}
/* Entry point: command-line arguments are ignored; run the single scenario once. */
int main (int argc __attribute__((__unused__)), char *argv[] __attribute__((__unused__))) {
    doit();

    return 0;
}
/* The goal of this test: Make sure that when we aggressively promote
* that we don't get a fencepost error on the size. (#399, I think)
*
* For various values of I do the following:
*
* Make a tree of height 3 (that is, the root is of height 2)
* use small nodes (say 4KB)
* you have this tree:
* A
* B
* C0 C1 C2 .. C15
* A has only one child. B has as many children as it can get.
* Fill the C nodes (the leaves) all almost full.
* Fill B's buffer up with a big message X for C15, and a slightly smaller message Y for C1.
* Put into A's buffer a little message Z aimed at C0.
* Now insert a message of size I aimed at C0. I and Z together are too big to fit in A.
* First: X will be pushed into C15, resulting in this split
* A
* B0
* C0 C1 ... C8
* B1
* C9 C10 ... C15 C16
* At this point C0 through C14 are full, Y is in B0's buffer, and A's buffer contains I and Z.
* So we try to push Z if it fits. Which it does.
* So then we try to push I if it fits. If we calculated wrong, everything breaks now.
*
*/
#include "brt.h"
#include "key.h"
#include "toku_assert.h"
#include "brt-internal.h"
#include <stdio.h>
#include <string.h>
#include <unistd.h>
// Null placeholders passed where the test needs no transaction or DB handle.
static TOKUTXN const null_txn = 0;
static DB * const null_db = 0;
// NODESIZE is the node size handed to toku_open_brt.
// KSIZE is the length of the keys used to fill the leaves.
// PSIZE is the buffer size used when formatting pivot keys.
enum { NODESIZE = 1024, KSIZE=NODESIZE-100, PSIZE=20 };
// Cachetable and tree handle shared by the test; set up and torn down in doit().
CACHETABLE ct;
BRT t;
// Name (and buffer length) of the tree's file, derived from __FILE__ in doit().
int fnamelen;
char *fname;
/* Hand-build a tree with BRT_FANOUT leaves (the C nodes), one height-1 node B
 * over all of them, and a height-2 node A over B.  Messages are preloaded into
 * B's and A's buffers, A is installed as the root, and the tree is closed.
 * Every setup step is asserted to succeed, so a broken fixture fails loudly
 * instead of silently producing a different tree.
 * ksize is currently unused apart from being echoed at the end. */
void doit (int ksize) {
    DISKOFF cnodes[BRT_FANOUT], bnode, anode;
    u_int32_t fingerprints[BRT_FANOUT];
    char *keys[BRT_FANOUT-1];
    int keylens[BRT_FANOUT-1];
    int i;
    int r;

    fnamelen = strlen(__FILE__) + 20;
    fname = malloc(fnamelen);
    assert(fname!=0);
    snprintf(fname, fnamelen, "%s.brt", __FILE__);
    r = toku_brt_create_cachetable(&ct, 16*1024, ZERO_LSN, NULL_LOGGER); assert(r==0);
    unlink(fname);
    r = toku_open_brt(fname, 0, 1, &t, NODESIZE, ct, null_txn, toku_default_compare_fun, null_db);
    assert(r==0);

    for (i=0; i<BRT_FANOUT; i++) {
        r=toku_testsetup_leaf(t, &cnodes[i]);
        assert(r==0);
        fingerprints[i]=0;
        char key[KSIZE+10];
        int keylen = 1+snprintf(key, KSIZE, "%08d%0*d", i*10000+1, KSIZE-9, 0);
        char val[1];
        char vallen=0;
        r=toku_testsetup_insert_to_leaf(t, cnodes[i], key, keylen, val, vallen, &fingerprints[i]);
        assert(r==0);
    }

    // Now we have a bunch of leaves, all of which are within 100 bytes of full.
    for (i=0; i+1<BRT_FANOUT; i++) {
        char key[PSIZE];
        keylens[i]=1+snprintf(key, PSIZE, "%08d", (i+1)*10000);
        keys[i]=strdup(key);
    }

    r = toku_testsetup_nonleaf(t, 1, &bnode, BRT_FANOUT, cnodes, fingerprints, keys, keylens);
    assert(r==0);

    u_int32_t bfingerprint=0;
    {
        const int magic_size = (NODESIZE-toku_testsetup_get_sersize(t, bnode))/2-25;
        printf("magic_size=%d\n", magic_size);
        char key [KSIZE];
        int keylen = 1+snprintf(key, KSIZE, "%08d%0*d", 150002, magic_size, 0);
        char val[1];
        char vallen=0;
        r=toku_testsetup_insert_to_nonleaf(t, bnode, BRT_INSERT, key, keylen, val, vallen, &bfingerprint);
        assert(r==0); // previously overwritten by the next call without being checked
        keylen = 1+snprintf(key, KSIZE, "%08d%0*d", 2, magic_size-1, 0);
        r=toku_testsetup_insert_to_nonleaf(t, bnode, BRT_INSERT, key, keylen, val, vallen, &bfingerprint);
        assert(r==0);
    }
    printf("%lld sersize=%d\n", bnode, toku_testsetup_get_sersize(t, bnode));

    // Now we have an internal node which has full children and the buffers are nearly full
    r = toku_testsetup_nonleaf(t, 2, &anode, 1, &bnode, &bfingerprint, 0, 0);
    assert(r==0);
    {
        char key[20];
        int keylen = 1+snprintf(key, 20, "%08d", 3);
        char val[1];
        char vallen=0;
        // NOTE(review): this accumulates A's delta into bfingerprint, which is not read again afterwards -- confirm that is intended.
        r=toku_testsetup_insert_to_nonleaf(t, anode, BRT_INSERT, key, keylen, val, vallen, &bfingerprint);
        assert(r==0);
    }

    // Disabled: the final insert through the normal interface is not exercised yet.
    if (0)
    {
        const int magic_size = 1; //NODESIZE-toku_testsetup_get_sersize(t, anode)-100;
        DBT k,v;
        char key[20];
        char data[magic_size];
        int keylen=1+snprintf(key, sizeof(key), "%08d", 4);
        int vallen=magic_size;
        snprintf(data, magic_size, "%*s", magic_size-1, " ");
        r=toku_brt_insert(t,
                          toku_fill_dbt(&k, key, keylen),
                          toku_fill_dbt(&v, data, vallen),
                          null_txn);
        assert(r==0);
    }

    r = toku_testsetup_root(t, anode);
    assert(r==0);
    r = toku_close_brt(t); assert(r==0);
    r = toku_cachetable_close(&ct); assert(r==0);
    printf("ksize=%d, unused\n", ksize);
}
/* Entry point: command-line arguments are ignored.
 * Only the single case doit(53) runs; the process exits right after it, so
 * the sweep over sizes below is currently unreachable. */
int main (int argc __attribute__((__unused__)), char *argv[] __attribute__((__unused__))) {
    doit(53);
    exit(0);

    // Not reached: sweep every size from 1 up to (but excluding) NODESIZE/2.
    int extrasize = 1;
    while (extrasize < NODESIZE/2) {
        printf("extrasize=%d\n", extrasize);
        doit(extrasize);
        extrasize++;
    }
    return 0;
}
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int verbose=0; int verbose=0;
void default_parse_args (int argc, const char *argv[]) { void default_parse_args (int argc, const char *argv[]) {
......
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/stat.h>
#include <db.h>
#include "test.h"
#define N_TXNS 1
/* Recreate the environment directory DIR, open "test.db" inside its own
 * committed transaction, then put the integers 0..n-1 (key == value) into the
 * database using N_TXNS transactions that are all open at the same time.
 * Bit j of which_guys_to_abort selects the fate of transaction j: aborted if
 * the bit is set, committed otherwise.
 * A nonzero result from the last abort/commit is printed but is not fatal. */
void test_txn_abort(int n, int which_guys_to_abort) {
    if (verbose) printf("test_txn_abort:%d\n", n);

    system("rm -rf " DIR);
    mkdir(DIR, 0777);

    int r;
    DB_ENV *env;
    r = db_env_create(&env, 0); assert(r == 0);
    r = env->set_data_dir(env, DIR);
    r = env->set_lg_dir(env, DIR);
    r = env->open(env, 0, DB_INIT_MPOOL + DB_INIT_LOG + DB_INIT_LOCK + DB_INIT_TXN + DB_PRIVATE + DB_CREATE, 0777);
    if (r != 0) printf("%s:%d:%d:%s\n", __FILE__, __LINE__, r, db_strerror(r));
    assert(r == 0);

    DB *db;
    {
        DB_TXN *txn;
        r = env->txn_begin(env, 0, &txn, 0); assert(r == 0);
        r = db_create(&db, env, 0); assert(r == 0);
        r = db->open(db, txn, "test.db", 0, DB_BTREE, DB_CREATE, 0777); assert(r == 0);
        r = txn->commit(txn, 0); assert(r == 0);
    }
    {
        DB_TXN *txns[N_TXNS];
        {
            int j;
            for (j=0; j<N_TXNS; j++) {
                r = env->txn_begin(env, 0, &txns[j], 0); assert(r == 0);
            }
        }
        {
            int i;
            for (i=0; i<n; i++) {
                int j;
                for (j=N_TXNS; j>0; j--) {
                    if (i%j==0) { // This is guaranteed to be true when j==1, so someone will do it.
                        DBT key, val;
                        // j runs over 1..N_TXNS, so the matching transaction is txns[j-1];
                        // indexing with j itself would read one past the end of the array.
                        r = db->put(db, txns[j-1], dbt_init(&key, &i, sizeof i), dbt_init(&val, &i, sizeof i), 0);
                        if (r != 0) printf("%s:%d:%d:%s\n", __FILE__, __LINE__, r, db_strerror(r));
                        assert(r == 0);
                        break;
                    }
                }
            }
        }
        {
            int j;
            for (j=0; j<N_TXNS; j++) {
                if (which_guys_to_abort&(1<<j)) {
                    r = txns[j]->abort(txns[j]);
                } else {
                    r = txns[j]->commit(txns[j], 0);
                }
            }
        }
    }
    // The abort/commit result is deliberately only reported, not asserted.
#if 0
    assert(r == 0);
#else
    if (r != 0) printf("%s:%d:abort:%d\n", __FILE__, __LINE__, r);
#endif
    r = db->close(db, 0); assert(r == 0);
    r = env->close(env, 0); assert(r == 0);
}
/* Entry point.  Each "-v" / "--verbose" argument raises the verbosity level;
 * other arguments are ignored.  The test is then run for every abort/commit
 * bitmask over N_TXNS transactions, and for n = 1, 2, 4, ... below 100. */
int main(int argc, char *argv[]) {
    int argi;
    for (argi = 1; argi < argc; argi++) {
        const char *arg = argv[argi];
        if (!strcmp(arg, "-v") || !strcmp(arg, "--verbose")) {
            verbose++;
        }
    }

    const int n_masks = 1 << N_TXNS;
    int mask;
    for (mask = 0; mask < n_masks; mask++) {
        int n = 1;
        while (n < 100) {
            test_txn_abort(n, mask);
            n *= 2;
        }
    }
    return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment