Commit 84afc27d authored by Bradley C. Kuszmaul's avatar Bradley C. Kuszmaul

Merge the tokudb.558 branch back to to the main branch with:

{{{
svn merge -r3272:3320 https://svn.tokutek.com/tokudb/tokudb.558
}}}
No conflicts.


git-svn-id: file:///svn/tokudb@3322 c7de825b-a66e-492c-adef-691d508d4ae1
parent 46f17b46
......@@ -58,6 +58,7 @@ REGRESSION_TESTS = \
brt-test2 \
brt-test3 \
brt-test4 \
brt-test5 \
cachetable-test \
cachetable-test2 \
fifo-test \
......@@ -75,6 +76,7 @@ REGRESSION_TESTS = \
test-gpma-blackbox \
test-gpma-glassbox \
test-gpma-glassbox \
test-gpma-leftmost-dup \
test-inc-split \
test-primes \
test_oexcl \
......@@ -177,7 +179,7 @@ check-fanout:
let BRT_FANOUT=BRT_FANOUT+1; \
done
log-test log-test2 log-test3 log-test4 log-test5 log-test6 benchmark-test brt-test brt-test0 brt-test1 brt-test2 brt-test3 brt-test4 test-brt-overflow brt-test-named-db brt-test-cursor brt-test-cursor-2 test-brt-delete-both brt-serialize-test brtdump test-inc-split test-del-inorder: LDFLAGS+=-lz
log-test log-test2 log-test3 log-test4 log-test5 log-test6 benchmark-test brt-test brt-test0 brt-test1 brt-test2 brt-test3 brt-test4 brt-test5 test-brt-overflow brt-test-named-db brt-test-cursor brt-test-cursor-2 test-brt-delete-both brt-serialize-test brtdump test-inc-split test-del-inorder: LDFLAGS+=-lz
# pma: PROF_FLAGS=-fprofile-arcs -ftest-coverage
BRT_INTERNAL_H_INCLUDES = brt-internal.h cachetable.h fifo.h gpma.h brt.h brt-search.h brttypes.h yerror.h ybt.h log.h ../include/db.h kv-pair.h memory.h crc.h mempool.h leafentry.h
......@@ -193,18 +195,19 @@ pma.o: gpma.h yerror.h pma-internal.h memory.h key.h ybt.h brttypes.h log.h ../i
test-gpma-glassbox.o: test-gpma-glassbox.c gpma.h gpma-internal.h toku_assert.h memory.h
test-gpma-glassbox: test-gpma-glassbox.o toku_assert.o memory-debug.o gpma.o
test-gpma-blackbox: test-gpma-blackbox.o toku_assert.o memory.o gpma.o
test-gpma-blackbox.o: test-gpma-blackbox.c gpma.h memory.h toku_assert.h
test-gpma-worstinsert: test-gpma-worstinsert.o toku_assert.o memory.o gpma.o
test-gpma-worstinsert.o test-gpma-blackbox.o: gpma.h memory.h toku_assert.h
test-gpma-leftmost-dup: test-gpma-leftmost-dup.o toku_assert.o memory.o gpma.o
test-gpma-worstinsert.o test-gpma-blackbox.o test-gpma-leftmost-dup.o: gpma.h memory.h toku_assert.h
: gpma.h memory.h toku_assert.h
gpma.o: gpma.c gpma.h
ybt.o: ybt.h brttypes.h ../include/db.h
ybt-test: ybt-test.o ybt.o memory.o toku_assert.o
ybt-test.o: ybt.h ../include/db.h
cachetable.o: cachetable.h hashfun.h memory.h
brt-test0 brt-test1 brt-test2 brt-test3 brt-test4 test-brt-overflow brt-test-named-db brt-test-cursor brt-test-cursor-2 brt-test: ybt.o brt.o fifo.o gpma.o leafentry.o memory.o brt-serialize.o cachetable.o ybt.o key.o primes.o toku_assert.o log.o mempool.o brt-verify.o fingerprint.o log_code.o roll.o
brt-test0 brt-test1 brt-test2 brt-test3 brt-test4 brt-test5 test-brt-overflow brt-test-named-db brt-test-cursor brt-test-cursor-2 brt-test: ybt.o brt.o fifo.o gpma.o leafentry.o memory.o brt-serialize.o cachetable.o ybt.o key.o primes.o toku_assert.o log.o mempool.o brt-verify.o fingerprint.o log_code.o roll.o
log.o: log_header.h log-internal.h log.h wbuf.h crc.h brttypes.h $(BRT_INTERNAL_H_INCLUDES)
logformat: logformat.o toku_assert.o
brt-test0.o brt-test1.o brt-test2.o brt-test3.o brt-test4.o test-brt-overflow.h brt-test-named-db.o brt-test-cursor.o brt-test-cursor-2.o brt-test.o brt.o: brt.h brt-search.h ../include/db.h fifo.h gpma.h brttypes.h cachetable.h memory.h $(BRT_INTERNAL_H_INCLUDES)
brt-test0.o brt-test1.o brt-test2.o brt-test3.o brt-test4.o brt-test5.o test-brt-overflow.h brt-test-named-db.o brt-test-cursor.o brt-test-cursor-2.o brt-test.o brt.o: brt.h brt-search.h ../include/db.h fifo.h gpma.h brttypes.h cachetable.h memory.h $(BRT_INTERNAL_H_INCLUDES)
brt-serialize-test.o: $(BRT_INTERNAL_H_INCLUDES)
brt.o: $(BRT_INTERNAL_H_INCLUDES) key.h log_header.h
fifo.o: fifo.h brttypes.h
......
......@@ -160,8 +160,8 @@ extern CACHEKEY* toku_calculate_root_offset_pointer (BRT brt);
static const BRTNODE null_brtnode=0;
extern u_int32_t toku_calccrc32_kvpair (const void *key, int keylen, const void *val, int vallen);
extern u_int32_t toku_calccrc32_kvpair_struct (const struct kv_pair *kvp);
//extern u_int32_t toku_calccrc32_kvpair (const void *key, int keylen, const void *val, int vallen);
//extern u_int32_t toku_calccrc32_kvpair_struct (const struct kv_pair *kvp);
extern u_int32_t toku_calccrc32_cmd (u_int32_t type, TXNID xid, const void *key, u_int32_t keylen, const void *val, u_int32_t vallen);
extern u_int32_t toku_calccrc32_cmdstruct (BRT_CMD cmd);
......@@ -193,25 +193,17 @@ int toku_testsetup_insert_to_nonleaf (BRT brt, DISKOFF diskoff, enum brt_cmd_typ
int toku_set_func_fsync (int (*fsync_function)(int));
/* allocate a kv pair from a kv memory pool */
//static inline struct kv_pair *kv_pair_malloc_mempool(const void *key, int keylen, const void *val, int vallen, struct mempool *mp) {
// struct kv_pair *kv = toku_mempool_malloc(mp, sizeof (struct kv_pair) + keylen + vallen, 4);
// if (kv)
// kv_pair_init(kv, key, keylen, val, vallen);
// return kv;
//}
static inline struct kv_pair *brtnode_malloc_kv_pair (GPMA pma, struct mempool *mp, const void *key, unsigned int keylen, const void *val, unsigned int vallen) {
struct kv_pair *kv = mempool_malloc_from_gpma(pma, mp, sizeof (struct kv_pair) + keylen + vallen);
kv_pair_init(kv, key, keylen, val, vallen);
return kv;
}
// used for the leaf compare fun
struct lc_pair {
// These two go together to do lookups in a brtnode using the keys in a command.
struct cmd_leafval_bessel_extra {
BRT t;
int compare_both; // compare_both is set if it is a DUPSORT database and both keys are needed (e.g, for DB_DELETE_ANY)
BRT_CMD cmd;
int compare_both_keys; // Set to 1 for DUPSORT databases that are not doing a DELETE_BOTH
};
int toku_brtleaf_compare_fun (u_int32_t alen __attribute__((__unused__)), void *aval, u_int32_t blen __attribute__((__unused__)), void *bval, void *lc /*this is (struct lc_pair *) cast to (void*). */) ;
int toku_cmd_leafval_bessel (u_int32_t dlen, void *leafentry, void *extra);
int toku_brt_root_put_cmd(BRT brt, BRT_CMD cmd, TOKULOGGER logger);
int toku_gpma_compress_kvspace (GPMA pma, struct mempool *memp);
void *mempool_malloc_from_gpma(GPMA pma, struct mempool *mp, size_t size);
#endif
......@@ -62,9 +62,8 @@ static unsigned int toku_serialize_brtnode_size_slow(BRTNODE node) {
GPMA_ITERATE(node->u.l.buffer,
idx, vlen, vdata,
({
struct kv_pair *p=vdata;
assert(vlen==sizeof(*p)+kv_pair_keylen(p)+kv_pair_vallen(p));
hsize+=PMA_ITEM_OVERHEAD+KEY_VALUE_OVERHEAD+kv_pair_keylen(p)+kv_pair_vallen(p);
LEAFENTRY le=vdata;
hsize+= PMA_ITEM_OVERHEAD + leafentry_disksize(le);
}));
assert(hsize==node->u.l.n_bytes_in_buffer);
hsize+=4; /* the PMA size */
......@@ -97,7 +96,7 @@ unsigned int toku_serialize_brtnode_size (BRTNODE node) {
return result;
}
void toku_serialize_brtnode_to(int fd, DISKOFF off, DISKOFF size, BRTNODE node) {
void toku_serialize_brtnode_to (int fd, DISKOFF off, DISKOFF size, BRTNODE node) {
//printf("%s:%d serializing\n", __FILE__, __LINE__);
struct wbuf w;
int i;
......@@ -105,7 +104,7 @@ void toku_serialize_brtnode_to(int fd, DISKOFF off, DISKOFF size, BRTNODE node)
assert(calculated_size<=size);
//char buf[size];
char *MALLOC_N(size,buf);
toku_verify_counts(node);
//toku_verify_counts(node);
assert(size>0);
wbuf_init(&w, buf, size);
//printf("%s:%d serializing %lld w height=%d p0=%p\n", __FILE__, __LINE__, off, node->height, node->mdicts[0]);
......@@ -174,19 +173,14 @@ void toku_serialize_brtnode_to(int fd, DISKOFF off, DISKOFF size, BRTNODE node)
assert(check_local_fingerprint==node->local_fingerprint);
}
} else {
//printf(" n_entries=%d\n", toku_pma_n_entries(node->u.l.buffer));
//printf("%s:%d writing node %lld n_entries=%d\n", __FILE__, __LINE__, node->thisnodename, toku_gpma_n_entries(node->u.l.buffer));
wbuf_uint(&w, toku_gpma_n_entries(node->u.l.buffer));
wbuf_uint(&w, toku_gpma_index_limit(node->u.l.buffer));
GPMA_ITERATE(node->u.l.buffer, idx, vlen, vdata,
({
struct kv_pair *p=vdata;
assert((char*)node->u.l.buffer_mempool.base<= (char*)p && (char*)p < (char*)node->u.l.buffer_mempool.base+node->u.l.buffer_mempool.size );
u_int32_t keylen=kv_pair_keylen(p);
u_int32_t datalen=kv_pair_vallen(p);
assert(vlen==sizeof(*p)+keylen+datalen);
//printf(" %s:%d idx=%d\n", __FILE__, __LINE__, idx);
wbuf_uint(&w, idx);
wbuf_bytes(&w, kv_pair_key(p), keylen);
wbuf_bytes(&w, kv_pair_val(p), datalen);
wbuf_LEAFENTRY(&w, vdata);
}));
}
assert(w.ndone<=w.size);
......@@ -343,7 +337,7 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, BRTNODE *brtnode, unsign
int diff;
bytevec key; ITEMLEN keylen;
bytevec val; ITEMLEN vallen;
toku_verify_counts(result);
//toku_verify_counts(result);
int type = rbuf_char(&rc);
TXNID xid = rbuf_ulonglong(&rc);
rbuf_bytes(&rc, &key, &keylen); /* Returns a pointer into the rbuf. */
......@@ -387,19 +381,24 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, BRTNODE *brtnode, unsign
}
u_int32_t actual_sum = 0;
//printf("%s:%d node %lld, reading %d items\n", __FILE__, __LINE__, off, n_in_buf);
for (i=0; i<n_in_buf; i++) {
bytevec key; ITEMLEN keylen;
bytevec val; ITEMLEN vallen;
LEAFENTRY tmp_le;
//printf("%s:%d reading %dth item\n", __FILE__, __LINE__, i);
int idx = rbuf_int(&rc);
rbuf_bytes(&rc, &key, &keylen); /* Returns a pointer into the rbuf. */
rbuf_bytes(&rc, &val, &vallen);
result->u.l.n_bytes_in_buffer += keylen + vallen + KEY_VALUE_OVERHEAD + PMA_ITEM_OVERHEAD;
struct kv_pair *pair = brtnode_malloc_kv_pair(result->u.l.buffer, &result->u.l.buffer_mempool, key, keylen, val, vallen);
assert(pair);
int pairlen = kv_pair_size(pair);
toku_gpma_set_at_index(result->u.l.buffer, idx, pairlen, pair);
actual_sum += result->rand4fingerprint*toku_calccrc32_kvpair_struct(pair);
// printf("%s:%d rand4=%08x actual=%08x this=%08x expect=%08x\n", __FILE__, __LINE__, result->rand4fingerprint, actual_sum, toku_calccrc32_kvpair_struct(pair), result->local_fingerprint);
//printf("%s:%d idx=%d\n", __FILE__, __LINE__, idx);
u_int32_t memsize, disksize;
rbuf_LEAFENTRY(&rc, &memsize, &disksize, &tmp_le);
LEAFENTRY le = mempool_malloc_from_gpma(result->u.l.buffer, &result->u.l.buffer_mempool, memsize);
assert(le);
memcpy(le, tmp_le, memsize);
toku_free(tmp_le);
assert(disksize==leafentry_disksize(le));
result->u.l.n_bytes_in_buffer += disksize + PMA_ITEM_OVERHEAD;
//printf("idx=%d\n", idx);
toku_gpma_set_at_index(result->u.l.buffer, idx, memsize, le);
actual_sum += result->rand4fingerprint*toku_le_crc(le);
//printf("%s:%d rand4=%08x fp=%08x \n", __FILE__, __LINE__, result->rand4fingerprint, actual_sum);
}
if (r!=0) goto died_21;
......@@ -411,7 +410,7 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, BRTNODE *brtnode, unsign
//fprintf(stderr, "%s:%d Good checksum=%08x height=%d\n", __FILE__, __LINE__, actual_sum, result->height);
}
toku_verify_counts(result);
//toku_verify_counts(result);
}
{
unsigned int n_read_so_far = rc.ndone;
......@@ -430,7 +429,7 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, BRTNODE *brtnode, unsign
//printf("%s:%d Ok got %lld n_children=%d\n", __FILE__, __LINE__, result->thisnodename, result->n_children);
toku_free(rc.buf);
*brtnode = result;
toku_verify_counts(result);
//toku_verify_counts(result);
return 0;
}
......@@ -444,8 +443,8 @@ void toku_verify_counts (BRTNODE node) {
GPMA_ITERATE(node->u.l.buffer, idx, dlen, ddata,
({
count++;
sum+=(PMA_ITEM_OVERHEAD + dlen);
fp += toku_calccrc32_kvpair_struct(ddata);
sum+= PMA_ITEM_OVERHEAD + leafentry_disksize(ddata); // use the disk size, not the memory size.
fp += toku_le_crc(ddata);
}));
assert(count==toku_gpma_n_entries(node->u.l.buffer));
assert(sum==node->u.l.n_bytes_in_buffer);
......
......@@ -76,27 +76,38 @@ int toku_testsetup_insert_to_leaf (BRT brt, DISKOFF diskoff, char *key, int keyl
toku_verify_counts(node);
assert(node->height==0);
struct kv_pair *kv = brtnode_malloc_kv_pair(node->u.l.buffer, &node->u.l.buffer_mempool, key, keylen, val, vallen);
struct lc_pair lc = {brt, node->flags & TOKU_DB_DUPSORT};
u_int32_t lesize, disksize;
LEAFENTRY tmp_leafentry;
r = le_committed(keylen, key, vallen, val, &lesize, &disksize, &tmp_leafentry);
LEAFENTRY leafentry = mempool_malloc_from_gpma(node->u.l.buffer, &node->u.l.buffer_mempool, lesize);
memcpy(leafentry, tmp_leafentry, lesize);
toku_free(tmp_leafentry);
u_int32_t storedlen;
void *storeddata;
u_int32_t idx;
r = toku_gpma_lookup_item(node->u.l.buffer, kv_pair_size(kv), kv, toku_brtleaf_compare_fun, &lc, &storedlen, &storeddata, &idx);
DBT keydbt,valdbt;
BRT_CMD_S cmd = {BRT_INSERT, 0, .u.id={toku_fill_dbt(&keydbt, key, keylen),
toku_fill_dbt(&valdbt, val, vallen)}};
struct cmd_leafval_bessel_extra be = {brt, &cmd, node->flags & TOKU_DB_DUPSORT};
r = toku_gpma_lookup_bessel(node->u.l.buffer, toku_cmd_leafval_bessel, 0, &be, &storedlen, &storeddata, &idx);
if (r==0) {
// It's already there. So now we have to remove it and put the new one back in.
node->u.l.n_bytes_in_buffer -= PMA_ITEM_OVERHEAD + storedlen;
node->local_fingerprint -= node->rand4fingerprint*toku_calccrc32_kvpair_struct(storeddata);
node->u.l.n_bytes_in_buffer -= PMA_ITEM_OVERHEAD + leafentry_disksize(storeddata);
node->local_fingerprint -= node->rand4fingerprint*toku_le_crc(storeddata);
toku_mempool_mfree(&node->u.l.buffer_mempool, storeddata, storedlen);
// Now put the new kv in.
toku_gpma_set_at_index(node->u.l.buffer, idx, kv_pair_size(kv), kv);
toku_gpma_set_at_index(node->u.l.buffer, idx, lesize, leafentry);
} else {
r = toku_gpma_insert(node->u.l.buffer, kv_pair_size(kv), kv, toku_brtleaf_compare_fun, &lc, 0, 0, 0);
r = toku_gpma_insert_bessel(node->u.l.buffer, lesize, leafentry, toku_cmd_leafval_bessel, &be, 0, 0, 0);
assert(r==0);
}
node->u.l.n_bytes_in_buffer += PMA_ITEM_OVERHEAD + kv_pair_size(kv);
node->local_fingerprint += node->rand4fingerprint*toku_calccrc32_kvpair_struct(kv);
node->u.l.n_bytes_in_buffer += PMA_ITEM_OVERHEAD + disksize;
node->local_fingerprint += node->rand4fingerprint*toku_le_crc(leafentry);
node->dirty=1;
*subtree_fingerprint = node->local_fingerprint;
......
......@@ -21,53 +21,6 @@
static TOKUTXN const null_txn = 0;
static DB * const null_db = 0;
static void test5 (void) {
int r;
BRT t;
int limit=100000;
int *values;
int i;
CACHETABLE ct;
char fname[]="testbrt.brt";
toku_memory_check_all_free();
MALLOC_N(limit,values);
for (i=0; i<limit; i++) values[i]=-1;
unlink(fname);
r = toku_brt_create_cachetable(&ct, 0, ZERO_LSN, NULL_LOGGER); assert(r==0);
r = toku_open_brt(fname, 0, 1, &t, 1<<12, ct, null_txn, toku_default_compare_fun, null_db); assert(r==0);
for (i=0; i<limit/2; i++) {
char key[100],val[100];
int rk = random()%limit;
int rv = random();
if (i%1000==0 && verbose) { printf("w"); fflush(stdout); }
values[rk] = rv;
snprintf(key, 100, "key%d", rk);
snprintf(val, 100, "val%d", rv);
DBT k,v;
toku_brt_insert(t, toku_fill_dbt(&k, key, 1+strlen(key)), toku_fill_dbt(&v, val, 1+strlen(val)), null_txn);
}
if (verbose) printf("\n");
for (i=0; i<limit/2; i++) {
int rk = random()%limit;
if (values[rk]>=0) {
char key[100], valexpected[100];
DBT k,v;
if (i%1000==0 && verbose) { printf("r"); fflush(stdout); }
snprintf(key, 100, "key%d", rk);
snprintf(valexpected, 100, "val%d", values[rk]);
r = toku_brt_lookup(t, toku_fill_dbt(&k, key, 1+strlen(key)), toku_init_dbt(&v));
assert(r==0);
assert(v.size==(1+strlen(valexpected)));
assert(memcmp(v.data,valexpected,v.size)==0);
}
}
if (verbose) printf("\n");
toku_free(values);
r = toku_close_brt(t); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
toku_memory_check_all_free();
}
static void test_dump_empty_db (void) {
BRT t;
CACHETABLE ct;
......@@ -1518,8 +1471,6 @@ static void brt_blackbox_test (void) {
toku_memory_check_all_free();
test_multiple_dbs();
toku_memory_check_all_free();
if (verbose) printf("test5\n");
test5();
if (verbose) printf("test_multiple_files\n");
test_multiple_files();
......
......@@ -10,7 +10,7 @@
static TOKUTXN const null_txn = 0;
static DB * const null_db = 0;
static void test2 (int memcheck) {
static void test2 (int memcheck, int limit) {
BRT t;
int r;
int i;
......@@ -24,7 +24,7 @@ static void test2 (int memcheck) {
r = toku_open_brt(fname, 0, 1, &t, 1024, ct, null_txn, toku_default_compare_fun, null_db);
if (verbose) printf("%s:%d did setup\n", __FILE__, __LINE__);
assert(r==0);
for (i=0; i<4096; i++) {
for (i=0; i<limit; i++) { // 4096
DBT k,v;
char key[100],val[100];
snprintf(key,100,"hello%d",i);
......@@ -42,6 +42,7 @@ static void test2 (int memcheck) {
}
}
if (verbose) printf("%s:%d inserted\n", __FILE__, __LINE__);
r = toku_verify_brt(t); assert(r==0);
r = toku_close_brt(t); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
toku_memory_check_all_free();
......@@ -50,10 +51,12 @@ static void test2 (int memcheck) {
int main (int argc , const char *argv[]) {
default_parse_args(argc, argv);
if (verbose) printf("test2 checking memory\n");
// if (verbose) printf("test2 checking memory\n");
// test2(1);
if (verbose) printf("test2 faster\n");
test2(0);
test2(0, 2);
test2(0, 212);
test2(0, 4096);
toku_malloc_cleanup();
if (verbose) printf("test1 ok\n");
return 0;
......
......@@ -42,6 +42,7 @@ static void test3 (int nodesize, int count, int memcheck) {
snprintf(val,100,"there%d",i);
toku_brt_insert(t, toku_fill_dbt(&k, key, 1+strlen(key)), toku_fill_dbt(&v, val, 1+strlen(val)), null_txn);
}
r = toku_verify_brt(t); assert(r==0);
r = toku_close_brt(t); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
toku_memory_check_all_free();
......
......@@ -42,6 +42,7 @@ static void test4 (int nodesize, int count, int memcheck) {
snprintf(val,100,"there%d",i);
toku_brt_insert(t, toku_fill_dbt(&k, key, 1+strlen(key)), toku_fill_dbt(&v, val, 1+strlen(val)), null_txn);
}
r = toku_verify_brt(t); assert(r==0);
r = toku_close_brt(t); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
toku_memory_check_all_free();
......@@ -53,6 +54,9 @@ static void test4 (int nodesize, int count, int memcheck) {
}
static void brt_blackbox_test (void) {
test4(2048, 1<<14, 1);
return;
if (verbose) printf("test4 slow\n");
test4(2048, 1<<15, 1);
......
......@@ -19,15 +19,6 @@
#include "toku_assert.h"
#include "kv-pair.h"
static void gpma_verify_fingerprint (GPMA pma, u_int32_t rand4fingerprint, u_int32_t fingerprint) {
u_int32_t actual_fingerprint=0;
GPMA_ITERATE(pma, idx, len, val,
actual_fingerprint+=rand4fingerprint*toku_calccrc32_kvpair_struct(val)
);
assert(actual_fingerprint==fingerprint);
}
static void verify_local_fingerprint (BRTNODE node) {
u_int32_t fp=0;
int i;
......@@ -39,8 +30,33 @@ static void verify_local_fingerprint (BRTNODE node) {
}));
assert(fp==node->local_fingerprint);
} else {
gpma_verify_fingerprint(node->u.l.buffer, node->rand4fingerprint, node->local_fingerprint);
toku_verify_counts(node);
}
}
static int compare_pairs (BRT brt, struct kv_pair *a, struct kv_pair *b) {
DBT x,y;
int cmp = brt->compare_fun(brt->db,
toku_fill_dbt(&x, kv_pair_key(a), kv_pair_keylen(a)),
toku_fill_dbt(&y, kv_pair_key(b), kv_pair_keylen(b)));
if (cmp==0 && (brt->flags & TOKU_DB_DUPSORT)) {
cmp = brt->dup_compare(brt->db,
toku_fill_dbt(&x, kv_pair_val(a), kv_pair_vallen(a)),
toku_fill_dbt(&y, kv_pair_val(b), kv_pair_vallen(b)));
}
return cmp;
}
static int compare_leafentries (BRT brt, LEAFENTRY a, LEAFENTRY b) {
DBT x,y;
int cmp = brt->compare_fun(brt->db,
toku_fill_dbt(&x, le_any_key(a), le_any_keylen(a)),
toku_fill_dbt(&y, le_any_key(b), le_any_keylen(b)));
if (cmp==0 && (brt->flags & TOKU_DB_DUPSORT)) {
cmp = brt->dup_compare(brt->db,
toku_fill_dbt(&x, le_any_val(a), le_any_vallen(a)),
toku_fill_dbt(&y, le_any_val(b), le_any_vallen(b)));
}
return cmp;
}
int toku_verify_brtnode (BRT brt, DISKOFF off, bytevec lorange, ITEMLEN lolen, bytevec hirange, ITEMLEN hilen, int recurse) {
......@@ -56,7 +72,7 @@ int toku_verify_brtnode (BRT brt, DISKOFF off, bytevec lorange, ITEMLEN lolen, b
verify_local_fingerprint(node);
if (node->height>0) {
int i;
for (i=0; i< node->u.n.n_children-1; i++) {
for (i=0; i< node->u.n.n_children; i++) {
bytevec thislorange,thishirange;
ITEMLEN thislolen, thishilen;
if (node->u.n.n_children==0 || i==0) {
......@@ -89,8 +105,14 @@ int toku_verify_brtnode (BRT brt, DISKOFF off, bytevec lorange, ITEMLEN lolen, b
toku_fifo_iterate(BNC_BUFFER(node,i), verify_pair, 0);
}
}
//if (lorange) printf("%s:%d lorange=%s\n", __FILE__, __LINE__, (char*)lorange);
//if (hirange) printf("%s:%d lorange=%s\n", __FILE__, __LINE__, (char*)hirange);
for (i=0; i<node->u.n.n_children-2; i++) {
assert(compare_pairs(brt, node->u.n.childkeys[i], node->u.n.childkeys[i+1])<0);
}
for (i=0; i<node->u.n.n_children; i++) {
if (i>0) {
//printf(" %s:%d i=%d %p v=%s\n", __FILE__, __LINE__, i, node->u.n.childkeys[i-1], (char*)kv_pair_key(node->u.n.childkeys[i-1]));
if (lorange) assert(toku_keycompare(lorange,lolen, kv_pair_key(node->u.n.childkeys[i-1]), toku_brt_pivot_key_len(brt, node->u.n.childkeys[i-1]))<0);
if (hirange) assert(toku_keycompare(kv_pair_key(node->u.n.childkeys[i-1]), toku_brt_pivot_key_len(brt, node->u.n.childkeys[i-1]), hirange, hilen)<=0);
}
......@@ -103,6 +125,16 @@ int toku_verify_brtnode (BRT brt, DISKOFF off, bytevec lorange, ITEMLEN lolen, b
recurse);
}
}
} else {
// Make sure that they are in increasing order.
void *prev=0;
GPMA_ITERATE(node->u.l.buffer, idx, dlen, data,
({
if (prev==0)
prev=data;
else
assert(compare_leafentries(brt, prev, data)<0);
}));
}
if ((r = toku_cachetable_unpin(brt->cf, off, 0, 0))) return r;
return result;
......
This diff is collapsed.
......@@ -86,8 +86,12 @@ void dump_node (int f, DISKOFF off, struct brt_header *h) {
switch ((enum brt_cmd_type)typ) {
case BRT_NONE: printf("NONE"); goto ok;
case BRT_INSERT: printf("INSERT"); goto ok;
case BRT_DELETE: printf("DELETE"); goto ok;
case BRT_DELETE_ANY: printf("DELETE_ANY"); goto ok;
case BRT_DELETE_BOTH: printf("DELETE_BOTH"); goto ok;
case BRT_ABORT_ANY: printf("ABORT_ANY"); goto ok;
case BRT_ABORT_BOTH: printf("ABORT_BOTH"); goto ok;
case BRT_COMMIT_ANY: printf("COMMIT_ANY"); goto ok;
case BRT_COMMIT_BOTH: printf("COMMIT_BOTH"); goto ok;
}
printf("HUH?");
ok:
......@@ -106,10 +110,7 @@ void dump_node (int f, DISKOFF off, struct brt_header *h) {
printf(" items_in_buffer =%d\n", toku_gpma_n_entries(n->u.l.buffer));
GPMA_ITERATE(n->u.l.buffer, idx, len, data,
({
printf("%d: ", idx);
print_item(kv_pair_key(data), kv_pair_keylen(data));
printf(" ");
print_item(kv_pair_val(data), kv_pair_vallen(data));
print_leafentry(stdout, data);
printf("\n");
}));
}
......
......@@ -27,11 +27,6 @@ typedef struct {
char *data;
} BYTESTRING;
typedef struct {
int len;
DISKOFF *array;
} DISKOFFARRAY;
/* Make the LSN be a struct instead of an integer so that we get better type checking. */
typedef struct __toku_lsn { u_int64_t lsn; } LSN;
#define ZERO_LSN ((LSN){0})
......@@ -79,8 +74,12 @@ typedef struct cachefile *CACHEFILE;
enum brt_cmd_type {
BRT_NONE = 0,
BRT_INSERT = 1,
BRT_DELETE = 2,
BRT_DELETE_ANY = 2, // Delete any matching key. This used to be called BRT_DELETE.
BRT_DELETE_BOTH = 3,
BRT_ABORT_ANY = 4, // Abort any commands on any matching key.
BRT_ABORT_BOTH = 5, // Abort commands that match both the key and the value
BRT_COMMIT_ANY = 6,
BRT_COMMIT_BOTH = 7
};
/* tree commands */
......
......@@ -62,7 +62,12 @@ struct fileid {
struct cachefile {
CACHEFILE next;
int refcount; /* CACHEFILEs are shared. Use a refcount to decide when to really close it. */
u_int64_t refcount; /* CACHEFILEs are shared. Use a refcount to decide when to really close it.
* The reference count is one for every open DB.
* Plus one for every commit/rollback record. (It would be harder to keep a count for every open transaction,
* because then we'd have to figure out if the transaction was already counted. If we simply use a count for
* every record in the transaction, we'll be ok. Hence we use a 64-bit counter to make sure we don't run out.
*/
int fd; /* Bug: If a file is opened read-only, then it is stuck in read-only. If it is opened read-write, then subsequent writers can write to it too. */
CACHETABLE cachetable;
struct fileid fileid;
......
......@@ -17,7 +17,8 @@ static inline u_int32_t toku_calc_more_crc32_kvpair (u_int32_t crc, const void *
return crc;
}
u_int32_t toku_calccrc32_kvpair (const void *key, int keylen, const void *val, int vallen) {
#if 0
u_int32_t toku_calccrc32_kvpair (const void *key, int keylen, const void *val, int vallen) {
return toku_calc_more_crc32_kvpair(toku_null_crc, key, keylen, val, vallen);
}
......@@ -25,6 +26,7 @@ u_int32_t toku_calccrc32_kvpair_struct (const struct kv_pair *kvp) {
return toku_calccrc32_kvpair(kv_pair_key_const(kvp), kv_pair_keylen(kvp),
kv_pair_val_const(kvp), kv_pair_vallen(kvp));
}
#endif
u_int32_t toku_calccrc32_cmd (u_int32_t type, TXNID xid, const void *key, u_int32_t keylen, const void *val, u_int32_t vallen) {
unsigned char type_c = type;
......@@ -38,10 +40,18 @@ u_int32_t toku_calccrc32_cmd (u_int32_t type, TXNID xid, const void *key, u_int3
}
u_int32_t toku_calccrc32_cmdstruct (BRT_CMD cmd) {
if (cmd->type <= BRT_DELETE_BOTH)
switch (cmd->type) {
case BRT_INSERT:
case BRT_DELETE_ANY:
case BRT_DELETE_BOTH:
case BRT_COMMIT_ANY:
case BRT_COMMIT_BOTH:
case BRT_ABORT_ANY:
case BRT_ABORT_BOTH:
return toku_calccrc32_cmd (cmd->type, cmd->xid, cmd->u.id.key->data, cmd->u.id.key->size, cmd->u.id.val->data, cmd->u.id.val->size);
else {
assert(0); /* Should not have come here. */
case BRT_NONE:
return 0;
}
assert(0); /* Should not have come here. */
return 0;
}
......@@ -86,11 +86,14 @@ u_int32_t toku_gpma_index_limit(GPMA pma) {
}
// If direction==0 then find any match for which the bessel gives 0. *found is set to 1 iff something with 0. The return value is the place where the zero is (if found), or the place where it would go (if there's a value there, then that value goes after the zero.)
// If more than one value returns 0, return the left most such value.
// If direction>0 then find the first match for which bessel gives >0. *found is set to 1 iff something with >0. The return value is the index of the leftmost such value (if found). In the not-found case, all items are <=0 and the return value is pma->N.
// If direction<0 then find the last match for which bessel gives <0. *found is set to 1 iff something with <0. The return value is the index of the rightmost such value (if found). In the not-found case, all items are >=0 and the return value is 0.
u_int32_t toku_gpma_find_index_bes (GPMA pma, gpma_besselfun_t besf, int direction, void *extra, int *found) {
if (direction==0) {
int lo=0, hi=pma->N;
int foundone = 0;
u_int32_t foundidx = 0;
while (lo<hi) {
int mi = (lo+hi)/2;
int look = mi;
......@@ -102,8 +105,10 @@ u_int32_t toku_gpma_find_index_bes (GPMA pma, gpma_besselfun_t besf, int directi
int cmp = besf(pma->items[look].len, pma->items[look].data, extra);
if (cmp==0) {
/* We found a match. */
*found=1;
return look;
foundone = 1;
foundidx=look;
/* But keep looking to the left. */
hi=mi;
} else if (cmp>0) {
hi=mi;
} else {
......@@ -111,8 +116,9 @@ u_int32_t toku_gpma_find_index_bes (GPMA pma, gpma_besselfun_t besf, int directi
}
}
}
*found = 0;
return lo;
*found = foundone;
if (foundone) return foundidx;
else return lo;
} else if (direction<0) {
// Find the rightmost negative value.
......@@ -371,15 +377,12 @@ int toku_make_space_at (GPMA pma, u_int32_t idx, u_int32_t *newidx, gpma_renumbe
return toku_gpma_smooth_region (pma, lo, hi, count, idx, newidx, rcall, extra, pma->N);
}
int toku_gpma_insert(GPMA pma,
static int finish_insert (GPMA pma,
u_int32_t len, void*data,
gpma_compare_fun_t compare, void *extra_for_compare,
gpma_renumber_callback_t rcall, void*extra_for_rcall, // if anything gets renumbered, let the caller know
u_int32_t *idxp
u_int32_t idx,
u_int32_t *idxp // store idx into *idxp (but only do it when we succeed.)
) {
int found;
u_int32_t idx = toku_gpma_find_index(pma, len, data, compare, extra_for_compare, &found);
if (found) return DB_KEYEXIST;
assert(idx<=toku_gpma_index_limit(pma));
if (idx==toku_gpma_index_limit(pma) || pma->items[idx].data) {
u_int32_t newidx;
......@@ -395,6 +398,32 @@ int toku_gpma_insert(GPMA pma,
return 0;
}
int toku_gpma_insert(GPMA pma,
u_int32_t len, void*data,
gpma_compare_fun_t compare, void *extra_for_compare,
gpma_renumber_callback_t rcall, void*extra_for_rcall, // if anything gets renumbered, let the caller know
u_int32_t *idxp
) {
int found;
u_int32_t idx = toku_gpma_find_index(pma, len, data, compare, extra_for_compare, &found);
if (found) return DB_KEYEXIST;
return finish_insert(pma, len, data, rcall, extra_for_rcall, idx, idxp);
}
int toku_gpma_insert_bessel (GPMA pma,
u_int32_t len, void *data,
gpma_besselfun_t besf, void *extra_for_besself,
gpma_renumber_callback_t renumberf, void*extra_for_renumberf, // if anything gets renumbered, let the caller know
u_int32_t *indexp // Where did the item get stored?
) {
int found;
u_int32_t idx = toku_gpma_find_index_bes(pma, besf, 0, extra_for_besself, &found);
if (found) return DB_KEYEXIST;
return finish_insert(pma, len, data, renumberf, extra_for_renumberf, idx, indexp);
}
inline int toku_max_int (int a, int b) {
return a<b ? b : a;
}
......@@ -520,9 +549,13 @@ int toku_gpma_delete_bessel (GPMA pma,
// Now we know the range and how many items will be deleted.
for (i=minidx; i<=maxidx; i++) {
if (pma->items[i].data) {
if (deletef) {
r = deletef(i, pma->items[i].len, pma->items[i].data, extra_for_deletef);
pma->items[i].data = 0;
if (r!=0) return r;
} else {
pma->items[i].data = 0;
}
}
}
// Now we must find a region that is sufficiently densely packed and spread things out.
......@@ -566,10 +599,10 @@ int toku_gpma_lookup_item (GPMA pma,
int toku_gpma_lookup_bessel(GPMA pma, gpma_besselfun_t besf, int direction, void*extra, u_int32_t *resultlen, void **resultdata, u_int32_t *idxp) {
int found;
u_int32_t idx = toku_gpma_find_index_bes(pma, besf, direction, extra, &found);
if (idxp) *idxp=idx;
if (found) {
*resultlen =pma->items[idx].len;
*resultdata=pma->items[idx].data;
if (idxp) *idxp=idx;
return 0;
} else {
return DB_NOTFOUND;
......@@ -699,7 +732,7 @@ void toku_gpma_set_at_index (GPMA pma, u_int32_t idx, u_int32_t len, void *data)
void toku_gpma_clear_at_index (GPMA pma, u_int32_t idx) {
assert(idx<pma->N);
if (pma->items[idx].data==0) {
if (pma->items[idx].data) {
pma->n_items_present--;
}
pma->items[idx].data = 0;
......
......@@ -42,6 +42,15 @@ int toku_gpma_insert (GPMA,
gpma_renumber_callback_t renumberf, void*extra_for_renumberf, // if anything gets renumbered, let the caller know
u_int32_t *indexp // Where did the item get stored?
);
// Use a bessel function to determine where to insert the data.
// Puts the new value between the rightmost -1 and the leftmost +1.
// Requires: Nothing in the pma returns 0.
int toku_gpma_insert_bessel (GPMA pma,
u_int32_t len, void *data,
gpma_besselfun_t, void *extra_for_besself,
gpma_renumber_callback_t renumberf, void*extra_for_renumberf, // if anything gets renumbered, let the caller know
u_int32_t *indexp // Where did the item get stored?
);
// Delete anything for which the besselfun is zero. The besselfun must be monotonically increasing compared to the comparison function.
// That is, if two othings compare to be < then their besselfun's must yield <=, and if the compare to be = their besselfuns must be =, and if they are > then their besselfuns must be >=
......@@ -69,7 +78,8 @@ int toku_gpma_delete_item (GPMA,
int toku_gpma_lookup_item (GPMA, u_int32_t len, void *data, gpma_compare_fun_t compf, void*extra, u_int32_t *resultlen, void **resultdata, u_int32_t *idx);
// Lookup something according to the besselfun.
// If direction==0 then return something for which the besselfun is zero (or return DB_NOTFOUND).
// If direction==0 then return something for which the besselfun is zero (or return DB_NOTFOUND and set the idx to point at the spot where the item would go. That spot may already have an element in it, or it may be off the end.)
// If more than one value is zero, return the leftmost such value.
// If direction>0 then return the first thing for which the besselfun is positive (or return DB_NOTFOUND).
// If direction<0 then return the last thing for which the besselfun is negative (or return DB_NOTFOUND).
int toku_gpma_lookup_bessel (GPMA, gpma_besselfun_t, int direction, void*extra, u_int32_t *len, void **data, u_int32_t *idx);
......
This diff is collapsed.
......@@ -30,19 +30,104 @@
#include "mempool.h"
#include "brttypes.h"
#include "gpma.h"
#include "rbuf.h"
typedef struct leafentry *LEAFENTRY;
u_int32_t le_crc(LEAFENTRY v);
u_int32_t toku_le_crc(LEAFENTRY v);
int le_committed (ITEMLEN klen, bytevec kval, ITEMLEN dlen, bytevec dval, GPMA pma, struct mempool *mp, LEAFENTRY *result);
int le_both (ITEMLEN cklen, bytevec ckval, ITEMLEN cdlen, bytevec cdval, ITEMLEN pdlen, bytevec pdval,
struct mempool *mp, LEAFENTRY *result);
int le_provdel (ITEMLEN klen, bytevec kval, ITEMLEN dlen, bytevec dval, struct mempool *mp, LEAFENTRY *result);
int le_provpair (ITEMLEN klen, bytevec kval, ITEMLEN dlen, bytevec dval, struct mempool *mp, LEAFENTRY *result);
int le_committed (u_int32_t klen, void* kval, u_int32_t dlen, void* dval, u_int32_t *resultsize, u_int32_t *disksize, LEAFENTRY *result);
int le_both (TXNID xid, u_int32_t cklen, void* ckval, u_int32_t cdlen, void* cdval, u_int32_t pdlen, void* pdval,
u_int32_t *memsize, u_int32_t *disksize, LEAFENTRY *result);
int le_provdel (TXNID xid, u_int32_t klen, void* kval, u_int32_t dlen, void* dval,
u_int32_t *resultsize, u_int32_t *memsize, LEAFENTRY *result);
int le_provpair (TXNID xid, u_int32_t klen, void* kval, u_int32_t dlen, void* dval,
u_int32_t *memsize, u_int32_t *disksize, LEAFENTRY *result);
int toku_gpma_compress_kvspace (GPMA pma, struct mempool *memp);
void *mempool_malloc_from_gpma(GPMA pma, struct mempool *mp, size_t size);
enum le_state { LE_COMMITTED=1, // A committed pair.
LE_BOTH, // A committed pair and a provisional pair.
LE_PROVDEL, // A committed pair that has been provisionally deleted
LE_PROVPAIR }; // No committed value, but a provisional pair.
struct contents_committed;
struct contents_both;
struct contents_provdelorpair;
u_int32_t leafentry_memsize (LEAFENTRY);
enum le_state get_le_state(LEAFENTRY);
void *get_le_contents(LEAFENTRY);
enum typ_tag get_le_tag(LEAFENTRY);
u_int32_t committed_keylen (void*cev);
void* committed_key (void*cev);
u_int32_t committed_vallen (struct contents_committed *ce);
void* committed_val (struct contents_committed *ce);
TXNID both_xid (struct contents_both *ce);
u_int32_t both_keylen (struct contents_both *ce);
u_int32_t both_committed_vallen (struct contents_both *ce);
u_int32_t both_prov_vallen (struct contents_both *ce);
void* both_key (struct contents_both *ce);
void* both_committed_val (struct contents_both *ce);
void* both_prov_val (struct contents_both*ce);
TXNID provdelorpair_xid (struct contents_provdelorpair *ce);
u_int32_t provdelorpair_keylen (struct contents_provdelorpair *ce);
u_int32_t provdelorpair_vallen (struct contents_provdelorpair *ce);
void* provdelorpair_key (struct contents_provdelorpair *ce);
void* provdelorpair_val (struct contents_provdelorpair *ce);
#define LESWITCHCALL(le,funname, ...) ({ \
assert(get_le_tag(le)==TYP_LEAFENTRY); \
switch(get_le_state(le)) { \
case LE_COMMITTED: return funname ## _le_committed( committed_keylen((struct contents_committed*)(get_le_contents(le))), \
committed_key((struct contents_committed*)(get_le_contents(le))), \
committed_vallen((struct contents_committed*)(get_le_contents(le))), \
committed_val((struct contents_committed*)(get_le_contents(le))), \
## __VA_ARGS__); \
case LE_BOTH: return funname ## _le_both( both_xid((struct contents_both*)(get_le_contents(le))), \
both_keylen((struct contents_both*)(get_le_contents(le))), \
both_key((struct contents_both*)(get_le_contents(le))), \
both_committed_vallen((struct contents_both*)(get_le_contents(le))), \
both_committed_val((struct contents_both*)(get_le_contents(le))), \
both_prov_vallen((struct contents_both*)(get_le_contents(le))), \
both_prov_val((struct contents_both*)(get_le_contents(le))), \
## __VA_ARGS__); \
case LE_PROVDEL: return funname ## _le_provdel ( provdelorpair_xid((struct contents_provdelorpair*)(get_le_contents(le))), \
provdelorpair_keylen((struct contents_provdelorpair*)(get_le_contents(le))), \
provdelorpair_key((struct contents_provdelorpair*)(get_le_contents(le))), \
provdelorpair_vallen((struct contents_provdelorpair*)(get_le_contents(le))), \
provdelorpair_val((struct contents_provdelorpair*)(get_le_contents(le))), \
## __VA_ARGS__); \
case LE_PROVPAIR: return funname ## _le_provpair(provdelorpair_xid((struct contents_provdelorpair*)(get_le_contents(le))), \
provdelorpair_keylen((struct contents_provdelorpair*)(get_le_contents(le))), \
provdelorpair_key((struct contents_provdelorpair*)(get_le_contents(le))), \
provdelorpair_vallen((struct contents_provdelorpair*)(get_le_contents(le))), \
provdelorpair_val((struct contents_provdelorpair*)(get_le_contents(le))), \
## __VA_ARGS__); \
} abort(); })
u_int32_t leafentry_memsize (LEAFENTRY le); // the size of a leafentry in memory.
u_int32_t leafentry_disksize (LEAFENTRY le); // this is the same as logsizeof_LEAFENTRY. The size of a leafentry on disk.
u_int32_t toku_logsizeof_LEAFENTRY(LEAFENTRY le);
void wbuf_LEAFENTRY(struct wbuf *w, LEAFENTRY le);
void rbuf_LEAFENTRY(struct rbuf *r, u_int32_t *resultsize, u_int32_t *disksize, LEAFENTRY *le);
int toku_fread_LEAFENTRY(FILE *f, LEAFENTRY *le, u_int32_t *crc, u_int32_t *len); // read a leafentry from a log
int toku_logprint_LEAFENTRY(FILE *outf, FILE *inf, const char *fieldname, u_int32_t *crc, u_int32_t *len, const char *format); // read a leafentry from a log and then print it in human-readable form.
void toku_free_LEAFENTRY(LEAFENTRY le);
int print_leafentry (FILE *outf, LEAFENTRY v); // Print a leafentry out in human-readable form.
int le_is_provdel(LEAFENTRY le); // Return true if it is a provisional delete.
void* le_latest_key (LEAFENTRY le); // Return the latest key (return NULL for provisional deletes)
u_int32_t le_latest_keylen (LEAFENTRY le); // Return the latest keylen.
void* le_latest_val (LEAFENTRY le); // Return the latest val (return NULL for provisional deletes)
u_int32_t le_latest_vallen (LEAFENTRY le); // Return the latest vallen. Returns 0 for provisional deletes.
// Return any key or value (even if it's only provisional)
void* le_any_key (LEAFENTRY le);
u_int32_t le_any_keylen (LEAFENTRY le);
void* le_any_val (LEAFENTRY le);
u_int32_t le_any_vallen (LEAFENTRY le);
#endif
......@@ -343,10 +343,19 @@ int toku_logger_finish (TOKULOGGER logger, struct logbytes *logbytes, struct wbu
}
int toku_logger_commit (TOKUTXN txn, int nosync) {
// printf("%s:%d committing\n", __FILE__, __LINE__);
// panic handled in log_commit
int r = toku_log_commit(txn->logger, (LSN*)0, (txn->parent==0) && !nosync, txn->txnid64); // exits holding neither of the tokulogger locks.
if (r!=0) goto free_and_return;
if (txn->parent!=0) {
if (r!=0) {
struct roll_entry *item;
broken:
while ((item=txn->newest_logentry)) {
txn->newest_logentry = item->prev;
rolltype_dispatch(item, toku_free_rolltype_);
toku_free(item);
}
r = 0;
} else if (txn->parent!=0) {
// Append the list to the front.
if (txn->oldest_logentry) {
// There are some entries, so link them in.
......@@ -357,18 +366,23 @@ int toku_logger_commit (TOKUTXN txn, int nosync) {
txn->parent->oldest_logentry = txn->oldest_logentry;
}
txn->newest_logentry = txn->oldest_logentry = 0;
}
free_and_return:
{
r = 0;
} else {
// do the commit calls and free everything
// we do the commit calls in reverse order too.
struct roll_entry *item;
//printf("%s:%d abort\n", __FILE__, __LINE__);
while ((item=txn->newest_logentry)) {
txn->newest_logentry = item->prev;
rolltype_dispatch_assign(item, toku_commit_, r, txn);
if (r!=0) goto broken;
rolltype_dispatch(item, toku_free_rolltype_);
toku_free(item);
}
r = 0;
}
list_remove(&txn->live_txns_link);
toku_free(txn);
}
return r;
}
......@@ -402,7 +416,7 @@ int toku_logger_log_fcreate (TOKUTXN txn, const char *fname, int mode) {
BYTESTRING bs = { .len=strlen(fname), .data = strdup(fname) };
int r = toku_log_fcreate (txn->logger, (LSN*)0, 0, toku_txn_get_txnid(txn), bs, mode);
if (r!=0) return r;
r = toku_logger_save_rollback_fcreate(txn, bs);
r = toku_logger_save_rollback_fcreate(txn, toku_txn_get_txnid(txn), bs);
return r;
}
......@@ -569,23 +583,30 @@ int toku_logprint_u_int32_t (FILE *outf, FILE *inf, const char *fieldname, u_int
return 0;
}
int toku_logprint_BYTESTRING (FILE *outf, FILE *inf, const char *fieldname, u_int32_t *crc, u_int32_t *len, const char *format __attribute__((__unused__))) {
BYTESTRING bs;
int r = toku_fread_BYTESTRING(inf, &bs, crc, len);
if (r!=0) return r;
fprintf(outf, " %s={len=%d data=\"", fieldname, bs.len);
void toku_print_BYTESTRING (FILE *outf, u_int32_t len, char *data) {
fprintf(outf, "{len=%d data=\"", len);
u_int32_t i;
for (i=0; i<bs.len; i++) {
switch (bs.data[i]) {
for (i=0; i<len; i++) {
switch (data[i]) {
case '"': fprintf(outf, "\\\""); break;
case '\\': fprintf(outf, "\\\\"); break;
case '\n': fprintf(outf, "\\n"); break;
default:
if (isprint(bs.data[i])) fprintf(outf, "%c", bs.data[i]);
else fprintf(outf, "\\%03o", bs.data[i]);
if (isprint(data[i])) fprintf(outf, "%c", data[i]);
else fprintf(outf, "\\%03o", (unsigned char)(data[i]));
}
}
fprintf(outf, "\"}");
}
int toku_logprint_BYTESTRING (FILE *outf, FILE *inf, const char *fieldname, u_int32_t *crc, u_int32_t *len, const char *format __attribute__((__unused__))) {
BYTESTRING bs;
int r = toku_fread_BYTESTRING(inf, &bs, crc, len);
if (r!=0) return r;
fprintf(outf, " %s=", fieldname);
toku_print_BYTESTRING(outf, bs.len, bs.data);
toku_free(bs.data);
return 0;
}
......@@ -671,6 +692,7 @@ int toku_abort_logentry_commit (struct logtype_commit *le __attribute__((__unuse
}
int toku_logger_abort(TOKUTXN txn) {
//printf("%s:%d aborting\n", __FILE__, __LINE__);
// Must undo everything. Must undo it all in reverse order.
// Build the reverse list
struct roll_entry *item;
......@@ -770,7 +792,8 @@ int toku_logger_log_archive (TOKULOGGER logger, char ***logs_p, int flags) {
//printf("%s:%d file=%s firstlsn=%lld checkpoint_lsns={%lld %lld}\n", __FILE__, __LINE__, all_logs[i], (long long)earliest_lsn_seen.lsn, (long long)logger->checkpoint_lsns[0].lsn, (long long)logger->checkpoint_lsns[1].lsn);
if ((earliest_lsn_seen.lsn <= logger->checkpoint_lsns[0].lsn)&&
(earliest_lsn_seen.lsn <= logger->checkpoint_lsns[1].lsn)) {
(earliest_lsn_seen.lsn <= logger->checkpoint_lsns[1].lsn)&&
(earliest_lsn_seen.lsn <= oldest_live_txn_lsn.lsn)) {
break;
}
}
......
......@@ -70,6 +70,9 @@ int toku_logprint_u_int32_t (FILE *outf, FILE *inf, const char *fieldname,
int toku_logprint_LOGGEDBRTHEADER (FILE *outf, FILE *inf, const char *fieldname, u_int32_t *crc, u_int32_t *len, const char *);
int toku_logprint_INTPAIRARRAY (FILE *outf, FILE *inf, const char *fieldname, u_int32_t *crc, u_int32_t *len, const char *);
// Useful thing for printing a bytestring.
void toku_print_BYTESTRING (FILE *outf, u_int32_t len, char *data);
int toku_read_and_print_logmagic (FILE *f, u_int32_t *version);
TXNID toku_txn_get_txnid (TOKUTXN);
......@@ -110,9 +113,6 @@ static inline int toku_copy_BYTESTRING(BYTESTRING *target, BYTESTRING val) {
static inline void toku_free_BYTESTRING(BYTESTRING val) {
toku_free(val.data);
}
static inline void toku_free_DISKOFFARRAY(DISKOFFARRAY val) {
toku_free(val.array);
}
static inline int toku_copy_LOGGEDBRTHEADER(LOGGEDBRTHEADER *target, LOGGEDBRTHEADER val) {
*target = val;
......
......@@ -39,23 +39,38 @@ struct logtype {
int logformat_version_number = 0;
const struct logtype rollbacks[] = {
{"fcreate", 'F', FA{{"BYTESTRING", "fname", 0},
{"fcreate", 'F', FA{{"TXNID", "xid", 0},
{"BYTESTRING", "fname", 0},
NULLFIELD}},
// {"fclose", 'c', FA{{"FILENUM", "filenum", 0},
// {"BYTESTRING", "fname", 0},
// NULLFIELD}},
{"deleteatleaf", 'd', FA{{"FILENUM", "filenum", 0}, // Note a delete for rollback. The delete takes place in a leaf.
{"cmdinsert", 'i', FA{{"TXNID", "xid", 0},
{"FILENUM", "filenum", 0},
{"BYTESTRING", "key", 0},
{"BYTESTRING", "data", 0},
NULLFIELD}},
{"insertatleaf", 'i', FA{{"FILENUM", "filenum", 0}, // Note an insert for rollback. The insert takes place in a leaf.
{"cmddeleteboth", 'D', FA{{"TXNID", "xid", 0},
{"FILENUM", "filenum", 0},
{"BYTESTRING", "key", 0},
{"BYTESTRING", "data", 0},
NULLFIELD}},
{"xactiontouchednonleaf", 'n', FA{{"FILENUM", "filenum", 0},
{"DISKOFFARRAY", "parents", 0},
{"DISKOFF", "diskoff", 0},
{"cmddelete", 'd', FA{{"TXNID", "xid", 0},
{"FILENUM", "filenum", 0},
{"BYTESTRING", "key", 0},
NULLFIELD}},
// {"fclose", 'c', FA{{"FILENUM", "filenum", 0},
// {"BYTESTRING", "fname", 0},
// NULLFIELD}},
// {"deleteatleaf", 'd', FA{{"FILENUM", "filenum", 0}, // Note a delete for rollback. The delete takes place in a leaf.
// {"BYTESTRING", "key", 0},
// {"BYTESTRING", "data", 0},
// NULLFIELD}},
// {"insertatleaf", 'i', FA{{"FILENUM", "filenum", 0}, // Note an insert for rollback. The insert takes place in a leaf.
// {"BYTESTRING", "key", 0},
// {"BYTESTRING", "data", 0},
// NULLFIELD}},
// {"xactiontouchednonleaf", 'n', FA{{"FILENUM", "filenum", 0},
// {"DISKOFFARRAY", "parents", 0},
// {"DISKOFF", "diskoff", 0},
// NULLFIELD}},
{0,0,FA{NULLFIELD}}
};
......@@ -152,12 +167,28 @@ const struct logtype logtypes[] = {
{"u_int32_t", "oldfingerprint", "%08x"},
{"u_int32_t", "newfingerprint", "%08x"},
NULLFIELD}},
{"insertinleaf", 'I', FA{{"TXNID", "txnid", 0},
{"FILENUM", "filenum", 0},
// {"insertinleaf", 'I', FA{{"TXNID", "txnid", 0},
// {"FILENUM", "filenum", 0},
// {"DISKOFF", "diskoff", 0},
// {"u_int32_t", "pmaidx", 0},
// {"BYTESTRING", "key", 0},
// {"BYTESTRING", "data", 0},
// NULLFIELD}},
// {"replaceleafentry", 'L', FA{{"FILENUM", "filenum", 0},
// {"DISKOFF", "diskoff", 0},
// {"u_int32_t", "pmaidx", 0},
// {"LEAFENTRY", "oldleafentry", 0},
// {"LEAFENTRY", "newleafentry", 0},
// NULLFIELD}},
{"insertleafentry", 'I', FA{{"FILENUM", "filenum", 0},
{"DISKOFF", "diskoff", 0},
{"u_int32_t", "pmaidx", 0},
{"BYTESTRING", "key", 0},
{"BYTESTRING", "data", 0},
{"LEAFENTRY", "newleafentry", 0},
NULLFIELD}},
{"deleteleafentry", 'D', FA{{"FILENUM", "filenum", 0},
{"DISKOFF", "diskoff", 0},
{"u_int32_t", "pmaidx", 0},
{"LEAFENTRY", "oldleafentry", 0},
NULLFIELD}},
{"deleteinleaf", 'd', FA{{"TXNID", "txnid", 0},
{"FILENUM", "filenum", 0},
......@@ -259,6 +290,9 @@ void generate_log_struct (void) {
fprintf(hf, "int toku_rollback_%s (", lt->name);
DO_FIELDS(ft, lt, fprintf(hf, "%s %s,", ft->type, ft->name));
fprintf(hf, "TOKUTXN txn);\n");
fprintf(hf, "int toku_commit_%s (", lt->name);
DO_FIELDS(ft, lt, fprintf(hf, "%s %s,", ft->type, ft->name));
fprintf(hf, "TOKUTXN txn);\n");
}));
fprintf(hf, "struct log_entry {\n");
fprintf(hf, " enum lt_cmd cmd;\n");
......
......@@ -10,12 +10,13 @@
/* Generally: errno is set to 0 or a value to indicate problems. */
enum typ_tag { TYP_BRTNODE = 0xdead0001,
enum typ_tag { TYP_BRTNODE = 3735879681, //0xdead0001,
TYP_CACHETABLE, TYP_PAIR, /* for cachetables */
TYP_PMA,
TYP_GPMA,
TYP_TOKULOGGER,
TYP_TOKUTXN
TYP_TOKUTXN,
TYP_LEAFENTRY
};
/* Everything should call toku_malloc() instead of malloc(), and toku_calloc() instead of calloc() */
......
......@@ -45,6 +45,7 @@ void *toku_mempool_malloc(struct mempool *mp, size_t size, int alignment) {
assert(mp->free_offset <= mp->size);
void *vp;
size_t offset = (mp->free_offset + (alignment-1)) & ~(alignment-1);
//printf("mempool_malloc size=%ld base=%p free_offset=%ld mp->size=%ld offset=%ld\n", size, mp->base, mp->free_offset, mp->size, offset);
if (offset + size > mp->size) {
vp = 0;
} else {
......@@ -54,11 +55,14 @@ void *toku_mempool_malloc(struct mempool *mp, size_t size, int alignment) {
assert(mp->free_offset <= mp->size);
assert(((long)vp & (alignment-1)) == 0);
assert(vp == 0 || (mp->base <= vp && vp + size <= mp->base + mp->size));
//printf("mempool returning %p\n", vp);
return vp;
}
// if vp is null then we are freeing something, but not specifying what. The data won't be freed until compression is done.
void toku_mempool_mfree(struct mempool *mp, void *vp, int size) {
assert(size >= 0 && mp->base <= vp && vp + size <= mp->base + mp->size);
assert(size >= 0);
if (vp) assert(toku_mempool_inrange(mp, vp, size));
mp->frag_size += size;
assert(mp->frag_size <= mp->size);
}
......@@ -3,7 +3,7 @@
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
#include <assert.h>
#include "toku_assert.h"
struct rbuf {
unsigned char *buf;
......@@ -34,19 +34,19 @@ static inline void rbuf_literal_bytes (struct rbuf *r, bytevec *bytes, unsigned
}
/* Return a pointer into the middle of the buffer. */
static void rbuf_bytes (struct rbuf *r, bytevec *bytes, unsigned int *n_bytes)
static inline void rbuf_bytes (struct rbuf *r, bytevec *bytes, unsigned int *n_bytes)
{
*n_bytes = rbuf_int(r);
rbuf_literal_bytes(r, bytes, *n_bytes);
}
static unsigned long long rbuf_ulonglong (struct rbuf *r) {
static inline unsigned long long rbuf_ulonglong (struct rbuf *r) {
unsigned i0 = rbuf_int(r);
unsigned i1 = rbuf_int(r);
return ((unsigned long long)(i0)<<32) | ((unsigned long long)(i1));
}
static DISKOFF rbuf_diskoff (struct rbuf *r) {
static inline DISKOFF rbuf_diskoff (struct rbuf *r) {
unsigned i0 = rbuf_int(r);
unsigned i1 = rbuf_int(r);
return ((unsigned long long)(i0)<<32) | ((unsigned long long)(i1));
......
......@@ -21,7 +21,7 @@
#include <sys/stat.h>
#include <unistd.h>
#define DO_VERIFY_COUNTS
//#define DO_VERIFY_COUNTS
#ifdef DO_VERIFY_COUNTS
#define VERIFY_COUNTS(n) toku_verify_counts(n)
#else
......@@ -390,7 +390,7 @@ void toku_recover_fopen (LSN UU(lsn), TXNID UU(txnid), BYTESTRING fname, FILENUM
toku_free_BYTESTRING(fname);
}
void toku_recover_insertinleaf (LSN lsn, TXNID UU(txnid), FILENUM filenum, DISKOFF diskoff, u_int32_t pmaidx, BYTESTRING keybs, BYTESTRING databs) {
void toku_recover_insertleafentry (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t pmaidx, LEAFENTRY newleafentry) {
struct cf_pair *pair = NULL;
int r = find_cachefile(filenum, &pair);
assert(r==0);
......@@ -401,25 +401,84 @@ void toku_recover_insertinleaf (LSN lsn, TXNID UU(txnid), FILENUM filenum, DISKO
BRTNODE node = node_v;
assert(node->height==0);
VERIFY_COUNTS(node);
struct kv_pair *kvp = brtnode_malloc_kv_pair(node->u.l.buffer, &node->u.l.buffer_mempool, keybs.data, keybs.len, databs.data, databs.len);
assert(pair);
toku_gpma_set_at_index(node->u.l.buffer, pmaidx, kv_pair_size(kvp), kvp);
node->local_fingerprint += node->rand4fingerprint*toku_calccrc32_kvpair(keybs.data, keybs.len, databs.data, databs.len);
// printf("%s:%d local_fingerprint=%08x (this=%08x)\n", __FILE__, __LINE__, node->local_fingerprint, toku_calccrc32_kvpair(keybs.data, keybs.len, databs.data, databs.len));
node->u.l.n_bytes_in_buffer += PMA_ITEM_OVERHEAD + KEY_VALUE_OVERHEAD + keybs.len + databs.len;
// PMA_ITERATE_IDX(node->u.l.buffer, idx, skey, keylen __attribute__((__unused__)), sdata, datalen __attribute__((__unused__)),
// printf("%d: %s %s\n", idx, (char*)skey, (char*)sdata));
node->log_lsn = lsn;
{
int memsize = leafentry_memsize(newleafentry);
void *mem = mempool_malloc_from_gpma(node->u.l.buffer, &node->u.l.buffer_mempool, memsize);
memcpy(mem, newleafentry, memsize);
toku_gpma_set_at_index(node->u.l.buffer, pmaidx, memsize, mem);
node->u.l.n_bytes_in_buffer += PMA_ITEM_OVERHEAD + leafentry_disksize(newleafentry);
node->local_fingerprint += node->rand4fingerprint * toku_le_crc(newleafentry);
}
r = toku_cachetable_unpin(pair->cf, diskoff, 1, toku_serialize_brtnode_size(node));
assert(r==0);
toku_free_LEAFENTRY(newleafentry);
}
void toku_recover_deleteleafentry (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t pmaidx, LEAFENTRY oldleafentry) {
struct cf_pair *pair = NULL;
int r = find_cachefile(filenum, &pair);
assert(r==0);
void *node_v;
assert(pair->brt);
r = toku_cachetable_get_and_pin(pair->cf, diskoff, &node_v, NULL, toku_brtnode_flush_callback, toku_brtnode_fetch_callback, pair->brt);
assert(r==0);
BRTNODE node = node_v;
assert(node->height==0);
VERIFY_COUNTS(node);
node->log_lsn = lsn;
{
u_int32_t len; void *data;
r=toku_gpma_get_from_index(node->u.l.buffer, pmaidx, &len, &data);
assert(r==0);
assert(len==leafentry_memsize(oldleafentry));
assert(memcmp(oldleafentry, data, len)==0);
node->u.l.n_bytes_in_buffer -= PMA_ITEM_OVERHEAD + leafentry_disksize(data);
node->local_fingerprint -= node->rand4fingerprint * toku_le_crc(data);
toku_mempool_mfree(&node->u.l.buffer_mempool, data, len);
toku_gpma_clear_at_index(node->u.l.buffer, pmaidx);
}
r = toku_cachetable_unpin(pair->cf, diskoff, 1, toku_serialize_brtnode_size(node));
assert(r==0);
toku_free_BYTESTRING(keybs);
toku_free_BYTESTRING(databs);
toku_free_LEAFENTRY(oldleafentry);
}
//void toku_recover_replaceleafentry (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t pmaidx, LEAFENTRY oldleafentry, LEAFENTRY newleafentry) {
// struct cf_pair *pair = NULL;
// int r = find_cachefile(filenum, &pair);
// assert(r==0);
// void *node_v;
// assert(pair->brt);
// r = toku_cachetable_get_and_pin(pair->cf, diskoff, &node_v, NULL, toku_brtnode_flush_callback, toku_brtnode_fetch_callback, pair->brt);
// assert(r==0);
// BRTNODE node = node_v;
// assert(node->height==0);
// VERIFY_COUNTS(node);
// node->log_lsn = lsn;
// {
// u_int32_t len; void *data;
// r=toku_gpma_get_from_index(node->u.l.buffer, pmaidx, &len, &data);
// assert(r==0);
// assert(len==leafentry_memsize(oldleafentry));
// assert(memcmp(oldleafentry, data, len)==0);
// node->u.l.n_bytes_in_buffer -= PMA_ITEM_OVERHEAD + leafentry_disksize(data);
// node->local_fingerprint -= node->rand4fingerprint * toku_le_crc(data);
// toku_mempool_mfree(&node->u.l.buffer_mempool, data, len);
// }
// {
// int memsize = leafentry_memsize(newleafentry);
// void *mem = mempool_malloc_from_gpma(node->u.l.buffer, &node->u.l.buffer_mempool, memsize);
// memcpy(mem, newleafentry, memsize);
// toku_gpma_set_at_index(node->u.l.buffer, pmaidx, memsize, mem);
// node->u.l.n_bytes_in_buffer += PMA_ITEM_OVERHEAD + leafentry_disksize(newleafentry);
// node->local_fingerprint += node->rand4fingerprint * toku_le_crc(newleafentry);
// }
// r = toku_cachetable_unpin(pair->cf, diskoff, 1, toku_serialize_brtnode_size(node));
// assert(r==0);
// toku_free_LEAFENTRY(oldleafentry);
// toku_free_LEAFENTRY(newleafentry);
//}
void toku_recover_deleteinleaf (LSN lsn, TXNID UU(txnid), FILENUM filenum, DISKOFF diskoff, u_int32_t pmaidx, BYTESTRING keybs, BYTESTRING databs) {
struct cf_pair *pair = NULL;
int r = find_cachefile(filenum, &pair);
......@@ -440,7 +499,8 @@ void toku_recover_deleteinleaf (LSN lsn, TXNID UU(txnid), FILENUM filenum, DISKO
}
}
toku_gpma_clear_at_index(node->u.l.buffer, pmaidx);
node->local_fingerprint -= node->rand4fingerprint*toku_calccrc32_kvpair(keybs.data, keybs.len, databs.data, databs.len);
assert(!"kvpair");
//node->local_fingerprint -= node->rand4fingerprint*toku_calccrc32_kvpair(keybs.data, keybs.len, databs.data, databs.len);
node->u.l.n_bytes_in_buffer -= PMA_ITEM_OVERHEAD + KEY_VALUE_OVERHEAD + keybs.len + databs.len;
VERIFY_COUNTS(node);
node->log_lsn = lsn;
......@@ -449,7 +509,6 @@ void toku_recover_deleteinleaf (LSN lsn, TXNID UU(txnid), FILENUM filenum, DISKO
toku_free_BYTESTRING(keybs);
toku_free_BYTESTRING(databs);
}
// a newbrtnode should have been done before this
void toku_recover_resizepma (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t oldsize __attribute__((__unused__)), u_int32_t newsize) {
struct cf_pair *pair = NULL;
......@@ -490,9 +549,9 @@ int move_indices (GPMA from, struct mempool *from_mempool,
struct gitem item = from->items[idx];
items[i]=item;
from->items[idx].data = 0;
fp += toku_calccrc32_kvpair_struct(item.data);
sizediff += PMA_ITEM_OVERHEAD + item.len;
assert(kv_pair_size(item.data)==item.len);
fp += toku_le_crc(item.data);
sizediff += PMA_ITEM_OVERHEAD + leafentry_disksize(item.data);
assert(leafentry_memsize(item.data)==item.len);
}
from->n_items_present -= fromto.size;
......@@ -512,7 +571,7 @@ int move_indices (GPMA from, struct mempool *from_mempool,
to->items[to_idx] = (struct gitem){items[i].len, new_data};
toku_mempool_mfree(from_mempool, items[i].data, items[i].len);
}
assert(kv_pair_size(to->items[to_idx].data)==to->items[to_idx].len);
assert(leafentry_memsize(to->items[to_idx].data)==to->items[to_idx].len);
}
to->n_items_present += fromto.size;
*a_fp -= a_rand * fp;
......@@ -520,8 +579,8 @@ int move_indices (GPMA from, struct mempool *from_mempool,
*a_nbytes -= sizediff;
*b_nbytes += sizediff;
toku_free(items);
toku_verify_gpma(from);
toku_verify_gpma(to);
//toku_verify_gpma(from);
//toku_verify_gpma(to);
return 0;
}
......
......@@ -12,7 +12,14 @@
#include "cachetable.h"
#include "key.h"
int toku_rollback_fcreate (BYTESTRING bs_fname,
int toku_commit_fcreate (TXNID xid __attribute__((__unused__)),
BYTESTRING bs_fname __attribute__((__unused__)),
TOKUTXN txn __attribute__((__unused__))) {
return 0;
}
int toku_rollback_fcreate (TXNID xid __attribute__((__unused__)),
BYTESTRING bs_fname,
TOKUTXN txn __attribute__((__unused__))) {
char *fname = fixup_fname(&bs_fname);
char *directory = txn->logger->directory;
......@@ -26,84 +33,62 @@ int toku_rollback_fcreate (BYTESTRING bs_fname,
return 0;
}
#if 0
int toku_rollback_fclose (FILENUM filenum, BYTESTRING bs_fname, TOKUTXN txn) {
abort();
filenum=filenum;
bs_fname=bs_fname;
txn=txn;
#if 0
char *fixedfname = fixup_fname(&bs_fname);
int fd = open(fixedfname, O_RDWR, 0);
assert(fd>=0);
BRT MALLOC(brt);
assert(errno==0 && brt!=0);
brt->database_name = fixedfname;
brt->h=0;
list_init(&brt->cursors);
brt->compare_fun = 0;
brt->dup_compare = 0;
brt->db = 0;
CACHETABLE cf;
int r = toku_cachetable_openfd(&cf, /*ct*/0, fd, brt);
int toku_commit_cmdinsert (TXNID xid, FILENUM filenum, BYTESTRING key,BYTESTRING data,TOKUTXN txn) {
CACHEFILE cf;
BRT brt;
//printf("%s:%d committing insert %s %s\n", __FILE__, __LINE__, key.data, data.data);
int r = toku_cachefile_of_filenum(txn->logger->ct, filenum, &cf, &brt);
assert(r==0);
brt->skey = brt->sval = 0;
brt->cf = cf;
toku_recover_note_cachefile(filenum, cf, brt);
printf("%s:%d Must remember to close the file again after txn %p finishes aborting\n", __FILE__, __LINE__, txn);
return 0;
#endif
DBT key_dbt,data_dbt;
BRT_CMD_S brtcmd = { BRT_COMMIT_BOTH, xid,
.u.id={toku_fill_dbt(&key_dbt, key.data, key.len),
toku_fill_dbt(&data_dbt, data.data, data.len)}};
return toku_brt_root_put_cmd(brt, &brtcmd, toku_txn_logger(txn));
}
#endif
//int toku_rollback_newbrtnode (struct logtype_newbrtnode *le, TOKUTXN txn) {
// // All that must be done is to put the node on the freelist.
// // Since we don't have a freelist right now, we don't have anything to do.
// // We'll fix this later (See #264)
// le=le;
// txn=txn;
// return 0;
//}
int toku_rollback_insertatleaf (FILENUM filenum, BYTESTRING key,BYTESTRING data, TOKUTXN txn) {
int toku_rollback_cmdinsert (TXNID xid, FILENUM filenum, BYTESTRING key,BYTESTRING data,TOKUTXN txn) {
CACHEFILE cf;
BRT brt;
int r = toku_cachefile_of_filenum(txn->logger->ct, filenum, &cf, &brt);
assert(r==0);
//printf("%s:%d aborting insert %s %s\n", __FILE__, __LINE__, key.data, data.data);
DBT key_dbt,data_dbt;
r = toku_brt_delete_both(brt,
toku_fill_dbt(&key_dbt, key.data, key.len),
toku_fill_dbt(&data_dbt, data.data, data.len),
0);
return r;
BRT_CMD_S brtcmd = { BRT_ABORT_BOTH, xid,
.u.id={toku_fill_dbt(&key_dbt, key.data, key.len),
toku_fill_dbt(&data_dbt, data.data, data.len)}};
return toku_brt_root_put_cmd(brt, &brtcmd, toku_txn_logger(txn));
}
int toku_commit_cmddeleteboth (TXNID xid, FILENUM filenum, BYTESTRING key,BYTESTRING data,TOKUTXN txn) {
return toku_commit_cmdinsert(xid, filenum, key, data, txn);
}
int toku_rollback_cmddeleteboth (TXNID xid, FILENUM filenum, BYTESTRING key,BYTESTRING data,TOKUTXN txn) {
return toku_rollback_cmdinsert(xid, filenum, key, data, txn);
}
int toku_rollback_deleteatleaf (FILENUM filenum, BYTESTRING key, BYTESTRING data,TOKUTXN txn) {
int toku_commit_cmddelete (TXNID xid, FILENUM filenum, BYTESTRING key,TOKUTXN txn) {
CACHEFILE cf;
BRT brt;
int r = toku_cachefile_of_filenum(txn->logger->ct, filenum, &cf, &brt);
assert(r==0);
//printf("%s:%d aborting delete %s %s\n", __FILE__, __LINE__, key.data, data.data);
DBT key_dbt,data_dbt;
r = toku_brt_insert(brt,
toku_fill_dbt(&key_dbt, key.data, key.len),
toku_fill_dbt(&data_dbt, data.data, data.len),
0); // Do the insertion unconditionally
return r;
BRT_CMD_S brtcmd = { BRT_COMMIT_ANY, xid,
.u.id={toku_fill_dbt(&key_dbt, key.data, key.len),
toku_init_dbt(&data_dbt)}};
return toku_brt_root_put_cmd(brt, &brtcmd, toku_txn_logger(txn));
}
int toku_rollback_xactiontouchednonleaf(FILENUM filenum, DISKOFFARRAY array __attribute__((__unused__)), DISKOFF diskoff, TOKUTXN txn) {
int toku_rollback_cmddelete (TXNID xid, FILENUM filenum, BYTESTRING key,TOKUTXN txn) {
CACHEFILE cf;
BRT brt;
int r = toku_cachefile_of_filenum(txn->logger->ct, filenum, &cf, &brt);
assert(r==0);
r = toku_brt_nonleaf_expunge_xaction(brt, diskoff, txn->txnid64);
assert(r==0);
//printf("%s:%d node=%lld has Rollback parents = {", __FILE__, __LINE__, (long long)diskoff);
//int i; for (i=0; i<array.len; i++) printf(" %lld", array.array[i]);
//printf("}\n");
if (array.len!=0) printf("%s:%d array.len!=0 and we didn't fix up the fingerprints.\n", __FILE__, __LINE__);
return 0;
//printf("%s:%d aborting delete %s %s\n", __FILE__, __LINE__, key.data, data.data);
DBT key_dbt,data_dbt;
BRT_CMD_S brtcmd = { BRT_ABORT_ANY, xid,
.u.id={toku_fill_dbt(&key_dbt, key.data, key.len),
toku_init_dbt(&data_dbt)}};
return toku_brt_root_put_cmd(brt, &brtcmd, toku_txn_logger(txn));
}
......@@ -44,7 +44,7 @@ void doit (void) {
assert(r==0);
u_int32_t fingerprint=0;
r = toku_testsetup_insert_to_nonleaf(t, nodeb, BRT_DELETE, "hello", 6, 0, 0, &fingerprint);
r = toku_testsetup_insert_to_nonleaf(t, nodeb, BRT_DELETE_ANY, "hello", 6, 0, 0, &fingerprint);
assert(r==0);
r = toku_testsetup_root(t, nodeb);
......
......@@ -51,10 +51,8 @@ TDB_CPPFLAGS = -I../../include
SRCS = $(sort $(wildcard *.c))
TDB_TESTS = $(patsubst %.c,%.tdb,$(SRCS))
BDB_DONTRUN = bug627
BDB_DONTRUN = bug627 test_abort1
BDB_TESTS = $(patsubst %.c,%.bdb,$(filter-out $(patsubst %,%.c,$(BDB_DONTRUN)),$(SRCS)))
foobdb:
echo $(BDB_TESTS)
ALL_TESTS = $(TDB_TESTS) $(BDB_TESTS)
......
......@@ -96,10 +96,12 @@ void test_db_put_aborts (void) {
key.size=4;
data.data="now";
data.size=4;
r=db->put(db, tid, &key, &data, 0);
r=db->put(db, tid2, &key, &data, 0);
CKERR(r);
}
//printf("%s:%d aborting\n", __FILE__, __LINE__);
r=tid->abort(tid); assert(r==0);
//printf("%s:%d committing\n", __FILE__, __LINE__);
r=tid2->commit(tid2,0); assert(r==0);
}
// The database should exist
......@@ -109,6 +111,7 @@ void test_db_put_aborts (void) {
assert(r==0);
}
// But the item should not be in it.
if (1)
{
DB_TXN *tid;
r=env->txn_begin(env, 0, &tid, 0); assert(r==0);
......@@ -122,6 +125,15 @@ void test_db_put_aborts (void) {
assert(r!=0);
assert(r==DB_NOTFOUND);
}
{
DBT key,data;
memset(&key, 0, sizeof(key));
memset(&data, 0, sizeof(data));
key.data="bye";
key.size=4;
r=db->get(db, tid, &key, &data, 0);
CKERR(r);
}
r=tid->commit(tid,0); assert(r==0);
}
......
......@@ -49,7 +49,7 @@ void do_test_abort2 (void) {
r=db->close(db, 0); CKERR(r);
r=env->close(env, 0); CKERR(r);
printf("%s:%d\n", __FILE__, __LINE__);
//printf("%s:%d\n", __FILE__, __LINE__);
// Now do a few inserts that abort.
r=db_env_create(&env, 0); assert(r==0);
......@@ -78,17 +78,17 @@ void do_test_abort2 (void) {
r=txn->abort(txn); CKERR(r);
printf("%s:%d\n", __FILE__, __LINE__);
//printf("%s:%d\n", __FILE__, __LINE__);
//r=db->close(db,0); CKERR(r); r=env->close(env, 0); CKERR(r); return;
// Don't do a lookup on "hello7", because that will force things out of the buffer.
r=db->close(db, 0); CKERR(r);
printf("%s:%d\n", __FILE__, __LINE__);
//printf("%s:%d\n", __FILE__, __LINE__);
r=db_create(&db, env, 0); CKERR(r);
r=env->txn_begin(env, 0, &txn, 0); assert(r==0);
r=db->open(db, txn, "foo.db", 0, DB_BTREE, 0, 0777); CKERR(r);
r=txn->abort(txn); CKERR(r);
printf("%s:%d\n", __FILE__, __LINE__);
r=txn->commit(txn, 0); CKERR(r);
//printf("%s:%d\n", __FILE__, __LINE__);
r=env->txn_begin(env, 0, &txn, 0); assert(r==0);
{
......@@ -96,7 +96,7 @@ void do_test_abort2 (void) {
memset(&data, 0, sizeof(data));
r = db->get(db, txn, dbt_init(&key, "hello7", strlen("hello7")+1), &data, 0);
CKERR(r);
printf("data is %s\n", (char*)data.data);
//printf("data is %s\n", (char*)data.data);
assert(((char*)data.data)[0]=='0');
}
r=txn->abort(txn); CKERR(r);
......
......@@ -106,7 +106,7 @@ void test_dup_delete(int n, int dup_mode) {
int k = htonl(n/2);
DBT key, val;
r = db->get(db, null_txn, dbt_init(&key, &k, sizeof k), dbt_init_malloc(&val), 0);
assert(r != 0);
assert(r == DB_NOTFOUND);
}
/* verify all dups are removed using a cursor */
......
......@@ -178,6 +178,7 @@ static void verify_items (DB_ENV *env, DB *db) {
snprintf(hello, sizeof(hello), "hello%d.%d", kv, dv);
snprintf(there, sizeof(hello), "there%d", dv);
k2.data = hello; k2.size=strlen(hello)+1;
printf("kv=%d dv=%d\n", kv, dv);
r=db->get(db, txn, &k2, &v2, 0);
assert(r==0);
assert(strcmp(v2.data, there)==0);
......
......@@ -2663,6 +2663,9 @@ char *db_strerror(int error) {
if (error==DB_BADFORMAT) {
return "Database Bad Format (probably a corrupted database)";
}
if (error==DB_NOTFOUND) {
return "Not found";
}
static char unknown_result[100]; // Race condition if two threads call this at the same time. However even in a bad case, it should be some sort of null-terminated string.
errorstr = unknown_result;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment