Commit 95cd588f authored by Bradley C. Kuszmaul's avatar Bradley C. Kuszmaul

Merge in the final hash shortcuts.

{{{
svn merge -r4633:4638 https://svn.tokutek.com/tokudb/tokudb.906
}}}

Addresses #906.


git-svn-id: file:///svn/tokudb@4639 c7de825b-a66e-492c-adef-691d508d4ae1
parent 068a8be7
......@@ -107,6 +107,13 @@ enum {
BRT_PIVOT_FRONT_COMPRESS = 8,
};
struct remembered_hash {
BOOL valid; // set to FALSE if the fullhash is invalid
FILENUM fnum;
DISKOFF root;
u_int32_t fullhash; // fullhash is the hashed value of fnum and root.
};
struct brt_header {
int dirty;
u_int32_t fullhash;
......@@ -117,6 +124,7 @@ struct brt_header {
int n_named_roots; /* -1 if the only one is unnamed */
char **names; // an array of names. NULL if subdatabases are not allowed.
DISKOFF *roots; // an array of DISKOFFs. Element 0 holds the element if no subdatabases allowed.
struct remembered_hash *root_hashes; // an array of hashes of the root offsets.
unsigned int *flags_array; // an array of flags. Element 0 holds the element if no subdatabases allowed.
FIFO fifo; // all the abort and commit commands. If the header gets flushed to disk, we write the fifo contents beyond the unused_memory.
......@@ -180,7 +188,7 @@ extern cachetable_flush_func_t toku_brtnode_flush_callback, toku_brtheader_flush
extern cachetable_fetch_func_t toku_brtnode_fetch_callback, toku_brtheader_fetch_callback;
extern int toku_read_and_pin_brt_header (CACHEFILE cf, struct brt_header **header);
extern int toku_unpin_brt_header (BRT brt);
extern CACHEKEY* toku_calculate_root_offset_pointer (BRT brt);
extern CACHEKEY* toku_calculate_root_offset_pointer (BRT brt, u_int32_t *root_hash);
static const BRTNODE null_brtnode=0;
......
......@@ -595,10 +595,12 @@ static int deserialize_brtheader_6_or_earlier (int fd, DISKOFF off, struct brt_h
MALLOC_N(h->n_named_roots, h->flags_array);
for (i=0; i<h->n_named_roots; i++) h->flags_array[i]=flags_for_all;
MALLOC_N(h->n_named_roots, h->roots);
if (h->n_named_roots > 0 && h->roots == NULL) {ret = ENOMEM; goto died1;}
MALLOC_N(h->n_named_roots, h->root_hashes);
if (h->n_named_roots > 0 && (h->roots == NULL || h->root_hashes==NULL)) {ret = ENOMEM; goto died1;}
if (0) {
died2:
toku_free(h->roots);
toku_free(h->root_hashes);
goto died1;
}
MALLOC_N(h->n_named_roots, h->names);
......@@ -615,6 +617,7 @@ static int deserialize_brtheader_6_or_earlier (int fd, DISKOFF off, struct brt_h
for (i=0; i<h->n_named_roots; i++) {
bytevec nameptr;
unsigned int len;
h->root_hashes[i].valid = FALSE;
h->roots[i] = rbuf_diskoff(&rc);
rbuf_bytes(&rc, &nameptr, &len);
if (strlen(nameptr)+1!=len) {ret = EINVAL; goto died3;}
......@@ -625,9 +628,10 @@ static int deserialize_brtheader_6_or_earlier (int fd, DISKOFF off, struct brt_h
} else {
MALLOC_N(1, h->flags_array);
MALLOC_N(1, h->roots);
MALLOC_N(1, h->root_hashes);
h->flags_array[0]=flags_for_all;
h->roots[0] = rbuf_diskoff(&rc);
h->root_hashes[0].valid = FALSE;
h->names = 0;
}
if (rc.ndone!=rc.size) {ret = EINVAL; goto died3;}
......@@ -665,8 +669,10 @@ int deserialize_brtheader_7_or_later(u_int32_t size, int fd, DISKOFF off, struct
int n_to_malloc = (h->n_named_roots == 0) ? 1 : h->n_named_roots;
MALLOC_N(n_to_malloc, h->flags_array); if (h->flags_array==0) { ret=errno; if (0) { died2: free(h->flags_array); } goto died1; }
MALLOC_N(n_to_malloc, h->roots); if (h->roots==0) { ret=errno; if (0) { died3: if (h->n_named_roots>=0) free(h->roots); } goto died2; }
MALLOC_N(n_to_malloc, h->names); if (h->names==0) { ret=errno; if (0) { died4: if (h->n_named_roots>=0) free(h->names); } goto died3; }
MALLOC_N(n_to_malloc, h->root_hashes); if (h->root_hashes==0) { ret=errno; if (0) { died4: if (h->n_named_roots>=0) free(h->root_hashes); } goto died3; }
MALLOC_N(n_to_malloc, h->names); if (h->names==0) { ret=errno; if (0) { died5: if (h->n_named_roots>=0) free(h->names); } goto died4; }
for (i=0; i<h->n_named_roots; i++) {
h->root_hashes[i].valid = FALSE;
h->roots[i] = rbuf_diskoff(&rc);
h->flags_array[i] = rbuf_int(&rc);
bytevec nameptr;
......@@ -680,11 +686,13 @@ int deserialize_brtheader_7_or_later(u_int32_t size, int fd, DISKOFF off, struct
int n_to_malloc = 1;
MALLOC_N(n_to_malloc, h->flags_array); if (h->flags_array==0) { ret=errno; goto died1; }
MALLOC_N(n_to_malloc, h->roots); if (h->roots==0) { ret=errno; goto died2; }
MALLOC_N(n_to_malloc, h->root_hashes); if (h->root_hashes==0) { ret=errno; goto died3; }
h->names = 0;
h->roots[0] = rbuf_diskoff(&rc);
h->root_hashes[0].valid = FALSE;
h->flags_array[0] = rbuf_int(&rc);
}
if (rc.ndone!=rc.size) {ret = EINVAL; goto died4;}
if (rc.ndone!=rc.size) {ret = EINVAL; goto died5;}
toku_free(rc.buf);
*brth = h;
return 0;
......
......@@ -51,6 +51,7 @@ int toku_testsetup_root(BRT brt, DISKOFF diskoff) {
int r = toku_read_and_pin_brt_header(brt->cf, &brt->h);
if (r!=0) return r;
brt->h->roots[0] = diskoff;
brt->h->root_hashes[0].valid = FALSE;
r = toku_unpin_brt_header(brt);
return r;
}
......
......@@ -151,7 +151,8 @@ int toku_verify_brt (BRT brt) {
if (0) { died0: toku_unpin_brt_header(brt); }
return r;
}
rootp = toku_calculate_root_offset_pointer(brt);
u_int32_t root_hash;
rootp = toku_calculate_root_offset_pointer(brt, &root_hash);
if ((r=toku_verify_brtnode(brt, *rootp, 0, 0, 0, 0, 1))) goto died0;
if ((r = toku_unpin_brt_header(brt))!=0) return r;
return 0;
......
......@@ -206,6 +206,7 @@ void toku_brtheader_free (struct brt_header *h) {
}
toku_fifo_free(&h->fifo);
toku_free(h->roots);
toku_free(h->root_hashes);
toku_free(h->flags_array);
toku_free(h);
}
......@@ -2056,6 +2057,32 @@ int toku_brt_get_fd(BRT brt, int *fdp) {
return 0;
}
static void compute_and_fill_remembered_hash (BRT brt, int rootnum) {
struct remembered_hash *rh = &brt->h->root_hashes[rootnum];
assert(brt->cf); // if cf is null, we'll be hosed.
rh->valid = TRUE;
rh->fnum=toku_cachefile_filenum(brt->cf);
rh->root=brt->h->roots[rootnum];
rh->fullhash = toku_cachetable_hash(brt->cf, rh->root);
}
static u_int32_t get_roothash (BRT brt, int rootnum) {
struct remembered_hash *rh = &brt->h->root_hashes[rootnum];
CACHEKEY root = brt->h->roots[rootnum];
// compare cf first, since cf is NULL for invalid entries.
assert(rh);
//printf("v=%d\n", rh->valid);
if (rh->valid) {
//printf("f=%d\n", rh->fnum.fileid);
//printf("cf=%d\n", toku_cachefile_filenum(brt->cf).fileid);
if (rh->fnum.fileid == toku_cachefile_filenum(brt->cf).fileid)
if (rh->root == root)
return rh->fullhash;
}
compute_and_fill_remembered_hash(brt, rootnum);
return rh->fullhash;
}
int toku_brt_open(BRT t, const char *fname, const char *fname_in_env, const char *dbname, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn, DB *db) {
/* If dbname is NULL then we setup to hold a single tree. Otherwise we setup an array. */
......@@ -2144,11 +2171,15 @@ int toku_brt_open(BRT t, const char *fname, const char *fname_in_env, const char
t->h->n_named_roots = 1;
if ((MALLOC_N(1, t->h->names))==0) { assert(errno==ENOMEM); r=ENOMEM; if (0) { died4: if (dbname) toku_free(t->h->names); } goto died3; }
if ((MALLOC_N(1, t->h->roots))==0) { assert(errno==ENOMEM); r=ENOMEM; if (0) { died5: if (dbname) toku_free(t->h->roots); } goto died4; }
if ((t->h->names[0] = toku_strdup(dbname))==0) { assert(errno==ENOMEM); r=ENOMEM; if (0) { died6: if (dbname) toku_free(t->h->names[0]); } goto died5; }
if ((MALLOC_N(1, t->h->root_hashes))==0) { assert(errno==ENOMEM); r=ENOMEM; if (0) { died6: if (dbname) toku_free(t->h->root_hashes); } goto died5; }
if ((t->h->names[0] = toku_strdup(dbname))==0) { assert(errno==ENOMEM); r=ENOMEM; if (0) { died7: if (dbname) toku_free(t->h->names[0]); } goto died6; }
t->h->roots[0] = t->nodesize;
compute_and_fill_remembered_hash(t, 0);
} else {
MALLOC_N(1, t->h->roots); assert(t->h->roots);
MALLOC_N(1, t->h->root_hashes); assert(t->h->root_hashes);
t->h->roots[0] = t->nodesize;
compute_and_fill_remembered_hash(t, 0);
t->h->n_named_roots = -1;
t->h->names=0;
}
......@@ -2166,13 +2197,13 @@ int toku_brt_open(BRT t, const char *fname, const char *fname_in_env, const char
} else {
lh.u.one.root = t->h->roots[0];
}
if ((r=toku_log_fheader(toku_txn_logger(txn), (LSN*)0, 0, toku_txn_get_txnid(txn), toku_cachefile_filenum(t->cf), lh))) { goto died6; }
if ((r=toku_log_fheader(toku_txn_logger(txn), (LSN*)0, 0, toku_txn_get_txnid(txn), toku_cachefile_filenum(t->cf), lh))) { goto died7; }
}
if ((r=setup_initial_brt_root_node(t, t->nodesize, toku_txn_logger(txn)))!=0) { goto died6; }
if ((r=setup_initial_brt_root_node(t, t->nodesize, toku_txn_logger(txn)))!=0) { goto died7; }
//printf("%s:%d putting %p (%d)\n", __FILE__, __LINE__, t->h, 0);
u_int32_t fullhash = toku_cachefile_fullhash_of_header(t->cf);
t->h->fullhash = fullhash;
if ((r=toku_cachetable_put(t->cf, 0, fullhash, t->h, 0, toku_brtheader_flush_callback, toku_brtheader_fetch_callback, 0))) { goto died6; }
if ((r=toku_cachetable_put(t->cf, 0, fullhash, t->h, 0, toku_brtheader_flush_callback, toku_brtheader_fetch_callback, 0))) { goto died7; }
}
else if (r!=0) {
goto died_after_read_and_pin;
......@@ -2197,6 +2228,7 @@ int toku_brt_open(BRT t, const char *fname, const char *fname_in_env, const char
}
if ((t->h->names = toku_realloc(t->h->names, (1+t->h->n_named_roots)*sizeof(*t->h->names))) == 0) { assert(errno==ENOMEM); r=ENOMEM; goto died_after_read_and_pin; }
if ((t->h->roots = toku_realloc(t->h->roots, (1+t->h->n_named_roots)*sizeof(*t->h->roots))) == 0) { assert(errno==ENOMEM); r=ENOMEM; goto died_after_read_and_pin; }
if ((t->h->root_hashes = toku_realloc(t->h->root_hashes, (1+t->h->n_named_roots)*sizeof(*t->h->root_hashes))) == 0) { assert(errno==ENOMEM); r=ENOMEM; goto died_after_read_and_pin; }
if ((t->h->flags_array = toku_realloc(t->h->flags_array, (1+t->h->n_named_roots)*sizeof(*t->h->flags_array))) == 0) { assert(errno==ENOMEM); r=ENOMEM; goto died_after_read_and_pin; }
t->h->flags_array[t->h->n_named_roots] = t->flags;
t->h->n_named_roots++;
......@@ -2205,6 +2237,7 @@ int toku_brt_open(BRT t, const char *fname, const char *fname_in_env, const char
r = malloc_diskblock_header_is_in_memory(&t->h->roots[t->h->n_named_roots-1], t, t->h->nodesize, toku_txn_logger(txn));
if (r!=0) goto died_after_read_and_pin;
t->h->dirty = 1;
compute_and_fill_remembered_hash(t, t->h->n_named_roots-1);
if ((r=setup_initial_brt_root_node(t, t->h->roots[t->h->n_named_roots-1], toku_txn_logger(txn)))!=0) goto died_after_read_and_pin;
}
} else {
......@@ -2272,14 +2305,16 @@ int toku_brt_remove_subdb(BRT brt, const char *dbname, u_int32_t flags) {
//TODO: Free Diskblocks including root
for (i = found + 1; i < brt->h->n_named_roots; i++) {
brt->h->names[i - 1] = brt->h->names[i];
brt->h->roots[i - 1] = brt->h->roots[i];
brt->h->names[i - 1] = brt->h->names[i];
brt->h->roots[i - 1] = brt->h->roots[i];
brt->h->root_hashes[i - 1] = brt->h->root_hashes[i];
}
brt->h->n_named_roots--;
brt->h->dirty = 1;
//TODO: What if n_named_roots becomes 0? Should we handle it specially? Should we delete the file?
// Q: What if n_named_roots becomes 0? A: Don't do anything. an empty list of named roots is OK.
if ((brt->h->names = toku_realloc(brt->h->names, (brt->h->n_named_roots)*sizeof(*brt->h->names))) == 0) { assert(errno==ENOMEM); r=ENOMEM; goto error; }
if ((brt->h->roots = toku_realloc(brt->h->roots, (brt->h->n_named_roots)*sizeof(*brt->h->roots))) == 0) { assert(errno==ENOMEM); r=ENOMEM; goto error; }
if ((brt->h->root_hashes = toku_realloc(brt->h->root_hashes, (brt->h->n_named_roots)*sizeof(*brt->h->root_hashes))) == 0) { assert(errno==ENOMEM); r=ENOMEM; goto error; }
error:
r2 = toku_unpin_brt_header(brt);
......@@ -2345,14 +2380,16 @@ int toku_close_brt (BRT brt, TOKULOGGER logger) {
int toku_brt_debug_mode = 0;//strcmp(key,"hello387")==0;
CACHEKEY* toku_calculate_root_offset_pointer (BRT brt) {
CACHEKEY* toku_calculate_root_offset_pointer (BRT brt, u_int32_t *roothash) {
if (brt->database_name==0) {
assert(brt->h->n_named_roots==-1);
*roothash = get_roothash(brt, 0);
return &brt->h->roots[0];
} else {
int i;
for (i=0; i<brt->h->n_named_roots; i++) {
if (strcmp(brt->database_name, brt->h->names[i])==0) {
*roothash = get_roothash(brt, i);
return &brt->h->roots[i];
}
}
......@@ -2485,8 +2522,9 @@ int toku_brt_root_put_cmd(BRT brt, BRT_CMD cmd, TOKULOGGER logger) {
}
brt->h->root_put_counter = global_root_put_counter++;
rootp = toku_calculate_root_offset_pointer(brt);
u_int32_t fullhash = toku_cachetable_hash(brt->cf, *rootp);
u_int32_t fullhash;
rootp = toku_calculate_root_offset_pointer(brt, &fullhash);
//assert(fullhash==toku_cachetable_hash(brt->cf, *rootp));
if ((r=toku_cachetable_get_and_pin(brt->cf, *rootp, fullhash, &node_v, NULL,
toku_brtnode_flush_callback, toku_brtnode_fetch_callback, brt))) {
goto died0;
......@@ -2642,7 +2680,8 @@ int toku_dump_brt (BRT brt) {
if (0) { died0: toku_unpin_brt_header(brt); }
return r;
}
rootp = toku_calculate_root_offset_pointer(brt);
u_int32_t fullhash;
rootp = toku_calculate_root_offset_pointer(brt, &fullhash);
printf("split_count=%d\n", split_count);
if ((r = toku_dump_brtnode(brt, *rootp, 0, 0, 0, 0, 0))) goto died0;
if ((r = toku_unpin_brt_header(brt))!=0) return r;
......@@ -2911,10 +2950,13 @@ int toku_brt_search(BRT brt, brt_search_t *search, DBT *newkey, DBT *newval, TOK
*root_put_counter = brt->h->root_put_counter;
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt);
u_int32_t fullhash;
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt, &fullhash);
void *node_v;
rr = toku_cachetable_get_and_pin(brt->cf, *rootp, toku_cachetable_hash(brt->cf, *rootp),
//assert(fullhash == toku_cachetable_hash(brt->cf, *rootp));
rr = toku_cachetable_get_and_pin(brt->cf, *rootp, fullhash,
&node_v, NULL, toku_brtnode_flush_callback, toku_brtnode_fetch_callback, brt);
assert(rr == 0);
......@@ -3457,11 +3499,11 @@ int toku_brt_cursor_get (BRT_CURSOR cursor, DBT *key, DBT *val, int get_flags, T
return r;
}
static void toku_brt_keyrange_internal (BRT brt, CACHEKEY nodename, DBT *key, u_int64_t *less, u_int64_t *equal, u_int64_t *greater) {
static void toku_brt_keyrange_internal (BRT brt, CACHEKEY nodename, u_int32_t fullhash, DBT *key, u_int64_t *less, u_int64_t *equal, u_int64_t *greater) {
BRTNODE node;
{
void *node_v;
u_int32_t fullhash = toku_cachetable_hash(brt->cf, nodename);
//assert(fullhash == toku_cachetable_hash(brt->cf, nodename));
int rr = toku_cachetable_get_and_pin(brt->cf, nodename, fullhash,
&node_v, NULL, toku_brtnode_flush_callback, toku_brtnode_fetch_callback, brt);
assert(rr == 0);
......@@ -3493,7 +3535,7 @@ static void toku_brt_keyrange_internal (BRT brt, CACHEKEY nodename, DBT *key, u_
} else {
// nextcomp>=0 and prevcomp<=0, so something in the subtree could match
// but they are not both zero, so it's not the whole subtree, so we need to recurse
toku_brt_keyrange_internal(brt, BNC_DISKOFF(node, i), key, less, equal, greater);
toku_brt_keyrange_internal(brt, BNC_DISKOFF(node, i), compute_child_fullhash(brt->cf, node, i), key, less, equal, greater);
}
}
} else {
......@@ -3532,10 +3574,11 @@ int toku_brt_keyrange (BRT brt, DBT *key, u_int64_t *less, u_int64_t *equal, u
int rr = toku_read_and_pin_brt_header(brt->cf, &brt->h);
assert(rr == 0);
}
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt);
u_int32_t fullhash;
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt, &fullhash);
*less = *equal = *greater = 0;
toku_brt_keyrange_internal (brt, *rootp, key, less, equal, greater);
toku_brt_keyrange_internal (brt, *rootp, fullhash, key, less, equal, greater);
{
int rr = toku_unpin_brt_header(brt);
assert(rr == 0);
......@@ -3563,9 +3606,11 @@ int toku_brt_height_of_root(BRT brt, int *height) {
if (0) { died0: toku_unpin_brt_header(brt); }
return r;
}
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt);
u_int32_t fullhash;
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt, &fullhash);
void *node_v;
if ((r=toku_cachetable_get_and_pin(brt->cf, *rootp, toku_cachetable_hash(brt->cf, *rootp), &node_v, NULL,
//assert(fullhash == toku_cachetable_hash(brt->cf, *rootp));
if ((r=toku_cachetable_get_and_pin(brt->cf, *rootp, fullhash, &node_v, NULL,
toku_brtnode_flush_callback, toku_brtnode_fetch_callback, brt))) {
goto died0;
}
......
......@@ -142,8 +142,10 @@ static void toku_recover_fheader (LSN UU(lsn), TXNID UU(txnid),FILENUM filenum,L
r=toku_fifo_create(&h->fifo);
assert(r==0);
if ((signed)header.n_named_roots==-1) {
MALLOC_N(1, h->roots); assert(h->roots);
MALLOC_N(1, h->roots); assert(h->roots);
MALLOC_N(1, h->root_hashes); assert(h->root_hashes);
h->roots[0] = header.u.one.root;
h->root_hashes[0].valid= FALSE;
} else {
assert(0);
}
......@@ -656,6 +658,7 @@ void toku_recover_changeunnamedroot (LSN UU(lsn), FILENUM filenum, DISKOFF UU(ol
r = toku_read_and_pin_brt_header(pair->cf, &pair->brt->h);
assert(r==0);
pair->brt->h->roots[0] = newroot;
pair->brt->h->root_hashes[0].valid = FALSE;
r = toku_unpin_brt_header(pair->brt);
}
void toku_recover_changenamedroot (LSN UU(lsn), FILENUM UU(filenum), BYTESTRING UU(name), DISKOFF UU(oldroot), DISKOFF UU(newroot)) { assert(0); }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment