Commit 26fa2ecd authored by John Esmet, committed by Yoni Fogel

[t:4372] marked off some possible errors, fixing comment on top of brt.c


git-svn-id: file:///svn/toku/tokudb@39202 c7de825b-a66e-492c-adef-691d508d4ae1
parent a3462592
...@@ -111,6 +111,16 @@ struct flusher_advice { ...@@ -111,6 +111,16 @@ struct flusher_advice {
void* extra; // parameter passed into callbacks void* extra; // parameter passed into callbacks
}; };
// FIXME all of these need the toku prefix
//
// how about:
//
// toku_brtnode_flush_some_child()
// toku_fa_flusher_advice_init()
// toku_fa_always_recursively_flush()
// toku_fa_dont_destroy_basement_nodes()
// toku_fa_default_merge_child()
// toku_fa_default_pick_child_after_split()
void void
flusher_advice_init( flusher_advice_init(
......
...@@ -1414,6 +1414,8 @@ brt_merge_child( ...@@ -1414,6 +1414,8 @@ brt_merge_child(
} }
} }
// FIXME started_at_root does not exist anymore. this is out of date.
//
// The parameter "started_at_root" is needed to resolve #4147 and #4160, // The parameter "started_at_root" is needed to resolve #4147 and #4160,
// which are subtle interactions of background flushing (cleaner and // which are subtle interactions of background flushing (cleaner and
// flusher threads) and MSN logic. // flusher threads) and MSN logic.
......
...@@ -434,6 +434,7 @@ struct brt { ...@@ -434,6 +434,7 @@ struct brt {
struct toku_list zombie_brt_link; struct toku_list zombie_brt_link;
}; };
// FIXME needs toku prefix
long brtnode_memory_size (BRTNODE node); long brtnode_memory_size (BRTNODE node);
PAIR_ATTR make_brtnode_pair_attr(BRTNODE node); PAIR_ATTR make_brtnode_pair_attr(BRTNODE node);
...@@ -474,11 +475,14 @@ void toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, const DESCRIPTO ...@@ -474,11 +475,14 @@ void toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, const DESCRIPTO
BASEMENTNODE toku_create_empty_bn(void); BASEMENTNODE toku_create_empty_bn(void);
BASEMENTNODE toku_create_empty_bn_no_buffer(void); // create a basement node with a null buffer. BASEMENTNODE toku_create_empty_bn_no_buffer(void); // create a basement node with a null buffer.
NONLEAF_CHILDINFO toku_create_empty_nl(void); NONLEAF_CHILDINFO toku_create_empty_nl(void);
// FIXME needs toku prefix
void destroy_basement_node (BASEMENTNODE bn); void destroy_basement_node (BASEMENTNODE bn);
// FIXME needs toku prefix
void destroy_nonleaf_childinfo (NONLEAF_CHILDINFO nl); void destroy_nonleaf_childinfo (NONLEAF_CHILDINFO nl);
void toku_destroy_brtnode_internals(BRTNODE node); void toku_destroy_brtnode_internals(BRTNODE node);
void toku_brtnode_free (BRTNODE *node); void toku_brtnode_free (BRTNODE *node);
void toku_assert_entire_node_in_memory(BRTNODE node); void toku_assert_entire_node_in_memory(BRTNODE node);
// FIXME needs toku prefix
void bring_node_fully_into_memory(BRTNODE node, struct brt_header* h); void bring_node_fully_into_memory(BRTNODE node, struct brt_header* h);
// append a child node to a parent node // append a child node to a parent node
...@@ -686,6 +690,7 @@ struct pivot_bounds { ...@@ -686,6 +690,7 @@ struct pivot_bounds {
struct kv_pair const * const upper_bound_inclusive; // NULL to indicate negative or positive infinity (which are in practice exclusive since there are now transfinite keys in messages). struct kv_pair const * const upper_bound_inclusive; // NULL to indicate negative or positive infinity (which are in practice exclusive since there are now transfinite keys in messages).
}; };
// FIXME needs toku prefix
void maybe_apply_ancestors_messages_to_node (BRT t, BRTNODE node, ANCESTORS ancestors, struct pivot_bounds const * const bounds); void maybe_apply_ancestors_messages_to_node (BRT t, BRTNODE node, ANCESTORS ancestors, struct pivot_bounds const * const bounds);
int int
...@@ -793,7 +798,6 @@ int toku_db_badformat(void) __attribute__((__warn_unused_result__)); ...@@ -793,7 +798,6 @@ int toku_db_badformat(void) __attribute__((__warn_unused_result__));
int toku_brt_remove_on_commit(TOKUTXN child, DBT* iname_dbt_p) __attribute__((__warn_unused_result__)); int toku_brt_remove_on_commit(TOKUTXN child, DBT* iname_dbt_p) __attribute__((__warn_unused_result__));
int toku_brt_remove_now(CACHETABLE ct, DBT* iname_dbt_p) __attribute__((__warn_unused_result__)); int toku_brt_remove_now(CACHETABLE ct, DBT* iname_dbt_p) __attribute__((__warn_unused_result__));
typedef struct brt_upgrade_status { typedef struct brt_upgrade_status {
u_int64_t header_13; // how many headers were upgraded from version 13 u_int64_t header_13; // how many headers were upgraded from version 13
u_int64_t nonleaf_13; u_int64_t nonleaf_13;
...@@ -869,6 +873,7 @@ struct brt_status { ...@@ -869,6 +873,7 @@ struct brt_status {
void toku_brt_get_status(BRT_STATUS); void toku_brt_get_status(BRT_STATUS);
// FIXME needs toku prefix
void void
brt_leaf_apply_cmd_once ( brt_leaf_apply_cmd_once (
BRTNODE leafnode, BRTNODE leafnode,
...@@ -881,6 +886,7 @@ brt_leaf_apply_cmd_once ( ...@@ -881,6 +886,7 @@ brt_leaf_apply_cmd_once (
uint64_t *workdonep uint64_t *workdonep
); );
// FIXME needs toku prefix
void void
brt_leaf_put_cmd ( brt_leaf_put_cmd (
brt_compare_func compare_fun, brt_compare_func compare_fun,
...@@ -907,6 +913,7 @@ void toku_apply_cmd_to_leaf( ...@@ -907,6 +913,7 @@ void toku_apply_cmd_to_leaf(
OMT live_list_reverse OMT live_list_reverse
); );
// FIXME needs toku prefix
void brtnode_put_cmd ( void brtnode_put_cmd (
brt_compare_func compare_fun, brt_compare_func compare_fun,
brt_update_func update_fun, brt_update_func update_fun,
...@@ -918,18 +925,14 @@ void brtnode_put_cmd ( ...@@ -918,18 +925,14 @@ void brtnode_put_cmd (
OMT live_list_reverse OMT live_list_reverse
); );
void toku_reset_root_xid_that_created(BRT brt, TXNID new_root_xid_that_created); void toku_reset_root_xid_that_created(BRT brt, TXNID new_root_xid_that_created);
// Reset the root_xid_that_created field to the given value. // Reset the root_xid_that_created field to the given value.
// This redefines which xid created the dictionary. // This redefines which xid created the dictionary.
void toku_flusher_thread_set_callback(void (*callback_f)(int, void*), void* extra); void toku_flusher_thread_set_callback(void (*callback_f)(int, void*), void* extra);
void toku_brt_header_note_hot_begin(BRT brt); void toku_brt_header_note_hot_begin(BRT brt);
void toku_brt_header_note_hot_complete(BRT brt, BOOL success, MSN msn_at_start_of_hot); void toku_brt_header_note_hot_complete(BRT brt, BOOL success, MSN msn_at_start_of_hot);
C_END C_END
#endif #endif
...@@ -3,38 +3,71 @@ ...@@ -3,38 +3,71 @@
#ident "Copyright (c) 2007-2010 Tokutek Inc. All rights reserved." #ident "Copyright (c) 2007-2010 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
/* /*
Managing the tree shape: How insertion, deletion, and querying work Managing the tree shape: How insertion, deletion, and querying work
When we insert a message into the BRT, here's what happens. When we insert a message into the BRT, here's what happens.
insert_a_message_at_root(msg): insert_a_message_at_root(msg) {
root = find the root node root = find the root node
if root needs to be split (has fissible reactivity): if root needs to be split {
split the root split the root
root = find the new root node root = find the new root node
insert_msg_into_node_buffer(root, msg) }
if root has too many messages in its buffer and is a nonleaf node: insert_msg_into_node_buffer(root, msg)
flush the buffer on a background thread if root has too many messages in its buffer and is a nonleaf node {
child = heaviest child of root
if that child is non reactive and pinnable {
target = child
buffer = child's buffer
} else {
target = root
buffer = null
}
post a flusher thread work item to the cachetable kibbutz = {
flush_nonleaf_node(target, buffer)
}
}
}
flush_nonleaf_node(node, buffer) {
if we have a specific target node and non null buffer to flush {
flush_buffer_to_node(node, buffer)
if that node is now gorged and needs flushing {
flush_some_child(node, advice)
}
} else {
// the buffer is null, so the node given is not going to
// be the TARGET of a flush, but instead the source of one.
// we should find some child of the node and flush it.
flush_some_child(node, advice)
}
}
flush_some_child(parent, advice) {
child = advice->pick_child()
buffer = remove_child_buffer_from_parent(parent, child)
if the buffer is non null {
flush_buffer_to_node(child, buffer)
}
if child is stable and the advice says to recursively flush {
flush_some_child(child, advice)
} else if child needs to be split {
split the child
} else if the child _could_ be merged {
maybe_merge_child(child, parent)
}
}
We also have a background cleaner thread that traverses and flattens the tree:
cleaner_thread() {
}
flush_nonleaf_node(node, height, key):
To process a nonleaf node (height, key)
Note: Height is always > 0.
Note: This process occurs asynchronously, but we get the YDB lock at the beginning.
Descend the tree following KEY until a node of HEIGHT is found.
While the node is too full then
pick the heaviest child
bring that child into memory (use nonblocking get_and_pin, which means that if we get a try-again, we go back up and restart the process_a_node job).
move all messages for that child from the node to the child.
If the child needs to be split or merged, then split or merge the child.
If the resulting child's (or children's) buffers are too full then create a work item for each such child to process the child. (This can only happen
for nonleaf children, since otherwise there are no buffers to be too full).
We also have a background thread that traverses the tree (relatively slowly) to flatten the tree.
Background_flattener:
Its state is a height and a key and a child number Its state is a height and a key and a child number
Repeat: Repeat:
sleep (say 1s) sleep (say 1s)
...@@ -53,7 +86,6 @@ Background_flattener: ...@@ -53,7 +86,6 @@ Background_flattener:
It may be important for the flattener not to run if there've been no message insertions since the last time it ran. It may be important for the flattener not to run if there've been no message insertions since the last time it ran.
The background flattener should also garbage collect MVCC versions. The flattener should remember the MVCC versions it has encountered The background flattener should also garbage collect MVCC versions. The flattener should remember the MVCC versions it has encountered
so that if any of those are no longer live, it can run again. so that if any of those are no longer live, it can run again.
To shrink a file: Let X be the size of the reachable data. To shrink a file: Let X be the size of the reachable data.
We define an acceptable bloat constant of C. For example we set C=2 if we are willing to allow the file to be as much as 2X in size. We define an acceptable bloat constant of C. For example we set C=2 if we are willing to allow the file to be as much as 2X in size.
......
...@@ -307,6 +307,7 @@ struct cachefile { ...@@ -307,6 +307,7 @@ struct cachefile {
// when things finish. // when things finish.
}; };
// FIXME global with no toku prefix
void add_background_job(CACHEFILE cf, bool already_locked) void add_background_job(CACHEFILE cf, bool already_locked)
{ {
if (!already_locked) { if (!already_locked) {
...@@ -318,6 +319,7 @@ void add_background_job(CACHEFILE cf, bool already_locked) ...@@ -318,6 +319,7 @@ void add_background_job(CACHEFILE cf, bool already_locked)
} }
} }
// FIXME global with no toku prefix
void remove_background_job(CACHEFILE cf, bool already_locked) void remove_background_job(CACHEFILE cf, bool already_locked)
{ {
if (!already_locked) { if (!already_locked) {
...@@ -331,12 +333,14 @@ void remove_background_job(CACHEFILE cf, bool already_locked) ...@@ -331,12 +333,14 @@ void remove_background_job(CACHEFILE cf, bool already_locked)
} }
} }
// FIXME global with no toku prefix
void cachefile_kibbutz_enq (CACHEFILE cf, void (*f)(void*), void *extra) void cachefile_kibbutz_enq (CACHEFILE cf, void (*f)(void*), void *extra)
// The function f must call remove_background_job when it completes // The function f must call remove_background_job when it completes
{ {
add_background_job(cf, false); add_background_job(cf, false);
toku_kibbutz_enq(cf->cachetable->kibbutz, f, extra); toku_kibbutz_enq(cf->cachetable->kibbutz, f, extra);
} }
static void wait_on_background_jobs_to_finish (CACHEFILE cf) { static void wait_on_background_jobs_to_finish (CACHEFILE cf) {
cachetable_lock(cf->cachetable); cachetable_lock(cf->cachetable);
while (cf->n_background_jobs>0) { while (cf->n_background_jobs>0) {
...@@ -3866,6 +3870,7 @@ cleaner_thread_rate_pair(PAIR p) ...@@ -3866,6 +3870,7 @@ cleaner_thread_rate_pair(PAIR p)
static int const CLEANER_N_TO_CHECK = 8; static int const CLEANER_N_TO_CHECK = 8;
// FIXME this is global but no one uses it except cachetable.c
int int
toku_cleaner_thread (void *cachetable_v) toku_cleaner_thread (void *cachetable_v)
// Effect: runs a cleaner. // Effect: runs a cleaner.
......
...@@ -5891,6 +5891,7 @@ locked_db_stat64 (DB *db, DB_TXN *txn, DB_BTREE_STAT64 *s) { ...@@ -5891,6 +5891,7 @@ locked_db_stat64 (DB *db, DB_TXN *txn, DB_BTREE_STAT64 *s) {
return r; return r;
} }
static int static int
toku_db_key_range64(DB* db, DB_TXN* txn __attribute__((__unused__)), DBT* key, u_int64_t* less, u_int64_t* equal, u_int64_t* greater, int* is_exact) { toku_db_key_range64(DB* db, DB_TXN* txn __attribute__((__unused__)), DBT* key, u_int64_t* less, u_int64_t* equal, u_int64_t* greater, int* is_exact) {
HANDLE_PANICKED_DB(db); HANDLE_PANICKED_DB(db);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment