Commit f165ee02 authored by Leif Walsh's avatar Leif Walsh Committed by Yoni Fogel

[t:4002] Committing HOT to main.

git-svn-id: file:///svn/toku/tokudb@38549 c7de825b-a66e-492c-adef-691d508d4ae1
parent e44c7d7a
......@@ -197,7 +197,7 @@ typedef struct __toku_engine_status {
uint64_t cleaner_max_buffer_workdone; /* max workdone value of any message buffer flushed by cleaner thread */
uint64_t cleaner_min_buffer_workdone; /* min workdone value of any message buffer flushed by cleaner thread */
uint64_t cleaner_total_buffer_workdone; /* total workdone value of message buffers flushed by cleaner thread */
uint64_t cleaner_num_leaves_unmerged; /* number of leaves left unmerged by the cleaner thread */
uint64_t cleaner_num_dirtied_for_leaf_merge; /* nodes dirtied by the "flush from root" process to merge a leaf node */
uint64_t flush_total; /* total number of flushes done by flusher threads or cleaner threads */
uint64_t flush_in_memory; /* number of in memory flushes */
uint64_t flush_needed_io; /* number of flushes that had to read a child (or part) off disk */
......@@ -223,6 +223,10 @@ typedef struct __toku_engine_status {
uint64_t dirty_leaf; /* number of times leaf nodes are dirtied when previously clean */
uint64_t dirty_nonleaf; /* number of times nonleaf nodes are dirtied when previously clean */
uint64_t balance_leaf; /* number of times a leaf node is balanced inside brt */
uint64_t hot_num_started; /* number of HOT operations that have begun */
uint64_t hot_num_completed; /* number of HOT operations that have successfully completed */
uint64_t hot_num_aborted; /* number of HOT operations that have been aborted */
uint64_t hot_max_root_flush_count; /* max number of flushes from root ever required to optimize a tree */
uint64_t msg_bytes_in; /* how many bytes of messages injected at root (for all trees)*/
uint64_t msg_bytes_out; /* how many bytes of messages flushed from h1 nodes to leaves*/
uint64_t msg_bytes_curr; /* how many bytes of messages currently in trees (estimate)*/
......@@ -541,6 +545,7 @@ struct __toku_db {
int (*getf_set)(DB*, DB_TXN*, u_int32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set without a persistent cursor) */;
int (*flatten)(DB*, DB_TXN*) /* Flatten a dictionary, similar to (but faster than) a table scan */;
int (*optimize)(DB*) /* Run garbage collecion and promote all transactions older than oldest. Amortized (happens during flattening) */;
int (*hot_optimize)(DB*, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra);
int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION);
int (*get_readpagesize)(DB*,u_int32_t*);
int (*set_readpagesize)(DB*,u_int32_t);
......@@ -549,7 +554,7 @@ struct __toku_db {
int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going);
int (*update)(DB *, DB_TXN*, const DBT *key, const DBT *extra, u_int32_t flags);
int (*update_broadcast)(DB *, DB_TXN*, const DBT *extra, u_int32_t flags);
void* __toku_dummy0[11];
void* __toku_dummy0[10];
char __toku_dummy1[96];
void *api_internal; /* 32-bit offset=236 size=4, 64=bit offset=376 size=8 */
void* __toku_dummy2[5];
......
......@@ -197,7 +197,7 @@ typedef struct __toku_engine_status {
uint64_t cleaner_max_buffer_workdone; /* max workdone value of any message buffer flushed by cleaner thread */
uint64_t cleaner_min_buffer_workdone; /* min workdone value of any message buffer flushed by cleaner thread */
uint64_t cleaner_total_buffer_workdone; /* total workdone value of message buffers flushed by cleaner thread */
uint64_t cleaner_num_leaves_unmerged; /* number of leaves left unmerged by the cleaner thread */
uint64_t cleaner_num_dirtied_for_leaf_merge; /* nodes dirtied by the "flush from root" process to merge a leaf node */
uint64_t flush_total; /* total number of flushes done by flusher threads or cleaner threads */
uint64_t flush_in_memory; /* number of in memory flushes */
uint64_t flush_needed_io; /* number of flushes that had to read a child (or part) off disk */
......@@ -223,6 +223,10 @@ typedef struct __toku_engine_status {
uint64_t dirty_leaf; /* number of times leaf nodes are dirtied when previously clean */
uint64_t dirty_nonleaf; /* number of times nonleaf nodes are dirtied when previously clean */
uint64_t balance_leaf; /* number of times a leaf node is balanced inside brt */
uint64_t hot_num_started; /* number of HOT operations that have begun */
uint64_t hot_num_completed; /* number of HOT operations that have successfully completed */
uint64_t hot_num_aborted; /* number of HOT operations that have been aborted */
uint64_t hot_max_root_flush_count; /* max number of flushes from root ever required to optimize a tree */
uint64_t msg_bytes_in; /* how many bytes of messages injected at root (for all trees)*/
uint64_t msg_bytes_out; /* how many bytes of messages flushed from h1 nodes to leaves*/
uint64_t msg_bytes_curr; /* how many bytes of messages currently in trees (estimate)*/
......@@ -551,6 +555,7 @@ struct __toku_db {
int (*getf_set)(DB*, DB_TXN*, u_int32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set without a persistent cursor) */;
int (*flatten)(DB*, DB_TXN*) /* Flatten a dictionary, similar to (but faster than) a table scan */;
int (*optimize)(DB*) /* Run garbage collecion and promote all transactions older than oldest. Amortized (happens during flattening) */;
int (*hot_optimize)(DB*, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra);
int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION);
int (*get_readpagesize)(DB*,u_int32_t*);
int (*set_readpagesize)(DB*,u_int32_t);
......@@ -559,7 +564,7 @@ struct __toku_db {
int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going);
int (*update)(DB *, DB_TXN*, const DBT *key, const DBT *extra, u_int32_t flags);
int (*update_broadcast)(DB *, DB_TXN*, const DBT *extra, u_int32_t flags);
void* __toku_dummy0[14];
void* __toku_dummy0[13];
char __toku_dummy1[96];
void *api_internal; /* 32-bit offset=248 size=4, 64=bit offset=400 size=8 */
void* __toku_dummy2[5];
......
......@@ -197,7 +197,7 @@ typedef struct __toku_engine_status {
uint64_t cleaner_max_buffer_workdone; /* max workdone value of any message buffer flushed by cleaner thread */
uint64_t cleaner_min_buffer_workdone; /* min workdone value of any message buffer flushed by cleaner thread */
uint64_t cleaner_total_buffer_workdone; /* total workdone value of message buffers flushed by cleaner thread */
uint64_t cleaner_num_leaves_unmerged; /* number of leaves left unmerged by the cleaner thread */
uint64_t cleaner_num_dirtied_for_leaf_merge; /* nodes dirtied by the "flush from root" process to merge a leaf node */
uint64_t flush_total; /* total number of flushes done by flusher threads or cleaner threads */
uint64_t flush_in_memory; /* number of in memory flushes */
uint64_t flush_needed_io; /* number of flushes that had to read a child (or part) off disk */
......@@ -223,6 +223,10 @@ typedef struct __toku_engine_status {
uint64_t dirty_leaf; /* number of times leaf nodes are dirtied when previously clean */
uint64_t dirty_nonleaf; /* number of times nonleaf nodes are dirtied when previously clean */
uint64_t balance_leaf; /* number of times a leaf node is balanced inside brt */
uint64_t hot_num_started; /* number of HOT operations that have begun */
uint64_t hot_num_completed; /* number of HOT operations that have successfully completed */
uint64_t hot_num_aborted; /* number of HOT operations that have been aborted */
uint64_t hot_max_root_flush_count; /* max number of flushes from root ever required to optimize a tree */
uint64_t msg_bytes_in; /* how many bytes of messages injected at root (for all trees)*/
uint64_t msg_bytes_out; /* how many bytes of messages flushed from h1 nodes to leaves*/
uint64_t msg_bytes_curr; /* how many bytes of messages currently in trees (estimate)*/
......@@ -553,6 +557,7 @@ struct __toku_db {
int (*getf_set)(DB*, DB_TXN*, u_int32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set without a persistent cursor) */;
int (*flatten)(DB*, DB_TXN*) /* Flatten a dictionary, similar to (but faster than) a table scan */;
int (*optimize)(DB*) /* Run garbage collecion and promote all transactions older than oldest. Amortized (happens during flattening) */;
int (*hot_optimize)(DB*, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra);
int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION);
int (*get_readpagesize)(DB*,u_int32_t*);
int (*set_readpagesize)(DB*,u_int32_t);
......@@ -561,7 +566,7 @@ struct __toku_db {
int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going);
int (*update)(DB *, DB_TXN*, const DBT *key, const DBT *extra, u_int32_t flags);
int (*update_broadcast)(DB *, DB_TXN*, const DBT *extra, u_int32_t flags);
void* __toku_dummy0[16];
void* __toku_dummy0[15];
char __toku_dummy1[96];
void *api_internal; /* 32-bit offset=256 size=4, 64=bit offset=416 size=8 */
void* __toku_dummy2[5];
......
......@@ -197,7 +197,7 @@ typedef struct __toku_engine_status {
uint64_t cleaner_max_buffer_workdone; /* max workdone value of any message buffer flushed by cleaner thread */
uint64_t cleaner_min_buffer_workdone; /* min workdone value of any message buffer flushed by cleaner thread */
uint64_t cleaner_total_buffer_workdone; /* total workdone value of message buffers flushed by cleaner thread */
uint64_t cleaner_num_leaves_unmerged; /* number of leaves left unmerged by the cleaner thread */
uint64_t cleaner_num_dirtied_for_leaf_merge; /* nodes dirtied by the "flush from root" process to merge a leaf node */
uint64_t flush_total; /* total number of flushes done by flusher threads or cleaner threads */
uint64_t flush_in_memory; /* number of in memory flushes */
uint64_t flush_needed_io; /* number of flushes that had to read a child (or part) off disk */
......@@ -223,6 +223,10 @@ typedef struct __toku_engine_status {
uint64_t dirty_leaf; /* number of times leaf nodes are dirtied when previously clean */
uint64_t dirty_nonleaf; /* number of times nonleaf nodes are dirtied when previously clean */
uint64_t balance_leaf; /* number of times a leaf node is balanced inside brt */
uint64_t hot_num_started; /* number of HOT operations that have begun */
uint64_t hot_num_completed; /* number of HOT operations that have successfully completed */
uint64_t hot_num_aborted; /* number of HOT operations that have been aborted */
uint64_t hot_max_root_flush_count; /* max number of flushes from root ever required to optimize a tree */
uint64_t msg_bytes_in; /* how many bytes of messages injected at root (for all trees)*/
uint64_t msg_bytes_out; /* how many bytes of messages flushed from h1 nodes to leaves*/
uint64_t msg_bytes_curr; /* how many bytes of messages currently in trees (estimate)*/
......@@ -553,6 +557,7 @@ struct __toku_db {
int (*getf_set)(DB*, DB_TXN*, u_int32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set without a persistent cursor) */;
int (*flatten)(DB*, DB_TXN*) /* Flatten a dictionary, similar to (but faster than) a table scan */;
int (*optimize)(DB*) /* Run garbage collecion and promote all transactions older than oldest. Amortized (happens during flattening) */;
int (*hot_optimize)(DB*, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra);
int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION);
int (*get_readpagesize)(DB*,u_int32_t*);
int (*set_readpagesize)(DB*,u_int32_t);
......@@ -561,7 +566,7 @@ struct __toku_db {
int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going);
int (*update)(DB *, DB_TXN*, const DBT *key, const DBT *extra, u_int32_t flags);
int (*update_broadcast)(DB *, DB_TXN*, const DBT *extra, u_int32_t flags);
void* __toku_dummy0[19];
void* __toku_dummy0[18];
char __toku_dummy1[96];
void *api_internal; /* 32-bit offset=268 size=4, 64=bit offset=440 size=8 */
void* __toku_dummy2[5];
......
......@@ -197,7 +197,7 @@ typedef struct __toku_engine_status {
uint64_t cleaner_max_buffer_workdone; /* max workdone value of any message buffer flushed by cleaner thread */
uint64_t cleaner_min_buffer_workdone; /* min workdone value of any message buffer flushed by cleaner thread */
uint64_t cleaner_total_buffer_workdone; /* total workdone value of message buffers flushed by cleaner thread */
uint64_t cleaner_num_leaves_unmerged; /* number of leaves left unmerged by the cleaner thread */
uint64_t cleaner_num_dirtied_for_leaf_merge; /* nodes dirtied by the "flush from root" process to merge a leaf node */
uint64_t flush_total; /* total number of flushes done by flusher threads or cleaner threads */
uint64_t flush_in_memory; /* number of in memory flushes */
uint64_t flush_needed_io; /* number of flushes that had to read a child (or part) off disk */
......@@ -223,6 +223,10 @@ typedef struct __toku_engine_status {
uint64_t dirty_leaf; /* number of times leaf nodes are dirtied when previously clean */
uint64_t dirty_nonleaf; /* number of times nonleaf nodes are dirtied when previously clean */
uint64_t balance_leaf; /* number of times a leaf node is balanced inside brt */
uint64_t hot_num_started; /* number of HOT operations that have begun */
uint64_t hot_num_completed; /* number of HOT operations that have successfully completed */
uint64_t hot_num_aborted; /* number of HOT operations that have been aborted */
uint64_t hot_max_root_flush_count; /* max number of flushes from root ever required to optimize a tree */
uint64_t msg_bytes_in; /* how many bytes of messages injected at root (for all trees)*/
uint64_t msg_bytes_out; /* how many bytes of messages flushed from h1 nodes to leaves*/
uint64_t msg_bytes_curr; /* how many bytes of messages currently in trees (estimate)*/
......@@ -556,6 +560,7 @@ struct __toku_db {
int (*getf_set)(DB*, DB_TXN*, u_int32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set without a persistent cursor) */;
int (*flatten)(DB*, DB_TXN*) /* Flatten a dictionary, similar to (but faster than) a table scan */;
int (*optimize)(DB*) /* Run garbage collecion and promote all transactions older than oldest. Amortized (happens during flattening) */;
int (*hot_optimize)(DB*, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra);
int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION);
int (*get_readpagesize)(DB*,u_int32_t*);
int (*set_readpagesize)(DB*,u_int32_t);
......@@ -564,7 +569,7 @@ struct __toku_db {
int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going);
int (*update)(DB *, DB_TXN*, const DBT *key, const DBT *extra, u_int32_t flags);
int (*update_broadcast)(DB *, DB_TXN*, const DBT *extra, u_int32_t flags);
void* __toku_dummy1[23];
void* __toku_dummy1[22];
char __toku_dummy2[80];
void *api_internal; /* 32-bit offset=276 size=4, 64=bit offset=464 size=8 */
void* __toku_dummy3[5];
......
......@@ -590,7 +590,7 @@ int main (int argc __attribute__((__unused__)), char *const argv[] __attribute__
printf(" uint64_t cleaner_max_buffer_workdone; /* max workdone value of any message buffer flushed by cleaner thread */\n");
printf(" uint64_t cleaner_min_buffer_workdone; /* min workdone value of any message buffer flushed by cleaner thread */\n");
printf(" uint64_t cleaner_total_buffer_workdone; /* total workdone value of message buffers flushed by cleaner thread */\n");
printf(" uint64_t cleaner_num_leaves_unmerged; /* number of leaves left unmerged by the cleaner thread */\n");
printf(" uint64_t cleaner_num_dirtied_for_leaf_merge; /* nodes dirtied by the \"flush from root\" process to merge a leaf node */\n");
printf(" uint64_t flush_total; /* total number of flushes done by flusher threads or cleaner threads */\n");
printf(" uint64_t flush_in_memory; /* number of in memory flushes */\n");
printf(" uint64_t flush_needed_io; /* number of flushes that had to read a child (or part) off disk */\n");
......@@ -616,6 +616,10 @@ int main (int argc __attribute__((__unused__)), char *const argv[] __attribute__
printf(" uint64_t dirty_leaf; /* number of times leaf nodes are dirtied when previously clean */\n");
printf(" uint64_t dirty_nonleaf; /* number of times nonleaf nodes are dirtied when previously clean */\n");
printf(" uint64_t balance_leaf; /* number of times a leaf node is balanced inside brt */\n");
printf(" uint64_t hot_num_started; /* number of HOT operations that have begun */\n");
printf(" uint64_t hot_num_completed; /* number of HOT operations that have successfully completed */\n");
printf(" uint64_t hot_num_aborted; /* number of HOT operations that have been aborted */\n");
printf(" uint64_t hot_max_root_flush_count; /* max number of flushes from root ever required to optimize a tree */\n");
printf(" uint64_t msg_bytes_in; /* how many bytes of messages injected at root (for all trees)*/\n");
printf(" uint64_t msg_bytes_out; /* how many bytes of messages flushed from h1 nodes to leaves*/\n");
printf(" uint64_t msg_bytes_curr; /* how many bytes of messages currently in trees (estimate)*/\n");
......@@ -804,6 +808,7 @@ int main (int argc __attribute__((__unused__)), char *const argv[] __attribute__
"int (*getf_set)(DB*, DB_TXN*, u_int32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set without a persistent cursor) */",
"int (*flatten)(DB*, DB_TXN*) /* Flatten a dictionary, similar to (but faster than) a table scan */",
"int (*optimize)(DB*) /* Run garbage collecion and promote all transactions older than oldest. Amortized (happens during flattening) */",
"int (*hot_optimize)(DB*, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra)",
"int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION)",
"int (*get_readpagesize)(DB*,u_int32_t*)",
"int (*set_readpagesize)(DB*,u_int32_t)",
......
......@@ -197,7 +197,7 @@ typedef struct __toku_engine_status {
uint64_t cleaner_max_buffer_workdone; /* max workdone value of any message buffer flushed by cleaner thread */
uint64_t cleaner_min_buffer_workdone; /* min workdone value of any message buffer flushed by cleaner thread */
uint64_t cleaner_total_buffer_workdone; /* total workdone value of message buffers flushed by cleaner thread */
uint64_t cleaner_num_leaves_unmerged; /* number of leaves left unmerged by the cleaner thread */
uint64_t cleaner_num_dirtied_for_leaf_merge; /* nodes dirtied by the "flush from root" process to merge a leaf node */
uint64_t flush_total; /* total number of flushes done by flusher threads or cleaner threads */
uint64_t flush_in_memory; /* number of in memory flushes */
uint64_t flush_needed_io; /* number of flushes that had to read a child (or part) off disk */
......@@ -223,6 +223,10 @@ typedef struct __toku_engine_status {
uint64_t dirty_leaf; /* number of times leaf nodes are dirtied when previously clean */
uint64_t dirty_nonleaf; /* number of times nonleaf nodes are dirtied when previously clean */
uint64_t balance_leaf; /* number of times a leaf node is balanced inside brt */
uint64_t hot_num_started; /* number of HOT operations that have begun */
uint64_t hot_num_completed; /* number of HOT operations that have successfully completed */
uint64_t hot_num_aborted; /* number of HOT operations that have been aborted */
uint64_t hot_max_root_flush_count; /* max number of flushes from root ever required to optimize a tree */
uint64_t msg_bytes_in; /* how many bytes of messages injected at root (for all trees)*/
uint64_t msg_bytes_out; /* how many bytes of messages flushed from h1 nodes to leaves*/
uint64_t msg_bytes_curr; /* how many bytes of messages currently in trees (estimate)*/
......@@ -525,6 +529,7 @@ struct __toku_db {
int (*getf_set)(DB*, DB_TXN*, u_int32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set without a persistent cursor) */;
int (*flatten)(DB*, DB_TXN*) /* Flatten a dictionary, similar to (but faster than) a table scan */;
int (*optimize)(DB*) /* Run garbage collecion and promote all transactions older than oldest. Amortized (happens during flattening) */;
int (*hot_optimize)(DB*, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra);
int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION);
int (*get_readpagesize)(DB*,u_int32_t*);
int (*set_readpagesize)(DB*,u_int32_t);
......
......@@ -197,7 +197,7 @@ typedef struct __toku_engine_status {
uint64_t cleaner_max_buffer_workdone; /* max workdone value of any message buffer flushed by cleaner thread */
uint64_t cleaner_min_buffer_workdone; /* min workdone value of any message buffer flushed by cleaner thread */
uint64_t cleaner_total_buffer_workdone; /* total workdone value of message buffers flushed by cleaner thread */
uint64_t cleaner_num_leaves_unmerged; /* number of leaves left unmerged by the cleaner thread */
uint64_t cleaner_num_dirtied_for_leaf_merge; /* nodes dirtied by the "flush from root" process to merge a leaf node */
uint64_t flush_total; /* total number of flushes done by flusher threads or cleaner threads */
uint64_t flush_in_memory; /* number of in memory flushes */
uint64_t flush_needed_io; /* number of flushes that had to read a child (or part) off disk */
......@@ -223,6 +223,10 @@ typedef struct __toku_engine_status {
uint64_t dirty_leaf; /* number of times leaf nodes are dirtied when previously clean */
uint64_t dirty_nonleaf; /* number of times nonleaf nodes are dirtied when previously clean */
uint64_t balance_leaf; /* number of times a leaf node is balanced inside brt */
uint64_t hot_num_started; /* number of HOT operations that have begun */
uint64_t hot_num_completed; /* number of HOT operations that have successfully completed */
uint64_t hot_num_aborted; /* number of HOT operations that have been aborted */
uint64_t hot_max_root_flush_count; /* max number of flushes from root ever required to optimize a tree */
uint64_t msg_bytes_in; /* how many bytes of messages injected at root (for all trees)*/
uint64_t msg_bytes_out; /* how many bytes of messages flushed from h1 nodes to leaves*/
uint64_t msg_bytes_curr; /* how many bytes of messages currently in trees (estimate)*/
......@@ -525,6 +529,7 @@ struct __toku_db {
int (*getf_set)(DB*, DB_TXN*, u_int32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set without a persistent cursor) */;
int (*flatten)(DB*, DB_TXN*) /* Flatten a dictionary, similar to (but faster than) a table scan */;
int (*optimize)(DB*) /* Run garbage collecion and promote all transactions older than oldest. Amortized (happens during flattening) */;
int (*hot_optimize)(DB*, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra);
int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION);
int (*get_readpagesize)(DB*,u_int32_t*);
int (*set_readpagesize)(DB*,u_int32_t);
......
......@@ -51,6 +51,7 @@ BRT_SOURCES = \
brt \
brt-cachetable-wrappers \
brt-flusher \
brt-hot-flusher \
brt_msg \
brt-test-helpers \
cachetable \
......
......@@ -8,7 +8,7 @@ static int brt_root_put_cmd_XY (BRT brt, BRT_MSG *md, TOKUTXN txn) {
if (0) { died0: toku_unpin_brt_header(brt); }
return r;
}
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt);
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt->h);
if ((r=cachetable_get_and_pin(brt->cf, *rootp, &node_v, NULL,
toku_brtnode_flush_callback, toku_brtnode_fetch_callback, (void*)(long)brt->h->nodesize))) {
goto died0;
......
......@@ -6,6 +6,7 @@
#include <brt-cachetable-wrappers.h>
#include <brttypes.h>
#include <brt-flusher.h>
#include <brt-internal.h>
#include <cachetable.h>
......
/* -*- mode: C; c-basic-offset: 4 -*- */
#ifndef BRT_FLUSHER_INTERNAL
#define BRT_FLUSHER_INTERNAL
#ident "$Id$"
#ident "Copyright (c) 2007-2010 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include <brttypes.h>
#include <c_dialects.h>
C_BEGIN
typedef struct flusher_advice FLUSHER_ADVICE;
/**
 * Choose a child to flush to. Returns a childnum, or -1 if we should
 * go no further.
 *
 * Flusher threads: pick the heaviest child buffer
 * Cleaner threads: pick the heaviest child buffer
 * Cleaner thread merging leaf nodes: follow down to a key
 * Hot optimize table: follow down to the right of a key
 */
typedef int (*FA_PICK_CHILD)(struct brt_header *h, BRTNODE parent, void* extra);
/**
 * Decide whether to call `flush_some_child` on the child if it is
 * stable and a nonleaf node.
 *
 * Flusher threads: yes if child is gorged
 * Cleaner threads: yes if child is gorged
 * Cleaner thread merging leaf nodes: always yes
 * Hot optimize table: always yes
 */
typedef bool (*FA_SHOULD_RECURSIVELY_FLUSH)(BRTNODE child, void* extra);
/**
 * Called if the child needs merging. Should do something to get the
 * child out of a fusible state. Must unpin parent and child.
 *
 * Flusher threads: just do the merge
 * Cleaner threads: if nonleaf, just merge, otherwise start a "cleaner
 * thread merge"
 * Cleaner thread merging leaf nodes: just do the merge
 * Hot optimize table: just do the merge
 */
typedef void (*FA_MAYBE_MERGE_CHILD)(struct flusher_advice *fa,
struct brt_header *h,
BRTNODE parent,
int childnum,
BRTNODE child,
void* extra);
/**
 * Cleaner threads may need to destroy basement nodes which have been
 * brought more up to date than the height 1 node flushing to them.
 * This function is used to determine if we need to check for basement
 * nodes that are too up to date, and then destroy them if we find
 * them.
 *
 * Flusher threads: no
 * Cleaner threads: yes
 * Cleaner thread merging leaf nodes: no
 * Hot optimize table: no
 */
typedef bool (*FA_SHOULD_DESTROY_BN)(void* extra);
/**
 * Update `brt_flusher_status` in whatever way necessary. Called once
 * by `flush_some_child` right before choosing what to do next (split,
 * merge, recurse), with the number of nodes that were dirtied by this
 * execution of `flush_some_child`.
 */
typedef void (*FA_UPDATE_STATUS)(BRTNODE child, int dirtied, void* extra);
/**
 * Choose whether to go to the left or right child after a split. Called
 * by `brt_split_child`. If -1 is returned, `brt_split_child` defaults to
 * the old behavior.
 */
typedef int (*FA_PICK_CHILD_AFTER_SPLIT)(struct brt_header* h,
BRTNODE node,
int childnuma,
int childnumb,
void* extra);
/**
 * A collection of callbacks used by the flushing machinery to make
 * various decisions. There are implementations of each of these
 * functions for flusher threads (ft_*), cleaner threads (ct_*), and hot
 * optimize table (hot_*).
 */
struct flusher_advice {
FA_PICK_CHILD pick_child;
FA_SHOULD_RECURSIVELY_FLUSH should_recursively_flush;
FA_MAYBE_MERGE_CHILD maybe_merge_child;
FA_SHOULD_DESTROY_BN should_destroy_basement_nodes;
FA_UPDATE_STATUS update_status;
FA_PICK_CHILD_AFTER_SPLIT pick_child_after_split;
void* extra; // parameter passed into callbacks
};
/**
 * Fill in `fa` with the given callback set; `extra` is handed to every
 * callback unchanged.
 */
void
flusher_advice_init(
struct flusher_advice *fa,
FA_PICK_CHILD pick_child,
FA_SHOULD_DESTROY_BN should_destroy_basement_nodes,
FA_SHOULD_RECURSIVELY_FLUSH should_recursively_flush,
FA_MAYBE_MERGE_CHILD maybe_merge_child,
FA_UPDATE_STATUS update_status,
FA_PICK_CHILD_AFTER_SPLIT pick_child_after_split,
void* extra
);
/**
 * Core flushing entry point: flush from `parent` toward one child,
 * making split/merge/recurse decisions through the callbacks in `fa`.
 */
void
flush_some_child(
struct brt_header* h,
BRTNODE parent,
struct flusher_advice *fa);
/* Stock FA_SHOULD_RECURSIVELY_FLUSH callback: always recurse. */
bool
always_recursively_flush(BRTNODE child, void* extra);
/* Stock FA_SHOULD_DESTROY_BN callback: never destroy basement nodes. */
bool
dont_destroy_basement_nodes(void* extra);
/* Stock FA_MAYBE_MERGE_CHILD callback: unpin `child` and perform the merge. */
void
default_merge_child(struct flusher_advice *fa,
struct brt_header *h,
BRTNODE parent,
int childnum,
BRTNODE child,
void* extra);
/* Stock FA_PICK_CHILD_AFTER_SPLIT callback: returns -1 (use old behavior). */
int
default_pick_child_after_split(struct brt_header *h,
BRTNODE parent,
int childnuma,
int childnumb,
void *extra);
C_END
#endif // End of header guardian.
......@@ -3,9 +3,23 @@
#ident "Copyright (c) 2007-2011 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include <brt-internal.h>
#include <brt-flusher.h>
#include <brt-flusher-internal.h>
#include <brt-cachetable-wrappers.h>
#include <brt-internal.h>
static BRT_FLUSHER_STATUS_S brt_flusher_status;
// One-time initialization of the file-local flusher status counters.
// The "min" statistics start at UINT64_MAX so the first observed value
// always becomes the new minimum; all other counters rely on the
// zero-initialization of the static struct.
void toku_brt_flusher_status_init(void)
{
brt_flusher_status.cleaner_min_buffer_size = UINT64_MAX;
brt_flusher_status.cleaner_min_buffer_workdone = UINT64_MAX;
}
// Copy the current flusher status counters out to the caller.
// NOTE(review): this is a plain struct copy with no locking visible
// here — presumably a racy-but-harmless snapshot of statistics; confirm
// against callers.
void toku_brt_flusher_get_status(BRT_FLUSHER_STATUS status)
{
*status = brt_flusher_status;
}
#define ft_flush_before_applying_inbox 1
#define ft_flush_before_child_pin 2
......@@ -36,8 +50,8 @@ static void call_flusher_thread_callback(int ft_state) {
}
}
static void
find_heaviest_child(BRTNODE node, int *childnum)
static int
find_heaviest_child(BRTNODE node)
{
int max_child = 0;
int max_weight = toku_bnc_nbytesinbuf(BNC(node, 0)) + BP_WORKDONE(node, 0);
......@@ -56,30 +70,29 @@ find_heaviest_child(BRTNODE node, int *childnum)
max_weight = this_weight;
}
}
*childnum = max_child;
if (0) printf("\n");
return max_child;
}
static void
update_flush_status(BRTNODE UU(parent), BRTNODE child, int cascades, BRT_STATUS brt_status)
update_flush_status(BRTNODE child, int cascades)
{
lazy_assert(brt_status);
brt_status->flush_total++;
brt_flusher_status.flush_total++;
if (cascades > 0) {
brt_status->flush_cascades++;
brt_flusher_status.flush_cascades++;
switch (cascades) {
case 1:
brt_status->flush_cascades_1++; break;
brt_flusher_status.flush_cascades_1++; break;
case 2:
brt_status->flush_cascades_2++; break;
brt_flusher_status.flush_cascades_2++; break;
case 3:
brt_status->flush_cascades_3++; break;
brt_flusher_status.flush_cascades_3++; break;
case 4:
brt_status->flush_cascades_4++; break;
brt_flusher_status.flush_cascades_4++; break;
case 5:
brt_status->flush_cascades_5++; break;
brt_flusher_status.flush_cascades_5++; break;
default:
brt_status->flush_cascades_gt_5++; break;
brt_flusher_status.flush_cascades_gt_5++; break;
}
}
bool flush_needs_io = false;
......@@ -89,9 +102,9 @@ update_flush_status(BRTNODE UU(parent), BRTNODE child, int cascades, BRT_STATUS
}
}
if (flush_needs_io) {
brt_status->flush_needed_io++;
brt_flusher_status.flush_needed_io++;
} else {
brt_status->flush_in_memory++;
brt_flusher_status.flush_in_memory++;
}
}
......@@ -113,6 +126,267 @@ maybe_destroy_child_blbs(BRTNODE node, BRTNODE child)
}
}
static void
brt_merge_child(
struct brt_header* h,
BRTNODE node,
int childnum_to_merge,
BOOL *did_react,
struct flusher_advice *fa);
// FA_PICK_CHILD callback: select the child whose message buffer has the
// greatest weight (buffer bytes plus workdone, per find_heaviest_child).
// The chosen buffer must be non-empty — otherwise there is nothing to
// flush and the caller should not have come here.
static int
pick_heaviest_child(struct brt_header *UU(h),
BRTNODE parent,
void* UU(extra))
{
int childnum = find_heaviest_child(parent);
assert(toku_bnc_n_entries(BNC(parent, childnum))>0);
return childnum;
}
// FA_SHOULD_DESTROY_BN callback: never check for (or destroy)
// overly-up-to-date basement nodes.
bool
dont_destroy_basement_nodes(void* UU(extra))
{
return false;
}
// FA_SHOULD_DESTROY_BN callback: always check for basement nodes that
// are more up to date than the node flushing to them.
static bool
do_destroy_basement_nodes(void* UU(extra))
{
return true;
}
// FA_SHOULD_RECURSIVELY_FLUSH callback: unconditionally continue
// flushing into the child.
bool
always_recursively_flush(BRTNODE UU(child), void* UU(extra))
{
return true;
}
// FA_SHOULD_RECURSIVELY_FLUSH callback: only keep flushing while the
// (nonleaf) child is gorged, as judged by toku_brt_nonleaf_is_gorged.
static bool
recurse_if_child_is_gorged(BRTNODE child, void* UU(extra))
{
return toku_brt_nonleaf_is_gorged(child);
}
// FA_PICK_CHILD_AFTER_SPLIT callback: return -1, which tells
// brt_split_child to fall back to its pre-existing behavior.
int
default_pick_child_after_split(struct brt_header* UU(h),
BRTNODE UU(parent),
int UU(childnuma),
int UU(childnumb),
void* UU(extra))
{
return -1;
}
// FA_MAYBE_MERGE_CHILD callback: merge `child` into a sibling via
// brt_merge_child.  Pin/unpin protocol is delicate here — `child` is
// unpinned first, and brt_merge_child is responsible for unlocking
// `parent` (see comments below).
void
default_merge_child(struct flusher_advice *fa,
struct brt_header *h,
BRTNODE parent,
int childnum,
BRTNODE child,
void* UU(extra))
{
//
// There is probably a way to pass BRTNODE child
// into brt_merge_child, but for simplicity for now,
// we are just going to unpin child and
// let brt_merge_child pin it again
//
toku_unpin_brtnode_off_client_thread(h, child);
//
//
// it is responsibility of brt_merge_child to unlock parent
//
BOOL did_react;
brt_merge_child(h, parent, childnum, &did_react, fa);
}
// Populate a flusher_advice with the given callbacks and opaque `extra`
// pointer.  The advice struct parameterizes flush_some_child and friends:
//   pick_child                    - which child buffer of a node to flush
//   should_destroy_basement_nodes - whether to evict child basements first
//   should_recursively_flush      - whether to keep flushing into the child
//   maybe_merge_child             - how to handle a fusible child
//   update_status                 - per-flush status-counter bookkeeping
//   pick_child_after_split        - which half to continue into post-split
void
flusher_advice_init(
    struct flusher_advice *fa,
    FA_PICK_CHILD pick_child,
    FA_SHOULD_DESTROY_BN should_destroy_basement_nodes,
    FA_SHOULD_RECURSIVELY_FLUSH should_recursively_flush,
    FA_MAYBE_MERGE_CHILD maybe_merge_child,
    FA_UPDATE_STATUS update_status,
    FA_PICK_CHILD_AFTER_SPLIT pick_child_after_split,
    void* extra
    )
{
    fa->pick_child = pick_child;
    fa->should_destroy_basement_nodes = should_destroy_basement_nodes;
    fa->should_recursively_flush = should_recursively_flush;
    fa->maybe_merge_child = maybe_merge_child;
    fa->update_status = update_status;
    fa->pick_child_after_split = pick_child_after_split;
    fa->extra = extra;
}
/**
 * Flusher thread ("normal" flushing) implementation.
 */

// Cascade-depth state threaded through flusher_advice.extra by both the
// flusher-thread (ft_*) and cleaner-thread (ct_*) update_status callbacks.
struct flush_status_update_extra {
    int cascades;  // how many cascading flushes deep the current flush is
};
// flusher_advice update_status callback for flusher threads: record this
// flush (at the current cascade depth) in the global flush counters.
static void
ft_update_status(BRTNODE child,
                 int UU(dirtied),
                 void* extra)
{
    struct flush_status_update_extra *fste = extra;
    update_flush_status(child, fste->cascades);
    // If `flush_some_child` decides to recurse after this, we'll need
    // cascades to increase. If not it doesn't matter.
    fste->cascades++;
}
// Set up the flusher-thread advice: flush the heaviest child, keep
// basement nodes in memory, recurse only while the child is gorged, and
// merge fusible children via default_merge_child.
static void
ft_flusher_advice_init(struct flusher_advice *fa, struct flush_status_update_extra *fste)
{
    fste->cascades = 0;
    flusher_advice_init(fa,
                        pick_heaviest_child,
                        dont_destroy_basement_nodes,
                        recurse_if_child_is_gorged,
                        default_merge_child,
                        ft_update_status,
                        default_pick_child_after_split,
                        fste);
}
// State for the cleaner's "flush from root to merge a leaf" pass
// (ctm = cleaner-thread merge): the pivot key identifying the leaf we
// must reach, and whether that leaf is its parent's last child (in which
// case there is no pivot on its right).
struct ctm_extra {
    BOOL is_last_child;
    DBT target_key;
};
/**
 * flusher_advice pick_child callback for the cleaner's leaf-merge descent:
 * follow the saved pivot key down the tree.  At height 1, when the target
 * leaf is its parent's last child there is no pivot identifying it, so we
 * descend into the last child directly.
 */
static int
ctm_pick_child(struct brt_header *h,
               BRTNODE parent,
               void* extra)
{
    struct ctm_extra* ctme = extra;
    if (parent->height == 1 && ctme->is_last_child) {
        return parent->n_children - 1;
    }
    return toku_brtnode_which_child(parent,
                                    &ctme->target_key,
                                    &h->descriptor,
                                    h->compare_fun);
}
// flusher_advice update_status callback for the cleaner's leaf-merge
// descent: count nodes dirtied on the way down from the root.
static void
ctm_update_status(
    BRTNODE UU(child),
    int dirtied,
    void* UU(extra)
    )
{
    brt_flusher_status.cleaner_num_dirtied_for_leaf_merge += dirtied;
}
// flusher_advice maybe_merge_child callback for the cleaner thread.
//
// Nonleaf children are merged the normal way (default_merge_child).  A
// leaf child is handled indirectly: we save the pivot key identifying the
// leaf, unpin everything, and start a fresh flush from the root with
// advice (ctm_*) that descends along that key and merges the leaf once it
// arrives there.
static void
ct_maybe_merge_child(struct flusher_advice *fa,
                     struct brt_header *h,
                     BRTNODE parent,
                     int childnum,
                     BRTNODE child,
                     void* extra)
{
    if (child->height > 0) {
        default_merge_child(fa, h, parent, childnum, child, extra);
    }
    else {
        struct ctm_extra ctme;
        assert(parent->n_children > 1);
        int pivot_to_save;
        //
        // we have two cases, one where the childnum
        // is the last child, and therefore the pivot we
        // save is not of the pivot which we wish to descend
        // and another where it is not the last child,
        // so the pivot is sufficient for identifying the leaf
        // to be merged
        //
        if (childnum == (parent->n_children - 1)) {
            ctme.is_last_child = TRUE;
            pivot_to_save = childnum - 1;
        }
        else {
            ctme.is_last_child = FALSE;
            pivot_to_save = childnum;
        }
        // Copy the pivot key out of the parent: the parent is unpinned
        // below, so ctme needs its own buffer (freed after the descent).
        struct kv_pair *pivot = parent->childkeys[pivot_to_save];
        size_t pivotlen = kv_pair_keylen(pivot);
        char *buf = toku_xmemdup(kv_pair_key_const(pivot), pivotlen);
        toku_fill_dbt(&ctme.target_key, buf, pivotlen);
        // at this point, ctme is properly setup, now we can do the merge
        struct flusher_advice new_fa;
        flusher_advice_init(
            &new_fa,
            ctm_pick_child,
            dont_destroy_basement_nodes,
            always_recursively_flush,
            default_merge_child,
            ctm_update_status,
            default_pick_child_after_split,
            &ctme);
        // Release our pins before re-entering the tree at the root.
        toku_unpin_brtnode_off_client_thread(h, parent);
        toku_unpin_brtnode_off_client_thread(h, child);
        // grab ydb lock, if it exists, if we are running a brt
        // layer test, there may be no ydb lock and that is ok
        toku_cachetable_call_ydb_lock(h->cf);
        CACHEKEY *rootp;
        u_int32_t fullhash;
        rootp = toku_calculate_root_offset_pointer(h, &fullhash);
        struct brtnode_fetch_extra bfe;
        fill_bfe_for_full_read(&bfe, h);
        BRTNODE root_node;
        toku_pin_brtnode_off_client_thread(h, *rootp, fullhash, &bfe, 0,NULL, &root_node);
        toku_assert_entire_node_in_memory(root_node);
        // release ydb lock, if it exists, if we are running a brt
        // layer test, there may be no ydb lock and that is ok
        toku_cachetable_call_ydb_unlock(h->cf);
        // flush_some_child takes responsibility for unpinning root_node
        // (and everything it pins on the way down).
        flush_some_child(h, root_node, &new_fa);
        toku_free(buf);
    }
}
// flusher_advice update_status callback for the cleaner thread: record the
// flush in the global counters and track how many nodes the cleaner
// dirtied along the way.
static void
ct_update_status(BRTNODE child,
                 int dirtied,
                 void* extra)
{
    struct flush_status_update_extra* fste = extra;
    update_flush_status(child, fste->cascades);
    brt_flusher_status.cleaner_nodes_dirtied += dirtied;
    // Incrementing this in case `flush_some_child` decides to recurse.
    fste->cascades++;
}
// Set up the cleaner-thread advice: like the flusher-thread advice, but
// child basement nodes may be destroyed before flushing, and fusible leaf
// children are merged via ct_maybe_merge_child's flush-from-root scheme.
static void
ct_flusher_advice_init(struct flusher_advice *fa, struct flush_status_update_extra* fste)
{
    fste->cascades = 0;
    flusher_advice_init(fa,
                        pick_heaviest_child,
                        do_destroy_basement_nodes,
                        recurse_if_child_is_gorged,
                        ct_maybe_merge_child,
                        ct_update_status,
                        default_pick_child_after_split,
                        fste);
}
//
// This returns true if the node MAY be reactive,
......@@ -379,8 +653,7 @@ brtleaf_split(
DBT *splitk,
BOOL create_new_node,
u_int32_t num_dependent_nodes,
BRTNODE* dependent_nodes,
BRT_STATUS brt_status)
BRTNODE* dependent_nodes)
// Effect: Split a leaf node.
// Argument "node" is node to be split.
// Upon return:
......@@ -390,7 +663,7 @@ brtleaf_split(
{
invariant(node->height == 0);
brt_status->split_leaf++;
brt_flusher_status.split_leaf++;
if (node->n_children) {
// First move all the accumulated stat64info deltas into the first basement.
// After the split, either both nodes or neither node will be included in the next checkpoint.
......@@ -592,11 +865,10 @@ brt_nonleaf_split(
BRTNODE *nodeb,
DBT *splitk,
u_int32_t num_dependent_nodes,
BRTNODE* dependent_nodes,
BRT_STATUS brt_status)
BRTNODE* dependent_nodes)
{
//VERIFY_NODE(t,node);
brt_status->split_nonleaf++;
brt_flusher_status.split_nonleaf++;
toku_assert_entire_node_in_memory(node);
int old_n_children = node->n_children;
int n_children_in_a = old_n_children/2;
......@@ -660,15 +932,6 @@ brt_nonleaf_split(
*nodeb = B;
}
static void
flush_some_child(
struct brt_header* h,
BRTNODE parent,
int *n_dirtied,
int cascades,
bool started_at_root,
BRT_STATUS brt_status);
//
// responsibility of brt_split_child is to take locked BRTNODEs node and child
// and do the following:
......@@ -683,8 +946,7 @@ brt_split_child(
BRTNODE node,
int childnum,
BRTNODE child,
bool started_at_root,
BRT_STATUS brt_status)
struct flusher_advice *fa)
{
assert(node->height>0);
assert(toku_bnc_nbytesinbuf(BNC(node, childnum))==0); // require that the buffer for this child is empty
......@@ -700,9 +962,9 @@ brt_split_child(
dep_nodes[0] = node;
dep_nodes[1] = child;
if (child->height==0) {
brtleaf_split(h, child, &nodea, &nodeb, &splitk, TRUE, 2, dep_nodes, brt_status);
brtleaf_split(h, child, &nodea, &nodeb, &splitk, TRUE, 2, dep_nodes);
} else {
brt_nonleaf_split(h, child, &nodea, &nodeb, &splitk, 2, dep_nodes, brt_status);
brt_nonleaf_split(h, child, &nodea, &nodeb, &splitk, 2, dep_nodes);
}
// printf("%s:%d child did split\n", __FILE__, __LINE__);
handle_split_of_child (node, childnum, nodea, nodeb, &splitk);
......@@ -714,14 +976,17 @@ brt_split_child(
// now we need to unlock node,
// and possibly continue
// flushing one of the children
int picked_child = fa->pick_child_after_split(h, node, childnum, childnum + 1, fa->extra);
toku_unpin_brtnode_off_client_thread(h, node);
if (nodea->height > 0 && toku_brt_nonleaf_is_gorged(nodea)) {
if (picked_child == childnum ||
(picked_child < 0 && nodea->height > 0 && fa->should_recursively_flush(nodea, fa->extra))) {
toku_unpin_brtnode_off_client_thread(h, nodeb);
flush_some_child(h, nodea, NULL, 0, started_at_root, brt_status);
flush_some_child(h, nodea, fa);
}
else if (nodeb->height > 0 && toku_brt_nonleaf_is_gorged(nodeb)) {
else if (picked_child == childnum + 1 ||
(picked_child < 0 && nodeb->height > 0 && fa->should_recursively_flush(nodeb, fa->extra))) {
toku_unpin_brtnode_off_client_thread(h, nodea);
flush_some_child(h, nodeb, NULL, 0, started_at_root, brt_status);
flush_some_child(h, nodeb, fa);
}
else {
toku_unpin_brtnode_off_client_thread(h, nodea);
......@@ -735,14 +1000,13 @@ flush_this_child(
BRTNODE node,
BRTNODE child,
int childnum,
bool started_at_root,
BRT_STATUS brt_status)
struct flusher_advice *fa)
// Effect: Push everything in the CHILDNUMth buffer of node down into the child.
{
update_flush_status(node, child, 0, brt_status);
update_flush_status(child, 0);
int r;
toku_assert_entire_node_in_memory(node);
if (!started_at_root) {
if (fa->should_destroy_basement_nodes(fa)) {
maybe_destroy_child_blbs(node, child);
}
bring_node_fully_into_memory(child, h);
......@@ -764,9 +1028,9 @@ flush_this_child(
}
static void
merge_leaf_nodes(BRTNODE a, BRTNODE b, BRT_STATUS brt_status)
merge_leaf_nodes(BRTNODE a, BRTNODE b)
{
brt_status->merge_leaf++;
brt_flusher_status.merge_leaf++;
toku_assert_entire_node_in_memory(a);
toku_assert_entire_node_in_memory(b);
assert(a->height == 0);
......@@ -841,19 +1105,18 @@ static int
balance_leaf_nodes(
BRTNODE a,
BRTNODE b,
struct kv_pair **splitk,
BRT_STATUS brt_status)
struct kv_pair **splitk)
// Effect:
// If b is bigger then move stuff from b to a until b is the smaller.
// If a is bigger then move stuff from a to b until a is the smaller.
{
brt_status->balance_leaf++;
brt_flusher_status.balance_leaf++;
DBT splitk_dbt;
// first merge all the data into a
merge_leaf_nodes(a,b, brt_status);
merge_leaf_nodes(a,b);
// now split them
// because we are not creating a new node, we can pass in no dependent nodes
brtleaf_split(NULL, a, &a, &b, &splitk_dbt, FALSE, 0, NULL, brt_status);
brtleaf_split(NULL, a, &a, &b, &splitk_dbt, FALSE, 0, NULL);
*splitk = splitk_dbt.data;
return 0;
......@@ -866,8 +1129,7 @@ maybe_merge_pinned_leaf_nodes(
struct kv_pair *parent_splitk,
BOOL *did_merge,
BOOL *did_rebalance,
struct kv_pair **splitk,
BRT_STATUS brt_status)
struct kv_pair **splitk)
// Effect: Either merge a and b into one one node (merge them into a) and set *did_merge = TRUE.
// (We do this if the resulting node is not fissible)
// or distribute the leafentries evenly between a and b, and set *did_rebalance = TRUE.
......@@ -887,7 +1149,7 @@ maybe_merge_pinned_leaf_nodes(
// one is less than 1/4 of a node, and together they are more than 3/4 of a node.
toku_free(parent_splitk); // We don't need the parent_splitk any more. If we need a splitk (if we don't merge) we'll malloc a new one.
*did_rebalance = TRUE;
int r = balance_leaf_nodes(a, b, splitk, brt_status);
int r = balance_leaf_nodes(a, b, splitk);
assert(r==0);
} else {
// we are merging them.
......@@ -895,7 +1157,7 @@ maybe_merge_pinned_leaf_nodes(
*did_rebalance = FALSE;
*splitk = 0;
toku_free(parent_splitk); // if we are merging, the splitk gets freed.
merge_leaf_nodes(a, b, brt_status);
merge_leaf_nodes(a, b);
}
}
......@@ -906,8 +1168,7 @@ maybe_merge_pinned_nonleaf_nodes(
BRTNODE b,
BOOL *did_merge,
BOOL *did_rebalance,
struct kv_pair **splitk,
BRT_STATUS brt_status)
struct kv_pair **splitk)
{
toku_assert_entire_node_in_memory(a);
toku_assert_entire_node_in_memory(b);
......@@ -938,7 +1199,7 @@ maybe_merge_pinned_nonleaf_nodes(
*did_rebalance = FALSE;
*splitk = NULL;
brt_status->merge_nonleaf++;
brt_flusher_status.merge_nonleaf++;
}
static void
......@@ -949,8 +1210,7 @@ maybe_merge_pinned_nodes(
BRTNODE b,
BOOL *did_merge,
BOOL *did_rebalance,
struct kv_pair **splitk,
BRT_STATUS brt_status)
struct kv_pair **splitk)
// Effect: either merge a and b into one node (merge them into a) and set *did_merge = TRUE.
// (We do this if the resulting node is not fissible)
// or distribute a and b evenly and set *did_merge = FALSE and *did_rebalance = TRUE
......@@ -984,9 +1244,9 @@ maybe_merge_pinned_nodes(
}
}
if (a->height == 0) {
maybe_merge_pinned_leaf_nodes(a, b, parent_splitk, did_merge, did_rebalance, splitk, brt_status);
maybe_merge_pinned_leaf_nodes(a, b, parent_splitk, did_merge, did_rebalance, splitk);
} else {
maybe_merge_pinned_nonleaf_nodes(parent_splitk, a, b, did_merge, did_rebalance, splitk, brt_status);
maybe_merge_pinned_nonleaf_nodes(parent_splitk, a, b, did_merge, did_rebalance, splitk);
}
if (*did_merge || *did_rebalance) {
// accurate for leaf nodes because all msgs above have been
......@@ -998,12 +1258,11 @@ maybe_merge_pinned_nodes(
}
static void merge_remove_key_callback(
BLOCKNUM* bp,
BLOCKNUM *bp,
BOOL for_checkpoint,
void* extra
)
void *extra)
{
struct brt_header* h = extra;
struct brt_header *h = extra;
toku_free_blocknum(h->blocktable, bp, h, for_checkpoint);
}
......@@ -1013,12 +1272,11 @@ static void merge_remove_key_callback(
//
static void
brt_merge_child(
struct brt_header* h,
struct brt_header *h,
BRTNODE node,
int childnum_to_merge,
BOOL *did_react,
bool started_at_root,
BRT_STATUS brt_status)
struct flusher_advice *fa)
{
// this function should not be called
// if the child is not mergable
......@@ -1039,7 +1297,6 @@ brt_merge_child(
assert(node->height>0);
// We suspect that at least one of the children is fusible, but they might not be.
// for test
call_flusher_thread_callback(ft_flush_before_merge);
......@@ -1062,10 +1319,10 @@ brt_merge_child(
}
if (toku_bnc_n_entries(BNC(node,childnuma))>0) {
flush_this_child(h, node, childa, childnuma, started_at_root, brt_status);
flush_this_child(h, node, childa, childnuma, fa);
}
if (toku_bnc_n_entries(BNC(node,childnumb))>0) {
flush_this_child(h, node, childb, childnumb, started_at_root, brt_status);
flush_this_child(h, node, childb, childnumb, fa);
}
// now we have both children pinned in main memory, and cachetable locked,
......@@ -1076,7 +1333,7 @@ brt_merge_child(
struct kv_pair *splitk_kvpair = 0;
struct kv_pair *old_split_key = node->childkeys[childnuma];
unsigned int deleted_size = toku_brt_pivot_key_len(old_split_key);
maybe_merge_pinned_nodes(node, node->childkeys[childnuma], childa, childb, &did_merge, &did_rebalance, &splitk_kvpair, brt_status);
maybe_merge_pinned_nodes(node, node->childkeys[childnuma], childa, childb, &did_merge, &did_rebalance, &splitk_kvpair);
if (childa->height>0) { int i; for (i=0; i+1<childa->n_children; i++) assert(childa->childkeys[i]); }
//toku_verify_estimates(t,childa);
// the tree did react if a merge (did_merge) or rebalance (new spkit key) occurred
......@@ -1138,8 +1395,8 @@ brt_merge_child(
toku_unpin_brtnode_off_client_thread(h, node);
toku_unpin_brtnode_off_client_thread(h, childb);
}
if (childa->height > 0 && toku_brt_nonleaf_is_gorged(childa)) {
flush_some_child(h, childa, NULL, 0, started_at_root, brt_status);
if (childa->height > 0 && fa->should_recursively_flush(childa, fa->extra)) {
flush_some_child(h, childa, fa);
}
else {
toku_unpin_brtnode_off_client_thread(h, childa);
......@@ -1172,14 +1429,11 @@ brt_merge_child(
// will have started_at_root==false and anything started by the flusher
// thread will have started_at_root==true, but future mechanisms need to
// be mindful of this issue.
static void
void
flush_some_child(
struct brt_header* h,
struct brt_header *h,
BRTNODE parent,
int *n_dirtied,
int cascades,
bool started_at_root,
BRT_STATUS brt_status)
struct flusher_advice *fa)
// Effect: This function does the following:
// - Pick a child of parent (the heaviest child),
// - flush from parent to child,
......@@ -1189,17 +1443,13 @@ flush_some_child(
// Upon exit of this function, parent is unlocked and no new
// new nodes (such as a child) remain locked
{
bool parent_unpinned = false;
int dirtied = 0;
NONLEAF_CHILDINFO bnc = NULL;
assert(parent->height>0);
toku_assert_entire_node_in_memory(parent);
if (n_dirtied && !parent->dirty) {
(*n_dirtied)++;
}
// pick the child we want to flush to
int childnum;
find_heaviest_child(parent, &childnum);
assert(toku_bnc_n_entries(BNC(parent, childnum))>0);
int childnum = fa->pick_child(h, parent, fa->extra);
// for test
call_flusher_thread_callback(ft_flush_before_child_pin);
......@@ -1216,15 +1466,10 @@ flush_some_child(
fill_bfe_for_min_read(&bfe, h);
toku_pin_brtnode_off_client_thread(h, targetchild, childfullhash, &bfe, 1, &parent, &child);
if (n_dirtied && !child->dirty) {
(*n_dirtied)++;
}
update_flush_status(parent, child, cascades, brt_status);
// for test
call_flusher_thread_callback(ft_flush_after_child_pin);
if (!started_at_root) {
if (fa->should_destroy_basement_nodes(fa)) {
maybe_destroy_child_blbs(parent, child);
}
......@@ -1237,12 +1482,17 @@ flush_some_child(
assert(child->thisnodename.b!=0);
//VERIFY_NODE(brt, child);
// only do the following work if there is a flush to perform
if (toku_bnc_n_entries(BNC(parent, childnum)) > 0) {
if (!parent->dirty) {
dirtied++;
toku_mark_node_dirty(parent);
}
// detach buffer
BP_WORKDONE(parent, childnum) = 0; // this buffer is drained, no work has been done by its contents
NONLEAF_CHILDINFO bnc = BNC(parent, childnum);
bnc = BNC(parent, childnum);
set_BNC(parent, childnum, toku_create_empty_nl());
}
//
// at this point, the buffer has been detached from the parent
......@@ -1252,7 +1502,7 @@ flush_some_child(
//
if (!may_child_be_reactive) {
toku_unpin_brtnode_off_client_thread(h, parent);
parent_unpinned = true;
parent = NULL;
}
//
......@@ -1260,7 +1510,6 @@ flush_some_child(
// so that we can proceed and apply the flush
//
bring_node_fully_into_memory(child, h);
toku_mark_node_dirty(child);
// It is possible after reading in the entire child,
// that we now know that the child is not reactive
......@@ -1269,12 +1518,22 @@ flush_some_child(
// and we have already replaced the bnc
// for the root with a fresh one
enum reactivity child_re = get_node_reactivity(child);
if (!parent_unpinned && child_re == RE_STABLE) {
if (parent && child_re == RE_STABLE) {
toku_unpin_brtnode_off_client_thread(h, parent);
parent_unpinned = true;
parent = NULL;
}
// now we have a bnc to flush to the child
// from above, we know at this point that either the bnc
// is detached from the parent (which may be unpinned),
// and we have to apply the flush, or there was no data
// in the buffer to flush, and as a result, flushing is not necessary
// and bnc is NULL
if (bnc != NULL) {
if (!child->dirty) {
dirtied++;
toku_mark_node_dirty(child);
}
// do the actual flush
r = toku_bnc_flush_to_child(
h->compare_fun,
h->update_fun,
......@@ -1285,34 +1544,31 @@ flush_some_child(
);
assert_zero(r);
destroy_nonleaf_childinfo(bnc);
}
fa->update_status(child, dirtied, fa->extra);
// let's get the reactivity of the child again,
// it is possible that the flush got rid of some values
// and now the parent is no longer reactive
child_re = get_node_reactivity(child);
if (!started_at_root && child->height == 0 && child_re == RE_FUSIBLE) {
// prevent merging leaf nodes, sometimes (when the cleaner thread
// called us)
child_re = RE_STABLE;
brt_status->cleaner_num_leaves_unmerged++;
}
// if the parent has been unpinned above, then
// this is our only option, even if the child is not stable
// if the child is not stable, we'll handle it the next
// time we need to flush to the child
if (parent_unpinned ||
if (!parent ||
child_re == RE_STABLE ||
(child_re == RE_FUSIBLE && parent->n_children == 1)
)
{
if (!parent_unpinned) {
if (parent) {
toku_unpin_brtnode_off_client_thread(h, parent);
parent = NULL;
}
//
// it is the responsibility of flush_some_child to unpin parent
// it is the responsibility of flush_some_child to unpin child
//
if (child->height > 0 && toku_brt_nonleaf_is_gorged(child)) {
flush_some_child(h, child, n_dirtied, cascades+1, started_at_root, brt_status);
if (child->height > 0 && fa->should_recursively_flush(child, fa->extra)) {
flush_some_child(h, child, fa);
}
else {
toku_unpin_brtnode_off_client_thread(h, child);
......@@ -1320,75 +1576,66 @@ flush_some_child(
}
else if (child_re == RE_FISSIBLE) {
//
// it is responsibility of brt_split_child to unlock nodes
// of parent and child as it sees fit
// it is responsibility of `brt_split_child` to unlock nodes of
// parent and child as it sees fit
//
brt_split_child(h, parent, childnum, child, started_at_root, brt_status);
assert(parent); // just make sure we have not accidentally unpinned parent
brt_split_child(h, parent, childnum, child, fa);
}
else if (child_re == RE_FUSIBLE) {
BOOL did_react;
//
// There is probably a way to pass BRTNODE child
// into brt_merge_child, but for simplicity for now,
// we are just going to unpin child and
// let brt_merge_child pin it again
//
toku_unpin_brtnode_off_client_thread(h, child);
//
// it is responsibility of `maybe_merge_child to unlock nodes of
// parent and child as it sees fit
//
// it is responsibility of brt_merge_child to unlock parent
//
brt_merge_child(h, parent, childnum, &did_react, started_at_root, brt_status);
assert(parent); // just make sure we have not accidentally unpinned parent
fa->maybe_merge_child(fa, h, parent, childnum, child, fa->extra);
}
else {
assert(FALSE);
}
}
// TODO 3988 Leif set cleaner_nodes_dirtied
static void
update_cleaner_status(
BRTNODE node,
int childnum,
BRT_STATUS brt_status)
int childnum)
{
brt_status->cleaner_total_nodes++;
brt_flusher_status.cleaner_total_nodes++;
if (node->height == 1) {
brt_status->cleaner_h1_nodes++;
brt_flusher_status.cleaner_h1_nodes++;
} else {
brt_status->cleaner_hgt1_nodes++;
brt_flusher_status.cleaner_hgt1_nodes++;
}
unsigned int nbytesinbuf = toku_bnc_nbytesinbuf(BNC(node, childnum));
if (nbytesinbuf == 0) {
brt_status->cleaner_empty_nodes++;
brt_flusher_status.cleaner_empty_nodes++;
} else {
if (nbytesinbuf > brt_status->cleaner_max_buffer_size) {
brt_status->cleaner_max_buffer_size = nbytesinbuf;
if (nbytesinbuf > brt_flusher_status.cleaner_max_buffer_size) {
brt_flusher_status.cleaner_max_buffer_size = nbytesinbuf;
}
if (nbytesinbuf < brt_status->cleaner_min_buffer_size) {
brt_status->cleaner_min_buffer_size = nbytesinbuf;
if (nbytesinbuf < brt_flusher_status.cleaner_min_buffer_size) {
brt_flusher_status.cleaner_min_buffer_size = nbytesinbuf;
}
brt_status->cleaner_total_buffer_size += nbytesinbuf;
brt_flusher_status.cleaner_total_buffer_size += nbytesinbuf;
uint64_t workdone = BP_WORKDONE(node, childnum);
if (workdone > brt_status->cleaner_max_buffer_workdone) {
brt_status->cleaner_max_buffer_workdone = workdone;
if (workdone > brt_flusher_status.cleaner_max_buffer_workdone) {
brt_flusher_status.cleaner_max_buffer_workdone = workdone;
}
if (workdone < brt_status->cleaner_min_buffer_workdone) {
brt_status->cleaner_min_buffer_workdone = workdone;
if (workdone < brt_flusher_status.cleaner_min_buffer_workdone) {
brt_flusher_status.cleaner_min_buffer_workdone = workdone;
}
brt_status->cleaner_total_buffer_workdone += workdone;
brt_flusher_status.cleaner_total_buffer_workdone += workdone;
}
}
int
toku_brtnode_cleaner_callback_internal(
toku_brtnode_cleaner_callback(
void *brtnode_pv,
BLOCKNUM blocknum,
u_int32_t fullhash,
void *extraargs,
BRT_STATUS brt_status)
void *extraargs)
{
BRTNODE node = brtnode_pv;
invariant(node->thisnodename.b == blocknum.b);
......@@ -1396,15 +1643,15 @@ toku_brtnode_cleaner_callback_internal(
invariant(node->height > 0); // we should never pick a leaf node (for now at least)
struct brt_header *h = extraargs;
bring_node_fully_into_memory(node, h);
int childnum;
find_heaviest_child(node, &childnum);
update_cleaner_status(node, childnum, brt_status);
int childnum = find_heaviest_child(node);
update_cleaner_status(node, childnum);
// Either flush_some_child will unlock the node, or we do it here.
if (toku_bnc_nbytesinbuf(BNC(node, childnum)) > 0) {
int n_dirtied = 0;
flush_some_child(h, node, &n_dirtied, 0, false, brt_status);
brt_status->cleaner_nodes_dirtied += n_dirtied;
struct flusher_advice fa;
struct flush_status_update_extra fste;
ct_flusher_advice_init(&fa, &fste);
flush_some_child(h, node, &fa);
} else {
toku_unpin_brtnode_off_client_thread(h, node);
}
......@@ -1415,7 +1662,6 @@ struct flusher_extra {
struct brt_header* h;
BRTNODE node;
NONLEAF_CHILDINFO bnc;
BRT_STATUS brt_status;
};
//
......@@ -1440,6 +1686,10 @@ static void flush_node_fun(void *fe_v)
bring_node_fully_into_memory(fe->node,fe->h);
toku_mark_node_dirty(fe->node);
struct flusher_advice fa;
struct flush_status_update_extra fste;
ft_flusher_advice_init(&fa, &fste);
if (fe->bnc) {
// In this case, we have a bnc to flush to a node
......@@ -1462,7 +1712,7 @@ static void flush_node_fun(void *fe_v)
// of flush_some_child to unlock the node
// otherwise, we unlock the node here.
if (fe->node->height > 0 && toku_brt_nonleaf_is_gorged(fe->node)) {
flush_some_child(fe->h, fe->node, NULL, 0, true, fe->brt_status);
flush_some_child(fe->h, fe->node, &fa);
}
else {
toku_unpin_brtnode_off_client_thread(fe->h,fe->node);
......@@ -1473,7 +1723,7 @@ static void flush_node_fun(void *fe_v)
// bnc, which means we are tasked with flushing some
// buffer in the node.
// It is the responsibility of flush_some_child to unlock the node
flush_some_child(fe->h, fe->node, NULL, 0, true, fe->brt_status);
flush_some_child(fe->h, fe->node, &fa);
}
remove_background_job(fe->h->cf, false);
toku_free(fe);
......@@ -1483,9 +1733,7 @@ static void
place_node_and_bnc_on_background_thread(
BRT brt,
BRTNODE node,
NONLEAF_CHILDINFO bnc,
BRT_STATUS brt_status
)
NONLEAF_CHILDINFO bnc)
{
struct flusher_extra* fe = NULL;
fe = toku_xmalloc(sizeof(struct flusher_extra));
......@@ -1493,7 +1741,6 @@ place_node_and_bnc_on_background_thread(
fe->h = brt->h;
fe->node = node;
fe->bnc = bnc;
fe->brt_status = brt_status;
cachefile_kibbutz_enq(brt->cf, flush_node_fun, fe);
}
......@@ -1511,14 +1758,13 @@ place_node_and_bnc_on_background_thread(
// The parent will be unlocked on the background thread
//
void
flush_node_on_background_thread(BRT brt, BRTNODE parent, BRT_STATUS brt_status)
flush_node_on_background_thread(BRT brt, BRTNODE parent)
{
//
// first let's see if we can detach buffer on client thread
// and pick the child we want to flush to
//
int childnum;
find_heaviest_child(parent, &childnum);
int childnum = find_heaviest_child(parent);
assert(toku_bnc_n_entries(BNC(parent, childnum))>0);
//
// see if we can pin the child
......@@ -1536,7 +1782,7 @@ flush_node_on_background_thread(BRT brt, BRTNODE parent, BRT_STATUS brt_status)
// In this case, we could not lock the child, so just place the parent on the background thread
// In the callback, we will use flush_some_child, which checks to
// see if we should blow away the old basement nodes.
place_node_and_bnc_on_background_thread(brt, parent, NULL, brt_status);
place_node_and_bnc_on_background_thread(brt, parent, NULL);
}
else {
//
......@@ -1564,7 +1810,7 @@ flush_node_on_background_thread(BRT brt, BRTNODE parent, BRT_STATUS brt_status)
// so, because we know for sure the child is not
// reactive, we can unpin the parent
//
place_node_and_bnc_on_background_thread(brt, child, bnc, brt_status);
place_node_and_bnc_on_background_thread(brt, child, bnc);
toku_unpin_brtnode(brt, parent);
}
else {
......@@ -1574,7 +1820,7 @@ flush_node_on_background_thread(BRT brt, BRTNODE parent, BRT_STATUS brt_status)
toku_unpin_brtnode(brt, child);
// Again, we'll have the parent on the background thread, so
// we don't need to destroy the basement nodes yet.
place_node_and_bnc_on_background_thread(brt, parent, NULL, brt_status);
place_node_and_bnc_on_background_thread(brt, parent, NULL);
}
}
}
......@@ -11,6 +11,39 @@
C_BEGIN
// Counters describing flusher-thread and cleaner-thread activity; these
// are mirrored into the engine status struct for reporting.
typedef struct brt_flusher_status {
    uint64_t cleaner_total_nodes;           // total number of nodes whose buffers are potentially flushed by cleaner thread
    uint64_t cleaner_h1_nodes;              // number of nodes of height one whose message buffers are flushed by cleaner thread
    uint64_t cleaner_hgt1_nodes;            // number of nodes of height > 1 whose message buffers are flushed by cleaner thread
    uint64_t cleaner_empty_nodes;           // number of nodes that are selected by cleaner, but whose buffers are empty
    uint64_t cleaner_nodes_dirtied;         // number of nodes that are made dirty by the cleaner thread
    uint64_t cleaner_max_buffer_size;       // max number of bytes in message buffer flushed by cleaner thread
    uint64_t cleaner_min_buffer_size;       // min number of bytes in a (nonempty) message buffer flushed by cleaner thread
    uint64_t cleaner_total_buffer_size;     // total number of bytes in message buffers flushed by cleaner thread
    uint64_t cleaner_max_buffer_workdone;   // max workdone value of any message buffer flushed by cleaner thread
    uint64_t cleaner_min_buffer_workdone;   // min workdone value of any message buffer flushed by cleaner thread
    uint64_t cleaner_total_buffer_workdone; // total workdone value of message buffers flushed by cleaner thread
    uint64_t cleaner_num_dirtied_for_leaf_merge;  // nodes dirtied by the "flush from root" process to merge a leaf node
    uint64_t flush_total;                   // total number of flushes done by flusher threads or cleaner threads
    uint64_t flush_in_memory;               // number of in memory flushes
    uint64_t flush_needed_io;               // number of flushes that had to read a child (or part) off disk
    uint64_t flush_cascades;                // number of flushes that triggered another flush in the child
    uint64_t flush_cascades_1;              // number of flushes that triggered 1 cascading flush
    uint64_t flush_cascades_2;              // number of flushes that triggered 2 cascading flushes
    uint64_t flush_cascades_3;              // number of flushes that triggered 3 cascading flushes
    uint64_t flush_cascades_4;              // number of flushes that triggered 4 cascading flushes
    uint64_t flush_cascades_5;              // number of flushes that triggered 5 cascading flushes
    uint64_t flush_cascades_gt_5;           // number of flushes that triggered more than 5 cascading flushes
    uint64_t split_leaf;                    // number of leaf nodes split
    uint64_t split_nonleaf;                 // number of nonleaf nodes split
    uint64_t merge_leaf;                    // number of times leaf nodes are merged
    uint64_t merge_nonleaf;                 // number of times nonleaf nodes are merged
    uint64_t balance_leaf;                  // number of times a leaf node is balanced inside brt
} BRT_FLUSHER_STATUS_S, *BRT_FLUSHER_STATUS;
// Initialize the global flusher status counters.
// NOTE(review): definition not visible here — presumably zeroes/resets the
// counters; confirm in the .c file.
void toku_brt_flusher_status_init(void);
// Copy the current flusher/cleaner counters into the caller's struct.
void toku_brt_flusher_get_status(BRT_FLUSHER_STATUS);
/**
* Only for testing, not for production.
*
......@@ -32,12 +65,11 @@ toku_flusher_thread_set_callback(
* brt_status which currently just lives in brt.c.
*/
int
toku_brtnode_cleaner_callback_internal(
toku_brtnode_cleaner_callback(
void *brtnode_pv,
BLOCKNUM blocknum,
u_int32_t fullhash,
void *extraargs,
BRT_STATUS brt_status
void *extraargs
);
/**
......@@ -47,8 +79,7 @@ toku_brtnode_cleaner_callback_internal(
void
flush_node_on_background_thread(
BRT brt,
BRTNODE parent,
BRT_STATUS brt_status
BRTNODE parent
);
/**
......@@ -68,8 +99,7 @@ brtleaf_split(
DBT *splitk,
BOOL create_new_node,
u_int32_t num_dependent_nodes,
BRTNODE* dependent_nodes,
BRT_STATUS brt_status
BRTNODE* dependent_nodes
);
/**
......@@ -89,10 +119,33 @@ brt_nonleaf_split(
BRTNODE *nodeb,
DBT *splitk,
u_int32_t num_dependent_nodes,
BRTNODE* dependent_nodes,
BRT_STATUS brt_status
BRTNODE* dependent_nodes
);
/************************************************************************
* HOT optimize, should perhaps be factored out to its own header file *
************************************************************************
*/
// Counters describing HOT (online optimize) activity; mirrored into the
// engine status struct's hot_* fields for reporting.
typedef struct brt_hot_status {
    uint64_t num_started;          // number of HOT operations that have begun
    uint64_t num_completed;        // number of HOT operations that have successfully completed
    uint64_t num_aborted;          // number of HOT operations that have been aborted
    uint64_t max_root_flush_count; // max number of flushes from root ever required to optimize a tree
} BRT_HOT_STATUS_S, *BRT_HOT_STATUS;
void toku_brt_hot_get_status(BRT_HOT_STATUS);
/**
* Takes given BRT and pushes all pending messages to the leaf nodes.
*/
int
toku_brt_hot_optimize(BRT brt,
int (*progress_callback)(void *extra, float progress),
void *progress_extra);
C_END
#endif // End of header guardian.
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "$Id$"
#ident "Copyright (c) 2007-2011 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include <brt-flusher.h>
#include <brt-flusher-internal.h>
#include <brt-cachetable-wrappers.h>
#include <brt-internal.h>
// Member Description:
// 1. highest_pivot_key - this is the key that corresponds to the
//    most recently flushed leaf entry.
// 2. max_current_key - this is the pivot/key that we inherit as
//    we descend down the tree.  We use this to set the highest_pivot_key.
// 3. sub_tree_size - this is the percentage of the entire tree that our
//    current position (in a sub-tree) encompasses.
// 4. percentage_done - this is the percentage of leaf nodes that have
//    been flushed into.
// 5. rightmost_leaf_seen - this is a boolean we use to determine if
//    we have flushed to every leaf node.
struct hot_flusher_extra {
    DBT highest_pivot_key;
    DBT max_current_key;
    float sub_tree_size;
    float percentage_done;
    bool rightmost_leaf_seen;
};
static BRT_HOT_STATUS_S hot_status;
// Copies a snapshot of the HOT status counters into *s.
void
toku_brt_hot_get_status(BRT_HOT_STATUS s) {
    *s = hot_status;
}
// Records the key below which everything has been flushed, by copying
// max_current_key into highest_pivot_key.
static void
hot_set_highest_key(struct hot_flusher_extra *flusher)
{
    // A NULL max_current_key means the last descent went down the
    // rightmost subtree of the parent, so the "highest key seen" is
    // effectively positive infinity; represent that with NULL and
    // release any buffer we were holding.
    if (flusher->max_current_key.data == NULL) {
        if (flusher->highest_pivot_key.data) {
            toku_free(flusher->highest_pivot_key.data);
        }
        flusher->highest_pivot_key.data = NULL;
        return;
    }
    // Copy the max current key, reusing (resizing) the destination
    // buffer rather than allocating a fresh one each pass.
    u_int32_t keylen = flusher->max_current_key.size;
    void *buf = toku_xrealloc(flusher->highest_pivot_key.data, keylen);
    memcpy(buf, flusher->max_current_key.data, keylen);
    toku_fill_dbt(&(flusher->highest_pivot_key), buf, keylen);
}
// Copies the pivot key in the parent to the given DBT key, using the
// pivot corresponding to the given child.
static void
hot_set_key(DBT *key, BRTNODE parent, int childnum)
{
    // Only children 0 .. n_children-2 have an upper-bound pivot; the
    // rightmost child does not, so it must never be passed here.
    // (Implements the bounds check the original comment asked for.)
    assert(childnum < (parent->n_children - 1));
    DBT pivot;
    struct kv_pair *pair;
    pair = parent->childkeys[childnum];
    pivot = kv_pair_key_to_dbt(pair);
    // Reuse (resize) the DBT's existing buffer rather than leaking it
    // and allocating anew.
    void *data = key->data;
    u_int32_t size = pivot.size;
    data = toku_xrealloc(data, size);
    memcpy(data, pivot.data, size);
    toku_fill_dbt(key, data, size);
}
// Chooses which child of parent the next root-to-leaf pass should
// descend into, based on the highest pivot key flushed so far.
static int
hot_just_pick_child(struct brt_header *h,
                    BRTNODE parent,
                    struct hot_flusher_extra *flusher)
{
    // A NULL highest_pivot_key represents negative infinity (nothing
    // flushed yet), so start with the leftmost child.
    if (flusher->highest_pivot_key.data == NULL) {
        return 0;
    }
    // Otherwise binary-search the parent's pivots for the first child
    // strictly to the right of everything already flushed.
    return toku_brtnode_hot_next_child(parent,
                                       &flusher->highest_pivot_key,
                                       &h->descriptor,
                                       h->compare_fun);
}
// Remembers the pivot bounding the chosen child so the next pass knows
// where to resume.
static void
hot_update_flusher_keys(BRTNODE parent,
                        int childnum,
                        struct hot_flusher_extra *flusher)
{
    // The rightmost child has no upper-bound pivot, so there is
    // nothing to record for it.
    if (childnum >= (parent->n_children - 1)) {
        return;
    }
    hot_set_key(&flusher->max_current_key, parent, childnum);
}
// Picks which child flush_some_child will use for flushing and
// recursion.
static int
hot_pick_child(struct brt_header *h,
               BRTNODE parent,
               void *extra)
{
    struct hot_flusher_extra *hf = extra;
    int which = hot_just_pick_child(h, parent, hf);
    // Track the pivot bounding the chosen subtree.
    hot_update_flusher_keys(parent, which, hf);
    // Progress accounting: whichever subtree we recurse into is an
    // equal fraction of the parent's span, and all subtrees to its
    // left have already been flushed.
    hf->sub_tree_size /= parent->n_children;
    hf->percentage_done += hf->sub_tree_size * which;
    return which;
}
// Flusher-advice status callback.  Intentionally a no-op: HOT does not
// (yet) record per-flush statistics here.
static void
hot_update_status(BRTNODE UU(child),
                  int UU(dirtied),
                  void *UU(extra))
{
}
// Pick-child callback used after flush_some_child has split the child
// it was flushing into.  Re-runs the pivot search (the split changed
// the parent's pivots); the result must be one of the two halves.
static int
hot_pick_child_after_split(struct brt_header *h,
    BRTNODE parent,
    int childnuma,
    int childnumb,
    void *extra)
{
    struct hot_flusher_extra *flusher = extra;
    int childnum = hot_just_pick_child(h, parent, flusher);
    assert(childnum == childnuma || childnum == childnumb);
    hot_update_flusher_keys(parent, childnum, flusher);
    if (parent->height == 1) {
        // NOTE(review): returning -1 when the children are leaves —
        // presumably this tells the flusher machinery not to recurse
        // further; confirm against the pick_child_after_split contract
        // in brt-flusher-internal.h.
        childnum = -1;
    }
    return childnum;
}
// Basic constructor/initializer for the hot flusher struct and its
// flusher advice.
static void
hot_flusher_init(struct flusher_advice *advice,
                 struct hot_flusher_extra *flusher)
{
    // A NULL highest_pivot_key stands for NEGATIVE INFINITY, which is
    // exactly what the very first traversal of the tree needs.
    toku_init_dbt(&flusher->highest_pivot_key);
    toku_init_dbt(&flusher->max_current_key);
    flusher->sub_tree_size = 1.0;
    flusher->percentage_done = 0.0;
    flusher->rightmost_leaf_seen = false;
    flusher_advice_init(advice,
                        hot_pick_child,
                        dont_destroy_basement_nodes,
                        always_recursively_flush,
                        default_merge_child,
                        hot_update_status,
                        hot_pick_child_after_split,
                        flusher);
}
// Erases any DBT keys we have copied from a traversal.
static void
hot_flusher_destroy(struct hot_flusher_extra *flusher)
{
    if (flusher->highest_pivot_key.data) {
        toku_free(flusher->highest_pivot_key.data);
    }
    if (flusher->max_current_key.data) {
        toku_free(flusher->max_current_key.data);
    }
    // Re-initialize the DBTs so the struct holds no dangling pointers;
    // guards against a double free or use-after-free if the flusher is
    // ever touched again after destruction.
    toku_init_dbt(&flusher->highest_pivot_key);
    toku_init_dbt(&flusher->max_current_key);
}
// Entry point for Hot Optimize Table (HOT).  Note, this function is
// not recursive.  It iterates over root-to-leaf paths: each iteration
// of the do/while loop below flushes one such path, and the loop ends
// once the rightmost leaf has been reached (or the progress callback
// asks us to stop).
int
toku_brt_hot_optimize(BRT brt,
    int (*progress_callback)(void *extra, float progress),
    void *progress_extra)
{
    int r = 0;
    struct hot_flusher_extra flusher;
    struct flusher_advice advice;
    hot_flusher_init(&advice, &flusher);
    uint64_t loop_count = 0;
    MSN msn_at_start_of_hot = ZERO_MSN; // capture msn from root at start of HOT operation
    (void) __sync_fetch_and_add(&hot_status.num_started, 1);
    // Higher level logic prevents a dictionary from being deleted or truncated
    // during a hot optimize operation.  Doing so would violate the hot optimize contract.
    do {
        BRTNODE root;
        CACHEKEY *rootp;
        u_int32_t fullhash;
        // Grab YDB Lock.
        toku_cachetable_call_ydb_lock(brt->h->cf);
        // Get root node (the first parent of each successive HOT
        // call.)
        rootp = toku_calculate_root_offset_pointer(brt->h, &fullhash);
        struct brtnode_fetch_extra bfe;
        // Full read: HOT needs the entire root (all partitions) in memory.
        fill_bfe_for_full_read(&bfe, brt->h);
        toku_pin_brtnode_off_client_thread(brt->h,
            (BLOCKNUM) *rootp,
            fullhash,
            &bfe,
            0,
            NULL,
            &root);
        toku_assert_entire_node_in_memory(root);
        // Prepare HOT diagnostics.
        if (loop_count == 0) {
            // The first time through, capture msn from root and set
            // info in header while holding ydb lock.
            msn_at_start_of_hot = root->max_msn_applied_to_node_on_disk;
            toku_brt_header_note_hot_begin(brt);
        }
        loop_count++;
        if (loop_count > hot_status.max_root_flush_count) {
            // This is threadsafe, since we're holding the ydb lock.
            hot_status.max_root_flush_count = loop_count;
        }
        // Release YDB Lock.
        toku_cachetable_call_ydb_unlock(brt->h->cf);
        // Initialize the maximum current key.  We need to do this for
        // every traversal.
        if (flusher.max_current_key.data) {
            toku_free(flusher.max_current_key.data);
        }
        flusher.max_current_key.data = NULL;
        flusher.sub_tree_size = 1.0;
        flusher.percentage_done = 0.0;
        // This should recurse to the bottom of the tree and then
        // return.
        if (root->height > 0) {
            flush_some_child(brt->h, root, &advice);
        } else {
            // The root is a leaf: there are no children to flush, so
            // this single pass covers the whole tree and we are done.
            flusher.rightmost_leaf_seen = 1;
        }
        // Set the highest pivot key seen here, since the parent may
        // be unlocked and NULL'd later in our caller:
        // flush_some_child().
        hot_set_highest_key(&flusher);
        // This is where we determine if the traversal is finished or
        // not: a NULL max_current_key means the pass went down the
        // rightmost edge of the tree.
        if (flusher.max_current_key.data == NULL) {
            flusher.rightmost_leaf_seen = 1;
        }
        // Update HOT's progress.
        if (progress_callback != NULL) {
            r = progress_callback(progress_extra, flusher.percentage_done);
            // Check if the callback wants us to stop running HOT
            // (nonzero return aborts the optimize).
            if (r != 0) {
                flusher.rightmost_leaf_seen = 1;
            }
        }
        // Loop until the max key has been updated to positive
        // infinity.
    } while (!flusher.rightmost_leaf_seen);
    // Cleanup.
    hot_flusher_destroy(&flusher);
    // More diagnostics: record completion (or abort) in the header and
    // in the process-wide counters.
    {
        BOOL success = false;
        if (r == 0) success = true;
        toku_cachetable_call_ydb_lock(brt->h->cf);
        toku_brt_header_note_hot_complete(brt, success, msn_at_start_of_hot);
        toku_cachetable_call_ydb_unlock(brt->h->cf);
        if (success)
            (void) __sync_fetch_and_add(&hot_status.num_completed, 1);
        else
            (void) __sync_fetch_and_add(&hot_status.num_aborted, 1);
    }
    return r;
}
......@@ -341,13 +341,6 @@ enum {
u_int32_t compute_child_fullhash (CACHEFILE cf, BRTNODE node, int childnum);
struct remembered_hash {
BOOL valid; // set to FALSE if the fullhash is invalid
FILENUM fnum;
BLOCKNUM root;
u_int32_t fullhash; // fullhash is the hashed value of fnum and root.
};
// The brt_header is not managed by the cachetable. Instead, it hangs off the cachefile as userdata.
enum brtheader_type {BRTHEADER_CURRENT=1, BRTHEADER_CHECKPOINT_INPROGRESS};
......@@ -380,7 +373,6 @@ struct brt_header {
unsigned int nodesize;
unsigned int basementnodesize;
BLOCKNUM root; // roots of the dictionary
struct remembered_hash root_hash; // hash of the root offset.
unsigned int flags;
DESCRIPTOR_S descriptor;
......@@ -404,6 +396,11 @@ struct brt_header {
STAT64INFO_S in_memory_stats;
STAT64INFO_S on_disk_stats;
STAT64INFO_S checkpoint_staging_stats;
uint64_t time_of_last_optimize_begin; // last time that a hot optimize operation was begun
uint64_t time_of_last_optimize_end; // last time that a hot optimize operation was successfully completed
uint32_t count_of_optimize_in_progress; // the number of hot optimize operations currently in progress on this tree
uint32_t count_of_optimize_in_progress_read_from_disk; // the number of hot optimize operations in progress on this tree at the time of the last crash (this field is in-memory only)
MSN msn_at_start_of_last_completed_optimize; // all messages before this msn have been applied to leaf nodes
};
struct brt {
......@@ -526,10 +523,9 @@ extern void toku_brtnode_pe_est_callback(void* brtnode_pv, long* bytes_freed_est
extern int toku_brtnode_pe_callback (void *brtnode_pv, PAIR_ATTR old_attr, PAIR_ATTR* new_attr, void *extraargs);
extern BOOL toku_brtnode_pf_req_callback(void* brtnode_pv, void* read_extraargs);
int toku_brtnode_pf_callback(void* brtnode_pv, void* read_extraargs, int fd, PAIR_ATTR* sizep);
extern int toku_brtnode_cleaner_callback (void* brtnode_pv, BLOCKNUM blocknum, u_int32_t fullhash, void* extraargs);
extern int toku_brt_alloc_init_header(BRT t, TOKUTXN txn);
extern int toku_read_brt_header_and_store_in_cachefile (BRT brt, CACHEFILE cf, LSN max_acceptable_lsn, struct brt_header **header, BOOL* was_open);
extern CACHEKEY* toku_calculate_root_offset_pointer (BRT brt, u_int32_t *root_hash);
extern CACHEKEY* toku_calculate_root_offset_pointer (struct brt_header* h, u_int32_t *root_hash);
static const BRTNODE null_brtnode=0;
......@@ -716,15 +712,31 @@ unsigned int toku_brtnode_which_child(BRTNODE node, const DBT *k,
DESCRIPTOR desc, brt_compare_func cmp)
__attribute__((__warn_unused_result__));
/**
* Finds the next child for HOT to flush to, given that everything up to
* and including k has been flattened.
*
* If k falls between pivots in node, then we return the childnum where k
* lies.
*
* If k is equal to some pivot, then we return the next (to the right)
* childnum.
*/
unsigned int toku_brtnode_hot_next_child(BRTNODE node,
const DBT *k,
DESCRIPTOR desc,
brt_compare_func cmp);
/* Stuff for testing */
// toku_testsetup_initialize() must be called before any other test_setup_xxx() functions are called.
void toku_testsetup_initialize(void);
int toku_testsetup_leaf(BRT brt, BLOCKNUM *);
int toku_testsetup_leaf(BRT brt, BLOCKNUM *blocknum, int n_children, char **keys, int *keylens);
int toku_testsetup_nonleaf (BRT brt, int height, BLOCKNUM *diskoff, int n_children, BLOCKNUM *children, char **keys, int *keylens);
int toku_testsetup_root(BRT brt, BLOCKNUM);
int toku_testsetup_get_sersize(BRT brt, BLOCKNUM); // Return the size on disk.
int toku_testsetup_insert_to_leaf (BRT brt, BLOCKNUM, char *key, int keylen, char *val, int vallen);
int toku_testsetup_insert_to_nonleaf (BRT brt, BLOCKNUM, enum brt_msg_type, char *key, int keylen, char *val, int vallen);
void toku_pin_node_with_min_bfe(BRTNODE* node, BLOCKNUM b, BRT t);
// These two go together to do lookups in a brtnode using the keys in a command.
struct cmd_leafval_heaviside_extra {
......@@ -799,28 +811,6 @@ struct brt_status {
uint64_t search_root_retries; // number of searches that required the root node to be fetched more than once
uint64_t search_tries_gt_height; // number of searches that required more tries than the height of the tree
uint64_t search_tries_gt_heightplus3; // number of searches that required more tries than the height of the tree plus three
uint64_t cleaner_total_nodes; // total number of nodes whose buffers are potentially flushed by cleaner thread
uint64_t cleaner_h1_nodes; // number of nodes of height one whose message buffers are flushed by cleaner thread
uint64_t cleaner_hgt1_nodes; // number of nodes of height > 1 whose message buffers are flushed by cleaner thread
uint64_t cleaner_empty_nodes; // number of nodes that are selected by cleaner, but whose buffers are empty
uint64_t cleaner_nodes_dirtied; // number of nodes that are made dirty by the cleaner thread
uint64_t cleaner_max_buffer_size; // max number of bytes in message buffer flushed by cleaner thread
uint64_t cleaner_min_buffer_size;
uint64_t cleaner_total_buffer_size;
uint64_t cleaner_max_buffer_workdone; // max workdone value of any message buffer flushed by cleaner thread
uint64_t cleaner_min_buffer_workdone;
uint64_t cleaner_total_buffer_workdone;
uint64_t cleaner_num_leaves_unmerged; // number of leaves left unmerged by the cleaner thread
uint64_t flush_total; // total number of flushes done by flusher threads or cleaner threads
uint64_t flush_in_memory; // number of in memory flushes
uint64_t flush_needed_io; // number of flushes that had to read a child (or part) off disk
uint64_t flush_cascades; // number of flushes that triggered another flush in the child
uint64_t flush_cascades_1; // number of flushes that triggered 1 cascading flush
uint64_t flush_cascades_2; // number of flushes that triggered 2 cascading flushes
uint64_t flush_cascades_3; // number of flushes that triggered 3 cascading flushes
uint64_t flush_cascades_4; // number of flushes that triggered 4 cascading flushes
uint64_t flush_cascades_5; // number of flushes that triggered 5 cascading flushes
uint64_t flush_cascades_gt_5; // number of flushes that triggered more than 5 cascading flushes
uint64_t disk_flush_leaf; // number of leaf nodes flushed to disk, not for checkpoint
uint64_t disk_flush_nonleaf; // number of nonleaf nodes flushed to disk, not for checkpoint
uint64_t disk_flush_leaf_for_checkpoint; // number of leaf nodes flushed to disk for checkpoint
......@@ -829,13 +819,8 @@ struct brt_status {
uint64_t create_nonleaf; // number of nonleaf nodes created
uint64_t destroy_leaf; // number of leaf nodes destroyed
uint64_t destroy_nonleaf; // number of nonleaf nodes destroyed
uint64_t split_leaf; // number of leaf nodes split
uint64_t split_nonleaf; // number of nonleaf nodes split
uint64_t merge_leaf; // number of times leaf nodes are merged
uint64_t merge_nonleaf; // number of times nonleaf nodes are merged
uint64_t dirty_leaf; // number of times leaf nodes are dirtied when previously clean
uint64_t dirty_nonleaf; // number of times nonleaf nodes are dirtied when previously clean
uint64_t balance_leaf; // number of times a leaf node is balanced inside brt
uint64_t msg_bytes_in; // how many bytes of messages injected at root (for all trees)
uint64_t msg_bytes_out; // how many bytes of messages flushed from h1 nodes to leaves
uint64_t msg_bytes_curr; // how many bytes of messages currently in trees (estimate)
......@@ -865,9 +850,6 @@ struct brt_status {
void toku_brt_get_status(BRT_STATUS);
void
brtleaf_split (struct brt_header* h, BRTNODE node, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk, BOOL create_new_node, u_int32_t num_dependent_nodes, BRTNODE* dependent_nodes, BRT_STATUS brt_status);
void
brt_leaf_apply_cmd_once (
BRTNODE leafnode,
......@@ -906,6 +888,18 @@ void toku_apply_cmd_to_leaf(
OMT live_list_reverse
);
void brtnode_put_cmd (
brt_compare_func compare_fun,
brt_update_func update_fun,
DESCRIPTOR desc,
BRTNODE node,
BRT_MSG cmd,
bool is_fresh,
OMT snapshot_txnids,
OMT live_list_reverse
);
void toku_reset_root_xid_that_created(BRT brt, TXNID new_root_xid_that_created);
// Reset the root_xid_that_created field to the given value.
// This redefines which xid created the dictionary.
......@@ -913,6 +907,10 @@ void toku_reset_root_xid_that_created(BRT brt, TXNID new_root_xid_that_created);
void toku_flusher_thread_set_callback(void (*callback_f)(int, void*), void* extra);
void toku_brt_header_note_hot_begin(BRT brt);
void toku_brt_header_note_hot_complete(BRT brt, BOOL success, MSN msn_at_start_of_hot);
C_END
#endif
......@@ -1815,6 +1815,11 @@ serialize_brt_header_min_size (u_int32_t version) {
switch(version) {
case BRT_LAYOUT_VERSION_18:
size += sizeof(uint64_t); // time_of_last_optimize_begin
size += sizeof(uint64_t); // time_of_last_optimize_end
size += sizeof(uint32_t); // count_of_optimize_in_progress
size += sizeof(MSN); // msn_at_start_of_last_completed_optimize
case BRT_LAYOUT_VERSION_17:
size += 16;
invariant(sizeof(STAT64INFO_S) == 16);
......@@ -1891,6 +1896,10 @@ int toku_serialize_brt_header_to_wbuf (struct wbuf *wbuf, struct brt_header *h,
wbuf_ulonglong(wbuf, h->time_of_last_verification);
wbuf_ulonglong(wbuf, h->checkpoint_staging_stats.numrows);
wbuf_ulonglong(wbuf, h->checkpoint_staging_stats.numbytes);
wbuf_ulonglong(wbuf, h->time_of_last_optimize_begin);
wbuf_ulonglong(wbuf, h->time_of_last_optimize_end);
wbuf_int(wbuf, h->count_of_optimize_in_progress);
wbuf_MSN(wbuf, h->msn_at_start_of_last_completed_optimize);
u_int32_t checksum = x1764_finish(&wbuf->checksum);
wbuf_int(wbuf, checksum);
lazy_assert(wbuf->ndone == wbuf->size);
......@@ -2143,7 +2152,6 @@ deserialize_brtheader (int fd, struct rbuf *rb, struct brt_header **brth) {
}
h->root = rbuf_blocknum(&rc);
h->root_hash.valid = FALSE;
h->flags = rbuf_int(&rc);
h->layout_version_original = rbuf_int(&rc);
h->build_id_original = rbuf_int(&rc);
......@@ -2161,10 +2169,15 @@ deserialize_brtheader (int fd, struct rbuf *rb, struct brt_header **brth) {
h->basementnodesize = rbuf_int(&rc);
h->time_of_last_verification = rbuf_ulonglong(&rc);
}
if (h->layout_version >= BRT_LAYOUT_VERSION_17) {
if (h->layout_version >= BRT_LAYOUT_VERSION_18) {
h->on_disk_stats.numrows = rbuf_ulonglong(&rc);
h->on_disk_stats.numbytes = rbuf_ulonglong(&rc);
h->in_memory_stats = h->on_disk_stats;
h->time_of_last_optimize_begin = rbuf_ulonglong(&rc);
h->time_of_last_optimize_end = rbuf_ulonglong(&rc);
h->count_of_optimize_in_progress = rbuf_int(&rc);
h->count_of_optimize_in_progress_read_from_disk = h->count_of_optimize_in_progress;
h->msn_at_start_of_last_completed_optimize = rbuf_msn(&rc);
}
(void)rbuf_int(&rc); //Read in checksum and ignore (already verified).
......@@ -2219,7 +2232,8 @@ deserialize_brtheader_versioned (int fd, struct rbuf *rb, struct brt_header **br
case BRT_LAYOUT_VERSION_14:
h->basementnodesize = 128*1024; // basement nodes added in v15
//fall through on purpose
case BRT_LAYOUT_VERSION_17:
case BRT_LAYOUT_VERSION_18:
case BRT_LAYOUT_VERSION_17: // version 17 never released to customers
case BRT_LAYOUT_VERSION_16: // version 16 never released to customers
case BRT_LAYOUT_VERSION_15: // this will not properly support version 15, we'll fix that on upgrade.
invariant(h->layout_version == BRT_LAYOUT_VERSION);
......
......@@ -6,10 +6,11 @@
#include "includes.h"
#include "ule.h"
#include <brt-cachetable-wrappers.h>
#include <brt-flusher.h>
// dummymsn needed to simulate msn because messages are injected at a lower level than toku_brt_root_put_cmd()
#define MIN_DUMMYMSN ((MSN) {(uint64_t)1<<48})
#define MIN_DUMMYMSN ((MSN) {(uint64_t)100000000000})
static MSN dummymsn;
static int testsetup_initialized = 0;
......@@ -31,13 +32,21 @@ next_dummymsn(void) {
BOOL ignore_if_was_already_open;
int toku_testsetup_leaf(BRT brt, BLOCKNUM *blocknum) {
int toku_testsetup_leaf(BRT brt, BLOCKNUM *blocknum, int n_children, char **keys, int *keylens) {
BRTNODE node;
assert(testsetup_initialized);
int r = toku_read_brt_header_and_store_in_cachefile(brt, brt->cf, MAX_LSN, &brt->h, &ignore_if_was_already_open);
if (r!=0) return r;
toku_create_new_brtnode(brt, &node, 0, 1);
BP_STATE(node,0) = PT_AVAIL;
toku_create_new_brtnode(brt, &node, 0, n_children);
int i;
for (i=0; i<n_children; i++) {
BP_STATE(node,i) = PT_AVAIL;
}
for (i=0; i+1<n_children; i++) {
node->childkeys[i] = kv_pair_malloc(keys[i], keylens[i], 0, 0);
node->totalchildkeylens += keylens[i];
}
*blocknum = node->thisnodename;
toku_unpin_brtnode(brt, node);
......@@ -71,7 +80,6 @@ int toku_testsetup_root(BRT brt, BLOCKNUM blocknum) {
int r = toku_read_brt_header_and_store_in_cachefile(brt, brt->cf, MAX_LSN, &brt->h, &ignore_if_was_already_open);
if (r!=0) return r;
brt->h->root = blocknum;
brt->h->root_hash.valid = FALSE;
return 0;
}
......@@ -131,55 +139,22 @@ int toku_testsetup_insert_to_leaf (BRT brt, BLOCKNUM blocknum, char *key, int ke
toku_verify_or_set_counts(node);
assert(node->height==0);
size_t newlesize;
LEAFENTRY leafentry;
OMTVALUE storeddatav;
u_int32_t idx;
DBT keydbt,valdbt;
MSN msn = next_dummymsn();
BRT_MSG_S cmd = {BRT_INSERT, msn, xids_get_root_xids(),
.u.id={toku_fill_dbt(&keydbt, key, keylen),
toku_fill_dbt(&valdbt, val, vallen)}};
//Generate a leafentry (committed insert key,val)
uint childnum = toku_brtnode_which_child(node,
&keydbt,
&brt->h->descriptor, brt->compare_fun);
BASEMENTNODE bn = BLB(node, childnum);
void * maybe_free = 0;
{
int64_t ignoreme;
r = apply_msg_to_leafentry(&cmd, NULL, //No old leafentry
&newlesize, &leafentry,
bn->buffer, &bn->buffer_mempool, &maybe_free,
NULL, NULL, &ignoreme);
assert(r==0);
}
struct cmd_leafval_heaviside_extra be = {brt->compare_fun, &brt->h->descriptor, &keydbt};
r = toku_omt_find_zero(BLB_BUFFER(node, 0), toku_cmd_leafval_heaviside, &be, &storeddatav, &idx);
if (r==0) {
LEAFENTRY storeddata=storeddatav;
// It's already there. So now we have to remove it and put the new one back in.
BLB_NBYTESINBUF(node, 0) -= leafentry_disksize(storeddata);
toku_free(storeddata);
// Now put the new kv in.
toku_omt_set_at(BLB_BUFFER(node, 0), leafentry, idx);
} else {
r = toku_omt_insert(BLB_BUFFER(node, 0), leafentry, toku_cmd_leafval_heaviside, &be, 0);
assert(r==0);
}
// hack to get tests passing. These tests should not be directly inserting into buffers
BLB(node, 0)->max_msn_applied = msn;
BLB_NBYTESINBUF(node, 0) += newlesize;
node->dirty=1;
brtnode_put_cmd (
brt->h->compare_fun,
brt->h->update_fun,
&brt->h->descriptor,
node,
&cmd,
true,
NULL,
NULL
);
toku_verify_or_set_counts(node);
......@@ -194,6 +169,23 @@ testhelper_string_key_cmp(DB *UU(e), const DBT *a, const DBT *b)
return strcmp(s, t);
}
// Test helper: pin the node at blocknum b using a minimal fetch
// (fill_bfe_for_min_read), returning it through *node.
void
toku_pin_node_with_min_bfe(BRTNODE* node, BLOCKNUM b, BRT t)
{
    u_int32_t fullhash = toku_cachetable_hash(t->h->cf, b);
    struct brtnode_fetch_extra bfe;
    fill_bfe_for_min_read(&bfe, t->h);
    // No dependent nodes; this is an off-client-thread pin.
    toku_pin_brtnode_off_client_thread(t->h, b, fullhash, &bfe, 0, NULL, node);
}
int toku_testsetup_insert_to_nonleaf (BRT brt, BLOCKNUM blocknum, enum brt_msg_type cmdtype, char *key, int keylen, char *val, int vallen) {
void *node_v;
int r;
......
......@@ -12,6 +12,7 @@
*/
#include "includes.h"
#include <brt-flusher.h>
static int
compare_pairs (BRT brt, struct kv_pair *a, struct kv_pair *b) {
......@@ -388,7 +389,7 @@ int
toku_verify_brt_with_progress (BRT brt, int (*progress_callback)(void *extra, float progress), void *progress_extra, int verbose, int keep_on_going) {
assert(brt->h);
u_int32_t root_hash;
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt, &root_hash);
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt->h, &root_hash);
int r = toku_verify_brtnode(brt, ZERO_MSN, ZERO_MSN, *rootp, -1, NULL, NULL, progress_callback, progress_extra, 1, verbose, keep_on_going);
if (r == 0) {
toku_brtheader_lock(brt->h);
......
......@@ -173,8 +173,8 @@ get_leaf_num_entries(BRTNODE node) {
static enum reactivity
get_leaf_reactivity (BRTNODE node) {
enum reactivity re = RE_STABLE;
toku_assert_entire_node_in_memory(node);
assert(node->height==0);
if (node->dirty) {
unsigned int size = toku_serialize_brtnode_size(node);
if (size > node->nodesize && get_leaf_num_entries(node) > 1) {
re = RE_FISSIBLE;
......@@ -182,7 +182,6 @@ get_leaf_reactivity (BRTNODE node) {
else if ((size*4) < node->nodesize && !BLB_SEQINSERT(node, node->n_children-1)) {
re = RE_FUSIBLE;
}
}
return re;
}
......@@ -243,17 +242,6 @@ static inline void add_to_brt_status(u_int64_t* val, u_int64_t data) {
(*val) += data;
}
static void brtnode_put_cmd (
brt_compare_func compare_fun,
brt_update_func update_fun,
DESCRIPTOR desc,
BRTNODE node,
BRT_MSG cmd,
bool is_fresh,
OMT snapshot_txnids,
OMT live_list_reverse
);
static void brt_verify_flags(BRT brt, BRTNODE node) {
assert(brt->flags == node->flags);
}
......@@ -766,16 +754,6 @@ int toku_brtnode_pe_callback (void *brtnode_pv, PAIR_ATTR UU(old_attr), PAIR_ATT
return 0;
}
int
toku_brtnode_cleaner_callback(
void *brtnode_pv,
BLOCKNUM blocknum,
u_int32_t fullhash,
void *extraargs)
{
return toku_brtnode_cleaner_callback_internal(brtnode_pv, blocknum, fullhash, extraargs, &brt_status);
}
static inline void
brt_status_update_partial_fetch(u_int8_t state)
{
......@@ -1852,6 +1830,32 @@ unsigned int toku_brtnode_which_child(BRTNODE node, const DBT *k,
#endif
}
// Used for HOT.  Binary-searches the node's pivots for the leftmost
// child whose subtree can hold keys strictly greater than k: if k lies
// between pivots, that child is returned; if k equals a pivot, the
// child to the pivot's right is returned.
unsigned int
toku_brtnode_hot_next_child(BRTNODE node,
                            const DBT *k,
                            DESCRIPTOR desc,
                            brt_compare_func cmp) {
    int lo = 0;
    int hi = node->n_children - 1;
    while (lo < hi) {
        int mid = lo + (hi - lo) / 2;  // overflow-safe midpoint
        int c = brt_compare_pivot(desc, cmp, k, node->childkeys[mid]);
        if (c == 0) {
            // k is exactly this pivot: everything up to and including
            // k is flattened, so take the subtree under the next pivot.
            return mid + 1;
        } else if (c > 0) {
            lo = mid + 1;
        } else {
            hi = mid;
        }
    }
    invariant(lo == hi);
    return lo;
}
// TODO Use this function to clean up other places where bits of messages are passed around
// such as toku_bnc_insert_msg() and the call stack above it.
static size_t
......@@ -1969,9 +1973,9 @@ brt_handle_maybe_reactive_root (BRT brt, CACHEKEY *rootp, BRTNODE *nodep) {
// in just node. That would be correct.
//
if (node->height==0) {
brtleaf_split(brt->h, node, &nodea, &nodeb, &splitk, TRUE, 0, NULL, &brt_status);
brtleaf_split(brt->h, node, &nodea, &nodeb, &splitk, TRUE, 0, NULL);
} else {
brt_nonleaf_split(brt->h, node, &nodea, &nodeb, &splitk, 0, NULL, &brt_status);
brt_nonleaf_split(brt->h, node, &nodea, &nodeb, &splitk, 0, NULL);
}
brt_init_new_root(brt, nodea, nodeb, splitk, rootp, nodep);
return;
......@@ -1993,6 +1997,7 @@ toku_bnc_flush_to_child(
)
{
assert(toku_fifo_n_entries(bnc->buffer)>0);
assert(bnc);
OMT snapshot_txnids, live_list_reverse;
TOKULOGGER logger = toku_cachefile_logger(cf);
if (child->height == 0 && logger) {
......@@ -2050,7 +2055,7 @@ void bring_node_fully_into_memory(BRTNODE node, struct brt_header* h)
}
}
static void
void
brtnode_put_cmd (
brt_compare_func compare_fun,
brt_update_func update_fun,
......@@ -2229,35 +2234,9 @@ static void push_something_at_root (BRT brt, BRTNODE *nodep, BRT_MSG cmd)
}
}
static void compute_and_fill_remembered_hash (BRT brt) {
struct remembered_hash *rh = &brt->h->root_hash;
assert(brt->cf); // if cf is null, we'll be hosed.
rh->valid = TRUE;
rh->fnum=toku_cachefile_filenum(brt->cf);
rh->root=brt->h->root;
rh->fullhash = toku_cachetable_hash(brt->cf, rh->root);
}
static u_int32_t get_roothash (BRT brt) {
struct remembered_hash *rh = &brt->h->root_hash;
BLOCKNUM root = brt->h->root;
// compare cf first, since cf is NULL for invalid entries.
assert(rh);
//printf("v=%d\n", rh->valid);
if (rh->valid) {
//printf("f=%d\n", rh->fnum.fileid);
//printf("cf=%d\n", toku_cachefile_filenum(brt->cf).fileid);
if (rh->fnum.fileid == toku_cachefile_filenum(brt->cf).fileid)
if (rh->root.b == root.b)
return rh->fullhash;
}
compute_and_fill_remembered_hash(brt);
return rh->fullhash;
}
CACHEKEY* toku_calculate_root_offset_pointer (BRT brt, u_int32_t *roothash) {
*roothash = get_roothash(brt);
return &brt->h->root;
CACHEKEY* toku_calculate_root_offset_pointer (struct brt_header* h, u_int32_t *roothash) {
*roothash = toku_cachetable_hash(h->cf, h->root);
return &h->root;
}
int
......@@ -2272,7 +2251,7 @@ toku_brt_root_put_cmd (BRT brt, BRT_MSG_S * cmd)
//assert(0==toku_cachetable_assert_all_unpinned(brt->cachetable));
assert(brt->h);
u_int32_t fullhash;
rootp = toku_calculate_root_offset_pointer(brt, &fullhash);
rootp = toku_calculate_root_offset_pointer(brt->h, &fullhash);
// get the root node
struct brtnode_fetch_extra bfe;
......@@ -2300,7 +2279,7 @@ toku_brt_root_put_cmd (BRT brt, BRT_MSG_S * cmd)
// if we call flush_some_child, then that function unpins the root
// otherwise, we unpin ourselves
if (node->height > 0 && toku_brt_nonleaf_is_gorged(node)) {
flush_node_on_background_thread(brt, node, &brt_status);
flush_node_on_background_thread(brt, node);
}
else {
toku_unpin_brtnode(brt, node); // unpin root
......@@ -2406,7 +2385,6 @@ brt_optimize (BRT brt, BOOL upgrade) {
return r;
}
int
toku_brt_load(BRT brt, TOKUTXN txn, char const * new_iname, int do_fsync, LSN *load_lsn) {
int r = 0;
......@@ -2892,7 +2870,6 @@ brt_init_header_partial (BRT t, TOKUTXN txn) {
t->h->in_memory_stats = ZEROSTATS;
t->h->on_disk_stats = ZEROSTATS;
t->h->checkpoint_staging_stats = ZEROSTATS;
compute_and_fill_remembered_hash(t);
BLOCKNUM root = t->h->root;
if ((r=setup_initial_brt_root_node(t, root))!=0) { return r; }
......@@ -5055,7 +5032,7 @@ toku_brt_search (BRT brt, brt_search_t *search, BRT_GET_CALLBACK_FUNCTION getf,
assert(brt->h);
u_int32_t fullhash;
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt, &fullhash);
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt->h, &fullhash);
BRTNODE node;
......@@ -5670,7 +5647,7 @@ toku_brt_keyrange (BRT brt, DBT *key, u_int64_t *less_p, u_int64_t *equal_p, u_i
{
u_int64_t less = 0, equal = 0, greater = 0;
u_int32_t fullhash;
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt, &fullhash);
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt->h, &fullhash);
struct brtnode_fetch_extra bfe;
fill_bfe_for_min_read(&bfe, brt->h); // read pivot keys but not message buffers
......@@ -5831,7 +5808,7 @@ int toku_dump_brt (FILE *f, BRT brt) {
assert(brt->h);
u_int32_t fullhash = 0;
toku_dump_translation_table(f, brt->h->blocktable);
rootp = toku_calculate_root_offset_pointer(brt, &fullhash);
rootp = toku_calculate_root_offset_pointer(brt->h, &fullhash);
return toku_dump_brtnode(f, brt, *rootp, 0, 0, 0);
}
......@@ -5891,8 +5868,7 @@ int toku_brt_init(void (*ydb_lock_callback)(void),
r = toku_brt_serialize_init();
if (r==0)
callback_db_set_brt = db_set_brt;
brt_status.cleaner_min_buffer_size = UINT64_MAX;
brt_status.cleaner_min_buffer_workdone = UINT64_MAX;
toku_brt_flusher_status_init();
return r;
}
......@@ -6106,7 +6082,7 @@ BOOL toku_brt_is_empty_fast (BRT brt)
// messages and leafentries would all optimize away and that the tree is empty, but we'll say it is nonempty.
{
u_int32_t fullhash;
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt, &fullhash);
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt->h, &fullhash);
BRTNODE node;
//assert(fullhash == toku_cachetable_hash(brt->cf, *rootp));
{
......@@ -6173,6 +6149,51 @@ toku_reset_root_xid_that_created(BRT brt, TXNID new_root_xid_that_created) {
toku_brtheader_unlock (h);
}
// Purpose: set fields in brt_header to capture accountability info for start of HOT optimize.
// Requires: ydb lock is held.
// Note: HOT accountability variables in header are modified only while holding header lock.
//       (Header lock is really needed for touching the dirty bit, but it's useful and
//       convenient here for keeping the HOT variables threadsafe.)
void
toku_brt_header_note_hot_begin(BRT brt) {
    struct brt_header *h = brt->h;
    time_t now = time(NULL);

    // hold lock around setting and clearing of dirty bit
    // (see cooperative use of dirty bit in toku_brtheader_begin_checkpoint())
    toku_brtheader_lock(h);
    h->time_of_last_optimize_begin = now;
    h->count_of_optimize_in_progress++;  // decremented in toku_brt_header_note_hot_complete()
    h->dirty = 1;
    toku_brtheader_unlock(h);
}
// Purpose: set fields in brt_header to capture accountability info for end of HOT optimize.
// Requires: ydb lock is held.
// Note: See note for toku_brt_header_note_hot_begin().
void
toku_brt_header_note_hot_complete(BRT brt, BOOL success, MSN msn_at_start_of_hot) {
    struct brt_header *h = brt->h;
    time_t now = time(NULL);

    toku_brtheader_lock(h);
    h->count_of_optimize_in_progress--;
    if (success) {
        h->time_of_last_optimize_end = now;
        h->msn_at_start_of_last_completed_optimize = msn_at_start_of_hot;
        // If we just successfully completed an optimization and no other thread is performing
        // an optimization, then the number of optimizations in progress is zero.
        // If there was a crash during a HOT optimization, this is how count_of_optimize_in_progress
        // would be reset to zero on the disk after recovery from that crash.
        if (h->count_of_optimize_in_progress == h->count_of_optimize_in_progress_read_from_disk)
            h->count_of_optimize_in_progress = 0;
    }
    // Mark the header dirty in either case (see the dirty-bit protocol
    // noted in toku_brtheader_begin_checkpoint()).
    h->dirty = 1;
    toku_brtheader_unlock(h);
}
void
toku_brt_header_init(struct brt_header *h,
BLOCKNUM root_blocknum_on_disk, LSN checkpoint_lsn, TXNID root_xid_that_created, uint32_t target_nodesize, uint32_t target_basementnodesize) {
......
......@@ -22,6 +22,7 @@ enum brt_layout_version_e {
BRT_LAYOUT_VERSION_16 = 16, // Dr. No: No subtree estimates, partition layout information represented more transparently.
// ALERT ALERT ALERT: version 16 never released to customers, internal and beta use only
BRT_LAYOUT_VERSION_17 = 17, // Dr. No: Add STAT64INFO_S to brt_header
BRT_LAYOUT_VERSION_18 = 18, // Dr. No: Add HOT info to brt_header
BRT_NEXT_VERSION, // the version after the current version
BRT_LAYOUT_VERSION = BRT_NEXT_VERSION-1, // A hack so I don't have to change this line.
BRT_LAYOUT_MIN_SUPPORTED_VERSION = BRT_LAYOUT_VERSION_13, // Minimum version supported
......
......@@ -466,6 +466,19 @@ toku_cachetable_set_lock_unlock_for_io (CACHETABLE ct, void (*ydb_lock_callback)
ct->ydb_unlock_callback = ydb_unlock_callback;
}
// Acquire the ydb lock on behalf of this cachefile, if a lock callback was
// registered via toku_cachetable_set_lock_unlock_for_io(); otherwise do nothing.
void
toku_cachetable_call_ydb_lock(CACHEFILE cf) {
    CACHETABLE ct = cf->cachetable;
    if (ct->ydb_lock_callback == NULL)
        return;
    // A lock callback without a matching unlock callback is a setup error.
    assert(ct->ydb_unlock_callback);
    ct->ydb_lock_callback();
}
// Release the ydb lock on behalf of this cachefile, if an unlock callback
// was registered; otherwise do nothing.
void
toku_cachetable_call_ydb_unlock(CACHEFILE cf) {
    CACHETABLE ct = cf->cachetable;
    if (ct->ydb_unlock_callback != NULL) {
        ct->ydb_unlock_callback();
    }
}
//
// Increment the reference count
// MUST HOLD cachetable lock
......
......@@ -523,6 +523,8 @@ char * toku_cachetable_get_fname_in_cwd(CACHETABLE ct, const char * fname_in_env
void toku_cachetable_set_lock_unlock_for_io (CACHETABLE ct, void (*ydb_lock_callback)(void), void (*ydb_unlock_callback)(void));
// Effect: When we do I/O we may need to release locks (e.g., the ydb lock). These functions release the lock acquire the lock.
void toku_cachetable_call_ydb_lock(CACHEFILE cf);
void toku_cachetable_call_ydb_unlock(CACHEFILE cf);
void cachefile_kibbutz_enq (CACHEFILE cf, void (*f)(void*), void *extra);
......
/* -*- mode: C; c-basic-offset: 4 -*- */
#ifndef TOKU_DBUFIO_H
#define TOKU_DBUFIO_H
#ident "$Id: queue.c 20104 2010-05-12 17:22:40Z bkuszmaul $"
#ident "$Id$"
#ident "Copyright (c) 2010 Tokutek Inc. All rights reserved."
#include <toku_portability.h>
......
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "$Id$"
#ident "Copyright (c) 2010 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
......
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "$Id$"
#ident "Copyright (c) 2010 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
......
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "$Id: mempool.c 19902 2010-05-06 20:41:32Z bkuszmaul $"
#ident "$Id$"
#ident "Copyright (c) 2007-2010 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
......
#ifndef _TOKU_MEMPOOL_H
#define _TOKU_MEMPOOL_H
#ident "$Id: mempool.h 19902 2010-05-06 20:41:32Z bkuszmaul $"
#ident "$Id$"
#ident "Copyright (c) 2007-2010 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
......
/* -*- mode: C; c-basic-offset: 4 -*- */
#ifndef TOKU_NBMUTEX_H
#define TOKU_NBMUTEX_H
#ident "$Id: rwlock.h 32279 2011-06-29 13:51:57Z bkuszmaul $"
#ident "$Id$"
#ident "Copyright (c) 2007-2010 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
......
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "$Id: brt-serialize-test.c 36450 2011-11-02 20:10:18Z bperlman $"
#ident "$Id$"
#ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved."
#include "test.h"
......
#ident "$Id: cachetable-simple-verify.c 36579 2011-11-04 20:02:04Z zardosht $"
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "$Id$"
#ident "Copyright (c) 2007-2011 Tokutek Inc. All rights reserved."
#include "includes.h"
#include "test.h"
......
......@@ -136,7 +136,7 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) {
// discard the old root block
u_int32_t fullhash = 0;
CACHEKEY *rootp;
rootp = toku_calculate_root_offset_pointer(brt, &fullhash);
rootp = toku_calculate_root_offset_pointer(brt->h, &fullhash);
// set the new root to point to the new tree
*rootp = newroot->thisnodename;
......
......@@ -126,7 +126,7 @@ test_msnfilter(int do_verify) {
// discard the old root block
u_int32_t fullhash = 0;
CACHEKEY *rootp;
rootp = toku_calculate_root_offset_pointer(brt, &fullhash);
rootp = toku_calculate_root_offset_pointer(brt->h, &fullhash);
BRTNODE newroot = make_node(brt, 0);
......
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "$Id$"
#ident "Copyright (c) 2011 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
/* Test an overflow condition on the leaf. See #632. */
......
......@@ -34,7 +34,7 @@ doit (void) {
toku_testsetup_initialize(); // must precede any other toku_testsetup calls
r = toku_testsetup_leaf(t, &nodea);
r = toku_testsetup_leaf(t, &nodea, 1, NULL, NULL);
assert(r==0);
r = toku_testsetup_nonleaf(t, 1, &nodeb, 1, &nodea, 0, 0);
......
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2007-2011 Tokutek Inc. All rights reserved."
#ident "$Id$"
/* The goal of this test: verify that the cleaner callback flushes an internal node's buffer without merging its children, and that lookups on the leaf still succeed afterwards. */
#include "test.h"
#include "includes.h"
#include <brt-cachetable-wrappers.h>
#include "brt-flusher.h"
#include "checkpoint.h"
// No transaction / no DB handle: this test runs against the brt layer directly.
static TOKUTXN const null_txn = 0;
static DB * const null_db = 0;
// Small node size so the hand-built tree stays small and predictable.
enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 };
// Test-wide state shared between doit() and the helpers.
CACHETABLE ct;
BRT brt;
int fnamelen;
char *fname;
// Update callback installed on the test BRT: asserts that an old value is
// present, then replaces it with an empty DBT via set_val.
static int update_func(
    DB* UU(db),
    const DBT* key,
    const DBT* old_val,
    const DBT* UU(extra),
    void (*set_val)(const DBT *new_val, void *set_extra),
    void *set_extra)
{
    // Broadcast updates in this test are only applied to rows that exist.
    assert(old_val->size > 0);
    if (verbose)
        printf("applying update to %s\n", (char *)key->data);
    DBT empty_val;
    toku_init_dbt(&empty_val);
    set_val(&empty_val, set_extra);
    return 0;
}
// Test scenario: hand-build a 3-level tree (root -> internal -> one leaf with
// two basement nodes split at pivot "kkkkk"), load it with point messages and
// a broadcast update, then verify that the cleaner callback on the internal
// node flushes its buffer without merging, and that lookups still work.
// Unlike the sibling test, no checkpoint is taken before pinning, so the
// nodes are expected to be dirty.
static void
doit (void) {
BLOCKNUM node_leaf;
BLOCKNUM node_internal, node_root;
int r;
// Build a filename derived from this source file's name.
fnamelen = strlen(__FILE__) + 20;
fname = toku_malloc(fnamelen);
assert(fname!=0);
snprintf(fname, fnamelen, "%s.brt", __FILE__);
r = toku_brt_create_cachetable(&ct, 500*1024*1024, ZERO_LSN, NULL_LOGGER); assert(r==0);
unlink(fname);
r = toku_open_brt(fname, 1, &brt, NODESIZE, NODESIZE/2, ct, null_txn, toku_builtin_compare_fun, null_db);
assert(r==0);
toku_free(fname);
// Install the update callback both on the handle and the header so the
// broadcast update below is applied with update_func.
brt->update_fun = update_func;
brt->h->update_fun = update_func;
toku_testsetup_initialize(); // must precede any other toku_testsetup calls
char* pivots[1];
pivots[0] = toku_strdup("kkkkk");
int pivot_len = 6;
// One leaf with 2 basement nodes, split at pivot "kkkkk".
r = toku_testsetup_leaf(brt, &node_leaf, 2, pivots, &pivot_len);
assert(r==0);
r = toku_testsetup_nonleaf(brt, 1, &node_internal, 1, &node_leaf, 0, 0);
assert(r==0);
r = toku_testsetup_nonleaf(brt, 2, &node_root, 1, &node_internal, 0, 0);
assert(r==0);
r = toku_testsetup_root(brt, node_root);
assert(r==0);
//
// at this point we have created a tree with a root, an internal node,
// and two leaf nodes, the pivot being "kkkkk"
//
// now we insert a row into each basement node of the leaf
// ("a" sorts before the pivot, "z" after it)
r = toku_testsetup_insert_to_leaf (
brt,
node_leaf,
"a", // key
2, // keylen
"aa",
3
);
assert(r==0);
r = toku_testsetup_insert_to_leaf (
brt,
node_leaf,
"z", // key
2, // keylen
"zz",
3
);
assert(r==0);
char filler[400];
memset(filler, 0, sizeof(filler));
// now we insert filler data so that the rebalance
// keeps it at two nodes
r = toku_testsetup_insert_to_leaf (
brt,
node_leaf,
"b", // key
2, // keylen
filler,
sizeof(filler)
);
assert(r==0);
r = toku_testsetup_insert_to_leaf (
brt,
node_leaf,
"y", // key
2, // keylen
filler,
sizeof(filler)
);
assert(r==0);
//
// now insert a bunch of dummy delete messages
// into the internal node, to get its cachepressure size up
//
for (int i = 0; i < 100000; i++) {
r = toku_testsetup_insert_to_nonleaf (
brt,
node_internal,
BRT_DELETE_ANY,
"jj", // this key does not exist, so its message application should be a no-op
3,
NULL,
0
);
assert(r==0);
}
//
// now insert a broadcast message into the root
//
r = toku_testsetup_insert_to_nonleaf (
brt,
node_root,
BRT_UPDATE_BROADCAST_ALL,
NULL,
0,
NULL,
0
);
assert(r==0);
// now lock and release the leaf node to make sure it is what we expect it to be.
BRTNODE node = NULL;
struct brtnode_fetch_extra bfe;
fill_bfe_for_min_read(&bfe, brt->h);
toku_pin_brtnode_off_client_thread(
brt->h,
node_leaf,
toku_cachetable_hash(brt->h->cf, node_leaf),
&bfe,
0,
NULL,
&node
);
// No checkpoint has run yet, so the testsetup inserts left the leaf dirty
// and both basement nodes in memory.
assert(node->dirty);
assert(node->n_children == 2);
assert(BP_STATE(node,0) == PT_AVAIL);
assert(BP_STATE(node,1) == PT_AVAIL);
toku_unpin_brtnode_off_client_thread(brt->h, node);
// now do a lookup on one of the keys, this should bring a leaf node up to date
DBT k;
struct check_pair pair = {2, "a", 0, NULL, 0};
r = toku_brt_lookup(brt, toku_fill_dbt(&k, "a", 2), lookup_checkf, &pair);
assert(r==0);
//
// pin the leaf one more time
// and make sure that one basement
// node is in memory and another is
// on disk
//
fill_bfe_for_min_read(&bfe, brt->h);
toku_pin_brtnode_off_client_thread(
brt->h,
node_leaf,
toku_cachetable_hash(brt->h->cf, node_leaf),
&bfe,
0,
NULL,
&node
);
// NOTE(review): the comment above says one basement node should be on disk,
// but both are asserted PT_AVAIL here (the node was never written out) —
// the dirty-path variant of this test keeps everything in memory.
assert(node->dirty);
assert(node->n_children == 2);
assert(BP_STATE(node,0) == PT_AVAIL);
assert(BP_STATE(node,1) == PT_AVAIL);
toku_unpin_brtnode_off_client_thread(brt->h, node);
//
// now let us induce a clean on the internal node
//
fill_bfe_for_min_read(&bfe, brt->h);
toku_pin_brtnode_off_client_thread(
brt->h,
node_internal,
toku_cachetable_hash(brt->h->cf, node_internal),
&bfe,
0,
NULL,
&node
);
assert(node->dirty);
// we expect that this flushes its buffer, that
// a merge is not done, and that the lookup
// of values "a" and "z" still works
// NOTE(review): r is assigned but not asserted here; consider checking it.
r = toku_brtnode_cleaner_callback(
node,
node_internal,
toku_cachetable_hash(brt->h->cf, node_internal),
brt->h
);
// verify that node_internal's buffer is empty
fill_bfe_for_min_read(&bfe, brt->h);
toku_pin_brtnode_off_client_thread(
brt->h,
node_internal,
toku_cachetable_hash(brt->h->cf, node_internal),
&bfe,
0,
NULL,
&node
);
// check that buffers are empty
assert(toku_bnc_nbytesinbuf(BNC(node, 0)) == 0);
toku_unpin_brtnode_off_client_thread(brt->h, node);
//
// now run a checkpoint to get everything clean,
// and to get the rebalancing to happen
//
r = toku_checkpoint(ct, NULL, NULL, NULL, NULL, NULL, CLIENT_CHECKPOINT);
assert_zero(r);
// check that lookups on the two keys is still good
struct check_pair pair1 = {2, "a", 0, NULL, 0};
r = toku_brt_lookup(brt, toku_fill_dbt(&k, "a", 2), lookup_checkf, &pair1);
assert(r==0);
struct check_pair pair2 = {2, "z", 0, NULL, 0};
r = toku_brt_lookup(brt, toku_fill_dbt(&k, "z", 2), lookup_checkf, &pair2);
assert(r==0);
r = toku_close_brt(brt, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
toku_free(pivots[0]);
}
// Test entry point: parse the standard test arguments (e.g. -v/-q for
// verbosity) and run the test body.
// Fix: argc/argv were marked __attribute__((__unused__)) even though both are
// passed to default_parse_args(); the misleading attributes are removed.
int
test_main (int argc, const char *argv[]) {
    default_parse_args(argc, argv);
    doit();
    return 0;
}
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2007-2011 Tokutek Inc. All rights reserved."
#ident "$Id$"
/* The goal of this test: after a checkpoint and basement-node eviction, verify that the cleaner callback flushes an internal node's buffer without merging, and that lookups still succeed. */
#include "test.h"
#include "includes.h"
#include <brt-cachetable-wrappers.h>
#include "brt-flusher.h"
#include "checkpoint.h"
// No transaction / no DB handle: this test runs against the brt layer directly.
static TOKUTXN const null_txn = 0;
static DB * const null_db = 0;
// Small node size so the hand-built tree stays small and predictable.
enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 };
// Test-wide state shared between doit() and the helpers.
CACHETABLE ct;
BRT brt;
int fnamelen;
char *fname;
// Update callback installed on the test BRT.  It requires that the row being
// updated already has a value, then hands set_val an empty replacement DBT.
static int update_func(
    DB* UU(db),
    const DBT* key,
    const DBT* old_val,
    const DBT* UU(extra),
    void (*set_val)(const DBT *new_val, void *set_extra),
    void *set_extra)
{
    DBT replacement;
    // The broadcast update in this test must only touch existing rows.
    assert(old_val->size > 0);
    if (verbose) {
        printf("applying update to %s\n", (char *)key->data);
    }
    toku_init_dbt(&replacement);
    set_val(&replacement, set_extra);
    return 0;
}
// Test scenario: hand-build a 3-level tree (root -> internal -> one leaf with
// two basement nodes split at pivot "kkkkk"), load it with point messages and
// a broadcast update, checkpoint to get everything clean, evict the leaf's
// basement nodes, then verify that the cleaner callback on the internal node
// flushes its buffer without merging, partially reading the leaf back from
// disk, and that lookups still work.
static void
doit (void) {
BLOCKNUM node_leaf;
BLOCKNUM node_internal, node_root;
int r;
// Build a filename derived from this source file's name.
fnamelen = strlen(__FILE__) + 20;
fname = toku_malloc(fnamelen);
assert(fname!=0);
snprintf(fname, fnamelen, "%s.brt", __FILE__);
r = toku_brt_create_cachetable(&ct, 500*1024*1024, ZERO_LSN, NULL_LOGGER); assert(r==0);
unlink(fname);
r = toku_open_brt(fname, 1, &brt, NODESIZE, NODESIZE/2, ct, null_txn, toku_builtin_compare_fun, null_db);
assert(r==0);
toku_free(fname);
// Install the update callback both on the handle and the header so the
// broadcast update below is applied with update_func.
brt->update_fun = update_func;
brt->h->update_fun = update_func;
toku_testsetup_initialize(); // must precede any other toku_testsetup calls
char* pivots[1];
pivots[0] = toku_strdup("kkkkk");
int pivot_len = 6;
// One leaf with 2 basement nodes, split at pivot "kkkkk".
r = toku_testsetup_leaf(brt, &node_leaf, 2, pivots, &pivot_len);
assert(r==0);
r = toku_testsetup_nonleaf(brt, 1, &node_internal, 1, &node_leaf, 0, 0);
assert(r==0);
r = toku_testsetup_nonleaf(brt, 2, &node_root, 1, &node_internal, 0, 0);
assert(r==0);
r = toku_testsetup_root(brt, node_root);
assert(r==0);
//
// at this point we have created a tree with a root, an internal node,
// and two leaf nodes, the pivot being "kkkkk"
//
// now we insert a row into each basement node of the leaf
// ("a" sorts before the pivot, "z" after it)
r = toku_testsetup_insert_to_leaf (
brt,
node_leaf,
"a", // key
2, // keylen
"aa",
3
);
assert(r==0);
r = toku_testsetup_insert_to_leaf (
brt,
node_leaf,
"z", // key
2, // keylen
"zz",
3
);
assert(r==0);
char filler[400];
memset(filler, 0, sizeof(filler));
// now we insert filler data so that the rebalance
// keeps it at two nodes
r = toku_testsetup_insert_to_leaf (
brt,
node_leaf,
"b", // key
2, // keylen
filler,
sizeof(filler)
);
assert(r==0);
r = toku_testsetup_insert_to_leaf (
brt,
node_leaf,
"y", // key
2, // keylen
filler,
sizeof(filler)
);
assert(r==0);
//
// now insert a bunch of dummy delete messages
// into the internal node, to get its cachepressure size up
//
for (int i = 0; i < 100000; i++) {
r = toku_testsetup_insert_to_nonleaf (
brt,
node_internal,
BRT_DELETE_ANY,
"jj", // this key does not exist, so its message application should be a no-op
3,
NULL,
0
);
assert(r==0);
}
//
// now insert a broadcast message into the root
//
r = toku_testsetup_insert_to_nonleaf (
brt,
node_root,
BRT_UPDATE_BROADCAST_ALL,
NULL,
0,
NULL,
0
);
assert(r==0);
//
// now run a checkpoint to get everything clean
//
r = toku_checkpoint(ct, NULL, NULL, NULL, NULL, NULL, CLIENT_CHECKPOINT);
assert_zero(r);
// now lock and release the leaf node to make sure it is what we expect it to be.
BRTNODE node = NULL;
struct brtnode_fetch_extra bfe;
fill_bfe_for_min_read(&bfe, brt->h);
toku_pin_brtnode_off_client_thread(
brt->h,
node_leaf,
toku_cachetable_hash(brt->h->cf, node_leaf),
&bfe,
0,
NULL,
&node
);
// The checkpoint above wrote the node out, so it must be clean now.
assert(!node->dirty);
assert(node->n_children == 2);
// a hack to get the basement nodes evicted
// (repeatedly invoke the partial-eviction callback with a huge attr)
for (int i = 0; i < 20; i++) {
PAIR_ATTR attr;
toku_brtnode_pe_callback(node, make_pair_attr(0xffffffff), &attr, NULL);
}
// this ensures that when we do the lookups below,
// that the data is read off disk
assert(BP_STATE(node,0) == PT_ON_DISK);
assert(BP_STATE(node,1) == PT_ON_DISK);
toku_unpin_brtnode_off_client_thread(brt->h, node);
// now do a lookup on one of the keys, this should bring a leaf node up to date
DBT k;
struct check_pair pair = {2, "a", 0, NULL, 0};
r = toku_brt_lookup(brt, toku_fill_dbt(&k, "a", 2), lookup_checkf, &pair);
assert(r==0);
//
// pin the leaf one more time
// and make sure that one basement
// node is in memory and another is
// on disk
//
fill_bfe_for_min_read(&bfe, brt->h);
toku_pin_brtnode_off_client_thread(
brt->h,
node_leaf,
toku_cachetable_hash(brt->h->cf, node_leaf),
&bfe,
0,
NULL,
&node
);
// Only the basement node containing "a" was fetched by the lookup.
assert(!node->dirty);
assert(node->n_children == 2);
assert(BP_STATE(node,0) == PT_AVAIL);
assert(BP_STATE(node,1) == PT_ON_DISK);
toku_unpin_brtnode_off_client_thread(brt->h, node);
//
// now let us induce a clean on the internal node
//
fill_bfe_for_min_read(&bfe, brt->h);
toku_pin_brtnode_off_client_thread(
brt->h,
node_internal,
toku_cachetable_hash(brt->h->cf, node_internal),
&bfe,
0,
NULL,
&node
);
assert(!node->dirty);
// we expect that this flushes its buffer, that
// a merge is not done, and that the lookup
// of values "a" and "z" still works
// NOTE(review): r is assigned but not asserted here; consider checking it.
r = toku_brtnode_cleaner_callback(
node,
node_internal,
toku_cachetable_hash(brt->h->cf, node_internal),
brt->h
);
// verify that node_internal's buffer is empty
fill_bfe_for_min_read(&bfe, brt->h);
toku_pin_brtnode_off_client_thread(
brt->h,
node_internal,
toku_cachetable_hash(brt->h->cf, node_internal),
&bfe,
0,
NULL,
&node
);
// check that buffers are empty
assert(toku_bnc_nbytesinbuf(BNC(node, 0)) == 0);
toku_unpin_brtnode_off_client_thread(brt->h, node);
//
// now run a checkpoint to get everything clean,
// and to get the rebalancing to happen
//
r = toku_checkpoint(ct, NULL, NULL, NULL, NULL, NULL, CLIENT_CHECKPOINT);
assert_zero(r);
// check that lookups on the two keys is still good
struct check_pair pair1 = {2, "a", 0, NULL, 0};
r = toku_brt_lookup(brt, toku_fill_dbt(&k, "a", 2), lookup_checkf, &pair1);
assert(r==0);
struct check_pair pair2 = {2, "z", 0, NULL, 0};
r = toku_brt_lookup(brt, toku_fill_dbt(&k, "z", 2), lookup_checkf, &pair2);
assert(r==0);
r = toku_close_brt(brt, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
toku_free(pivots[0]);
}
// Test entry point: parse the standard test arguments (e.g. -v/-q for
// verbosity) and run the test body.
// Fix: argc/argv were marked __attribute__((__unused__)) even though both are
// passed to default_parse_args(); the misleading attributes are removed.
int
test_main (int argc, const char *argv[]) {
    default_parse_args(argc, argv);
    doit();
    return 0;
}
......@@ -65,7 +65,7 @@ doit (int ksize __attribute__((__unused__))) {
toku_testsetup_initialize(); // must precede any other toku_testsetup calls
for (i=0; i<BRT_FANOUT; i++) {
r=toku_testsetup_leaf(t, &cnodes[i]);
r=toku_testsetup_leaf(t, &cnodes[i], 1, NULL, NULL);
assert(r==0);
char key[KSIZE+10];
int keylen = 1+snprintf(key, KSIZE, "%08d%0*d", i*10000+1, KSIZE-9, 0);
......
#ident "$Id: test-del-inorder.c 32975 2011-07-11 23:42:51Z leifwalsh $"
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2007-2011 Tokutek Inc. All rights reserved."
#ident "$Id: test-merges-on-cleaner.c 38542 2012-01-06 14:06:23Z christianrober $"
/* The goal of this test. Make sure that inserts stay behind deletes. */
#include "test.h"
#include "includes.h"
#include <brt-cachetable-wrappers.h>
#include "brt-flusher.h"
#include "checkpoint.h"
static TOKUTXN const null_txn = 0;
static DB * const null_db = 0;
......@@ -58,9 +62,9 @@ doit (void) {
toku_testsetup_initialize(); // must precede any other toku_testsetup calls
r = toku_testsetup_leaf(brt, &node_leaf[0]);
r = toku_testsetup_leaf(brt, &node_leaf[0], 1, NULL, NULL);
assert(r==0);
r = toku_testsetup_leaf(brt, &node_leaf[1]);
r = toku_testsetup_leaf(brt, &node_leaf[1], 1, NULL, NULL);
assert(r==0);
char* pivots[1];
......@@ -70,7 +74,7 @@ doit (void) {
r = toku_testsetup_nonleaf(brt, 1, &node_internal, 2, node_leaf, pivots, &pivot_len);
assert(r==0);
r = toku_testsetup_nonleaf(brt, 1, &node_root, 1, &node_internal, 0, 0);
r = toku_testsetup_nonleaf(brt, 2, &node_root, 1, &node_internal, 0, 0);
assert(r==0);
r = toku_testsetup_root(brt, node_root);
......@@ -132,16 +136,21 @@ doit (void) {
);
assert(r==0);
//
// now let us induce a clean on the internal node
//
BRTNODE node;
toku_pin_node_with_min_bfe(&node, node_leaf[1], brt);
// hack to get merge going
BLB_SEQINSERT(node, node->n_children-1) = FALSE;
toku_unpin_brtnode(brt, node);
// now do a lookup on one of the keys, this should bring a leaf node up to date
DBT k;
struct check_pair pair = {2, "a", 0, NULL, 0};
r = toku_brt_lookup(brt, toku_fill_dbt(&k, "a", 2), lookup_checkf, &pair);
assert(r==0);
//
// now let us induce a clean on the internal node
//
BRTNODE node;
struct brtnode_fetch_extra bfe;
fill_bfe_for_min_read(&bfe, brt->h);
toku_pin_brtnode_off_client_thread(
......@@ -175,12 +184,19 @@ doit (void) {
NULL,
&node
);
// check that no merge happened
assert(node->n_children == 2);
// check that merge happened
assert(node->n_children == 1);
// check that buffers are empty
assert(toku_bnc_nbytesinbuf(BNC(node, 0)) == 0);
assert(toku_bnc_nbytesinbuf(BNC(node, 1)) == 0);
toku_unpin_brtnode_off_client_thread(brt->h, node);
//
// now run a checkpoint to get everything clean,
// and to get the rebalancing to happen
//
r = toku_checkpoint(ct, NULL, NULL, NULL, NULL, NULL, CLIENT_CHECKPOINT);
assert_zero(r);
// check that lookups on the two keys is still good
struct check_pair pair1 = {2, "a", 0, NULL, 0};
r = toku_brt_lookup(brt, toku_fill_dbt(&k, "a", 2), lookup_checkf, &pair1);
......@@ -198,6 +214,7 @@ doit (void) {
int
test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
default_parse_args(argc, argv);
doit();
return 0;
}
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "$Id$"
#ident "Copyright (c) 2011 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
/* The goal of this test: verify that flush_some_child flushes exactly the child chosen by the flusher advice, including recursive flushes from the root and flushes into a partially-compressed child. */
#include "test.h"
#include "includes.h"
#include <brt-cachetable-wrappers.h>
#include "brt-flusher.h"
#include "brt-flusher-internal.h"
#include "checkpoint.h"
// No transaction / no DB handle: this test runs against the brt layer directly.
static TOKUTXN const null_txn = 0;
static DB * const null_db = 0;
// Small node size so the hand-built tree stays small and predictable.
enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 };
// Test-wide state shared between doit() and the flusher-advice callbacks.
CACHETABLE ct;
BRT t;
int fnamelen;
char *fname;
// Which child of the internal node the child_to_flush callback should pick.
int curr_child_to_flush;
// Counts calls to the update_status callback, i.e. how many flushes ran.
int num_flushes_called;
// Flusher-advice callback: pick which child of `parent` to flush.
// The test tree has a root (height 2) with one child and an internal node
// (height 1) with two children; the test selects the internal node's child
// through the global curr_child_to_flush.
static int child_to_flush(struct brt_header* UU(h), BRTNODE parent, void* UU(extra)) {
    switch (parent->height) {
    case 1:
        // internal node: two children, flush the one the test selected
        assert(parent->n_children == 2);
        return curr_child_to_flush;
    case 2:
        // root: exactly one child
        assert(parent->n_children == 1);
        return 0;
    default:
        // no other heights exist in this tree
        assert(FALSE);
    }
    return curr_child_to_flush;
}
// Flusher-advice callback invoked once per flush; counts invocations so the
// test can verify exactly how many flushes happened.
static void update_status(BRTNODE UU(child), int UU(dirtied), void* UU(extra)) {
    ++num_flushes_called;
}
// Flusher-advice callback: never allow the flusher to destroy basement nodes.
static bool dont_destroy_bn(void* UU(extra)) { return false; }
// Flusher-advice callback: this test never expects a child merge, so reaching
// this function at all is a test failure.
static void merge_should_not_happen(struct flusher_advice* UU(fa), struct brt_header* UU(h),
                                    BRTNODE UU(parent), int UU(childnum),
                                    BRTNODE UU(child), void* UU(extra)) {
    assert(FALSE);
}
// Flusher-advice callback: the first phase of this test never expects a
// recursive flush, so reaching this function is a test failure.
static bool recursively_flush_should_not_happen(BRTNODE UU(child), void* UU(extra)) {
    assert(FALSE);
    // Fix: the function previously fell off the end of a non-void function.
    // That is undefined behavior if the caller uses the result when asserts
    // are compiled out (NDEBUG); return an explicit value on that path.
    return false;
}
// Flusher-advice callback: always recurse into and flush the child.
static bool always_flush(BRTNODE UU(child), void* UU(extra)) { return true; }
// Test scenario: hand-build a tree (root -> internal -> two leaves, pivot
// "kkkkk"), then drive flush_some_child with custom flusher advice and verify
// that it flushes exactly the child the advice picks, that empty-buffer
// flushes are handled, that flushes recurse from the root down both paths,
// and that flushing into a partially-compressed child does not crash.
static void
doit (void) {
BLOCKNUM node_internal, node_root;
BLOCKNUM node_leaf[2];
int r;
// Build a filename derived from this source file's name.
fnamelen = strlen(__FILE__) + 20;
fname = toku_malloc(fnamelen);
assert(fname!=0);
snprintf(fname, fnamelen, "%s.brt", __FILE__);
r = toku_brt_create_cachetable(&ct, 500*1024*1024, ZERO_LSN, NULL_LOGGER); assert(r==0);
unlink(fname);
r = toku_open_brt(fname, 1, &t, NODESIZE, NODESIZE/2, ct, null_txn, toku_builtin_compare_fun, null_db);
assert(r==0);
toku_free(fname);
toku_testsetup_initialize(); // must precede any other toku_testsetup calls
r = toku_testsetup_leaf(t, &node_leaf[0], 1, NULL, NULL);
assert(r==0);
r = toku_testsetup_leaf(t, &node_leaf[1], 1, NULL, NULL);
assert(r==0);
char* pivots[1];
pivots[0] = toku_strdup("kkkkk");
int pivot_len = 6;
// internal node (height 1) with the two leaves as children, split at "kkkkk"
r = toku_testsetup_nonleaf(t, 1, &node_internal, 2, node_leaf, pivots, &pivot_len);
assert(r==0);
// root (height 2) with the internal node as its only child
r = toku_testsetup_nonleaf(t, 2, &node_root, 1, &node_internal, 0, 0);
assert(r==0);
r = toku_testsetup_root(t, node_root);
assert(r==0);
char filler[900];
memset(filler, 0, sizeof(filler));
// now we insert filler data so that a merge does not happen
r = toku_testsetup_insert_to_leaf (
t,
node_leaf[0],
"b", // key
2, // keylen
filler,
sizeof(filler)
);
assert(r==0);
r = toku_testsetup_insert_to_leaf (
t,
node_leaf[1],
"y", // key
2, // keylen
filler,
sizeof(filler)
);
assert(r==0);
// make buffers in internal node non-empty
// ("a" targets leaf 0, "z" targets leaf 1)
r = toku_testsetup_insert_to_nonleaf(
t,
node_internal,
BRT_INSERT,
"a",
2,
NULL,
0
);
assert_zero(r);
r = toku_testsetup_insert_to_nonleaf(
t,
node_internal,
BRT_INSERT,
"z",
2,
NULL,
0
);
assert_zero(r);
//
// now run a checkpoint to get everything clean
//
r = toku_checkpoint(ct, NULL, NULL, NULL, NULL, NULL, CLIENT_CHECKPOINT);
assert_zero(r);
// now with setup done, start the test
// test that if flush_some_child properly honors
// what we say and flushes the child we pick
BRTNODE node = NULL;
toku_pin_node_with_min_bfe(&node, node_internal, t);
toku_assert_entire_node_in_memory(node);
assert(node->n_children == 2);
assert(!node->dirty);
assert(toku_bnc_n_entries(node->bp[0].ptr.u.nonleaf) > 0);
assert(toku_bnc_n_entries(node->bp[1].ptr.u.nonleaf) > 0);
// advice: flush only the chosen child, never recurse, never merge
struct flusher_advice fa;
flusher_advice_init(
&fa,
child_to_flush,
dont_destroy_bn,
recursively_flush_should_not_happen,
merge_should_not_happen,
update_status,
default_pick_child_after_split,
NULL
);
curr_child_to_flush = 0;
num_flushes_called = 0;
// NOTE(review): flush_some_child appears to unpin `node`; it is re-pinned below.
flush_some_child(t->h, node, &fa);
assert(num_flushes_called == 1);
toku_pin_node_with_min_bfe(&node, node_internal, t);
toku_assert_entire_node_in_memory(node);
assert(node->dirty);
assert(node->n_children == 2);
// child 0 should have empty buffer because it flushed
// child 1 should still have message in buffer
assert(toku_bnc_n_entries(node->bp[0].ptr.u.nonleaf) == 0);
assert(toku_bnc_n_entries(node->bp[1].ptr.u.nonleaf) > 0);
toku_unpin_brtnode(t, node);
r = toku_checkpoint(ct, NULL, NULL, NULL, NULL, NULL, CLIENT_CHECKPOINT);
assert_zero(r);
// repeat for the other child
toku_pin_node_with_min_bfe(&node, node_internal, t);
assert(!node->dirty);
curr_child_to_flush = 1;
num_flushes_called = 0;
flush_some_child(t->h, node, &fa);
assert(num_flushes_called == 1);
toku_pin_node_with_min_bfe(&node, node_internal, t);
assert(node->dirty);
toku_assert_entire_node_in_memory(node);
assert(node->n_children == 2);
// both buffers should be empty now
assert(toku_bnc_n_entries(node->bp[0].ptr.u.nonleaf) == 0);
assert(toku_bnc_n_entries(node->bp[1].ptr.u.nonleaf) == 0);
// now let's do a flush with an empty buffer, make sure it is ok
toku_unpin_brtnode(t, node);
r = toku_checkpoint(ct, NULL, NULL, NULL, NULL, NULL, CLIENT_CHECKPOINT);
assert_zero(r);
toku_pin_node_with_min_bfe(&node, node_internal, t);
assert(!node->dirty);
curr_child_to_flush = 0;
num_flushes_called = 0;
flush_some_child(t->h, node, &fa);
assert(num_flushes_called == 1);
toku_pin_node_with_min_bfe(&node, node_internal, t);
assert(!node->dirty); // nothing was flushed, so node better not be dirty
toku_assert_entire_node_in_memory(node);
assert(node->n_children == 2);
// both buffers should be empty now
assert(toku_bnc_n_entries(node->bp[0].ptr.u.nonleaf) == 0);
assert(toku_bnc_n_entries(node->bp[1].ptr.u.nonleaf) == 0);
toku_unpin_brtnode(t, node);
// now let's start a flush from the root, that always recursively flushes
flusher_advice_init(
&fa,
child_to_flush,
dont_destroy_bn,
always_flush,
merge_should_not_happen,
update_status,
default_pick_child_after_split,
NULL
);
// use a for loop so to get us down both paths
for (int i = 0; i < 2; i++) {
toku_pin_node_with_min_bfe(&node, node_root, t);
toku_assert_entire_node_in_memory(node); // entire root is in memory
curr_child_to_flush = i;
num_flushes_called = 0;
flush_some_child(t->h, node, &fa);
// two flushes: root -> internal, then internal -> leaf i
assert(num_flushes_called == 2);
toku_pin_node_with_min_bfe(&node, node_internal, t);
assert(!node->dirty); // nothing was flushed, so node better not be dirty
toku_unpin_brtnode(t, node);
toku_pin_node_with_min_bfe(&node, node_leaf[0], t);
assert(!node->dirty); // nothing was flushed, so node better not be dirty
toku_unpin_brtnode(t, node);
toku_pin_node_with_min_bfe(&node, node_leaf[1], t);
assert(!node->dirty); // nothing was flushed, so node better not be dirty
toku_unpin_brtnode(t, node);
}
// now one more test to show a bug was fixed
// if there is nothing to flush from parent to child,
// and child is not fully in memory, we used to crash
// so, to make sure that is fixed, let's get internal to not
// be fully in memory, and make sure the above test works
// a hack to get internal compressed
r = toku_testsetup_insert_to_nonleaf(
t,
node_internal,
BRT_INSERT,
"c",
2,
NULL,
0
);
assert_zero(r);
r = toku_checkpoint(ct, NULL, NULL, NULL, NULL, NULL, CLIENT_CHECKPOINT);
assert_zero(r);
toku_pin_node_with_min_bfe(&node, node_internal, t);
// repeatedly invoke the partial-eviction callback to compress partitions
for (int i = 0; i < 20; i++) {
PAIR_ATTR attr;
toku_brtnode_pe_callback(node, make_pair_attr(0xffffffff), &attr, NULL);
}
assert(BP_STATE(node,0) == PT_COMPRESSED);
toku_unpin_brtnode(t, node);
//now let's do the same test as above
toku_pin_node_with_min_bfe(&node, node_root, t);
toku_assert_entire_node_in_memory(node); // entire root is in memory
curr_child_to_flush = 0;
num_flushes_called = 0;
flush_some_child(t->h, node, &fa);
assert(num_flushes_called == 2);
r = toku_close_brt(t, 0); assert(r==0);
r = toku_cachetable_close(&ct); assert(r==0);
toku_free(pivots[0]);
}
// Test entry point.  Command-line arguments are ignored by this test, hence
// the __unused__ attributes on both parameters.
int
test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
doit();
return 0;
}
......@@ -10,6 +10,7 @@
#include "includes.h"
#include <brt-cachetable-wrappers.h>
#include <brt-flusher.h>
// Some constants to be used in calculations below
static const int nodesize = 1024; // Target max node size
......@@ -27,8 +28,6 @@ static TOKUTXN const null_txn = 0;
static DB * const null_db = 0;
static const char fname[]= __FILE__ ".brt";
static BRT_STATUS_S my_brt_status;
static int omt_long_cmp(OMTVALUE p, void *q)
{
LEAFENTRY a = p, b = q;
......@@ -172,7 +171,7 @@ test_split_on_boundary(void)
BRTNODE nodea, nodeb;
DBT splitk;
// if we haven't done it right, we should hit the assert in the top of move_leafentries
brtleaf_split(brt->h, &sn, &nodea, &nodeb, &splitk, TRUE, 0, NULL, &my_brt_status);
brtleaf_split(brt->h, &sn, &nodea, &nodeb, &splitk, TRUE, 0, NULL);
verify_basement_node_msns(nodea, dummy_msn_3884);
verify_basement_node_msns(nodeb, dummy_msn_3884);
......@@ -245,7 +244,7 @@ test_split_with_everything_on_the_left(void)
BRTNODE nodea, nodeb;
DBT splitk;
// if we haven't done it right, we should hit the assert in the top of move_leafentries
brtleaf_split(brt->h, &sn, &nodea, &nodeb, &splitk, TRUE, 0, NULL, &my_brt_status);
brtleaf_split(brt->h, &sn, &nodea, &nodeb, &splitk, TRUE, 0, NULL);
toku_unpin_brtnode(brt, nodeb);
r = toku_close_brt(brt, NULL); assert(r == 0);
......@@ -320,7 +319,7 @@ test_split_on_boundary_of_last_node(void)
BRTNODE nodea, nodeb;
DBT splitk;
// if we haven't done it right, we should hit the assert in the top of move_leafentries
brtleaf_split(brt->h, &sn, &nodea, &nodeb, &splitk, TRUE, 0, NULL, &my_brt_status);
brtleaf_split(brt->h, &sn, &nodea, &nodeb, &splitk, TRUE, 0, NULL);
toku_unpin_brtnode(brt, nodeb);
r = toku_close_brt(brt, NULL); assert(r == 0);
......@@ -388,7 +387,7 @@ test_split_at_begin(void)
BRTNODE nodea, nodeb;
DBT splitk;
// if we haven't done it right, we should hit the assert in the top of move_leafentries
brtleaf_split(brt->h, &sn, &nodea, &nodeb, &splitk, TRUE, 0, NULL, &my_brt_status);
brtleaf_split(brt->h, &sn, &nodea, &nodeb, &splitk, TRUE, 0, NULL);
toku_unpin_brtnode(brt, nodeb);
r = toku_close_brt(brt, NULL); assert(r == 0);
......@@ -452,7 +451,7 @@ test_split_at_end(void)
BRTNODE nodea, nodeb;
DBT splitk;
// if we haven't done it right, we should hit the assert in the top of move_leafentries
brtleaf_split(brt->h, &sn, &nodea, &nodeb, &splitk, TRUE, 0, NULL, &my_brt_status);
brtleaf_split(brt->h, &sn, &nodea, &nodeb, &splitk, TRUE, 0, NULL);
toku_unpin_brtnode(brt, nodeb);
r = toku_close_brt(brt, NULL); assert(r == 0);
......@@ -506,7 +505,7 @@ test_split_odd_nodes(void)
BRTNODE nodea, nodeb;
DBT splitk;
// if we haven't done it right, we should hit the assert in the top of move_leafentries
brtleaf_split(brt->h, &sn, &nodea, &nodeb, &splitk, TRUE, 0, NULL, &my_brt_status);
brtleaf_split(brt->h, &sn, &nodea, &nodeb, &splitk, TRUE, 0, NULL);
verify_basement_node_msns(nodea, dummy_msn_3884);
verify_basement_node_msns(nodeb, dummy_msn_3884);
......
......@@ -35,7 +35,7 @@ doit (void) {
toku_testsetup_initialize(); // must precede any other toku_testsetup calls
r = toku_testsetup_leaf(t, &node_leaf);
r = toku_testsetup_leaf(t, &node_leaf, 1, NULL, NULL);
assert(r==0);
r = toku_testsetup_nonleaf(t, 1, &node_internal, 1, &node_leaf, 0, 0);
......
......@@ -142,7 +142,7 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) {
// discard the old root block
u_int32_t fullhash = 0;
CACHEKEY *rootp;
rootp = toku_calculate_root_offset_pointer(brt, &fullhash);
rootp = toku_calculate_root_offset_pointer(brt->h, &fullhash);
// set the new root to point to the new tree
*rootp = newroot->thisnodename;
......
......@@ -112,7 +112,7 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) {
// discard the old root block
u_int32_t fullhash = 0;
CACHEKEY *rootp;
rootp = toku_calculate_root_offset_pointer(brt, &fullhash);
rootp = toku_calculate_root_offset_pointer(brt->h, &fullhash);
// set the new root to point to the new tree
*rootp = newroot->thisnodename;
......
......@@ -66,7 +66,7 @@ test_dup_in_leaf(int do_verify) {
// discard the old root block
u_int32_t fullhash = 0;
CACHEKEY *rootp;
rootp = toku_calculate_root_offset_pointer(brt, &fullhash);
rootp = toku_calculate_root_offset_pointer(brt->h, &fullhash);
BRTNODE newroot = make_node(brt, 0);
populate_leaf(newroot, htonl(2), 1);
......
......@@ -112,7 +112,7 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) {
// discard the old root block
u_int32_t fullhash = 0;
CACHEKEY *rootp;
rootp = toku_calculate_root_offset_pointer(brt, &fullhash);
rootp = toku_calculate_root_offset_pointer(brt->h, &fullhash);
// set the new root to point to the new tree
*rootp = newroot->thisnodename;
......
......@@ -127,7 +127,7 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) {
// discard the old root block
u_int32_t fullhash = 0;
CACHEKEY *rootp;
rootp = toku_calculate_root_offset_pointer(brt, &fullhash);
rootp = toku_calculate_root_offset_pointer(brt->h, &fullhash);
// set the new root to point to the new tree
*rootp = newroot->thisnodename;
......
......@@ -66,7 +66,7 @@ test_dup_in_leaf(int do_verify) {
// discard the old root block
u_int32_t fullhash = 0;
CACHEKEY *rootp;
rootp = toku_calculate_root_offset_pointer(brt, &fullhash);
rootp = toku_calculate_root_offset_pointer(brt->h, &fullhash);
BRTNODE newroot = make_node(brt, 0);
populate_leaf(newroot, htonl(2), 1);
......
......@@ -112,7 +112,7 @@ test_make_tree(int height, int fanout, int nperleaf, int do_verify) {
// discard the old root block
u_int32_t fullhash = 0;
CACHEKEY *rootp;
rootp = toku_calculate_root_offset_pointer(brt, &fullhash);
rootp = toku_calculate_root_offset_pointer(brt->h, &fullhash);
// set the new root to point to the new tree
*rootp = newroot->thisnodename;
......
......@@ -7,7 +7,7 @@
#ifndef TOKU_ULE_INTERNAL_H
#define TOKU_ULE_INTERNAL_H
#ident "$Id: ule.h 24600 2010-10-15 15:22:18Z dwells $"
#ident "$Id$"
#ident "Copyright (c) 2007-2010 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
......
......@@ -136,6 +136,7 @@ BDB_DONTRUN_TESTS = \
hotindexer-simple-abort \
hotindexer-undo-do-test \
hotindexer-with-queries \
hot-optimize-table-tests \
insert-dup-prelock \
isolation \
isolation-read-committed \
......@@ -278,6 +279,7 @@ BDB_DONTRUN_TESTS = \
test_stress4 \
test_stress5 \
test_stress6 \
test_stress7 \
test_stress_with_verify \
test_transactional_descriptor \
test_trans_desc_during_chkpt \
......@@ -355,6 +357,7 @@ DEPENDS_ON_STRESS_HELPERS = \
test_stress4 \
test_stress5 \
test_stress6 \
test_stress7 \
#blank
$(patsubst %,%.tdb,$(DEPENDS_ON_STRESS_HELPERS)): threaded_stress_test_helpers.h
......@@ -682,6 +685,7 @@ test_update_broadcast_stress.tdbrun: VGRIND=
test_update_stress.tdbrun: VGRIND=
stress-test.tdbrun: VGRIND=
stress-test.bdbrun: VGRIND=
hot-optimize-table-tests.tdbrun: VGRIND=
libs:
......
/* -*- mode: C; c-basic-offset: 4 -*- */
// hot-optimize-table-tests.c
#include "test.h"
#include "includes.h"
#include <brt-cachetable-wrappers.h>
#include "db.h"
#include "ydb.h"
const int envflags = DB_INIT_MPOOL |
DB_CREATE |
DB_THREAD |
DB_INIT_LOCK |
DB_INIT_LOG |
DB_INIT_TXN |
DB_PRIVATE;
DB_ENV* env;
unsigned int leaf_hits;
// Custom Update Function for our test BRT.
// Update callback installed on the test environment (see hot_test_setup).
// It never calls set_val(), so stored values are left unchanged; instead it
// records each visit in the caller's x_results[] array (address passed via
// `extra`) and asserts that the broadcast update touches every key exactly
// once.  `leaf_hits` counts total invocations across the whole run.
static int
update_func(DB* UU(db),
            const DBT* key,
            const DBT* old_val,
            const DBT* extra,
            void (*set_val)(const DBT* new_val, void* set_extra) __attribute__((unused)),
            void* UU(set_extra))
{
    unsigned int *x_results;
    // `extra` carries a pointer to the results array, by address.
    assert(extra->size == sizeof x_results);
    x_results = *(unsigned int **) extra->data;
    assert(x_results);
    assert(old_val->size > 0);
    unsigned int* indexptr;
    // Keys are unsigned ints; the key value indexes x_results directly.
    assert(key->size == (sizeof *indexptr));
    indexptr = (unsigned int*)key->data;
    ++leaf_hits;
    if (verbose && x_results[*indexptr] != 0) {
        printf("x_results = %p, indexptr = %p, *indexptr = %u, x_results[*indexptr] = %u\n", x_results, indexptr, *indexptr, x_results[*indexptr]);
    }
    // Each key must be visited exactly once; a repeat visit trips this.
    assert(x_results[*indexptr] == 0);
    x_results[*indexptr]++;
    unsigned int i = *indexptr;
    if (verbose && ((i + 1) % 50000 == 0)) {
        printf("applying update to %u\n", i);
    }
    return 0;
}
///
// Build a fresh TokuDB environment in ENVDIR and open it into the global
// `env`: wipes any previous directory, installs the numeric key comparator
// and the update_func broadcast-update callback, then opens with envflags.
static void
hot_test_setup(void)
{
int r = 0;
// Remove any previous environment.
CHK(system("rm -rf " ENVDIR));
// Set up a new TokuDB.
CHK(toku_os_mkdir(ENVDIR, S_IRWXU+S_IRWXG+S_IRWXO));
CHK(db_env_create(&env, 0));
env->set_errfile(env, stderr);
// Keys are unsigned ints; compare them numerically, not bytewise.
r = env->set_default_bt_compare(env, uint_dbt_cmp);CKERR(r);
// Install the callback exercised by db->update_broadcast in hot_test().
env->set_update(env, update_func);
CHK(env->open(env, ENVDIR, envflags, S_IRWXU+S_IRWXG+S_IRWXO));
}
///
// Insert `key_count` sequential unsigned-int keys into `db`, each carrying
// the same fixed-size zero-filled payload, all inside a single transaction.
// Progress is printed every `limit` rows when verbose.
static void
hot_insert_keys(DB* db, unsigned int key_count)
{
    int r = 0;
    DB_TXN * xact;
    // Report rarely for large runs, on every key for tiny runs.
    unsigned int limit = (key_count > 10) ? 100000 : 1;
    // Dummy data: one zeroed buffer shared by every row's value.
    const unsigned int DUMMY_SIZE = 100;
    size_t size = DUMMY_SIZE;
    char* dummy = toku_xmalloc(size);
    memset(dummy, 0, size);
    // Start the transaction for insertions.
    r = env->txn_begin(env, 0, &xact, 0); CKERR(r);
    unsigned int key;
    DBT key_thing;
    DBT *keyptr = dbt_init(&key_thing, &key, sizeof(key));
    DBT value_thing;
    DBT *valueptr = dbt_init(&value_thing, dummy, size);
    for (key = 0; key < key_count; ++key)
    {
        // keyptr aliases `key`, so each iteration puts the current value.
        CHK(db->put(db, xact, keyptr, valueptr, 0));
        if (verbose && (key + 1) % limit == 0) {
            printf("%u Elements inserted.\n", key + 1);  // %u: key is unsigned
        }
    }
    // Commit the insert transaction.
    r = xact->commit(xact, 0); CKERR(r);
    toku_free(dummy);
}
///
// Create (or open with DB_CREATE) the dictionary named `c` in the global
// environment inside its own short transaction; the handle is returned
// through *db.  Note: does not close any handle *db previously pointed at.
static void
hot_create_db(DB** db, const char* c)
{
    int r = 0;
    DB_TXN* xact;
    if (verbose) printf("Creating DB.\n");
    r = env->txn_begin(env, 0, &xact, 0); CKERR(r);
    CHK(db_create(db, env, 0));
    CHK((*db)->open((*db), xact, c, NULL, DB_BTREE, DB_CREATE, 0666));
    r = xact->commit(xact, 0); CKERR(r);
    if (verbose) printf("DB Created.\n");
}
///
// Core HOT test: load `size` rows, broadcast an update (which counts a
// visit per key in x_results via update_func), run hot_optimize to flatten
// the tree, then verify every key was updated exactly once.
static void
hot_test(DB* db, unsigned int size)
{
    int r = 0;
    leaf_hits = 0;
    if (verbose) printf("Insert some data.\n");
    // Insert our keys to assemble the tree.
    hot_insert_keys(db, size);
    // Insert Broadcast Message.
    if (verbose) printf("Insert Broadcast Message.\n");
    unsigned int *XMALLOC_N(size, x_results);
    memset(x_results, 0, (sizeof x_results[0]) * size);
    DBT extra;
    // Pass the ADDRESS of x_results; update_func dereferences it.
    DBT *extrap = dbt_init(&extra, &x_results, sizeof x_results);
    DB_TXN * xact;
    r = env->txn_begin(env, 0, &xact, 0); CKERR(r);
    CHK(db->update_broadcast(db, xact, extrap, 0));
    r = xact->commit(xact, 0); CKERR(r);
    // Flatten the tree.
    if (verbose) printf("Calling hot optimize...\n");
    r = db->hot_optimize(db, NULL, NULL);
    assert(r == 0);
    if (verbose) printf("HOT Finished!\n");
    // Every key must have been visited exactly once by the broadcast.
    for (unsigned int i = 0; i < size; ++i) {
        assert(x_results[i] == 1);
    }
    if (verbose) printf("Leaves hit = %u\n", leaf_hits);
    toku_free(x_results);
}
///
int
test_main(int argc, char * const argv[])
{
int r = 0;
default_parse_args(argc, argv);
hot_test_setup();
// Create and Open the Database/BRT
DB *db = NULL;
const unsigned int BIG = 4000000;
const unsigned int SMALL = 10;
const unsigned int NONE = 0;
hot_create_db(&db, "none.db");
hot_test(db, NONE);
hot_create_db(&db, "small.db");
hot_test(db, SMALL);
hot_create_db(&db, "big.db");
hot_test(db, BIG);
verbose ? printf("Exiting Test.\n") : 0;
return r;
}
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
#ident "$Id: test_stress1.c 35324 2011-10-04 01:48:45Z zardosht $"
#ident "$Id$"
#include "test.h"
#include <stdio.h>
......
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
#ident "$Id: test_stress1.c 35324 2011-10-04 01:48:45Z zardosht $"
#ident "$Id$"
#include "test.h"
#include <stdio.h>
......
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
#ident "$Id: test_stress1.c 35324 2011-10-04 01:48:45Z zardosht $"
#ident "$Id$"
#include "test.h"
#include <stdio.h>
......
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
#ident "$Id: test_stress1.c 35324 2011-10-04 01:48:45Z zardosht $"
#ident "$Id$"
#include "test.h"
#include <stdio.h>
......
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
#ident "$Id: test_stress1.c 35324 2011-10-04 01:48:45Z zardosht $"
#ident "$Id$"
#include "test.h"
#include <stdio.h>
......
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
#ident "$Id: test_stress1.c 35324 2011-10-04 01:48:45Z zardosht $"
#ident "$Id$"
#include "test.h"
#include <stdio.h>
......
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
#ident "$Id: test_stress1.c 35324 2011-10-04 01:48:45Z zardosht $"
#ident "$Id$"
#include "test.h"
#include <stdio.h>
......
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
#ident "$Id: test_stress1.c 35324 2011-10-04 01:48:45Z zardosht $"
#ident "$Id$"
#include "test.h"
#include <stdio.h>
......
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
#ident "$Id: test_stress1.c 35324 2011-10-04 01:48:45Z zardosht $"
#ident "$Id$"
#include "test.h"
#include <stdio.h>
......
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
#ident "$Id: test_stress1.c 35324 2011-10-04 01:48:45Z zardosht $"
#ident "$Id$"
#include "test.h"
#include <stdio.h>
......
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2009 Tokutek Inc. All rights reserved."
#ident "$Id: env_startup.c 20778 2010-05-28 20:38:42Z yfogel $"
#ident "$Id$"
/* Purpose of this test is to verify that a failed assert will
* cause a panic, which should be visible via engine status.
......
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
#ident "$Id: test_stress1.c 35109 2011-09-27 18:41:25Z leifwalsh $"
#ident "$Id$"
#include "test.h"
#include <stdio.h>
......
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
#ident "$Id: test_stress2.c 35151 2011-09-29 01:32:27Z zardosht $"
#ident "$Id$"
#include "test.h"
#include <stdio.h>
......
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
#ident "$Id: test_stress7.c 38515 2012-01-05 20:48:10Z leifwalsh $"
#include "test.h"
#include <stdio.h>
#include <stdlib.h>
#include <toku_pthread.h>
#include <unistd.h>
#include <memory.h>
#include <sys/stat.h>
#include <db.h>
#include "threaded_stress_test_helpers.h"
// Stress workload: two table scanners (one fast forward, one slow
// backward), a background HOT-optimize thread, a keyrange prober, plus
// the configured numbers of update threads and point-query threads,
// all running concurrently for the duration of the test.
static void
stress_table(DB_ENV *env, DB **dbp, struct cli_args *cli_args) {
    int num_elements = cli_args->num_elements;
    //
    // do insertions and queries with a loader lying around doing stuff
    //
    if (verbose) printf("starting creation of pthreads\n");
    // Thread layout: slots 0-3 are fixed roles, then updaters, then queriers.
    const int update_base = 4;
    const int ptquery_base = update_base + cli_args->num_update_threads;
    const int num_threads = ptquery_base + cli_args->num_ptquery_threads;
    struct arg myargs[num_threads];
    for (int t = 0; t < num_threads; t++) {
        arg_init(&myargs[t], num_elements, dbp, env, cli_args);
    }
    // Slot 0: the forward fast scanner.
    myargs[0].fast = TRUE;
    myargs[0].fwd = TRUE;
    myargs[0].operation = scan_op;
    // Slot 1: the backward slow scanner.
    myargs[1].fast = FALSE;
    myargs[1].fwd = FALSE;
    myargs[1].operation = scan_op;
    // Slot 2: runs HOT in the background; slot 3: keyrange queries.
    myargs[2].operation = hot_op;
    myargs[3].operation = keyrange_op;
    for (int t = update_base; t < ptquery_base; ++t) {
        myargs[t].operation = update_op;
    }
    // Remaining slots do point queries.
    for (int t = ptquery_base; t < num_threads; t++) {
        myargs[t].operation = ptquery_op;
    }
    run_workers(myargs, num_threads, cli_args->time_of_test, false);
}
// Stress-test entry point: configure defaults, apply command-line
// overrides, and run the threaded stress harness.
int
test_main(int argc, char *const argv[]) {
struct cli_args args = DEFAULT_ARGS;
// Checkpoint every 1 second.  NOTE(review): the original comment said
// "really slow", but a period of 1 is a very *frequent* checkpoint
// interval; presumably "fast" was intended -- confirm intent.
args.checkpointing_period = 1;
parse_stress_test_args(argc, argv, &args);
stress_test_main(&args);
return 0;
}
......@@ -555,6 +555,14 @@ static int UU() update_broadcast_op(DB_ENV *UU(env), DB **dbp, DB_TXN *txn, ARG
return r;
}
// Stress-test operation: run one full HOT optimize pass over the table.
// No transaction is needed; hot_optimize manages its own locking.
static int UU() hot_op(DB_ENV *UU(env), DB **dbp, DB_TXN *UU(txn), ARG UU(arg)) {
    DB *const table = *dbp;
    int rc = table->hot_optimize(table, NULL, NULL);
    CKERR(rc);
    return rc;
}
static int UU() remove_and_recreate_me(DB_ENV *env, DB **dbp, DB_TXN *UU(txn), ARG UU(arg)) {
int r;
r = (*dbp)->close(*dbp, 0); CKERR(r);
......
......@@ -23,6 +23,7 @@ const char *toku_copyright_string = "Copyright (c) 2007-2009 Tokutek Inc. All r
#include "ydb.h"
#include "ydb-internal.h"
#include "brt-internal.h"
#include "brt-flusher.h"
#include "cachetable.h"
#include "log.h"
#include "memory.h"
......@@ -2065,43 +2066,18 @@ env_get_engine_status(DB_ENV * env, ENGINE_STATUS * engstat, char * env_panic_st
engstat->search_root_retries = brt_stat.search_root_retries;
engstat->search_tries_gt_height = brt_stat.search_tries_gt_height;
engstat->search_tries_gt_heightplus3 = brt_stat.search_tries_gt_heightplus3;
engstat->cleaner_total_nodes = brt_stat.cleaner_total_nodes;
engstat->cleaner_h1_nodes = brt_stat.cleaner_h1_nodes;
engstat->cleaner_hgt1_nodes = brt_stat.cleaner_hgt1_nodes;
engstat->cleaner_empty_nodes = brt_stat.cleaner_empty_nodes;
engstat->cleaner_nodes_dirtied = brt_stat.cleaner_nodes_dirtied;
engstat->cleaner_max_buffer_size = brt_stat.cleaner_max_buffer_size;
engstat->cleaner_min_buffer_size = brt_stat.cleaner_min_buffer_size;
engstat->cleaner_total_buffer_size = brt_stat.cleaner_total_buffer_size;
engstat->cleaner_max_buffer_workdone = brt_stat.cleaner_max_buffer_workdone;
engstat->cleaner_min_buffer_workdone = brt_stat.cleaner_min_buffer_workdone;
engstat->cleaner_total_buffer_workdone = brt_stat.cleaner_total_buffer_workdone;
engstat->cleaner_num_leaves_unmerged = brt_stat.cleaner_num_leaves_unmerged;
engstat->flush_total = brt_stat.flush_total;
engstat->flush_in_memory = brt_stat.flush_in_memory;
engstat->flush_needed_io = brt_stat.flush_needed_io;
engstat->flush_cascades = brt_stat.flush_cascades;
engstat->flush_cascades_1 = brt_stat.flush_cascades_1;
engstat->flush_cascades_2 = brt_stat.flush_cascades_2;
engstat->flush_cascades_3 = brt_stat.flush_cascades_3;
engstat->flush_cascades_4 = brt_stat.flush_cascades_4;
engstat->flush_cascades_5 = brt_stat.flush_cascades_5;
engstat->flush_cascades_gt_5 = brt_stat.flush_cascades_gt_5;
engstat->disk_flush_leaf = brt_stat.disk_flush_leaf;
engstat->disk_flush_nonleaf = brt_stat.disk_flush_nonleaf;
engstat->disk_flush_leaf_for_checkpoint = brt_stat.disk_flush_leaf_for_checkpoint;
engstat->disk_flush_nonleaf_for_checkpoint = brt_stat.disk_flush_nonleaf_for_checkpoint;
engstat->create_leaf = brt_stat.create_leaf;
engstat->create_nonleaf = brt_stat.create_nonleaf;
engstat->create_leaf = brt_stat.create_leaf;
engstat->create_nonleaf = brt_stat.create_nonleaf;
engstat->destroy_leaf = brt_stat.destroy_leaf;
engstat->destroy_nonleaf = brt_stat.destroy_nonleaf;
engstat->split_leaf = brt_stat.split_leaf;
engstat->split_nonleaf = brt_stat.split_nonleaf;
engstat->merge_leaf = brt_stat.merge_leaf;
engstat->merge_nonleaf = brt_stat.merge_nonleaf;
engstat->dirty_leaf = brt_stat.dirty_leaf;
engstat->dirty_nonleaf = brt_stat.dirty_nonleaf;
engstat->balance_leaf = brt_stat.balance_leaf;
engstat->msg_bytes_in = brt_stat.msg_bytes_in;
engstat->msg_bytes_out = brt_stat.msg_bytes_out;
engstat->msg_bytes_curr = brt_stat.msg_bytes_curr;
......@@ -2128,6 +2104,45 @@ env_get_engine_status(DB_ENV * env, ENGINE_STATUS * engstat, char * env_panic_st
engstat->num_msg_buffer_fetched_prefetch = brt_stat.num_msg_buffer_fetched_prefetch;
engstat->num_msg_buffer_fetched_write = brt_stat.num_msg_buffer_fetched_write;
}
{
BRT_FLUSHER_STATUS_S brt_flusher_stat;
toku_brt_flusher_get_status(&brt_flusher_stat);
engstat->cleaner_total_nodes = brt_flusher_stat.cleaner_total_nodes;
engstat->cleaner_h1_nodes = brt_flusher_stat.cleaner_h1_nodes;
engstat->cleaner_hgt1_nodes = brt_flusher_stat.cleaner_hgt1_nodes;
engstat->cleaner_empty_nodes = brt_flusher_stat.cleaner_empty_nodes;
engstat->cleaner_nodes_dirtied = brt_flusher_stat.cleaner_nodes_dirtied;
engstat->cleaner_max_buffer_size = brt_flusher_stat.cleaner_max_buffer_size;
engstat->cleaner_min_buffer_size = brt_flusher_stat.cleaner_min_buffer_size;
engstat->cleaner_total_buffer_size = brt_flusher_stat.cleaner_total_buffer_size;
engstat->cleaner_max_buffer_workdone = brt_flusher_stat.cleaner_max_buffer_workdone;
engstat->cleaner_min_buffer_workdone = brt_flusher_stat.cleaner_min_buffer_workdone;
engstat->cleaner_total_buffer_workdone = brt_flusher_stat.cleaner_total_buffer_workdone;
engstat->cleaner_num_dirtied_for_leaf_merge = brt_flusher_stat.cleaner_num_dirtied_for_leaf_merge;
engstat->flush_total = brt_flusher_stat.flush_total;
engstat->flush_in_memory = brt_flusher_stat.flush_in_memory;
engstat->flush_needed_io = brt_flusher_stat.flush_needed_io;
engstat->flush_cascades = brt_flusher_stat.flush_cascades;
engstat->flush_cascades_1 = brt_flusher_stat.flush_cascades_1;
engstat->flush_cascades_2 = brt_flusher_stat.flush_cascades_2;
engstat->flush_cascades_3 = brt_flusher_stat.flush_cascades_3;
engstat->flush_cascades_4 = brt_flusher_stat.flush_cascades_4;
engstat->flush_cascades_5 = brt_flusher_stat.flush_cascades_5;
engstat->flush_cascades_gt_5 = brt_flusher_stat.flush_cascades_gt_5;
engstat->split_leaf = brt_flusher_stat.split_leaf;
engstat->split_nonleaf = brt_flusher_stat.split_nonleaf;
engstat->merge_leaf = brt_flusher_stat.merge_leaf;
engstat->merge_nonleaf = brt_flusher_stat.merge_nonleaf;
engstat->balance_leaf = brt_flusher_stat.balance_leaf;
}
{
BRT_HOT_STATUS_S hot_stat;
toku_brt_hot_get_status(&hot_stat);
engstat->hot_num_started = hot_stat.num_started;
engstat->hot_num_completed = hot_stat.num_completed;
engstat->hot_num_aborted = hot_stat.num_aborted;
engstat->hot_max_root_flush_count = hot_stat.max_root_flush_count;
}
{
u_int64_t fsync_count, fsync_time;
toku_get_fsync_times(&fsync_count, &fsync_time);
......@@ -2373,7 +2388,7 @@ env_get_engine_status_text(DB_ENV * env, char * buff, int bufsiz) {
n += snprintf(buff + n, bufsiz - n, "cleaner_max_buffer_workdone %"PRIu64"\n", engstat.cleaner_max_buffer_workdone);
n += snprintf(buff + n, bufsiz - n, "cleaner_min_buffer_workdone %"PRIu64"\n", engstat.cleaner_min_buffer_workdone);
n += snprintf(buff + n, bufsiz - n, "cleaner_total_buffer_workdone %"PRIu64"\n", engstat.cleaner_total_buffer_workdone);
n += snprintf(buff + n, bufsiz - n, "cleaner_num_leaves_unmerged %"PRIu64"\n", engstat.cleaner_num_leaves_unmerged);
n += snprintf(buff + n, bufsiz - n, "cleaner_num_dirtied_for_leaf_merge %"PRIu64"\n", engstat.cleaner_num_dirtied_for_leaf_merge);
n += snprintf(buff + n, bufsiz - n, "flush_total %"PRIu64"\n", engstat.flush_total);
n += snprintf(buff + n, bufsiz - n, "flush_in_memory %"PRIu64"\n", engstat.flush_in_memory);
n += snprintf(buff + n, bufsiz - n, "flush_needed_io %"PRIu64"\n", engstat.flush_needed_io);
......@@ -2399,6 +2414,10 @@ env_get_engine_status_text(DB_ENV * env, char * buff, int bufsiz) {
n += snprintf(buff + n, bufsiz - n, "dirty_leaf %"PRIu64"\n", engstat.dirty_leaf);
n += snprintf(buff + n, bufsiz - n, "dirty_nonleaf %"PRIu64"\n", engstat.dirty_nonleaf);
n += snprintf(buff + n, bufsiz - n, "balance_leaf %"PRIu64"\n", engstat.balance_leaf);
n += snprintf(buff + n, bufsiz - n, "hot_num_started %"PRIu64"\n", engstat.hot_num_started);
n += snprintf(buff + n, bufsiz - n, "hot_num_completed %"PRIu64"\n", engstat.hot_num_completed);
n += snprintf(buff + n, bufsiz - n, "hot_num_aborted %"PRIu64"\n", engstat.hot_num_aborted);
n += snprintf(buff + n, bufsiz - n, "hot_max_root_flush_count %"PRIu64"\n", engstat.hot_max_root_flush_count);
n += snprintf(buff + n, bufsiz - n, "msg_bytes_in %"PRIu64"\n", engstat.msg_bytes_in);
n += snprintf(buff + n, bufsiz - n, "msg_bytes_out %"PRIu64"\n", engstat.msg_bytes_out);
n += snprintf(buff + n, bufsiz - n, "msg_bytes_curr %"PRIu64"\n", engstat.msg_bytes_curr);
......@@ -6311,6 +6330,44 @@ toku_db_optimize(DB *db) {
return r;
}
// Run HOT (hot optimize table) on `db`, reporting progress through
// `progress_callback` (may be NULL).  Returns 0 on success, or the error
// from taking the directory read lock / from the optimize itself.
static int
toku_db_hot_optimize(DB *db,
int (*progress_callback)(void *extra, float progress),
void *progress_extra)
{
HANDLE_PANICKED_DB(db);
int r = 0;
// #4356 Take directory read lock around hot optimize to prevent
// race condition of another thread deleting the dictionary during
// the hot optimize. Create a long-lived transaction to hold the
// lock, but the transaction does nothing else so the rollback log
// is tiny and the txnid does not appear in any dictionary.
int using_txns = db->dbenv->i->open_flags & DB_INIT_TXN;
DB_TXN *txn;
if (using_txns) {
// ydb lock is held only while beginning the txn and grabbing the
// directory lock, not for the (long) optimize itself.
toku_ydb_lock();
int rx = toku_txn_begin(db->dbenv, NULL, &txn, DB_TXN_NOSYNC, 1);
invariant_zero(rx);
r = toku_grab_read_lock_on_directory(db, txn);
toku_ydb_unlock();
}
// If we are unable to get a directory read lock, do nothing.
if (r == 0) {
r = toku_brt_hot_optimize(db->i->brt,
progress_callback,
progress_extra);
}
// Commit the lock-holding txn regardless of the optimize result, so
// the directory read lock is always released.
if (using_txns) {
int rx = locked_txn_commit(txn, 0);
invariant_zero(rx);
}
return r;
}
static int
toku_db_flatten(DB *db, DB_TXN *txn) {
HANDLE_PANICKED_DB(db);
......@@ -6328,7 +6385,6 @@ autotxn_db_flatten(DB* db, DB_TXN* txn) {
return toku_db_destruct_autotxn(txn, r, changed);
}
static int
locked_db_flatten(DB *db, DB_TXN *txn) {
toku_ydb_lock(); int r = autotxn_db_flatten(db, txn); toku_ydb_unlock(); return r;
......@@ -6342,6 +6398,15 @@ locked_db_optimize(DB *db) {
return r;
}
// Thin pass-through for the DB vtable: toku_db_hot_optimize takes the
// ydb lock itself where needed, so none is taken here.
static int
locked_db_hot_optimize(DB *db,
                       int (*progress_callback)(void *extra, float progress),
                       void *progress_extra)
{
    return toku_db_hot_optimize(db, progress_callback, progress_extra);
}
static int
db_get_fragmentation(DB * db, TOKU_DB_FRAGMENTATION report) {
HANDLE_PANICKED_DB(db);
......@@ -6461,6 +6526,7 @@ toku_db_create(DB ** db, DB_ENV * env, u_int32_t flags) {
SDB(getf_set);
SDB(flatten);
SDB(optimize);
SDB(hot_optimize);
SDB(get_fragmentation);
SDB(set_indexer);
SDB(get_indexer);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment