Commit fb76250b authored by John Esmet's avatar John Esmet

Consolidate garbage collection information into txn_gc_info. Explicitly
distinguish the oldest referenced xid we can use for implicit promotion
from the one we can use for simple garbage collection.
parent f12bdc75
...@@ -1544,11 +1544,7 @@ ft_merge_child( ...@@ -1544,11 +1544,7 @@ ft_merge_child(
} }
} }
static void ft_flush_some_child( void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa)
FT ft,
FTNODE parent,
struct flusher_advice *fa
)
// Effect: This function does the following: // Effect: This function does the following:
// - Pick a child of parent (the heaviest child), // - Pick a child of parent (the heaviest child),
// - flush from parent to child, // - flush from parent to child,
...@@ -1562,7 +1558,7 @@ static void ft_flush_some_child( ...@@ -1562,7 +1558,7 @@ static void ft_flush_some_child(
NONLEAF_CHILDINFO bnc = NULL; NONLEAF_CHILDINFO bnc = NULL;
paranoid_invariant(parent->height>0); paranoid_invariant(parent->height>0);
toku_assert_entire_node_in_memory(parent); toku_assert_entire_node_in_memory(parent);
TXNID oldest_referenced_xid = parent->oldest_referenced_xid_known; TXNID parent_oldest_referenced_xid_known = parent->oldest_referenced_xid_known;
// pick the child we want to flush to // pick the child we want to flush to
int childnum = fa->pick_child(ft, parent, fa->extra); int childnum = fa->pick_child(ft, parent, fa->extra);
...@@ -1655,7 +1651,7 @@ static void ft_flush_some_child( ...@@ -1655,7 +1651,7 @@ static void ft_flush_some_child(
ft, ft,
bnc, bnc,
child, child,
oldest_referenced_xid parent_oldest_referenced_xid_known
); );
destroy_nonleaf_childinfo(bnc); destroy_nonleaf_childinfo(bnc);
} }
...@@ -1679,10 +1675,10 @@ static void ft_flush_some_child( ...@@ -1679,10 +1675,10 @@ static void ft_flush_some_child(
parent = NULL; parent = NULL;
} }
// //
// it is the responsibility of ft_flush_some_child to unpin child // it is the responsibility of toku_ft_flush_some_child to unpin child
// //
if (child->height > 0 && fa->should_recursively_flush(child, fa->extra)) { if (child->height > 0 && fa->should_recursively_flush(child, fa->extra)) {
ft_flush_some_child(ft, child, fa); toku_ft_flush_some_child(ft, child, fa);
} }
else { else {
toku_unpin_ftnode_off_client_thread(ft, child); toku_unpin_ftnode_off_client_thread(ft, child);
...@@ -1709,13 +1705,6 @@ static void ft_flush_some_child( ...@@ -1709,13 +1705,6 @@ static void ft_flush_some_child(
} }
} }
void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa) {
// Vanilla flush_some_child flushes from parent to child without
// providing a meaningful oldest_referenced_xid. No simple garbage
// collection is performed.
return ft_flush_some_child(ft, parent, fa);
}
static void static void
update_cleaner_status( update_cleaner_status(
FTNODE node, FTNODE node,
...@@ -1857,7 +1846,7 @@ struct flusher_extra { ...@@ -1857,7 +1846,7 @@ struct flusher_extra {
FT h; FT h;
FTNODE node; FTNODE node;
NONLEAF_CHILDINFO bnc; NONLEAF_CHILDINFO bnc;
TXNID oldest_referenced_xid; TXNID parent_oldest_referenced_xid_known;
}; };
// //
...@@ -1896,16 +1885,16 @@ static void flush_node_fun(void *fe_v) ...@@ -1896,16 +1885,16 @@ static void flush_node_fun(void *fe_v)
fe->h, fe->h,
fe->bnc, fe->bnc,
fe->node, fe->node,
fe->oldest_referenced_xid fe->parent_oldest_referenced_xid_known
); );
destroy_nonleaf_childinfo(fe->bnc); destroy_nonleaf_childinfo(fe->bnc);
// after the flush has completed, now check to see if the node needs flushing // after the flush has completed, now check to see if the node needs flushing
// If so, call ft_flush_some_child on the node (because this flush intends to // If so, call toku_ft_flush_some_child on the node (because this flush intends to
// pass a meaningful oldest referenced xid for simple garbage collection), and it is the // pass a meaningful oldest referenced xid for simple garbage collection), and it is the
// responsibility of the flush to unlock the node. otherwise, we unlock it here. // responsibility of the flush to unlock the node. otherwise, we unlock it here.
if (fe->node->height > 0 && toku_ft_nonleaf_is_gorged(fe->node, fe->h->h->nodesize)) { if (fe->node->height > 0 && toku_ft_nonleaf_is_gorged(fe->node, fe->h->h->nodesize)) {
ft_flush_some_child(fe->h, fe->node, &fa); toku_ft_flush_some_child(fe->h, fe->node, &fa);
} }
else { else {
toku_unpin_ftnode_off_client_thread(fe->h,fe->node); toku_unpin_ftnode_off_client_thread(fe->h,fe->node);
...@@ -1916,7 +1905,7 @@ static void flush_node_fun(void *fe_v) ...@@ -1916,7 +1905,7 @@ static void flush_node_fun(void *fe_v)
// bnc, which means we are tasked with flushing some // bnc, which means we are tasked with flushing some
// buffer in the node. // buffer in the node.
// It is the responsibility of flush some child to unlock the node // It is the responsibility of flush some child to unlock the node
ft_flush_some_child(fe->h, fe->node, &fa); toku_ft_flush_some_child(fe->h, fe->node, &fa);
} }
remove_background_job_from_cf(fe->h->cf); remove_background_job_from_cf(fe->h->cf);
toku_free(fe); toku_free(fe);
...@@ -1927,13 +1916,13 @@ place_node_and_bnc_on_background_thread( ...@@ -1927,13 +1916,13 @@ place_node_and_bnc_on_background_thread(
FT h, FT h,
FTNODE node, FTNODE node,
NONLEAF_CHILDINFO bnc, NONLEAF_CHILDINFO bnc,
TXNID oldest_referenced_xid) TXNID parent_oldest_referenced_xid_known)
{ {
struct flusher_extra *XMALLOC(fe); struct flusher_extra *XMALLOC(fe);
fe->h = h; fe->h = h;
fe->node = node; fe->node = node;
fe->bnc = bnc; fe->bnc = bnc;
fe->oldest_referenced_xid = oldest_referenced_xid; fe->parent_oldest_referenced_xid_known = parent_oldest_referenced_xid_known;
cachefile_kibbutz_enq(h->cf, flush_node_fun, fe); cachefile_kibbutz_enq(h->cf, flush_node_fun, fe);
} }
...@@ -1953,7 +1942,7 @@ place_node_and_bnc_on_background_thread( ...@@ -1953,7 +1942,7 @@ place_node_and_bnc_on_background_thread(
void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent) void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent)
{ {
toku::context flush_ctx(CTX_FLUSH); toku::context flush_ctx(CTX_FLUSH);
TXNID oldest_referenced_xid_known = parent->oldest_referenced_xid_known; TXNID parent_oldest_referenced_xid_known = parent->oldest_referenced_xid_known;
// //
// first let's see if we can detach buffer on client thread // first let's see if we can detach buffer on client thread
// and pick the child we want to flush to // and pick the child we want to flush to
...@@ -1970,7 +1959,7 @@ void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent) ...@@ -1970,7 +1959,7 @@ void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent)
// In this case, we could not lock the child, so just place the parent on the background thread // In this case, we could not lock the child, so just place the parent on the background thread
// In the callback, we will use toku_ft_flush_some_child, which checks to // In the callback, we will use toku_ft_flush_some_child, which checks to
// see if we should blow away the old basement nodes. // see if we should blow away the old basement nodes.
place_node_and_bnc_on_background_thread(h, parent, NULL, oldest_referenced_xid_known); place_node_and_bnc_on_background_thread(h, parent, NULL, parent_oldest_referenced_xid_known);
} }
else { else {
// //
...@@ -1999,7 +1988,7 @@ void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent) ...@@ -1999,7 +1988,7 @@ void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent)
// so, because we know for sure the child is not // so, because we know for sure the child is not
// reactive, we can unpin the parent // reactive, we can unpin the parent
// //
place_node_and_bnc_on_background_thread(h, child, bnc, oldest_referenced_xid_known); place_node_and_bnc_on_background_thread(h, child, bnc, parent_oldest_referenced_xid_known);
toku_unpin_ftnode(h, parent); toku_unpin_ftnode(h, parent);
} }
else { else {
...@@ -2009,7 +1998,7 @@ void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent) ...@@ -2009,7 +1998,7 @@ void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent)
toku_unpin_ftnode(h, child); toku_unpin_ftnode(h, child);
// Again, we'll have the parent on the background thread, so // Again, we'll have the parent on the background thread, so
// we don't need to destroy the basement nodes yet. // we don't need to destroy the basement nodes yet.
place_node_and_bnc_on_background_thread(h, parent, NULL, oldest_referenced_xid_known); place_node_and_bnc_on_background_thread(h, parent, NULL, parent_oldest_referenced_xid_known);
} }
} }
} }
......
...@@ -229,7 +229,7 @@ long toku_bnc_memory_size(NONLEAF_CHILDINFO bnc); ...@@ -229,7 +229,7 @@ long toku_bnc_memory_size(NONLEAF_CHILDINFO bnc);
long toku_bnc_memory_used(NONLEAF_CHILDINFO bnc); long toku_bnc_memory_used(NONLEAF_CHILDINFO bnc);
void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, DESCRIPTOR desc, ft_compare_func cmp); void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, DESCRIPTOR desc, ft_compare_func cmp);
void toku_bnc_empty(NONLEAF_CHILDINFO bnc); void toku_bnc_empty(NONLEAF_CHILDINFO bnc);
void toku_bnc_flush_to_child(FT h, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID oldest_referenced_xid); void toku_bnc_flush_to_child(FT h, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID parent_oldest_referenced_xid_known);
bool toku_bnc_should_promote(FT ft, NONLEAF_CHILDINFO bnc) __attribute__((const, nonnull)); bool toku_bnc_should_promote(FT ft, NONLEAF_CHILDINFO bnc) __attribute__((const, nonnull));
bool toku_ft_nonleaf_is_gorged(FTNODE node, uint32_t nodesize); bool toku_ft_nonleaf_is_gorged(FTNODE node, uint32_t nodesize);
...@@ -1027,7 +1027,7 @@ int toku_testsetup_insert_to_nonleaf (FT_HANDLE brt, BLOCKNUM, enum ft_msg_type, ...@@ -1027,7 +1027,7 @@ int toku_testsetup_insert_to_nonleaf (FT_HANDLE brt, BLOCKNUM, enum ft_msg_type,
void toku_pin_node_with_min_bfe(FTNODE* node, BLOCKNUM b, FT_HANDLE t); void toku_pin_node_with_min_bfe(FTNODE* node, BLOCKNUM b, FT_HANDLE t);
// toku_ft_root_put_cmd() accepts non-constant cmd because this is where we set the msn // toku_ft_root_put_cmd() accepts non-constant cmd because this is where we set the msn
void toku_ft_root_put_cmd(FT h, FT_MSG_S * cmd, TXNID oldest_referenced_xid, GC_INFO gc_info); void toku_ft_root_put_cmd(FT h, FT_MSG_S * cmd, txn_gc_info *gc_info);
void void
toku_get_node_for_verify( toku_get_node_for_verify(
...@@ -1198,9 +1198,7 @@ toku_ft_bn_apply_cmd_once ( ...@@ -1198,9 +1198,7 @@ toku_ft_bn_apply_cmd_once (
const FT_MSG cmd, const FT_MSG cmd,
uint32_t idx, uint32_t idx,
LEAFENTRY le, LEAFENTRY le,
TXNID oldest_referenced_xid, txn_gc_info *gc_info,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
uint64_t *workdonep, uint64_t *workdonep,
STAT64INFO stats_to_update STAT64INFO stats_to_update
); );
...@@ -1212,9 +1210,7 @@ toku_ft_bn_apply_cmd ( ...@@ -1212,9 +1210,7 @@ toku_ft_bn_apply_cmd (
DESCRIPTOR desc, DESCRIPTOR desc,
BASEMENTNODE bn, BASEMENTNODE bn,
FT_MSG cmd, FT_MSG cmd,
TXNID oldest_referenced_xid, txn_gc_info *gc_info,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
uint64_t *workdone, uint64_t *workdone,
STAT64INFO stats_to_update STAT64INFO stats_to_update
); );
...@@ -1227,8 +1223,7 @@ toku_ft_leaf_apply_cmd ( ...@@ -1227,8 +1223,7 @@ toku_ft_leaf_apply_cmd (
FTNODE node, FTNODE node,
int target_childnum, int target_childnum,
FT_MSG cmd, FT_MSG cmd,
GC_INFO gc_info, txn_gc_info *gc_info,
txn_manager_state *txn_state_for_gc,
uint64_t *workdone, uint64_t *workdone,
STAT64INFO stats_to_update STAT64INFO stats_to_update
); );
...@@ -1242,8 +1237,7 @@ toku_ft_node_put_cmd ( ...@@ -1242,8 +1237,7 @@ toku_ft_node_put_cmd (
int target_childnum, int target_childnum,
FT_MSG cmd, FT_MSG cmd,
bool is_fresh, bool is_fresh,
GC_INFO gc_info, txn_gc_info *gc_info,
txn_manager_state *txn_state_for_gc,
size_t flow_deltas[], size_t flow_deltas[],
STAT64INFO stats_to_update STAT64INFO stats_to_update
); );
......
...@@ -869,7 +869,7 @@ void toku_ftnode_clone_callback( ...@@ -869,7 +869,7 @@ void toku_ftnode_clone_callback(
*cloned_value_data = cloned_node; *cloned_value_data = cloned_node;
} }
static void ft_leaf_run_gc(FTNODE node, FT ft); static void ft_leaf_run_gc(FT ft, FTNODE node);
void toku_ftnode_flush_callback( void toku_ftnode_flush_callback(
CACHEFILE UU(cachefile), CACHEFILE UU(cachefile),
...@@ -894,7 +894,7 @@ void toku_ftnode_flush_callback( ...@@ -894,7 +894,7 @@ void toku_ftnode_flush_callback(
if (write_me) { if (write_me) {
toku_assert_entire_node_in_memory(ftnode); toku_assert_entire_node_in_memory(ftnode);
if (height == 0) { if (height == 0) {
ft_leaf_run_gc(ftnode, h); ft_leaf_run_gc(h, ftnode);
} }
if (height == 0 && !is_clone) { if (height == 0 && !is_clone) {
ftnode_update_disk_stats(ftnode, h, for_checkpoint); ftnode_update_disk_stats(ftnode, h, for_checkpoint);
...@@ -1690,9 +1690,7 @@ toku_ft_bn_apply_cmd_once ( ...@@ -1690,9 +1690,7 @@ toku_ft_bn_apply_cmd_once (
const FT_MSG cmd, const FT_MSG cmd,
uint32_t idx, uint32_t idx,
LEAFENTRY le, LEAFENTRY le,
TXNID oldest_referenced_xid, txn_gc_info *gc_info,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
uint64_t *workdone, uint64_t *workdone,
STAT64INFO stats_to_update STAT64INFO stats_to_update
) )
...@@ -1718,9 +1716,7 @@ toku_ft_bn_apply_cmd_once ( ...@@ -1718,9 +1716,7 @@ toku_ft_bn_apply_cmd_once (
le, le,
&bn->data_buffer, &bn->data_buffer,
idx, idx,
oldest_referenced_xid,
gc_info, gc_info,
txn_state_for_gc,
&new_le, &new_le,
&numbytes_delta &numbytes_delta
); );
...@@ -1768,9 +1764,7 @@ struct setval_extra_s { ...@@ -1768,9 +1764,7 @@ struct setval_extra_s {
const DBT *key; const DBT *key;
uint32_t idx; uint32_t idx;
LEAFENTRY le; LEAFENTRY le;
TXNID oldest_referenced_xid; txn_gc_info *gc_info;
GC_INFO gc_info;
txn_manager_state *txn_state_for_gc;
uint64_t * workdone; // set by toku_ft_bn_apply_cmd_once() uint64_t * workdone; // set by toku_ft_bn_apply_cmd_once()
STAT64INFO stats_to_update; STAT64INFO stats_to_update;
}; };
...@@ -1803,8 +1797,7 @@ static void setval_fun (const DBT *new_val, void *svextra_v) { ...@@ -1803,8 +1797,7 @@ static void setval_fun (const DBT *new_val, void *svextra_v) {
} }
toku_ft_bn_apply_cmd_once(svextra->bn, &msg, toku_ft_bn_apply_cmd_once(svextra->bn, &msg,
svextra->idx, svextra->le, svextra->idx, svextra->le,
svextra->oldest_referenced_xid, svextra->gc_info, svextra->gc_info,
svextra->txn_state_for_gc,
svextra->workdone, svextra->stats_to_update); svextra->workdone, svextra->stats_to_update);
svextra->setval_r = 0; svextra->setval_r = 0;
} }
...@@ -1818,9 +1811,7 @@ static int do_update(ft_update_func update_fun, DESCRIPTOR desc, BASEMENTNODE bn ...@@ -1818,9 +1811,7 @@ static int do_update(ft_update_func update_fun, DESCRIPTOR desc, BASEMENTNODE bn
LEAFENTRY le, LEAFENTRY le,
void* keydata, void* keydata,
uint32_t keylen, uint32_t keylen,
TXNID oldest_referenced_xid, txn_gc_info *gc_info,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
uint64_t * workdone, uint64_t * workdone,
STAT64INFO stats_to_update) { STAT64INFO stats_to_update) {
LEAFENTRY le_for_update; LEAFENTRY le_for_update;
...@@ -1865,8 +1856,8 @@ static int do_update(ft_update_func update_fun, DESCRIPTOR desc, BASEMENTNODE bn ...@@ -1865,8 +1856,8 @@ static int do_update(ft_update_func update_fun, DESCRIPTOR desc, BASEMENTNODE bn
le_for_update = le; le_for_update = le;
struct setval_extra_s setval_extra = {setval_tag, false, 0, bn, cmd->msn, cmd->xids, struct setval_extra_s setval_extra = {setval_tag, false, 0, bn, cmd->msn, cmd->xids,
keyp, idx, le_for_update, oldest_referenced_xid, gc_info, keyp, idx, le_for_update, gc_info,
txn_state_for_gc, workdone, stats_to_update}; workdone, stats_to_update};
// call handlerton's brt->update_fun(), which passes setval_extra to setval_fun() // call handlerton's brt->update_fun(), which passes setval_extra to setval_fun()
FAKE_DB(db, desc); FAKE_DB(db, desc);
int r = update_fun( int r = update_fun(
...@@ -1889,9 +1880,7 @@ toku_ft_bn_apply_cmd ( ...@@ -1889,9 +1880,7 @@ toku_ft_bn_apply_cmd (
DESCRIPTOR desc, DESCRIPTOR desc,
BASEMENTNODE bn, BASEMENTNODE bn,
FT_MSG cmd, FT_MSG cmd,
TXNID oldest_referenced_xid_known, txn_gc_info *gc_info,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
uint64_t *workdone, uint64_t *workdone,
STAT64INFO stats_to_update STAT64INFO stats_to_update
) )
...@@ -1938,7 +1927,7 @@ toku_ft_bn_apply_cmd ( ...@@ -1938,7 +1927,7 @@ toku_ft_bn_apply_cmd (
} else { } else {
assert_zero(r); assert_zero(r);
} }
toku_ft_bn_apply_cmd_once(bn, cmd, idx, storeddata, oldest_referenced_xid_known, gc_info, txn_state_for_gc, workdone, stats_to_update); toku_ft_bn_apply_cmd_once(bn, cmd, idx, storeddata, gc_info, workdone, stats_to_update);
// if the insertion point is within a window of the right edge of // if the insertion point is within a window of the right edge of
// the leaf then it is sequential // the leaf then it is sequential
...@@ -1970,7 +1959,7 @@ toku_ft_bn_apply_cmd ( ...@@ -1970,7 +1959,7 @@ toku_ft_bn_apply_cmd (
); );
if (r == DB_NOTFOUND) break; if (r == DB_NOTFOUND) break;
assert_zero(r); assert_zero(r);
toku_ft_bn_apply_cmd_once(bn, cmd, idx, storeddata, oldest_referenced_xid_known, gc_info, txn_state_for_gc, workdone, stats_to_update); toku_ft_bn_apply_cmd_once(bn, cmd, idx, storeddata, gc_info, workdone, stats_to_update);
break; break;
} }
...@@ -1992,7 +1981,7 @@ toku_ft_bn_apply_cmd ( ...@@ -1992,7 +1981,7 @@ toku_ft_bn_apply_cmd (
cmd->u.id.key = &curr_keydbt; cmd->u.id.key = &curr_keydbt;
int deleted = 0; int deleted = 0;
if (!le_is_clean(storeddata)) { //If already clean, nothing to do. if (!le_is_clean(storeddata)) { //If already clean, nothing to do.
toku_ft_bn_apply_cmd_once(bn, cmd, idx, storeddata, oldest_referenced_xid_known, gc_info, txn_state_for_gc, workdone, stats_to_update); toku_ft_bn_apply_cmd_once(bn, cmd, idx, storeddata, gc_info, workdone, stats_to_update);
uint32_t new_omt_size = bn->data_buffer.omt_size(); uint32_t new_omt_size = bn->data_buffer.omt_size();
if (new_omt_size != omt_size) { if (new_omt_size != omt_size) {
paranoid_invariant(new_omt_size+1 == omt_size); paranoid_invariant(new_omt_size+1 == omt_size);
...@@ -2024,7 +2013,7 @@ toku_ft_bn_apply_cmd ( ...@@ -2024,7 +2013,7 @@ toku_ft_bn_apply_cmd (
cmd->u.id.key = &curr_keydbt; cmd->u.id.key = &curr_keydbt;
int deleted = 0; int deleted = 0;
if (le_has_xids(storeddata, cmd->xids)) { if (le_has_xids(storeddata, cmd->xids)) {
toku_ft_bn_apply_cmd_once(bn, cmd, idx, storeddata, oldest_referenced_xid_known, gc_info, txn_state_for_gc, workdone, stats_to_update); toku_ft_bn_apply_cmd_once(bn, cmd, idx, storeddata, gc_info, workdone, stats_to_update);
uint32_t new_omt_size = bn->data_buffer.omt_size(); uint32_t new_omt_size = bn->data_buffer.omt_size();
if (new_omt_size != omt_size) { if (new_omt_size != omt_size) {
paranoid_invariant(new_omt_size+1 == omt_size); paranoid_invariant(new_omt_size+1 == omt_size);
...@@ -2056,9 +2045,9 @@ toku_ft_bn_apply_cmd ( ...@@ -2056,9 +2045,9 @@ toku_ft_bn_apply_cmd (
key = cmd->u.id.key->data; key = cmd->u.id.key->data;
keylen = cmd->u.id.key->size; keylen = cmd->u.id.key->size;
} }
r = do_update(update_fun, desc, bn, cmd, idx, NULL, NULL, 0, oldest_referenced_xid_known, gc_info, txn_state_for_gc, workdone, stats_to_update); r = do_update(update_fun, desc, bn, cmd, idx, NULL, NULL, 0, gc_info, workdone, stats_to_update);
} else if (r==0) { } else if (r==0) {
r = do_update(update_fun, desc, bn, cmd, idx, storeddata, key, keylen, oldest_referenced_xid_known, gc_info, txn_state_for_gc, workdone, stats_to_update); r = do_update(update_fun, desc, bn, cmd, idx, storeddata, key, keylen, gc_info, workdone, stats_to_update);
} // otherwise, a worse error, just return it } // otherwise, a worse error, just return it
break; break;
} }
...@@ -2081,7 +2070,7 @@ toku_ft_bn_apply_cmd ( ...@@ -2081,7 +2070,7 @@ toku_ft_bn_apply_cmd (
// This is broken below. Have a compilation error checked // This is broken below. Have a compilation error checked
// in as a reminder // in as a reminder
r = do_update(update_fun, desc, bn, cmd, idx, storeddata, curr_key, curr_keylen, oldest_referenced_xid_known, gc_info, txn_state_for_gc, workdone, stats_to_update); r = do_update(update_fun, desc, bn, cmd, idx, storeddata, curr_key, curr_keylen, gc_info, workdone, stats_to_update);
assert_zero(r); assert_zero(r);
if (num_leafentries_before == bn->data_buffer.omt_size()) { if (num_leafentries_before == bn->data_buffer.omt_size()) {
...@@ -2319,10 +2308,7 @@ ft_basement_node_gc_once(BASEMENTNODE bn, ...@@ -2319,10 +2308,7 @@ ft_basement_node_gc_once(BASEMENTNODE bn,
void* keyp, void* keyp,
uint32_t keylen, uint32_t keylen,
LEAFENTRY leaf_entry, LEAFENTRY leaf_entry,
const xid_omt_t &snapshot_xids, txn_gc_info *gc_info,
const rx_omt_t &referenced_xids,
const xid_omt_t &live_root_txns,
TXNID oldest_referenced_xid_known,
STAT64INFO_S * delta) STAT64INFO_S * delta)
{ {
paranoid_invariant(leaf_entry); paranoid_invariant(leaf_entry);
...@@ -2333,7 +2319,7 @@ ft_basement_node_gc_once(BASEMENTNODE bn, ...@@ -2333,7 +2319,7 @@ ft_basement_node_gc_once(BASEMENTNODE bn,
} }
// Don't run garbage collection if this leafentry decides it's not worth it. // Don't run garbage collection if this leafentry decides it's not worth it.
if (!toku_le_worth_running_garbage_collection(leaf_entry, oldest_referenced_xid_known)) { if (!toku_le_worth_running_garbage_collection(leaf_entry, gc_info)) {
goto exit; goto exit;
} }
...@@ -2355,11 +2341,8 @@ ft_basement_node_gc_once(BASEMENTNODE bn, ...@@ -2355,11 +2341,8 @@ ft_basement_node_gc_once(BASEMENTNODE bn,
index, index,
keyp, keyp,
keylen, keylen,
gc_info,
&new_leaf_entry, &new_leaf_entry,
snapshot_xids,
referenced_xids,
live_root_txns,
oldest_referenced_xid_known,
&numbytes_delta); &numbytes_delta);
numrows_delta = 0; numrows_delta = 0;
...@@ -2388,10 +2371,7 @@ exit: ...@@ -2388,10 +2371,7 @@ exit:
// Garbage collect all leaf entries for a given basement node. // Garbage collect all leaf entries for a given basement node.
static void static void
basement_node_gc_all_les(BASEMENTNODE bn, basement_node_gc_all_les(BASEMENTNODE bn,
const xid_omt_t &snapshot_xids, txn_gc_info *gc_info,
const rx_omt_t &referenced_xids,
const xid_omt_t &live_root_txns,
TXNID oldest_referenced_xid_known,
STAT64INFO_S * delta) STAT64INFO_S * delta)
{ {
int r = 0; int r = 0;
...@@ -2409,10 +2389,7 @@ basement_node_gc_all_les(BASEMENTNODE bn, ...@@ -2409,10 +2389,7 @@ basement_node_gc_all_les(BASEMENTNODE bn,
keyp, keyp,
keylen, keylen,
leaf_entry, leaf_entry,
snapshot_xids, gc_info,
referenced_xids,
live_root_txns,
oldest_referenced_xid_known,
delta delta
); );
// Check if the leaf entry was deleted or not. // Check if the leaf entry was deleted or not.
...@@ -2424,12 +2401,7 @@ basement_node_gc_all_les(BASEMENTNODE bn, ...@@ -2424,12 +2401,7 @@ basement_node_gc_all_les(BASEMENTNODE bn,
// Garbage collect all leaf entries in all basement nodes. // Garbage collect all leaf entries in all basement nodes.
static void static void
ft_leaf_gc_all_les(FTNODE node, ft_leaf_gc_all_les(FT ft, FTNODE node, txn_gc_info *gc_info)
FT ft,
const xid_omt_t &snapshot_xids,
const rx_omt_t &referenced_xids,
const xid_omt_t &live_root_txns,
TXNID oldest_referenced_xid_known)
{ {
toku_assert_entire_node_in_memory(node); toku_assert_entire_node_in_memory(node);
paranoid_invariant_zero(node->height); paranoid_invariant_zero(node->height);
...@@ -2440,32 +2412,40 @@ ft_leaf_gc_all_les(FTNODE node, ...@@ -2440,32 +2412,40 @@ ft_leaf_gc_all_les(FTNODE node,
STAT64INFO_S delta; STAT64INFO_S delta;
delta.numrows = 0; delta.numrows = 0;
delta.numbytes = 0; delta.numbytes = 0;
basement_node_gc_all_les(bn, snapshot_xids, referenced_xids, live_root_txns, oldest_referenced_xid_known, &delta); basement_node_gc_all_les(bn, gc_info, &delta);
toku_ft_update_stats(&ft->in_memory_stats, delta); toku_ft_update_stats(&ft->in_memory_stats, delta);
} }
} }
static void static void
ft_leaf_run_gc(FTNODE node, FT ft) { ft_leaf_run_gc(FT ft, FTNODE node) {
TOKULOGGER logger = toku_cachefile_logger(ft->cf); TOKULOGGER logger = toku_cachefile_logger(ft->cf);
if (logger) { if (logger) {
TXN_MANAGER txn_manager = toku_logger_get_txn_manager(logger); TXN_MANAGER txn_manager = toku_logger_get_txn_manager(logger);
txn_manager_state txn_state_for_gc; txn_manager_state txn_state_for_gc;
txn_state_for_gc.init(txn_manager); txn_state_for_gc.init(txn_manager);
TXNID oldest_referenced_xid_for_simple_gc = toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager);
// Perform full garbage collection. Provide a fresh snapshot of the transaction // Perform full garbage collection.
// system plus the oldest known referenced xid that could have had messages
// applied to this leaf (which comes from the node, NOT the txn_manager_state,
// which has a value only suitable for simple garbage collection).
// //
// Using the oldest xid in either the referenced_xids or live_root_txns // - txn_state_for_gc
// snapshots is not sufficient, because there could be something older that is neither // a fresh snapshot of the transaction system.
// live nor referenced, but instead aborted somewhere above us as a message in the tree. // - oldest_referenced_xid_for_simple_gc
ft_leaf_gc_all_les(node, ft, // the oldest xid in any live list as of right now - suitable for simple gc
txn_state_for_gc.snapshot_xids, // - node->oldest_referenced_xid_known
txn_state_for_gc.referenced_xids, // the last known oldest referenced xid for this node and any unapplied messages.
txn_state_for_gc.live_root_txns, // it is a lower bound on the actual oldest referenced xid - but because there
node->oldest_referenced_xid_known); // may be abort messages above us, we need to be careful to only use this value
// for implicit promotion (as opposed to the oldest referenced xid for simple gc)
//
// The node has its own oldest referenced xid because it must be careful not to implicitly promote
// provisional entries for transactions that are no longer live, but may have abort messages
// somewhere above us in the tree.
txn_gc_info gc_info(&txn_state_for_gc,
oldest_referenced_xid_for_simple_gc,
node->oldest_referenced_xid_known,
true);
ft_leaf_gc_all_les(ft, node, &gc_info);
txn_state_for_gc.destroy(); txn_state_for_gc.destroy();
} }
} }
...@@ -2474,20 +2454,27 @@ void toku_bnc_flush_to_child( ...@@ -2474,20 +2454,27 @@ void toku_bnc_flush_to_child(
FT ft, FT ft,
NONLEAF_CHILDINFO bnc, NONLEAF_CHILDINFO bnc,
FTNODE child, FTNODE child,
TXNID oldest_referenced_xid_known TXNID parent_oldest_referenced_xid_known
) )
{ {
paranoid_invariant(bnc); paranoid_invariant(bnc);
STAT64INFO_S stats_delta = {0,0}; STAT64INFO_S stats_delta = {0,0};
size_t remaining_memsize = toku_fifo_buffer_size_in_use(bnc->buffer); size_t remaining_memsize = toku_fifo_buffer_size_in_use(bnc->buffer);
TOKULOGGER logger = toku_cachefile_logger(ft->cf);
TXNID oldest_referenced_xid_for_simple_gc = TXNID_NONE;
txn_manager_state txn_state_for_gc; txn_manager_state txn_state_for_gc;
bool do_garbage_collection = child->height == 0 && toku_cachefile_logger(ft->cf) != nullptr; bool do_garbage_collection = child->height == 0 && logger != nullptr;
if (do_garbage_collection) { if (do_garbage_collection) {
TOKULOGGER logger = toku_cachefile_logger(ft->cf);
TXN_MANAGER txn_manager = toku_logger_get_txn_manager(logger); TXN_MANAGER txn_manager = toku_logger_get_txn_manager(logger);
txn_state_for_gc.init(txn_manager); txn_state_for_gc.init(txn_manager);
oldest_referenced_xid_for_simple_gc = toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager);
} }
txn_gc_info gc_info(do_garbage_collection ? &txn_state_for_gc : nullptr,
oldest_referenced_xid_for_simple_gc,
child->oldest_referenced_xid_known,
true);
FIFO_ITERATE( FIFO_ITERATE(
bnc->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh, bnc->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh,
({ ({
...@@ -2512,14 +2499,13 @@ void toku_bnc_flush_to_child( ...@@ -2512,14 +2499,13 @@ void toku_bnc_flush_to_child(
-1, -1,
&ftcmd, &ftcmd,
is_fresh, is_fresh,
make_gc_info(true), // mvcc_needed &gc_info,
&txn_state_for_gc,
flow_deltas, flow_deltas,
&stats_delta &stats_delta
); );
remaining_memsize -= FIFO_CURRENT_ENTRY_MEMSIZE; remaining_memsize -= FIFO_CURRENT_ENTRY_MEMSIZE;
})); }));
child->oldest_referenced_xid_known = oldest_referenced_xid_known; child->oldest_referenced_xid_known = parent_oldest_referenced_xid_known;
invariant(remaining_memsize == 0); invariant(remaining_memsize == 0);
if (stats_delta.numbytes || stats_delta.numrows) { if (stats_delta.numbytes || stats_delta.numrows) {
...@@ -2549,8 +2535,7 @@ toku_ft_node_put_cmd ( ...@@ -2549,8 +2535,7 @@ toku_ft_node_put_cmd (
int target_childnum, int target_childnum,
FT_MSG cmd, FT_MSG cmd,
bool is_fresh, bool is_fresh,
GC_INFO gc_info, txn_gc_info *gc_info,
txn_manager_state *txn_state_for_gc,
size_t flow_deltas[], size_t flow_deltas[],
STAT64INFO stats_to_update STAT64INFO stats_to_update
) )
...@@ -2568,7 +2553,7 @@ toku_ft_node_put_cmd ( ...@@ -2568,7 +2553,7 @@ toku_ft_node_put_cmd (
// and instead defer to these functions // and instead defer to these functions
// //
if (node->height==0) { if (node->height==0) {
toku_ft_leaf_apply_cmd(compare_fun, update_fun, desc, node, target_childnum, cmd, gc_info, txn_state_for_gc, nullptr, stats_to_update); toku_ft_leaf_apply_cmd(compare_fun, update_fun, desc, node, target_childnum, cmd, gc_info, nullptr, stats_to_update);
} else { } else {
ft_nonleaf_put_cmd(compare_fun, desc, node, target_childnum, cmd, is_fresh, flow_deltas); ft_nonleaf_put_cmd(compare_fun, desc, node, target_childnum, cmd, is_fresh, flow_deltas);
} }
...@@ -2588,8 +2573,7 @@ void toku_ft_leaf_apply_cmd( ...@@ -2588,8 +2573,7 @@ void toku_ft_leaf_apply_cmd(
FTNODE node, FTNODE node,
int target_childnum, // which child to inject to, or -1 if unknown int target_childnum, // which child to inject to, or -1 if unknown
FT_MSG cmd, FT_MSG cmd,
GC_INFO gc_info, txn_gc_info *gc_info,
txn_manager_state *txn_state_for_gc,
uint64_t *workdone, uint64_t *workdone,
STAT64INFO stats_to_update STAT64INFO stats_to_update
) )
...@@ -2622,9 +2606,11 @@ void toku_ft_leaf_apply_cmd( ...@@ -2622,9 +2606,11 @@ void toku_ft_leaf_apply_cmd(
node->max_msn_applied_to_node_on_disk = cmd_msn; node->max_msn_applied_to_node_on_disk = cmd_msn;
} }
// Pass the oldest possible live xid value to each basementnode if (gc_info->mvcc_needed) { // False during recover and non-transactional environments
// when we apply messages to them. // Caller should have recognized that the oldest referenced xid for
TXNID oldest_referenced_xid_known = node->oldest_referenced_xid_known; // implicit promotion is this node's oldest referenced xid known.
invariant(gc_info->oldest_referenced_xid_for_implicit_promotion == node->oldest_referenced_xid_known);
}
if (ft_msg_applies_once(cmd)) { if (ft_msg_applies_once(cmd)) {
unsigned int childnum = (target_childnum >= 0 unsigned int childnum = (target_childnum >= 0
...@@ -2638,9 +2624,7 @@ void toku_ft_leaf_apply_cmd( ...@@ -2638,9 +2624,7 @@ void toku_ft_leaf_apply_cmd(
desc, desc,
bn, bn,
cmd, cmd,
oldest_referenced_xid_known,
gc_info, gc_info,
txn_state_for_gc,
workdone, workdone,
stats_to_update); stats_to_update);
} else { } else {
...@@ -2656,9 +2640,7 @@ void toku_ft_leaf_apply_cmd( ...@@ -2656,9 +2640,7 @@ void toku_ft_leaf_apply_cmd(
desc, desc,
BLB(node, childnum), BLB(node, childnum),
cmd, cmd,
oldest_referenced_xid_known,
gc_info, gc_info,
txn_state_for_gc,
workdone, workdone,
stats_to_update); stats_to_update);
} else { } else {
...@@ -2678,8 +2660,7 @@ static void inject_message_in_locked_node( ...@@ -2678,8 +2660,7 @@ static void inject_message_in_locked_node(
int childnum, int childnum,
FT_MSG_S *cmd, FT_MSG_S *cmd,
size_t flow_deltas[], size_t flow_deltas[],
TXNID oldest_referenced_xid, txn_gc_info *gc_info
GC_INFO gc_info
) )
{ {
// No guarantee that we're the writer, but oh well. // No guarantee that we're the writer, but oh well.
...@@ -2689,11 +2670,10 @@ static void inject_message_in_locked_node( ...@@ -2689,11 +2670,10 @@ static void inject_message_in_locked_node(
invariant(toku_ctpair_is_write_locked(node->ct_pair)); invariant(toku_ctpair_is_write_locked(node->ct_pair));
toku_assert_entire_node_in_memory(node); toku_assert_entire_node_in_memory(node);
// Update the oldest known referenced xid for this node if it is younger // If the current gc_info knows about a newer xid suitable for implicit
// than the one currently known. Otherwise, it's better to keep the heuristic // promotions, update the oldest referenced xid known for this node.
// we have and ignore this one. if (gc_info->oldest_referenced_xid_for_implicit_promotion >= node->oldest_referenced_xid_known) {
if (oldest_referenced_xid >= node->oldest_referenced_xid_known) { node->oldest_referenced_xid_known = gc_info->oldest_referenced_xid_for_implicit_promotion;
node->oldest_referenced_xid_known = oldest_referenced_xid;
} }
// Get the MSN from the header. Now that we have a write lock on the // Get the MSN from the header. Now that we have a write lock on the
...@@ -2711,7 +2691,6 @@ static void inject_message_in_locked_node( ...@@ -2711,7 +2691,6 @@ static void inject_message_in_locked_node(
cmd, cmd,
true, true,
gc_info, gc_info,
nullptr,
flow_deltas, flow_deltas,
&stats_delta &stats_delta
); );
...@@ -2751,7 +2730,7 @@ static void inject_message_in_locked_node( ...@@ -2751,7 +2730,7 @@ static void inject_message_in_locked_node(
// This mechanism prevents direct leaf injections from producing an arbitrary amount // This mechanism prevents direct leaf injections from producing an arbitrary amount
// of MVCC garbage if they never get evicted. // of MVCC garbage if they never get evicted.
if (node->height == 0 && toku_serialize_ftnode_size(node) > (ft->h->nodesize * 8)) { if (node->height == 0 && toku_serialize_ftnode_size(node) > (ft->h->nodesize * 8)) {
ft_leaf_run_gc(node, ft); ft_leaf_run_gc(ft, node);
} }
toku_unpin_ftnode(ft, node); toku_unpin_ftnode(ft, node);
} }
...@@ -2878,7 +2857,7 @@ static bool process_maybe_reactive_child(FT ft, FTNODE parent, FTNODE child, int ...@@ -2878,7 +2857,7 @@ static bool process_maybe_reactive_child(FT ft, FTNODE parent, FTNODE child, int
abort(); abort();
} }
static void inject_message_at_this_blocknum(FT ft, CACHEKEY cachekey, uint32_t fullhash, FT_MSG_S *cmd, size_t flow_deltas[], TXNID oldest_referenced_xid, GC_INFO gc_info) static void inject_message_at_this_blocknum(FT ft, CACHEKEY cachekey, uint32_t fullhash, FT_MSG_S *cmd, size_t flow_deltas[], txn_gc_info *gc_info)
// Effect: // Effect:
// Inject cmd into the node at this blocknum (cachekey). // Inject cmd into the node at this blocknum (cachekey).
// Gets a write lock on the node for you. // Gets a write lock on the node for you.
...@@ -2891,7 +2870,7 @@ static void inject_message_at_this_blocknum(FT ft, CACHEKEY cachekey, uint32_t f ...@@ -2891,7 +2870,7 @@ static void inject_message_at_this_blocknum(FT ft, CACHEKEY cachekey, uint32_t f
toku_assert_entire_node_in_memory(node); toku_assert_entire_node_in_memory(node);
paranoid_invariant(node->fullhash==fullhash); paranoid_invariant(node->fullhash==fullhash);
ft_verify_flags(ft, node); ft_verify_flags(ft, node);
inject_message_in_locked_node(ft, node, -1, cmd, flow_deltas, oldest_referenced_xid, gc_info); inject_message_in_locked_node(ft, node, -1, cmd, flow_deltas, gc_info);
} }
__attribute__((const)) __attribute__((const))
...@@ -2910,8 +2889,7 @@ static void push_something_in_subtree( ...@@ -2910,8 +2889,7 @@ static void push_something_in_subtree(
int target_childnum, int target_childnum,
FT_MSG_S *cmd, FT_MSG_S *cmd,
size_t flow_deltas[], size_t flow_deltas[],
TXNID oldest_referenced_xid, txn_gc_info *gc_info,
GC_INFO gc_info,
int depth, int depth,
seqinsert_loc loc, seqinsert_loc loc,
bool just_did_split_or_merge bool just_did_split_or_merge
...@@ -2952,7 +2930,7 @@ static void push_something_in_subtree( ...@@ -2952,7 +2930,7 @@ static void push_something_in_subtree(
default: default:
STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_GT3, 1); break; STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_GT3, 1); break;
} }
inject_message_in_locked_node(ft, subtree_root, target_childnum, cmd, flow_deltas, oldest_referenced_xid, gc_info); inject_message_in_locked_node(ft, subtree_root, target_childnum, cmd, flow_deltas, gc_info);
} else { } else {
int r; int r;
int childnum; int childnum;
...@@ -3049,13 +3027,13 @@ static void push_something_in_subtree( ...@@ -3049,13 +3027,13 @@ static void push_something_in_subtree(
struct ftnode_fetch_extra bfe; struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, ft); // should be fully in memory, we just split it fill_bfe_for_full_read(&bfe, ft); // should be fully in memory, we just split it
toku_pin_ftnode_off_client_thread_batched(ft, subtree_root_blocknum, subtree_root_fullhash, &bfe, PL_READ, 0, nullptr, &newparent); toku_pin_ftnode_off_client_thread_batched(ft, subtree_root_blocknum, subtree_root_fullhash, &bfe, PL_READ, 0, nullptr, &newparent);
push_something_in_subtree(ft, newparent, -1, cmd, flow_deltas, oldest_referenced_xid, gc_info, depth, loc, true); push_something_in_subtree(ft, newparent, -1, cmd, flow_deltas, gc_info, depth, loc, true);
return; return;
} }
} }
if (next_loc != NEITHER_EXTREME || child->dirty || toku_bnc_should_promote(ft, bnc)) { if (next_loc != NEITHER_EXTREME || child->dirty || toku_bnc_should_promote(ft, bnc)) {
push_something_in_subtree(ft, child, -1, cmd, flow_deltas, oldest_referenced_xid, gc_info, depth + 1, next_loc, false); push_something_in_subtree(ft, child, -1, cmd, flow_deltas, gc_info, depth + 1, next_loc, false);
toku_sync_fetch_and_add(&bnc->flow[0], flow_deltas[0]); toku_sync_fetch_and_add(&bnc->flow[0], flow_deltas[0]);
// The recursive call unpinned the child, but // The recursive call unpinned the child, but
// we're responsible for unpinning subtree_root. // we're responsible for unpinning subtree_root.
...@@ -3091,7 +3069,7 @@ static void push_something_in_subtree( ...@@ -3091,7 +3069,7 @@ static void push_something_in_subtree(
default: default:
STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_GT3, 1); break; STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_GT3, 1); break;
} }
inject_message_at_this_blocknum(ft, subtree_root_blocknum, subtree_root_fullhash, cmd, flow_deltas, oldest_referenced_xid, gc_info); inject_message_at_this_blocknum(ft, subtree_root_blocknum, subtree_root_fullhash, cmd, flow_deltas, gc_info);
} }
} }
} }
...@@ -3099,8 +3077,7 @@ static void push_something_in_subtree( ...@@ -3099,8 +3077,7 @@ static void push_something_in_subtree(
void toku_ft_root_put_cmd( void toku_ft_root_put_cmd(
FT ft, FT ft,
FT_MSG_S *cmd, FT_MSG_S *cmd,
TXNID oldest_referenced_xid, txn_gc_info *gc_info
GC_INFO gc_info
) )
// Effect: // Effect:
// - assign msn to cmd and update msn in the header // - assign msn to cmd and update msn in the header
...@@ -3203,22 +3180,22 @@ void toku_ft_root_put_cmd( ...@@ -3203,22 +3180,22 @@ void toku_ft_root_put_cmd(
// If the root's a leaf or we're injecting a broadcast, drop the read lock and inject here. // If the root's a leaf or we're injecting a broadcast, drop the read lock and inject here.
toku_unpin_ftnode_read_only(ft, node); toku_unpin_ftnode_read_only(ft, node);
STATUS_INC(FT_PRO_NUM_ROOT_H0_INJECT, 1); STATUS_INC(FT_PRO_NUM_ROOT_H0_INJECT, 1);
inject_message_at_this_blocknum(ft, root_key, fullhash, cmd, flow_deltas, oldest_referenced_xid, gc_info); inject_message_at_this_blocknum(ft, root_key, fullhash, cmd, flow_deltas, gc_info);
} else if (node->height > 1) { } else if (node->height > 1) {
// If the root's above height 1, we are definitely eligible for promotion. // If the root's above height 1, we are definitely eligible for promotion.
push_something_in_subtree(ft, node, -1, cmd, flow_deltas, oldest_referenced_xid, gc_info, 0, LEFT_EXTREME | RIGHT_EXTREME, false); push_something_in_subtree(ft, node, -1, cmd, flow_deltas, gc_info, 0, LEFT_EXTREME | RIGHT_EXTREME, false);
} else { } else {
// The root's height 1. We may be eligible for promotion here. // The root's height 1. We may be eligible for promotion here.
// On the extremes, we want to promote, in the middle, we don't. // On the extremes, we want to promote, in the middle, we don't.
int childnum = toku_ftnode_which_child(node, cmd->u.id.key, &ft->cmp_descriptor, ft->compare_fun); int childnum = toku_ftnode_which_child(node, cmd->u.id.key, &ft->cmp_descriptor, ft->compare_fun);
if (childnum == 0 || childnum == node->n_children - 1) { if (childnum == 0 || childnum == node->n_children - 1) {
// On the extremes, promote. We know which childnum we're going to, so pass that down too. // On the extremes, promote. We know which childnum we're going to, so pass that down too.
push_something_in_subtree(ft, node, childnum, cmd, flow_deltas, oldest_referenced_xid, gc_info, 0, LEFT_EXTREME | RIGHT_EXTREME, false); push_something_in_subtree(ft, node, childnum, cmd, flow_deltas, gc_info, 0, LEFT_EXTREME | RIGHT_EXTREME, false);
} else { } else {
// At height 1 in the middle, don't promote, drop the read lock and inject here. // At height 1 in the middle, don't promote, drop the read lock and inject here.
toku_unpin_ftnode_read_only(ft, node); toku_unpin_ftnode_read_only(ft, node);
STATUS_INC(FT_PRO_NUM_ROOT_H1_INJECT, 1); STATUS_INC(FT_PRO_NUM_ROOT_H1_INJECT, 1);
inject_message_at_this_blocknum(ft, root_key, fullhash, cmd, flow_deltas, oldest_referenced_xid, gc_info); inject_message_at_this_blocknum(ft, root_key, fullhash, cmd, flow_deltas, gc_info);
} }
} }
} }
...@@ -3281,7 +3258,8 @@ void toku_ft_optimize (FT_HANDLE brt) { ...@@ -3281,7 +3258,8 @@ void toku_ft_optimize (FT_HANDLE brt) {
toku_init_dbt(&key); toku_init_dbt(&key);
toku_init_dbt(&val); toku_init_dbt(&val);
FT_MSG_S ftcmd = { FT_OPTIMIZE, ZERO_MSN, message_xids, .u = { .id = {&key,&val} } }; FT_MSG_S ftcmd = { FT_OPTIMIZE, ZERO_MSN, message_xids, .u = { .id = {&key,&val} } };
toku_ft_root_put_cmd(brt->ft, &ftcmd, TXNID_NONE, make_gc_info(true)); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true);
toku_ft_root_put_cmd(brt->ft, &ftcmd, &gc_info);
xids_destroy(&message_xids); xids_destroy(&message_xids);
} }
} }
...@@ -3329,6 +3307,16 @@ toku_ft_log_put_multiple (TOKUTXN txn, FT_HANDLE src_ft, FT_HANDLE *brts, uint32 ...@@ -3329,6 +3307,16 @@ toku_ft_log_put_multiple (TOKUTXN txn, FT_HANDLE src_ft, FT_HANDLE *brts, uint32
} }
} }
// Effect: Return the transaction manager's current estimate of the oldest
//  referenced xid, reached through the logger attached to this handle's
//  cachefile. If the cachefile has no logger, return TXNID_NONE.
TXNID toku_ft_get_oldest_referenced_xid_estimate(FT_HANDLE ft_h) {
    TOKULOGGER logger = toku_cachefile_logger(ft_h->ft->cf);
    if (logger == nullptr) {
        // Without a logger there is no txn manager to consult.
        return TXNID_NONE;
    }
    return toku_txn_manager_get_oldest_referenced_xid_estimate(toku_logger_get_txn_manager(logger));
}
void toku_ft_maybe_insert (FT_HANDLE ft_h, DBT *key, DBT *val, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging, enum ft_msg_type type) { void toku_ft_maybe_insert (FT_HANDLE ft_h, DBT *key, DBT *val, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging, enum ft_msg_type type) {
paranoid_invariant(type==FT_INSERT || type==FT_INSERT_NO_OVERWRITE); paranoid_invariant(type==FT_INSERT || type==FT_INSERT_NO_OVERWRITE);
XIDS message_xids = xids_get_root_xids(); //By default use committed messages XIDS message_xids = xids_get_root_xids(); //By default use committed messages
...@@ -3355,19 +3343,29 @@ void toku_ft_maybe_insert (FT_HANDLE ft_h, DBT *key, DBT *val, TOKUTXN txn, bool ...@@ -3355,19 +3343,29 @@ void toku_ft_maybe_insert (FT_HANDLE ft_h, DBT *key, DBT *val, TOKUTXN txn, bool
if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) { if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) {
// do nothing // do nothing
} else { } else {
TXNID oldest_referenced_xid = (txn) ? txn->oldest_referenced_xid : TXNID_NONE; TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h);
toku_ft_send_insert(ft_h, key, val, message_xids, type, oldest_referenced_xid, make_gc_info(txn ? !txn->for_recovery : false)); txn_gc_info gc_info(nullptr,
oldest_referenced_xid_estimate,
// no messages above us, we can implicitly promote uxrs based on this xid
oldest_referenced_xid_estimate,
txn != nullptr ? !txn->for_recovery : false);
toku_ft_send_insert(ft_h, key, val, message_xids, type, &gc_info);
} }
} }
static void static void
ft_send_update_msg(FT_HANDLE brt, FT_MSG_S *msg, TOKUTXN txn) { ft_send_update_msg(FT_HANDLE ft_h, FT_MSG_S *msg, TOKUTXN txn) {
msg->xids = (txn msg->xids = (txn
? toku_txn_get_xids(txn) ? toku_txn_get_xids(txn)
: xids_get_root_xids()); : xids_get_root_xids());
TXNID oldest_referenced_xid = (txn) ? txn->oldest_referenced_xid : TXNID_NONE; TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h);
toku_ft_root_put_cmd(brt->ft, msg, oldest_referenced_xid, make_gc_info(txn ? !txn->for_recovery : false)); txn_gc_info gc_info(nullptr,
oldest_referenced_xid_estimate,
// no messages above us, we can implicitly promote uxrs based on this xid
oldest_referenced_xid_estimate,
txn != nullptr ? !txn->for_recovery : false);
toku_ft_root_put_cmd(ft_h->ft, msg, &gc_info);
} }
void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_function_extra, void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_function_extra,
...@@ -3436,15 +3434,15 @@ void toku_ft_maybe_update_broadcast(FT_HANDLE ft_h, const DBT *update_function_e ...@@ -3436,15 +3434,15 @@ void toku_ft_maybe_update_broadcast(FT_HANDLE ft_h, const DBT *update_function_e
} }
} }
void toku_ft_send_insert(FT_HANDLE brt, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type, TXNID oldest_referenced_xid, GC_INFO gc_info) { void toku_ft_send_insert(FT_HANDLE brt, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type, txn_gc_info *gc_info) {
FT_MSG_S ftcmd = { type, ZERO_MSN, xids, .u = { .id = { key, val } } }; FT_MSG_S ftcmd = { type, ZERO_MSN, xids, .u = { .id = { key, val } } };
toku_ft_root_put_cmd(brt->ft, &ftcmd, oldest_referenced_xid, gc_info); toku_ft_root_put_cmd(brt->ft, &ftcmd, gc_info);
} }
void toku_ft_send_commit_any(FT_HANDLE brt, DBT *key, XIDS xids, TXNID oldest_referenced_xid, GC_INFO gc_info) { void toku_ft_send_commit_any(FT_HANDLE brt, DBT *key, XIDS xids, txn_gc_info *gc_info) {
DBT val; DBT val;
FT_MSG_S ftcmd = { FT_COMMIT_ANY, ZERO_MSN, xids, .u = { .id = { key, toku_init_dbt(&val) } } }; FT_MSG_S ftcmd = { FT_COMMIT_ANY, ZERO_MSN, xids, .u = { .id = { key, toku_init_dbt(&val) } } };
toku_ft_root_put_cmd(brt->ft, &ftcmd, oldest_referenced_xid, gc_info); toku_ft_root_put_cmd(brt->ft, &ftcmd, gc_info);
} }
void toku_ft_delete(FT_HANDLE brt, DBT *key, TOKUTXN txn) { void toku_ft_delete(FT_HANDLE brt, DBT *key, TOKUTXN txn) {
...@@ -3500,15 +3498,20 @@ void toku_ft_maybe_delete(FT_HANDLE ft_h, DBT *key, TOKUTXN txn, bool oplsn_vali ...@@ -3500,15 +3498,20 @@ void toku_ft_maybe_delete(FT_HANDLE ft_h, DBT *key, TOKUTXN txn, bool oplsn_vali
if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) { if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) {
// do nothing // do nothing
} else { } else {
TXNID oldest_referenced_xid = (txn) ? txn->oldest_referenced_xid : TXNID_NONE; TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h);
toku_ft_send_delete(ft_h, key, message_xids, oldest_referenced_xid, make_gc_info(txn ? !txn->for_recovery : false)); txn_gc_info gc_info(nullptr,
oldest_referenced_xid_estimate,
// no messages above us, we can implicitly promote uxrs based on this xid
oldest_referenced_xid_estimate,
txn != nullptr ? !txn->for_recovery : false);
toku_ft_send_delete(ft_h, key, message_xids, &gc_info);
} }
} }
void toku_ft_send_delete(FT_HANDLE brt, DBT *key, XIDS xids, TXNID oldest_referenced_xid, GC_INFO gc_info) { void toku_ft_send_delete(FT_HANDLE brt, DBT *key, XIDS xids, txn_gc_info *gc_info) {
DBT val; toku_init_dbt(&val); DBT val; toku_init_dbt(&val);
FT_MSG_S ftcmd = { FT_DELETE_ANY, ZERO_MSN, xids, .u = { .id = { key, &val } } }; FT_MSG_S ftcmd = { FT_DELETE_ANY, ZERO_MSN, xids, .u = { .id = { key, &val } } };
toku_ft_root_put_cmd(brt->ft, &ftcmd, oldest_referenced_xid, gc_info); toku_ft_root_put_cmd(brt->ft, &ftcmd, gc_info);
} }
/* ******************** open,close and create ********************** */ /* ******************** open,close and create ********************** */
...@@ -4320,7 +4323,7 @@ int fifo_offset_msn_cmp(FIFO &fifo, const int32_t &ao, const int32_t &bo) ...@@ -4320,7 +4323,7 @@ int fifo_offset_msn_cmp(FIFO &fifo, const int32_t &ao, const int32_t &bo)
* basement node. * basement node.
*/ */
static void static void
do_bn_apply_cmd(FT_HANDLE t, BASEMENTNODE bn, struct fifo_entry *entry, TXNID oldest_referenced_xid, uint64_t *workdone, STAT64INFO stats_to_update) do_bn_apply_cmd(FT_HANDLE t, BASEMENTNODE bn, struct fifo_entry *entry, txn_gc_info *gc_info, uint64_t *workdone, STAT64INFO stats_to_update)
{ {
// The messages are being iterated over in (key,msn) order or just in // The messages are being iterated over in (key,msn) order or just in
// msn order, so all the messages for one key, from one buffer, are in // msn order, so all the messages for one key, from one buffer, are in
...@@ -4345,9 +4348,7 @@ do_bn_apply_cmd(FT_HANDLE t, BASEMENTNODE bn, struct fifo_entry *entry, TXNID ol ...@@ -4345,9 +4348,7 @@ do_bn_apply_cmd(FT_HANDLE t, BASEMENTNODE bn, struct fifo_entry *entry, TXNID ol
&t->ft->cmp_descriptor, &t->ft->cmp_descriptor,
bn, bn,
&ftcmd, &ftcmd,
oldest_referenced_xid, gc_info,
make_gc_info(true), //mvcc is needed
nullptr,
workdone, workdone,
stats_to_update stats_to_update
); );
...@@ -4365,7 +4366,7 @@ struct iterate_do_bn_apply_cmd_extra { ...@@ -4365,7 +4366,7 @@ struct iterate_do_bn_apply_cmd_extra {
FT_HANDLE t; FT_HANDLE t;
BASEMENTNODE bn; BASEMENTNODE bn;
NONLEAF_CHILDINFO bnc; NONLEAF_CHILDINFO bnc;
TXNID oldest_referenced_xid; txn_gc_info *gc_info;
uint64_t *workdone; uint64_t *workdone;
STAT64INFO stats_to_update; STAT64INFO stats_to_update;
}; };
...@@ -4374,7 +4375,7 @@ int iterate_do_bn_apply_cmd(const int32_t &offset, const uint32_t UU(idx), struc ...@@ -4374,7 +4375,7 @@ int iterate_do_bn_apply_cmd(const int32_t &offset, const uint32_t UU(idx), struc
int iterate_do_bn_apply_cmd(const int32_t &offset, const uint32_t UU(idx), struct iterate_do_bn_apply_cmd_extra *const e) int iterate_do_bn_apply_cmd(const int32_t &offset, const uint32_t UU(idx), struct iterate_do_bn_apply_cmd_extra *const e)
{ {
struct fifo_entry *entry = toku_fifo_get_entry(e->bnc->buffer, offset); struct fifo_entry *entry = toku_fifo_get_entry(e->bnc->buffer, offset);
do_bn_apply_cmd(e->t, e->bn, entry, e->oldest_referenced_xid, e->workdone, e->stats_to_update); do_bn_apply_cmd(e->t, e->bn, entry, e->gc_info, e->workdone, e->stats_to_update);
return 0; return 0;
} }
...@@ -4496,7 +4497,7 @@ bnc_apply_messages_to_basement_node( ...@@ -4496,7 +4497,7 @@ bnc_apply_messages_to_basement_node(
FTNODE ancestor, // the ancestor node where we can find messages to apply FTNODE ancestor, // the ancestor node where we can find messages to apply
int childnum, // which child buffer of ancestor contains messages we want int childnum, // which child buffer of ancestor contains messages we want
struct pivot_bounds const * const bounds, // contains pivot key bounds of this basement node struct pivot_bounds const * const bounds, // contains pivot key bounds of this basement node
TXNID oldest_referenced_xid, // may be younger than what's in ancestor, we should grab the value from the highest node we have txn_gc_info *gc_info,
bool* msgs_applied bool* msgs_applied
) )
{ {
...@@ -4556,11 +4557,11 @@ bnc_apply_messages_to_basement_node( ...@@ -4556,11 +4557,11 @@ bnc_apply_messages_to_basement_node(
for (int i = 0; i < buffer_size; ++i) { for (int i = 0; i < buffer_size; ++i) {
*msgs_applied = true; *msgs_applied = true;
struct fifo_entry *entry = toku_fifo_get_entry(bnc->buffer, offsets[i]); struct fifo_entry *entry = toku_fifo_get_entry(bnc->buffer, offsets[i]);
do_bn_apply_cmd(t, bn, entry, oldest_referenced_xid, &workdone_this_ancestor, &stats_delta); do_bn_apply_cmd(t, bn, entry, gc_info, &workdone_this_ancestor, &stats_delta);
} }
} else if (stale_lbi == stale_ube) { } else if (stale_lbi == stale_ube) {
// No stale messages to apply, we just apply fresh messages, and mark them to be moved to stale later. // No stale messages to apply, we just apply fresh messages, and mark them to be moved to stale later.
struct iterate_do_bn_apply_cmd_extra iter_extra = { .t = t, .bn = bn, .bnc = bnc, .oldest_referenced_xid = oldest_referenced_xid, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta }; struct iterate_do_bn_apply_cmd_extra iter_extra = { .t = t, .bn = bn, .bnc = bnc, .gc_info = gc_info, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta };
if (fresh_ube - fresh_lbi > 0) *msgs_applied = true; if (fresh_ube - fresh_lbi > 0) *msgs_applied = true;
r = bnc->fresh_message_tree.iterate_and_mark_range<struct iterate_do_bn_apply_cmd_extra, iterate_do_bn_apply_cmd>(fresh_lbi, fresh_ube, &iter_extra); r = bnc->fresh_message_tree.iterate_and_mark_range<struct iterate_do_bn_apply_cmd_extra, iterate_do_bn_apply_cmd>(fresh_lbi, fresh_ube, &iter_extra);
assert_zero(r); assert_zero(r);
...@@ -4569,7 +4570,7 @@ bnc_apply_messages_to_basement_node( ...@@ -4569,7 +4570,7 @@ bnc_apply_messages_to_basement_node(
// No fresh messages to apply, we just apply stale messages. // No fresh messages to apply, we just apply stale messages.
if (stale_ube - stale_lbi > 0) *msgs_applied = true; if (stale_ube - stale_lbi > 0) *msgs_applied = true;
struct iterate_do_bn_apply_cmd_extra iter_extra = { .t = t, .bn = bn, .bnc = bnc, .oldest_referenced_xid = oldest_referenced_xid, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta }; struct iterate_do_bn_apply_cmd_extra iter_extra = { .t = t, .bn = bn, .bnc = bnc, .gc_info = gc_info, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta };
r = bnc->stale_message_tree.iterate_on_range<struct iterate_do_bn_apply_cmd_extra, iterate_do_bn_apply_cmd>(stale_lbi, stale_ube, &iter_extra); r = bnc->stale_message_tree.iterate_on_range<struct iterate_do_bn_apply_cmd_extra, iterate_do_bn_apply_cmd>(stale_lbi, stale_ube, &iter_extra);
assert_zero(r); assert_zero(r);
...@@ -4592,7 +4593,7 @@ apply_ancestors_messages_to_bn( ...@@ -4592,7 +4593,7 @@ apply_ancestors_messages_to_bn(
int childnum, int childnum,
ANCESTORS ancestors, ANCESTORS ancestors,
struct pivot_bounds const * const bounds, struct pivot_bounds const * const bounds,
TXNID oldest_referenced_xid, txn_gc_info *gc_info,
bool* msgs_applied bool* msgs_applied
) )
{ {
...@@ -4607,7 +4608,7 @@ apply_ancestors_messages_to_bn( ...@@ -4607,7 +4608,7 @@ apply_ancestors_messages_to_bn(
curr_ancestors->node, curr_ancestors->node,
curr_ancestors->childnum, curr_ancestors->childnum,
&curr_bounds, &curr_bounds,
oldest_referenced_xid, gc_info,
msgs_applied msgs_applied
); );
// We don't want to check this ancestor node again if the // We don't want to check this ancestor node again if the
...@@ -4644,13 +4645,20 @@ toku_apply_ancestors_messages_to_node ( ...@@ -4644,13 +4645,20 @@ toku_apply_ancestors_messages_to_node (
VERIFY_NODE(t, node); VERIFY_NODE(t, node);
paranoid_invariant(node->height == 0); paranoid_invariant(node->height == 0);
TXNID oldest_referenced_xid = ancestors->node->oldest_referenced_xid_known; TXNID oldest_referenced_xid_for_simple_gc = toku_ft_get_oldest_referenced_xid_estimate(t);
TXNID oldest_referenced_xid_for_implicit_promotion = ancestors->node->oldest_referenced_xid_known;
// We want the newest value from any of our ancestors, for it to be most effective.
for (ANCESTORS curr_ancestors = ancestors; curr_ancestors; curr_ancestors = curr_ancestors->next) { for (ANCESTORS curr_ancestors = ancestors; curr_ancestors; curr_ancestors = curr_ancestors->next) {
if (curr_ancestors->node->oldest_referenced_xid_known > oldest_referenced_xid) { if (curr_ancestors->node->oldest_referenced_xid_known > oldest_referenced_xid_for_implicit_promotion) {
oldest_referenced_xid = curr_ancestors->node->oldest_referenced_xid_known; oldest_referenced_xid_for_implicit_promotion = curr_ancestors->node->oldest_referenced_xid_known;
} }
} }
txn_gc_info gc_info(nullptr,
oldest_referenced_xid_for_simple_gc,
oldest_referenced_xid_for_implicit_promotion,
true);
if (!node->dirty && child_to_read >= 0) { if (!node->dirty && child_to_read >= 0) {
paranoid_invariant(BP_STATE(node, child_to_read) == PT_AVAIL); paranoid_invariant(BP_STATE(node, child_to_read) == PT_AVAIL);
apply_ancestors_messages_to_bn( apply_ancestors_messages_to_bn(
...@@ -4659,7 +4667,7 @@ toku_apply_ancestors_messages_to_node ( ...@@ -4659,7 +4667,7 @@ toku_apply_ancestors_messages_to_node (
child_to_read, child_to_read,
ancestors, ancestors,
bounds, bounds,
oldest_referenced_xid, &gc_info,
msgs_applied msgs_applied
); );
} }
...@@ -4678,7 +4686,7 @@ toku_apply_ancestors_messages_to_node ( ...@@ -4678,7 +4686,7 @@ toku_apply_ancestors_messages_to_node (
i, i,
ancestors, ancestors,
bounds, bounds,
oldest_referenced_xid, &gc_info,
msgs_applied msgs_applied
); );
} }
......
...@@ -243,9 +243,11 @@ void toku_ft_delete (FT_HANDLE brt, DBT *k, TOKUTXN txn); ...@@ -243,9 +243,11 @@ void toku_ft_delete (FT_HANDLE brt, DBT *k, TOKUTXN txn);
// Effect: Delete a key from a brt if the oplsn is newer than the brt lsn. This function is called during recovery. // Effect: Delete a key from a brt if the oplsn is newer than the brt lsn. This function is called during recovery.
void toku_ft_maybe_delete (FT_HANDLE brt, DBT *k, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging); void toku_ft_maybe_delete (FT_HANDLE brt, DBT *k, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging);
void toku_ft_send_insert(FT_HANDLE brt, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type, TXNID oldest_referenced_xid, GC_INFO gc_info); TXNID toku_ft_get_oldest_referenced_xid_estimate(FT_HANDLE ft_h);
void toku_ft_send_delete(FT_HANDLE brt, DBT *key, XIDS xids, TXNID oldest_referenced_xid, GC_INFO gc_info);
void toku_ft_send_commit_any(FT_HANDLE brt, DBT *key, XIDS xids, TXNID oldest_referenced_xids, GC_INFO gc_info); void toku_ft_send_insert(FT_HANDLE brt, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type, txn_gc_info *gc_info);
void toku_ft_send_delete(FT_HANDLE brt, DBT *key, XIDS xids, txn_gc_info *gc_info);
void toku_ft_send_commit_any(FT_HANDLE brt, DBT *key, XIDS xids, txn_gc_info *gc_info);
int toku_close_ft_handle_nolsn (FT_HANDLE, char **error_string) __attribute__ ((warn_unused_result)); int toku_close_ft_handle_nolsn (FT_HANDLE, char **error_string) __attribute__ ((warn_unused_result));
......
...@@ -221,6 +221,7 @@ int toku_testsetup_insert_to_leaf (FT_HANDLE brt, BLOCKNUM blocknum, const char ...@@ -221,6 +221,7 @@ int toku_testsetup_insert_to_leaf (FT_HANDLE brt, BLOCKNUM blocknum, const char
toku_fill_dbt(&valdbt, val, vallen) } } }; toku_fill_dbt(&valdbt, val, vallen) } } };
static size_t zero_flow_deltas[] = { 0, 0 }; static size_t zero_flow_deltas[] = { 0, 0 };
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true);
toku_ft_node_put_cmd ( toku_ft_node_put_cmd (
brt->ft->compare_fun, brt->ft->compare_fun,
brt->ft->update_fun, brt->ft->update_fun,
...@@ -229,8 +230,7 @@ int toku_testsetup_insert_to_leaf (FT_HANDLE brt, BLOCKNUM blocknum, const char ...@@ -229,8 +230,7 @@ int toku_testsetup_insert_to_leaf (FT_HANDLE brt, BLOCKNUM blocknum, const char
-1, -1,
&cmd, &cmd,
true, true,
make_gc_info(true), &gc_info,
nullptr,
zero_flow_deltas, zero_flow_deltas,
NULL NULL
); );
......
...@@ -2925,7 +2925,8 @@ static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int ...@@ -2925,7 +2925,8 @@ static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int
.xids = lbuf->xids, .xids = lbuf->xids,
.u = { .id = { &thekey, &theval } } }; .u = { .id = { &thekey, &theval } } };
uint64_t workdone=0; uint64_t workdone=0;
toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(true), nullptr, &workdone, stats_to_update); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true);
toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, &gc_info, &workdone, stats_to_update);
} }
static int write_literal(struct dbout *out, void*data, size_t len) { static int write_literal(struct dbout *out, void*data, size_t len) {
......
...@@ -143,10 +143,6 @@ typedef TOKU_XA_XID *XIDP; // this is the type that's passed to the logger code ...@@ -143,10 +143,6 @@ typedef TOKU_XA_XID *XIDP; // this is the type that's passed to the logger code
static inline BLOCKNUM make_blocknum(int64_t b) { BLOCKNUM result={b}; return result; } static inline BLOCKNUM make_blocknum(int64_t b) { BLOCKNUM result={b}; return result; }
typedef struct gc_info_s { bool mvcc_needed; } GC_INFO;
static inline GC_INFO make_gc_info(bool mvcc_needed) { GC_INFO result = {mvcc_needed}; return result; }
// This struct holds information about values stored in the cachetable. // This struct holds information about values stored in the cachetable.
// As one can tell from the names, we are probably violating an // As one can tell from the names, we are probably violating an
// abstraction layer by placing names. // abstraction layer by placing names.
......
...@@ -247,13 +247,11 @@ toku_le_apply_msg(FT_MSG msg, ...@@ -247,13 +247,11 @@ toku_le_apply_msg(FT_MSG msg,
LEAFENTRY old_leafentry, // NULL if there was no stored data. LEAFENTRY old_leafentry, // NULL if there was no stored data.
bn_data* data_buffer, // bn_data storing leafentry, if NULL, means there is no bn_data bn_data* data_buffer, // bn_data storing leafentry, if NULL, means there is no bn_data
uint32_t idx, // index in data_buffer where leafentry is stored (and should be replaced uint32_t idx, // index in data_buffer where leafentry is stored (and should be replaced
TXNID oldest_referenced_xid, txn_gc_info *gc_info,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
LEAFENTRY *new_leafentry_p, LEAFENTRY *new_leafentry_p,
int64_t * numbytes_delta_p); int64_t * numbytes_delta_p);
bool toku_le_worth_running_garbage_collection(LEAFENTRY le, TXNID oldest_referenced_xid_known); bool toku_le_worth_running_garbage_collection(LEAFENTRY le, txn_gc_info *gc_info);
void void
toku_le_garbage_collect(LEAFENTRY old_leaf_entry, toku_le_garbage_collect(LEAFENTRY old_leaf_entry,
...@@ -261,11 +259,8 @@ toku_le_garbage_collect(LEAFENTRY old_leaf_entry, ...@@ -261,11 +259,8 @@ toku_le_garbage_collect(LEAFENTRY old_leaf_entry,
uint32_t idx, uint32_t idx,
void* keyp, void* keyp,
uint32_t keylen, uint32_t keylen,
txn_gc_info *gc_info,
LEAFENTRY *new_leaf_entry, LEAFENTRY *new_leaf_entry,
const xid_omt_t &snapshot_xids,
const rx_omt_t &referenced_xids,
const xid_omt_t &live_root_txns,
TXNID oldest_referenced_xid_known,
int64_t * numbytes_delta_p); int64_t * numbytes_delta_p);
#endif /* TOKU_LEAFENTRY_H */ #endif /* TOKU_LEAFENTRY_H */
......
...@@ -247,7 +247,6 @@ struct tokutxn { ...@@ -247,7 +247,6 @@ struct tokutxn {
DB_TXN *container_db_txn; // reference to DB_TXN that contains this tokutxn DB_TXN *container_db_txn; // reference to DB_TXN that contains this tokutxn
xid_omt_t *live_root_txn_list; // the root txns live when the root ancestor (self if a root) started. xid_omt_t *live_root_txn_list; // the root txns live when the root ancestor (self if a root) started.
XIDS xids; // Represents the xid list XIDS xids; // Represents the xid list
TXNID oldest_referenced_xid;
TOKUTXN snapshot_next; TOKUTXN snapshot_next;
TOKUTXN snapshot_prev; TOKUTXN snapshot_prev;
......
...@@ -96,6 +96,7 @@ PATENT RIGHTS GRANT: ...@@ -96,6 +96,7 @@ PATENT RIGHTS GRANT:
#include "ft.h" #include "ft.h"
#include "ft-ops.h" #include "ft-ops.h"
#include "log-internal.h" #include "log-internal.h"
//#include "txn_manager.h"
#include "xids.h" #include "xids.h"
#include "rollback-apply.h" #include "rollback-apply.h"
...@@ -265,7 +266,14 @@ static int do_insertion (enum ft_msg_type type, FILENUM filenum, BYTESTRING key, ...@@ -265,7 +266,14 @@ static int do_insertion (enum ft_msg_type type, FILENUM filenum, BYTESTRING key,
? toku_fill_dbt(&data_dbt, data->data, data->len) ? toku_fill_dbt(&data_dbt, data->data, data->len)
: toku_init_dbt(&data_dbt) } } }; : toku_init_dbt(&data_dbt) } } };
toku_ft_root_put_cmd(h, &ftcmd, txn->oldest_referenced_xid, make_gc_info(!txn->for_recovery)); TXN_MANAGER txn_manager = toku_logger_get_txn_manager(txn->logger);
TXNID oldest_referenced_xid_estimate = toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager);
txn_gc_info gc_info(nullptr,
oldest_referenced_xid_estimate,
// no messages above us, we can implicitly promote uxrs based on this xid
oldest_referenced_xid_estimate,
!txn->for_recovery);
toku_ft_root_put_cmd(h, &ftcmd, &gc_info);
if (reset_root_xid_that_created) { if (reset_root_xid_that_created) {
TXNID new_root_xid_that_created = xids_get_outermost_xid(xids); TXNID new_root_xid_that_created = xids_get_outermost_xid(xids);
toku_reset_root_xid_that_created(h, new_root_xid_that_created); toku_reset_root_xid_that_created(h, new_root_xid_that_created);
......
...@@ -124,8 +124,9 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) ...@@ -124,8 +124,9 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
MSN msn = next_dummymsn(); MSN msn = next_dummymsn();
// apply an insert to the leaf node // apply an insert to the leaf node
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u = {.id = { &thekey, &theval }} }; FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u = {.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL); toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, &gc_info, NULL, NULL);
leafnode->max_msn_applied_to_node_on_disk = msn; leafnode->max_msn_applied_to_node_on_disk = msn;
......
...@@ -132,8 +132,9 @@ append_leaf(FT_HANDLE brt, FTNODE leafnode, void *key, uint32_t keylen, void *va ...@@ -132,8 +132,9 @@ append_leaf(FT_HANDLE brt, FTNODE leafnode, void *key, uint32_t keylen, void *va
MSN msn = next_dummymsn(); MSN msn = next_dummymsn();
brt->ft->h->max_msn_in_ft = msn; brt->ft->h->max_msn_in_ft = msn;
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &cmd, make_gc_info(false), nullptr, nullptr); toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &cmd, &gc_info, nullptr, nullptr);
{ {
int r = toku_ft_lookup(brt, &thekey, lookup_checkf, &pair); int r = toku_ft_lookup(brt, &thekey, lookup_checkf, &pair);
assert(r==0); assert(r==0);
...@@ -141,7 +142,7 @@ append_leaf(FT_HANDLE brt, FTNODE leafnode, void *key, uint32_t keylen, void *va ...@@ -141,7 +142,7 @@ append_leaf(FT_HANDLE brt, FTNODE leafnode, void *key, uint32_t keylen, void *va
} }
FT_MSG_S badcmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &badval }} }; FT_MSG_S badcmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &badval }} };
toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &badcmd, make_gc_info(false), nullptr, nullptr); toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &badcmd, &gc_info, nullptr, nullptr);
// message should be rejected for duplicate msn, row should still have original val // message should be rejected for duplicate msn, row should still have original val
{ {
...@@ -154,7 +155,7 @@ append_leaf(FT_HANDLE brt, FTNODE leafnode, void *key, uint32_t keylen, void *va ...@@ -154,7 +155,7 @@ append_leaf(FT_HANDLE brt, FTNODE leafnode, void *key, uint32_t keylen, void *va
msn = next_dummymsn(); msn = next_dummymsn();
brt->ft->h->max_msn_in_ft = msn; brt->ft->h->max_msn_in_ft = msn;
FT_MSG_S cmd2 = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &val2 }} }; FT_MSG_S cmd2 = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &val2 }} };
toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &cmd2, make_gc_info(false), nullptr, nullptr); toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &cmd2, &gc_info, nullptr, nullptr);
// message should be accepted, val should have new value // message should be accepted, val should have new value
{ {
...@@ -166,7 +167,7 @@ append_leaf(FT_HANDLE brt, FTNODE leafnode, void *key, uint32_t keylen, void *va ...@@ -166,7 +167,7 @@ append_leaf(FT_HANDLE brt, FTNODE leafnode, void *key, uint32_t keylen, void *va
// now verify that message with lesser (older) msn is rejected // now verify that message with lesser (older) msn is rejected
msn.msn = msn.msn - 10; msn.msn = msn.msn - 10;
FT_MSG_S cmd3 = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &badval } }}; FT_MSG_S cmd3 = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &badval } }};
toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &cmd3, make_gc_info(false), nullptr, nullptr); toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &cmd3, &gc_info, nullptr, nullptr);
// message should be rejected, val should still have value in pair2 // message should be rejected, val should still have value in pair2
{ {
......
...@@ -96,6 +96,7 @@ PATENT RIGHTS GRANT: ...@@ -96,6 +96,7 @@ PATENT RIGHTS GRANT:
static TOKUTXN const null_txn = 0; static TOKUTXN const null_txn = 0;
static DB * const null_db = 0; static DB * const null_db = 0;
static const char *fname = TOKU_TEST_FILENAME; static const char *fname = TOKU_TEST_FILENAME;
static txn_gc_info non_mvcc_gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
static int dummy_cmp(DB *db __attribute__((unused)), static int dummy_cmp(DB *db __attribute__((unused)),
const DBT *a, const DBT *b) { const DBT *a, const DBT *b) {
...@@ -217,8 +218,8 @@ insert_random_message_to_bn( ...@@ -217,8 +218,8 @@ insert_random_message_to_bn(
*keylenp = keydbt->size; *keylenp = keydbt->size;
*keyp = toku_xmemdup(keydbt->data, keydbt->size); *keyp = toku_xmemdup(keydbt->data, keydbt->size);
int64_t numbytes; int64_t numbytes;
toku_le_apply_msg(&msg, NULL, NULL, 0, TXNID_NONE, make_gc_info(false), save, &numbytes); toku_le_apply_msg(&msg, NULL, NULL, 0, &non_mvcc_gc_info, save, &numbytes);
toku_ft_bn_apply_cmd(t->ft->compare_fun, t->ft->update_fun, NULL, blb, &msg, TXNID_NONE, make_gc_info(false), NULL, NULL); toku_ft_bn_apply_cmd(t->ft->compare_fun, t->ft->update_fun, NULL, blb, &msg, &non_mvcc_gc_info, NULL, NULL);
if (msn.msn > blb->max_msn_applied.msn) { if (msn.msn > blb->max_msn_applied.msn) {
blb->max_msn_applied = msn; blb->max_msn_applied = msn;
} }
...@@ -267,12 +268,12 @@ insert_same_message_to_bns( ...@@ -267,12 +268,12 @@ insert_same_message_to_bns(
*keylenp = keydbt->size; *keylenp = keydbt->size;
*keyp = toku_xmemdup(keydbt->data, keydbt->size); *keyp = toku_xmemdup(keydbt->data, keydbt->size);
int64_t numbytes; int64_t numbytes;
toku_le_apply_msg(&msg, NULL, NULL, 0, TXNID_NONE, make_gc_info(false), save, &numbytes); toku_le_apply_msg(&msg, NULL, NULL, 0, &non_mvcc_gc_info, save, &numbytes);
toku_ft_bn_apply_cmd(t->ft->compare_fun, t->ft->update_fun, NULL, blb1, &msg, TXNID_NONE, make_gc_info(false), NULL, NULL); toku_ft_bn_apply_cmd(t->ft->compare_fun, t->ft->update_fun, NULL, blb1, &msg, &non_mvcc_gc_info, NULL, NULL);
if (msn.msn > blb1->max_msn_applied.msn) { if (msn.msn > blb1->max_msn_applied.msn) {
blb1->max_msn_applied = msn; blb1->max_msn_applied = msn;
} }
toku_ft_bn_apply_cmd(t->ft->compare_fun, t->ft->update_fun, NULL, blb2, &msg, TXNID_NONE, make_gc_info(false), NULL, NULL); toku_ft_bn_apply_cmd(t->ft->compare_fun, t->ft->update_fun, NULL, blb2, &msg, &non_mvcc_gc_info, NULL, NULL);
if (msn.msn > blb2->max_msn_applied.msn) { if (msn.msn > blb2->max_msn_applied.msn) {
blb2->max_msn_applied = msn; blb2->max_msn_applied = msn;
} }
...@@ -684,7 +685,7 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) { ...@@ -684,7 +685,7 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) {
if (make_leaf_up_to_date) { if (make_leaf_up_to_date) {
for (i = 0; i < num_parent_messages; ++i) { for (i = 0; i < num_parent_messages; ++i) {
if (!parent_messages_is_fresh[i]) { if (!parent_messages_is_fresh[i]) {
toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child, -1, parent_messages[i], make_gc_info(false), NULL, NULL); toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL);
} }
} }
for (i = 0; i < 8; ++i) { for (i = 0; i < 8; ++i) {
...@@ -908,7 +909,7 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) { ...@@ -908,7 +909,7 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) {
for (i = 0; i < num_parent_messages; ++i) { for (i = 0; i < num_parent_messages; ++i) {
if (dummy_cmp(NULL, parent_messages[i]->u.id.key, &childkeys[7]) <= 0 && if (dummy_cmp(NULL, parent_messages[i]->u.id.key, &childkeys[7]) <= 0 &&
!parent_messages_is_fresh[i]) { !parent_messages_is_fresh[i]) {
toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child, -1, parent_messages[i], make_gc_info(false), NULL, NULL); toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL);
} }
} }
for (i = 0; i < 8; ++i) { for (i = 0; i < 8; ++i) {
...@@ -1104,8 +1105,8 @@ compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) { ...@@ -1104,8 +1105,8 @@ compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) {
if (make_leaf_up_to_date) { if (make_leaf_up_to_date) {
for (i = 0; i < num_parent_messages; ++i) { for (i = 0; i < num_parent_messages; ++i) {
if (!parent_messages_is_fresh[i]) { if (!parent_messages_is_fresh[i]) {
toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child1, -1, parent_messages[i], make_gc_info(false), NULL, NULL); toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child1, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL);
toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child2, -1, parent_messages[i], make_gc_info(false), NULL, NULL); toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child2, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL);
} }
} }
for (i = 0; i < 8; ++i) { for (i = 0; i < 8; ++i) {
......
...@@ -453,12 +453,12 @@ test_le_apply(ULE ule_initial, FT_MSG msg, ULE ule_expected) { ...@@ -453,12 +453,12 @@ test_le_apply(ULE ule_initial, FT_MSG msg, ULE ule_expected) {
size_t result_memsize = 0; size_t result_memsize = 0;
int64_t ignoreme; int64_t ignoreme;
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true);
toku_le_apply_msg(msg, toku_le_apply_msg(msg,
le_initial, le_initial,
nullptr, nullptr,
0, 0,
TXNID_NONE, &gc_info,
make_gc_info(true),
&le_result, &le_result,
&ignoreme); &ignoreme);
if (le_result) { if (le_result) {
...@@ -751,7 +751,8 @@ static bool ule_worth_running_garbage_collection(ULE ule, TXNID oldest_reference ...@@ -751,7 +751,8 @@ static bool ule_worth_running_garbage_collection(ULE ule, TXNID oldest_reference
LEAFENTRY le; LEAFENTRY le;
int r = le_pack(ule, nullptr, 0, nullptr, 0, 0, &le); CKERR(r); int r = le_pack(ule, nullptr, 0, nullptr, 0, 0, &le); CKERR(r);
invariant_notnull(le); invariant_notnull(le);
bool worth_running = toku_le_worth_running_garbage_collection(le, oldest_referenced_xid_known); txn_gc_info gc_info(nullptr, oldest_referenced_xid_known, oldest_referenced_xid_known, true);
bool worth_running = toku_le_worth_running_garbage_collection(le, &gc_info);
toku_free(le); toku_free(le);
return worth_running; return worth_running;
} }
......
...@@ -128,7 +128,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) ...@@ -128,7 +128,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node // apply an insert to the leaf node
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL);
// Create bad tree (don't do following): // Create bad tree (don't do following):
// leafnode->max_msn_applied_to_node = msn; // leafnode->max_msn_applied_to_node = msn;
......
...@@ -116,7 +116,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) ...@@ -116,7 +116,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node // apply an insert to the leaf node
MSN msn = next_dummymsn(); MSN msn = next_dummymsn();
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL);
// dont forget to dirty the node // dont forget to dirty the node
leafnode->dirty = 1; leafnode->dirty = 1;
......
...@@ -117,7 +117,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) ...@@ -117,7 +117,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node // apply an insert to the leaf node
MSN msn = next_dummymsn(); MSN msn = next_dummymsn();
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL);
// dont forget to dirty the node // dont forget to dirty the node
leafnode->dirty = 1; leafnode->dirty = 1;
......
...@@ -116,7 +116,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) ...@@ -116,7 +116,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node // apply an insert to the leaf node
MSN msn = next_dummymsn(); MSN msn = next_dummymsn();
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL);
// dont forget to dirty the node // dont forget to dirty the node
leafnode->dirty = 1; leafnode->dirty = 1;
......
...@@ -117,7 +117,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) ...@@ -117,7 +117,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node // apply an insert to the leaf node
MSN msn = next_dummymsn(); MSN msn = next_dummymsn();
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, &gc_info, NULL, NULL);
// dont forget to dirty the node // dont forget to dirty the node
leafnode->dirty = 1; leafnode->dirty = 1;
......
...@@ -119,7 +119,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) ...@@ -119,7 +119,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node // apply an insert to the leaf node
MSN msn = next_dummymsn(); MSN msn = next_dummymsn();
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL);
// dont forget to dirty the node // dont forget to dirty the node
leafnode->dirty = 1; leafnode->dirty = 1;
......
...@@ -116,7 +116,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) ...@@ -116,7 +116,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node // apply an insert to the leaf node
MSN msn = next_dummymsn(); MSN msn = next_dummymsn();
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} }; FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL); txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL);
// dont forget to dirty the node // dont forget to dirty the node
leafnode->dirty = 1; leafnode->dirty = 1;
......
...@@ -239,7 +239,6 @@ toku_txn_begin_with_xid ( ...@@ -239,7 +239,6 @@ toku_txn_begin_with_xid (
} }
else { else {
parent->child_manager->start_child_txn_for_recovery(txn, parent, xid); parent->child_manager->start_child_txn_for_recovery(txn, parent, xid);
txn->oldest_referenced_xid = parent->oldest_referenced_xid;
} }
} }
else { else {
...@@ -255,7 +254,6 @@ toku_txn_begin_with_xid ( ...@@ -255,7 +254,6 @@ toku_txn_begin_with_xid (
} }
else { else {
parent->child_manager->start_child_txn(txn, parent); parent->child_manager->start_child_txn(txn, parent);
txn->oldest_referenced_xid = parent->oldest_referenced_xid;
toku_txn_manager_handle_snapshot_create_for_child_txn( toku_txn_manager_handle_snapshot_create_for_child_txn(
txn, txn,
logger->txn_manager, logger->txn_manager,
...@@ -327,7 +325,6 @@ static txn_child_manager tcm; ...@@ -327,7 +325,6 @@ static txn_child_manager tcm;
.container_db_txn = container_db_txn, .container_db_txn = container_db_txn,
.live_root_txn_list = nullptr, .live_root_txn_list = nullptr,
.xids = NULL, .xids = NULL,
.oldest_referenced_xid = TXNID_NONE,
.snapshot_next = NULL, .snapshot_next = NULL,
.snapshot_prev = NULL, .snapshot_prev = NULL,
.begin_was_logged = false, .begin_was_logged = false,
......
...@@ -615,7 +615,6 @@ void toku_txn_manager_start_txn_for_recovery( ...@@ -615,7 +615,6 @@ void toku_txn_manager_start_txn_for_recovery(
// using xid that is passed in // using xid that is passed in
txn_manager->last_xid = max_xid(txn_manager->last_xid, xid); txn_manager->last_xid = max_xid(txn_manager->last_xid, xid);
toku_txn_update_xids_in_txn(txn, xid); toku_txn_update_xids_in_txn(txn, xid);
txn->oldest_referenced_xid = TXNID_NONE;
uint32_t idx; uint32_t idx;
int r = txn_manager->live_root_txns.find_zero<TOKUTXN, find_xid>(txn, nullptr, &idx); int r = txn_manager->live_root_txns.find_zero<TOKUTXN, find_xid>(txn, nullptr, &idx);
...@@ -838,7 +837,6 @@ void txn_manager_state::init(TXN_MANAGER txn_manager) { ...@@ -838,7 +837,6 @@ void txn_manager_state::init(TXN_MANAGER txn_manager) {
&referenced_xids, &referenced_xids,
&live_root_txns &live_root_txns
); );
oldest_referenced_xid_for_simple_gc = txn_manager->last_calculated_oldest_referenced_xid;
} }
void txn_manager_state::destroy() { void txn_manager_state::destroy() {
......
...@@ -130,13 +130,6 @@ struct txn_manager_state { ...@@ -130,13 +130,6 @@ struct txn_manager_state {
rx_omt_t referenced_xids; rx_omt_t referenced_xids;
xid_omt_t live_root_txns; xid_omt_t live_root_txns;
// the oldest xid in any live list
//
// suitible for simple garbage collection that cleans up multiple committed
// transaction records into one. not suitible for implicit promotions, which
// must be correct in the face of abort messages - see ftnode->oldest_referenced_xid
TXNID oldest_referenced_xid_for_simple_gc;
txn_manager_state() { } txn_manager_state() { }
void init(TXN_MANAGER txn_manager); void init(TXN_MANAGER txn_manager);
void destroy(); void destroy();
...@@ -145,6 +138,32 @@ private: ...@@ -145,6 +138,32 @@ private:
txn_manager_state(txn_manager_state &rhs); // shouldn't need to copy construct txn_manager_state(txn_manager_state &rhs); // shouldn't need to copy construct
}; };
// Represents all of the information needed to run garbage collection.
// Bundles an optional snapshot of the transaction system with the two xid
// bounds and the mvcc flag. Every member is const and is set exactly once
// by the constructor.
struct txn_gc_info {
// st      - stored in txn_state_for_gc (may be null)
// xid_sgc - stored in oldest_referenced_xid_for_simple_gc
// xid_ip  - stored in oldest_referenced_xid_for_implicit_promotion
// mvcc    - stored in mvcc_needed
txn_gc_info(txn_manager_state *st, TXNID xid_sgc, TXNID xid_ip, bool mvcc)
: txn_state_for_gc(st),
oldest_referenced_xid_for_simple_gc(xid_sgc),
oldest_referenced_xid_for_implicit_promotion(xid_ip),
mvcc_needed(mvcc) {
}
// a snapshot of the transaction system. may be null.
txn_manager_state *const txn_state_for_gc;
// the oldest xid in any live list
//
// suitable for simple garbage collection that cleans up multiple committed
// transaction records into one. not suitable for implicit promotions, which
// must be correct in the face of abort messages - see ftnode->oldest_referenced_xid
const TXNID oldest_referenced_xid_for_simple_gc;
// lower bound on the oldest xid in any live list when the messages to be cleaned
// had no messages above them. suitable for implicitly promoting a provisional uxr.
const TXNID oldest_referenced_xid_for_implicit_promotion;
// whether or not mvcc is actually needed - false during recovery and non-transactional systems
const bool mvcc_needed;
};
void toku_txn_manager_init(TXN_MANAGER* txn_manager); void toku_txn_manager_init(TXN_MANAGER* txn_manager);
void toku_txn_manager_destroy(TXN_MANAGER txn_manager); void toku_txn_manager_destroy(TXN_MANAGER txn_manager);
......
...@@ -321,18 +321,18 @@ xid_reads_committed_xid(TXNID tl1, TXNID xc, const xid_omt_t &snapshot_txnids, c ...@@ -321,18 +321,18 @@ xid_reads_committed_xid(TXNID tl1, TXNID xc, const xid_omt_t &snapshot_txnids, c
// so we get rid of them. // so we get rid of them.
// //
static void static void
ule_simple_garbage_collection(ULE ule, TXNID oldest_referenced_xid, GC_INFO gc_info) { ule_simple_garbage_collection(ULE ule, txn_gc_info *gc_info) {
uint32_t curr_index = 0; uint32_t curr_index = 0;
uint32_t num_entries; uint32_t num_entries;
if (ule->num_cuxrs == 1) { if (ule->num_cuxrs == 1) {
goto done; goto done;
} }
if (gc_info.mvcc_needed) { if (gc_info->mvcc_needed) {
// starting at the top of the committed stack, find the first // starting at the top of the committed stack, find the first
// uxr with a txnid that is less than oldest_referenced_xid // uxr with a txnid that is less than oldest_referenced_xid
for (uint32_t i = 0; i < ule->num_cuxrs; i++) { for (uint32_t i = 0; i < ule->num_cuxrs; i++) {
curr_index = ule->num_cuxrs - i - 1; curr_index = ule->num_cuxrs - i - 1;
if (ule->uxrs[curr_index].xid < oldest_referenced_xid) { if (ule->uxrs[curr_index].xid < gc_info->oldest_referenced_xid_for_simple_gc) {
break; break;
} }
} }
...@@ -484,11 +484,10 @@ toku_le_apply_msg(FT_MSG msg, ...@@ -484,11 +484,10 @@ toku_le_apply_msg(FT_MSG msg,
LEAFENTRY old_leafentry, // NULL if there was no stored data. LEAFENTRY old_leafentry, // NULL if there was no stored data.
bn_data* data_buffer, // bn_data storing leafentry, if NULL, means there is no bn_data bn_data* data_buffer, // bn_data storing leafentry, if NULL, means there is no bn_data
uint32_t idx, // index in data_buffer where leafentry is stored (and should be replaced uint32_t idx, // index in data_buffer where leafentry is stored (and should be replaced
TXNID oldest_referenced_xid, txn_gc_info *gc_info,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
LEAFENTRY *new_leafentry_p, LEAFENTRY *new_leafentry_p,
int64_t * numbytes_delta_p) { // change in total size of key and val, not including any overhead int64_t * numbytes_delta_p) { // change in total size of key and val, not including any overhead
invariant_notnull(gc_info);
paranoid_invariant_notnull(new_leafentry_p); paranoid_invariant_notnull(new_leafentry_p);
ULE_S ule; ULE_S ule;
int64_t oldnumbytes = 0; int64_t oldnumbytes = 0;
...@@ -514,18 +513,14 @@ toku_le_apply_msg(FT_MSG msg, ...@@ -514,18 +513,14 @@ toku_le_apply_msg(FT_MSG msg,
// - we may be able to immediately promote the newly-apllied outermost provisonal uxr // - we may be able to immediately promote the newly-apllied outermost provisonal uxr
// - either way, run simple gc first, and then full gc if there are still some committed uxrs. // - either way, run simple gc first, and then full gc if there are still some committed uxrs.
ule_try_promote_provisional_outermost(&ule, oldest_referenced_xid); ule_try_promote_provisional_outermost(&ule, gc_info->oldest_referenced_xid_for_implicit_promotion);
ule_simple_garbage_collection(&ule, ule_simple_garbage_collection(&ule, gc_info);
txn_state_for_gc != nullptr ? if (ule.num_cuxrs > 1 && gc_info->txn_state_for_gc != nullptr) {
txn_state_for_gc->oldest_referenced_xid_for_simple_gc :
oldest_referenced_xid,
gc_info);
if (ule.num_cuxrs > 1 && txn_state_for_gc != nullptr) {
size_t size_before_gc = ule_packed_memsize(&ule); size_t size_before_gc = ule_packed_memsize(&ule);
ule_garbage_collect(&ule, ule_garbage_collect(&ule,
txn_state_for_gc->snapshot_xids, gc_info->txn_state_for_gc->snapshot_xids,
txn_state_for_gc->referenced_xids, gc_info->txn_state_for_gc->referenced_xids,
txn_state_for_gc->live_root_txns gc_info->txn_state_for_gc->live_root_txns
); );
size_t size_after_gc = ule_packed_memsize(&ule); size_t size_after_gc = ule_packed_memsize(&ule);
...@@ -549,7 +544,7 @@ toku_le_apply_msg(FT_MSG msg, ...@@ -549,7 +544,7 @@ toku_le_apply_msg(FT_MSG msg,
ule_cleanup(&ule); ule_cleanup(&ule);
} }
bool toku_le_worth_running_garbage_collection(LEAFENTRY le, TXNID oldest_referenced_xid_known) { bool toku_le_worth_running_garbage_collection(LEAFENTRY le, txn_gc_info *gc_info) {
// Effect: Quickly determines if it's worth trying to run garbage collection on a leafentry // Effect: Quickly determines if it's worth trying to run garbage collection on a leafentry
// Return: True if it makes sense to try garbage collection, false otherwise. // Return: True if it makes sense to try garbage collection, false otherwise.
// Rationale: Garbage collection is likely to clean up under two circumstances: // Rationale: Garbage collection is likely to clean up under two circumstances:
...@@ -565,7 +560,8 @@ bool toku_le_worth_running_garbage_collection(LEAFENTRY le, TXNID oldest_referen ...@@ -565,7 +560,8 @@ bool toku_le_worth_running_garbage_collection(LEAFENTRY le, TXNID oldest_referen
} else { } else {
paranoid_invariant(le->u.mvcc.num_cxrs == 1); paranoid_invariant(le->u.mvcc.num_cxrs == 1);
} }
return le->u.mvcc.num_pxrs > 0 && le_outermost_uncommitted_xid(le) < oldest_referenced_xid_known; return le->u.mvcc.num_pxrs > 0 &&
le_outermost_uncommitted_xid(le) < gc_info->oldest_referenced_xid_for_implicit_promotion;
} }
// Garbage collect one leaf entry, using the given OMT's. // Garbage collect one leaf entry, using the given OMT's.
...@@ -592,12 +588,12 @@ toku_le_garbage_collect(LEAFENTRY old_leaf_entry, ...@@ -592,12 +588,12 @@ toku_le_garbage_collect(LEAFENTRY old_leaf_entry,
uint32_t idx, uint32_t idx,
void* keyp, void* keyp,
uint32_t keylen, uint32_t keylen,
txn_gc_info *gc_info,
LEAFENTRY *new_leaf_entry, LEAFENTRY *new_leaf_entry,
const xid_omt_t &snapshot_xids,
const rx_omt_t &referenced_xids,
const xid_omt_t &live_root_txns,
TXNID oldest_referenced_xid_known,
int64_t * numbytes_delta_p) { int64_t * numbytes_delta_p) {
// We shouldn't want to run gc without having provided a snapshot of the txn system.
invariant_notnull(gc_info);
invariant_notnull(gc_info->txn_state_for_gc);
paranoid_invariant_notnull(new_leaf_entry); paranoid_invariant_notnull(new_leaf_entry);
ULE_S ule; ULE_S ule;
int64_t oldnumbytes = 0; int64_t oldnumbytes = 0;
...@@ -621,15 +617,14 @@ toku_le_garbage_collect(LEAFENTRY old_leaf_entry, ...@@ -621,15 +617,14 @@ toku_le_garbage_collect(LEAFENTRY old_leaf_entry,
// The oldest known refeferenced xid is a lower bound on the oldest possible // The oldest known refeferenced xid is a lower bound on the oldest possible
// live xid, so we use that. It's usually close enough to get rid of most // live xid, so we use that. It's usually close enough to get rid of most
// garbage in leafentries. // garbage in leafentries.
TXNID oldest_possible_live_xid = oldest_referenced_xid_known; ule_try_promote_provisional_outermost(&ule, gc_info->oldest_referenced_xid_for_implicit_promotion);
ule_try_promote_provisional_outermost(&ule, oldest_possible_live_xid);
// No need to run simple gc here if we're going straight for full gc. // No need to run simple gc here if we're going straight for full gc.
if (ule.num_cuxrs > 1) { if (ule.num_cuxrs > 1) {
size_t size_before_gc = ule_packed_memsize(&ule); size_t size_before_gc = ule_packed_memsize(&ule);
ule_garbage_collect(&ule, ule_garbage_collect(&ule,
snapshot_xids, gc_info->txn_state_for_gc->snapshot_xids,
referenced_xids, gc_info->txn_state_for_gc->referenced_xids,
live_root_txns); gc_info->txn_state_for_gc->live_root_txns);
size_t size_after_gc = ule_packed_memsize(&ule); size_t size_after_gc = ule_packed_memsize(&ule);
STATUS_INC(LE_APPLY_GC_BYTES_IN, size_before_gc); STATUS_INC(LE_APPLY_GC_BYTES_IN, size_before_gc);
......
...@@ -615,7 +615,13 @@ indexer_ft_delete_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xi ...@@ -615,7 +615,13 @@ indexer_ft_delete_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xi
} else { } else {
result = toku_ydb_check_avail_fs_space(indexer->i->env); result = toku_ydb_check_avail_fs_space(indexer->i->env);
if (result == 0) { if (result == 0) {
toku_ft_send_delete(db_struct_i(hotdb)->ft_handle, hotkey, xids, TXNID_NONE, make_gc_info(true)); TXNID oldest_referenced_xid_estimate =
toku_ft_get_oldest_referenced_xid_estimate(db_struct_i(hotdb)->ft_handle);
txn_gc_info gc_info(nullptr,
oldest_referenced_xid_estimate,
oldest_referenced_xid_estimate,
true);
toku_ft_send_delete(db_struct_i(hotdb)->ft_handle, hotkey, xids, &gc_info);
} }
} }
return result; return result;
...@@ -651,7 +657,13 @@ indexer_ft_insert_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *ho ...@@ -651,7 +657,13 @@ indexer_ft_insert_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *ho
} else { } else {
result = toku_ydb_check_avail_fs_space(indexer->i->env); result = toku_ydb_check_avail_fs_space(indexer->i->env);
if (result == 0) { if (result == 0) {
toku_ft_send_insert(db_struct_i(hotdb)->ft_handle, hotkey, hotval, xids, FT_INSERT, TXNID_NONE, make_gc_info(true)); TXNID oldest_referenced_xid_estimate =
toku_ft_get_oldest_referenced_xid_estimate(db_struct_i(hotdb)->ft_handle);
txn_gc_info gc_info(nullptr,
oldest_referenced_xid_estimate,
oldest_referenced_xid_estimate,
true);
toku_ft_send_insert(db_struct_i(hotdb)->ft_handle, hotkey, hotval, xids, FT_INSERT, &gc_info);
} }
} }
return result; return result;
...@@ -670,8 +682,15 @@ indexer_ft_commit(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids) { ...@@ -670,8 +682,15 @@ indexer_ft_commit(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids) {
result = indexer->i->test_commit_any(indexer, hotdb, hotkey, xids); result = indexer->i->test_commit_any(indexer, hotdb, hotkey, xids);
} else { } else {
result = toku_ydb_check_avail_fs_space(indexer->i->env); result = toku_ydb_check_avail_fs_space(indexer->i->env);
if (result == 0) if (result == 0) {
toku_ft_send_commit_any(db_struct_i(hotdb)->ft_handle, hotkey, xids, TXNID_NONE, make_gc_info(true)); TXNID oldest_referenced_xid_estimate =
toku_ft_get_oldest_referenced_xid_estimate(db_struct_i(hotdb)->ft_handle);
txn_gc_info gc_info(nullptr,
oldest_referenced_xid_estimate,
oldest_referenced_xid_estimate,
true);
toku_ft_send_commit_any(db_struct_i(hotdb)->ft_handle, hotkey, xids, &gc_info);
}
} }
} }
return result; return result;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment