Commit fb76250b authored by John Esmet's avatar John Esmet

Consolidate garbage collection information into txn_gc_info. Explicitly
distinguish the oldest referenced xid we can use for implicit promotion
from the one we can use for simple garbage collection.
parent f12bdc75
......@@ -1544,11 +1544,7 @@ ft_merge_child(
}
}
static void ft_flush_some_child(
FT ft,
FTNODE parent,
struct flusher_advice *fa
)
void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa)
// Effect: This function does the following:
// - Pick a child of parent (the heaviest child),
// - flush from parent to child,
......@@ -1562,7 +1558,7 @@ static void ft_flush_some_child(
NONLEAF_CHILDINFO bnc = NULL;
paranoid_invariant(parent->height>0);
toku_assert_entire_node_in_memory(parent);
TXNID oldest_referenced_xid = parent->oldest_referenced_xid_known;
TXNID parent_oldest_referenced_xid_known = parent->oldest_referenced_xid_known;
// pick the child we want to flush to
int childnum = fa->pick_child(ft, parent, fa->extra);
......@@ -1655,7 +1651,7 @@ static void ft_flush_some_child(
ft,
bnc,
child,
oldest_referenced_xid
parent_oldest_referenced_xid_known
);
destroy_nonleaf_childinfo(bnc);
}
......@@ -1679,10 +1675,10 @@ static void ft_flush_some_child(
parent = NULL;
}
//
// it is the responsibility of ft_flush_some_child to unpin child
// it is the responsibility of toku_ft_flush_some_child to unpin child
//
if (child->height > 0 && fa->should_recursively_flush(child, fa->extra)) {
ft_flush_some_child(ft, child, fa);
toku_ft_flush_some_child(ft, child, fa);
}
else {
toku_unpin_ftnode_off_client_thread(ft, child);
......@@ -1709,13 +1705,6 @@ static void ft_flush_some_child(
}
}
void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa) {
// Vanilla flush_some_child flushes from parent to child without
// providing a meaningful oldest_referenced_xid. No simple garbage
// collection is performed.
return ft_flush_some_child(ft, parent, fa);
}
static void
update_cleaner_status(
FTNODE node,
......@@ -1857,7 +1846,7 @@ struct flusher_extra {
FT h;
FTNODE node;
NONLEAF_CHILDINFO bnc;
TXNID oldest_referenced_xid;
TXNID parent_oldest_referenced_xid_known;
};
//
......@@ -1896,16 +1885,16 @@ static void flush_node_fun(void *fe_v)
fe->h,
fe->bnc,
fe->node,
fe->oldest_referenced_xid
fe->parent_oldest_referenced_xid_known
);
destroy_nonleaf_childinfo(fe->bnc);
// after the flush has completed, now check to see if the node needs flushing
// If so, call ft_flush_some_child on the node (because this flush intends to
// If so, call toku_ft_flush_some_child on the node (because this flush intends to
// pass a meaningful oldest referenced xid for simple garbage collection), and it is the
// responsibility of the flush to unlock the node. otherwise, we unlock it here.
if (fe->node->height > 0 && toku_ft_nonleaf_is_gorged(fe->node, fe->h->h->nodesize)) {
ft_flush_some_child(fe->h, fe->node, &fa);
toku_ft_flush_some_child(fe->h, fe->node, &fa);
}
else {
toku_unpin_ftnode_off_client_thread(fe->h,fe->node);
......@@ -1916,7 +1905,7 @@ static void flush_node_fun(void *fe_v)
// bnc, which means we are tasked with flushing some
// buffer in the node.
// It is the responsibility of flush some child to unlock the node
ft_flush_some_child(fe->h, fe->node, &fa);
toku_ft_flush_some_child(fe->h, fe->node, &fa);
}
remove_background_job_from_cf(fe->h->cf);
toku_free(fe);
......@@ -1927,13 +1916,13 @@ place_node_and_bnc_on_background_thread(
FT h,
FTNODE node,
NONLEAF_CHILDINFO bnc,
TXNID oldest_referenced_xid)
TXNID parent_oldest_referenced_xid_known)
{
struct flusher_extra *XMALLOC(fe);
fe->h = h;
fe->node = node;
fe->bnc = bnc;
fe->oldest_referenced_xid = oldest_referenced_xid;
fe->parent_oldest_referenced_xid_known = parent_oldest_referenced_xid_known;
cachefile_kibbutz_enq(h->cf, flush_node_fun, fe);
}
......@@ -1953,7 +1942,7 @@ place_node_and_bnc_on_background_thread(
void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent)
{
toku::context flush_ctx(CTX_FLUSH);
TXNID oldest_referenced_xid_known = parent->oldest_referenced_xid_known;
TXNID parent_oldest_referenced_xid_known = parent->oldest_referenced_xid_known;
//
// first let's see if we can detach buffer on client thread
// and pick the child we want to flush to
......@@ -1970,7 +1959,7 @@ void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent)
// In this case, we could not lock the child, so just place the parent on the background thread
// In the callback, we will use toku_ft_flush_some_child, which checks to
// see if we should blow away the old basement nodes.
place_node_and_bnc_on_background_thread(h, parent, NULL, oldest_referenced_xid_known);
place_node_and_bnc_on_background_thread(h, parent, NULL, parent_oldest_referenced_xid_known);
}
else {
//
......@@ -1999,7 +1988,7 @@ void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent)
// so, because we know for sure the child is not
// reactive, we can unpin the parent
//
place_node_and_bnc_on_background_thread(h, child, bnc, oldest_referenced_xid_known);
place_node_and_bnc_on_background_thread(h, child, bnc, parent_oldest_referenced_xid_known);
toku_unpin_ftnode(h, parent);
}
else {
......@@ -2009,7 +1998,7 @@ void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent)
toku_unpin_ftnode(h, child);
// Again, we'll have the parent on the background thread, so
// we don't need to destroy the basement nodes yet.
place_node_and_bnc_on_background_thread(h, parent, NULL, oldest_referenced_xid_known);
place_node_and_bnc_on_background_thread(h, parent, NULL, parent_oldest_referenced_xid_known);
}
}
}
......
......@@ -229,7 +229,7 @@ long toku_bnc_memory_size(NONLEAF_CHILDINFO bnc);
long toku_bnc_memory_used(NONLEAF_CHILDINFO bnc);
void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, DESCRIPTOR desc, ft_compare_func cmp);
void toku_bnc_empty(NONLEAF_CHILDINFO bnc);
void toku_bnc_flush_to_child(FT h, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID oldest_referenced_xid);
void toku_bnc_flush_to_child(FT h, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID parent_oldest_referenced_xid_known);
bool toku_bnc_should_promote(FT ft, NONLEAF_CHILDINFO bnc) __attribute__((const, nonnull));
bool toku_ft_nonleaf_is_gorged(FTNODE node, uint32_t nodesize);
......@@ -1027,7 +1027,7 @@ int toku_testsetup_insert_to_nonleaf (FT_HANDLE brt, BLOCKNUM, enum ft_msg_type,
void toku_pin_node_with_min_bfe(FTNODE* node, BLOCKNUM b, FT_HANDLE t);
// toku_ft_root_put_cmd() accepts non-constant cmd because this is where we set the msn
void toku_ft_root_put_cmd(FT h, FT_MSG_S * cmd, TXNID oldest_referenced_xid, GC_INFO gc_info);
void toku_ft_root_put_cmd(FT h, FT_MSG_S * cmd, txn_gc_info *gc_info);
void
toku_get_node_for_verify(
......@@ -1198,9 +1198,7 @@ toku_ft_bn_apply_cmd_once (
const FT_MSG cmd,
uint32_t idx,
LEAFENTRY le,
TXNID oldest_referenced_xid,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
txn_gc_info *gc_info,
uint64_t *workdonep,
STAT64INFO stats_to_update
);
......@@ -1212,9 +1210,7 @@ toku_ft_bn_apply_cmd (
DESCRIPTOR desc,
BASEMENTNODE bn,
FT_MSG cmd,
TXNID oldest_referenced_xid,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
txn_gc_info *gc_info,
uint64_t *workdone,
STAT64INFO stats_to_update
);
......@@ -1227,8 +1223,7 @@ toku_ft_leaf_apply_cmd (
FTNODE node,
int target_childnum,
FT_MSG cmd,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
txn_gc_info *gc_info,
uint64_t *workdone,
STAT64INFO stats_to_update
);
......@@ -1242,8 +1237,7 @@ toku_ft_node_put_cmd (
int target_childnum,
FT_MSG cmd,
bool is_fresh,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
txn_gc_info *gc_info,
size_t flow_deltas[],
STAT64INFO stats_to_update
);
......
......@@ -869,7 +869,7 @@ void toku_ftnode_clone_callback(
*cloned_value_data = cloned_node;
}
static void ft_leaf_run_gc(FTNODE node, FT ft);
static void ft_leaf_run_gc(FT ft, FTNODE node);
void toku_ftnode_flush_callback(
CACHEFILE UU(cachefile),
......@@ -894,7 +894,7 @@ void toku_ftnode_flush_callback(
if (write_me) {
toku_assert_entire_node_in_memory(ftnode);
if (height == 0) {
ft_leaf_run_gc(ftnode, h);
ft_leaf_run_gc(h, ftnode);
}
if (height == 0 && !is_clone) {
ftnode_update_disk_stats(ftnode, h, for_checkpoint);
......@@ -1690,9 +1690,7 @@ toku_ft_bn_apply_cmd_once (
const FT_MSG cmd,
uint32_t idx,
LEAFENTRY le,
TXNID oldest_referenced_xid,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
txn_gc_info *gc_info,
uint64_t *workdone,
STAT64INFO stats_to_update
)
......@@ -1718,9 +1716,7 @@ toku_ft_bn_apply_cmd_once (
le,
&bn->data_buffer,
idx,
oldest_referenced_xid,
gc_info,
txn_state_for_gc,
&new_le,
&numbytes_delta
);
......@@ -1768,9 +1764,7 @@ struct setval_extra_s {
const DBT *key;
uint32_t idx;
LEAFENTRY le;
TXNID oldest_referenced_xid;
GC_INFO gc_info;
txn_manager_state *txn_state_for_gc;
txn_gc_info *gc_info;
uint64_t * workdone; // set by toku_ft_bn_apply_cmd_once()
STAT64INFO stats_to_update;
};
......@@ -1803,8 +1797,7 @@ static void setval_fun (const DBT *new_val, void *svextra_v) {
}
toku_ft_bn_apply_cmd_once(svextra->bn, &msg,
svextra->idx, svextra->le,
svextra->oldest_referenced_xid, svextra->gc_info,
svextra->txn_state_for_gc,
svextra->gc_info,
svextra->workdone, svextra->stats_to_update);
svextra->setval_r = 0;
}
......@@ -1818,9 +1811,7 @@ static int do_update(ft_update_func update_fun, DESCRIPTOR desc, BASEMENTNODE bn
LEAFENTRY le,
void* keydata,
uint32_t keylen,
TXNID oldest_referenced_xid,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
txn_gc_info *gc_info,
uint64_t * workdone,
STAT64INFO stats_to_update) {
LEAFENTRY le_for_update;
......@@ -1865,8 +1856,8 @@ static int do_update(ft_update_func update_fun, DESCRIPTOR desc, BASEMENTNODE bn
le_for_update = le;
struct setval_extra_s setval_extra = {setval_tag, false, 0, bn, cmd->msn, cmd->xids,
keyp, idx, le_for_update, oldest_referenced_xid, gc_info,
txn_state_for_gc, workdone, stats_to_update};
keyp, idx, le_for_update, gc_info,
workdone, stats_to_update};
// call handlerton's brt->update_fun(), which passes setval_extra to setval_fun()
FAKE_DB(db, desc);
int r = update_fun(
......@@ -1889,9 +1880,7 @@ toku_ft_bn_apply_cmd (
DESCRIPTOR desc,
BASEMENTNODE bn,
FT_MSG cmd,
TXNID oldest_referenced_xid_known,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
txn_gc_info *gc_info,
uint64_t *workdone,
STAT64INFO stats_to_update
)
......@@ -1938,7 +1927,7 @@ toku_ft_bn_apply_cmd (
} else {
assert_zero(r);
}
toku_ft_bn_apply_cmd_once(bn, cmd, idx, storeddata, oldest_referenced_xid_known, gc_info, txn_state_for_gc, workdone, stats_to_update);
toku_ft_bn_apply_cmd_once(bn, cmd, idx, storeddata, gc_info, workdone, stats_to_update);
// if the insertion point is within a window of the right edge of
// the leaf then it is sequential
......@@ -1970,7 +1959,7 @@ toku_ft_bn_apply_cmd (
);
if (r == DB_NOTFOUND) break;
assert_zero(r);
toku_ft_bn_apply_cmd_once(bn, cmd, idx, storeddata, oldest_referenced_xid_known, gc_info, txn_state_for_gc, workdone, stats_to_update);
toku_ft_bn_apply_cmd_once(bn, cmd, idx, storeddata, gc_info, workdone, stats_to_update);
break;
}
......@@ -1992,7 +1981,7 @@ toku_ft_bn_apply_cmd (
cmd->u.id.key = &curr_keydbt;
int deleted = 0;
if (!le_is_clean(storeddata)) { //If already clean, nothing to do.
toku_ft_bn_apply_cmd_once(bn, cmd, idx, storeddata, oldest_referenced_xid_known, gc_info, txn_state_for_gc, workdone, stats_to_update);
toku_ft_bn_apply_cmd_once(bn, cmd, idx, storeddata, gc_info, workdone, stats_to_update);
uint32_t new_omt_size = bn->data_buffer.omt_size();
if (new_omt_size != omt_size) {
paranoid_invariant(new_omt_size+1 == omt_size);
......@@ -2024,7 +2013,7 @@ toku_ft_bn_apply_cmd (
cmd->u.id.key = &curr_keydbt;
int deleted = 0;
if (le_has_xids(storeddata, cmd->xids)) {
toku_ft_bn_apply_cmd_once(bn, cmd, idx, storeddata, oldest_referenced_xid_known, gc_info, txn_state_for_gc, workdone, stats_to_update);
toku_ft_bn_apply_cmd_once(bn, cmd, idx, storeddata, gc_info, workdone, stats_to_update);
uint32_t new_omt_size = bn->data_buffer.omt_size();
if (new_omt_size != omt_size) {
paranoid_invariant(new_omt_size+1 == omt_size);
......@@ -2056,9 +2045,9 @@ toku_ft_bn_apply_cmd (
key = cmd->u.id.key->data;
keylen = cmd->u.id.key->size;
}
r = do_update(update_fun, desc, bn, cmd, idx, NULL, NULL, 0, oldest_referenced_xid_known, gc_info, txn_state_for_gc, workdone, stats_to_update);
r = do_update(update_fun, desc, bn, cmd, idx, NULL, NULL, 0, gc_info, workdone, stats_to_update);
} else if (r==0) {
r = do_update(update_fun, desc, bn, cmd, idx, storeddata, key, keylen, oldest_referenced_xid_known, gc_info, txn_state_for_gc, workdone, stats_to_update);
r = do_update(update_fun, desc, bn, cmd, idx, storeddata, key, keylen, gc_info, workdone, stats_to_update);
} // otherwise, a worse error, just return it
break;
}
......@@ -2081,7 +2070,7 @@ toku_ft_bn_apply_cmd (
// This is broken below. Have a compilation error checked
// in as a reminder
r = do_update(update_fun, desc, bn, cmd, idx, storeddata, curr_key, curr_keylen, oldest_referenced_xid_known, gc_info, txn_state_for_gc, workdone, stats_to_update);
r = do_update(update_fun, desc, bn, cmd, idx, storeddata, curr_key, curr_keylen, gc_info, workdone, stats_to_update);
assert_zero(r);
if (num_leafentries_before == bn->data_buffer.omt_size()) {
......@@ -2319,10 +2308,7 @@ ft_basement_node_gc_once(BASEMENTNODE bn,
void* keyp,
uint32_t keylen,
LEAFENTRY leaf_entry,
const xid_omt_t &snapshot_xids,
const rx_omt_t &referenced_xids,
const xid_omt_t &live_root_txns,
TXNID oldest_referenced_xid_known,
txn_gc_info *gc_info,
STAT64INFO_S * delta)
{
paranoid_invariant(leaf_entry);
......@@ -2333,7 +2319,7 @@ ft_basement_node_gc_once(BASEMENTNODE bn,
}
// Don't run garbage collection if this leafentry decides it's not worth it.
if (!toku_le_worth_running_garbage_collection(leaf_entry, oldest_referenced_xid_known)) {
if (!toku_le_worth_running_garbage_collection(leaf_entry, gc_info)) {
goto exit;
}
......@@ -2355,11 +2341,8 @@ ft_basement_node_gc_once(BASEMENTNODE bn,
index,
keyp,
keylen,
gc_info,
&new_leaf_entry,
snapshot_xids,
referenced_xids,
live_root_txns,
oldest_referenced_xid_known,
&numbytes_delta);
numrows_delta = 0;
......@@ -2388,10 +2371,7 @@ exit:
// Garbage collect all leaf entries for a given basement node.
static void
basement_node_gc_all_les(BASEMENTNODE bn,
const xid_omt_t &snapshot_xids,
const rx_omt_t &referenced_xids,
const xid_omt_t &live_root_txns,
TXNID oldest_referenced_xid_known,
txn_gc_info *gc_info,
STAT64INFO_S * delta)
{
int r = 0;
......@@ -2409,10 +2389,7 @@ basement_node_gc_all_les(BASEMENTNODE bn,
keyp,
keylen,
leaf_entry,
snapshot_xids,
referenced_xids,
live_root_txns,
oldest_referenced_xid_known,
gc_info,
delta
);
// Check if the leaf entry was deleted or not.
......@@ -2424,12 +2401,7 @@ basement_node_gc_all_les(BASEMENTNODE bn,
// Garbage collect all leaf entries in all basement nodes.
static void
ft_leaf_gc_all_les(FTNODE node,
FT ft,
const xid_omt_t &snapshot_xids,
const rx_omt_t &referenced_xids,
const xid_omt_t &live_root_txns,
TXNID oldest_referenced_xid_known)
ft_leaf_gc_all_les(FT ft, FTNODE node, txn_gc_info *gc_info)
{
toku_assert_entire_node_in_memory(node);
paranoid_invariant_zero(node->height);
......@@ -2440,32 +2412,40 @@ ft_leaf_gc_all_les(FTNODE node,
STAT64INFO_S delta;
delta.numrows = 0;
delta.numbytes = 0;
basement_node_gc_all_les(bn, snapshot_xids, referenced_xids, live_root_txns, oldest_referenced_xid_known, &delta);
basement_node_gc_all_les(bn, gc_info, &delta);
toku_ft_update_stats(&ft->in_memory_stats, delta);
}
}
static void
ft_leaf_run_gc(FTNODE node, FT ft) {
ft_leaf_run_gc(FT ft, FTNODE node) {
TOKULOGGER logger = toku_cachefile_logger(ft->cf);
if (logger) {
TXN_MANAGER txn_manager = toku_logger_get_txn_manager(logger);
txn_manager_state txn_state_for_gc;
txn_state_for_gc.init(txn_manager);
TXNID oldest_referenced_xid_for_simple_gc = toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager);
// Perform full garbage collection. Provide a fresh snapshot of the transaction
// system plus the oldest known referenced xid that could have had messages
// applied to this leaf (which comes from the node, NOT the txn_manager_state,
// which has a value only suitible for simple garbage colletion).
// Perform full garbage collection.
//
// Using the oldest xid in either the referenced_xids or live_root_txns
// snapshots is not sufficient, because there could be something older that is neither
// live nor referenced, but instead aborted somewhere above us as a message in the tree.
ft_leaf_gc_all_les(node, ft,
txn_state_for_gc.snapshot_xids,
txn_state_for_gc.referenced_xids,
txn_state_for_gc.live_root_txns,
node->oldest_referenced_xid_known);
// - txn_state_for_gc
// a fresh snapshot of the transaction system.
// - oldest_referenced_xid_for_simple_gc
// the oldest xid in any live list as of right now - suitable for simple gc
// - node->oldest_referenced_xid_known
// the last known oldest referenced xid for this node and any unapplied messages.
// it is a lower bound on the actual oldest referenced xid - but because there
// may be abort messages above us, we need to be careful to only use this value
// for implicit promotion (as opposed to the oldest referenced xid for simple gc)
//
// The node has its own oldest referenced xid because it must be careful not to implicitly promote
// provisional entries for transactions that are no longer live, but may have abort messages
// somewhere above us in the tree.
txn_gc_info gc_info(&txn_state_for_gc,
oldest_referenced_xid_for_simple_gc,
node->oldest_referenced_xid_known,
true);
ft_leaf_gc_all_les(ft, node, &gc_info);
txn_state_for_gc.destroy();
}
}
......@@ -2474,20 +2454,27 @@ void toku_bnc_flush_to_child(
FT ft,
NONLEAF_CHILDINFO bnc,
FTNODE child,
TXNID oldest_referenced_xid_known
TXNID parent_oldest_referenced_xid_known
)
{
paranoid_invariant(bnc);
STAT64INFO_S stats_delta = {0,0};
size_t remaining_memsize = toku_fifo_buffer_size_in_use(bnc->buffer);
TOKULOGGER logger = toku_cachefile_logger(ft->cf);
TXNID oldest_referenced_xid_for_simple_gc = TXNID_NONE;
txn_manager_state txn_state_for_gc;
bool do_garbage_collection = child->height == 0 && toku_cachefile_logger(ft->cf) != nullptr;
bool do_garbage_collection = child->height == 0 && logger != nullptr;
if (do_garbage_collection) {
TOKULOGGER logger = toku_cachefile_logger(ft->cf);
TXN_MANAGER txn_manager = toku_logger_get_txn_manager(logger);
txn_state_for_gc.init(txn_manager);
oldest_referenced_xid_for_simple_gc = toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager);
}
txn_gc_info gc_info(do_garbage_collection ? &txn_state_for_gc : nullptr,
oldest_referenced_xid_for_simple_gc,
child->oldest_referenced_xid_known,
true);
FIFO_ITERATE(
bnc->buffer, key, keylen, val, vallen, type, msn, xids, is_fresh,
({
......@@ -2512,14 +2499,13 @@ void toku_bnc_flush_to_child(
-1,
&ftcmd,
is_fresh,
make_gc_info(true), // mvcc_needed
&txn_state_for_gc,
&gc_info,
flow_deltas,
&stats_delta
);
remaining_memsize -= FIFO_CURRENT_ENTRY_MEMSIZE;
}));
child->oldest_referenced_xid_known = oldest_referenced_xid_known;
child->oldest_referenced_xid_known = parent_oldest_referenced_xid_known;
invariant(remaining_memsize == 0);
if (stats_delta.numbytes || stats_delta.numrows) {
......@@ -2549,8 +2535,7 @@ toku_ft_node_put_cmd (
int target_childnum,
FT_MSG cmd,
bool is_fresh,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
txn_gc_info *gc_info,
size_t flow_deltas[],
STAT64INFO stats_to_update
)
......@@ -2568,7 +2553,7 @@ toku_ft_node_put_cmd (
// and instead defer to these functions
//
if (node->height==0) {
toku_ft_leaf_apply_cmd(compare_fun, update_fun, desc, node, target_childnum, cmd, gc_info, txn_state_for_gc, nullptr, stats_to_update);
toku_ft_leaf_apply_cmd(compare_fun, update_fun, desc, node, target_childnum, cmd, gc_info, nullptr, stats_to_update);
} else {
ft_nonleaf_put_cmd(compare_fun, desc, node, target_childnum, cmd, is_fresh, flow_deltas);
}
......@@ -2588,8 +2573,7 @@ void toku_ft_leaf_apply_cmd(
FTNODE node,
int target_childnum, // which child to inject to, or -1 if unknown
FT_MSG cmd,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
txn_gc_info *gc_info,
uint64_t *workdone,
STAT64INFO stats_to_update
)
......@@ -2622,9 +2606,11 @@ void toku_ft_leaf_apply_cmd(
node->max_msn_applied_to_node_on_disk = cmd_msn;
}
// Pass the oldest possible live xid value to each basementnode
// when we apply messages to them.
TXNID oldest_referenced_xid_known = node->oldest_referenced_xid_known;
if (gc_info->mvcc_needed) { // False during recover and non-transactional environments
// Caller should have recognized that the oldest referenced xid for
// implicit promotion is this node's oldest referenced xid known.
invariant(gc_info->oldest_referenced_xid_for_implicit_promotion == node->oldest_referenced_xid_known);
}
if (ft_msg_applies_once(cmd)) {
unsigned int childnum = (target_childnum >= 0
......@@ -2638,9 +2624,7 @@ void toku_ft_leaf_apply_cmd(
desc,
bn,
cmd,
oldest_referenced_xid_known,
gc_info,
txn_state_for_gc,
workdone,
stats_to_update);
} else {
......@@ -2656,9 +2640,7 @@ void toku_ft_leaf_apply_cmd(
desc,
BLB(node, childnum),
cmd,
oldest_referenced_xid_known,
gc_info,
txn_state_for_gc,
workdone,
stats_to_update);
} else {
......@@ -2678,8 +2660,7 @@ static void inject_message_in_locked_node(
int childnum,
FT_MSG_S *cmd,
size_t flow_deltas[],
TXNID oldest_referenced_xid,
GC_INFO gc_info
txn_gc_info *gc_info
)
{
// No guarantee that we're the writer, but oh well.
......@@ -2689,11 +2670,10 @@ static void inject_message_in_locked_node(
invariant(toku_ctpair_is_write_locked(node->ct_pair));
toku_assert_entire_node_in_memory(node);
// Update the oldest known referenced xid for this node if it is younger
// than the one currently known. Otherwise, it's better to keep the heurstic
// we have and ignore this one.
if (oldest_referenced_xid >= node->oldest_referenced_xid_known) {
node->oldest_referenced_xid_known = oldest_referenced_xid;
// If the current gc_info knows about a newer xid suitable for implicit
// promotions, update the oldest referenced xid known for this node.
if (gc_info->oldest_referenced_xid_for_implicit_promotion >= node->oldest_referenced_xid_known) {
node->oldest_referenced_xid_known = gc_info->oldest_referenced_xid_for_implicit_promotion;
}
// Get the MSN from the header. Now that we have a write lock on the
......@@ -2711,7 +2691,6 @@ static void inject_message_in_locked_node(
cmd,
true,
gc_info,
nullptr,
flow_deltas,
&stats_delta
);
......@@ -2751,7 +2730,7 @@ static void inject_message_in_locked_node(
// This mechanism prevents direct leaf injections from producing an arbitrary amount
// of MVCC garbage if they never get evicted.
if (node->height == 0 && toku_serialize_ftnode_size(node) > (ft->h->nodesize * 8)) {
ft_leaf_run_gc(node, ft);
ft_leaf_run_gc(ft, node);
}
toku_unpin_ftnode(ft, node);
}
......@@ -2878,7 +2857,7 @@ static bool process_maybe_reactive_child(FT ft, FTNODE parent, FTNODE child, int
abort();
}
static void inject_message_at_this_blocknum(FT ft, CACHEKEY cachekey, uint32_t fullhash, FT_MSG_S *cmd, size_t flow_deltas[], TXNID oldest_referenced_xid, GC_INFO gc_info)
static void inject_message_at_this_blocknum(FT ft, CACHEKEY cachekey, uint32_t fullhash, FT_MSG_S *cmd, size_t flow_deltas[], txn_gc_info *gc_info)
// Effect:
// Inject cmd into the node at this blocknum (cachekey).
// Gets a write lock on the node for you.
......@@ -2891,7 +2870,7 @@ static void inject_message_at_this_blocknum(FT ft, CACHEKEY cachekey, uint32_t f
toku_assert_entire_node_in_memory(node);
paranoid_invariant(node->fullhash==fullhash);
ft_verify_flags(ft, node);
inject_message_in_locked_node(ft, node, -1, cmd, flow_deltas, oldest_referenced_xid, gc_info);
inject_message_in_locked_node(ft, node, -1, cmd, flow_deltas, gc_info);
}
__attribute__((const))
......@@ -2910,8 +2889,7 @@ static void push_something_in_subtree(
int target_childnum,
FT_MSG_S *cmd,
size_t flow_deltas[],
TXNID oldest_referenced_xid,
GC_INFO gc_info,
txn_gc_info *gc_info,
int depth,
seqinsert_loc loc,
bool just_did_split_or_merge
......@@ -2952,7 +2930,7 @@ static void push_something_in_subtree(
default:
STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_GT3, 1); break;
}
inject_message_in_locked_node(ft, subtree_root, target_childnum, cmd, flow_deltas, oldest_referenced_xid, gc_info);
inject_message_in_locked_node(ft, subtree_root, target_childnum, cmd, flow_deltas, gc_info);
} else {
int r;
int childnum;
......@@ -3049,13 +3027,13 @@ static void push_something_in_subtree(
struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, ft); // should be fully in memory, we just split it
toku_pin_ftnode_off_client_thread_batched(ft, subtree_root_blocknum, subtree_root_fullhash, &bfe, PL_READ, 0, nullptr, &newparent);
push_something_in_subtree(ft, newparent, -1, cmd, flow_deltas, oldest_referenced_xid, gc_info, depth, loc, true);
push_something_in_subtree(ft, newparent, -1, cmd, flow_deltas, gc_info, depth, loc, true);
return;
}
}
if (next_loc != NEITHER_EXTREME || child->dirty || toku_bnc_should_promote(ft, bnc)) {
push_something_in_subtree(ft, child, -1, cmd, flow_deltas, oldest_referenced_xid, gc_info, depth + 1, next_loc, false);
push_something_in_subtree(ft, child, -1, cmd, flow_deltas, gc_info, depth + 1, next_loc, false);
toku_sync_fetch_and_add(&bnc->flow[0], flow_deltas[0]);
// The recursive call unpinned the child, but
// we're responsible for unpinning subtree_root.
......@@ -3091,7 +3069,7 @@ static void push_something_in_subtree(
default:
STATUS_INC(FT_PRO_NUM_INJECT_DEPTH_GT3, 1); break;
}
inject_message_at_this_blocknum(ft, subtree_root_blocknum, subtree_root_fullhash, cmd, flow_deltas, oldest_referenced_xid, gc_info);
inject_message_at_this_blocknum(ft, subtree_root_blocknum, subtree_root_fullhash, cmd, flow_deltas, gc_info);
}
}
}
......@@ -3099,8 +3077,7 @@ static void push_something_in_subtree(
void toku_ft_root_put_cmd(
FT ft,
FT_MSG_S *cmd,
TXNID oldest_referenced_xid,
GC_INFO gc_info
txn_gc_info *gc_info
)
// Effect:
// - assign msn to cmd and update msn in the header
......@@ -3203,22 +3180,22 @@ void toku_ft_root_put_cmd(
// If the root's a leaf or we're injecting a broadcast, drop the read lock and inject here.
toku_unpin_ftnode_read_only(ft, node);
STATUS_INC(FT_PRO_NUM_ROOT_H0_INJECT, 1);
inject_message_at_this_blocknum(ft, root_key, fullhash, cmd, flow_deltas, oldest_referenced_xid, gc_info);
inject_message_at_this_blocknum(ft, root_key, fullhash, cmd, flow_deltas, gc_info);
} else if (node->height > 1) {
// If the root's above height 1, we are definitely eligible for promotion.
push_something_in_subtree(ft, node, -1, cmd, flow_deltas, oldest_referenced_xid, gc_info, 0, LEFT_EXTREME | RIGHT_EXTREME, false);
push_something_in_subtree(ft, node, -1, cmd, flow_deltas, gc_info, 0, LEFT_EXTREME | RIGHT_EXTREME, false);
} else {
// The root's height 1. We may be eligible for promotion here.
// On the extremes, we want to promote, in the middle, we don't.
int childnum = toku_ftnode_which_child(node, cmd->u.id.key, &ft->cmp_descriptor, ft->compare_fun);
if (childnum == 0 || childnum == node->n_children - 1) {
// On the extremes, promote. We know which childnum we're going to, so pass that down too.
push_something_in_subtree(ft, node, childnum, cmd, flow_deltas, oldest_referenced_xid, gc_info, 0, LEFT_EXTREME | RIGHT_EXTREME, false);
push_something_in_subtree(ft, node, childnum, cmd, flow_deltas, gc_info, 0, LEFT_EXTREME | RIGHT_EXTREME, false);
} else {
// At height 1 in the middle, don't promote, drop the read lock and inject here.
toku_unpin_ftnode_read_only(ft, node);
STATUS_INC(FT_PRO_NUM_ROOT_H1_INJECT, 1);
inject_message_at_this_blocknum(ft, root_key, fullhash, cmd, flow_deltas, oldest_referenced_xid, gc_info);
inject_message_at_this_blocknum(ft, root_key, fullhash, cmd, flow_deltas, gc_info);
}
}
}
......@@ -3281,7 +3258,8 @@ void toku_ft_optimize (FT_HANDLE brt) {
toku_init_dbt(&key);
toku_init_dbt(&val);
FT_MSG_S ftcmd = { FT_OPTIMIZE, ZERO_MSN, message_xids, .u = { .id = {&key,&val} } };
toku_ft_root_put_cmd(brt->ft, &ftcmd, TXNID_NONE, make_gc_info(true));
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true);
toku_ft_root_put_cmd(brt->ft, &ftcmd, &gc_info);
xids_destroy(&message_xids);
}
}
......@@ -3329,6 +3307,16 @@ toku_ft_log_put_multiple (TOKUTXN txn, FT_HANDLE src_ft, FT_HANDLE *brts, uint32
}
}
// Effect: Return the transaction manager's current estimate of the oldest
//  referenced xid for the environment behind ft_h.
//  If the ft's cachefile has no logger, there is no transaction manager to
//  ask, so return TXNID_NONE.
TXNID toku_ft_get_oldest_referenced_xid_estimate(FT_HANDLE ft_h) {
    TOKULOGGER logger = toku_cachefile_logger(ft_h->ft->cf);
    if (logger == nullptr) {
        return TXNID_NONE;
    }
    return toku_txn_manager_get_oldest_referenced_xid_estimate(
        toku_logger_get_txn_manager(logger));
}
void toku_ft_maybe_insert (FT_HANDLE ft_h, DBT *key, DBT *val, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging, enum ft_msg_type type) {
paranoid_invariant(type==FT_INSERT || type==FT_INSERT_NO_OVERWRITE);
XIDS message_xids = xids_get_root_xids(); //By default use committed messages
......@@ -3355,19 +3343,29 @@ void toku_ft_maybe_insert (FT_HANDLE ft_h, DBT *key, DBT *val, TOKUTXN txn, bool
if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) {
// do nothing
} else {
TXNID oldest_referenced_xid = (txn) ? txn->oldest_referenced_xid : TXNID_NONE;
toku_ft_send_insert(ft_h, key, val, message_xids, type, oldest_referenced_xid, make_gc_info(txn ? !txn->for_recovery : false));
TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h);
txn_gc_info gc_info(nullptr,
oldest_referenced_xid_estimate,
// no messages above us, we can implicitly promote uxrs based on this xid
oldest_referenced_xid_estimate,
txn != nullptr ? !txn->for_recovery : false);
toku_ft_send_insert(ft_h, key, val, message_xids, type, &gc_info);
}
}
static void
ft_send_update_msg(FT_HANDLE brt, FT_MSG_S *msg, TOKUTXN txn) {
ft_send_update_msg(FT_HANDLE ft_h, FT_MSG_S *msg, TOKUTXN txn) {
msg->xids = (txn
? toku_txn_get_xids(txn)
: xids_get_root_xids());
TXNID oldest_referenced_xid = (txn) ? txn->oldest_referenced_xid : TXNID_NONE;
toku_ft_root_put_cmd(brt->ft, msg, oldest_referenced_xid, make_gc_info(txn ? !txn->for_recovery : false));
TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h);
txn_gc_info gc_info(nullptr,
oldest_referenced_xid_estimate,
// no messages above us, we can implicitly promote uxrs based on this xid
oldest_referenced_xid_estimate,
txn != nullptr ? !txn->for_recovery : false);
toku_ft_root_put_cmd(ft_h->ft, msg, &gc_info);
}
void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_function_extra,
......@@ -3436,15 +3434,15 @@ void toku_ft_maybe_update_broadcast(FT_HANDLE ft_h, const DBT *update_function_e
}
}
void toku_ft_send_insert(FT_HANDLE brt, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type, TXNID oldest_referenced_xid, GC_INFO gc_info) {
void toku_ft_send_insert(FT_HANDLE brt, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type, txn_gc_info *gc_info) {
FT_MSG_S ftcmd = { type, ZERO_MSN, xids, .u = { .id = { key, val } } };
toku_ft_root_put_cmd(brt->ft, &ftcmd, oldest_referenced_xid, gc_info);
toku_ft_root_put_cmd(brt->ft, &ftcmd, gc_info);
}
void toku_ft_send_commit_any(FT_HANDLE brt, DBT *key, XIDS xids, TXNID oldest_referenced_xid, GC_INFO gc_info) {
void toku_ft_send_commit_any(FT_HANDLE brt, DBT *key, XIDS xids, txn_gc_info *gc_info) {
DBT val;
FT_MSG_S ftcmd = { FT_COMMIT_ANY, ZERO_MSN, xids, .u = { .id = { key, toku_init_dbt(&val) } } };
toku_ft_root_put_cmd(brt->ft, &ftcmd, oldest_referenced_xid, gc_info);
toku_ft_root_put_cmd(brt->ft, &ftcmd, gc_info);
}
void toku_ft_delete(FT_HANDLE brt, DBT *key, TOKUTXN txn) {
......@@ -3500,15 +3498,20 @@ void toku_ft_maybe_delete(FT_HANDLE ft_h, DBT *key, TOKUTXN txn, bool oplsn_vali
if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) {
// do nothing
} else {
TXNID oldest_referenced_xid = (txn) ? txn->oldest_referenced_xid : TXNID_NONE;
toku_ft_send_delete(ft_h, key, message_xids, oldest_referenced_xid, make_gc_info(txn ? !txn->for_recovery : false));
TXNID oldest_referenced_xid_estimate = toku_ft_get_oldest_referenced_xid_estimate(ft_h);
txn_gc_info gc_info(nullptr,
oldest_referenced_xid_estimate,
// no messages above us, we can implicitly promote uxrs based on this xid
oldest_referenced_xid_estimate,
txn != nullptr ? !txn->for_recovery : false);
toku_ft_send_delete(ft_h, key, message_xids, &gc_info);
}
}
void toku_ft_send_delete(FT_HANDLE brt, DBT *key, XIDS xids, TXNID oldest_referenced_xid, GC_INFO gc_info) {
void toku_ft_send_delete(FT_HANDLE brt, DBT *key, XIDS xids, txn_gc_info *gc_info) {
DBT val; toku_init_dbt(&val);
FT_MSG_S ftcmd = { FT_DELETE_ANY, ZERO_MSN, xids, .u = { .id = { key, &val } } };
toku_ft_root_put_cmd(brt->ft, &ftcmd, oldest_referenced_xid, gc_info);
toku_ft_root_put_cmd(brt->ft, &ftcmd, gc_info);
}
/* ******************** open,close and create ********************** */
......@@ -4320,7 +4323,7 @@ int fifo_offset_msn_cmp(FIFO &fifo, const int32_t &ao, const int32_t &bo)
* basement node.
*/
static void
do_bn_apply_cmd(FT_HANDLE t, BASEMENTNODE bn, struct fifo_entry *entry, TXNID oldest_referenced_xid, uint64_t *workdone, STAT64INFO stats_to_update)
do_bn_apply_cmd(FT_HANDLE t, BASEMENTNODE bn, struct fifo_entry *entry, txn_gc_info *gc_info, uint64_t *workdone, STAT64INFO stats_to_update)
{
// The messages are being iterated over in (key,msn) order or just in
// msn order, so all the messages for one key, from one buffer, are in
......@@ -4345,9 +4348,7 @@ do_bn_apply_cmd(FT_HANDLE t, BASEMENTNODE bn, struct fifo_entry *entry, TXNID ol
&t->ft->cmp_descriptor,
bn,
&ftcmd,
oldest_referenced_xid,
make_gc_info(true), //mvcc is needed
nullptr,
gc_info,
workdone,
stats_to_update
);
......@@ -4365,7 +4366,7 @@ struct iterate_do_bn_apply_cmd_extra {
FT_HANDLE t;
BASEMENTNODE bn;
NONLEAF_CHILDINFO bnc;
TXNID oldest_referenced_xid;
txn_gc_info *gc_info;
uint64_t *workdone;
STAT64INFO stats_to_update;
};
......@@ -4374,7 +4375,7 @@ int iterate_do_bn_apply_cmd(const int32_t &offset, const uint32_t UU(idx), struc
int iterate_do_bn_apply_cmd(const int32_t &offset, const uint32_t UU(idx), struct iterate_do_bn_apply_cmd_extra *const e)
{
struct fifo_entry *entry = toku_fifo_get_entry(e->bnc->buffer, offset);
do_bn_apply_cmd(e->t, e->bn, entry, e->oldest_referenced_xid, e->workdone, e->stats_to_update);
do_bn_apply_cmd(e->t, e->bn, entry, e->gc_info, e->workdone, e->stats_to_update);
return 0;
}
......@@ -4496,7 +4497,7 @@ bnc_apply_messages_to_basement_node(
FTNODE ancestor, // the ancestor node where we can find messages to apply
int childnum, // which child buffer of ancestor contains messages we want
struct pivot_bounds const * const bounds, // contains pivot key bounds of this basement node
TXNID oldest_referenced_xid, // may be younger than what's in ancestor, we should grab the value from the highest node we have
txn_gc_info *gc_info,
bool* msgs_applied
)
{
......@@ -4556,11 +4557,11 @@ bnc_apply_messages_to_basement_node(
for (int i = 0; i < buffer_size; ++i) {
*msgs_applied = true;
struct fifo_entry *entry = toku_fifo_get_entry(bnc->buffer, offsets[i]);
do_bn_apply_cmd(t, bn, entry, oldest_referenced_xid, &workdone_this_ancestor, &stats_delta);
do_bn_apply_cmd(t, bn, entry, gc_info, &workdone_this_ancestor, &stats_delta);
}
} else if (stale_lbi == stale_ube) {
// No stale messages to apply, we just apply fresh messages, and mark them to be moved to stale later.
struct iterate_do_bn_apply_cmd_extra iter_extra = { .t = t, .bn = bn, .bnc = bnc, .oldest_referenced_xid = oldest_referenced_xid, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta };
struct iterate_do_bn_apply_cmd_extra iter_extra = { .t = t, .bn = bn, .bnc = bnc, .gc_info = gc_info, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta };
if (fresh_ube - fresh_lbi > 0) *msgs_applied = true;
r = bnc->fresh_message_tree.iterate_and_mark_range<struct iterate_do_bn_apply_cmd_extra, iterate_do_bn_apply_cmd>(fresh_lbi, fresh_ube, &iter_extra);
assert_zero(r);
......@@ -4569,7 +4570,7 @@ bnc_apply_messages_to_basement_node(
// No fresh messages to apply, we just apply stale messages.
if (stale_ube - stale_lbi > 0) *msgs_applied = true;
struct iterate_do_bn_apply_cmd_extra iter_extra = { .t = t, .bn = bn, .bnc = bnc, .oldest_referenced_xid = oldest_referenced_xid, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta };
struct iterate_do_bn_apply_cmd_extra iter_extra = { .t = t, .bn = bn, .bnc = bnc, .gc_info = gc_info, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta };
r = bnc->stale_message_tree.iterate_on_range<struct iterate_do_bn_apply_cmd_extra, iterate_do_bn_apply_cmd>(stale_lbi, stale_ube, &iter_extra);
assert_zero(r);
......@@ -4592,7 +4593,7 @@ apply_ancestors_messages_to_bn(
int childnum,
ANCESTORS ancestors,
struct pivot_bounds const * const bounds,
TXNID oldest_referenced_xid,
txn_gc_info *gc_info,
bool* msgs_applied
)
{
......@@ -4607,7 +4608,7 @@ apply_ancestors_messages_to_bn(
curr_ancestors->node,
curr_ancestors->childnum,
&curr_bounds,
oldest_referenced_xid,
gc_info,
msgs_applied
);
// We don't want to check this ancestor node again if the
......@@ -4644,13 +4645,20 @@ toku_apply_ancestors_messages_to_node (
VERIFY_NODE(t, node);
paranoid_invariant(node->height == 0);
TXNID oldest_referenced_xid = ancestors->node->oldest_referenced_xid_known;
TXNID oldest_referenced_xid_for_simple_gc = toku_ft_get_oldest_referenced_xid_estimate(t);
TXNID oldest_referenced_xid_for_implicit_promotion = ancestors->node->oldest_referenced_xid_known;
// We want the newest value from any of our ancestors, for it to be most effective.
for (ANCESTORS curr_ancestors = ancestors; curr_ancestors; curr_ancestors = curr_ancestors->next) {
if (curr_ancestors->node->oldest_referenced_xid_known > oldest_referenced_xid) {
oldest_referenced_xid = curr_ancestors->node->oldest_referenced_xid_known;
if (curr_ancestors->node->oldest_referenced_xid_known > oldest_referenced_xid_for_implicit_promotion) {
oldest_referenced_xid_for_implicit_promotion = curr_ancestors->node->oldest_referenced_xid_known;
}
}
txn_gc_info gc_info(nullptr,
oldest_referenced_xid_for_simple_gc,
oldest_referenced_xid_for_implicit_promotion,
true);
if (!node->dirty && child_to_read >= 0) {
paranoid_invariant(BP_STATE(node, child_to_read) == PT_AVAIL);
apply_ancestors_messages_to_bn(
......@@ -4659,7 +4667,7 @@ toku_apply_ancestors_messages_to_node (
child_to_read,
ancestors,
bounds,
oldest_referenced_xid,
&gc_info,
msgs_applied
);
}
......@@ -4678,7 +4686,7 @@ toku_apply_ancestors_messages_to_node (
i,
ancestors,
bounds,
oldest_referenced_xid,
&gc_info,
msgs_applied
);
}
......
......@@ -243,9 +243,11 @@ void toku_ft_delete (FT_HANDLE brt, DBT *k, TOKUTXN txn);
// Effect: Delete a key from a brt if the oplsn is newer than the brt lsn. This function is called during recovery.
void toku_ft_maybe_delete (FT_HANDLE brt, DBT *k, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging);
void toku_ft_send_insert(FT_HANDLE brt, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type, TXNID oldest_referenced_xid, GC_INFO gc_info);
void toku_ft_send_delete(FT_HANDLE brt, DBT *key, XIDS xids, TXNID oldest_referenced_xid, GC_INFO gc_info);
void toku_ft_send_commit_any(FT_HANDLE brt, DBT *key, XIDS xids, TXNID oldest_referenced_xids, GC_INFO gc_info);
TXNID toku_ft_get_oldest_referenced_xid_estimate(FT_HANDLE ft_h);
void toku_ft_send_insert(FT_HANDLE brt, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type, txn_gc_info *gc_info);
void toku_ft_send_delete(FT_HANDLE brt, DBT *key, XIDS xids, txn_gc_info *gc_info);
void toku_ft_send_commit_any(FT_HANDLE brt, DBT *key, XIDS xids, txn_gc_info *gc_info);
int toku_close_ft_handle_nolsn (FT_HANDLE, char **error_string) __attribute__ ((warn_unused_result));
......
......@@ -221,6 +221,7 @@ int toku_testsetup_insert_to_leaf (FT_HANDLE brt, BLOCKNUM blocknum, const char
toku_fill_dbt(&valdbt, val, vallen) } } };
static size_t zero_flow_deltas[] = { 0, 0 };
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true);
toku_ft_node_put_cmd (
brt->ft->compare_fun,
brt->ft->update_fun,
......@@ -229,8 +230,7 @@ int toku_testsetup_insert_to_leaf (FT_HANDLE brt, BLOCKNUM blocknum, const char
-1,
&cmd,
true,
make_gc_info(true),
nullptr,
&gc_info,
zero_flow_deltas,
NULL
);
......
......@@ -2925,7 +2925,8 @@ static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int
.xids = lbuf->xids,
.u = { .id = { &thekey, &theval } } };
uint64_t workdone=0;
toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(true), nullptr, &workdone, stats_to_update);
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true);
toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, &gc_info, &workdone, stats_to_update);
}
static int write_literal(struct dbout *out, void*data, size_t len) {
......
......@@ -143,10 +143,6 @@ typedef TOKU_XA_XID *XIDP; // this is the type that's passed to the logger code
static inline BLOCKNUM make_blocknum(int64_t b) { BLOCKNUM result={b}; return result; }
typedef struct gc_info_s { bool mvcc_needed; } GC_INFO;
static inline GC_INFO make_gc_info(bool mvcc_needed) { GC_INFO result = {mvcc_needed}; return result; }
// This struct holds information about values stored in the cachetable.
// As one can tell from the names, we are probably violating an
// abstraction layer by placing names.
......
......@@ -247,13 +247,11 @@ toku_le_apply_msg(FT_MSG msg,
LEAFENTRY old_leafentry, // NULL if there was no stored data.
bn_data* data_buffer, // bn_data storing leafentry, if NULL, means there is no bn_data
uint32_t idx, // index in data_buffer where leafentry is stored (and should be replaced
TXNID oldest_referenced_xid,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
txn_gc_info *gc_info,
LEAFENTRY *new_leafentry_p,
int64_t * numbytes_delta_p);
bool toku_le_worth_running_garbage_collection(LEAFENTRY le, TXNID oldest_referenced_xid_known);
bool toku_le_worth_running_garbage_collection(LEAFENTRY le, txn_gc_info *gc_info);
void
toku_le_garbage_collect(LEAFENTRY old_leaf_entry,
......@@ -261,11 +259,8 @@ toku_le_garbage_collect(LEAFENTRY old_leaf_entry,
uint32_t idx,
void* keyp,
uint32_t keylen,
txn_gc_info *gc_info,
LEAFENTRY *new_leaf_entry,
const xid_omt_t &snapshot_xids,
const rx_omt_t &referenced_xids,
const xid_omt_t &live_root_txns,
TXNID oldest_referenced_xid_known,
int64_t * numbytes_delta_p);
#endif /* TOKU_LEAFENTRY_H */
......
......@@ -247,7 +247,6 @@ struct tokutxn {
DB_TXN *container_db_txn; // reference to DB_TXN that contains this tokutxn
xid_omt_t *live_root_txn_list; // the root txns live when the root ancestor (self if a root) started.
XIDS xids; // Represents the xid list
TXNID oldest_referenced_xid;
TOKUTXN snapshot_next;
TOKUTXN snapshot_prev;
......
......@@ -96,6 +96,7 @@ PATENT RIGHTS GRANT:
#include "ft.h"
#include "ft-ops.h"
#include "log-internal.h"
//#include "txn_manager.h"
#include "xids.h"
#include "rollback-apply.h"
......@@ -265,7 +266,14 @@ static int do_insertion (enum ft_msg_type type, FILENUM filenum, BYTESTRING key,
? toku_fill_dbt(&data_dbt, data->data, data->len)
: toku_init_dbt(&data_dbt) } } };
toku_ft_root_put_cmd(h, &ftcmd, txn->oldest_referenced_xid, make_gc_info(!txn->for_recovery));
TXN_MANAGER txn_manager = toku_logger_get_txn_manager(txn->logger);
TXNID oldest_referenced_xid_estimate = toku_txn_manager_get_oldest_referenced_xid_estimate(txn_manager);
txn_gc_info gc_info(nullptr,
oldest_referenced_xid_estimate,
// no messages above us, we can implicitly promote uxrs based on this xid
oldest_referenced_xid_estimate,
!txn->for_recovery);
toku_ft_root_put_cmd(h, &ftcmd, &gc_info);
if (reset_root_xid_that_created) {
TXNID new_root_xid_that_created = xids_get_outermost_xid(xids);
toku_reset_root_xid_that_created(h, new_root_xid_that_created);
......
......@@ -124,8 +124,9 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
MSN msn = next_dummymsn();
// apply an insert to the leaf node
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u = {.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL);
toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, &gc_info, NULL, NULL);
leafnode->max_msn_applied_to_node_on_disk = msn;
......
......@@ -132,8 +132,9 @@ append_leaf(FT_HANDLE brt, FTNODE leafnode, void *key, uint32_t keylen, void *va
MSN msn = next_dummymsn();
brt->ft->h->max_msn_in_ft = msn;
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &cmd, make_gc_info(false), nullptr, nullptr);
toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &cmd, &gc_info, nullptr, nullptr);
{
int r = toku_ft_lookup(brt, &thekey, lookup_checkf, &pair);
assert(r==0);
......@@ -141,7 +142,7 @@ append_leaf(FT_HANDLE brt, FTNODE leafnode, void *key, uint32_t keylen, void *va
}
FT_MSG_S badcmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &badval }} };
toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &badcmd, make_gc_info(false), nullptr, nullptr);
toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &badcmd, &gc_info, nullptr, nullptr);
// message should be rejected for duplicate msn, row should still have original val
{
......@@ -154,7 +155,7 @@ append_leaf(FT_HANDLE brt, FTNODE leafnode, void *key, uint32_t keylen, void *va
msn = next_dummymsn();
brt->ft->h->max_msn_in_ft = msn;
FT_MSG_S cmd2 = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &val2 }} };
toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &cmd2, make_gc_info(false), nullptr, nullptr);
toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &cmd2, &gc_info, nullptr, nullptr);
// message should be accepted, val should have new value
{
......@@ -166,7 +167,7 @@ append_leaf(FT_HANDLE brt, FTNODE leafnode, void *key, uint32_t keylen, void *va
// now verify that message with lesser (older) msn is rejected
msn.msn = msn.msn - 10;
FT_MSG_S cmd3 = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &badval } }};
toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &cmd3, make_gc_info(false), nullptr, nullptr);
toku_ft_leaf_apply_cmd(brt->ft->compare_fun, brt->ft->update_fun, &brt->ft->cmp_descriptor, leafnode, -1, &cmd3, &gc_info, nullptr, nullptr);
// message should be rejected, val should still have value in pair2
{
......
......@@ -96,6 +96,7 @@ PATENT RIGHTS GRANT:
static TOKUTXN const null_txn = 0;
static DB * const null_db = 0;
static const char *fname = TOKU_TEST_FILENAME;
static txn_gc_info non_mvcc_gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
static int dummy_cmp(DB *db __attribute__((unused)),
const DBT *a, const DBT *b) {
......@@ -217,8 +218,8 @@ insert_random_message_to_bn(
*keylenp = keydbt->size;
*keyp = toku_xmemdup(keydbt->data, keydbt->size);
int64_t numbytes;
toku_le_apply_msg(&msg, NULL, NULL, 0, TXNID_NONE, make_gc_info(false), save, &numbytes);
toku_ft_bn_apply_cmd(t->ft->compare_fun, t->ft->update_fun, NULL, blb, &msg, TXNID_NONE, make_gc_info(false), NULL, NULL);
toku_le_apply_msg(&msg, NULL, NULL, 0, &non_mvcc_gc_info, save, &numbytes);
toku_ft_bn_apply_cmd(t->ft->compare_fun, t->ft->update_fun, NULL, blb, &msg, &non_mvcc_gc_info, NULL, NULL);
if (msn.msn > blb->max_msn_applied.msn) {
blb->max_msn_applied = msn;
}
......@@ -267,12 +268,12 @@ insert_same_message_to_bns(
*keylenp = keydbt->size;
*keyp = toku_xmemdup(keydbt->data, keydbt->size);
int64_t numbytes;
toku_le_apply_msg(&msg, NULL, NULL, 0, TXNID_NONE, make_gc_info(false), save, &numbytes);
toku_ft_bn_apply_cmd(t->ft->compare_fun, t->ft->update_fun, NULL, blb1, &msg, TXNID_NONE, make_gc_info(false), NULL, NULL);
toku_le_apply_msg(&msg, NULL, NULL, 0, &non_mvcc_gc_info, save, &numbytes);
toku_ft_bn_apply_cmd(t->ft->compare_fun, t->ft->update_fun, NULL, blb1, &msg, &non_mvcc_gc_info, NULL, NULL);
if (msn.msn > blb1->max_msn_applied.msn) {
blb1->max_msn_applied = msn;
}
toku_ft_bn_apply_cmd(t->ft->compare_fun, t->ft->update_fun, NULL, blb2, &msg, TXNID_NONE, make_gc_info(false), NULL, NULL);
toku_ft_bn_apply_cmd(t->ft->compare_fun, t->ft->update_fun, NULL, blb2, &msg, &non_mvcc_gc_info, NULL, NULL);
if (msn.msn > blb2->max_msn_applied.msn) {
blb2->max_msn_applied = msn;
}
......@@ -684,7 +685,7 @@ flush_to_leaf(FT_HANDLE t, bool make_leaf_up_to_date, bool use_flush) {
if (make_leaf_up_to_date) {
for (i = 0; i < num_parent_messages; ++i) {
if (!parent_messages_is_fresh[i]) {
toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child, -1, parent_messages[i], make_gc_info(false), NULL, NULL);
toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL);
}
}
for (i = 0; i < 8; ++i) {
......@@ -908,7 +909,7 @@ flush_to_leaf_with_keyrange(FT_HANDLE t, bool make_leaf_up_to_date) {
for (i = 0; i < num_parent_messages; ++i) {
if (dummy_cmp(NULL, parent_messages[i]->u.id.key, &childkeys[7]) <= 0 &&
!parent_messages_is_fresh[i]) {
toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child, -1, parent_messages[i], make_gc_info(false), NULL, NULL);
toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL);
}
}
for (i = 0; i < 8; ++i) {
......@@ -1104,8 +1105,8 @@ compare_apply_and_flush(FT_HANDLE t, bool make_leaf_up_to_date) {
if (make_leaf_up_to_date) {
for (i = 0; i < num_parent_messages; ++i) {
if (!parent_messages_is_fresh[i]) {
toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child1, -1, parent_messages[i], make_gc_info(false), NULL, NULL);
toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child2, -1, parent_messages[i], make_gc_info(false), NULL, NULL);
toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child1, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL);
toku_ft_leaf_apply_cmd(t->ft->compare_fun, t->ft->update_fun, &t->ft->descriptor, child2, -1, parent_messages[i], &non_mvcc_gc_info, NULL, NULL);
}
}
for (i = 0; i < 8; ++i) {
......
......@@ -453,12 +453,12 @@ test_le_apply(ULE ule_initial, FT_MSG msg, ULE ule_expected) {
size_t result_memsize = 0;
int64_t ignoreme;
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true);
toku_le_apply_msg(msg,
le_initial,
nullptr,
0,
TXNID_NONE,
make_gc_info(true),
&gc_info,
&le_result,
&ignoreme);
if (le_result) {
......@@ -751,7 +751,8 @@ static bool ule_worth_running_garbage_collection(ULE ule, TXNID oldest_reference
LEAFENTRY le;
int r = le_pack(ule, nullptr, 0, nullptr, 0, 0, &le); CKERR(r);
invariant_notnull(le);
bool worth_running = toku_le_worth_running_garbage_collection(le, oldest_referenced_xid_known);
txn_gc_info gc_info(nullptr, oldest_referenced_xid_known, oldest_referenced_xid_known, true);
bool worth_running = toku_le_worth_running_garbage_collection(le, &gc_info);
toku_free(le);
return worth_running;
}
......
......@@ -128,7 +128,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL);
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL);
// Create bad tree (don't do following):
// leafnode->max_msn_applied_to_node = msn;
......
......@@ -116,7 +116,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node
MSN msn = next_dummymsn();
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL);
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL);
// dont forget to dirty the node
leafnode->dirty = 1;
......
......@@ -117,7 +117,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node
MSN msn = next_dummymsn();
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL);
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL);
// dont forget to dirty the node
leafnode->dirty = 1;
......
......@@ -116,7 +116,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node
MSN msn = next_dummymsn();
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL);
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL);
// dont forget to dirty the node
leafnode->dirty = 1;
......
......@@ -117,7 +117,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node
MSN msn = next_dummymsn();
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL);
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, &gc_info, NULL, NULL);
// dont forget to dirty the node
leafnode->dirty = 1;
......
......@@ -119,7 +119,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node
MSN msn = next_dummymsn();
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL);
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL);
// dont forget to dirty the node
leafnode->dirty = 1;
......
......@@ -116,7 +116,8 @@ append_leaf(FTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen)
// apply an insert to the leaf node
MSN msn = next_dummymsn();
FT_MSG_S cmd = { FT_INSERT, msn, xids_get_root_xids(), .u={.id = { &thekey, &theval }} };
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(false), NULL, NULL);
txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, false);
toku_ft_bn_apply_cmd_once(BLB(leafnode, 0), &cmd, idx, NULL, &gc_info, NULL, NULL);
// dont forget to dirty the node
leafnode->dirty = 1;
......
......@@ -239,7 +239,6 @@ toku_txn_begin_with_xid (
}
else {
parent->child_manager->start_child_txn_for_recovery(txn, parent, xid);
txn->oldest_referenced_xid = parent->oldest_referenced_xid;
}
}
else {
......@@ -255,7 +254,6 @@ toku_txn_begin_with_xid (
}
else {
parent->child_manager->start_child_txn(txn, parent);
txn->oldest_referenced_xid = parent->oldest_referenced_xid;
toku_txn_manager_handle_snapshot_create_for_child_txn(
txn,
logger->txn_manager,
......@@ -327,7 +325,6 @@ static txn_child_manager tcm;
.container_db_txn = container_db_txn,
.live_root_txn_list = nullptr,
.xids = NULL,
.oldest_referenced_xid = TXNID_NONE,
.snapshot_next = NULL,
.snapshot_prev = NULL,
.begin_was_logged = false,
......
......@@ -615,7 +615,6 @@ void toku_txn_manager_start_txn_for_recovery(
// using xid that is passed in
txn_manager->last_xid = max_xid(txn_manager->last_xid, xid);
toku_txn_update_xids_in_txn(txn, xid);
txn->oldest_referenced_xid = TXNID_NONE;
uint32_t idx;
int r = txn_manager->live_root_txns.find_zero<TOKUTXN, find_xid>(txn, nullptr, &idx);
......@@ -838,7 +837,6 @@ void txn_manager_state::init(TXN_MANAGER txn_manager) {
&referenced_xids,
&live_root_txns
);
oldest_referenced_xid_for_simple_gc = txn_manager->last_calculated_oldest_referenced_xid;
}
void txn_manager_state::destroy() {
......
......@@ -130,13 +130,6 @@ struct txn_manager_state {
rx_omt_t referenced_xids;
xid_omt_t live_root_txns;
// the oldest xid in any live list
//
// suitible for simple garbage collection that cleans up multiple committed
// transaction records into one. not suitible for implicit promotions, which
// must be correct in the face of abort messages - see ftnode->oldest_referenced_xid
TXNID oldest_referenced_xid_for_simple_gc;
txn_manager_state() { }
void init(TXN_MANAGER txn_manager);
void destroy();
......@@ -145,6 +138,32 @@ private:
txn_manager_state(txn_manager_state &rhs); // shouldn't need to copy construct
};
// Represents all of the information needed to run garbage collection on a
// leafentry. Every member is const, so a txn_gc_info is fixed at construction
// time; callers build one on the stack and pass it down by pointer.
struct txn_gc_info {
// st      - snapshot of the transaction system, or nullptr if none was taken
// xid_sgc - oldest referenced xid to use for simple garbage collection
// xid_ip  - oldest referenced xid to use for implicit promotion
// mvcc    - whether mvcc is needed (see mvcc_needed below)
txn_gc_info(txn_manager_state *st, TXNID xid_sgc, TXNID xid_ip, bool mvcc)
: txn_state_for_gc(st),
oldest_referenced_xid_for_simple_gc(xid_sgc),
oldest_referenced_xid_for_implicit_promotion(xid_ip),
mvcc_needed(mvcc) {
}
// A snapshot of the transaction system. May be null, in which case
// full garbage collection cannot be run with this gc info.
txn_manager_state *const txn_state_for_gc;
// The oldest xid in any live list.
//
// Suitable for simple garbage collection that cleans up multiple committed
// transaction records into one. Not suitable for implicit promotions, which
// must be correct in the face of abort messages - see ftnode->oldest_referenced_xid_known
const TXNID oldest_referenced_xid_for_simple_gc;
// Lower bound on the oldest xid in any live list when the messages to be cleaned
// had no messages above them. Suitable for implicitly promoting a provisional uxr.
const TXNID oldest_referenced_xid_for_implicit_promotion;
// Whether or not mvcc is actually needed - false during recovery and non-transactional systems.
const bool mvcc_needed;
};
void toku_txn_manager_init(TXN_MANAGER* txn_manager);
void toku_txn_manager_destroy(TXN_MANAGER txn_manager);
......
......@@ -321,18 +321,18 @@ xid_reads_committed_xid(TXNID tl1, TXNID xc, const xid_omt_t &snapshot_txnids, c
// so we get rid of them.
//
static void
ule_simple_garbage_collection(ULE ule, TXNID oldest_referenced_xid, GC_INFO gc_info) {
ule_simple_garbage_collection(ULE ule, txn_gc_info *gc_info) {
uint32_t curr_index = 0;
uint32_t num_entries;
if (ule->num_cuxrs == 1) {
goto done;
}
if (gc_info.mvcc_needed) {
if (gc_info->mvcc_needed) {
// starting at the top of the committed stack, find the first
// uxr with a txnid that is less than oldest_referenced_xid_for_simple_gc
for (uint32_t i = 0; i < ule->num_cuxrs; i++) {
curr_index = ule->num_cuxrs - i - 1;
if (ule->uxrs[curr_index].xid < oldest_referenced_xid) {
if (ule->uxrs[curr_index].xid < gc_info->oldest_referenced_xid_for_simple_gc) {
break;
}
}
......@@ -484,11 +484,10 @@ toku_le_apply_msg(FT_MSG msg,
LEAFENTRY old_leafentry, // NULL if there was no stored data.
bn_data* data_buffer, // bn_data storing leafentry, if NULL, means there is no bn_data
uint32_t idx, // index in data_buffer where leafentry is stored (and should be replaced
TXNID oldest_referenced_xid,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
txn_gc_info *gc_info,
LEAFENTRY *new_leafentry_p,
int64_t * numbytes_delta_p) { // change in total size of key and val, not including any overhead
invariant_notnull(gc_info);
paranoid_invariant_notnull(new_leafentry_p);
ULE_S ule;
int64_t oldnumbytes = 0;
......@@ -514,18 +513,14 @@ toku_le_apply_msg(FT_MSG msg,
// - we may be able to immediately promote the newly-applied outermost provisional uxr
// - either way, run simple gc first, and then full gc if there are still some committed uxrs.
ule_try_promote_provisional_outermost(&ule, oldest_referenced_xid);
ule_simple_garbage_collection(&ule,
txn_state_for_gc != nullptr ?
txn_state_for_gc->oldest_referenced_xid_for_simple_gc :
oldest_referenced_xid,
gc_info);
if (ule.num_cuxrs > 1 && txn_state_for_gc != nullptr) {
ule_try_promote_provisional_outermost(&ule, gc_info->oldest_referenced_xid_for_implicit_promotion);
ule_simple_garbage_collection(&ule, gc_info);
if (ule.num_cuxrs > 1 && gc_info->txn_state_for_gc != nullptr) {
size_t size_before_gc = ule_packed_memsize(&ule);
ule_garbage_collect(&ule,
txn_state_for_gc->snapshot_xids,
txn_state_for_gc->referenced_xids,
txn_state_for_gc->live_root_txns
gc_info->txn_state_for_gc->snapshot_xids,
gc_info->txn_state_for_gc->referenced_xids,
gc_info->txn_state_for_gc->live_root_txns
);
size_t size_after_gc = ule_packed_memsize(&ule);
......@@ -549,7 +544,7 @@ toku_le_apply_msg(FT_MSG msg,
ule_cleanup(&ule);
}
bool toku_le_worth_running_garbage_collection(LEAFENTRY le, TXNID oldest_referenced_xid_known) {
bool toku_le_worth_running_garbage_collection(LEAFENTRY le, txn_gc_info *gc_info) {
// Effect: Quickly determines if it's worth trying to run garbage collection on a leafentry
// Return: True if it makes sense to try garbage collection, false otherwise.
// Rationale: Garbage collection is likely to clean up under two circumstances:
......@@ -565,7 +560,8 @@ bool toku_le_worth_running_garbage_collection(LEAFENTRY le, TXNID oldest_referen
} else {
paranoid_invariant(le->u.mvcc.num_cxrs == 1);
}
return le->u.mvcc.num_pxrs > 0 && le_outermost_uncommitted_xid(le) < oldest_referenced_xid_known;
return le->u.mvcc.num_pxrs > 0 &&
le_outermost_uncommitted_xid(le) < gc_info->oldest_referenced_xid_for_implicit_promotion;
}
// Garbage collect one leaf entry, using the given OMT's.
......@@ -592,12 +588,12 @@ toku_le_garbage_collect(LEAFENTRY old_leaf_entry,
uint32_t idx,
void* keyp,
uint32_t keylen,
txn_gc_info *gc_info,
LEAFENTRY *new_leaf_entry,
const xid_omt_t &snapshot_xids,
const rx_omt_t &referenced_xids,
const xid_omt_t &live_root_txns,
TXNID oldest_referenced_xid_known,
int64_t * numbytes_delta_p) {
// We shouldn't want to run gc without having provided a snapshot of the txn system.
invariant_notnull(gc_info);
invariant_notnull(gc_info->txn_state_for_gc);
paranoid_invariant_notnull(new_leaf_entry);
ULE_S ule;
int64_t oldnumbytes = 0;
......@@ -621,15 +617,14 @@ toku_le_garbage_collect(LEAFENTRY old_leaf_entry,
// The oldest known referenced xid is a lower bound on the oldest possible
// live xid, so we use that. It's usually close enough to get rid of most
// garbage in leafentries.
TXNID oldest_possible_live_xid = oldest_referenced_xid_known;
ule_try_promote_provisional_outermost(&ule, oldest_possible_live_xid);
ule_try_promote_provisional_outermost(&ule, gc_info->oldest_referenced_xid_for_implicit_promotion);
// No need to run simple gc here if we're going straight for full gc.
if (ule.num_cuxrs > 1) {
size_t size_before_gc = ule_packed_memsize(&ule);
ule_garbage_collect(&ule,
snapshot_xids,
referenced_xids,
live_root_txns);
gc_info->txn_state_for_gc->snapshot_xids,
gc_info->txn_state_for_gc->referenced_xids,
gc_info->txn_state_for_gc->live_root_txns);
size_t size_after_gc = ule_packed_memsize(&ule);
STATUS_INC(LE_APPLY_GC_BYTES_IN, size_before_gc);
......
......@@ -615,7 +615,13 @@ indexer_ft_delete_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xi
} else {
result = toku_ydb_check_avail_fs_space(indexer->i->env);
if (result == 0) {
toku_ft_send_delete(db_struct_i(hotdb)->ft_handle, hotkey, xids, TXNID_NONE, make_gc_info(true));
TXNID oldest_referenced_xid_estimate =
toku_ft_get_oldest_referenced_xid_estimate(db_struct_i(hotdb)->ft_handle);
txn_gc_info gc_info(nullptr,
oldest_referenced_xid_estimate,
oldest_referenced_xid_estimate,
true);
toku_ft_send_delete(db_struct_i(hotdb)->ft_handle, hotkey, xids, &gc_info);
}
}
return result;
......@@ -651,7 +657,13 @@ indexer_ft_insert_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *ho
} else {
result = toku_ydb_check_avail_fs_space(indexer->i->env);
if (result == 0) {
toku_ft_send_insert(db_struct_i(hotdb)->ft_handle, hotkey, hotval, xids, FT_INSERT, TXNID_NONE, make_gc_info(true));
TXNID oldest_referenced_xid_estimate =
toku_ft_get_oldest_referenced_xid_estimate(db_struct_i(hotdb)->ft_handle);
txn_gc_info gc_info(nullptr,
oldest_referenced_xid_estimate,
oldest_referenced_xid_estimate,
true);
toku_ft_send_insert(db_struct_i(hotdb)->ft_handle, hotkey, hotval, xids, FT_INSERT, &gc_info);
}
}
return result;
......@@ -670,8 +682,15 @@ indexer_ft_commit(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids) {
result = indexer->i->test_commit_any(indexer, hotdb, hotkey, xids);
} else {
result = toku_ydb_check_avail_fs_space(indexer->i->env);
if (result == 0)
toku_ft_send_commit_any(db_struct_i(hotdb)->ft_handle, hotkey, xids, TXNID_NONE, make_gc_info(true));
if (result == 0) {
TXNID oldest_referenced_xid_estimate =
toku_ft_get_oldest_referenced_xid_estimate(db_struct_i(hotdb)->ft_handle);
txn_gc_info gc_info(nullptr,
oldest_referenced_xid_estimate,
oldest_referenced_xid_estimate,
true);
toku_ft_send_commit_any(db_struct_i(hotdb)->ft_handle, hotkey, xids, &gc_info);
}
}
}
return result;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment