Commit 7029824b authored by Yoni Fogel

[t:5067] Merge #5067 to main. New GC algorithm that calculates...

[t:5067] Merge #5067 to main.  New GC algorithm that calculates "live_list_reverse" on the fly when needed

git-svn-id: file:///svn/toku/tokudb@44749 c7de825b-a66e-492c-adef-691d508d4ae1
parent 976c4791
...@@ -2100,7 +2100,7 @@ ft_basement_node_gc_once(BASEMENTNODE bn, ...@@ -2100,7 +2100,7 @@ ft_basement_node_gc_once(BASEMENTNODE bn,
u_int32_t index, u_int32_t index,
LEAFENTRY leaf_entry, LEAFENTRY leaf_entry,
OMT snapshot_xids, OMT snapshot_xids,
OMT live_list_reverse, OMT referenced_xids,
OMT live_root_txns, OMT live_root_txns,
STAT64INFO_S * delta) STAT64INFO_S * delta)
{ {
...@@ -2129,7 +2129,7 @@ ft_basement_node_gc_once(BASEMENTNODE bn, ...@@ -2129,7 +2129,7 @@ ft_basement_node_gc_once(BASEMENTNODE bn,
&bn->buffer_mempool, &bn->buffer_mempool,
&maybe_free, &maybe_free,
snapshot_xids, snapshot_xids,
live_list_reverse, referenced_xids,
live_root_txns); live_root_txns);
// These will represent the number of bytes and rows changed as // These will represent the number of bytes and rows changed as
...@@ -2171,7 +2171,7 @@ ft_basement_node_gc_once(BASEMENTNODE bn, ...@@ -2171,7 +2171,7 @@ ft_basement_node_gc_once(BASEMENTNODE bn,
static void static void
basement_node_gc_all_les(BASEMENTNODE bn, basement_node_gc_all_les(BASEMENTNODE bn,
OMT snapshot_xids, OMT snapshot_xids,
OMT live_list_reverse, OMT referenced_xids,
OMT live_root_txns, OMT live_root_txns,
STAT64INFO_S * delta) STAT64INFO_S * delta)
{ {
...@@ -2184,7 +2184,7 @@ basement_node_gc_all_les(BASEMENTNODE bn, ...@@ -2184,7 +2184,7 @@ basement_node_gc_all_les(BASEMENTNODE bn,
r = toku_omt_fetch(bn->buffer, index, &storedatav); r = toku_omt_fetch(bn->buffer, index, &storedatav);
assert(r == 0); assert(r == 0);
leaf_entry = storedatav; leaf_entry = storedatav;
ft_basement_node_gc_once(bn, index, leaf_entry, snapshot_xids, live_list_reverse, live_root_txns, delta); ft_basement_node_gc_once(bn, index, leaf_entry, snapshot_xids, referenced_xids, live_root_txns, delta);
// Check if the leaf entry was deleted or not. // Check if the leaf entry was deleted or not.
if (num_leafentries_before == toku_omt_size(bn->buffer)) { if (num_leafentries_before == toku_omt_size(bn->buffer)) {
++index; ++index;
...@@ -2197,7 +2197,7 @@ static void ...@@ -2197,7 +2197,7 @@ static void
ft_leaf_gc_all_les(FTNODE node, ft_leaf_gc_all_les(FTNODE node,
FT h, FT h,
OMT snapshot_xids, OMT snapshot_xids,
OMT live_list_reverse, OMT referenced_xids,
OMT live_root_txns) OMT live_root_txns)
{ {
toku_assert_entire_node_in_memory(node); toku_assert_entire_node_in_memory(node);
...@@ -2209,7 +2209,7 @@ ft_leaf_gc_all_les(FTNODE node, ...@@ -2209,7 +2209,7 @@ ft_leaf_gc_all_les(FTNODE node,
STAT64INFO_S delta; STAT64INFO_S delta;
delta.numrows = 0; delta.numrows = 0;
delta.numbytes = 0; delta.numbytes = 0;
basement_node_gc_all_les(bn, snapshot_xids, live_list_reverse, live_root_txns, &delta); basement_node_gc_all_les(bn, snapshot_xids, referenced_xids, live_root_txns, &delta);
toku_ft_update_stats(&h->in_memory_stats, delta); toku_ft_update_stats(&h->in_memory_stats, delta);
} }
} }
...@@ -2247,12 +2247,12 @@ toku_bnc_flush_to_child( ...@@ -2247,12 +2247,12 @@ toku_bnc_flush_to_child(
TOKULOGGER logger = toku_cachefile_logger(h->cf); TOKULOGGER logger = toku_cachefile_logger(h->cf);
if (child->height == 0 && logger) { if (child->height == 0 && logger) {
OMT snapshot_txnids = NULL; OMT snapshot_txnids = NULL;
OMT live_list_reverse = NULL; OMT referenced_xids = NULL;
OMT live_root_txns = NULL; OMT live_root_txns = NULL;
toku_txn_manager_clone_state_for_gc( toku_txn_manager_clone_state_for_gc(
logger->txn_manager, logger->txn_manager,
&snapshot_txnids, &snapshot_txnids,
&live_list_reverse, &referenced_xids,
&live_root_txns &live_root_txns
); );
size_t buffsize = toku_fifo_buffer_size_in_use(bnc->buffer); size_t buffsize = toku_fifo_buffer_size_in_use(bnc->buffer);
...@@ -2260,13 +2260,13 @@ toku_bnc_flush_to_child( ...@@ -2260,13 +2260,13 @@ toku_bnc_flush_to_child(
// may be misleading if there's a broadcast message in there // may be misleading if there's a broadcast message in there
STATUS_VALUE(FT_MSG_BYTES_CURR) -= buffsize; STATUS_VALUE(FT_MSG_BYTES_CURR) -= buffsize;
// Perform the garbage collection. // Perform the garbage collection.
ft_leaf_gc_all_les(child, h, snapshot_txnids, live_list_reverse, live_root_txns); ft_leaf_gc_all_les(child, h, snapshot_txnids, referenced_xids, live_root_txns);
// Free the OMT's we used for garbage collecting. // Free the OMT's we used for garbage collecting.
toku_omt_destroy(&snapshot_txnids); toku_omt_destroy(&snapshot_txnids);
toku_omt_destroy(&live_root_txns); toku_omt_destroy(&live_root_txns);
toku_omt_free_items_pool(live_list_reverse); toku_omt_free_items_pool(referenced_xids);
toku_omt_destroy(&live_list_reverse); toku_omt_destroy(&referenced_xids);
} }
return 0; return 0;
......
...@@ -35,15 +35,6 @@ toku_find_xid_by_xid (OMTVALUE v, void *xidv) { ...@@ -35,15 +35,6 @@ toku_find_xid_by_xid (OMTVALUE v, void *xidv) {
return 0; return 0;
} }
// Three-way comparison callback for searching an OMT whose values are XID_PAIRs.
//   v    : the stored element, interpreted as an XID_PAIR.
//   xidv : the TXNID being searched for, carried in the pointer value itself
//          (it is cast back to TXNID, never dereferenced).
// Returns -1 if the pair's xid1 is below the target, +1 if above, 0 if equal.
int
toku_find_pair_by_xid (OMTVALUE v, void *xidv) {
const TXNID target = (TXNID)xidv;
XID_PAIR candidate = v;
if (candidate->xid1 == target) return 0;
return (candidate->xid1 < target) ? -1 : +1;
}
void *toku_malloc_in_rollback(ROLLBACK_LOG_NODE log, size_t size) { void *toku_malloc_in_rollback(ROLLBACK_LOG_NODE log, size_t size) {
return malloc_in_memarena(log->rollentry_arena, size); return malloc_in_memarena(log->rollentry_arena, size);
} }
......
...@@ -53,7 +53,6 @@ void toku_maybe_spill_rollbacks(TOKUTXN txn, ROLLBACK_LOG_NODE log); ...@@ -53,7 +53,6 @@ void toku_maybe_spill_rollbacks(TOKUTXN txn, ROLLBACK_LOG_NODE log);
void toku_txn_maybe_note_ft (TOKUTXN txn, FT h); void toku_txn_maybe_note_ft (TOKUTXN txn, FT h);
int toku_logger_txn_rollback_raw_count(TOKUTXN txn, u_int64_t *raw_count); int toku_logger_txn_rollback_raw_count(TOKUTXN txn, u_int64_t *raw_count);
int toku_find_pair_by_xid (OMTVALUE v, void *txnv);
int toku_find_xid_by_xid (OMTVALUE v, void *xidv); int toku_find_xid_by_xid (OMTVALUE v, void *xidv);
PAIR_ATTR rollback_memory_size(ROLLBACK_LOG_NODE log); PAIR_ATTR rollback_memory_size(ROLLBACK_LOG_NODE log);
......
...@@ -101,11 +101,6 @@ BOOL toku_is_txn_in_live_root_txn_list(OMT live_root_txn_list, TXNID xid); ...@@ -101,11 +101,6 @@ BOOL toku_is_txn_in_live_root_txn_list(OMT live_root_txn_list, TXNID xid);
TXNID toku_get_oldest_in_live_root_txn_list(TOKUTXN txn); TXNID toku_get_oldest_in_live_root_txn_list(TOKUTXN txn);
// A pair of transaction ids.
// XID_PAIR_S is the struct itself; XID_PAIR is a pointer to it.
typedef struct {
TXNID xid1; // first id of the pair
TXNID xid2; // second id of the pair
} XID_PAIR_S;
typedef XID_PAIR_S *XID_PAIR;
#include "txn_state.h" #include "txn_state.h"
TOKUTXN_STATE toku_txn_get_state(TOKUTXN txn); TOKUTXN_STATE toku_txn_get_state(TOKUTXN txn);
......
This diff is collapsed.
...@@ -47,9 +47,9 @@ int toku_txn_manager_start_txn( ...@@ -47,9 +47,9 @@ int toku_txn_manager_start_txn(
void toku_txn_manager_finish_txn(TXN_MANAGER txn_manager, TOKUTXN txn); void toku_txn_manager_finish_txn(TXN_MANAGER txn_manager, TOKUTXN txn);
void toku_txn_manager_clone_state_for_gc( void toku_txn_manager_clone_state_for_gc(
TXN_MANAGER txn_manager, TXN_MANAGER txn_manager,
OMT* snapshot_xids, OMT* snapshot_xids,
OMT* live_list_reverse, OMT* referenced_xids,
OMT* live_root_txns OMT* live_root_txns
); );
...@@ -61,8 +61,8 @@ int toku_txn_manager_get_txn_from_xid (TXN_MANAGER txn_manager, TOKU_XA_XID *xid ...@@ -61,8 +61,8 @@ int toku_txn_manager_get_txn_from_xid (TXN_MANAGER txn_manager, TOKU_XA_XID *xid
u_int32_t toku_txn_manager_num_live_txns(TXN_MANAGER txn_manager); u_int32_t toku_txn_manager_num_live_txns(TXN_MANAGER txn_manager);
int toku_txn_manager_iter_over_live_txns( int toku_txn_manager_iter_over_live_txns(
TXN_MANAGER txn_manager, TXN_MANAGER txn_manager,
int (*f)(OMTVALUE, u_int32_t, void*), int (*f)(OMTVALUE, u_int32_t, void*),
void* v void* v
); );
...@@ -71,10 +71,10 @@ void toku_txn_manager_note_abort_txn(TXN_MANAGER txn_manager, TOKUTXN txn); ...@@ -71,10 +71,10 @@ void toku_txn_manager_note_abort_txn(TXN_MANAGER txn_manager, TOKUTXN txn);
void toku_txn_manager_note_commit_txn(TXN_MANAGER txn_manager, TOKUTXN txn); void toku_txn_manager_note_commit_txn(TXN_MANAGER txn_manager, TOKUTXN txn);
int toku_txn_manager_recover_txn( int toku_txn_manager_recover_txn(
TXN_MANAGER txn_manager, TXN_MANAGER txn_manager,
struct tokulogger_preplist preplist[/*count*/], struct tokulogger_preplist preplist[/*count*/],
long count, long count,
long *retp, /*out*/ long *retp, /*out*/
u_int32_t flags u_int32_t flags
); );
...@@ -91,6 +91,7 @@ TXNID toku_txn_manager_get_last_xid(TXN_MANAGER mgr); ...@@ -91,6 +91,7 @@ TXNID toku_txn_manager_get_last_xid(TXN_MANAGER mgr);
// Test-only function // Test-only function
void toku_txn_manager_increase_last_xid(TXN_MANAGER mgr, uint64_t increment); void toku_txn_manager_increase_last_xid(TXN_MANAGER mgr, uint64_t increment);
TXNID toku_get_youngest_live_list_txnid_for(TXNID xc, OMT snapshot_txnids, OMT referenced_xids);
#if defined(__cplusplus) || defined(__cilkplusplus) #if defined(__cplusplus) || defined(__cilkplusplus)
} }
#endif #endif
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "xids.h" #include "xids.h"
#include "ft_msg.h" #include "ft_msg.h"
#include "ule.h" #include "ule.h"
#include "txn_manager.h"
#include "ule-internal.h" #include "ule-internal.h"
...@@ -188,36 +189,16 @@ get_next_older_txnid(TXNID xc, OMT omt) { ...@@ -188,36 +189,16 @@ get_next_older_txnid(TXNID xc, OMT omt) {
return xid; return xid;
} }
// Look up xc in live_list_reverse (an OMT of XID_PAIRs searched by xid1 via
// toku_find_pair_by_xid) and return the xid2 recorded for it.
//   xc                : the transaction id to search for.
//   live_list_reverse : the OMT to search.
// Returns the matching pair's xid2, or TXNID_NONE when the search reports
// DB_NOTFOUND. Any other search result is rejected by an invariant.
TXNID
toku_get_youngest_live_list_txnid_for(TXNID xc, OMT live_list_reverse) {
OMTVALUE found;
XID_PAIR match;
uint32_t position; // filled in by the search; not otherwise used here
int r = toku_omt_find_zero(live_list_reverse, toku_find_pair_by_xid, (void *)xc, &found, &position);
if (r != 0) {
// The only acceptable failure is "no such entry".
invariant(r==DB_NOTFOUND);
return TXNID_NONE;
}
match = found;
invariant(match->xid1 == xc); //sanity check
return match->xid2;
}
// //
// This function returns TRUE if live transaction TL1 is allowed to read a value committed by // This function returns TRUE if live transaction TL1 is allowed to read a value committed by
// transaction xc, false otherwise. // transaction xc, false otherwise.
// //
static BOOL static BOOL
xid_reads_committed_xid(TXNID tl1, TXNID xc, OMT live_list_reverse) { xid_reads_committed_xid(TXNID tl1, TXNID xc, OMT snapshot_txnids, OMT referenced_xids) {
BOOL rval; BOOL rval;
if (tl1 < xc) rval = FALSE; //cannot read a newer txn if (tl1 < xc) rval = FALSE; //cannot read a newer txn
else { else {
TXNID x = toku_get_youngest_live_list_txnid_for(xc, live_list_reverse); TXNID x = toku_get_youngest_live_list_txnid_for(xc, snapshot_txnids, referenced_xids);
if (x == TXNID_NONE) rval = TRUE; //Not in ANY live list, tl1 can read it. if (x == TXNID_NONE) rval = TRUE; //Not in ANY live list, tl1 can read it.
else rval = tl1 > x; //Newer than the 'newest one that has it in live list' else rval = tl1 > x; //Newer than the 'newest one that has it in live list'
// we know tl1 > xc // we know tl1 > xc
...@@ -230,7 +211,7 @@ xid_reads_committed_xid(TXNID tl1, TXNID xc, OMT live_list_reverse) { ...@@ -230,7 +211,7 @@ xid_reads_committed_xid(TXNID tl1, TXNID xc, OMT live_list_reverse) {
} }
static void static void
garbage_collection(ULE ule, OMT snapshot_xids, OMT live_list_reverse, OMT live_root_txns) { garbage_collection(ULE ule, OMT snapshot_xids, OMT referenced_xids, OMT live_root_txns) {
if (ule->num_cuxrs == 1) goto done; if (ule->num_cuxrs == 1) goto done;
// will fail if too many num_cuxrs // will fail if too many num_cuxrs
BOOL necessary_static[MAX_TRANSACTION_RECORDS]; BOOL necessary_static[MAX_TRANSACTION_RECORDS];
...@@ -266,7 +247,7 @@ garbage_collection(ULE ule, OMT snapshot_xids, OMT live_list_reverse, OMT live_r ...@@ -266,7 +247,7 @@ garbage_collection(ULE ule, OMT snapshot_xids, OMT live_list_reverse, OMT live_r
continue; continue;
} }
tl1 = toku_get_youngest_live_list_txnid_for(xc, live_list_reverse); tl1 = toku_get_youngest_live_list_txnid_for(xc, snapshot_xids, referenced_xids);
if (tl1 == xc) { if (tl1 == xc) {
// if tl1 == xc, that means xc should be live and show up in // if tl1 == xc, that means xc should be live and show up in
// live_root_txns, which we check above. So, if we get // live_root_txns, which we check above. So, if we get
...@@ -294,7 +275,7 @@ garbage_collection(ULE ule, OMT snapshot_xids, OMT live_list_reverse, OMT live_r ...@@ -294,7 +275,7 @@ garbage_collection(ULE ule, OMT snapshot_xids, OMT live_list_reverse, OMT live_r
curr_committed_entry--; curr_committed_entry--;
while (curr_committed_entry > 0) { while (curr_committed_entry > 0) {
xc = ule->uxrs[curr_committed_entry].xid; xc = ule->uxrs[curr_committed_entry].xid;
if (xid_reads_committed_xid(tl1, xc, live_list_reverse)) { if (xid_reads_committed_xid(tl1, xc, snapshot_xids, referenced_xids)) {
break; break;
} }
curr_committed_entry--; curr_committed_entry--;
...@@ -387,7 +368,7 @@ apply_msg_to_leafentry(FT_MSG msg, // message to apply to leafentry ...@@ -387,7 +368,7 @@ apply_msg_to_leafentry(FT_MSG msg, // message to apply to leafentry
// the memory completely, if we removed the leaf entry. // the memory completely, if we removed the leaf entry.
// -- snapshot_xids : we use these in memory transaction ids to // -- snapshot_xids : we use these in memory transaction ids to
// determine what to garbage collect. // determine what to garbage collect.
// -- live_list_reverse : list of in memory active transactions. // -- referenced_xids : list of in memory active transactions.
// NOTE: it is not a good idea to garbage collect a leaf // NOTE: it is not a good idea to garbage collect a leaf
// entry with only one committed value. // entry with only one committed value.
int int
...@@ -398,15 +379,15 @@ garbage_collect_leafentry(LEAFENTRY old_leaf_entry, ...@@ -398,15 +379,15 @@ garbage_collect_leafentry(LEAFENTRY old_leaf_entry,
struct mempool *mp, struct mempool *mp,
void **maybe_free, void **maybe_free,
OMT snapshot_xids, OMT snapshot_xids,
OMT live_list_reverse, OMT referenced_xids,
OMT live_root_txns) { OMT live_root_txns) {
int r = 0; int r = 0;
ULE_S ule; ULE_S ule;
le_unpack(&ule, old_leaf_entry); le_unpack(&ule, old_leaf_entry);
assert(snapshot_xids); assert(snapshot_xids);
assert(live_list_reverse); assert(referenced_xids);
assert(live_root_txns); assert(live_root_txns);
garbage_collection(&ule, snapshot_xids, live_list_reverse, live_root_txns); garbage_collection(&ule, snapshot_xids, referenced_xids, live_root_txns);
r = le_pack(&ule, r = le_pack(&ule,
new_leaf_entry_memory_size, new_leaf_entry_memory_size,
new_leaf_entry, new_leaf_entry,
......
...@@ -69,11 +69,9 @@ int garbage_collect_leafentry(LEAFENTRY old_leaf_entry, ...@@ -69,11 +69,9 @@ int garbage_collect_leafentry(LEAFENTRY old_leaf_entry,
struct mempool *mp, struct mempool *mp,
void **maybe_free, void **maybe_free,
OMT snapshot_xids, OMT snapshot_xids,
OMT live_list_reverse, OMT referenced_xids,
OMT live_root_txns); OMT live_root_txns);
TXNID toku_get_youngest_live_list_txnid_for(TXNID xc, OMT live_list_reverse);
#if defined(__cplusplus) || defined(__cilkplusplus) #if defined(__cplusplus) || defined(__cilkplusplus)
} }
#endif #endif
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment