Commit f12bdc75 authored by John Esmet's avatar John Esmet

Pass down txn manager state to message application, which it can use to

run full garbage collection when a leafentry has > 1 committed entry.
parent f7323d26
...@@ -1065,6 +1065,10 @@ typedef enum { ...@@ -1065,6 +1065,10 @@ typedef enum {
LE_MAX_PROVISIONAL_XR, LE_MAX_PROVISIONAL_XR,
LE_EXPANDED, LE_EXPANDED,
LE_MAX_MEMSIZE, LE_MAX_MEMSIZE,
LE_APPLY_GC_BYTES_IN,
LE_APPLY_GC_BYTES_OUT,
LE_NORMAL_GC_BYTES_IN,
LE_NORMAL_GC_BYTES_OUT,
LE_STATUS_NUM_ROWS LE_STATUS_NUM_ROWS
} le_status_entry; } le_status_entry;
...@@ -1196,6 +1200,7 @@ toku_ft_bn_apply_cmd_once ( ...@@ -1196,6 +1200,7 @@ toku_ft_bn_apply_cmd_once (
LEAFENTRY le, LEAFENTRY le,
TXNID oldest_referenced_xid, TXNID oldest_referenced_xid,
GC_INFO gc_info, GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
uint64_t *workdonep, uint64_t *workdonep,
STAT64INFO stats_to_update STAT64INFO stats_to_update
); );
...@@ -1209,6 +1214,7 @@ toku_ft_bn_apply_cmd ( ...@@ -1209,6 +1214,7 @@ toku_ft_bn_apply_cmd (
FT_MSG cmd, FT_MSG cmd,
TXNID oldest_referenced_xid, TXNID oldest_referenced_xid,
GC_INFO gc_info, GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
uint64_t *workdone, uint64_t *workdone,
STAT64INFO stats_to_update STAT64INFO stats_to_update
); );
...@@ -1222,6 +1228,7 @@ toku_ft_leaf_apply_cmd ( ...@@ -1222,6 +1228,7 @@ toku_ft_leaf_apply_cmd (
int target_childnum, int target_childnum,
FT_MSG cmd, FT_MSG cmd,
GC_INFO gc_info, GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
uint64_t *workdone, uint64_t *workdone,
STAT64INFO stats_to_update STAT64INFO stats_to_update
); );
...@@ -1236,6 +1243,7 @@ toku_ft_node_put_cmd ( ...@@ -1236,6 +1243,7 @@ toku_ft_node_put_cmd (
FT_MSG cmd, FT_MSG cmd,
bool is_fresh, bool is_fresh,
GC_INFO gc_info, GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
size_t flow_deltas[], size_t flow_deltas[],
STAT64INFO stats_to_update STAT64INFO stats_to_update
); );
......
This diff is collapsed.
...@@ -230,6 +230,7 @@ int toku_testsetup_insert_to_leaf (FT_HANDLE brt, BLOCKNUM blocknum, const char ...@@ -230,6 +230,7 @@ int toku_testsetup_insert_to_leaf (FT_HANDLE brt, BLOCKNUM blocknum, const char
&cmd, &cmd,
true, true,
make_gc_info(true), make_gc_info(true),
nullptr,
zero_flow_deltas, zero_flow_deltas,
NULL NULL
); );
......
...@@ -2925,7 +2925,7 @@ static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int ...@@ -2925,7 +2925,7 @@ static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int
.xids = lbuf->xids, .xids = lbuf->xids,
.u = { .id = { &thekey, &theval } } }; .u = { .id = { &thekey, &theval } } };
uint64_t workdone=0; uint64_t workdone=0;
toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(true), &workdone, stats_to_update); toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(true), nullptr, &workdone, stats_to_update);
} }
static int write_literal(struct dbout *out, void*data, size_t len) { static int write_literal(struct dbout *out, void*data, size_t len) {
......
...@@ -249,6 +249,7 @@ toku_le_apply_msg(FT_MSG msg, ...@@ -249,6 +249,7 @@ toku_le_apply_msg(FT_MSG msg,
uint32_t idx, // index in data_buffer where leafentry is stored (and should be replaced uint32_t idx, // index in data_buffer where leafentry is stored (and should be replaced
TXNID oldest_referenced_xid, TXNID oldest_referenced_xid,
GC_INFO gc_info, GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
LEAFENTRY *new_leafentry_p, LEAFENTRY *new_leafentry_p,
int64_t * numbytes_delta_p); int64_t * numbytes_delta_p);
......
...@@ -291,6 +291,7 @@ void toku_txn_manager_init(TXN_MANAGER* txn_managerp) { ...@@ -291,6 +291,7 @@ void toku_txn_manager_init(TXN_MANAGER* txn_managerp) {
txn_manager->last_xid = 0; txn_manager->last_xid = 0;
txn_manager->last_xid_seen_for_recover = TXNID_NONE; txn_manager->last_xid_seen_for_recover = TXNID_NONE;
txn_manager->last_calculated_oldest_referenced_xid = TXNID_NONE;
*txn_managerp = txn_manager; *txn_managerp = txn_manager;
} }
...@@ -324,6 +325,10 @@ toku_txn_manager_get_oldest_living_xid(TXN_MANAGER txn_manager) { ...@@ -324,6 +325,10 @@ toku_txn_manager_get_oldest_living_xid(TXN_MANAGER txn_manager) {
return rval; return rval;
} }
// Returns the value most recently stored in the manager's
// last_calculated_oldest_referenced_xid field. This function takes no lock
// itself, so the result is only an estimate of the oldest referenced xid.
TXNID toku_txn_manager_get_oldest_referenced_xid_estimate(TXN_MANAGER txn_manager) {
    const TXNID cached_estimate = txn_manager->last_calculated_oldest_referenced_xid;
    return cached_estimate;
}
int live_root_txn_list_iter(const TOKUTXN &live_xid, const uint32_t UU(index), TXNID **const referenced_xids); int live_root_txn_list_iter(const TOKUTXN &live_xid, const uint32_t UU(index), TXNID **const referenced_xids);
int live_root_txn_list_iter(const TOKUTXN &live_xid, const uint32_t UU(index), TXNID **const referenced_xids){ int live_root_txn_list_iter(const TOKUTXN &live_xid, const uint32_t UU(index), TXNID **const referenced_xids){
(*referenced_xids)[index] = live_xid->txnid.parent_id64; (*referenced_xids)[index] = live_xid->txnid.parent_id64;
...@@ -371,7 +376,7 @@ max_xid(TXNID a, TXNID b) { ...@@ -371,7 +376,7 @@ max_xid(TXNID a, TXNID b) {
return a < b ? b : a; return a < b ? b : a;
} }
static TXNID get_oldest_referenced_xid_unlocked(TXN_MANAGER txn_manager) { static void set_oldest_referenced_xid(TXN_MANAGER txn_manager) {
TXNID oldest_referenced_xid = TXNID_MAX; TXNID oldest_referenced_xid = TXNID_MAX;
int r; int r;
if (txn_manager->live_root_ids.size() > 0) { if (txn_manager->live_root_ids.size() > 0) {
...@@ -397,8 +402,8 @@ static TXNID get_oldest_referenced_xid_unlocked(TXN_MANAGER txn_manager) { ...@@ -397,8 +402,8 @@ static TXNID get_oldest_referenced_xid_unlocked(TXN_MANAGER txn_manager) {
if (txn_manager->last_xid < oldest_referenced_xid) { if (txn_manager->last_xid < oldest_referenced_xid) {
oldest_referenced_xid = txn_manager->last_xid; oldest_referenced_xid = txn_manager->last_xid;
} }
paranoid_invariant(oldest_referenced_xid != TXNID_MAX); invariant(oldest_referenced_xid != TXNID_MAX);
return oldest_referenced_xid; txn_manager->last_calculated_oldest_referenced_xid = oldest_referenced_xid;
} }
//Heaviside function to find a TOKUTXN by TOKUTXN (used to find the index) //Heaviside function to find a TOKUTXN by TOKUTXN (used to find the index)
...@@ -672,7 +677,7 @@ void toku_txn_manager_start_txn( ...@@ -672,7 +677,7 @@ void toku_txn_manager_start_txn(
r = txn_manager->live_root_ids.insert_at(txn->txnid.parent_id64, idx); r = txn_manager->live_root_ids.insert_at(txn->txnid.parent_id64, idx);
invariant_zero(r); invariant_zero(r);
} }
txn->oldest_referenced_xid = get_oldest_referenced_xid_unlocked(txn_manager); set_oldest_referenced_xid(txn_manager);
if (needs_snapshot) { if (needs_snapshot) {
txn_manager_create_snapshot_unlocked( txn_manager_create_snapshot_unlocked(
...@@ -825,7 +830,22 @@ void toku_txn_manager_clone_state_for_gc( ...@@ -825,7 +830,22 @@ void toku_txn_manager_clone_state_for_gc(
txn_manager_unlock(txn_manager); txn_manager_unlock(txn_manager);
} }
// Initializes this state object from the given transaction manager.
//
// Passes the addresses of snapshot_xids, referenced_xids and live_root_txns
// to toku_txn_manager_clone_state_for_gc(), then records the manager's
// last_calculated_oldest_referenced_xid as the xid to use for simple gc.
// txn_manager must not be null (enforced by an invariant).
void txn_manager_state::init(TXN_MANAGER txn_manager) {
invariant_notnull(txn_manager);
toku_txn_manager_clone_state_for_gc(
txn_manager,
&snapshot_xids,
&referenced_xids,
&live_root_txns
);
// NOTE(review): this field is read after the clone call has returned, so it
// may be newer than the cloned lists - confirm that is acceptable for simple gc.
oldest_referenced_xid_for_simple_gc = txn_manager->last_calculated_oldest_referenced_xid;
}
// Calls destroy() on each of the three collections held by this state object.
// oldest_referenced_xid_for_simple_gc is left untouched.
// NOTE(review): presumably must be paired with a prior init() - confirm against callers.
void txn_manager_state::destroy() {
snapshot_xids.destroy();
referenced_xids.destroy();
live_root_txns.destroy();
}
void toku_txn_manager_id2txn_unlocked(TXN_MANAGER txn_manager, TXNID_PAIR txnid, TOKUTXN *result) { void toku_txn_manager_id2txn_unlocked(TXN_MANAGER txn_manager, TXNID_PAIR txnid, TOKUTXN *result) {
TOKUTXN txn; TOKUTXN txn;
......
...@@ -121,6 +121,28 @@ struct txn_manager { ...@@ -121,6 +121,28 @@ struct txn_manager {
TXNID last_xid; TXNID last_xid;
TXNID last_xid_seen_for_recover; TXNID last_xid_seen_for_recover;
TXNID last_calculated_oldest_referenced_xid;
};
// Bundle of transaction manager state handed to garbage collection code.
// Populate with init() and release with destroy().
struct txn_manager_state {
// a snapshot of the txn manager's mvcc state
xid_omt_t snapshot_xids;
rx_omt_t referenced_xids;
xid_omt_t live_root_txns;
// the oldest xid in any live list
//
// suitable for simple garbage collection that cleans up multiple committed
// transaction records into one. not suitable for implicit promotions, which
// must be correct in the face of abort messages - see ftnode->oldest_referenced_xid
TXNID oldest_referenced_xid_for_simple_gc;
// The constructor does no work; call init() before using the fields.
txn_manager_state() { }
// Fills the fields above from the given transaction manager.
void init(TXN_MANAGER txn_manager);
// Destroys the three collections above.
void destroy();
private:
// Declared private so that outside code cannot copy construct this object.
txn_manager_state(txn_manager_state &rhs); // shouldn't need to copy construct
}; };
...@@ -129,6 +151,8 @@ void toku_txn_manager_destroy(TXN_MANAGER txn_manager); ...@@ -129,6 +151,8 @@ void toku_txn_manager_destroy(TXN_MANAGER txn_manager);
TXNID toku_txn_manager_get_oldest_living_xid(TXN_MANAGER txn_manager); TXNID toku_txn_manager_get_oldest_living_xid(TXN_MANAGER txn_manager);
TXNID toku_txn_manager_get_oldest_referenced_xid_estimate(TXN_MANAGER txn_manager);
void toku_txn_manager_handle_snapshot_create_for_child_txn( void toku_txn_manager_handle_snapshot_create_for_child_txn(
TOKUTXN txn, TOKUTXN txn,
TXN_MANAGER txn_manager, TXN_MANAGER txn_manager,
......
...@@ -116,7 +116,7 @@ PATENT RIGHTS GRANT: ...@@ -116,7 +116,7 @@ PATENT RIGHTS GRANT:
#include "ule-internal.h" #include "ule-internal.h"
#include <util/status.h> #include <util/status.h>
#include <util/scoped_malloc.h> #include <util/scoped_malloc.h>
#include <util/partitioned_counter.h>
#define ULE_DEBUG 0 #define ULE_DEBUG 0
...@@ -141,6 +141,10 @@ status_init(void) { ...@@ -141,6 +141,10 @@ status_init(void) {
STATUS_INIT(LE_MAX_PROVISIONAL_XR, nullptr, UINT64, "max provisional xr", TOKU_ENGINE_STATUS); STATUS_INIT(LE_MAX_PROVISIONAL_XR, nullptr, UINT64, "max provisional xr", TOKU_ENGINE_STATUS);
STATUS_INIT(LE_EXPANDED, nullptr, UINT64, "expanded", TOKU_ENGINE_STATUS); STATUS_INIT(LE_EXPANDED, nullptr, UINT64, "expanded", TOKU_ENGINE_STATUS);
STATUS_INIT(LE_MAX_MEMSIZE, nullptr, UINT64, "max memsize", TOKU_ENGINE_STATUS); STATUS_INIT(LE_MAX_MEMSIZE, nullptr, UINT64, "max memsize", TOKU_ENGINE_STATUS);
STATUS_INIT(LE_APPLY_GC_BYTES_IN, nullptr, PARCOUNT, "size of leafentries before garbage collection (during message application)", TOKU_ENGINE_STATUS);
STATUS_INIT(LE_APPLY_GC_BYTES_OUT, nullptr, PARCOUNT, "size of leafentries after garbage collection (during message application)", TOKU_ENGINE_STATUS);
STATUS_INIT(LE_NORMAL_GC_BYTES_IN, nullptr, PARCOUNT, "size of leafentries before garbage collection (outside message application)", TOKU_ENGINE_STATUS);
STATUS_INIT(LE_NORMAL_GC_BYTES_OUT,nullptr, PARCOUNT, "size of leafentries after garbage collection (outside message application)", TOKU_ENGINE_STATUS);
le_status.initialized = true; le_status.initialized = true;
} }
#undef STATUS_INIT #undef STATUS_INIT
...@@ -153,6 +157,14 @@ toku_le_get_status(LE_STATUS statp) { ...@@ -153,6 +157,14 @@ toku_le_get_status(LE_STATUS statp) {
} }
#define STATUS_VALUE(x) le_status.status[x].value.num #define STATUS_VALUE(x) le_status.status[x].value.num
// Adds 'd' to status row 'x'. Rows whose type is PARCOUNT are bumped through
// increment_partitioned_counter(); every other row is bumped with
// toku_sync_fetch_and_add() on its numeric value.
#define STATUS_INC(x, d)                                                          \
    do {                                                                          \
        if (le_status.status[x].type != PARCOUNT) {                               \
            toku_sync_fetch_and_add(&le_status.status[x].value.num, d);           \
        } else {                                                                  \
            increment_partitioned_counter(le_status.status[x].value.parcount, d); \
        }                                                                         \
    } while (0)
/////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////
...@@ -441,6 +453,18 @@ ule_garbage_collect(ULE ule, const xid_omt_t &snapshot_xids, const rx_omt_t &ref ...@@ -441,6 +453,18 @@ ule_garbage_collect(ULE ule, const xid_omt_t &snapshot_xids, const rx_omt_t &ref
done:; done:;
} }
// Returns the size 'ule' would occupy once packed into a leafentry, as
// reported by le_memsize_from_ule().
//
// Special case: when 'ule' holds exactly one committed uxr, no provisional
// uxrs, and that innermost uxr is a delete, the result is 0.
static size_t ule_packed_memsize(ULE ule) {
    const bool lone_committed_uxr = (ule->num_cuxrs == 1) && (ule->num_puxrs == 0);
    // Short-circuit keeps the innermost-uxr lookup limited to the lone-uxr case.
    if (lone_committed_uxr && uxr_is_delete(ule_get_innermost_uxr(ule))) {
        return 0;
    }
    return le_memsize_from_ule(ule);
}
///////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////
// This is the big enchilada. (Bring Tums.) Note that this level of abstraction // This is the big enchilada. (Bring Tums.) Note that this level of abstraction
// has no knowledge of the inner structure of either leafentry or msg. It makes // has no knowledge of the inner structure of either leafentry or msg. It makes
...@@ -462,6 +486,7 @@ toku_le_apply_msg(FT_MSG msg, ...@@ -462,6 +486,7 @@ toku_le_apply_msg(FT_MSG msg,
uint32_t idx, // index in data_buffer where leafentry is stored (and should be replaced uint32_t idx, // index in data_buffer where leafentry is stored (and should be replaced
TXNID oldest_referenced_xid, TXNID oldest_referenced_xid,
GC_INFO gc_info, GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
LEAFENTRY *new_leafentry_p, LEAFENTRY *new_leafentry_p,
int64_t * numbytes_delta_p) { // change in total size of key and val, not including any overhead int64_t * numbytes_delta_p) { // change in total size of key and val, not including any overhead
paranoid_invariant_notnull(new_leafentry_p); paranoid_invariant_notnull(new_leafentry_p);
...@@ -486,7 +511,27 @@ toku_le_apply_msg(FT_MSG msg, ...@@ -486,7 +511,27 @@ toku_le_apply_msg(FT_MSG msg,
oldnumbytes = ule_get_innermost_numbytes(&ule, keylen); oldnumbytes = ule_get_innermost_numbytes(&ule, keylen);
} }
msg_modify_ule(&ule, msg); // modify unpacked leafentry msg_modify_ule(&ule, msg); // modify unpacked leafentry
ule_simple_garbage_collection(&ule, oldest_referenced_xid, gc_info);
// - we may be able to immediately promote the newly-applied outermost provisional uxr
// - either way, run simple gc first, and then full gc if there are still some committed uxrs.
ule_try_promote_provisional_outermost(&ule, oldest_referenced_xid);
ule_simple_garbage_collection(&ule,
txn_state_for_gc != nullptr ?
txn_state_for_gc->oldest_referenced_xid_for_simple_gc :
oldest_referenced_xid,
gc_info);
if (ule.num_cuxrs > 1 && txn_state_for_gc != nullptr) {
size_t size_before_gc = ule_packed_memsize(&ule);
ule_garbage_collect(&ule,
txn_state_for_gc->snapshot_xids,
txn_state_for_gc->referenced_xids,
txn_state_for_gc->live_root_txns
);
size_t size_after_gc = ule_packed_memsize(&ule);
STATUS_INC(LE_APPLY_GC_BYTES_IN, size_before_gc);
STATUS_INC(LE_APPLY_GC_BYTES_OUT, size_after_gc);
}
int rval = le_pack( int rval = le_pack(
&ule, // create packed leafentry &ule, // create packed leafentry
data_buffer, data_buffer,
...@@ -578,7 +623,18 @@ toku_le_garbage_collect(LEAFENTRY old_leaf_entry, ...@@ -578,7 +623,18 @@ toku_le_garbage_collect(LEAFENTRY old_leaf_entry,
// garbage in leafentries. // garbage in leafentries.
TXNID oldest_possible_live_xid = oldest_referenced_xid_known; TXNID oldest_possible_live_xid = oldest_referenced_xid_known;
ule_try_promote_provisional_outermost(&ule, oldest_possible_live_xid); ule_try_promote_provisional_outermost(&ule, oldest_possible_live_xid);
ule_garbage_collect(&ule, snapshot_xids, referenced_xids, live_root_txns); // No need to run simple gc here if we're going straight for full gc.
// Full gc is only attempted when more than one committed uxr remains.
if (ule.num_cuxrs > 1) {
    size_t size_before_gc = ule_packed_memsize(&ule);
    ule_garbage_collect(&ule,
                        snapshot_xids,
                        referenced_xids,
                        live_root_txns);
    size_t size_after_gc = ule_packed_memsize(&ule);
    // toku_le_garbage_collect() is the gc path outside message application,
    // so its bytes belong to the NORMAL counters. The APPLY counters are
    // reserved for gc performed inside toku_le_apply_msg(); charging them
    // here would conflate the two statistics and leave NORMAL at zero.
    STATUS_INC(LE_NORMAL_GC_BYTES_IN, size_before_gc);
    STATUS_INC(LE_NORMAL_GC_BYTES_OUT, size_after_gc);
}
int r = le_pack( int r = le_pack(
&ule, &ule,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment