Commit 5b697e7e authored by John Esmet's avatar John Esmet

Merge branch 'releases/tokumx-2.0'

parents db9b01c8 668f1118
...@@ -2514,7 +2514,7 @@ toku_cachetable_minicron_shutdown(CACHETABLE ct) { ...@@ -2514,7 +2514,7 @@ toku_cachetable_minicron_shutdown(CACHETABLE ct) {
void toku_cachetable_prepare_close(CACHETABLE ct UU()) { void toku_cachetable_prepare_close(CACHETABLE ct UU()) {
extern bool toku_serialize_in_parallel; extern bool toku_serialize_in_parallel;
toku_serialize_in_parallel = true; toku_drd_unsafe_set(&toku_serialize_in_parallel, true);
} }
/* Requires that it all be flushed. */ /* Requires that it all be flushed. */
......
...@@ -600,8 +600,9 @@ handle_split_of_child( ...@@ -600,8 +600,9 @@ handle_split_of_child(
// We never set the rightmost blocknum to be the root. // We never set the rightmost blocknum to be the root.
// Instead, we wait for the root to split and let promotion initialize the rightmost // Instead, we wait for the root to split and let promotion initialize the rightmost
// blocknum to be the first non-root leaf node on the right extreme to recieve an insert. // blocknum to be the first non-root leaf node on the right extreme to recieve an insert.
invariant(ft->h->root_blocknum.b != ft->rightmost_blocknum.b); BLOCKNUM rightmost_blocknum = toku_drd_unsafe_fetch(&ft->rightmost_blocknum);
if (childa->blocknum.b == ft->rightmost_blocknum.b) { invariant(ft->h->root_blocknum.b != rightmost_blocknum.b);
if (childa->blocknum.b == rightmost_blocknum.b) {
// The rightmost leaf (a) split into (a) and (b). We want (b) to swap pair values // The rightmost leaf (a) split into (a) and (b). We want (b) to swap pair values
// with (a), now that it is the new rightmost leaf. This keeps the rightmost blocknum // with (a), now that it is the new rightmost leaf. This keeps the rightmost blocknum
// constant, the same the way we keep the root blocknum constant. // constant, the same the way we keep the root blocknum constant.
...@@ -1428,7 +1429,8 @@ ft_merge_child( ...@@ -1428,7 +1429,8 @@ ft_merge_child(
node->pivotkeys.delete_at(childnuma); node->pivotkeys.delete_at(childnuma);
// Handle a merge of the rightmost leaf node. // Handle a merge of the rightmost leaf node.
if (did_merge && childb->blocknum.b == ft->rightmost_blocknum.b) { BLOCKNUM rightmost_blocknum = toku_drd_unsafe_fetch(&ft->rightmost_blocknum);
if (did_merge && childb->blocknum.b == rightmost_blocknum.b) {
invariant(childb->blocknum.b != ft->h->root_blocknum.b); invariant(childb->blocknum.b != ft->h->root_blocknum.b);
toku_ftnode_swap_pair_values(childa, childb); toku_ftnode_swap_pair_values(childa, childb);
BP_BLOCKNUM(node, childnuma) = childa->blocknum; BP_BLOCKNUM(node, childnuma) = childa->blocknum;
......
...@@ -1200,7 +1200,7 @@ exit: ...@@ -1200,7 +1200,7 @@ exit:
// We need a function to have something a drd suppression can reference // We need a function to have something a drd suppression can reference
// see src/tests/drd.suppressions (unsafe_touch_clock) // see src/tests/drd.suppressions (unsafe_touch_clock)
static void unsafe_touch_clock(FTNODE node, int i) { static void unsafe_touch_clock(FTNODE node, int i) {
BP_TOUCH_CLOCK(node, i); toku_drd_unsafe_set(&node->bp[i].clock_count, static_cast<unsigned char>(1));
} }
// Callback that states if a partial fetch of the node is necessary // Callback that states if a partial fetch of the node is necessary
...@@ -1620,13 +1620,13 @@ static void inject_message_in_locked_node( ...@@ -1620,13 +1620,13 @@ static void inject_message_in_locked_node(
paranoid_invariant(msg_with_msn.msn().msn == node->max_msn_applied_to_node_on_disk.msn); paranoid_invariant(msg_with_msn.msn().msn == node->max_msn_applied_to_node_on_disk.msn);
if (node->blocknum.b == ft->rightmost_blocknum.b) { if (node->blocknum.b == ft->rightmost_blocknum.b) {
if (ft->seqinsert_score < FT_SEQINSERT_SCORE_THRESHOLD) { if (toku_drd_unsafe_fetch(&ft->seqinsert_score) < FT_SEQINSERT_SCORE_THRESHOLD) {
// we promoted to the rightmost leaf node and the seqinsert score has not yet saturated. // we promoted to the rightmost leaf node and the seqinsert score has not yet saturated.
toku_sync_fetch_and_add(&ft->seqinsert_score, 1); toku_sync_fetch_and_add(&ft->seqinsert_score, 1);
} }
} else if (ft->seqinsert_score != 0) { } else if (toku_drd_unsafe_fetch(&ft->seqinsert_score) != 0) {
// we promoted to something other than the rightmost leaf node and the score should reset // we promoted to something other than the rightmost leaf node and the score should reset
ft->seqinsert_score = 0; toku_drd_unsafe_set(&ft->seqinsert_score, static_cast<uint32_t>(0));
} }
// if we call toku_ft_flush_some_child, then that function unpins the root // if we call toku_ft_flush_some_child, then that function unpins the root
...@@ -1785,19 +1785,19 @@ static inline bool should_inject_in_node(seqinsert_loc loc, int height, int dept ...@@ -1785,19 +1785,19 @@ static inline bool should_inject_in_node(seqinsert_loc loc, int height, int dept
return (height == 0 || (loc == NEITHER_EXTREME && (height <= 1 || depth >= 2))); return (height == 0 || (loc == NEITHER_EXTREME && (height <= 1 || depth >= 2)));
} }
static void ft_set_or_verify_rightmost_blocknum(FT ft, BLOCKNUM b) static void ft_verify_or_set_rightmost_blocknum(FT ft, BLOCKNUM b)
// Given: 'b', the _definitive_ and constant rightmost blocknum of 'ft' // Given: 'b', the _definitive_ and constant rightmost blocknum of 'ft'
{ {
if (ft->rightmost_blocknum.b == RESERVED_BLOCKNUM_NULL) { if (toku_drd_unsafe_fetch(&ft->rightmost_blocknum.b) == RESERVED_BLOCKNUM_NULL) {
toku_ft_lock(ft); toku_ft_lock(ft);
if (ft->rightmost_blocknum.b == RESERVED_BLOCKNUM_NULL) { if (ft->rightmost_blocknum.b == RESERVED_BLOCKNUM_NULL) {
ft->rightmost_blocknum = b; toku_drd_unsafe_set(&ft->rightmost_blocknum, b);
} }
toku_ft_unlock(ft); toku_ft_unlock(ft);
} }
// The rightmost blocknum only transitions from RESERVED_BLOCKNUM_NULL to non-null. // The rightmost blocknum only transitions from RESERVED_BLOCKNUM_NULL to non-null.
// If it's already set, verify that the stored value is consistent with 'b' // If it's already set, verify that the stored value is consistent with 'b'
invariant(ft->rightmost_blocknum.b == b.b); invariant(toku_drd_unsafe_fetch(&ft->rightmost_blocknum.b) == b.b);
} }
bool toku_bnc_should_promote(FT ft, NONLEAF_CHILDINFO bnc) { bool toku_bnc_should_promote(FT ft, NONLEAF_CHILDINFO bnc) {
...@@ -1859,7 +1859,7 @@ static void push_something_in_subtree( ...@@ -1859,7 +1859,7 @@ static void push_something_in_subtree(
// otherwise. We explicitly skip the root node because then we don't have // otherwise. We explicitly skip the root node because then we don't have
// to worry about changing the rightmost blocknum when the root splits. // to worry about changing the rightmost blocknum when the root splits.
if (subtree_root->height == 0 && loc == RIGHT_EXTREME && subtree_root->blocknum.b != ft->h->root_blocknum.b) { if (subtree_root->height == 0 && loc == RIGHT_EXTREME && subtree_root->blocknum.b != ft->h->root_blocknum.b) {
ft_set_or_verify_rightmost_blocknum(ft, subtree_root->blocknum); ft_verify_or_set_rightmost_blocknum(ft, subtree_root->blocknum);
} }
inject_message_in_locked_node(ft, subtree_root, target_childnum, msg, flow_deltas, gc_info); inject_message_in_locked_node(ft, subtree_root, target_childnum, msg, flow_deltas, gc_info);
} else { } else {
...@@ -2276,20 +2276,21 @@ static int ft_maybe_insert_into_rightmost_leaf(FT ft, DBT *key, DBT *val, XIDS m ...@@ -2276,20 +2276,21 @@ static int ft_maybe_insert_into_rightmost_leaf(FT ft, DBT *key, DBT *val, XIDS m
int r = -1; int r = -1;
uint32_t rightmost_fullhash; uint32_t rightmost_fullhash;
BLOCKNUM rightmost_blocknum = ft->rightmost_blocknum; BLOCKNUM rightmost_blocknum;
FTNODE rightmost_leaf = nullptr; FTNODE rightmost_leaf = nullptr;
// Don't do the optimization if our heurstic suggests that // Don't do the optimization if our heurstic suggests that
// insertion pattern is not sequential. // insertion pattern is not sequential.
if (ft->seqinsert_score < FT_SEQINSERT_SCORE_THRESHOLD) { if (toku_drd_unsafe_fetch(&ft->seqinsert_score) < FT_SEQINSERT_SCORE_THRESHOLD) {
goto cleanup; goto cleanup;
} }
// We know the seqinsert score is high enough that we should // We know the seqinsert score is high enough that we should
// attemp to directly insert into the right most leaf. Because // attempt to directly insert into the rightmost leaf. Because
// the score is non-zero, the rightmost blocknum must have been // the score is non-zero, the rightmost blocknum must have been
// set. See inject_message_in_locked_node(), which only increases // set. See inject_message_in_locked_node(), which only increases
// the score if the target node blocknum == rightmost_blocknum // the score if the target node blocknum == rightmost_blocknum
rightmost_blocknum = ft->rightmost_blocknum;
invariant(rightmost_blocknum.b != RESERVED_BLOCKNUM_NULL); invariant(rightmost_blocknum.b != RESERVED_BLOCKNUM_NULL);
// Pin the rightmost leaf with a write lock. // Pin the rightmost leaf with a write lock.
......
...@@ -572,14 +572,18 @@ toku_logger_make_space_in_inbuf (TOKULOGGER logger, int n_bytes_needed) ...@@ -572,14 +572,18 @@ toku_logger_make_space_in_inbuf (TOKULOGGER logger, int n_bytes_needed)
release_output(logger, fsynced_lsn); release_output(logger, fsynced_lsn);
} }
void toku_logger_fsync (TOKULOGGER logger) void toku_logger_fsync(TOKULOGGER logger)
// Effect: This is the exported fsync used by ydb.c for env_log_flush. Group commit doesn't have to work. // Effect: This is the exported fsync used by ydb.c for env_log_flush. Group commit doesn't have to work.
// Entry: Holds no locks // Entry: Holds no locks
// Exit: Holds no locks // Exit: Holds no locks
// Implementation note: Acquire the output condition lock, then the output permission, then release the output condition lock, then get the input lock. // Implementation note: Acquire the output condition lock, then the output permission, then release the output condition lock, then get the input lock.
// Then release everything. // Then release everything. Hold the input lock while reading the current max lsn in buf to make drd happy that there is no data race.
{ {
toku_logger_maybe_fsync(logger, logger->inbuf.max_lsn_in_buf, true, false); ml_lock(&logger->input_lock);
const LSN max_lsn_in_buf = logger->inbuf.max_lsn_in_buf;
ml_unlock(&logger->input_lock);
toku_logger_maybe_fsync(logger, max_lsn_in_buf, true, false);
} }
void toku_logger_fsync_if_lsn_not_fsynced (TOKULOGGER logger, LSN lsn) { void toku_logger_fsync_if_lsn_not_fsynced (TOKULOGGER logger, LSN lsn) {
......
...@@ -145,7 +145,7 @@ struct toku_thread_pool *get_ft_pool(void) { ...@@ -145,7 +145,7 @@ struct toku_thread_pool *get_ft_pool(void) {
} }
void toku_serialize_set_parallel(bool in_parallel) { void toku_serialize_set_parallel(bool in_parallel) {
toku_serialize_in_parallel = in_parallel; toku_drd_unsafe_set(&toku_serialize_in_parallel, in_parallel);
} }
void toku_ft_serialize_layer_init(void) { void toku_ft_serialize_layer_init(void) {
...@@ -852,7 +852,7 @@ toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA ...@@ -852,7 +852,7 @@ toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA
ft->h->basementnodesize, ft->h->basementnodesize,
ft->h->compression_method, ft->h->compression_method,
do_rebalancing, do_rebalancing,
toku_serialize_in_parallel, // in_parallel toku_drd_unsafe_fetch(&toku_serialize_in_parallel),
&n_to_write, &n_to_write,
&n_uncompressed_bytes, &n_uncompressed_bytes,
&compressed_buf &compressed_buf
......
...@@ -325,7 +325,7 @@ toku_txn_manager_get_oldest_living_xid(TXN_MANAGER txn_manager) { ...@@ -325,7 +325,7 @@ toku_txn_manager_get_oldest_living_xid(TXN_MANAGER txn_manager) {
} }
TXNID toku_txn_manager_get_oldest_referenced_xid_estimate(TXN_MANAGER txn_manager) { TXNID toku_txn_manager_get_oldest_referenced_xid_estimate(TXN_MANAGER txn_manager) {
return txn_manager->last_calculated_oldest_referenced_xid; return toku_drd_unsafe_fetch(&txn_manager->last_calculated_oldest_referenced_xid);
} }
int live_root_txn_list_iter(const TOKUTXN &live_xid, const uint32_t UU(index), TXNID **const referenced_xids); int live_root_txn_list_iter(const TOKUTXN &live_xid, const uint32_t UU(index), TXNID **const referenced_xids);
...@@ -385,7 +385,7 @@ static void set_oldest_referenced_xid(TXN_MANAGER txn_manager) { ...@@ -385,7 +385,7 @@ static void set_oldest_referenced_xid(TXN_MANAGER txn_manager) {
oldest_referenced_xid = txn_manager->last_xid; oldest_referenced_xid = txn_manager->last_xid;
} }
invariant(oldest_referenced_xid != TXNID_MAX); invariant(oldest_referenced_xid != TXNID_MAX);
txn_manager->last_calculated_oldest_referenced_xid = oldest_referenced_xid; toku_drd_unsafe_set(&txn_manager->last_calculated_oldest_referenced_xid, oldest_referenced_xid);
} }
//Heaviside function to find a TOKUTXN by TOKUTXN (used to find the index) //Heaviside function to find a TOKUTXN by TOKUTXN (used to find the index)
......
...@@ -275,7 +275,7 @@ void locktree::sto_end(void) { ...@@ -275,7 +275,7 @@ void locktree::sto_end(void) {
void locktree::sto_end_early_no_accounting(void *prepared_lkr) { void locktree::sto_end_early_no_accounting(void *prepared_lkr) {
sto_migrate_buffer_ranges_to_tree(prepared_lkr); sto_migrate_buffer_ranges_to_tree(prepared_lkr);
sto_end(); sto_end();
m_sto_score = 0; toku_drd_unsafe_set(&m_sto_score, 0);
} }
void locktree::sto_end_early(void *prepared_lkr) { void locktree::sto_end_early(void *prepared_lkr) {
...@@ -536,11 +536,11 @@ void locktree::remove_overlapping_locks_for_txnid(TXNID txnid, ...@@ -536,11 +536,11 @@ void locktree::remove_overlapping_locks_for_txnid(TXNID txnid,
} }
bool locktree::sto_txnid_is_valid_unsafe(void) const { bool locktree::sto_txnid_is_valid_unsafe(void) const {
return m_sto_txnid != TXNID_NONE; return toku_drd_unsafe_fetch(&m_sto_txnid) != TXNID_NONE;
} }
int locktree::sto_get_score_unsafe(void) const { int locktree::sto_get_score_unsafe(void) const {
return m_sto_score; return toku_drd_unsafe_fetch(&m_sto_score);
} }
bool locktree::sto_try_release(TXNID txnid) { bool locktree::sto_try_release(TXNID txnid) {
......
...@@ -138,3 +138,22 @@ PATENT RIGHTS GRANT: ...@@ -138,3 +138,22 @@ PATENT RIGHTS GRANT:
# define RUNNING_ON_VALGRIND (0U) # define RUNNING_ON_VALGRIND (0U)
#endif #endif
// Unsafely fetch and return a `T' from src, telling drd to ignore
// racey access to src for the next sizeof(*src) bytes
template <typename T>
T toku_drd_unsafe_fetch(T *src) {
TOKU_DRD_IGNORE_VAR(*src);
T val = *src;
TOKU_DRD_STOP_IGNORING_VAR(*src);
return val;
}
// Unsafely set a `T' value into *dest from src, telling drd to ignore
// racey access to dest for the next sizeof(*dest) bytes
template <typename T>
void toku_drd_unsafe_set(T *dest, const T src) {
TOKU_DRD_IGNORE_VAR(*dest);
*dest = src;
TOKU_DRD_STOP_IGNORING_VAR(*dest);
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment