Commit 45794eac authored by John Esmet

FT-279 Clean up ftnode_fetch_extra struct and, most importantly, its initialization code
parent 3e8a2988
......@@ -209,7 +209,7 @@ toku_pin_ftnode_for_query(
UNLOCKERS unlockers,
ANCESTORS ancestors,
const pivot_bounds &bounds,
FTNODE_FETCH_EXTRA bfe,
ftnode_fetch_extra *bfe,
bool apply_ancestor_messages, // this bool is probably temporary for #3972; once we know how range query estimates work, we will revisit this
FTNODE *node_p,
bool* msgs_applied)
......@@ -322,7 +322,7 @@ toku_pin_ftnode_with_dep_nodes(
FT ft,
BLOCKNUM blocknum,
uint32_t fullhash,
FTNODE_FETCH_EXTRA bfe,
ftnode_fetch_extra *bfe,
pair_lock_type lock_type,
uint32_t num_dependent_nodes,
FTNODE *dependent_nodes,
......@@ -364,7 +364,7 @@ toku_pin_ftnode_with_dep_nodes(
void toku_pin_ftnode(FT ft,
BLOCKNUM blocknum,
uint32_t fullhash,
FTNODE_FETCH_EXTRA bfe,
ftnode_fetch_extra *bfe,
pair_lock_type lock_type,
FTNODE *node_p,
bool move_messages) {
......
......@@ -148,7 +148,7 @@ toku_pin_ftnode_for_query(
UNLOCKERS unlockers,
ANCESTORS ancestors,
const pivot_bounds &bounds,
FTNODE_FETCH_EXTRA bfe,
ftnode_fetch_extra *bfe,
bool apply_ancestor_messages, // this bool is probably temporary for #3972; once we know how range query estimates work, we will revisit this
FTNODE *node_p,
bool* msgs_applied
......@@ -159,7 +159,7 @@ void toku_pin_ftnode(
FT ft,
BLOCKNUM blocknum,
uint32_t fullhash,
FTNODE_FETCH_EXTRA bfe,
ftnode_fetch_extra *bfe,
pair_lock_type lock_type,
FTNODE *node_p,
bool move_messages
......@@ -171,7 +171,7 @@ void toku_pin_ftnode_with_dep_nodes(
FT ft,
BLOCKNUM blocknum,
uint32_t fullhash,
FTNODE_FETCH_EXTRA bfe,
ftnode_fetch_extra *bfe,
pair_lock_type lock_type,
uint32_t num_dependent_nodes,
FTNODE *dependent_nodes,
......
......@@ -491,8 +491,8 @@ ct_maybe_merge_child(struct flusher_advice *fa,
uint32_t fullhash;
CACHEKEY root;
toku_calculate_root_offset_pointer(ft, &root, &fullhash);
struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, ft);
ftnode_fetch_extra bfe;
bfe.create_for_full_read(ft);
toku_pin_ftnode(ft, root, fullhash, &bfe, PL_WRITE_EXPENSIVE, &root_node, true);
toku_ftnode_assert_fully_in_memory(root_node);
}
......@@ -1075,8 +1075,8 @@ ft_split_child(
static void bring_node_fully_into_memory(FTNODE node, FT ft) {
if (!toku_ftnode_fully_in_memory(node)) {
struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, ft);
ftnode_fetch_extra bfe;
bfe.create_for_full_read(ft);
toku_cachetable_pf_pinned_pair(
node,
toku_ftnode_pf_callback,
......@@ -1379,8 +1379,8 @@ ft_merge_child(
FTNODE childa, childb;
{
uint32_t childfullhash = compute_child_fullhash(ft->cf, node, childnuma);
struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, ft);
ftnode_fetch_extra bfe;
bfe.create_for_full_read(ft);
toku_pin_ftnode_with_dep_nodes(ft, BP_BLOCKNUM(node, childnuma), childfullhash, &bfe, PL_WRITE_EXPENSIVE, 1, &node, &childa, true);
}
// for test
......@@ -1390,8 +1390,8 @@ ft_merge_child(
dep_nodes[0] = node;
dep_nodes[1] = childa;
uint32_t childfullhash = compute_child_fullhash(ft->cf, node, childnumb);
struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, ft);
ftnode_fetch_extra bfe;
bfe.create_for_full_read(ft);
toku_pin_ftnode_with_dep_nodes(ft, BP_BLOCKNUM(node, childnumb), childfullhash, &bfe, PL_WRITE_EXPENSIVE, 2, dep_nodes, &childb, true);
}
......@@ -1520,10 +1520,10 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa)
ft->blocktable.verify_blocknum_allocated(targetchild);
uint32_t childfullhash = compute_child_fullhash(ft->cf, parent, childnum);
FTNODE child;
struct ftnode_fetch_extra bfe;
ftnode_fetch_extra bfe;
// Note that we don't read the entire node into memory yet.
// The idea is to try to do the minimum work before releasing the parent lock
fill_bfe_for_min_read(&bfe, ft);
bfe.create_for_min_read(ft);
toku_pin_ftnode_with_dep_nodes(ft, targetchild, childfullhash, &bfe, PL_WRITE_EXPENSIVE, 1, &parent, &child, true);
// for test
......
......@@ -328,8 +328,8 @@ toku_ft_hot_optimize(FT_HANDLE ft_handle, DBT* left, DBT* right,
// Get root node (the first parent of each successive HOT
// call.)
toku_calculate_root_offset_pointer(ft_handle->ft, &root_key, &fullhash);
struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, ft_handle->ft);
ftnode_fetch_extra bfe;
bfe.create_for_full_read(ft_handle->ft);
toku_pin_ftnode(ft_handle->ft,
(BLOCKNUM) root_key,
fullhash,
......
......@@ -298,6 +298,92 @@ struct ft_handle {
PAIR_ATTR make_ftnode_pair_attr(FTNODE node);
PAIR_ATTR make_invalid_pair_attr(void);
//
// Field in ftnode_fetch_extra that tells the
// partial fetch callback what piece of the node
// is needed by the ydb
//
enum ftnode_fetch_type {
ftnode_fetch_none = 1, // no partitions needed.
ftnode_fetch_subset, // some subset of partitions needed
ftnode_fetch_prefetch, // this is part of a prefetch call
ftnode_fetch_all, // every partition is needed
ftnode_fetch_keymatch, // one child is needed if it holds both keys
};
// Info passed to cachetable fetch callbacks to say which parts of a node
// should be fetched (perhaps a subset, perhaps the whole thing, depending
// on operation)
class ftnode_fetch_extra {
public:
// Used when the whole node must be in memory, such as for flushes.
void create_for_full_read(FT ft);
// A subset of children are necessary. Used by point queries.
void create_for_subset_read(FT ft, ft_search *search, const DBT *left, const DBT *right,
bool left_is_neg_infty, bool right_is_pos_infty,
bool disable_prefetching, bool read_all_partitions);
// No partitions are necessary - only pivots and/or subtree estimates.
// Currently used for stat64.
void create_for_min_read(FT ft);
// Used to prefetch partitions that fall within the bounds given by the cursor.
void create_for_prefetch(FT ft, struct ft_cursor *cursor);
// Only a portion of the node (within a keyrange) is required.
// Used by keysrange when the left and right key are in the same basement node.
void create_for_keymatch(FT ft, const DBT *left, const DBT *right,
bool disable_prefetching, bool read_all_partitions);
void destroy(void);
// return: true if a specific childnum is required to be in memory
bool wants_child_available(int childnum) const;
// return: the childnum of the leftmost child that is required to be in memory
int leftmost_child_wanted(FTNODE node) const;
// return: the childnum of the rightmost child that is required to be in memory
int rightmost_child_wanted(FTNODE node) const;
// needed for reading a node off disk
FT ft;
enum ftnode_fetch_type type;
// used in the case where type == ftnode_fetch_subset
// parameters needed to find out which child needs to be decompressed (so it can be read)
ft_search *search;
DBT range_lock_left_key, range_lock_right_key;
bool left_is_neg_infty, right_is_pos_infty;
// states if we should try to aggressively fetch basement nodes
// that are not specifically needed for the current query,
// but may be needed for other cursor operations the user is doing
// For example, if we have not disabled prefetching,
// and the user is doing a dictionary wide scan, then
// even though a query may only want one basement node,
// we fetch all basement nodes in a leaf node.
bool disable_prefetching;
// this value will be set during the fetch_callback call by toku_ftnode_fetch_callback or toku_ftnode_pf_req_callback
// the callbacks need to evaluate this anyway, so we cache it here and the search code does not reevaluate it
int child_to_read;
// when we read internal nodes, we want to read all the data off disk in one I/O
// then we'll treat it as normal and only decompress the needed partitions etc.
bool read_all_partitions;
// Accounting: How many bytes were read, and how much time did we spend doing I/O?
uint64_t bytes_read;
tokutime_t io_time;
tokutime_t decompress_time;
tokutime_t deserialize_time;
private:
void _create_internal(FT ft_);
};
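To make the new call pattern concrete, here is a minimal usage sketch (an editor's illustration, not code from this commit; the helper name example_full_read and the choice of PL_READ are assumptions):

static void example_full_read(FT ft, BLOCKNUM blocknum, uint32_t fullhash) {
    // Stack-allocate and initialize for a full read; this replaces the old
    // free function fill_bfe_for_full_read(&bfe, ft).
    ftnode_fetch_extra bfe;
    bfe.create_for_full_read(ft);
    FTNODE node;
    toku_pin_ftnode(ft, blocknum, fullhash, &bfe, PL_READ, &node, true);
    // ... operate on the fully-in-memory node ...
    toku_unpin_ftnode(ft, node);
    // destroy() frees any copied range-lock keys; a full read copies none,
    // so this is effectively a no-op, shown here for symmetry.
    bfe.destroy();
}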
// Only exported for tests.
// Cachetable callbacks for ftnodes.
......@@ -333,47 +419,6 @@ STAT64INFO_S toku_get_and_clear_basement_stats(FTNODE leafnode);
void toku_verify_or_set_counts(FTNODE);
//
// Helper function to fill a ftnode_fetch_extra with data
// that will tell the fetch callback that the entire node is
// necessary. Used in cases where the entire node
// is required, such as for flushes.
//
void fill_bfe_for_full_read(struct ftnode_fetch_extra *bfe, FT ft);
//
// Helper function to fill a ftnode_fetch_extra with data
// that will tell the fetch callback that an explicit range of children is
// necessary. Used in cases where the portion of the node that is required
// is known in advance, e.g. for keysrange when the left and right key
// are in the same basement node.
//
void fill_bfe_for_keymatch(struct ftnode_fetch_extra *bfe, FT ft,
const DBT *left, const DBT *right,
bool disable_prefetching, bool read_all_partitions);
//
// Helper function to fill a ftnode_fetch_extra with data
// that will tell the fetch callback that some subset of the node is
// necessary. Used in cases where some of the node is required
// such as for a point query.
//
void fill_bfe_for_subset_read(struct ftnode_fetch_extra *bfe, FT ft, ft_search *search,
const DBT *left, const DBT *right,
bool left_is_neg_infty, bool right_is_pos_infty,
bool disable_prefetching, bool read_all_partitions);
//
// Helper function to fill a ftnode_fetch_extra with data
// that will tell the fetch callback that no partitions are
// necessary, only the pivots and/or subtree estimates.
// Currently used for stat64.
//
void fill_bfe_for_min_read(struct ftnode_fetch_extra *bfe, FT ft);
void fill_bfe_for_prefetch(struct ftnode_fetch_extra *bfe, FT ft, struct ft_cursor *cursor);
void destroy_bfe_for_prefetch(struct ftnode_fetch_extra *bfe);
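Taken together, the removals above and the class methods introduced earlier amount to the following one-to-one mapping (summarized from the replacements throughout this commit):

// fill_bfe_for_full_read(&bfe, ft)         -> bfe.create_for_full_read(ft)
// fill_bfe_for_min_read(&bfe, ft)          -> bfe.create_for_min_read(ft)
// fill_bfe_for_subset_read(&bfe, ft, ...)  -> bfe.create_for_subset_read(ft, ...)
// fill_bfe_for_keymatch(&bfe, ft, ...)     -> bfe.create_for_keymatch(ft, ...)
// fill_bfe_for_prefetch(&bfe, ft, cursor)  -> bfe.create_for_prefetch(ft, cursor)
// destroy_bfe_for_prefetch(&bfe)           -> bfe.destroy()
// toku_bfe_wants_child_available(&bfe, i)  -> bfe.wants_child_available(i)
// toku_bfe_leftmost_child_wanted(&bfe, n)  -> bfe.leftmost_child_wanted(n)
// toku_bfe_rightmost_child_wanted(&bfe, n) -> bfe.rightmost_child_wanted(n)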
// TODO: consider moving this to ft/pivotkeys.cc
class pivot_bounds {
public:
......@@ -396,11 +441,6 @@ class pivot_bounds {
const DBT _upper_bound_inclusive;
};
// TODO: move into the ftnode_fetch_extra class
bool toku_bfe_wants_child_available (struct ftnode_fetch_extra* bfe, int childnum);
int toku_bfe_leftmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node);
int toku_bfe_rightmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node);
// allocate a block number
// allocate and initialize a ftnode
// put the ftnode into the cache table
......@@ -584,7 +624,7 @@ typedef struct {
TOKU_ENGINE_STATUS_ROW_S status[FT_STATUS_NUM_ROWS];
} FT_STATUS_S, *FT_STATUS;
void toku_ft_status_update_pivot_fetch_reason(struct ftnode_fetch_extra *bfe);
void toku_ft_status_update_pivot_fetch_reason(ftnode_fetch_extra *bfe);
void toku_ft_status_update_flush_reason(FTNODE node, uint64_t uncompressed_bytes_flushed, uint64_t bytes_written, tokutime_t write_time, bool for_checkpoint);
void toku_ft_status_update_serialize_times(FTNODE node, tokutime_t serialize_time, tokutime_t compress_time);
void toku_ft_status_update_deserialize_times(FTNODE node, tokutime_t deserialize_time, tokutime_t decompress_time);
......
......@@ -608,42 +608,123 @@ next_dict_id(void) {
return d;
}
//
// Given a bfe and a childnum, returns whether the query that constructed the bfe
// wants the child available.
// Requires: bfe->child_to_read to have been set
//
bool
toku_bfe_wants_child_available (struct ftnode_fetch_extra* bfe, int childnum)
{
return bfe->type == ftnode_fetch_all ||
(bfe->child_to_read == childnum &&
(bfe->type == ftnode_fetch_subset || bfe->type == ftnode_fetch_keymatch));
// TODO: This isn't so pretty
void ftnode_fetch_extra::_create_internal(FT ft_) {
ft = ft_;
toku_init_dbt(&range_lock_left_key);
toku_init_dbt(&range_lock_right_key);
left_is_neg_infty = false;
right_is_pos_infty = false;
child_to_read = -1;
disable_prefetching = false;
read_all_partitions = false;
bytes_read = 0;
io_time = 0;
deserialize_time = 0;
decompress_time = 0;
}
int
toku_bfe_leftmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node)
{
paranoid_invariant(bfe->type == ftnode_fetch_subset || bfe->type == ftnode_fetch_prefetch || bfe->type == ftnode_fetch_keymatch);
if (bfe->left_is_neg_infty) {
void ftnode_fetch_extra::create_for_full_read(FT ft_) {
_create_internal(ft_);
type = ftnode_fetch_all;
}
void ftnode_fetch_extra::create_for_keymatch(FT ft_,
const DBT *left, const DBT *right,
bool disable_prefetching_, bool read_all_partitions_) {
_create_internal(ft_);
invariant(ft->h->type == FT_CURRENT);
type = ftnode_fetch_keymatch;
if (left != nullptr) {
toku_copyref_dbt(&range_lock_left_key, *left);
}
if (right != nullptr) {
toku_copyref_dbt(&range_lock_right_key, *right);
}
disable_prefetching = disable_prefetching_;
read_all_partitions = read_all_partitions_;
}
void ftnode_fetch_extra::create_for_subset_read(FT ft_, ft_search *search_,
const DBT *left, const DBT *right,
bool left_is_neg_infty_, bool right_is_pos_infty_,
bool disable_prefetching_, bool read_all_partitions_) {
_create_internal(ft_);
invariant(ft->h->type == FT_CURRENT);
type = ftnode_fetch_subset;
search = search_;
if (left != nullptr) {
toku_copyref_dbt(&range_lock_left_key, *left);
}
if (right != nullptr) {
toku_copyref_dbt(&range_lock_right_key, *right);
}
left_is_neg_infty = left_is_neg_infty_;
right_is_pos_infty = right_is_pos_infty_;
disable_prefetching = disable_prefetching_;
read_all_partitions = read_all_partitions_;
}
void ftnode_fetch_extra::create_for_min_read(FT ft_) {
_create_internal(ft_);
invariant(ft->h->type == FT_CURRENT);
type = ftnode_fetch_none;
}
void ftnode_fetch_extra::create_for_prefetch(FT ft_, struct ft_cursor *cursor) {
_create_internal(ft_);
invariant(ft->h->type == FT_CURRENT);
type = ftnode_fetch_prefetch;
const DBT *left = &cursor->range_lock_left_key;
if (left->data) {
toku_clone_dbt(&range_lock_left_key, *left);
}
const DBT *right = &cursor->range_lock_right_key;
if (right->data) {
toku_clone_dbt(&range_lock_right_key, *right);
}
left_is_neg_infty = cursor->left_is_neg_infty;
right_is_pos_infty = cursor->right_is_pos_infty;
disable_prefetching = cursor->disable_prefetching;
}
void ftnode_fetch_extra::destroy(void) {
toku_destroy_dbt(&range_lock_left_key);
toku_destroy_dbt(&range_lock_right_key);
}
// Requires: child_to_read to have been set
bool ftnode_fetch_extra::wants_child_available(int childnum) const {
return type == ftnode_fetch_all ||
(child_to_read == childnum &&
(type == ftnode_fetch_subset || type == ftnode_fetch_keymatch));
}
int ftnode_fetch_extra::leftmost_child_wanted(FTNODE node) const {
paranoid_invariant(type == ftnode_fetch_subset || type == ftnode_fetch_prefetch || type == ftnode_fetch_keymatch);
if (left_is_neg_infty) {
return 0;
} else if (bfe->range_lock_left_key.data == nullptr) {
} else if (range_lock_left_key.data == nullptr) {
return -1;
} else {
return toku_ftnode_which_child(node, &bfe->range_lock_left_key, bfe->ft->cmp);
return toku_ftnode_which_child(node, &range_lock_left_key, ft->cmp);
}
}
int
toku_bfe_rightmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node)
{
paranoid_invariant(bfe->type == ftnode_fetch_subset || bfe->type == ftnode_fetch_prefetch || bfe->type == ftnode_fetch_keymatch);
if (bfe->right_is_pos_infty) {
int ftnode_fetch_extra::rightmost_child_wanted(FTNODE node) const {
paranoid_invariant(type == ftnode_fetch_subset || type == ftnode_fetch_prefetch || type == ftnode_fetch_keymatch);
if (right_is_pos_infty) {
return node->n_children - 1;
} else if (bfe->range_lock_right_key.data == nullptr) {
} else if (range_lock_right_key.data == nullptr) {
return -1;
} else {
return toku_ftnode_which_child(node, &bfe->range_lock_right_key, bfe->ft->cmp);
return toku_ftnode_which_child(node, &range_lock_right_key, ft->cmp);
}
}
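As an editor's illustration of the new member functions (a hypothetical helper, not in the commit; it mirrors the loop in toku_ftnode_pf_req_callback further down):

static bool any_wanted_partition_missing(FTNODE node, const ftnode_fetch_extra &bfe) {
    // Only meaningful for subset/prefetch/keymatch fetches, which carry a key range.
    int lc = bfe.leftmost_child_wanted(node);
    int rc = bfe.rightmost_child_wanted(node);
    for (int i = lc; i <= rc; i++) {
        if (BP_STATE(node, i) != PT_AVAIL) {
            return true; // a wanted partition is still compressed or on disk
        }
    }
    return false;
}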
......@@ -843,7 +924,7 @@ void toku_ftnode_flush_callback(
}
void
toku_ft_status_update_pivot_fetch_reason(struct ftnode_fetch_extra *bfe)
toku_ft_status_update_pivot_fetch_reason(ftnode_fetch_extra *bfe)
{
if (bfe->type == ftnode_fetch_prefetch) {
STATUS_INC(FT_NUM_PIVOTS_FETCHED_PREFETCH, 1);
......@@ -865,7 +946,7 @@ int toku_ftnode_fetch_callback (CACHEFILE UU(cachefile), PAIR p, int fd, BLOCKNU
assert(extraargs);
assert(*ftnode_pv == NULL);
FTNODE_DISK_DATA* ndd = (FTNODE_DISK_DATA*)disk_data;
struct ftnode_fetch_extra *bfe = (struct ftnode_fetch_extra *)extraargs;
ftnode_fetch_extra *bfe = (ftnode_fetch_extra *)extraargs;
FTNODE *node=(FTNODE*)ftnode_pv;
// deserialize the node, must pass the bfe in because we cannot
// evaluate what piece of the node is necessary until we get it at
......@@ -1125,7 +1206,7 @@ bool toku_ftnode_pf_req_callback(void* ftnode_pv, void* read_extraargs) {
// placeholder for now
bool retval = false;
FTNODE node = (FTNODE) ftnode_pv;
struct ftnode_fetch_extra *bfe = (struct ftnode_fetch_extra *) read_extraargs;
ftnode_fetch_extra *bfe = (ftnode_fetch_extra *) read_extraargs;
//
// The three types of fetches that the ft layer may request are:
// - ftnode_fetch_none: no partitions are necessary (example use: stat64)
......@@ -1169,8 +1250,8 @@ bool toku_ftnode_pf_req_callback(void* ftnode_pv, void* read_extraargs) {
// makes no sense to have prefetching disabled
// and still call this function
paranoid_invariant(!bfe->disable_prefetching);
int lc = toku_bfe_leftmost_child_wanted(bfe, node);
int rc = toku_bfe_rightmost_child_wanted(bfe, node);
int lc = bfe->leftmost_child_wanted(node);
int rc = bfe->rightmost_child_wanted(node);
for (int i = lc; i <= rc; ++i) {
if (BP_STATE(node, i) != PT_AVAIL) {
retval = true;
......@@ -1183,8 +1264,8 @@ bool toku_ftnode_pf_req_callback(void* ftnode_pv, void* read_extraargs) {
// we find out what basement node the query cares about
// and check if it is available
if (node->height == 0) {
int left_child = toku_bfe_leftmost_child_wanted(bfe, node);
int right_child = toku_bfe_rightmost_child_wanted(bfe, node);
int left_child = bfe->leftmost_child_wanted(node);
int right_child = bfe->rightmost_child_wanted(node);
if (left_child == right_child) {
bfe->child_to_read = left_child;
unsafe_touch_clock(node,bfe->child_to_read);
......@@ -1201,7 +1282,7 @@ bool toku_ftnode_pf_req_callback(void* ftnode_pv, void* read_extraargs) {
static void
ft_status_update_partial_fetch_reason(
struct ftnode_fetch_extra* bfe,
ftnode_fetch_extra *bfe,
int childnum,
enum pt_state state,
bool is_leaf
......@@ -1334,7 +1415,7 @@ int toku_ftnode_pf_callback(void* ftnode_pv, void* disk_data, void* read_extraar
int r = 0;
FTNODE node = (FTNODE) ftnode_pv;
FTNODE_DISK_DATA ndd = (FTNODE_DISK_DATA) disk_data;
struct ftnode_fetch_extra *bfe = (struct ftnode_fetch_extra *) read_extraargs;
ftnode_fetch_extra *bfe = (ftnode_fetch_extra *) read_extraargs;
// there must be a reason this is being called. If we get a garbage type or the type is ftnode_fetch_none,
// then something went wrong
assert((bfe->type == ftnode_fetch_subset) || (bfe->type == ftnode_fetch_all) || (bfe->type == ftnode_fetch_prefetch) || (bfe->type == ftnode_fetch_keymatch));
......@@ -1344,8 +1425,8 @@ int toku_ftnode_pf_callback(void* ftnode_pv, void* disk_data, void* read_extraar
(bfe->type == ftnode_fetch_subset || bfe->type == ftnode_fetch_prefetch)
)
{
lc = toku_bfe_leftmost_child_wanted(bfe, node);
rc = toku_bfe_rightmost_child_wanted(bfe, node);
lc = bfe->leftmost_child_wanted(node);
rc = bfe->rightmost_child_wanted(node);
} else {
lc = -1;
rc = -1;
......@@ -1354,7 +1435,7 @@ int toku_ftnode_pf_callback(void* ftnode_pv, void* disk_data, void* read_extraar
if (BP_STATE(node,i) == PT_AVAIL) {
continue;
}
if ((lc <= i && i <= rc) || toku_bfe_wants_child_available(bfe, i)) {
if ((lc <= i && i <= rc) || bfe->wants_child_available(i)) {
enum pt_state state = BP_STATE(node, i);
if (state == PT_COMPRESSED) {
r = toku_deserialize_bp_from_compressed(node, i, bfe);
......@@ -1388,127 +1469,6 @@ int toku_msg_leafval_heaviside(DBT const &kdbt, const struct toku_msg_leafval_he
return be.cmp(&kdbt, be.key);
}
void fill_bfe_for_full_read(struct ftnode_fetch_extra *bfe, FT ft) {
bfe->type = ftnode_fetch_all;
bfe->ft = ft;
bfe->search = nullptr;
toku_init_dbt(&bfe->range_lock_left_key);
toku_init_dbt(&bfe->range_lock_right_key);
bfe->left_is_neg_infty = false;
bfe->right_is_pos_infty = false;
bfe->child_to_read = -1;
bfe->disable_prefetching = false;
bfe->read_all_partitions = false;
bfe->bytes_read = 0;
bfe->io_time = 0;
bfe->deserialize_time = 0;
bfe->decompress_time = 0;
}
void fill_bfe_for_keymatch(struct ftnode_fetch_extra *bfe, FT ft,
const DBT *left, const DBT *right,
bool disable_prefetching, bool read_all_partitions) {
paranoid_invariant(ft->h->type == FT_CURRENT);
bfe->type = ftnode_fetch_keymatch;
bfe->ft = ft;
bfe->search = nullptr;
toku_init_dbt(&bfe->range_lock_left_key);
toku_init_dbt(&bfe->range_lock_right_key);
if (left) {
toku_copyref_dbt(&bfe->range_lock_left_key, *left);
}
if (right) {
toku_copyref_dbt(&bfe->range_lock_right_key, *right);
}
bfe->left_is_neg_infty = left == nullptr;
bfe->right_is_pos_infty = right == nullptr;
bfe->child_to_read = -1;
bfe->disable_prefetching = disable_prefetching;
bfe->read_all_partitions = read_all_partitions;
bfe->bytes_read = 0;
bfe->io_time = 0;
bfe->deserialize_time = 0;
bfe->decompress_time = 0;
}
void fill_bfe_for_subset_read(struct ftnode_fetch_extra *bfe, FT ft, ft_search *search,
const DBT *left, const DBT *right,
bool left_is_neg_infty, bool right_is_pos_infty,
bool disable_prefetching, bool read_all_partitions) {
paranoid_invariant(ft->h->type == FT_CURRENT);
bfe->type = ftnode_fetch_subset;
bfe->ft = ft;
bfe->search = search;
toku_init_dbt(&bfe->range_lock_left_key);
toku_init_dbt(&bfe->range_lock_right_key);
if (left) {
toku_copyref_dbt(&bfe->range_lock_left_key, *left);
}
if (right) {
toku_copyref_dbt(&bfe->range_lock_right_key, *right);
}
bfe->left_is_neg_infty = left_is_neg_infty;
bfe->right_is_pos_infty = right_is_pos_infty;
bfe->child_to_read = -1;
bfe->disable_prefetching = disable_prefetching;
bfe->read_all_partitions = read_all_partitions;
bfe->bytes_read = 0;
bfe->io_time = 0;
bfe->deserialize_time = 0;
bfe->decompress_time = 0;
}
void fill_bfe_for_min_read(struct ftnode_fetch_extra *bfe, FT ft) {
paranoid_invariant(ft->h->type == FT_CURRENT);
bfe->type = ftnode_fetch_none;
bfe->ft = ft;
bfe->search = nullptr;
toku_init_dbt(&bfe->range_lock_left_key);
toku_init_dbt(&bfe->range_lock_right_key);
bfe->left_is_neg_infty = false;
bfe->right_is_pos_infty = false;
bfe->child_to_read = -1;
bfe->disable_prefetching = false;
bfe->read_all_partitions = false;
bfe->bytes_read = 0;
bfe->io_time = 0;
bfe->deserialize_time = 0;
bfe->decompress_time = 0;
}
void fill_bfe_for_prefetch(struct ftnode_fetch_extra *bfe, FT ft, struct ft_cursor *cursor) {
paranoid_invariant(ft->h->type == FT_CURRENT);
bfe->type = ftnode_fetch_prefetch;
bfe->ft = ft;
bfe->search = nullptr;
toku_init_dbt(&bfe->range_lock_left_key);
toku_init_dbt(&bfe->range_lock_right_key);
const DBT *left = &cursor->range_lock_left_key;
if (left->data) {
toku_clone_dbt(&bfe->range_lock_left_key, *left);
}
const DBT *right = &cursor->range_lock_right_key;
if (right->data) {
toku_clone_dbt(&bfe->range_lock_right_key, *right);
}
bfe->left_is_neg_infty = cursor->left_is_neg_infty;
bfe->right_is_pos_infty = cursor->right_is_pos_infty;
bfe->child_to_read = -1;
bfe->disable_prefetching = cursor->disable_prefetching;
bfe->read_all_partitions = false;
bfe->bytes_read = 0;
bfe->io_time = 0;
bfe->deserialize_time = 0;
bfe->decompress_time = 0;
}
void destroy_bfe_for_prefetch(struct ftnode_fetch_extra *bfe) {
paranoid_invariant(bfe->type == ftnode_fetch_prefetch);
toku_destroy_dbt(&bfe->range_lock_left_key);
toku_destroy_dbt(&bfe->range_lock_right_key);
}
static void
ft_init_new_root(FT ft, FTNODE oldroot, FTNODE *newrootp)
// Effect: Create a new root node whose two children are the split of oldroot.
......@@ -1567,8 +1527,8 @@ ft_init_new_root(FT ft, FTNODE oldroot, FTNODE *newrootp)
// ft_split_child released locks on newroot
// and oldroot, so now we repin and
// return to caller
struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, ft);
ftnode_fetch_extra bfe;
bfe.create_for_full_read(ft);
toku_pin_ftnode(
ft,
old_blocknum,
......@@ -1702,8 +1662,8 @@ static bool process_maybe_reactive_child(FT ft, FTNODE parent, FTNODE child, int
int parent_n_children = parent->n_children;
toku_unpin_ftnode_read_only(ft, child);
toku_unpin_ftnode_read_only(ft, parent);
struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, ft);
ftnode_fetch_extra bfe;
bfe.create_for_full_read(ft);
FTNODE newparent, newchild;
toku_pin_ftnode(ft, parent_blocknum, parent_fullhash, &bfe, PL_WRITE_CHEAP, &newparent, true);
if (newparent->height != parent_height || newparent->n_children != parent_n_children ||
......@@ -1754,8 +1714,8 @@ static bool process_maybe_reactive_child(FT ft, FTNODE parent, FTNODE child, int
uint32_t parent_fullhash = toku_cachetable_hash(ft->cf, parent_blocknum);
toku_unpin_ftnode_read_only(ft, child);
toku_unpin_ftnode_read_only(ft, parent);
struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, ft);
ftnode_fetch_extra bfe;
bfe.create_for_full_read(ft);
FTNODE newparent, newchild;
toku_pin_ftnode(ft, parent_blocknum, parent_fullhash, &bfe, PL_WRITE_CHEAP, &newparent, true);
if (newparent->height != parent_height || childnum >= newparent->n_children) {
......@@ -1796,8 +1756,8 @@ static void inject_message_at_this_blocknum(FT ft, CACHEKEY cachekey, uint32_t f
{
toku::context inject_ctx(CTX_MESSAGE_INJECTION);
FTNODE node;
struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, ft);
ftnode_fetch_extra bfe;
bfe.create_for_full_read(ft);
toku_pin_ftnode(ft, cachekey, fullhash, &bfe, PL_WRITE_CHEAP, &node, true);
toku_ftnode_assert_fully_in_memory(node);
paranoid_invariant(node->fullhash==fullhash);
......@@ -1945,8 +1905,8 @@ static void push_something_in_subtree(
// promote and we're in the top two levels of the
// tree, don't stop just because someone else has the
// node locked.
struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, ft);
ftnode_fetch_extra bfe;
bfe.create_for_full_read(ft);
if (lock_type == PL_WRITE_CHEAP) {
// We intend to take the write lock for message injection
toku::context inject_ctx(CTX_MESSAGE_INJECTION);
......@@ -1985,8 +1945,8 @@ static void push_something_in_subtree(
if (did_split_or_merge) {
// Need to re-pin this node and try at this level again.
FTNODE newparent;
struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, ft); // should be fully in memory, we just split it
ftnode_fetch_extra bfe;
bfe.create_for_full_read(ft); // should be fully in memory, we just split it
toku_pin_ftnode(ft, subtree_root_blocknum, subtree_root_fullhash, &bfe, PL_READ, &newparent, true);
push_something_in_subtree(ft, newparent, -1, msg, flow_deltas, gc_info, depth, loc, true);
return;
......@@ -2072,8 +2032,8 @@ void toku_ft_root_put_msg(
uint32_t fullhash;
CACHEKEY root_key;
toku_calculate_root_offset_pointer(ft, &root_key, &fullhash);
struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, ft);
ftnode_fetch_extra bfe;
bfe.create_for_full_read(ft);
size_t flow_deltas[] = { message_buffer::msg_memsize_in_buffer(msg), 0 };
......@@ -2324,8 +2284,8 @@ static int ft_maybe_insert_into_rightmost_leaf(FT ft, DBT *key, DBT *val, XIDS m
// Pin the rightmost leaf with a write lock.
rightmost_fullhash = toku_cachetable_hash(ft->cf, rightmost_blocknum);
struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, ft);
ftnode_fetch_extra bfe;
bfe.create_for_full_read(ft);
toku_pin_ftnode(ft, rightmost_blocknum, rightmost_fullhash, &bfe, PL_WRITE_CHEAP, &rightmost_leaf, true);
// The rightmost blocknum never changes once it is initialized to something
......@@ -3482,9 +3442,9 @@ static int
ftnode_fetch_callback_and_free_bfe(CACHEFILE cf, PAIR p, int fd, BLOCKNUM blocknum, uint32_t fullhash, void **ftnode_pv, void** UU(disk_data), PAIR_ATTR *sizep, int *dirtyp, void *extraargs)
{
int r = toku_ftnode_fetch_callback(cf, p, fd, blocknum, fullhash, ftnode_pv, disk_data, sizep, dirtyp, extraargs);
struct ftnode_fetch_extra *CAST_FROM_VOIDP(ffe, extraargs);
destroy_bfe_for_prefetch(ffe);
toku_free(ffe);
ftnode_fetch_extra *CAST_FROM_VOIDP(bfe, extraargs);
bfe->destroy();
toku_free(bfe);
return r;
}
......@@ -3492,9 +3452,9 @@ static int
ftnode_pf_callback_and_free_bfe(void *ftnode_pv, void* disk_data, void *read_extraargs, int fd, PAIR_ATTR *sizep)
{
int r = toku_ftnode_pf_callback(ftnode_pv, disk_data, read_extraargs, fd, sizep);
struct ftnode_fetch_extra *CAST_FROM_VOIDP(ffe, read_extraargs);
destroy_bfe_for_prefetch(ffe);
toku_free(ffe);
ftnode_fetch_extra *CAST_FROM_VOIDP(bfe, read_extraargs);
bfe->destroy();
toku_free(bfe);
return r;
}
......@@ -3522,8 +3482,8 @@ ft_node_maybe_prefetch(FT_HANDLE ft_handle, FTNODE node, int childnum, FT_CURSOR
for (int i = childnum + 1; (i <= childnum + num_nodes_to_prefetch) && (i <= rc); i++) {
BLOCKNUM nextchildblocknum = BP_BLOCKNUM(node, i);
uint32_t nextfullhash = compute_child_fullhash(ft_handle->ft->cf, node, i);
struct ftnode_fetch_extra *MALLOC(bfe);
fill_bfe_for_prefetch(bfe, ft_handle->ft, ftcursor);
ftnode_fetch_extra *XCALLOC(bfe);
bfe->create_for_prefetch(ft_handle->ft, ftcursor);
bool doing_prefetch = false;
toku_cachefile_prefetch(
ft_handle->ft->cf,
......@@ -3537,7 +3497,7 @@ ft_node_maybe_prefetch(FT_HANDLE ft_handle, FTNODE node, int childnum, FT_CURSOR
&doing_prefetch
);
if (!doing_prefetch) {
destroy_bfe_for_prefetch(bfe);
bfe->destroy();
toku_free(bfe);
}
*doprefetch = false;
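For reference, the ownership rule for the heap-allocated prefetch bfe above, in one place (an editor's sketch; prefetch_one_child is hypothetical, and the toku_cachefile_prefetch call is left as a comment because its full argument list does not appear in this diff):

static void prefetch_one_child(FT_HANDLE ft_handle, FT_CURSOR cursor) {
    ftnode_fetch_extra *XCALLOC(bfe); // zero-initialized heap allocation
    bfe->create_for_prefetch(ft_handle->ft, cursor);
    bool doing_prefetch = false;
    // ... toku_cachefile_prefetch(..., &doing_prefetch) runs here; if it starts
    // a prefetch, ftnode_*_callback_and_free_bfe destroys and frees bfe ...
    if (!doing_prefetch) {
        bfe->destroy(); // prefetch never started, so the caller
        toku_free(bfe); // still owns and must free the bfe
    }
}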
......@@ -3583,9 +3543,8 @@ ft_search_child(FT_HANDLE ft_handle, FTNODE node, int childnum, ft_search *searc
// If the current node's height is greater than 1, then its child is an internal node.
// Therefore, to warm the cache better (#5798), we want to read all the partitions off disk in one shot.
bool read_all_partitions = node->height > 1;
struct ftnode_fetch_extra bfe;
fill_bfe_for_subset_read(
&bfe,
ftnode_fetch_extra bfe;
bfe.create_for_subset_read(
ft_handle->ft,
search,
&ftcursor->range_lock_left_key,
......@@ -3879,9 +3838,8 @@ int toku_ft_search(FT_HANDLE ft_handle, ft_search *search, FT_GET_CALLBACK_FUNCT
// - At this point, toku_ftnode_pin_holding_lock has returned, with bfe.child_to_read set,
// - ft_search_node is called, assuming that the node and its relevant partition are in memory.
//
struct ftnode_fetch_extra bfe;
fill_bfe_for_subset_read(
&bfe,
ftnode_fetch_extra bfe;
bfe.create_for_subset_read(
ft,
search,
&ftcursor->range_lock_left_key,
......@@ -4068,8 +4026,8 @@ toku_ft_keysrange_internal (FT_HANDLE ft_handle, FTNODE node,
uint64_t* less, uint64_t* equal_left, uint64_t* middle,
uint64_t* equal_right, uint64_t* greater, bool* single_basement_node,
uint64_t estimated_num_rows,
struct ftnode_fetch_extra *min_bfe, // set up to read a minimal read.
struct ftnode_fetch_extra *match_bfe, // set up to read a basement node iff both keys in it
ftnode_fetch_extra *min_bfe, // set up for a minimal read.
ftnode_fetch_extra *match_bfe, // set up to read a basement node iff both keys in it
struct unlockers *unlockers, ANCESTORS ancestors, const pivot_bounds &bounds)
// Implementation note: Assign values to less, equal, and greater, and then on the way out (returning up the stack) we add more values in.
{
......@@ -4166,10 +4124,10 @@ void toku_ft_keysrange(FT_HANDLE ft_handle, DBT* key_left, DBT* key_right, uint6
return;
}
paranoid_invariant(!(!key_left && key_right));
struct ftnode_fetch_extra min_bfe;
struct ftnode_fetch_extra match_bfe;
fill_bfe_for_min_read(&min_bfe, ft_handle->ft); // read pivot keys but not message buffers
fill_bfe_for_keymatch(&match_bfe, ft_handle->ft, key_left, key_right, false, false); // read basement node only if both keys in it.
ftnode_fetch_extra min_bfe;
ftnode_fetch_extra match_bfe;
min_bfe.create_for_min_read(ft_handle->ft); // read pivot keys but not message buffers
match_bfe.create_for_keymatch(ft_handle->ft, key_left, key_right, false, false); // read basement node only if both keys in it.
try_again:
{
uint64_t less = 0, equal_left = 0, middle = 0, equal_right = 0, greater = 0;
......@@ -4304,9 +4262,9 @@ static int get_key_after_bytes_in_basementnode(FT ft, BASEMENTNODE bn, const DBT
return r;
}
static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, const pivot_bounds &bounds, FTNODE_FETCH_EXTRA bfe, ft_search *search, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped);
static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, const pivot_bounds &bounds, ftnode_fetch_extra *bfe, ft_search *search, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped);
static int get_key_after_bytes_in_child(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, const pivot_bounds &bounds, FTNODE_FETCH_EXTRA bfe, ft_search *search, int childnum, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped) {
static int get_key_after_bytes_in_child(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, const pivot_bounds &bounds, ftnode_fetch_extra *bfe, ft_search *search, int childnum, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped) {
int r;
struct ancestors next_ancestors = {node, childnum, ancestors};
BLOCKNUM childblocknum = BP_BLOCKNUM(node, childnum);
......@@ -4325,7 +4283,7 @@ static int get_key_after_bytes_in_child(FT_HANDLE ft_h, FT ft, FTNODE node, UNLO
return get_key_after_bytes_in_subtree(ft_h, ft, child, &next_unlockers, &next_ancestors, next_bounds, bfe, search, subtree_bytes, start_key, skip_len, callback, cb_extra, skipped);
}
static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, const pivot_bounds &bounds, FTNODE_FETCH_EXTRA bfe, ft_search *search, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped) {
static int get_key_after_bytes_in_subtree(FT_HANDLE ft_h, FT ft, FTNODE node, UNLOCKERS unlockers, ANCESTORS ancestors, const pivot_bounds &bounds, ftnode_fetch_extra *bfe, ft_search *search, uint64_t subtree_bytes, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *, uint64_t, void *), void *cb_extra, uint64_t *skipped) {
int r;
int childnum = toku_ft_search_which_child(ft->cmp, node, search);
const uint64_t child_subtree_bytes = subtree_bytes / node->n_children;
......@@ -4389,8 +4347,8 @@ int toku_ft_get_key_after_bytes(FT_HANDLE ft_h, const DBT *start_key, uint64_t s
// an error code otherwise
{
FT ft = ft_h->ft;
struct ftnode_fetch_extra bfe;
fill_bfe_for_min_read(&bfe, ft);
ftnode_fetch_extra bfe;
bfe.create_for_min_read(ft);
while (true) {
FTNODE root;
{
......@@ -4453,8 +4411,8 @@ toku_dump_ftnode (FILE *file, FT_HANDLE ft_handle, BLOCKNUM blocknum, int depth,
toku_get_node_for_verify(blocknum, ft_handle, &node);
result=toku_verify_ftnode(ft_handle, ft_handle->ft->h->max_msn_in_ft, ft_handle->ft->h->max_msn_in_ft, false, node, -1, lorange, hirange, NULL, NULL, 0, 1, 0);
uint32_t fullhash = toku_cachetable_hash(ft_handle->ft->cf, blocknum);
struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, ft_handle->ft);
ftnode_fetch_extra bfe;
bfe.create_for_full_read(ft_handle->ft);
toku_pin_ftnode(
ft_handle->ft,
blocknum,
......@@ -4653,8 +4611,8 @@ static bool is_empty_fast_iter (FT_HANDLE ft_handle, FTNODE node) {
{
BLOCKNUM childblocknum = BP_BLOCKNUM(node,childnum);
uint32_t fullhash = compute_child_fullhash(ft_handle->ft->cf, node, childnum);
struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, ft_handle->ft);
ftnode_fetch_extra bfe;
bfe.create_for_full_read(ft_handle->ft);
// don't need to pass in dependent nodes as we are not
// modifying nodes we are pinning
toku_pin_ftnode(
......@@ -4692,8 +4650,8 @@ bool toku_ft_is_empty_fast (FT_HANDLE ft_handle)
{
CACHEKEY root_key;
toku_calculate_root_offset_pointer(ft_handle->ft, &root_key, &fullhash);
struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, ft_handle->ft);
ftnode_fetch_extra bfe;
bfe.create_for_full_read(ft_handle->ft);
toku_pin_ftnode(
ft_handle->ft,
root_key,
......
......@@ -177,8 +177,8 @@ int toku_testsetup_get_sersize(FT_HANDLE ft_handle, BLOCKNUM diskoff) // Return
{
assert(testsetup_initialized);
void *node_v;
struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, ft_handle->ft);
ftnode_fetch_extra bfe;
bfe.create_for_full_read(ft_handle->ft);
int r = toku_cachetable_get_and_pin(
ft_handle->ft->cf, diskoff,
toku_cachetable_hash(ft_handle->ft->cf, diskoff),
......@@ -204,8 +204,8 @@ int toku_testsetup_insert_to_leaf (FT_HANDLE ft_handle, BLOCKNUM blocknum, const
assert(testsetup_initialized);
struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, ft_handle->ft);
ftnode_fetch_extra bfe;
bfe.create_for_full_read(ft_handle->ft);
r = toku_cachetable_get_and_pin(
ft_handle->ft->cf,
blocknum,
......@@ -258,8 +258,8 @@ testhelper_string_key_cmp(DB *UU(e), const DBT *a, const DBT *b)
void
toku_pin_node_with_min_bfe(FTNODE* node, BLOCKNUM b, FT_HANDLE t)
{
struct ftnode_fetch_extra bfe;
fill_bfe_for_min_read(&bfe, t->ft);
ftnode_fetch_extra bfe;
bfe.create_for_min_read(t->ft);
toku_pin_ftnode(
t->ft,
b,
......@@ -277,8 +277,8 @@ int toku_testsetup_insert_to_nonleaf (FT_HANDLE ft_handle, BLOCKNUM blocknum, en
assert(testsetup_initialized);
struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, ft_handle->ft);
ftnode_fetch_extra bfe;
bfe.create_for_full_read(ft_handle->ft);
r = toku_cachetable_get_and_pin(
ft_handle->ft->cf,
blocknum,
......
......@@ -288,8 +288,8 @@ toku_get_node_for_verify(
)
{
uint32_t fullhash = toku_cachetable_hash(ft_handle->ft->cf, blocknum);
struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, ft_handle->ft);
ftnode_fetch_extra bfe;
bfe.create_for_full_read(ft_handle->ft);
toku_pin_ftnode(
ft_handle->ft,
blocknum,
......
......@@ -1045,8 +1045,8 @@ garbage_helper(BLOCKNUM blocknum, int64_t UU(size), int64_t UU(address), void *e
struct garbage_helper_extra *CAST_FROM_VOIDP(info, extra);
FTNODE node;
FTNODE_DISK_DATA ndd;
struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, info->ft);
ftnode_fetch_extra bfe;
bfe.create_for_full_read(info->ft);
int fd = toku_cachefile_get_fd(info->ft->cf);
int r = toku_deserialize_ftnode_from(fd, blocknum, 0, &node, &ndd, &bfe);
if (r != 0) {
......
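The same initialization works when bypassing the cachetable and deserializing straight from a file descriptor, as garbage_helper does above. A hypothetical wrapper (editor's illustration, not commit code):

static int read_node_for_scan(FT ft, int fd, BLOCKNUM blocknum,
                              FTNODE *node, FTNODE_DISK_DATA *ndd) {
    ftnode_fetch_extra bfe;
    bfe.create_for_full_read(ft);
    // passing 0 for fullhash is fine here, as in garbage_helper and the tests,
    // because no cachetable pair is involved
    return toku_deserialize_ftnode_from(fd, blocknum, 0, node, ndd, &bfe);
}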
......@@ -366,73 +366,6 @@ void toku_initialize_empty_ftnode(FTNODE node, BLOCKNUM blocknum, int height, in
int toku_ftnode_which_child(FTNODE node, const DBT *k, const toku::comparator &cmp);
void toku_ftnode_save_ct_pair(CACHEKEY key, void *value_data, PAIR p);
//
// Field in ftnode_fetch_extra that tells the
// partial fetch callback what piece of the node
// is needed by the ydb
//
enum ftnode_fetch_type {
ftnode_fetch_none=1, // no partitions needed.
ftnode_fetch_subset, // some subset of partitions needed
ftnode_fetch_prefetch, // this is part of a prefetch call
ftnode_fetch_all, // every partition is needed
ftnode_fetch_keymatch, // one child is needed if it holds both keys
};
static bool is_valid_ftnode_fetch_type(enum ftnode_fetch_type type) UU();
static bool is_valid_ftnode_fetch_type(enum ftnode_fetch_type type) {
switch (type) {
case ftnode_fetch_none:
case ftnode_fetch_subset:
case ftnode_fetch_prefetch:
case ftnode_fetch_all:
case ftnode_fetch_keymatch:
return true;
default:
return false;
}
}
//
// An extra parameter passed to cachetable functions
// That is used in all types of fetch callbacks.
// The contents help the partial fetch and fetch
// callbacks retrieve the pieces of a node necessary
// for the ensuing operation (flush, query, ...)
//
struct ft_search;
struct ftnode_fetch_extra {
enum ftnode_fetch_type type;
// needed for reading a node off disk
FT ft;
// used in the case where type == ftnode_fetch_subset
// parameters needed to find out which child needs to be decompressed (so it can be read)
ft_search *search;
DBT range_lock_left_key, range_lock_right_key;
bool left_is_neg_infty, right_is_pos_infty;
// states if we should try to aggressively fetch basement nodes
// that are not specifically needed for current query,
// but may be needed for other cursor operations user is doing
// For example, if we have not disabled prefetching,
// and the user is doing a dictionary wide scan, then
// even though a query may only want one basement node,
// we fetch all basement nodes in a leaf node.
bool disable_prefetching;
// this value will be set during the fetch_callback call by toku_ftnode_fetch_callback or toku_ftnode_pf_req_callback
// the callbacks need to evaluate this anyway, so we cache it here and the search code does not reevaluate it
int child_to_read;
// when we read internal nodes, we want to read all the data off disk in one I/O
// then we'll treat it as normal and only decompress the needed partitions etc.
bool read_all_partitions;
// Accounting: How many bytes were read, and how much time did we spend doing I/O?
uint64_t bytes_read;
tokutime_t io_time;
tokutime_t decompress_time;
tokutime_t deserialize_time;
};
typedef struct ftnode_fetch_extra *FTNODE_FETCH_EXTRA;
//
// TODO: put the heaviside functions into their respective 'struct .*extra;' namespaces
//
......
......@@ -1110,7 +1110,7 @@ static const int read_header_heuristic_max = 32*1024;
// Effect: If the header part of the node is small enough, then read it into the rbuf. The rbuf will be allocated to be big enough in any case.
static void read_ftnode_header_from_fd_into_rbuf_if_small_enough(int fd, BLOCKNUM blocknum,
FT ft, struct rbuf *rb,
struct ftnode_fetch_extra *bfe) {
ftnode_fetch_extra *bfe) {
DISKOFF offset, size;
ft->blocktable.translate_blocknum_to_offset_size(blocknum, &offset, &size);
DISKOFF read_size = roundup_to_multiple(512, MIN(read_header_heuristic_max, size));
......@@ -1277,7 +1277,7 @@ setup_available_ftnode_partition(FTNODE node, int i) {
// Assign the child_to_read member of the bfe from the given ftnode
// that has been brought into memory.
static void
update_bfe_using_ftnode(FTNODE node, struct ftnode_fetch_extra *bfe)
update_bfe_using_ftnode(FTNODE node, ftnode_fetch_extra *bfe)
{
if (bfe->type == ftnode_fetch_subset && bfe->search != NULL) {
// we do not take into account prefetching yet
......@@ -1297,8 +1297,8 @@ update_bfe_using_ftnode(FTNODE node, struct ftnode_fetch_extra *bfe)
// we find out what basement node the query cares about
// and check if it is available
if (node->height == 0) {
int left_child = toku_bfe_leftmost_child_wanted(bfe, node);
int right_child = toku_bfe_rightmost_child_wanted(bfe, node);
int left_child = bfe->leftmost_child_wanted(node);
int right_child = bfe->rightmost_child_wanted(node);
if (left_child == right_child) {
bfe->child_to_read = left_child;
}
......@@ -1310,14 +1310,14 @@ update_bfe_using_ftnode(FTNODE node, struct ftnode_fetch_extra *bfe)
// initialize all of the given ftnode's partitions.
static void
setup_partitions_using_bfe(FTNODE node,
struct ftnode_fetch_extra *bfe,
ftnode_fetch_extra *bfe,
bool data_in_memory)
{
// Leftmost and Rightmost Child bounds.
int lc, rc;
if (bfe->type == ftnode_fetch_subset || bfe->type == ftnode_fetch_prefetch) {
lc = toku_bfe_leftmost_child_wanted(bfe, node);
rc = toku_bfe_rightmost_child_wanted(bfe, node);
lc = bfe->leftmost_child_wanted(node);
rc = bfe->rightmost_child_wanted(node);
} else {
lc = -1;
rc = -1;
......@@ -1330,7 +1330,7 @@ setup_partitions_using_bfe(FTNODE node,
for (int i = 0; i < node->n_children; i++) {
BP_INIT_UNTOUCHED_CLOCK(node,i);
if (data_in_memory) {
BP_STATE(node, i) = ((toku_bfe_wants_child_available(bfe, i) || (lc <= i && i <= rc))
BP_STATE(node, i) = ((bfe->wants_child_available(i) || (lc <= i && i <= rc))
? PT_AVAIL : PT_COMPRESSED);
} else {
BP_STATE(node, i) = PT_ON_DISK;
......@@ -1354,7 +1354,7 @@ setup_partitions_using_bfe(FTNODE node,
}
}
static void setup_ftnode_partitions(FTNODE node, struct ftnode_fetch_extra* bfe, bool data_in_memory)
static void setup_ftnode_partitions(FTNODE node, ftnode_fetch_extra *bfe, bool data_in_memory)
// Effect: Used when reading a ftnode into main memory, this sets up the partitions.
// We set bfe->child_to_read as well as the BP_STATE and the data pointers (e.g., with set_BSB or set_BNULL or other set_ operations).
// Arguments: Node: the node to set up.
......@@ -1473,7 +1473,7 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode,
FTNODE_DISK_DATA* ndd,
BLOCKNUM blocknum,
uint32_t fullhash,
struct ftnode_fetch_extra *bfe,
ftnode_fetch_extra *bfe,
struct rbuf *rb,
int fd)
// If we have enough information in the rbuf to construct a header, then do so.
......@@ -1604,7 +1604,6 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode,
// rbuf, so we might be able to store the compressed data for some
// objects.
// We can proceed to deserialize the individual subblocks.
paranoid_invariant(is_valid_ftnode_fetch_type(bfe->type));
// setup the memory of the partitions
// for partitions being decompressed, create either message buffer or basement node
......@@ -1627,7 +1626,7 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode,
// handle clock
for (int i = 0; i < node->n_children; i++) {
if (toku_bfe_wants_child_available(bfe, i)) {
if (bfe->wants_child_available(i)) {
paranoid_invariant(BP_STATE(node,i) == PT_AVAIL);
BP_TOUCH_CLOCK(node,i);
}
......@@ -1660,7 +1659,7 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode,
static int
deserialize_and_upgrade_internal_node(FTNODE node,
struct rbuf *rb,
struct ftnode_fetch_extra* bfe,
ftnode_fetch_extra *bfe,
STAT64INFO info)
{
int version = node->layout_version_read_from_disk;
......@@ -1719,8 +1718,8 @@ deserialize_and_upgrade_internal_node(FTNODE node,
// sure we properly initialize our partitions before filling them
// in from our soon-to-be-upgraded node.
update_bfe_using_ftnode(node, bfe);
struct ftnode_fetch_extra temp_bfe;
temp_bfe.type = ftnode_fetch_all;
ftnode_fetch_extra temp_bfe;
temp_bfe.create_for_full_read(nullptr);
setup_partitions_using_bfe(node, &temp_bfe, true);
// Cache the highest MSN generated for the message buffers. This
......@@ -1780,7 +1779,7 @@ deserialize_and_upgrade_internal_node(FTNODE node,
static int
deserialize_and_upgrade_leaf_node(FTNODE node,
struct rbuf *rb,
struct ftnode_fetch_extra* bfe,
ftnode_fetch_extra *bfe,
STAT64INFO info)
{
int r = 0;
......@@ -1821,8 +1820,8 @@ deserialize_and_upgrade_leaf_node(FTNODE node,
// Create one basement node to contain all the leaf entries by
// setting up the single partition and updating the bfe.
update_bfe_using_ftnode(node, bfe);
struct ftnode_fetch_extra temp_bfe;
fill_bfe_for_full_read(&temp_bfe, bfe->ft);
ftnode_fetch_extra temp_bfe;
temp_bfe.create_for_full_read(bfe->ft);
setup_partitions_using_bfe(node, &temp_bfe, true);
// 11. Deserialize the partition maps, though they are not used in the
......@@ -1933,7 +1932,7 @@ static int
deserialize_and_upgrade_ftnode(FTNODE node,
FTNODE_DISK_DATA* ndd,
BLOCKNUM blocknum,
struct ftnode_fetch_extra* bfe,
ftnode_fetch_extra *bfe,
STAT64INFO info,
int fd)
{
......@@ -2023,7 +2022,7 @@ deserialize_ftnode_from_rbuf(
FTNODE_DISK_DATA* ndd,
BLOCKNUM blocknum,
uint32_t fullhash,
struct ftnode_fetch_extra* bfe,
ftnode_fetch_extra *bfe,
STAT64INFO info,
struct rbuf *rb,
int fd
......@@ -2120,7 +2119,6 @@ deserialize_ftnode_from_rbuf(
// now that the node info has been deserialized, we can proceed to deserialize
// the individual sub blocks
paranoid_invariant(is_valid_ftnode_fetch_type(bfe->type));
// setup the memory of the partitions
// for partitions being decompressed, create either message buffer or basement node
......@@ -2207,7 +2205,7 @@ deserialize_ftnode_from_rbuf(
}
int
toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, int fd, struct ftnode_fetch_extra* bfe) {
toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, int fd, ftnode_fetch_extra *bfe) {
int r = 0;
assert(BP_STATE(node,childnum) == PT_ON_DISK);
assert(node->bp[childnum].ptr.tag == BCT_NULL);
......@@ -2287,7 +2285,7 @@ toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, i
// Take a ftnode partition that is in the compressed state, and make it avail
int
toku_deserialize_bp_from_compressed(FTNODE node, int childnum, struct ftnode_fetch_extra *bfe) {
toku_deserialize_bp_from_compressed(FTNODE node, int childnum, ftnode_fetch_extra *bfe) {
int r = 0;
assert(BP_STATE(node, childnum) == PT_COMPRESSED);
SUB_BLOCK curr_sb = BSB(node, childnum);
......@@ -2332,7 +2330,7 @@ deserialize_ftnode_from_fd(int fd,
uint32_t fullhash,
FTNODE *ftnode,
FTNODE_DISK_DATA *ndd,
struct ftnode_fetch_extra *bfe,
ftnode_fetch_extra *bfe,
STAT64INFO info)
{
struct rbuf rb = RBUF_INITIALIZER;
......@@ -2361,7 +2359,7 @@ toku_deserialize_ftnode_from (int fd,
uint32_t fullhash,
FTNODE *ftnode,
FTNODE_DISK_DATA* ndd,
struct ftnode_fetch_extra* bfe
ftnode_fetch_extra *bfe
)
// Effect: Read a node in. If possible, read just the header.
{
......@@ -2864,8 +2862,8 @@ toku_upgrade_subtree_estimates_to_stat64info(int fd, FT ft)
FTNODE unused_node = NULL;
FTNODE_DISK_DATA unused_ndd = NULL;
struct ftnode_fetch_extra bfe;
fill_bfe_for_min_read(&bfe, ft);
ftnode_fetch_extra bfe;
bfe.create_for_min_read(ft);
r = deserialize_ftnode_from_fd(fd, ft->h->root_blocknum, 0, &unused_node, &unused_ndd,
&bfe, &ft->h->on_disk_stats);
ft->in_memory_stats = ft->h->on_disk_stats;
......@@ -2888,8 +2886,8 @@ toku_upgrade_msn_from_root_to_header(int fd, FT ft)
FTNODE node;
FTNODE_DISK_DATA ndd;
struct ftnode_fetch_extra bfe;
fill_bfe_for_min_read(&bfe, ft);
ftnode_fetch_extra bfe;
bfe.create_for_min_read(ft);
r = deserialize_ftnode_from_fd(fd, ft->h->root_blocknum, 0, &node, &ndd, &bfe, nullptr);
if (r != 0) {
goto exit;
......
......@@ -108,9 +108,9 @@ int toku_serialize_rollback_log_to(int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROL
void toku_serialize_rollback_log_to_memory_uncompressed(ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized);
int toku_deserialize_rollback_log_from(int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE *logp, FT ft);
int toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, int fd, struct ftnode_fetch_extra *bfe);
int toku_deserialize_bp_from_compressed(FTNODE node, int childnum, struct ftnode_fetch_extra *bfe);
int toku_deserialize_ftnode_from(int fd, BLOCKNUM off, uint32_t fullhash, FTNODE *node, FTNODE_DISK_DATA *ndd, struct ftnode_fetch_extra *bfe);
int toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, int fd, ftnode_fetch_extra *bfe);
int toku_deserialize_bp_from_compressed(FTNODE node, int childnum, ftnode_fetch_extra *bfe);
int toku_deserialize_ftnode_from(int fd, BLOCKNUM off, uint32_t fullhash, FTNODE *node, FTNODE_DISK_DATA *ndd, ftnode_fetch_extra *bfe);
// used by nonleaf node partial eviction
void toku_create_compressed_partition_from_available(FTNODE node, int childnum,
......
......@@ -114,12 +114,12 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
cursor->right_is_pos_infty = true;
cursor->disable_prefetching = false;
struct ftnode_fetch_extra bfe;
ftnode_fetch_extra bfe;
// quick test to see that we have the right behavior when we set
// disable_prefetching to true
cursor->disable_prefetching = true;
fill_bfe_for_prefetch(&bfe, ft_h, cursor);
bfe.create_for_prefetch(ft_h, cursor);
FTNODE_DISK_DATA ndd = NULL;
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
assert(r==0);
......@@ -131,14 +131,14 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
assert(BP_STATE(dn,0) == PT_ON_DISK);
assert(BP_STATE(dn,1) == PT_ON_DISK);
assert(BP_STATE(dn,2) == PT_ON_DISK);
destroy_bfe_for_prefetch(&bfe);
bfe.destroy();
toku_ftnode_free(&dn);
toku_free(ndd);
// now enable prefetching again
cursor->disable_prefetching = false;
fill_bfe_for_prefetch(&bfe, ft_h, cursor);
bfe.create_for_prefetch(ft_h, cursor);
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
assert(r==0);
assert(dn->n_children == 3);
......@@ -153,14 +153,14 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
assert(BP_STATE(dn,0) == PT_AVAIL);
assert(BP_STATE(dn,1) == PT_AVAIL);
assert(BP_STATE(dn,2) == PT_AVAIL);
destroy_bfe_for_prefetch(&bfe);
bfe.destroy();
toku_ftnode_free(&dn);
toku_free(ndd);
uint64_t left_key = 150;
toku_fill_dbt(&cursor->range_lock_left_key, &left_key, sizeof(uint64_t));
cursor->left_is_neg_infty = false;
fill_bfe_for_prefetch(&bfe, ft_h, cursor);
bfe.create_for_prefetch(ft_h, cursor);
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
assert(r==0);
assert(dn->n_children == 3);
......@@ -175,14 +175,14 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
assert(BP_STATE(dn,0) == PT_ON_DISK);
assert(BP_STATE(dn,1) == PT_AVAIL);
assert(BP_STATE(dn,2) == PT_AVAIL);
destroy_bfe_for_prefetch(&bfe);
bfe.destroy();
toku_ftnode_free(&dn);
toku_free(ndd);
uint64_t right_key = 151;
toku_fill_dbt(&cursor->range_lock_right_key, &right_key, sizeof(uint64_t));
cursor->right_is_pos_infty = false;
fill_bfe_for_prefetch(&bfe, ft_h, cursor);
bfe.create_for_prefetch(ft_h, cursor);
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
assert(r==0);
assert(dn->n_children == 3);
......@@ -197,13 +197,13 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
assert(BP_STATE(dn,0) == PT_ON_DISK);
assert(BP_STATE(dn,1) == PT_AVAIL);
assert(BP_STATE(dn,2) == PT_ON_DISK);
destroy_bfe_for_prefetch(&bfe);
bfe.destroy();
toku_ftnode_free(&dn);
toku_free(ndd);
left_key = 100000;
right_key = 100000;
fill_bfe_for_prefetch(&bfe, ft_h, cursor);
bfe.create_for_prefetch(ft_h, cursor);
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
assert(r==0);
assert(dn->n_children == 3);
......@@ -218,13 +218,13 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
assert(BP_STATE(dn,0) == PT_ON_DISK);
assert(BP_STATE(dn,1) == PT_ON_DISK);
assert(BP_STATE(dn,2) == PT_AVAIL);
destroy_bfe_for_prefetch(&bfe);
bfe.destroy();
toku_free(ndd);
toku_ftnode_free(&dn);
left_key = 100;
right_key = 100;
fill_bfe_for_prefetch(&bfe, ft_h, cursor);
bfe.create_for_prefetch(ft_h, cursor);
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
assert(r==0);
assert(dn->n_children == 3);
......@@ -239,7 +239,7 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
assert(BP_STATE(dn,0) == PT_AVAIL);
assert(BP_STATE(dn,1) == PT_ON_DISK);
assert(BP_STATE(dn,2) == PT_ON_DISK);
destroy_bfe_for_prefetch(&bfe);
bfe.destroy();
toku_ftnode_free(&dn);
toku_free(ndd);
......@@ -260,15 +260,14 @@ test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
cursor->left_is_neg_infty = true;
cursor->right_is_pos_infty = true;
struct ftnode_fetch_extra bfe;
uint64_t left_key = 150;
uint64_t right_key = 151;
DBT left, right;
toku_fill_dbt(&left, &left_key, sizeof(left_key));
toku_fill_dbt(&right, &right_key, sizeof(right_key));
fill_bfe_for_subset_read(
&bfe,
ftnode_fetch_extra bfe;
bfe.create_for_subset_read(
ft_h,
NULL,
&left,
......
......@@ -146,8 +146,8 @@ le_malloc(bn_data* bn, uint32_t idx, const char *key, const char *val)
static void
test1(int fd, FT ft_h, FTNODE *dn) {
int r;
struct ftnode_fetch_extra bfe_all;
fill_bfe_for_full_read(&bfe_all, ft_h);
ftnode_fetch_extra bfe_all;
bfe_all.create_for_full_read(ft_h);
FTNODE_DISK_DATA ndd = NULL;
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_all);
bool is_leaf = ((*dn)->height == 0);
......@@ -217,7 +217,6 @@ static int search_cmp(const struct ft_search& UU(so), const DBT* UU(key)) {
static void
test2(int fd, FT ft_h, FTNODE *dn) {
struct ftnode_fetch_extra bfe_subset;
DBT left, right;
DB dummy_db;
memset(&dummy_db, 0, sizeof(dummy_db));
......@@ -225,8 +224,8 @@ test2(int fd, FT ft_h, FTNODE *dn) {
memset(&right, 0, sizeof(right));
ft_search search;
fill_bfe_for_subset_read(
&bfe_subset,
ftnode_fetch_extra bfe_subset;
bfe_subset.create_for_subset_read(
ft_h,
ft_search_init(&search, search_cmp, FT_SEARCH_LEFT, nullptr, nullptr, nullptr),
&left,
......@@ -236,6 +235,7 @@ test2(int fd, FT ft_h, FTNODE *dn) {
false,
false
);
FTNODE_DISK_DATA ndd = NULL;
int r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_subset);
assert(r==0);
......@@ -270,17 +270,15 @@ test2(int fd, FT ft_h, FTNODE *dn) {
static void
test3_leaf(int fd, FT ft_h, FTNODE *dn) {
struct ftnode_fetch_extra bfe_min;
DBT left, right;
DB dummy_db;
memset(&dummy_db, 0, sizeof(dummy_db));
memset(&left, 0, sizeof(left));
memset(&right, 0, sizeof(right));
fill_bfe_for_min_read(
&bfe_min,
ft_h
);
ftnode_fetch_extra bfe_min;
bfe_min.create_for_min_read(ft_h);
FTNODE_DISK_DATA ndd = NULL;
int r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_min);
assert(r==0);
......
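create_for_full_read() and create_for_min_read() take only the tree handle. Consistent with the BP_STATE assertions in these tests (every partition PT_AVAIL after a full read, partitions left PT_ON_DISK after a min read), the two variants plausibly differ only in which partitions the fetch path is asked to materialize. A toy sketch under that assumption:

#include <cassert>

struct toy_ft { };

struct toy_fetch_extra {
    enum fetch_type { fetch_all, fetch_none };
    fetch_type type;
    toy_ft *ft;

    void create_for_full_read(toy_ft *f) { type = fetch_all;  ft = f; }  // fetch every partition
    void create_for_min_read(toy_ft *f)  { type = fetch_none; ft = f; }  // fetch no partitions
};

// Toy analogue of the per-partition decision the fetch callback makes.
static bool wants_partition(const toy_fetch_extra &bfe, int /*childnum*/) {
    return bfe.type == toy_fetch_extra::fetch_all;
}

int main() {
    toy_ft ft;
    toy_fetch_extra full, min;
    full.create_for_full_read(&ft);
    min.create_for_min_read(&ft);
    assert(wants_partition(full, 0));   // full read: partition ends up PT_AVAIL
    assert(!wants_partition(min, 0));   // min read: partition stays PT_ON_DISK
    return 0;
}

Notice also that none of the tests call destroy() after these two variants, suggesting that only the prefetch and subset-read initializers allocate memory that must be released.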
......@@ -247,9 +247,9 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
total_start.tv_sec = total_start.tv_usec = 0;
total_end.tv_sec = total_end.tv_usec = 0;
struct ftnode_fetch_extra bfe;
ftnode_fetch_extra bfe;
for (int i = 0; i < deser_runs; i++) {
fill_bfe_for_full_read(&bfe, ft_h);
bfe.create_for_full_read(ft_h);
gettimeofday(&t[0], NULL);
FTNODE_DISK_DATA ndd2 = NULL;
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe);
......@@ -392,8 +392,8 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int
dt *= 1000;
printf("serialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, ser_runs);
struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, ft_h);
ftnode_fetch_extra bfe;
bfe.create_for_full_read(ft_h);
gettimeofday(&t[0], NULL);
FTNODE_DISK_DATA ndd2 = NULL;
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe);
......
......@@ -165,14 +165,14 @@ static void
setup_dn(enum ftnode_verify_type bft, int fd, FT ft_h, FTNODE *dn, FTNODE_DISK_DATA* ndd) {
int r;
if (bft == read_all) {
struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, ft_h);
ftnode_fetch_extra bfe;
bfe.create_for_full_read(ft_h);
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, ndd, &bfe);
assert(r==0);
}
else if (bft == read_compressed || bft == read_none) {
struct ftnode_fetch_extra bfe;
fill_bfe_for_min_read(&bfe, ft_h);
ftnode_fetch_extra bfe;
bfe.create_for_min_read(ft_h);
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, ndd, &bfe);
assert(r==0);
// assert all bp's are compressed or on disk.
......@@ -199,7 +199,7 @@ setup_dn(enum ftnode_verify_type bft, int fd, FT ft_h, FTNODE *dn, FTNODE_DISK_D
// that it is available
// then run partial eviction to get it compressed
PAIR_ATTR attr;
fill_bfe_for_full_read(&bfe, ft_h);
bfe.create_for_full_read(ft_h);
assert(toku_ftnode_pf_req_callback(*dn, &bfe));
r = toku_ftnode_pf_callback(*dn, *ndd, &bfe, fd, &attr);
assert(r==0);
......@@ -221,7 +221,7 @@ setup_dn(enum ftnode_verify_type bft, int fd, FT ft_h, FTNODE *dn, FTNODE_DISK_D
}
}
// now decompress them
fill_bfe_for_full_read(&bfe, ft_h);
bfe.create_for_full_read(ft_h);
assert(toku_ftnode_pf_req_callback(*dn, &bfe));
PAIR_ATTR attr;
r = toku_ftnode_pf_callback(*dn, *ndd, &bfe, fd, &attr);
......
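setup_dn also shows the partial-fetch handshake around a bfe: after deserializing with a min read, the same bfe is re-primed with create_for_full_read(), toku_ftnode_pf_req_callback() is asked whether more data is needed, and toku_ftnode_pf_callback() then fetches it. A toy sketch of that two-step protocol follows; the names mirror the calls above, but the bodies are illustrative assumptions.

#include <cassert>

struct toy_fetch_extra {
    enum fetch_type { fetch_all, fetch_none };
    fetch_type type;
    void create_for_full_read() { type = fetch_all; }
    void create_for_min_read()  { type = fetch_none; }
};

struct toy_node { bool partitions_avail; };

// pf_req: does this bfe require data the node does not have in memory?
static bool toy_pf_req_callback(const toy_node &n, const toy_fetch_extra &bfe) {
    return bfe.type == toy_fetch_extra::fetch_all && !n.partitions_avail;
}

// pf: fetch whatever the bfe asked for (here, trivially mark it available).
static int toy_pf_callback(toy_node &n, const toy_fetch_extra &bfe) {
    if (bfe.type == toy_fetch_extra::fetch_all) n.partitions_avail = true;
    return 0;
}

int main() {
    toy_node dn = { false };                // deserialized with a min-read bfe
    toy_fetch_extra bfe;
    bfe.create_for_min_read();
    assert(!toy_pf_req_callback(dn, bfe));  // min read: nothing more needed

    bfe.create_for_full_read();             // reuse the same bfe, as setup_dn does
    assert(toy_pf_req_callback(dn, bfe));   // now the rest must be fetched
    int r = toy_pf_callback(dn, bfe);
    assert(r == 0 && dn.partitions_avail);
    return 0;
}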
......@@ -227,8 +227,8 @@ doit (bool after_child_pin) {
);
FTNODE node = NULL;
struct ftnode_fetch_extra bfe;
fill_bfe_for_min_read(&bfe, t->ft);
ftnode_fetch_extra bfe;
bfe.create_for_min_read(t->ft);
toku_pin_ftnode(
t->ft,
node_root,
......@@ -282,7 +282,7 @@ doit (bool after_child_pin) {
//
// now pin the root, verify that we have a message in there, and that it is clean
//
fill_bfe_for_full_read(&bfe, c_ft->ft);
bfe.create_for_full_read(c_ft->ft);
toku_pin_ftnode(
c_ft->ft,
node_root,
......
......@@ -245,8 +245,8 @@ doit (int state) {
toku_unpin_ftnode(t->ft, node);
struct ftnode_fetch_extra bfe;
fill_bfe_for_min_read(&bfe, t->ft);
ftnode_fetch_extra bfe;
bfe.create_for_min_read(t->ft);
toku_pin_ftnode_with_dep_nodes(
t->ft,
node_root,
......@@ -305,7 +305,7 @@ doit (int state) {
//
// now pin the root, verify that the state is what we expect
//
fill_bfe_for_full_read(&bfe, c_ft->ft);
bfe.create_for_full_read(c_ft->ft);
toku_pin_ftnode_with_dep_nodes(
c_ft->ft,
node_root,
......
......@@ -265,8 +265,8 @@ doit (int state) {
toku_unpin_ftnode(t->ft, node);
struct ftnode_fetch_extra bfe;
fill_bfe_for_min_read(&bfe, t->ft);
ftnode_fetch_extra bfe;
bfe.create_for_min_read(t->ft);
toku_pin_ftnode(
t->ft,
node_root,
......@@ -321,7 +321,7 @@ doit (int state) {
//
// now pin the root, verify that the state is what we expect
//
fill_bfe_for_full_read(&bfe, c_ft->ft);
bfe.create_for_full_read(c_ft->ft);
toku_pin_ftnode(
c_ft->ft,
node_root,
......
......@@ -241,8 +241,8 @@ doit (bool after_split) {
);
FTNODE node = NULL;
struct ftnode_fetch_extra bfe;
fill_bfe_for_min_read(&bfe, t->ft);
ftnode_fetch_extra bfe;
bfe.create_for_min_read(t->ft);
toku_pin_ftnode(
t->ft,
node_root,
......@@ -297,7 +297,7 @@ doit (bool after_split) {
//
// now pin the root, verify that we have a message in there, and that it is clean
//
fill_bfe_for_full_read(&bfe, c_ft->ft);
bfe.create_for_full_read(c_ft->ft);
toku_pin_ftnode(
c_ft->ft,
node_root,
......
......@@ -237,8 +237,8 @@ doit (void) {
// now lock and release the leaf node to make sure it is what we expect it to be.
FTNODE node = NULL;
struct ftnode_fetch_extra bfe;
fill_bfe_for_min_read(&bfe, ft->ft);
ftnode_fetch_extra bfe;
bfe.create_for_min_read(ft->ft);
toku_pin_ftnode_with_dep_nodes(
ft->ft,
node_leaf,
......@@ -268,7 +268,7 @@ doit (void) {
// node is in memory and another is
// on disk
//
fill_bfe_for_min_read(&bfe, ft->ft);
bfe.create_for_min_read(ft->ft);
toku_pin_ftnode_with_dep_nodes(
ft->ft,
node_leaf,
......@@ -289,7 +289,7 @@ doit (void) {
//
// now let us induce a clean on the internal node
//
fill_bfe_for_min_read(&bfe, ft->ft);
bfe.create_for_min_read(ft->ft);
toku_pin_ftnode_with_dep_nodes(
ft->ft,
node_internal,
......@@ -314,7 +314,7 @@ doit (void) {
);
// verify that node_internal's buffer is empty
fill_bfe_for_min_read(&bfe, ft->ft);
bfe.create_for_min_read(ft->ft);
toku_pin_ftnode_with_dep_nodes(
ft->ft,
node_internal,
......
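The doit() tests in the remaining hunks all share one shape: build a bfe for the access pattern under test, hand it to toku_pin_ftnode() or toku_pin_ftnode_with_dep_nodes(), assert on the pinned node, and unpin. A compressed sketch of that shape, with stand-in pin/unpin functions rather than the real cachetable calls:

#include <cassert>

struct toy_ft { };
struct toy_node { int height; bool pinned; };

struct toy_fetch_extra {
    enum fetch_type { fetch_all, fetch_none } type;
    toy_ft *ft;
    void create_for_min_read(toy_ft *f)  { type = fetch_none; ft = f; }
    void create_for_full_read(toy_ft *f) { type = fetch_all;  ft = f; }
};

// Stand-ins for toku_pin_ftnode()/toku_unpin_ftnode(): the bfe tells the
// cachetable how much of the node to materialize while it is pinned.
static void toy_pin(toy_ft *, toy_node *n, const toy_fetch_extra &) { n->pinned = true; }
static void toy_unpin(toy_ft *, toy_node *n) { n->pinned = false; }

int main() {
    toy_ft ft;
    toy_node root = { 1, false };
    toy_fetch_extra bfe;

    bfe.create_for_min_read(&ft);   // cheap pin: no partitions needed
    toy_pin(&ft, &root, bfe);
    assert(root.pinned);
    toy_unpin(&ft, &root);

    bfe.create_for_full_read(&ft);  // re-prime the same stack bfe for the
    toy_pin(&ft, &root, bfe);       // next pin, exactly as the tests above do
    toy_unpin(&ft, &root);
    return 0;
}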
......@@ -243,8 +243,8 @@ doit (bool keep_other_bn_in_memory) {
assert_zero(r);
// now lock and release the leaf node to make sure it is what we expect it to be.
FTNODE node = NULL;
struct ftnode_fetch_extra bfe;
fill_bfe_for_min_read(&bfe, ft->ft);
ftnode_fetch_extra bfe;
bfe.create_for_min_read(ft->ft);
toku_pin_ftnode(
ft->ft,
node_leaf,
......@@ -280,7 +280,7 @@ doit (bool keep_other_bn_in_memory) {
// but only one should have broadcast message
// applied.
//
fill_bfe_for_full_read(&bfe, ft->ft);
bfe.create_for_full_read(ft->ft);
}
else {
//
......@@ -289,7 +289,7 @@ doit (bool keep_other_bn_in_memory) {
// node is in memory and another is
// on disk
//
fill_bfe_for_min_read(&bfe, ft->ft);
bfe.create_for_min_read(ft->ft);
}
toku_pin_ftnode(
ft->ft,
......@@ -314,7 +314,7 @@ doit (bool keep_other_bn_in_memory) {
//
// now let us induce a clean on the internal node
//
fill_bfe_for_min_read(&bfe, ft->ft);
bfe.create_for_min_read(ft->ft);
toku_pin_ftnode(
ft->ft,
node_internal,
......@@ -337,7 +337,7 @@ doit (bool keep_other_bn_in_memory) {
);
// verify that node_internal's buffer is empty
fill_bfe_for_min_read(&bfe, ft->ft);
bfe.create_for_min_read(ft->ft);
toku_pin_ftnode(
ft->ft,
node_internal,
......
......@@ -180,8 +180,8 @@ doit (void) {
// the root, one in each buffer, let's verify this.
FTNODE node = NULL;
struct ftnode_fetch_extra bfe;
fill_bfe_for_min_read(&bfe, t->ft);
ftnode_fetch_extra bfe;
bfe.create_for_min_read(t->ft);
toku_pin_ftnode(
t->ft,
node_root,
......@@ -210,7 +210,7 @@ doit (void) {
// at this point, we should have flushed
// only the middle buffer, let's verify this.
node = NULL;
fill_bfe_for_min_read(&bfe, t->ft);
bfe.create_for_min_read(t->ft);
toku_pin_ftnode(
t->ft,
node_root,
......
......@@ -229,8 +229,8 @@ doit (void) {
r = toku_ft_lookup(ft, toku_fill_dbt(&k, "a", 2), lookup_checkf, &pair);
assert(r==0);
struct ftnode_fetch_extra bfe;
fill_bfe_for_min_read(&bfe, ft->ft);
ftnode_fetch_extra bfe;
bfe.create_for_min_read(ft->ft);
toku_pin_ftnode(
ft->ft,
node_internal,
......@@ -252,7 +252,7 @@ doit (void) {
);
// verify that node_internal's buffer is empty
fill_bfe_for_min_read(&bfe, ft->ft);
bfe.create_for_min_read(ft->ft);
toku_pin_ftnode(
ft->ft,
node_internal,
......
......@@ -167,8 +167,8 @@ static void test_oldest_referenced_xid_gets_propogated(void) {
// first verify the child
FTNODE node = NULL;
struct ftnode_fetch_extra bfe;
fill_bfe_for_min_read(&bfe, t->ft);
ftnode_fetch_extra bfe;
bfe.create_for_min_read(t->ft);
toku_pin_ftnode(
t->ft,
child_nonleaf_blocknum,
......
......@@ -90,7 +90,7 @@ PATENT RIGHTS GRANT:
// it used to be the case that we copied the left and right keys of a
// range to be prelocked but never freed them, this test checks that they
// are freed (as of this time, this happens in destroy_bfe_for_prefetch)
// are freed (as of this time, this happens in ftnode_fetch_extra::destroy())
#include "test.h"
......
......@@ -90,7 +90,7 @@ PATENT RIGHTS GRANT:
// it used to be the case that we copied the left and right keys of a
// range to be prelocked but never freed them, this test checks that they
// are freed (as of this time, this happens in destroy_bfe_for_prefetch)
// are freed (as of this time, this happens in ftnode_fetch_extra::destroy())
#include "test.h"
......
......@@ -147,8 +147,8 @@ doit (void) {
// then node_internal should be huge
// we pin it and verify that it is not
FTNODE node;
struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, t->ft);
ftnode_fetch_extra bfe;
bfe.create_for_full_read(t->ft);
toku_pin_ftnode(
t->ft,
node_internal,
......
......@@ -143,8 +143,8 @@ static void test_split_merge(void) {
BLOCKNUM root_blocknum = ft->h->root_blocknum;
FTNODE root_node;
struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, ft);
ftnode_fetch_extra bfe;
bfe.create_for_full_read(ft);
toku_pin_ftnode(ft, root_blocknum,
toku_cachetable_hash(ft->cf, ft->h->root_blocknum),
&bfe, PL_WRITE_EXPENSIVE, &root_node, true);
......
......@@ -229,9 +229,9 @@ static int print_le(const void* key, const uint32_t keylen, const LEAFENTRY &le,
static void dump_node(int fd, BLOCKNUM blocknum, FT ft) {
FTNODE n;
struct ftnode_fetch_extra bfe;
FTNODE_DISK_DATA ndd = NULL;
fill_bfe_for_full_read(&bfe, ft);
FTNODE_DISK_DATA ndd = nullptr;
ftnode_fetch_extra bfe;
bfe.create_for_full_read(ft);
int r = toku_deserialize_ftnode_from(fd, blocknum, 0 /*pass zero for hash, it doesn't matter*/, &n, &ndd, &bfe);
assert_zero(r);
assert(n!=0);
......@@ -366,8 +366,8 @@ static int nodesizes_helper(BLOCKNUM b, int64_t size, int64_t UU(address), void
frag_help_extra *CAST_FROM_VOIDP(info, extra);
FTNODE n;
FTNODE_DISK_DATA ndd = NULL;
struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, info->ft);
ftnode_fetch_extra bfe;
bfe.create_for_full_read(info->ft);
int r = toku_deserialize_ftnode_from(info->fd, b, 0 /*pass zero for hash, it doesn't matter*/, &n, &ndd, &bfe);
if (r==0) {
info->blocksizes += size;
......