Commit 80b8bb97 authored by John Esmet's avatar John Esmet Committed by Yoni Fogel

refs #5710 add counters for the number of bytes read and time spent doing IO,...

refs #5710 add counters for the number of bytes read and time spent doing IO, for the various ways you can do a fetch. improve the name of get_tokutime() and get rid of the now defunct toku_current_time_nanoseconds()


git-svn-id: file:///svn/toku/tokudb@50481 c7de825b-a66e-492c-adef-691d508d4ae1
parent a8798833
...@@ -91,6 +91,9 @@ struct ftnode_fetch_extra { ...@@ -91,6 +91,9 @@ struct ftnode_fetch_extra {
// this value will be set during the fetch_callback call by toku_ftnode_fetch_callback or toku_ftnode_pf_req_callback // this value will be set during the fetch_callback call by toku_ftnode_fetch_callback or toku_ftnode_pf_req_callback
// thi callbacks need to evaluate this anyway, so we cache it here so the search code does not reevaluate it // thi callbacks need to evaluate this anyway, so we cache it here so the search code does not reevaluate it
int child_to_read; int child_to_read;
// Accounting: How many bytes were fetched, and how much time did it take?
tokutime_t bytes_read;
uint64_t read_time;
}; };
struct toku_fifo_entry_key_msn_heaviside_extra { struct toku_fifo_entry_key_msn_heaviside_extra {
...@@ -718,6 +721,8 @@ static inline void fill_bfe_for_full_read(struct ftnode_fetch_extra *bfe, FT h) ...@@ -718,6 +721,8 @@ static inline void fill_bfe_for_full_read(struct ftnode_fetch_extra *bfe, FT h)
bfe->right_is_pos_infty = false; bfe->right_is_pos_infty = false;
bfe->child_to_read = -1; bfe->child_to_read = -1;
bfe->disable_prefetching = false; bfe->disable_prefetching = false;
bfe->bytes_read = 0;
bfe->read_time = 0;
} }
// //
...@@ -747,6 +752,8 @@ static inline void fill_bfe_for_subset_read( ...@@ -747,6 +752,8 @@ static inline void fill_bfe_for_subset_read(
bfe->right_is_pos_infty = right_is_pos_infty; bfe->right_is_pos_infty = right_is_pos_infty;
bfe->child_to_read = -1; bfe->child_to_read = -1;
bfe->disable_prefetching = disable_prefetching; bfe->disable_prefetching = disable_prefetching;
bfe->bytes_read = 0;
bfe->read_time = 0;
} }
// //
...@@ -766,6 +773,8 @@ static inline void fill_bfe_for_min_read(struct ftnode_fetch_extra *bfe, FT h) { ...@@ -766,6 +773,8 @@ static inline void fill_bfe_for_min_read(struct ftnode_fetch_extra *bfe, FT h) {
bfe->right_is_pos_infty = false; bfe->right_is_pos_infty = false;
bfe->child_to_read = -1; bfe->child_to_read = -1;
bfe->disable_prefetching = false; bfe->disable_prefetching = false;
bfe->bytes_read = 0;
bfe->read_time = 0;
} }
static inline void destroy_bfe_for_prefetch(struct ftnode_fetch_extra *bfe) { static inline void destroy_bfe_for_prefetch(struct ftnode_fetch_extra *bfe) {
...@@ -812,6 +821,8 @@ static inline void fill_bfe_for_prefetch(struct ftnode_fetch_extra *bfe, ...@@ -812,6 +821,8 @@ static inline void fill_bfe_for_prefetch(struct ftnode_fetch_extra *bfe,
bfe->right_is_pos_infty = c->right_is_pos_infty; bfe->right_is_pos_infty = c->right_is_pos_infty;
bfe->child_to_read = -1; bfe->child_to_read = -1;
bfe->disable_prefetching = c->disable_prefetching; bfe->disable_prefetching = c->disable_prefetching;
bfe->bytes_read = 0;
bfe->read_time = 0;
} }
struct ancestors { struct ancestors {
...@@ -973,11 +984,9 @@ typedef enum { ...@@ -973,11 +984,9 @@ typedef enum {
FT_CREATE_NONLEAF, // number of nonleaf nodes created FT_CREATE_NONLEAF, // number of nonleaf nodes created
FT_DESTROY_LEAF, // number of leaf nodes destroyed FT_DESTROY_LEAF, // number of leaf nodes destroyed
FT_DESTROY_NONLEAF, // number of nonleaf nodes destroyed FT_DESTROY_NONLEAF, // number of nonleaf nodes destroyed
FT_MSG_KEYVAL_BYTES_IN, // how many bytes of keyval data ingested by the tree (all tree, no overhead counted)
FT_MSG_BYTES_IN, // how many bytes of messages injected at root (for all trees) FT_MSG_BYTES_IN, // how many bytes of messages injected at root (for all trees)
FT_MSG_BYTES_OUT, // how many bytes of messages flushed from h1 nodes to leaves FT_MSG_BYTES_OUT, // how many bytes of messages flushed from h1 nodes to leaves
FT_MSG_BYTES_CURR, // how many bytes of messages currently in trees (estimate) FT_MSG_BYTES_CURR, // how many bytes of messages currently in trees (estimate)
//FT_MSG_BYTES_MAX, // how many bytes of messages currently in trees (estimate)
FT_MSG_NUM, // how many messages injected at root FT_MSG_NUM, // how many messages injected at root
FT_MSG_NUM_BROADCAST, // how many broadcast messages injected at root FT_MSG_NUM_BROADCAST, // how many broadcast messages injected at root
FT_NUM_BASEMENTS_DECOMPRESSED_NORMAL, // how many basement nodes were decompressed because they were the target of a query FT_NUM_BASEMENTS_DECOMPRESSED_NORMAL, // how many basement nodes were decompressed because they were the target of a query
...@@ -989,16 +998,38 @@ typedef enum { ...@@ -989,16 +998,38 @@ typedef enum {
FT_NUM_MSG_BUFFER_DECOMPRESSED_PREFETCH, FT_NUM_MSG_BUFFER_DECOMPRESSED_PREFETCH,
FT_NUM_MSG_BUFFER_DECOMPRESSED_WRITE, FT_NUM_MSG_BUFFER_DECOMPRESSED_WRITE,
FT_NUM_PIVOTS_FETCHED_QUERY, // how many pivots were fetched for a query FT_NUM_PIVOTS_FETCHED_QUERY, // how many pivots were fetched for a query
FT_BYTES_PIVOTS_FETCHED_QUERY, // how many pivots were fetched for a query
FT_NANOTIME_PIVOTS_FETCHED_QUERY, // how many pivots were fetched for a query
FT_NUM_PIVOTS_FETCHED_PREFETCH, // ... for a prefetch FT_NUM_PIVOTS_FETCHED_PREFETCH, // ... for a prefetch
FT_BYTES_PIVOTS_FETCHED_PREFETCH, // ... for a prefetch
FT_NANOTIME_PIVOTS_FETCHED_PREFETCH, // ... for a prefetch
FT_NUM_PIVOTS_FETCHED_WRITE, // ... for a write FT_NUM_PIVOTS_FETCHED_WRITE, // ... for a write
FT_BYTES_PIVOTS_FETCHED_WRITE, // ... for a write
FT_NANOTIME_PIVOTS_FETCHED_WRITE, // ... for a write
FT_NUM_BASEMENTS_FETCHED_NORMAL, // how many basement nodes were fetched because they were the target of a query FT_NUM_BASEMENTS_FETCHED_NORMAL, // how many basement nodes were fetched because they were the target of a query
FT_BYTES_BASEMENTS_FETCHED_NORMAL, // how many basement nodes were fetched because they were the target of a query
FT_NANOTIME_BASEMENTS_FETCHED_NORMAL, // how many basement nodes were fetched because they were the target of a query
FT_NUM_BASEMENTS_FETCHED_AGGRESSIVE, // ... because they were between lc and rc FT_NUM_BASEMENTS_FETCHED_AGGRESSIVE, // ... because they were between lc and rc
FT_BYTES_BASEMENTS_FETCHED_AGGRESSIVE, // ... because they were between lc and rc
FT_NANOTIME_BASEMENTS_FETCHED_AGGRESSIVE, // ... because they were between lc and rc
FT_NUM_BASEMENTS_FETCHED_PREFETCH, FT_NUM_BASEMENTS_FETCHED_PREFETCH,
FT_BYTES_BASEMENTS_FETCHED_PREFETCH,
FT_NANOTIME_BASEMENTS_FETCHED_PREFETCH,
FT_NUM_BASEMENTS_FETCHED_WRITE, FT_NUM_BASEMENTS_FETCHED_WRITE,
FT_BYTES_BASEMENTS_FETCHED_WRITE,
FT_NANOTIME_BASEMENTS_FETCHED_WRITE,
FT_NUM_MSG_BUFFER_FETCHED_NORMAL, // how many msg buffers were fetched because they were the target of a query FT_NUM_MSG_BUFFER_FETCHED_NORMAL, // how many msg buffers were fetched because they were the target of a query
FT_BYTES_MSG_BUFFER_FETCHED_NORMAL, // how many msg buffers were fetched because they were the target of a query
FT_NANOTIME_MSG_BUFFER_FETCHED_NORMAL, // how many msg buffers were fetched because they were the target of a query
FT_NUM_MSG_BUFFER_FETCHED_AGGRESSIVE, // ... because they were between lc and rc FT_NUM_MSG_BUFFER_FETCHED_AGGRESSIVE, // ... because they were between lc and rc
FT_BYTES_MSG_BUFFER_FETCHED_AGGRESSIVE, // ... because they were between lc and rc
FT_NANOTIME_MSG_BUFFER_FETCHED_AGGRESSIVE, // ... because they were between lc and rc
FT_NUM_MSG_BUFFER_FETCHED_PREFETCH, FT_NUM_MSG_BUFFER_FETCHED_PREFETCH,
FT_BYTES_MSG_BUFFER_FETCHED_PREFETCH,
FT_NANOTIME_MSG_BUFFER_FETCHED_PREFETCH,
FT_NUM_MSG_BUFFER_FETCHED_WRITE, FT_NUM_MSG_BUFFER_FETCHED_WRITE,
FT_BYTES_MSG_BUFFER_FETCHED_WRITE,
FT_NANOTIME_MSG_BUFFER_FETCHED_WRITE,
FT_PRO_NUM_ROOT_SPLIT, FT_PRO_NUM_ROOT_SPLIT,
FT_PRO_NUM_ROOT_H0_INJECT, FT_PRO_NUM_ROOT_H0_INJECT,
FT_PRO_NUM_ROOT_H1_INJECT, FT_PRO_NUM_ROOT_H1_INJECT,
......
This diff is collapsed.
...@@ -1132,7 +1132,7 @@ static const int read_header_heuristic_max = 32*1024; ...@@ -1132,7 +1132,7 @@ static const int read_header_heuristic_max = 32*1024;
#define MIN(a,b) (((a)>(b)) ? (b) : (a)) #define MIN(a,b) (((a)>(b)) ? (b) : (a))
#endif #endif
static void read_ftnode_header_from_fd_into_rbuf_if_small_enough (int fd, BLOCKNUM blocknum, FT h, struct rbuf *rb) static void read_ftnode_header_from_fd_into_rbuf_if_small_enough (int fd, BLOCKNUM blocknum, FT h, struct rbuf *rb, struct ftnode_fetch_extra *bfe)
// Effect: If the header part of the node is small enough, then read it into the rbuf. The rbuf will be allocated to be big enough in any case. // Effect: If the header part of the node is small enough, then read it into the rbuf. The rbuf will be allocated to be big enough in any case.
{ {
DISKOFF offset, size; DISKOFF offset, size;
...@@ -1142,11 +1142,15 @@ static void read_ftnode_header_from_fd_into_rbuf_if_small_enough (int fd, BLOCKN ...@@ -1142,11 +1142,15 @@ static void read_ftnode_header_from_fd_into_rbuf_if_small_enough (int fd, BLOCKN
rbuf_init(rb, raw_block, read_size); rbuf_init(rb, raw_block, read_size);
{ {
// read the block // read the block
tokutime_t io_t0 = toku_time_now();
ssize_t rlen = toku_os_pread(fd, raw_block, read_size, offset); ssize_t rlen = toku_os_pread(fd, raw_block, read_size, offset);
tokutime_t io_t1 = toku_time_now();
assert(rlen>=0); assert(rlen>=0);
rbuf_init(rb, raw_block, rlen); rbuf_init(rb, raw_block, rlen);
bfe->bytes_read = rlen;
bfe->read_time = io_t1 - io_t0;
toku_ft_status_update_pivot_fetch_reason(bfe);
} }
} }
// //
...@@ -1590,9 +1594,6 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode, ...@@ -1590,9 +1594,6 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode,
goto cleanup; goto cleanup;
} }
// We got the entire header and node info!
toku_ft_status_update_pivot_fetch_reason(bfe);
// Finish reading compressed the sub_block // Finish reading compressed the sub_block
bytevec* cp; bytevec* cp;
cp = (bytevec*)&sb_node_info.compressed_ptr; cp = (bytevec*)&sb_node_info.compressed_ptr;
...@@ -2409,8 +2410,7 @@ deserialize_ftnode_from_fd(int fd, ...@@ -2409,8 +2410,7 @@ deserialize_ftnode_from_fd(int fd,
STAT64INFO info) STAT64INFO info)
{ {
struct rbuf rb = RBUF_INITIALIZER; struct rbuf rb = RBUF_INITIALIZER;
read_block_from_fd_into_rbuf(fd, blocknum, bfe->h, &rb); read_block_from_fd_into_rbuf(fd, blocknum, bfe->h, &rb);
int r = deserialize_ftnode_from_rbuf(ftnode, ndd, blocknum, fullhash, bfe, info, &rb, fd); int r = deserialize_ftnode_from_rbuf(ftnode, ndd, blocknum, fullhash, bfe, info, &rb, fd);
if (r != 0) { if (r != 0) {
dump_bad_block(rb.buf,rb.size); dump_bad_block(rb.buf,rb.size);
...@@ -2433,7 +2433,7 @@ toku_deserialize_ftnode_from (int fd, ...@@ -2433,7 +2433,7 @@ toku_deserialize_ftnode_from (int fd,
toku_trace("deserial start"); toku_trace("deserial start");
int r = 0; int r = 0;
struct rbuf rb = RBUF_INITIALIZER; struct rbuf rb = RBUF_INITIALIZER;
read_ftnode_header_from_fd_into_rbuf_if_small_enough(fd, blocknum, bfe->h, &rb); read_ftnode_header_from_fd_into_rbuf_if_small_enough(fd, blocknum, bfe->h, &rb, bfe);
r = deserialize_ftnode_header_from_rbuf_if_small_enough(ftnode, ndd, blocknum, fullhash, bfe, &rb, fd); r = deserialize_ftnode_header_from_rbuf_if_small_enough(ftnode, ndd, blocknum, fullhash, bfe, &rb, fd);
if (r != 0) { if (r != 0) {
......
...@@ -15,7 +15,7 @@ add_library(${LIBTOKUPORTABILITY}_static STATIC ${tokuportability_srcs}) ...@@ -15,7 +15,7 @@ add_library(${LIBTOKUPORTABILITY}_static STATIC ${tokuportability_srcs})
maybe_add_gcov_to_libraries(${LIBTOKUPORTABILITY} ${LIBTOKUPORTABILITY}_static) maybe_add_gcov_to_libraries(${LIBTOKUPORTABILITY} ${LIBTOKUPORTABILITY}_static)
set_property(TARGET ${LIBTOKUPORTABILITY} ${LIBTOKUPORTABILITY}_static APPEND PROPERTY COMPILE_DEFINITIONS _GNU_SOURCE) set_property(TARGET ${LIBTOKUPORTABILITY} ${LIBTOKUPORTABILITY}_static APPEND PROPERTY COMPILE_DEFINITIONS _GNU_SOURCE)
set_target_properties(${LIBTOKUPORTABILITY}_static PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(${LIBTOKUPORTABILITY}_static PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_link_libraries(${LIBTOKUPORTABILITY} LINK_PUBLIC ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS}) target_link_libraries(${LIBTOKUPORTABILITY} LINK_PUBLIC rt ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS})
set_property(SOURCE file memory os_malloc portability toku_assert toku_rwlock APPEND PROPERTY set_property(SOURCE file memory os_malloc portability toku_assert toku_rwlock APPEND PROPERTY
COMPILE_DEFINITIONS TOKU_ALLOW_DEPRECATED=1) COMPILE_DEFINITIONS TOKU_ALLOW_DEPRECATED=1)
......
...@@ -82,8 +82,8 @@ typedef uint64_t tokutime_t; // Time type used in by tokutek timers. ...@@ -82,8 +82,8 @@ typedef uint64_t tokutime_t; // Time type used in by tokutek timers.
// //
double tokutime_to_seconds(tokutime_t) __attribute__((__visibility__("default"))); // Convert tokutime to seconds. double tokutime_to_seconds(tokutime_t) __attribute__((__visibility__("default"))); // Convert tokutime to seconds.
// Get tokutime. We want this to be fast, so we expose the implementation as RDTSC. // Get the value of tokutime for right now. We want this to be fast, so we expose the implementation as RDTSC.
static inline tokutime_t get_tokutime (void) { static inline tokutime_t toku_time_now(void) {
uint32_t lo, hi; uint32_t lo, hi;
__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
return (uint64_t)hi << 32 | lo; return (uint64_t)hi << 32 | lo;
...@@ -95,11 +95,4 @@ static inline uint64_t toku_current_time_microsec(void) { ...@@ -95,11 +95,4 @@ static inline uint64_t toku_current_time_microsec(void) {
return t.tv_sec * (1UL * 1000 * 1000) + t.tv_usec; return t.tv_sec * (1UL * 1000 * 1000) + t.tv_usec;
} }
static inline uint64_t toku_current_time_nanosec(void) {
struct timespec t;
int r = toku_clock_gettime(CLOCK_REALTIME, &t);
assert(r == 0);
return t.tv_sec * (1UL * 1000 * 1000 * 1000) + t.tv_nsec;
}
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment