Commit df283598 authored by Rich Prohaska's avatar Rich Prohaska

#69 measure long tail lock tree and cache table stalls

parent 2004b5bc
...@@ -558,6 +558,10 @@ private: ...@@ -558,6 +558,10 @@ private:
PARTITIONED_COUNTER m_size_leaf; PARTITIONED_COUNTER m_size_leaf;
PARTITIONED_COUNTER m_size_rollback; PARTITIONED_COUNTER m_size_rollback;
PARTITIONED_COUNTER m_size_cachepressure; PARTITIONED_COUNTER m_size_cachepressure;
PARTITIONED_COUNTER m_wait_pressure_count;
PARTITIONED_COUNTER m_wait_pressure_time;
PARTITIONED_COUNTER m_long_wait_pressure_count;
PARTITIONED_COUNTER m_long_wait_pressure_time;
KIBBUTZ m_kibbutz; KIBBUTZ m_kibbutz;
......
...@@ -146,6 +146,11 @@ status_init(void) { ...@@ -146,6 +146,11 @@ status_init(void) {
STATUS_INIT(CT_CLEANER_EXECUTIONS, CACHETABLE_CLEANER_EXECUTIONS, UINT64, "cleaner executions", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); STATUS_INIT(CT_CLEANER_EXECUTIONS, CACHETABLE_CLEANER_EXECUTIONS, UINT64, "cleaner executions", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(CT_CLEANER_PERIOD, CACHETABLE_CLEANER_PERIOD, UINT64, "cleaner period", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); STATUS_INIT(CT_CLEANER_PERIOD, CACHETABLE_CLEANER_PERIOD, UINT64, "cleaner period", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(CT_CLEANER_ITERATIONS, CACHETABLE_CLEANER_ITERATIONS, UINT64, "cleaner iterations", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); STATUS_INIT(CT_CLEANER_ITERATIONS, CACHETABLE_CLEANER_ITERATIONS, UINT64, "cleaner iterations", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(CT_WAIT_PRESSURE_COUNT, CACHETABLE_WAIT_PRESSURE_COUNT, UINT64, "number of waits on cache pressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(CT_WAIT_PRESSURE_TIME, CACHETABLE_WAIT_PRESSURE_COUNT, UINT64, "time waiting on cache pressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(CT_LONG_WAIT_PRESSURE_COUNT, CACHETABLE_WAIT_PRESSURE_COUNT, UINT64, "number of long waits on cache pressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(CT_LONG_WAIT_PRESSURE_TIME, CACHETABLE_WAIT_PRESSURE_COUNT, UINT64, "long time waiting on cache pressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
ct_status.initialized = true; ct_status.initialized = true;
} }
#undef STATUS_INIT #undef STATUS_INIT
...@@ -3652,6 +3657,10 @@ void evictor::init(long _size_limit, pair_list* _pl, KIBBUTZ _kibbutz, uint32_t ...@@ -3652,6 +3657,10 @@ void evictor::init(long _size_limit, pair_list* _pl, KIBBUTZ _kibbutz, uint32_t
m_size_leaf = create_partitioned_counter(); m_size_leaf = create_partitioned_counter();
m_size_rollback = create_partitioned_counter(); m_size_rollback = create_partitioned_counter();
m_size_cachepressure = create_partitioned_counter(); m_size_cachepressure = create_partitioned_counter();
m_wait_pressure_count = create_partitioned_counter();
m_wait_pressure_time = create_partitioned_counter();
m_long_wait_pressure_count = create_partitioned_counter();
m_long_wait_pressure_time = create_partitioned_counter();
m_pl = _pl; m_pl = _pl;
m_kibbutz = _kibbutz; m_kibbutz = _kibbutz;
...@@ -3706,6 +3715,11 @@ void evictor::destroy() { ...@@ -3706,6 +3715,11 @@ void evictor::destroy() {
destroy_partitioned_counter(m_size_cachepressure); destroy_partitioned_counter(m_size_cachepressure);
m_size_cachepressure = NULL; m_size_cachepressure = NULL;
destroy_partitioned_counter(m_wait_pressure_count); m_wait_pressure_count = NULL;
destroy_partitioned_counter(m_wait_pressure_time); m_wait_pressure_time = NULL;
destroy_partitioned_counter(m_long_wait_pressure_count); m_long_wait_pressure_count = NULL;
destroy_partitioned_counter(m_long_wait_pressure_time); m_long_wait_pressure_time = NULL;
toku_cond_destroy(&m_flow_control_cond); toku_cond_destroy(&m_flow_control_cond);
toku_cond_destroy(&m_ev_thread_cond); toku_cond_destroy(&m_ev_thread_cond);
toku_mutex_destroy(&m_ev_thread_lock); toku_mutex_destroy(&m_ev_thread_lock);
...@@ -4173,13 +4187,21 @@ void evictor::decrease_size_evicting(long size_evicting_estimate) { ...@@ -4173,13 +4187,21 @@ void evictor::decrease_size_evicting(long size_evicting_estimate) {
// size_evicting is number of bytes queued up to be evicted // size_evicting is number of bytes queued up to be evicted
// //
void evictor::wait_for_cache_pressure_to_subside() { void evictor::wait_for_cache_pressure_to_subside() {
uint64_t t0 = toku_current_time_microsec();
toku_mutex_lock(&m_ev_thread_lock); toku_mutex_lock(&m_ev_thread_lock);
m_num_sleepers++; m_num_sleepers++;
this->signal_eviction_thread(); this->signal_eviction_thread();
toku_cond_wait(&m_flow_control_cond, &m_ev_thread_lock); toku_cond_wait(&m_flow_control_cond, &m_ev_thread_lock);
m_num_sleepers--; m_num_sleepers--;
toku_mutex_unlock(&m_ev_thread_lock); toku_mutex_unlock(&m_ev_thread_lock);
uint64_t t1 = toku_current_time_microsec();
increment_partitioned_counter(m_wait_pressure_count, 1);
uint64_t tdelta = t1 - t0;
increment_partitioned_counter(m_wait_pressure_time, tdelta);
if (tdelta > 1000000) {
increment_partitioned_counter(m_long_wait_pressure_count, 1);
increment_partitioned_counter(m_long_wait_pressure_time, tdelta);
}
} }
// //
...@@ -4267,6 +4289,10 @@ void evictor::fill_engine_status() { ...@@ -4267,6 +4289,10 @@ void evictor::fill_engine_status() {
STATUS_VALUE(CT_SIZE_LEAF) = read_partitioned_counter(m_size_leaf); STATUS_VALUE(CT_SIZE_LEAF) = read_partitioned_counter(m_size_leaf);
STATUS_VALUE(CT_SIZE_ROLLBACK) = read_partitioned_counter(m_size_rollback); STATUS_VALUE(CT_SIZE_ROLLBACK) = read_partitioned_counter(m_size_rollback);
STATUS_VALUE(CT_SIZE_CACHEPRESSURE) = read_partitioned_counter(m_size_cachepressure); STATUS_VALUE(CT_SIZE_CACHEPRESSURE) = read_partitioned_counter(m_size_cachepressure);
STATUS_VALUE(CT_WAIT_PRESSURE_COUNT) = read_partitioned_counter(m_wait_pressure_count);
STATUS_VALUE(CT_WAIT_PRESSURE_TIME) = read_partitioned_counter(m_wait_pressure_time);
STATUS_VALUE(CT_LONG_WAIT_PRESSURE_COUNT) = read_partitioned_counter(m_long_wait_pressure_count);
STATUS_VALUE(CT_LONG_WAIT_PRESSURE_TIME) = read_partitioned_counter(m_long_wait_pressure_time);
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
......
...@@ -609,6 +609,10 @@ typedef enum { ...@@ -609,6 +609,10 @@ typedef enum {
CT_CLEANER_EXECUTIONS, // number of times the cleaner thread's loop has executed CT_CLEANER_EXECUTIONS, // number of times the cleaner thread's loop has executed
CT_CLEANER_PERIOD, CT_CLEANER_PERIOD,
CT_CLEANER_ITERATIONS, // number of times the cleaner thread runs the cleaner per period CT_CLEANER_ITERATIONS, // number of times the cleaner thread runs the cleaner per period
CT_WAIT_PRESSURE_COUNT,
CT_WAIT_PRESSURE_TIME,
CT_LONG_WAIT_PRESSURE_COUNT,
CT_LONG_WAIT_PRESSURE_TIME,
CT_STATUS_NUM_ROWS CT_STATUS_NUM_ROWS
} ct_status_entry; } ct_status_entry;
......
...@@ -120,6 +120,10 @@ enum { ...@@ -120,6 +120,10 @@ enum {
LTM_LONG_WAIT_COUNT, LTM_LONG_WAIT_COUNT,
LTM_LONG_WAIT_TIME, LTM_LONG_WAIT_TIME,
LTM_TIMEOUT_COUNT, LTM_TIMEOUT_COUNT,
LTM_WAIT_ESCALATION_COUNT,
LTM_WAIT_ESCALATION_TIME,
LTM_LONG_WAIT_ESCALATION_COUNT,
LTM_LONG_WAIT_ESCALATION_TIME,
LTM_STATUS_NUM_ROWS // must be last LTM_STATUS_NUM_ROWS // must be last
}; };
...@@ -306,6 +310,8 @@ public: ...@@ -306,6 +310,8 @@ public:
void *extra); void *extra);
int iterate_pending_lock_requests(lock_request_iterate_callback cb, void *extra); int iterate_pending_lock_requests(lock_request_iterate_callback cb, void *extra);
void escalation_wait(uint64_t t);
private: private:
static const uint64_t DEFAULT_MAX_LOCK_MEMORY = 64L * 1024 * 1024; static const uint64_t DEFAULT_MAX_LOCK_MEMORY = 64L * 1024 * 1024;
static const uint64_t DEFAULT_LOCK_WAIT_TIME = 0; static const uint64_t DEFAULT_LOCK_WAIT_TIME = 0;
...@@ -319,6 +325,10 @@ public: ...@@ -319,6 +325,10 @@ public:
uint64_t m_escalation_count; uint64_t m_escalation_count;
tokutime_t m_escalation_time; tokutime_t m_escalation_time;
uint64_t m_escalation_latest_result; uint64_t m_escalation_latest_result;
uint64_t m_wait_escalation_count;
uint64_t m_wait_escalation_time;
uint64_t m_long_wait_escalation_count;
uint64_t m_long_wait_escalation_time;
struct lt_counters m_lt_counters; struct lt_counters m_lt_counters;
......
...@@ -104,6 +104,10 @@ void locktree::manager::create(lt_create_cb create_cb, lt_destroy_cb destroy_cb, ...@@ -104,6 +104,10 @@ void locktree::manager::create(lt_create_cb create_cb, lt_destroy_cb destroy_cb,
m_current_lock_memory = 0; m_current_lock_memory = 0;
m_escalation_count = 0; m_escalation_count = 0;
m_escalation_time = 0; m_escalation_time = 0;
m_wait_escalation_count = 0;
m_wait_escalation_time = 0;
m_long_wait_escalation_count = 0;
m_long_wait_escalation_time = 0;
m_escalation_latest_result = 0; m_escalation_latest_result = 0;
m_lock_wait_time_ms = DEFAULT_LOCK_WAIT_TIME; m_lock_wait_time_ms = DEFAULT_LOCK_WAIT_TIME;
m_mem_tracker.set_manager(this); m_mem_tracker.set_manager(this);
...@@ -350,6 +354,7 @@ int locktree::manager::memory_tracker::check_current_lock_constraints(void) { ...@@ -350,6 +354,7 @@ int locktree::manager::memory_tracker::check_current_lock_constraints(void) {
// mutex and check again. if we're still out of locks, run escalation. // mutex and check again. if we're still out of locks, run escalation.
// return an error if we're still out of locks after escalation. // return an error if we're still out of locks after escalation.
if (out_of_locks()) { if (out_of_locks()) {
uint64_t t0 = toku_current_time_microsec();
m_mgr->mutex_lock(); m_mgr->mutex_lock();
if (out_of_locks()) { if (out_of_locks()) {
m_mgr->run_escalation(); m_mgr->run_escalation();
...@@ -357,6 +362,8 @@ int locktree::manager::memory_tracker::check_current_lock_constraints(void) { ...@@ -357,6 +362,8 @@ int locktree::manager::memory_tracker::check_current_lock_constraints(void) {
r = TOKUDB_OUT_OF_LOCKS; r = TOKUDB_OUT_OF_LOCKS;
} }
} }
uint64_t t1 = toku_current_time_microsec();
m_mgr->escalation_wait(t1 - t0);
m_mgr->mutex_unlock(); m_mgr->mutex_unlock();
} }
return r; return r;
...@@ -404,6 +411,15 @@ int locktree::manager::iterate_pending_lock_requests( ...@@ -404,6 +411,15 @@ int locktree::manager::iterate_pending_lock_requests(
return r; return r;
} }
void locktree::manager::escalation_wait(uint64_t t) {
m_wait_escalation_count += 1;
m_wait_escalation_time += t;
if (t >= 1000000) {
m_long_wait_escalation_count += 1;
m_long_wait_escalation_time += t;
}
}
#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(status, k, c, t, "locktree: " l, inc) #define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(status, k, c, t, "locktree: " l, inc)
void locktree::manager::status_init(void) { void locktree::manager::status_init(void) {
...@@ -420,10 +436,15 @@ void locktree::manager::status_init(void) { ...@@ -420,10 +436,15 @@ void locktree::manager::status_init(void) {
STATUS_INIT(LTM_WAIT_COUNT, LOCKTREE_WAIT_COUNT, UINT64, "number of wait locks", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); STATUS_INIT(LTM_WAIT_COUNT, LOCKTREE_WAIT_COUNT, UINT64, "number of wait locks", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(LTM_WAIT_TIME, LOCKTREE_WAIT_TIME, UINT64, "time waiting for locks", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); STATUS_INIT(LTM_WAIT_TIME, LOCKTREE_WAIT_TIME, UINT64, "time waiting for locks", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(LTM_LONG_WAIT_COUNT, LOCKTREE_LONG_WAIT_COUNT, UINT64, "number of long wait locks ", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); STATUS_INIT(LTM_LONG_WAIT_COUNT, LOCKTREE_LONG_WAIT_COUNT, UINT64, "number of long wait locks", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(LTM_LONG_WAIT_TIME, LOCKTREE_LONG_WAIT_TIME, UINT64, "long time waiting for locks", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); STATUS_INIT(LTM_LONG_WAIT_TIME, LOCKTREE_LONG_WAIT_TIME, UINT64, "long time waiting for locks", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(LTM_TIMEOUT_COUNT, LOCKTREE_TIMEOUT_COUNT, UINT64, "number of lock timeouts", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS); STATUS_INIT(LTM_TIMEOUT_COUNT, LOCKTREE_TIMEOUT_COUNT, UINT64, "number of lock timeouts", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(LTM_WAIT_ESCALATION_COUNT, LOCKTREE_WAIT_ESCALATION_COUNT, UINT64, "number of waits on lock escalation", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(LTM_WAIT_ESCALATION_TIME, LOCKTREE_WAIT_ESCALATION_TIME, UINT64, "time waiting on lock escalation", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(LTM_LONG_WAIT_ESCALATION_COUNT, LOCKTREE_LONG_WAIT_ESCALATION_COUNT, UINT64, "number of long waits on lock escalation", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(LTM_LONG_WAIT_ESCALATION_TIME, LOCKTREE_LONG_WAIT_ESCALATION_TIME, UINT64, "long time waiting on lock escalation", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
status.initialized = true; status.initialized = true;
} }
...@@ -441,6 +462,10 @@ void locktree::manager::get_status(LTM_STATUS statp) { ...@@ -441,6 +462,10 @@ void locktree::manager::get_status(LTM_STATUS statp) {
STATUS_VALUE(LTM_ESCALATION_COUNT) = m_escalation_count; STATUS_VALUE(LTM_ESCALATION_COUNT) = m_escalation_count;
STATUS_VALUE(LTM_ESCALATION_TIME) = m_escalation_time; STATUS_VALUE(LTM_ESCALATION_TIME) = m_escalation_time;
STATUS_VALUE(LTM_ESCALATION_LATEST_RESULT) = m_escalation_latest_result; STATUS_VALUE(LTM_ESCALATION_LATEST_RESULT) = m_escalation_latest_result;
STATUS_VALUE(LTM_WAIT_ESCALATION_COUNT) = m_wait_escalation_count;
STATUS_VALUE(LTM_WAIT_ESCALATION_TIME) = m_wait_escalation_time;
STATUS_VALUE(LTM_LONG_WAIT_ESCALATION_COUNT) = m_long_wait_escalation_count;
STATUS_VALUE(LTM_LONG_WAIT_ESCALATION_TIME) = m_long_wait_escalation_time;
mutex_lock(); mutex_lock();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment