Commit e44c7d7a authored by Barry Perlman, committed by Yoni Fogel

[t:4362] Add more checkpoint info to engine status. Refs #4362.

git-svn-id: file:///svn/toku/tokudb@38544 c7de825b-a66e-492c-adef-691d508d4ae1
parent 3d751acc
...@@ -57,7 +57,7 @@ ...@@ -57,7 +57,7 @@
#include "logger.h" #include "logger.h"
#include "checkpoint.h" #include "checkpoint.h"
static CHECKPOINT_STATUS_S status; static CHECKPOINT_STATUS_S cp_status;
static LSN last_completed_checkpoint_lsn; static LSN last_completed_checkpoint_lsn;
static toku_pthread_rwlock_t checkpoint_safe_lock; static toku_pthread_rwlock_t checkpoint_safe_lock;
...@@ -68,7 +68,8 @@ static void (*ydb_lock)(void) = NULL; ...@@ -68,7 +68,8 @@ static void (*ydb_lock)(void) = NULL;
static void (*ydb_unlock)(void) = NULL; static void (*ydb_unlock)(void) = NULL;
static BOOL initialized = FALSE; // sanity check static BOOL initialized = FALSE; // sanity check
static BOOL locked_mo = FALSE; // true when the multi_operation write lock is held (by checkpoint)
static BOOL locked_cs = FALSE; // true when the checkpoint_safe write lock is held (by checkpoint)
// Note following static functions are called from checkpoint internal logic only, // Note following static functions are called from checkpoint internal logic only,
...@@ -83,6 +84,7 @@ multi_operation_lock_init(void) { ...@@ -83,6 +84,7 @@ multi_operation_lock_init(void) {
int r = toku_pthread_rwlock_init(&multi_operation_lock, &attr); int r = toku_pthread_rwlock_init(&multi_operation_lock, &attr);
pthread_rwlockattr_destroy(&attr); pthread_rwlockattr_destroy(&attr);
assert(r == 0); assert(r == 0);
locked_mo = FALSE;
return r; return r;
} }
...@@ -97,12 +99,14 @@ static void ...@@ -97,12 +99,14 @@ static void
multi_operation_checkpoint_lock(void) { multi_operation_checkpoint_lock(void) {
int r = toku_pthread_rwlock_wrlock(&multi_operation_lock); int r = toku_pthread_rwlock_wrlock(&multi_operation_lock);
assert(r == 0); assert(r == 0);
locked_mo = TRUE;
} }
static void static void
multi_operation_checkpoint_unlock(void) { multi_operation_checkpoint_unlock(void) {
int r = toku_pthread_rwlock_wrunlock(&multi_operation_lock); int r = toku_pthread_rwlock_wrunlock(&multi_operation_lock);
assert(r == 0); assert(r == 0);
locked_mo = FALSE;
} }
...@@ -110,6 +114,7 @@ static int ...@@ -110,6 +114,7 @@ static int
checkpoint_safe_lock_init(void) { checkpoint_safe_lock_init(void) {
int r = toku_pthread_rwlock_init(&checkpoint_safe_lock, NULL); int r = toku_pthread_rwlock_init(&checkpoint_safe_lock, NULL);
assert(r == 0); assert(r == 0);
locked_cs = FALSE;
return r; return r;
} }
...@@ -124,12 +129,14 @@ static void ...@@ -124,12 +129,14 @@ static void
checkpoint_safe_checkpoint_lock(void) { checkpoint_safe_checkpoint_lock(void) {
int r = toku_pthread_rwlock_wrlock(&checkpoint_safe_lock); int r = toku_pthread_rwlock_wrlock(&checkpoint_safe_lock);
assert(r == 0); assert(r == 0);
locked_cs = TRUE;
} }
static void static void
checkpoint_safe_checkpoint_unlock(void) { checkpoint_safe_checkpoint_unlock(void) {
int r = toku_pthread_rwlock_wrunlock(&checkpoint_safe_lock); int r = toku_pthread_rwlock_wrunlock(&checkpoint_safe_lock);
assert(r == 0); assert(r == 0);
locked_cs = FALSE;
} }
...@@ -138,6 +145,8 @@ checkpoint_safe_checkpoint_unlock(void) { ...@@ -138,6 +145,8 @@ checkpoint_safe_checkpoint_unlock(void) {
void void
toku_multi_operation_client_lock(void) { toku_multi_operation_client_lock(void) {
if (locked_mo)
(void) __sync_fetch_and_add(&cp_status.client_wait_on_mo, 1);
int r = toku_pthread_rwlock_rdlock(&multi_operation_lock); int r = toku_pthread_rwlock_rdlock(&multi_operation_lock);
assert(r == 0); assert(r == 0);
} }
...@@ -150,6 +159,8 @@ toku_multi_operation_client_unlock(void) { ...@@ -150,6 +159,8 @@ toku_multi_operation_client_unlock(void) {
void void
toku_checkpoint_safe_client_lock(void) { toku_checkpoint_safe_client_lock(void) {
if (locked_cs)
(void) __sync_fetch_and_add(&cp_status.client_wait_on_cs, 1);
int r = toku_pthread_rwlock_rdlock(&checkpoint_safe_lock); int r = toku_pthread_rwlock_rdlock(&checkpoint_safe_lock);
assert(r == 0); assert(r == 0);
toku_multi_operation_client_lock(); toku_multi_operation_client_lock();
...@@ -165,7 +176,7 @@ toku_checkpoint_safe_client_unlock(void) { ...@@ -165,7 +176,7 @@ toku_checkpoint_safe_client_unlock(void) {
void void
toku_checkpoint_get_status(CHECKPOINT_STATUS s) { toku_checkpoint_get_status(CHECKPOINT_STATUS s) {
*s = status; *s = cp_status;
} }
...@@ -196,7 +207,7 @@ toku_checkpoint_destroy(void) { ...@@ -196,7 +207,7 @@ toku_checkpoint_destroy(void) {
return r; return r;
} }
#define SET_CHECKPOINT_FOOTPRINT(x) status.footprint = footprint_offset + x; #define SET_CHECKPOINT_FOOTPRINT(x) cp_status.footprint = footprint_offset + x;
// Take a checkpoint of all currently open dictionaries // Take a checkpoint of all currently open dictionaries
...@@ -209,19 +220,29 @@ toku_checkpoint(CACHETABLE ct, TOKULOGGER logger, ...@@ -209,19 +220,29 @@ toku_checkpoint(CACHETABLE ct, TOKULOGGER logger,
int footprint_offset = (int) caller_id * 1000; int footprint_offset = (int) caller_id * 1000;
assert(initialized); assert(initialized);
(void) __sync_fetch_and_add(&status.waiters_now, 1); (void) __sync_fetch_and_add(&cp_status.waiters_now, 1);
if (locked_cs) {
if (caller_id == SCHEDULED_CHECKPOINT)
(void) __sync_fetch_and_add(&cp_status.cp_wait_sched, 1);
else if (caller_id == CLIENT_CHECKPOINT)
(void) __sync_fetch_and_add(&cp_status.cp_wait_client, 1);
else if (caller_id == TXN_COMMIT_CHECKPOINT)
(void) __sync_fetch_and_add(&cp_status.cp_wait_txn, 1);
else
(void) __sync_fetch_and_add(&cp_status.cp_wait_other, 1);
}
checkpoint_safe_checkpoint_lock(); checkpoint_safe_checkpoint_lock();
(void) __sync_fetch_and_sub(&status.waiters_now, 1); (void) __sync_fetch_and_sub(&cp_status.waiters_now, 1);
if (status.waiters_now > status.waiters_max) if (cp_status.waiters_now > cp_status.waiters_max)
status.waiters_max = status.waiters_now; // threadsafe, within checkpoint_safe lock cp_status.waiters_max = cp_status.waiters_now; // threadsafe, within checkpoint_safe lock
SET_CHECKPOINT_FOOTPRINT(10) SET_CHECKPOINT_FOOTPRINT(10)
multi_operation_checkpoint_lock(); multi_operation_checkpoint_lock();
SET_CHECKPOINT_FOOTPRINT(20) SET_CHECKPOINT_FOOTPRINT(20)
ydb_lock(); ydb_lock();
SET_CHECKPOINT_FOOTPRINT(30) SET_CHECKPOINT_FOOTPRINT(30)
status.time_last_checkpoint_begin = time(NULL); cp_status.time_last_checkpoint_begin = time(NULL);
r = toku_cachetable_begin_checkpoint(ct, logger); r = toku_cachetable_begin_checkpoint(ct, logger);
multi_operation_checkpoint_unlock(); multi_operation_checkpoint_unlock();
...@@ -237,30 +258,29 @@ toku_checkpoint(CACHETABLE ct, TOKULOGGER logger, ...@@ -237,30 +258,29 @@ toku_checkpoint(CACHETABLE ct, TOKULOGGER logger,
if (r==0 && logger) { if (r==0 && logger) {
last_completed_checkpoint_lsn = logger->last_completed_checkpoint_lsn; last_completed_checkpoint_lsn = logger->last_completed_checkpoint_lsn;
r = toku_logger_maybe_trim_log(logger, last_completed_checkpoint_lsn); r = toku_logger_maybe_trim_log(logger, last_completed_checkpoint_lsn);
status.last_lsn = last_completed_checkpoint_lsn.lsn; cp_status.last_lsn = last_completed_checkpoint_lsn.lsn;
} }
SET_CHECKPOINT_FOOTPRINT(60); SET_CHECKPOINT_FOOTPRINT(60);
status.time_last_checkpoint_end = time(NULL); cp_status.time_last_checkpoint_end = time(NULL);
status.time_last_checkpoint_begin_complete = status.time_last_checkpoint_begin; cp_status.time_last_checkpoint_begin_complete = cp_status.time_last_checkpoint_begin;
if (r == 0) if (r == 0)
status.checkpoint_count++; cp_status.checkpoint_count++;
else else
status.checkpoint_count_fail++; cp_status.checkpoint_count_fail++;
status.footprint = 0; cp_status.footprint = 0;
checkpoint_safe_checkpoint_unlock(); checkpoint_safe_checkpoint_unlock();
return r; return r;
} }
#undef SET_CHECKPOINT_FOOTPRINT #undef SET_CHECKPOINT_FOOTPRINT
// Can we get rid of this (placating drd), now that all status is updated when holding the checkpoint_safe lock?
#include <valgrind/drd.h> #include <valgrind/drd.h>
void __attribute__((__constructor__)) toku_checkpoint_drd_ignore(void); void __attribute__((__constructor__)) toku_checkpoint_drd_ignore(void);
void void
toku_checkpoint_drd_ignore(void) { toku_checkpoint_drd_ignore(void) {
DRD_IGNORE_VAR(status); DRD_IGNORE_VAR(cp_status);
} }
...@@ -96,6 +96,12 @@ typedef struct { ...@@ -96,6 +96,12 @@ typedef struct {
uint64_t checkpoint_count_fail; uint64_t checkpoint_count_fail;
uint64_t waiters_now; // how many threads are currently waiting for the checkpoint_safe lock to perform a checkpoint uint64_t waiters_now; // how many threads are currently waiting for the checkpoint_safe lock to perform a checkpoint
uint64_t waiters_max; // max threads ever simultaneously waiting for the checkpoint_safe lock to perform a checkpoint uint64_t waiters_max; // max threads ever simultaneously waiting for the checkpoint_safe lock to perform a checkpoint
uint64_t client_wait_on_mo; // how many times a client thread waited for the multi_operation lock
uint64_t client_wait_on_cs; // how many times a client thread waited for the checkpoint_safe lock
uint64_t cp_wait_sched; // how many times a scheduled checkpoint waited for the checkpoint_safe lock
uint64_t cp_wait_client; // how many times a client checkpoint waited for the checkpoint_safe lock
uint64_t cp_wait_txn; // how many times a txn_commit checkpoint waited for the checkpoint_safe lock
uint64_t cp_wait_other; // how many times a checkpoint for another purpose waited for the checkpoint_safe lock
} CHECKPOINT_STATUS_S, *CHECKPOINT_STATUS; } CHECKPOINT_STATUS_S, *CHECKPOINT_STATUS;
void toku_checkpoint_get_status(CHECKPOINT_STATUS stat); void toku_checkpoint_get_status(CHECKPOINT_STATUS stat);
......
...@@ -1934,6 +1934,12 @@ env_get_engine_status(DB_ENV * env, ENGINE_STATUS * engstat, char * env_panic_st ...@@ -1934,6 +1934,12 @@ env_get_engine_status(DB_ENV * env, ENGINE_STATUS * engstat, char * env_panic_st
engstat->checkpoint_count_fail = cpstat.checkpoint_count_fail; engstat->checkpoint_count_fail = cpstat.checkpoint_count_fail;
engstat->checkpoint_waiters_now = cpstat.waiters_now; engstat->checkpoint_waiters_now = cpstat.waiters_now;
engstat->checkpoint_waiters_max = cpstat.waiters_max; engstat->checkpoint_waiters_max = cpstat.waiters_max;
engstat->checkpoint_client_wait_on_mo = cpstat.client_wait_on_mo;
engstat->checkpoint_client_wait_on_cs = cpstat.client_wait_on_cs;
engstat->checkpoint_wait_sched = cpstat.cp_wait_sched;
engstat->checkpoint_wait_client = cpstat.cp_wait_client;
engstat->checkpoint_wait_txn = cpstat.cp_wait_txn;
engstat->checkpoint_wait_other = cpstat.cp_wait_other;
} }
engstat->cleaner_period = toku_get_cleaner_period_unlocked(env->i->cachetable); engstat->cleaner_period = toku_get_cleaner_period_unlocked(env->i->cachetable);
engstat->cleaner_iterations = toku_get_cleaner_iterations_unlocked(env->i->cachetable); engstat->cleaner_iterations = toku_get_cleaner_iterations_unlocked(env->i->cachetable);
...@@ -2276,6 +2282,12 @@ env_get_engine_status_text(DB_ENV * env, char * buff, int bufsiz) { ...@@ -2276,6 +2282,12 @@ env_get_engine_status_text(DB_ENV * env, char * buff, int bufsiz) {
n += snprintf(buff + n, bufsiz - n, "checkpoint_count_fail %"PRIu64"\n", engstat.checkpoint_count_fail); n += snprintf(buff + n, bufsiz - n, "checkpoint_count_fail %"PRIu64"\n", engstat.checkpoint_count_fail);
n += snprintf(buff + n, bufsiz - n, "checkpoint_waiters_now %"PRIu64"\n", engstat.checkpoint_waiters_now); n += snprintf(buff + n, bufsiz - n, "checkpoint_waiters_now %"PRIu64"\n", engstat.checkpoint_waiters_now);
n += snprintf(buff + n, bufsiz - n, "checkpoint_waiters_max %"PRIu64"\n", engstat.checkpoint_waiters_max); n += snprintf(buff + n, bufsiz - n, "checkpoint_waiters_max %"PRIu64"\n", engstat.checkpoint_waiters_max);
n += snprintf(buff + n, bufsiz - n, "checkpoint_client_wait_on_mo %"PRIu64"\n", engstat.checkpoint_client_wait_on_mo);
n += snprintf(buff + n, bufsiz - n, "checkpoint_client_wait_on_cs %"PRIu64"\n", engstat.checkpoint_client_wait_on_cs);
n += snprintf(buff + n, bufsiz - n, "checkpoint_wait_sched %"PRIu64"\n", engstat.checkpoint_wait_sched);
n += snprintf(buff + n, bufsiz - n, "checkpoint_wait_client %"PRIu64"\n", engstat.checkpoint_wait_client);
n += snprintf(buff + n, bufsiz - n, "checkpoint_wait_txn %"PRIu64"\n", engstat.checkpoint_wait_txn);
n += snprintf(buff + n, bufsiz - n, "checkpoint_wait_other %"PRIu64"\n", engstat.checkpoint_wait_other);
n += snprintf(buff + n, bufsiz - n, "cleaner_period %"PRIu64"\n", engstat.cleaner_period); n += snprintf(buff + n, bufsiz - n, "cleaner_period %"PRIu64"\n", engstat.cleaner_period);
n += snprintf(buff + n, bufsiz - n, "cleaner_iterations %"PRIu64"\n", engstat.cleaner_iterations); n += snprintf(buff + n, bufsiz - n, "cleaner_iterations %"PRIu64"\n", engstat.cleaner_iterations);
n += snprintf(buff + n, bufsiz - n, "txn_begin %"PRIu64"\n", engstat.txn_begin); n += snprintf(buff + n, bufsiz - n, "txn_begin %"PRIu64"\n", engstat.txn_begin);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment