fixes #146 Add context class and track the current writer/context in the

frwlock

fixes #146 Add context class and track the current writer/context in the
frwlock
fbc1f593 · John Esmet · 8fc37abd · fbc1f593 · fbc1f593 · fbc1f593
Commit fbc1f593 authored Jan 22, 2014 by John Esmet
12 changed files
--- a/ft/cachetable.cc
+++ b/ft/cachetable.cc
@@ -106,6 +106,7 @@ PATENT RIGHTS GRANT:
 #include <portability/toku_time.h>
 #include <util/rwlock.h>
 #include <util/status.h>
+#include <util/context.h>

 ///////////////////////////////////////////////////////////////////////////////////
 // Engine status
@@ -1483,6 +1484,8 @@ static bool try_pin_pair(
    bool partial_fetch_required = pf_req_callback(p->value_data,read_extraargs);
    
    if (partial_fetch_required) {    
+        toku::context pf_ctx(CTX_PARTIAL_FETCH);
+
        if (ct->ev.should_client_thread_sleep() && !already_slept) {
            pair_lock(p);
            unpin_pair(p, (lock_type == PL_READ));
@@ -1634,6 +1637,8 @@ beginning:
        }
    }
    else {
+        toku::context fetch_ctx(CTX_FULL_FETCH);
+
        ct->list.pair_unlock_by_fullhash(fullhash);
        // we only want to sleep once per call to get_and_pin. If we have already
        // slept and there is still cache pressure, then we might as 
@@ -2068,6 +2073,8 @@ try_again:
    ct->list.pair_lock_by_fullhash(fullhash);
    PAIR p = ct->list.find_pair(cf, key, fullhash);
    if (p == NULL) {
+        toku::context fetch_ctx(CTX_FULL_FETCH);
+
        // Not found
        ct->list.pair_unlock_by_fullhash(fullhash);
        ct->list.write_list_lock();
@@ -2143,6 +2150,8 @@ try_again:
        // still check for partial fetch
        bool partial_fetch_required = pf_req_callback(p->value_data,read_extraargs);
        if (partial_fetch_required) {
+            toku::context fetch_ctx(CTX_PARTIAL_FETCH);
+
            run_unlockers(unlockers);

            // we are now getting an expensive write lock, because we
@@ -3131,6 +3140,8 @@ void cleaner::set_period(uint32_t new_period) {
 // start).  At this point, we can safely unlock the cachetable, do the
 // work (callback), and unlock/release our claim to the cachefile.
 int cleaner::run_cleaner(void) {
+    toku::context cleaner_ctx(CTX_CLEANER);
+
    int r;
    uint32_t num_iterations = this->get_iterations();
    for (uint32_t i = 0; i < num_iterations; ++i) {
@@ -4024,6 +4035,8 @@ bool evictor::run_eviction_on_pair(PAIR curr_in_clock) {
    m_pl->read_list_unlock();
    ret_val = true;
    if (curr_in_clock->count > 0) {
+        toku::context pe_ctx(CTX_PARTIAL_EVICTION);
+
        uint32_t curr_size = curr_in_clock->attr.size;
        // if the size of this PAIR is greater than the average size of PAIRs
        // in the cachetable, then decrement it, otherwise, decrement
@@ -4100,6 +4113,8 @@ bool evictor::run_eviction_on_pair(PAIR curr_in_clock) {
        }        
    }
    else {
+        toku::context pe_ctx(CTX_FULL_EVICTION);
+
        // responsibility of try_evict_pair to eventually remove background job
        // pair's mutex is still grabbed here
        this->try_evict_pair(curr_in_clock);

--- a/ft/ft-cachetable-wrappers.cc
+++ b/ft/ft-cachetable-wrappers.cc
@@ -94,7 +94,8 @@ PATENT RIGHTS GRANT:
 #include <fttypes.h>
 #include <ft-flusher.h>
 #include <ft-internal.h>
-#include "ft.h"
+#include <ft.h>
+#include <util/context.h>

 static void
 ftnode_get_key_and_fullhash(
@@ -252,6 +253,8 @@ toku_pin_ftnode_batched(
            bfe->child_to_read
            );
        if (needs_ancestors_messages) {
+            toku::context apply_messages_ctx(CTX_MESSAGE_APPLICATION);
+
            toku_unpin_ftnode_read_only(brt->ft, node);
            int rr = toku_cachetable_get_and_pin_nonblocking_batched(
                    brt->ft->cf,

--- a/ft/ft-flusher.cc
+++ b/ft/ft-flusher.cc
@@ -97,6 +97,7 @@ PATENT RIGHTS GRANT:
 #include <toku_assert.h>
 #include <portability/toku_atomic.h>
 #include <util/status.h>
+#include <util/context.h>

 /* Status is intended for display to humans to help understand system behavior.
 * It does not need to be perfectly thread-safe.
@@ -1866,6 +1867,7 @@ struct flusher_extra {
 //
 static void flush_node_fun(void *fe_v)
 {
+    toku::context flush_ctx(CTX_FLUSH);
    struct flusher_extra* fe = (struct flusher_extra *) fe_v;
    // The node that has been placed on the background
    // thread may not be fully in memory. Some message
@@ -1950,6 +1952,7 @@ place_node_and_bnc_on_background_thread(
 //
 void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent)
 {
+    toku::context flush_ctx(CTX_FLUSH);
    TXNID oldest_referenced_xid_known = parent->oldest_referenced_xid_known;
    //
    // first let's see if we can detach buffer on client thread

--- a/ft/ft-hot-flusher.cc
+++ b/ft/ft-hot-flusher.cc
@@ -96,6 +96,7 @@ PATENT RIGHTS GRANT:
 #include <ft.h>
 #include <portability/toku_atomic.h>
 #include <util/status.h>
+#include <util/context.h>

 // Member Descirption:
 // 1. highest_pivot_key - this is the key that corresponds to the 
@@ -301,6 +302,8 @@ toku_ft_hot_optimize(FT_HANDLE brt, DBT* left, DBT* right,
                      int (*progress_callback)(void *extra, float progress),
                      void *progress_extra, uint64_t* loops_run)
 {
+    toku::context flush_ctx(CTX_FLUSH);
+
    int r = 0;
    struct hot_flusher_extra flusher;
    struct flusher_advice advice;

--- a/ft/ft-ops.cc
+++ b/ft/ft-ops.cc
@@ -218,6 +218,7 @@ basement nodes, bulk fetch,  and partial fetch:

 #include <portability/toku_atomic.h>

+#include <util/context.h>
 #include <util/mempool.h>
 #include <util/status.h>
 #include <util/rwlock.h>
@@ -2830,6 +2831,7 @@ static void inject_message_at_this_blocknum(FT ft, CACHEKEY cachekey, uint32_t f
 //  Inject cmd into the node at this blocknum (cachekey).
 //  Gets a write lock on the node for you.
 {
+    toku::context inject_ctx(CTX_MESSAGE_INJECTION);
    FTNODE node;
    struct ftnode_fetch_extra bfe;
    fill_bfe_for_full_read(&bfe, ft);
@@ -2954,7 +2956,15 @@ static void push_something_in_subtree(
                    // node locked.
                    struct ftnode_fetch_extra bfe;
                    fill_bfe_for_full_read(&bfe, ft);
-                    toku_pin_ftnode_off_client_thread_batched(ft, child_blocknum, child_fullhash, &bfe, lock_type, 0, nullptr, &child);
+                    if (lock_type == PL_WRITE_CHEAP) {
+                        // We intend to take the write lock for message injection
+                        toku::context inject_ctx(CTX_MESSAGE_INJECTION);
+                        toku_pin_ftnode_off_client_thread_batched(ft, child_blocknum, child_fullhash, &bfe, lock_type, 0, nullptr, &child);
+                    } else {
+                        // We're going to keep promoting
+                        toku::context promo_ctx(CTX_PROMO);
+                        toku_pin_ftnode_off_client_thread_batched(ft, child_blocknum, child_fullhash, &bfe, lock_type, 0, nullptr, &child);
+                    }
                } else {
                    r = toku_maybe_pin_ftnode_clean(ft, child_blocknum, child_fullhash, lock_type, &child);
                    if (r != 0) {
@@ -3060,6 +3070,8 @@ void toku_ft_root_put_cmd(
 //    in any checkpoint that contains this put's logentry.
 //    Holding the mo lock throughout this function ensures that fact.
 {
+    toku::context promo_ctx(CTX_PROMO);
+
    // blackhole fractal trees drop all messages, so do nothing.
    if (ft->blackhole) {
        return;
@@ -5315,6 +5327,8 @@ toku_ft_search (FT_HANDLE brt, ft_search_t *search, FT_GET_CALLBACK_FUNCTION get
    uint trycount = 0;     // How many tries did it take to get the result?
    FT ft = brt->ft;

+    toku::context search_ctx(CTX_SEARCH);
+
 try_again:

    trycount++;
@@ -6366,6 +6380,7 @@ void toku_ft_layer_destroy(void) {
    toku_checkpoint_destroy();
    status_destroy();
    txn_status_destroy();
+    toku_context_status_destroy();
    partitioned_counters_destroy();
    //Portability must be cleaned up last
    toku_portability_destroy();

--- a/src/ydb.cc
+++ b/src/ydb.cc
@@ -111,6 +111,7 @@ const char *toku_copyright_string = "Copyright (c) 2007-2013 Tokutek Inc.  All r
 #include <sys/types.h>

 #include <util/status.h>
+#include <util/context.h>

 #include <ft/ft-flusher.h>
 #include <ft/cachetable.h>
@@ -1914,6 +1915,7 @@ env_get_engine_status_num_rows (DB_ENV * UU(env), uint64_t * num_rowsp) {
    num_rows += FS_STATUS_NUM_ROWS;
    num_rows += INDEXER_STATUS_NUM_ROWS;
    num_rows += LOADER_STATUS_NUM_ROWS;
+    num_rows += CTX_STATUS_NUM_ROWS;
 #if 0
    // enable when upgrade is supported
    num_rows += FT_UPGRADE_STATUS_NUM_ROWS;
@@ -2100,6 +2102,15 @@ env_get_engine_status (DB_ENV * env, TOKU_ENGINE_STATUS_ROW engstat, uint64_t ma
                }
            }
        }
+        {
+            struct context_status ctxstatus;
+            toku_context_get_status(&ctxstatus);
+            for (int i = 0; i < CTX_STATUS_NUM_ROWS && row < maxrows; i++) {
+                if (ctxstatus.status[i].include & include_flags) {
+                    engstat[row++] = ctxstatus.status[i];
+                }
+            }
+        }
 #if 0
        // enable when upgrade is supported
        {

--- a/util/CMakeLists.txt
+++ b/util/CMakeLists.txt
 set(util_srcs
+  context
  kibbutz
  mempool
  partitioned_counter

--- a/util/context.cc
+++ b/util/context.cc
--- a/util/context.h
+++ b/util/context.h
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+#ident "$Id$"
+/*
+COPYING CONDITIONS NOTICE:
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of version 2 of the GNU General Public License as
+  published by the Free Software Foundation, and provided that the
+  following conditions are met:
+
+      * Redistributions of source code must retain this COPYING
+        CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
+        DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
+        PATENT MARKING NOTICE (below), and the PATENT RIGHTS
+        GRANT (below).
+
+      * Redistributions in binary form must reproduce this COPYING
+        CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
+        DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
+        PATENT MARKING NOTICE (below), and the PATENT RIGHTS
+        GRANT (below) in the documentation and/or other materials
+        provided with the distribution.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+  02110-1301, USA.
+
+COPYRIGHT NOTICE:
+
+  TokuDB, Tokutek Fractal Tree Indexing Library.
+  Copyright (C) 2007-2014 Tokutek, Inc.
+
+DISCLAIMER:
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+UNIVERSITY PATENT NOTICE:
+
+  The technology is licensed by the Massachusetts Institute of
+  Technology, Rutgers State University of New Jersey, and the Research
+  Foundation of State University of New York at Stony Brook under
+  United States of America Serial No. 11/760379 and to the patents
+  and/or patent applications resulting from it.
+
+PATENT MARKING NOTICE:
+
+  This software is covered by US Patent No. 8,185,551.
+  This software is covered by US Patent No. 8,489,638.
+
+PATENT RIGHTS GRANT:
+
+  "THIS IMPLEMENTATION" means the copyrightable works distributed by
+  Tokutek as part of the Fractal Tree project.
+
+  "PATENT CLAIMS" means the claims of patents that are owned or
+  licensable by Tokutek, both currently or in the future; and that in
+  the absence of this license would be infringed by THIS
+  IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
+
+  "PATENT CHALLENGE" shall mean a challenge to the validity,
+  patentability, enforceability and/or non-infringement of any of the
+  PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
+
+  Tokutek hereby grants to you, for the term and geographical scope of
+  the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
+  irrevocable (except as stated in this section) patent license to
+  make, have made, use, offer to sell, sell, import, transfer, and
+  otherwise run, modify, and propagate the contents of THIS
+  IMPLEMENTATION, where such license applies only to the PATENT
+  CLAIMS.  This grant does not include claims that would be infringed
+  only as a consequence of further modifications of THIS
+  IMPLEMENTATION.  If you or your agent or licensee institute or order
+  or agree to the institution of patent litigation against any entity
+  (including a cross-claim or counterclaim in a lawsuit) alleging that
+  THIS IMPLEMENTATION constitutes direct or contributory patent
+  infringement, or inducement of patent infringement, then any rights
+  granted to you under this License shall terminate as of the date
+  such litigation is filed.  If you or your agent or exclusive
+  licensee institute or order or agree to the institution of a PATENT
+  CHALLENGE, then Tokutek may terminate any rights granted to you
+  under this License.
+*/
+
+#pragma once
+
+#ident "Copyright (c) 2007-2014 Tokutek Inc.  All rights reserved."
+#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
+
+#include <toku_include/toku_portability.h>
+
+#include <db.h>
+
+#include <util/status.h>
+
+enum context_id {
+    CTX_INVALID = -1,
+    CTX_DEFAULT = 0,          // default context for when no context is set
+    CTX_SEARCH,               // searching for a key at the bottom of the tree
+    CTX_PROMO,                // promoting a message down the tree
+    CTX_FULL_FETCH,           // performing full fetch (pivots + some partial fetch)
+    CTX_PARTIAL_FETCH,        // performing partial fetch
+    CTX_FULL_EVICTION,        // running partial eviction
+    CTX_PARTIAL_EVICTION,     // running partial eviction
+    CTX_MESSAGE_INJECTION,    // injecting a message into a buffer
+    CTX_MESSAGE_APPLICATION,  // applying ancestor's messages to a basement node
+    CTX_FLUSH,                // flushing a buffer
+    CTX_CLEANER               // doing work as the cleaner thread
+};
+
+// Note a contention event in engine status
+void toku_context_note_frwlock_contention(const context_id blocking, const context_id blocked);
+
+namespace toku {
+
+    // class for tracking what a thread is doing
+    //
+    // usage:
+    //
+    // // automatically tag and document what you're doing
+    // void my_interesting_function(void) {
+    //     toku::context ctx("doing something interesting", INTERESTING_FN_1);
+    //     ...
+    //     {
+    //         toku::context inner_ctx("doing something expensive", EXPENSIVE_FN_1);
+    //         my_rwlock.wrlock();
+    //         expensive();
+    //         my_rwlock.wrunlock();
+    //     }
+    //     ...
+    // }
+    //
+    // // ... so later you can write code like this.
+    // // here, we save some info to help determine why a lock could not be acquired
+    // void my_rwlock::wrlock() {
+    //     r = try_acquire_write_lock();
+    //     if (r == 0) {
+    //         m_write_locked_context_id = get_thread_local_context()->get_id();
+    //         ...
+    //     } else {
+    //         if (m_write_locked_context_id == EXPENSIVE_FN_1) {
+    //             status.blocked_because_of_expensive_fn_1++;
+    //         } else if (...) {
+    //            ...
+    //         }
+    //         ...
+    //     }
+    // }
+    class context {
+    public:
+        context(const context_id id); 
+
+        ~context();
+
+        context_id get_id() const {
+            return m_id;
+        }
+
+    private:
+        // each thread has a stack of contexts, rooted at the trivial "root context"
+        const context *m_old_ctx;
+        const context_id m_id;
+    };
+
+} // namespace toku
+
+// Get the current context of this thread
+const toku::context *toku_thread_get_context();
+
+enum context_status_entry {
+    CTX_SEARCH_BLOCKED_BY_FULL_FETCH = 0,
+    CTX_SEARCH_BLOCKED_BY_PARTIAL_FETCH,
+    CTX_SEARCH_BLOCKED_BY_FULL_EVICTION,
+    CTX_SEARCH_BLOCKED_BY_PARTIAL_EVICTION,
+    CTX_SEARCH_BLOCKED_BY_MESSAGE_INJECTION,
+    CTX_SEARCH_BLOCKED_BY_MESSAGE_APPLICATION,
+    CTX_SEARCH_BLOCKED_BY_FLUSH,
+    CTX_SEARCH_BLOCKED_BY_CLEANER,
+    CTX_SEARCH_BLOCKED_OTHER,
+    CTX_PROMO_BLOCKED_BY_FULL_FETCH,
+    CTX_PROMO_BLOCKED_BY_PARTIAL_FETCH,
+    CTX_PROMO_BLOCKED_BY_FULL_EVICTION,
+    CTX_PROMO_BLOCKED_BY_PARTIAL_EVICTION,
+    CTX_PROMO_BLOCKED_BY_MESSAGE_INJECTION,
+    CTX_PROMO_BLOCKED_BY_MESSAGE_APPLICATION,
+    CTX_PROMO_BLOCKED_BY_FLUSH,
+    CTX_PROMO_BLOCKED_BY_CLEANER,
+    CTX_PROMO_BLOCKED_OTHER,
+    CTX_BLOCKED_OTHER,
+    CTX_STATUS_NUM_ROWS
+};
+
+struct context_status {
+    bool initialized;
+    TOKU_ENGINE_STATUS_ROW_S status[CTX_STATUS_NUM_ROWS];
+};
+
+void toku_context_get_status(struct context_status *status);
+
+void toku_context_status_destroy(void);
--- a/util/frwlock.cc
+++ b/util/frwlock.cc
@@ -91,8 +91,18 @@ PATENT RIGHTS GRANT:

 #include <toku_assert.h>

+#include <util/context.h>
+
 namespace toku {

+static __thread int thread_local_tid = -1;
+static int get_local_tid() {
+    if (thread_local_tid == -1) {
+        thread_local_tid = toku_os_gettid();
+    }
+    return thread_local_tid;
+}
+
 void frwlock::init(toku_mutex_t *const mutex) {
    m_mutex = mutex;

@@ -109,6 +119,8 @@ void frwlock::init(toku_mutex_t *const mutex) {
    m_wait_read_is_in_queue = false;
    m_current_writer_expensive = false;
    m_read_wait_expensive = false;
+    m_current_writer_tid = -1;
+    m_blocking_writer_context_id = CTX_INVALID;

    m_wait_head = nullptr;
    m_wait_tail = nullptr;
@@ -160,6 +172,13 @@ void frwlock::write_lock(bool expensive) {
    if (expensive) {
        ++m_num_expensive_want_write;
    }
+    if (m_num_writers == 0 && m_num_want_write == 1) {
+        // We are the first to want a write lock. No new readers can get the lock.
+        // Set our thread id and context for proper instrumentation.
+        // see: toku_context_note_frwlock_contention()
+        m_current_writer_tid = get_local_tid();
+        m_blocking_writer_context_id = toku_thread_get_context()->get_id();
+    }
    toku_cond_wait(&cond, m_mutex);
    toku_cond_destroy(&cond);

@@ -176,6 +195,8 @@ void frwlock::write_lock(bool expensive) {
    }
    m_num_writers = 1;
    m_current_writer_expensive = expensive;
+    m_current_writer_tid = get_local_tid();
+    m_blocking_writer_context_id = toku_thread_get_context()->get_id();
 }

 bool frwlock::try_write_lock(bool expensive) {
@@ -188,6 +209,8 @@ bool frwlock::try_write_lock(bool expensive) {
    paranoid_invariant_zero(m_num_want_read);
    m_num_writers = 1;
    m_current_writer_expensive = expensive;
+    m_current_writer_tid = get_local_tid();
+    m_blocking_writer_context_id = toku_thread_get_context()->get_id();
    return true;
 }

@@ -207,6 +230,12 @@ void frwlock::read_lock(void) {
                );
        }

+        // Note this contention event in engine status.
+        toku_context_note_frwlock_contention(
+            toku_thread_get_context()->get_id(),
+            m_blocking_writer_context_id
+            );
+
        // Wait for our turn.
        ++m_num_want_read;
        toku_cond_wait(&m_wait_read, m_mutex);
@@ -294,6 +323,8 @@ void frwlock::write_unlock(void) {
    paranoid_invariant(m_num_writers == 1);
    m_num_writers = 0;
    m_current_writer_expensive = false;
+    m_current_writer_tid = -1;
+    m_blocking_writer_context_id = CTX_INVALID;
    this->maybe_signal_or_broadcast_next();
 }
 bool frwlock::write_lock_is_expensive(void) {

--- a/util/frwlock.h
+++ b/util/frwlock.h
@@ -95,6 +95,7 @@ PATENT RIGHTS GRANT:
 #include <toku_pthread.h>
 #include <stdbool.h>
 #include <stdint.h>
+#include <util/context.h>

 //TODO: update comment, this is from rwlock.h

@@ -154,6 +155,12 @@ private:
    // is expensive
    // if there are currently no waiting readers, then set to false
    bool m_read_wait_expensive;
+    // thread-id of the current writer
+    int m_current_writer_tid;
+    // context id describing the context of the current writer blocking
+    // new readers (either because this writer holds the write lock or
+    // is the first to want the write lock).
+    context_id m_blocking_writer_context_id;
    
    toku_cond_t m_wait_read;
    queue_item m_queue_item_read;

--- a/util/tests/test-rwlock-cheapness.cc
+++ b/util/tests/test-rwlock-cheapness.cc
@@ -103,6 +103,10 @@ PATENT RIGHTS GRANT:
 #include <util/rwlock.h>
 #include "rwlock_condvar.h"

+// We need to manually intialize partitioned counters so that the
+// ones automatically incremented by the frwlock get handled properly.
+#include <util/partitioned_counter.h>
+
 toku_mutex_t mutex;
 toku::frwlock w;

@@ -288,6 +292,12 @@ static void test_write_cheapness(void) {
 }

 int main (int UU(argc), const char* UU(argv[])) {
+    // Ultra ugly. We manually init/destroy partitioned counters
+    // and context because normally toku_ft_layer_init() does that
+    // for us, but we don't want to initialize everything.
+    partitioned_counters_init();
    test_write_cheapness();
+    toku_context_status_destroy();
+    partitioned_counters_destroy();
    return 0;
 }