Commit c8993dec authored by Rich Prohaska's avatar Rich Prohaska

#141 identify a big transaction and have it run lock escalation

parent 96a2a6fc
...@@ -125,9 +125,7 @@ void lock_request::destroy(void) { ...@@ -125,9 +125,7 @@ void lock_request::destroy(void) {
} }
// set the lock request parameters. this API allows a lock request to be reused. // set the lock request parameters. this API allows a lock request to be reused.
void lock_request::set(locktree *lt, TXNID txnid, void lock_request::set(locktree *lt, TXNID txnid, const DBT *left_key, const DBT *right_key, lock_request::type lock_type, bool big_txn) {
const DBT *left_key, const DBT *right_key,
lock_request::type lock_type) {
invariant(m_state != state::PENDING); invariant(m_state != state::PENDING);
m_lt = lt; m_lt = lt;
m_txnid = txnid; m_txnid = txnid;
...@@ -138,6 +136,7 @@ void lock_request::set(locktree *lt, TXNID txnid, ...@@ -138,6 +136,7 @@ void lock_request::set(locktree *lt, TXNID txnid,
m_type = lock_type; m_type = lock_type;
m_state = state::INITIALIZED; m_state = state::INITIALIZED;
m_info = lt->get_lock_request_info(); m_info = lt->get_lock_request_info();
m_big_txn = big_txn;
} }
// get rid of any stored left and right key copies and // get rid of any stored left and right key copies and
...@@ -207,10 +206,10 @@ int lock_request::start(void) { ...@@ -207,10 +206,10 @@ int lock_request::start(void) {
txnid_set conflicts; txnid_set conflicts;
conflicts.create(); conflicts.create();
if (m_type == type::WRITE) { if (m_type == type::WRITE) {
r = m_lt->acquire_write_lock(m_txnid, m_left_key, m_right_key, &conflicts); r = m_lt->acquire_write_lock(m_txnid, m_left_key, m_right_key, &conflicts, m_big_txn);
} else { } else {
invariant(m_type == type::READ); invariant(m_type == type::READ);
r = m_lt->acquire_read_lock(m_txnid, m_left_key, m_right_key, &conflicts); r = m_lt->acquire_read_lock(m_txnid, m_left_key, m_right_key, &conflicts, m_big_txn);
} }
// if the lock is not granted, save it to the set of lock requests // if the lock is not granted, save it to the set of lock requests
...@@ -310,9 +309,9 @@ int lock_request::retry(void) { ...@@ -310,9 +309,9 @@ int lock_request::retry(void) {
invariant(m_state == state::PENDING); invariant(m_state == state::PENDING);
if (m_type == type::WRITE) { if (m_type == type::WRITE) {
r = m_lt->acquire_write_lock(m_txnid, m_left_key, m_right_key, nullptr); r = m_lt->acquire_write_lock(m_txnid, m_left_key, m_right_key, nullptr, m_big_txn);
} else { } else {
r = m_lt->acquire_read_lock(m_txnid, m_left_key, m_right_key, nullptr); r = m_lt->acquire_read_lock(m_txnid, m_left_key, m_right_key, nullptr, m_big_txn);
} }
// if the acquisition succeeded then remove ourselves from the // if the acquisition succeeded then remove ourselves from the
......
...@@ -133,8 +133,7 @@ public: ...@@ -133,8 +133,7 @@ public:
// effect: Resets the lock request parameters, allowing it to be reused. // effect: Resets the lock request parameters, allowing it to be reused.
// requires: Lock request was already created at some point // requires: Lock request was already created at some point
void set(locktree *lt, TXNID txnid, void set(locktree *lt, TXNID txnid, const DBT *left_key, const DBT *right_key, type lock_type, bool big_txn);
const DBT *left_key, const DBT *right_key, type lock_type);
// effect: Tries to acquire a lock described by this lock request. // effect: Tries to acquire a lock described by this lock request.
// returns: The return code of locktree::acquire_[write,read]_lock() // returns: The return code of locktree::acquire_[write,read]_lock()
...@@ -198,6 +197,8 @@ private: ...@@ -198,6 +197,8 @@ private:
toku_cond_t m_wait_cond; toku_cond_t m_wait_cond;
bool m_big_txn;
// the lock request info state stored in the // the lock request info state stored in the
// locktree that this lock request is for. // locktree that this lock request is for.
struct locktree::lt_lock_request_info *m_info; struct locktree::lt_lock_request_info *m_info;
......
...@@ -119,6 +119,7 @@ namespace toku { ...@@ -119,6 +119,7 @@ namespace toku {
void locktree::create(manager::memory_tracker *mem_tracker, DICTIONARY_ID dict_id, void locktree::create(manager::memory_tracker *mem_tracker, DICTIONARY_ID dict_id,
DESCRIPTOR desc, ft_compare_func cmp) { DESCRIPTOR desc, ft_compare_func cmp) {
m_mem_tracker = mem_tracker; m_mem_tracker = mem_tracker;
m_mgr = mem_tracker->get_manager();
m_dict_id = dict_id; m_dict_id = dict_id;
// the only reason m_cmp is malloc'd here is to prevent gdb from printing // the only reason m_cmp is malloc'd here is to prevent gdb from printing
...@@ -410,8 +411,8 @@ int locktree::acquire_lock(bool is_write_request, TXNID txnid, ...@@ -410,8 +411,8 @@ int locktree::acquire_lock(bool is_write_request, TXNID txnid,
} }
int locktree::try_acquire_lock(bool is_write_request, TXNID txnid, int locktree::try_acquire_lock(bool is_write_request, TXNID txnid,
const DBT *left_key, const DBT *right_key, txnid_set *conflicts) { const DBT *left_key, const DBT *right_key, txnid_set *conflicts, bool big_txn) {
int r = m_mem_tracker->check_current_lock_constraints(); int r = m_mgr->check_current_lock_constraints(big_txn);
if (r == 0) { if (r == 0) {
r = acquire_lock(is_write_request, txnid, left_key, right_key, conflicts); r = acquire_lock(is_write_request, txnid, left_key, right_key, conflicts);
} }
...@@ -420,13 +421,13 @@ int locktree::try_acquire_lock(bool is_write_request, TXNID txnid, ...@@ -420,13 +421,13 @@ int locktree::try_acquire_lock(bool is_write_request, TXNID txnid,
// the locktree silently upgrades read locks to write locks for simplicity // the locktree silently upgrades read locks to write locks for simplicity
int locktree::acquire_read_lock(TXNID txnid, int locktree::acquire_read_lock(TXNID txnid,
const DBT *left_key, const DBT *right_key, txnid_set *conflicts) { const DBT *left_key, const DBT *right_key, txnid_set *conflicts, bool big_txn) {
return acquire_write_lock(txnid, left_key, right_key, conflicts); return acquire_write_lock(txnid, left_key, right_key, conflicts, big_txn);
} }
int locktree::acquire_write_lock(TXNID txnid, int locktree::acquire_write_lock(TXNID txnid,
const DBT *left_key, const DBT *right_key, txnid_set *conflicts) { const DBT *left_key, const DBT *right_key, txnid_set *conflicts, bool big_txn) {
return try_acquire_lock(true, txnid, left_key, right_key, conflicts); return try_acquire_lock(true, txnid, left_key, right_key, conflicts, big_txn);
} }
void locktree::get_conflicts(bool is_write_request, TXNID txnid, void locktree::get_conflicts(bool is_write_request, TXNID txnid,
......
...@@ -105,6 +105,11 @@ PATENT RIGHTS GRANT: ...@@ -105,6 +105,11 @@ PATENT RIGHTS GRANT:
#include "wfg.h" #include "wfg.h"
#include "range_buffer.h" #include "range_buffer.h"
#define TOKU_LOCKTREE_ESCALATOR_LAMBDA 0
#if TOKU_LOCKTREE_ESCALATOR_LAMBDA
#include <functional>
#endif
enum { enum {
LTM_SIZE_CURRENT = 0, LTM_SIZE_CURRENT = 0,
LTM_SIZE_LIMIT, LTM_SIZE_LIMIT,
...@@ -164,15 +169,13 @@ public: ...@@ -164,15 +169,13 @@ public:
// If the locktree cannot create more locks, return TOKUDB_OUT_OF_LOCKS. // If the locktree cannot create more locks, return TOKUDB_OUT_OF_LOCKS.
// note: Read locks cannot be shared between txnids, as one would expect. // note: Read locks cannot be shared between txnids, as one would expect.
// This is for simplicity since read locks are rare in MySQL. // This is for simplicity since read locks are rare in MySQL.
int acquire_read_lock(TXNID txnid, int acquire_read_lock(TXNID txnid, const DBT *left_key, const DBT *right_key, txnid_set *conflicts, bool big_txn);
const DBT *left_key, const DBT *right_key, txnid_set *conflicts);
// effect: Attempts to grant a write lock for the range of keys between [left_key, right_key]. // effect: Attempts to grant a write lock for the range of keys between [left_key, right_key].
// returns: If the lock cannot be granted, return DB_LOCK_NOTGRANTED, and populate the // returns: If the lock cannot be granted, return DB_LOCK_NOTGRANTED, and populate the
// given conflicts set with the txnids that hold conflicting locks in the range. // given conflicts set with the txnids that hold conflicting locks in the range.
// If the locktree cannot create more locks, return TOKUDB_OUT_OF_LOCKS. // If the locktree cannot create more locks, return TOKUDB_OUT_OF_LOCKS.
int acquire_write_lock(TXNID txnid, int acquire_write_lock(TXNID txnid, const DBT *left_key, const DBT *right_key, txnid_set *conflicts, bool big_txn);
const DBT *left_key, const DBT *right_key, txnid_set *conflicts);
// effect: populate the conflicts set with the txnids that would preventing // effect: populate the conflicts set with the txnids that would preventing
// the given txnid from getting a lock on [left_key, right_key] // the given txnid from getting a lock on [left_key, right_key]
...@@ -215,6 +218,25 @@ public: ...@@ -215,6 +218,25 @@ public:
// since the lock_request object is opaque // since the lock_request object is opaque
struct lt_lock_request_info *get_lock_request_info(void); struct lt_lock_request_info *get_lock_request_info(void);
class manager;
// the escalator coordinates escalation on a set of locktrees for a bunch of threads
class escalator {
public:
void create(void);
void destroy(void);
#if TOKU_LOCKTREE_ESCALATOR_LAMBDA
void run(manager *mgr, std::function<void (void)> escalate_locktrees_fun);
#else
void run(manager *mgr, void (*escalate_locktrees_fun)(void *extra), void *extra);
#endif
private:
toku_mutex_t m_escalator_mutex;
toku_cond_t m_escalator_done;
bool m_escalator_running;
};
ENSURE_POD(escalator);
// The locktree manager manages a set of locktrees, // The locktree manager manages a set of locktrees,
// one for each open dictionary. Locktrees are accessed through // one for each open dictionary. Locktrees are accessed through
// the manager, and when they are no longer needed, they can // the manager, and when they are no longer needed, they can
...@@ -265,6 +287,7 @@ public: ...@@ -265,6 +287,7 @@ public:
class memory_tracker { class memory_tracker {
public: public:
void set_manager(manager *mgr); void set_manager(manager *mgr);
manager *get_manager(void);
// effect: Determines if too many locks or too much memory is being used, // effect: Determines if too many locks or too much memory is being used,
// Runs escalation on the manager if so. // Runs escalation on the manager if so.
...@@ -273,6 +296,8 @@ public: ...@@ -273,6 +296,8 @@ public:
// enough resources for a new lock. // enough resources for a new lock.
int check_current_lock_constraints(void); int check_current_lock_constraints(void);
bool over_big_threshold(void);
void note_mem_used(uint64_t mem_used); void note_mem_used(uint64_t mem_used);
void note_mem_released(uint64_t mem_freed); void note_mem_released(uint64_t mem_freed);
...@@ -297,6 +322,7 @@ public: ...@@ -297,6 +322,7 @@ public:
// rationale: to get better stress test coverage, we want a way to // rationale: to get better stress test coverage, we want a way to
// deterministicly trigger lock escalation. // deterministicly trigger lock escalation.
void run_escalation_for_test(void); void run_escalation_for_test(void);
void run_escalation(void);
void get_status(LTM_STATUS status); void get_status(LTM_STATUS status);
...@@ -311,6 +337,20 @@ public: ...@@ -311,6 +337,20 @@ public:
void *extra); void *extra);
int iterate_pending_lock_requests(lock_request_iterate_callback cb, void *extra); int iterate_pending_lock_requests(lock_request_iterate_callback cb, void *extra);
int check_current_lock_constraints(bool big_txn);
// Escalate locktrees touched by a txn
void escalate_lock_trees_for_txn(TXNID, locktree *lt);
// Escalate all locktrees
void escalate_all_locktrees(void);
// Escalate a set of locktrees
void escalate_locktrees(locktree **locktrees, int num_locktrees);
// Add time t to the escalator's wait time statistics
void add_escalator_wait_time(uint64_t t);
private: private:
static const uint64_t DEFAULT_MAX_LOCK_MEMORY = 64L * 1024 * 1024; static const uint64_t DEFAULT_MAX_LOCK_MEMORY = 64L * 1024 * 1024;
static const uint64_t DEFAULT_LOCK_WAIT_TIME = 0; static const uint64_t DEFAULT_LOCK_WAIT_TIME = 0;
...@@ -357,23 +397,14 @@ public: ...@@ -357,23 +397,14 @@ public:
// requires: Manager's mutex is held // requires: Manager's mutex is held
void locktree_map_remove(locktree *lt); void locktree_map_remove(locktree *lt);
// effect: Runs escalation on all locktrees.
void run_escalation(void);
static int find_by_dict_id(locktree *const &lt, const DICTIONARY_ID &dict_id); static int find_by_dict_id(locktree *const &lt, const DICTIONARY_ID &dict_id);
void escalator_init(void); void escalator_init(void);
void escalator_destroy(void); void escalator_destroy(void);
// effect: Add time t to the escalator's wait time statistics
void add_escalator_wait_time(uint64_t t);
// effect: escalate's the locks in each locktree
// requires: manager's mutex is held
void escalate_all_locktrees(void);
// statistics about lock escalation. // statistics about lock escalation.
toku_mutex_t m_escalation_mutex;
uint64_t m_escalation_count; uint64_t m_escalation_count;
tokutime_t m_escalation_time; tokutime_t m_escalation_time;
uint64_t m_escalation_latest_result; uint64_t m_escalation_latest_result;
...@@ -382,25 +413,16 @@ public: ...@@ -382,25 +413,16 @@ public:
uint64_t m_long_wait_escalation_count; uint64_t m_long_wait_escalation_count;
uint64_t m_long_wait_escalation_time; uint64_t m_long_wait_escalation_time;
toku_mutex_t m_escalator_mutex; escalator m_escalator;
#define DO_ESCALATOR_THREAD 0
#if DO_ESCALATOR_THREAD
toku_cond_t m_escalator_work; // signal the escalator to run
toku_cond_t m_escalator_done; // signal that escalation is done
bool m_escalator_killed;
toku_pthread_t m_escalator_id;
#endif
friend class manager_unit_test; friend class manager_unit_test;
public:
void escalator_work(void);
}; };
ENSURE_POD(manager); ENSURE_POD(manager);
manager::memory_tracker *get_mem_tracker(void) const; manager::memory_tracker *get_mem_tracker(void) const;
private: private:
manager *m_mgr;
manager::memory_tracker *m_mem_tracker; manager::memory_tracker *m_mem_tracker;
DICTIONARY_ID m_dict_id; DICTIONARY_ID m_dict_id;
...@@ -418,7 +440,6 @@ private: ...@@ -418,7 +440,6 @@ private:
uint32_t m_reference_count; uint32_t m_reference_count;
// the locktree stores locks in a concurrent, non-overlapping rangetree
concurrent_tree *m_rangetree; concurrent_tree *m_rangetree;
void *m_userdata; void *m_userdata;
...@@ -590,7 +611,7 @@ private: ...@@ -590,7 +611,7 @@ private:
const DBT *left_key, const DBT *right_key, txnid_set *conflicts); const DBT *left_key, const DBT *right_key, txnid_set *conflicts);
int try_acquire_lock(bool is_write_request, TXNID txnid, int try_acquire_lock(bool is_write_request, TXNID txnid,
const DBT *left_key, const DBT *right_key, txnid_set *conflicts); const DBT *left_key, const DBT *right_key, txnid_set *conflicts, bool big_txn);
void escalate(manager::lt_escalate_cb after_escalate_callback, void *extra); void escalate(manager::lt_escalate_cb after_escalate_callback, void *extra);
......
...@@ -103,7 +103,6 @@ namespace toku { ...@@ -103,7 +103,6 @@ namespace toku {
void locktree::manager::create(lt_create_cb create_cb, lt_destroy_cb destroy_cb, lt_escalate_cb escalate_cb, void *escalate_extra) { void locktree::manager::create(lt_create_cb create_cb, lt_destroy_cb destroy_cb, lt_escalate_cb escalate_cb, void *escalate_extra) {
m_max_lock_memory = DEFAULT_MAX_LOCK_MEMORY; m_max_lock_memory = DEFAULT_MAX_LOCK_MEMORY;
m_current_lock_memory = 0; m_current_lock_memory = 0;
escalator_init();
m_lock_wait_time_ms = DEFAULT_LOCK_WAIT_TIME; m_lock_wait_time_ms = DEFAULT_LOCK_WAIT_TIME;
m_mem_tracker.set_manager(this); m_mem_tracker.set_manager(this);
...@@ -118,6 +117,8 @@ void locktree::manager::create(lt_create_cb create_cb, lt_destroy_cb destroy_cb, ...@@ -118,6 +117,8 @@ void locktree::manager::create(lt_create_cb create_cb, lt_destroy_cb destroy_cb,
ZERO_STRUCT(status); ZERO_STRUCT(status);
ZERO_STRUCT(m_lt_counters); ZERO_STRUCT(m_lt_counters);
escalator_init();
} }
void locktree::manager::destroy(void) { void locktree::manager::destroy(void) {
...@@ -317,31 +318,44 @@ void locktree::manager::release_lt(locktree *lt) { ...@@ -317,31 +318,44 @@ void locktree::manager::release_lt(locktree *lt) {
} }
// test-only version of lock escalation // test-only version of lock escalation
#if TOKU_LOCKTREE_ESCALATOR_LAMBDA
void locktree::manager::run_escalation(void) {
m_escalator.run(this, [this] () -> void { escalate_all_locktrees(); });
}
#else
static void manager_run_escalation_fun(void *extra) {
locktree::manager *thismanager = (locktree::manager *) extra;
thismanager->escalate_all_locktrees();
}
void locktree::manager::run_escalation(void) {
m_escalator.run(this, manager_run_escalation_fun, this);
}
#endif
void locktree::manager::run_escalation_for_test(void) { void locktree::manager::run_escalation_for_test(void) {
run_escalation(); run_escalation();
} }
void locktree::manager::run_escalation(void) { void locktree::manager::escalate_all_locktrees(void) {
if (0) fprintf(stderr, "%d %s:%u\n", toku_os_gettid(), __PRETTY_FUNCTION__, __LINE__);
uint64_t t0 = toku_current_time_microsec(); uint64_t t0 = toku_current_time_microsec();
#if DO_ESCALATOR_THREAD
// run escalation on the background thread mutex_lock();
int r;
toku_mutex_lock(&m_escalator_mutex); // get all locktrees
toku_cond_broadcast(&m_escalator_work); int num_locktrees = m_locktree_map.size();
struct timeval tv; locktree **locktrees = new locktree *[num_locktrees];
r = gettimeofday(&tv, 0); for (int i = 0; i < num_locktrees; i++) {
assert_zero(r); int r = m_locktree_map.fetch(i, &locktrees[i]);
uint64_t t = tv.tv_sec * 1000000 + tv.tv_usec; invariant_zero(r);
t += 100000; // 100 milliseconds }
toku_timespec_t wakeup_time;
wakeup_time.tv_sec = t / 1000000; // escalate them
wakeup_time.tv_nsec = (t % 1000000) * 1000; escalate_locktrees(locktrees, num_locktrees);
r = toku_cond_timedwait(&m_escalator_done, &m_escalator_mutex, &wakeup_time); delete [] locktrees;
toku_mutex_unlock(&m_escalator_mutex); mutex_unlock();
#else
// else run escalation on this thread
mutex_lock(); escalate_all_locktrees(); mutex_unlock();
#endif
uint64_t t1 = toku_current_time_microsec(); uint64_t t1 = toku_current_time_microsec();
add_escalator_wait_time(t1 - t0); add_escalator_wait_time(t1 - t0);
} }
...@@ -350,6 +364,10 @@ void locktree::manager::memory_tracker::set_manager(manager *mgr) { ...@@ -350,6 +364,10 @@ void locktree::manager::memory_tracker::set_manager(manager *mgr) {
m_mgr = mgr; m_mgr = mgr;
} }
locktree::manager *locktree::manager::memory_tracker::get_manager(void) {
return m_mgr;
}
int locktree::manager::memory_tracker::check_current_lock_constraints(void) { int locktree::manager::memory_tracker::check_current_lock_constraints(void) {
int r = 0; int r = 0;
// check if we're out of locks without the mutex first. then, grab the // check if we're out of locks without the mutex first. then, grab the
...@@ -377,6 +395,10 @@ bool locktree::manager::memory_tracker::out_of_locks(void) const { ...@@ -377,6 +395,10 @@ bool locktree::manager::memory_tracker::out_of_locks(void) const {
return m_mgr->m_current_lock_memory >= m_mgr->m_max_lock_memory; return m_mgr->m_current_lock_memory >= m_mgr->m_max_lock_memory;
} }
bool locktree::manager::memory_tracker::over_big_threshold(void) {
return m_mgr->m_current_lock_memory >= m_mgr->m_max_lock_memory / 2;
}
int locktree::manager::iterate_pending_lock_requests( int locktree::manager::iterate_pending_lock_requests(
lock_request_iterate_callback callback, void *extra) { lock_request_iterate_callback callback, void *extra) {
mutex_lock(); mutex_lock();
...@@ -406,22 +428,23 @@ int locktree::manager::iterate_pending_lock_requests( ...@@ -406,22 +428,23 @@ int locktree::manager::iterate_pending_lock_requests(
return r; return r;
} }
#if DO_ESCALATOR_THREAD int locktree::manager::check_current_lock_constraints(bool big_txn) {
static void *escalator_thread(void *arg) { int r = 0;
locktree::manager *mgr = reinterpret_cast<locktree::manager*>(arg); if (big_txn && m_mem_tracker.over_big_threshold()) {
mgr->escalator_work(); run_escalation();
return arg; if (m_mem_tracker.over_big_threshold()) {
r = TOKUDB_OUT_OF_LOCKS;
}
}
if (r == 0) {
r = m_mem_tracker.check_current_lock_constraints();
}
return r;
} }
#endif
void locktree::manager::escalator_init(void) { void locktree::manager::escalator_init(void) {
ZERO_STRUCT(m_escalator_mutex); ZERO_STRUCT(m_escalation_mutex);
toku_mutex_init(&m_escalator_mutex, nullptr); toku_mutex_init(&m_escalation_mutex, nullptr);
#if DO_ESCALATOR_THREAD
toku_cond_init(&m_escalator_work, nullptr);
toku_cond_init(&m_escalator_done, nullptr);
m_escalator_killed = false;
#endif
m_escalation_count = 0; m_escalation_count = 0;
m_escalation_time = 0; m_escalation_time = 0;
m_wait_escalation_count = 0; m_wait_escalation_count = 0;
...@@ -429,30 +452,27 @@ void locktree::manager::escalator_init(void) { ...@@ -429,30 +452,27 @@ void locktree::manager::escalator_init(void) {
m_long_wait_escalation_count = 0; m_long_wait_escalation_count = 0;
m_long_wait_escalation_time = 0; m_long_wait_escalation_time = 0;
m_escalation_latest_result = 0; m_escalation_latest_result = 0;
#if DO_ESCALATOR_THREAD m_escalator.create();
int r = toku_pthread_create(&m_escalator_id, nullptr, escalator_thread, this);
assert_zero(r);
#endif
} }
void locktree::manager::escalator_destroy(void) { void locktree::manager::escalator_destroy(void) {
#if DO_ESCALATOR_THREAD m_escalator.destroy();
toku_mutex_lock(&m_escalator_mutex); toku_mutex_destroy(&m_escalation_mutex);
m_escalator_killed = true;
toku_cond_broadcast(&m_escalator_work);
toku_mutex_unlock(&m_escalator_mutex);
void *ret;
int r = toku_pthread_join(m_escalator_id, &ret);
assert_zero(r);
#endif
toku_mutex_destroy(&m_escalator_mutex);
#if DO_ESCALATOR_THREAD
toku_cond_destroy(&m_escalator_work);
toku_cond_destroy(&m_escalator_done);
#endif
} }
void locktree::manager::escalate_all_locktrees(void) { void locktree::manager::add_escalator_wait_time(uint64_t t) {
toku_mutex_lock(&m_escalation_mutex);
m_wait_escalation_count += 1;
m_wait_escalation_time += t;
if (t >= 1000000) {
m_long_wait_escalation_count += 1;
m_long_wait_escalation_time += t;
}
toku_mutex_unlock(&m_escalation_mutex);
}
void locktree::manager::escalate_locktrees(locktree **locktrees, int num_locktrees) {
if (0) fprintf(stderr, "%d %s:%u %d\n", toku_os_gettid(), __PRETTY_FUNCTION__, __LINE__, num_locktrees);
// there are too many row locks in the system and we need to tidy up. // there are too many row locks in the system and we need to tidy up.
// //
// a simple implementation of escalation does not attempt // a simple implementation of escalation does not attempt
...@@ -461,50 +481,86 @@ void locktree::manager::escalate_all_locktrees(void) { ...@@ -461,50 +481,86 @@ void locktree::manager::escalate_all_locktrees(void) {
// and more complicated locking. for now, just escalate each // and more complicated locking. for now, just escalate each
// locktree individually, in-place. // locktree individually, in-place.
tokutime_t t0 = toku_time_now(); tokutime_t t0 = toku_time_now();
size_t num_locktrees = m_locktree_map.size(); for (int i = 0; i < num_locktrees; i++) {
for (size_t i = 0; i < num_locktrees; i++) { locktrees[i]->escalate(m_lt_escalate_callback, m_lt_escalate_callback_extra);
locktree *lt;
int r = m_locktree_map.fetch(i, &lt);
invariant_zero(r);
lt->escalate(m_lt_escalate_callback, m_lt_escalate_callback_extra);
} }
tokutime_t t1 = toku_time_now(); tokutime_t t1 = toku_time_now();
toku_mutex_lock(&m_escalator_mutex); toku_mutex_lock(&m_escalation_mutex);
m_escalation_count++; m_escalation_count++;
m_escalation_time += (t1 - t0); m_escalation_time += (t1 - t0);
m_escalation_latest_result = m_current_lock_memory; m_escalation_latest_result = m_current_lock_memory;
toku_mutex_unlock(&m_escalator_mutex); toku_mutex_unlock(&m_escalation_mutex);
} }
void locktree::manager::add_escalator_wait_time(uint64_t t) { #if !TOKU_LOCKTREE_ESCALATOR_LAMBDA
toku_mutex_lock(&m_escalator_mutex); struct escalate_args {
m_wait_escalation_count += 1; locktree::manager *mgr;
m_wait_escalation_time += t; locktree **locktrees;
if (t >= 1000000) { int num_locktrees;
m_long_wait_escalation_count += 1; };
m_long_wait_escalation_time += t;
} static void manager_escalate_locktrees(void *extra) {
toku_mutex_unlock(&m_escalator_mutex); escalate_args *args = (escalate_args *) extra;
args->mgr->escalate_locktrees(args->locktrees, args->num_locktrees);
}
#endif
void locktree::manager::escalate_lock_trees_for_txn(TXNID txnid UU(), locktree *lt UU()) {
// get lock trees for txnid
const int num_locktrees = 1;
locktree *locktrees[1] = { lt };
// escalate these lock trees
locktree::escalator this_escalator;
this_escalator.create();
#if TOKU_LOCKTREE_ESCALATOR_LAMBDA
this_escalator.run(this, [this,locktrees,num_locktrees] () -> void { escalate_locktrees(locktrees, num_locktrees); });
#else
escalate_args args = { this, locktrees, num_locktrees };
this_escalator.run(this, manager_escalate_locktrees, &args);
#endif
this_escalator.destroy();
}
void locktree::escalator::create(void) {
ZERO_STRUCT(m_escalator_mutex);
toku_mutex_init(&m_escalator_mutex, nullptr);
toku_cond_init(&m_escalator_done, nullptr);
m_escalator_running = false;
}
void locktree::escalator::destroy(void) {
toku_cond_destroy(&m_escalator_done);
toku_mutex_destroy(&m_escalator_mutex);
} }
#if DO_ESCALATOR_THREAD #if TOKU_LOCKTREE_ESCALATOR_LAMBDA
void locktree::manager::escalator_work(void) { void locktree::escalator::run(locktree::manager *mgr, std::function<void (void)> escalate_locktrees_fun) {
#else
void locktree::escalator::run(locktree::manager *mgr, void (*escalate_locktrees_fun)(void *extra), void *extra) {
#endif
uint64_t t0 = toku_current_time_microsec();
toku_mutex_lock(&m_escalator_mutex); toku_mutex_lock(&m_escalator_mutex);
while (!m_escalator_killed) { if (!m_escalator_running) {
toku_cond_wait(&m_escalator_work, &m_escalator_mutex); // run escalation on this thread
if (!m_escalator_killed) { m_escalator_running = true;
toku_mutex_unlock(&m_escalator_mutex); toku_mutex_unlock(&m_escalator_mutex);
mutex_lock(); #if TOKU_LOCKTREE_ESCALATOR_LAMBDA
escalate_all_locktrees(); escalate_locktrees_fun();
mutex_unlock(); #else
toku_mutex_lock(&m_escalator_mutex); escalate_locktrees_fun(extra);
toku_cond_broadcast(&m_escalator_done); #endif
} toku_mutex_lock(&m_escalator_mutex);
m_escalator_running = false;
toku_cond_broadcast(&m_escalator_done);
} else {
toku_cond_wait(&m_escalator_done, &m_escalator_mutex);
} }
toku_mutex_unlock(&m_escalator_mutex); toku_mutex_unlock(&m_escalator_mutex);
uint64_t t1 = toku_current_time_microsec();
mgr->add_escalator_wait_time(t1 - t0);
} }
#endif
#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(status, k, c, t, "locktree: " l, inc) #define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(status, k, c, t, "locktree: " l, inc)
......
...@@ -111,20 +111,20 @@ void lock_request_unit_test::test_get_set_keys(void) { ...@@ -111,20 +111,20 @@ void lock_request_unit_test::test_get_set_keys(void) {
// request should not copy dbts for neg/pos inf, so get_left // request should not copy dbts for neg/pos inf, so get_left
// and get_right should return the same pointer given // and get_right should return the same pointer given
request.set(null_lt, txnid_a, neg_inf, pos_inf, lock_request::type::WRITE); request.set(null_lt, txnid_a, neg_inf, pos_inf, lock_request::type::WRITE, false);
invariant(request.get_left_key() == neg_inf); invariant(request.get_left_key() == neg_inf);
invariant(request.get_right_key() == pos_inf); invariant(request.get_right_key() == pos_inf);
// request should make copies of non-infinity-valued keys. // request should make copies of non-infinity-valued keys.
request.set(null_lt, txnid_a, neg_inf, one, lock_request::type::WRITE); request.set(null_lt, txnid_a, neg_inf, one, lock_request::type::WRITE, false);
invariant(request.get_left_key() == neg_inf); invariant(request.get_left_key() == neg_inf);
invariant(request.get_right_key() == one); invariant(request.get_right_key() == one);
request.set(null_lt, txnid_a, two, pos_inf, lock_request::type::WRITE); request.set(null_lt, txnid_a, two, pos_inf, lock_request::type::WRITE, false);
invariant(request.get_left_key() == two); invariant(request.get_left_key() == two);
invariant(request.get_right_key() == pos_inf); invariant(request.get_right_key() == pos_inf);
request.set(null_lt, txnid_a, one, two, lock_request::type::WRITE); request.set(null_lt, txnid_a, one, two, lock_request::type::WRITE, false);
invariant(request.get_left_key() == one); invariant(request.get_left_key() == one);
invariant(request.get_right_key() == two); invariant(request.get_right_key() == two);
......
...@@ -119,30 +119,30 @@ void lock_request_unit_test::test_start_deadlock(void) { ...@@ -119,30 +119,30 @@ void lock_request_unit_test::test_start_deadlock(void) {
const DBT *two = get_dbt(2); const DBT *two = get_dbt(2);
// start and succeed 1,1 for A and 2,2 for B. // start and succeed 1,1 for A and 2,2 for B.
request_a.set(lt, txnid_a, one, one, lock_request::type::WRITE); request_a.set(lt, txnid_a, one, one, lock_request::type::WRITE, false);
r = request_a.start(); r = request_a.start();
invariant_zero(r); invariant_zero(r);
request_b.set(lt, txnid_b, two, two, lock_request::type::WRITE); request_b.set(lt, txnid_b, two, two, lock_request::type::WRITE, false);
r = request_b.start(); r = request_b.start();
invariant_zero(r); invariant_zero(r);
// txnid A should not be granted a lock on 2,2, so it goes pending. // txnid A should not be granted a lock on 2,2, so it goes pending.
request_a.set(lt, txnid_a, two, two, lock_request::type::WRITE); request_a.set(lt, txnid_a, two, two, lock_request::type::WRITE, false);
r = request_a.start(); r = request_a.start();
invariant(r == DB_LOCK_NOTGRANTED); invariant(r == DB_LOCK_NOTGRANTED);
// if txnid B wants a lock on 1,1 it should deadlock with A // if txnid B wants a lock on 1,1 it should deadlock with A
request_b.set(lt, txnid_b, one, one, lock_request::type::WRITE); request_b.set(lt, txnid_b, one, one, lock_request::type::WRITE, false);
r = request_b.start(); r = request_b.start();
invariant(r == DB_LOCK_DEADLOCK); invariant(r == DB_LOCK_DEADLOCK);
// txnid C should not deadlock on either of these - it should just time out. // txnid C should not deadlock on either of these - it should just time out.
request_c.set(lt, txnid_c, one, one, lock_request::type::WRITE); request_c.set(lt, txnid_c, one, one, lock_request::type::WRITE, false);
r = request_c.start(); r = request_c.start();
invariant(r == DB_LOCK_NOTGRANTED); invariant(r == DB_LOCK_NOTGRANTED);
r = request_c.wait(lock_wait_time); r = request_c.wait(lock_wait_time);
invariant(r == DB_LOCK_NOTGRANTED); invariant(r == DB_LOCK_NOTGRANTED);
request_c.set(lt, txnid_c, two, two, lock_request::type::WRITE); request_c.set(lt, txnid_c, two, two, lock_request::type::WRITE, false);
r = request_c.start(); r = request_c.start();
invariant(r == DB_LOCK_NOTGRANTED); invariant(r == DB_LOCK_NOTGRANTED);
r = request_c.wait(lock_wait_time); r = request_c.wait(lock_wait_time);
......
...@@ -113,7 +113,7 @@ void lock_request_unit_test::test_start_pending(void) { ...@@ -113,7 +113,7 @@ void lock_request_unit_test::test_start_pending(void) {
const DBT *two = get_dbt(2); const DBT *two = get_dbt(2);
// take a range lock using txnid b // take a range lock using txnid b
r = lt->acquire_write_lock(txnid_b, zero, two, nullptr); r = lt->acquire_write_lock(txnid_b, zero, two, nullptr, false);
invariant_zero(r); invariant_zero(r);
locktree::lt_lock_request_info *info = lt->get_lock_request_info(); locktree::lt_lock_request_info *info = lt->get_lock_request_info();
...@@ -121,7 +121,7 @@ void lock_request_unit_test::test_start_pending(void) { ...@@ -121,7 +121,7 @@ void lock_request_unit_test::test_start_pending(void) {
// start a lock request for 1,1 // start a lock request for 1,1
// it should fail. the request should be stored and in the pending state. // it should fail. the request should be stored and in the pending state.
request.create(); request.create();
request.set(lt, txnid_a, one, one, lock_request::type::WRITE); request.set(lt, txnid_a, one, one, lock_request::type::WRITE, false);
r = request.start(); r = request.start();
invariant(r == DB_LOCK_NOTGRANTED); invariant(r == DB_LOCK_NOTGRANTED);
invariant(info->pending_lock_requests.size() == 1); invariant(info->pending_lock_requests.size() == 1);
......
...@@ -123,12 +123,12 @@ void lock_request_unit_test::test_wait_time_callback(void) { ...@@ -123,12 +123,12 @@ void lock_request_unit_test::test_wait_time_callback(void) {
const DBT *two = get_dbt(2); const DBT *two = get_dbt(2);
// a locks 'one' // a locks 'one'
request_a.set(lt, txnid_a, one, one, lock_request::type::WRITE); request_a.set(lt, txnid_a, one, one, lock_request::type::WRITE, false);
r = request_a.start(); r = request_a.start();
assert_zero(r); assert_zero(r);
// b tries to lock 'one' // b tries to lock 'one'
request_b.set(lt, txnid_b, one, two, lock_request::type::WRITE); request_b.set(lt, txnid_b, one, two, lock_request::type::WRITE, false);
r = request_b.start(); r = request_b.start();
assert(r == DB_LOCK_NOTGRANTED); assert(r == DB_LOCK_NOTGRANTED);
assert(my_calls == 0); assert(my_calls == 0);
......
...@@ -125,8 +125,8 @@ void locktree_unit_test::test_conflicts(void) { ...@@ -125,8 +125,8 @@ void locktree_unit_test::test_conflicts(void) {
// test_run == 0 means test with read lock // test_run == 0 means test with read lock
// test_run == 1 means test with write lock // test_run == 1 means test with write lock
#define ACQUIRE_LOCK(txn, left, right, conflicts) \ #define ACQUIRE_LOCK(txn, left, right, conflicts) \
test_run == 0 ? lt->acquire_read_lock(txn, left, right, conflicts) \ test_run == 0 ? lt->acquire_read_lock(txn, left, right, conflicts, false) \
: lt->acquire_write_lock(txn, left, right, conflicts) : lt->acquire_write_lock(txn, left, right, conflicts, false)
// acquire some locks for txnid_a // acquire some locks for txnid_a
r = ACQUIRE_LOCK(txnid_a, one, one, nullptr); r = ACQUIRE_LOCK(txnid_a, one, one, nullptr);
...@@ -142,8 +142,8 @@ void locktree_unit_test::test_conflicts(void) { ...@@ -142,8 +142,8 @@ void locktree_unit_test::test_conflicts(void) {
// if test_run == 0, then read locks exist. only test write locks. // if test_run == 0, then read locks exist. only test write locks.
#define ACQUIRE_LOCK(txn, left, right, conflicts) \ #define ACQUIRE_LOCK(txn, left, right, conflicts) \
sub_test_run == 0 && test_run == 1 ? \ sub_test_run == 0 && test_run == 1 ? \
lt->acquire_read_lock(txn, left, right, conflicts) \ lt->acquire_read_lock(txn, left, right, conflicts, false) \
: lt->acquire_write_lock(txn, left, right, conflicts) : lt->acquire_write_lock(txn, left, right, conflicts, false)
// try to get point write locks for txnid_b, should fail // try to get point write locks for txnid_b, should fail
r = ACQUIRE_LOCK(txnid_b, one, one, nullptr); r = ACQUIRE_LOCK(txnid_b, one, one, nullptr);
invariant(r == DB_LOCK_NOTGRANTED); invariant(r == DB_LOCK_NOTGRANTED);
......
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id$"
/*
COPYING CONDITIONS NOTICE:
This program is free software; you can redistribute it and/or modify
it under the terms of version 2 of the GNU General Public License as
published by the Free Software Foundation, and provided that the
following conditions are met:
* Redistributions of source code must retain this COPYING
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
GRANT (below).
* Redistributions in binary form must reproduce this COPYING
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
GRANT (below) in the documentation and/or other materials
provided with the distribution.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
COPYRIGHT NOTICE:
TokuDB, Tokutek Fractal Tree Indexing Library.
Copyright (C) 2007-2013 Tokutek, Inc.
DISCLAIMER:
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
UNIVERSITY PATENT NOTICE:
The technology is licensed by the Massachusetts Institute of
Technology, Rutgers State University of New Jersey, and the Research
Foundation of State University of New York at Stony Brook under
United States of America Serial No. 11/760379 and to the patents
and/or patent applications resulting from it.
PATENT MARKING NOTICE:
This software is covered by US Patent No. 8,185,551.
This software is covered by US Patent No. 8,489,638.
PATENT RIGHTS GRANT:
"THIS IMPLEMENTATION" means the copyrightable works distributed by
Tokutek as part of the Fractal Tree project.
"PATENT CLAIMS" means the claims of patents that are owned or
licensable by Tokutek, both currently or in the future; and that in
the absence of this license would be infringed by THIS
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
"PATENT CHALLENGE" shall mean a challenge to the validity,
patentability, enforceability and/or non-infringement of any of the
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
Tokutek hereby grants to you, for the term and geographical scope of
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
irrevocable (except as stated in this section) patent license to
make, have made, use, offer to sell, sell, import, transfer, and
otherwise run, modify, and propagate the contents of THIS
IMPLEMENTATION, where such license applies only to the PATENT
CLAIMS. This grant does not include claims that would be infringed
only as a consequence of further modifications of THIS
IMPLEMENTATION. If you or your agent or licensee institute or order
or agree to the institution of patent litigation against any entity
(including a cross-claim or counterclaim in a lawsuit) alleging that
THIS IMPLEMENTATION constitutes direct or contributory patent
infringement, or inducement of patent infringement, then any rights
granted to you under this License shall terminate as of the date
such litigation is filed. If you or your agent or exclusive
licensee institute or order or agree to the institution of a PATENT
CHALLENGE, then Tokutek may terminate any rights granted to you
under this License.
*/
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include <stdio.h>
#include "locktree.h"
#include "test.h"
// ensure that small transactions do not get stalled by a big transaction that has lots of locks
// ./locktree_escalation_big7_small1 --stalls 100 --max_lock_memory 1000000000 --verbose
using namespace toku;
static int verbose = 0;
static int killed = 0;
static pthread_t big_id, small_id;
static void locktree_release_lock(locktree *lt, TXNID txn_id, int64_t left_k, int64_t right_k) {
range_buffer buffer;
buffer.create();
DBT left; toku_fill_dbt(&left, &left_k, sizeof left_k);
DBT right; toku_fill_dbt(&right, &right_k, sizeof right_k);
buffer.append(&left, &right);
lt->release_locks(txn_id, &buffer);
buffer.destroy();
}
// grab a write range lock on int64 keys bounded by left_k and right_k
static int locktree_write_lock(locktree *lt, TXNID txn_id, int64_t left_k, int64_t right_k, bool big_txn) {
DBT left; toku_fill_dbt(&left, &left_k, sizeof left_k);
DBT right; toku_fill_dbt(&right, &right_k, sizeof right_k);
return lt->acquire_write_lock(txn_id, &left, &right, nullptr, big_txn);
}
#if 0
static locktree **big_txn_lt;
static int n_big_txn_lt;
static int get_locktrees_touched_by_txn(TXNID txn_id UU(), void *txn_extra UU(), locktree ***ret_locktrees, int *ret_num_locktrees) {
locktree **locktrees = (locktree **) toku_malloc(n_big_txn_lt * sizeof (locktree *));
for (int i = 0; i < n_big_txn_lt; i++)
locktrees[i] = big_txn_lt[i];
*ret_locktrees = locktrees;
*ret_num_locktrees = n_big_txn_lt;
return 0;
}
#endif
static void run_big_txn(locktree::manager *mgr UU(), locktree **lt, int n_lt, TXNID txn_id) {
int64_t last_i = -1;
for (int64_t i = 0; !killed; i++) {
for (int j = 0; j < n_lt; j++) {
uint64_t t_start = toku_current_time_microsec();
int r = locktree_write_lock(lt[j], txn_id, i, i, true);
assert(r == 0);
last_i = i;
uint64_t t_end = toku_current_time_microsec();
uint64_t t_duration = t_end - t_start;
if (t_duration > 100000) {
printf("%u %s %" PRId64 " %" PRIu64 "\n", toku_os_gettid(), __FUNCTION__, i, t_duration);
}
}
toku_pthread_yield();
}
if (last_i != -1)
for (int j = 0; j < n_lt; j++)
locktree_release_lock(lt[j], txn_id, 0, last_i); // release the range 0 .. last_i
if (verbose)
printf("%u %s %" PRId64 "\n", toku_os_gettid(), __FUNCTION__, last_i);
}
struct big_arg {
locktree::manager *mgr;
locktree **lt;
int n_lt;
TXNID txn_id;
};
static void *big_f(void *_arg) {
struct big_arg *arg = (struct big_arg *) _arg;
assert(pthread_equal(pthread_self(), big_id));
printf("%u %s\n", toku_os_gettid(), __FUNCTION__);
run_big_txn(arg->mgr, arg->lt, arg->n_lt, arg->txn_id);
return arg;
}
static void run_small_txn(locktree::manager *mgr UU(), locktree *lt, TXNID txn_id, int64_t k) {
int64_t i;
for (i = 0; !killed; i++) {
uint64_t t_start = toku_current_time_microsec();
int r = locktree_write_lock(lt, txn_id, k, k, false);
assert(r == 0);
uint64_t t_end = toku_current_time_microsec();
uint64_t t_duration = t_end - t_start;
if (t_duration > 100000) {
printf("%u %s %" PRId64 " %" PRIu64 "\n", toku_os_gettid(), __FUNCTION__, i, t_duration);
}
locktree_release_lock(lt, txn_id, k, k);
toku_pthread_yield();
}
if (verbose)
printf("%u %s %" PRId64 "\n", toku_os_gettid(), __FUNCTION__, i);
}
struct small_arg {
locktree::manager *mgr;
locktree *lt;
TXNID txn_id;
int64_t k;
};
static void *small_f(void *_arg) {
struct small_arg *arg = (struct small_arg *) _arg;
printf("%u %s\n", toku_os_gettid(), __FUNCTION__);
run_small_txn(arg->mgr, arg->lt, arg->txn_id, arg->k);
return arg;
}
static void e_callback(TXNID txnid, const locktree *lt, const range_buffer &buffer, void *extra) {
assert(pthread_equal(pthread_self(), big_id));
if (verbose)
printf("%u %s %" PRIu64 " %p %d %p\n", toku_os_gettid(), __FUNCTION__, txnid, lt, buffer.get_num_ranges(), extra);
}
static uint64_t get_escalation_count(locktree::manager &mgr) {
LTM_STATUS_S ltm_status;
mgr.get_status(&ltm_status);
TOKU_ENGINE_STATUS_ROW key_status = NULL;
// lookup keyname in status
for (int i = 0; ; i++) {
TOKU_ENGINE_STATUS_ROW status = &ltm_status.status[i];
if (status->keyname == NULL)
break;
if (strcmp(status->keyname, "LTM_ESCALATION_COUNT") == 0) {
key_status = status;
break;
}
}
assert(key_status);
return key_status->value.num;
}
int main(int argc, const char *argv[]) {
uint64_t stalls = 1;
int n_big = 7;
uint64_t max_lock_memory = 1000000;
for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--verbose") == 0) {
verbose++;
continue;
}
if (strcmp(argv[i], "--stalls") == 0 && i+1 < argc) {
stalls = atoll(argv[++i]);
continue;
}
if (strcmp(argv[i], "--max_lock_memory") == 0 && i+1 < argc) {
max_lock_memory = atoll(argv[++i]);
continue;
}
}
int r;
// create a manager
locktree::manager mgr;
mgr.create(nullptr, nullptr, e_callback, nullptr);
mgr.set_max_lock_memory(max_lock_memory);
// create lock trees
uint64_t next_dict_id = 1;
DICTIONARY_ID dict_id;
locktree *big_lt[n_big];
for (int i = 0; i < n_big; i++) {
dict_id = { next_dict_id }; next_dict_id++;
big_lt[i] = mgr.get_lt(dict_id, nullptr, compare_dbts, nullptr);
}
#if 0
big_txn_lt = big_lt;
n_big_txn_lt = n_big;
#endif
dict_id = { next_dict_id }; next_dict_id++;
locktree *small_lt = mgr.get_lt(dict_id, nullptr, compare_dbts, nullptr);
// create the worker threads
struct big_arg big_arg = { &mgr, big_lt, n_big, 1000 };
r = toku_pthread_create(&big_id, nullptr, big_f, &big_arg);
assert(r == 0);
struct small_arg small_arg = { &mgr, small_lt, 2000, 0 };
r = toku_pthread_create(&small_id, nullptr, small_f, &small_arg);
assert(r == 0);
// wait for some escalations to occur
while (get_escalation_count(mgr) < stalls) {
sleep(1);
}
killed = 1;
// cleanup
void *ret;
r = toku_pthread_join(big_id, &ret);
assert(r == 0);
r = toku_pthread_join(small_id, &ret);
assert(r == 0);
for (int i = 0; i < n_big; i++)
mgr.release_lt(big_lt[i]);
mgr.release_lt(small_lt);
mgr.destroy();
return 0;
}
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id$"
/*
COPYING CONDITIONS NOTICE:
This program is free software; you can redistribute it and/or modify
it under the terms of version 2 of the GNU General Public License as
published by the Free Software Foundation, and provided that the
following conditions are met:
* Redistributions of source code must retain this COPYING
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
GRANT (below).
* Redistributions in binary form must reproduce this COPYING
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
GRANT (below) in the documentation and/or other materials
provided with the distribution.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
COPYRIGHT NOTICE:
TokuDB, Tokutek Fractal Tree Indexing Library.
Copyright (C) 2007-2013 Tokutek, Inc.
DISCLAIMER:
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
UNIVERSITY PATENT NOTICE:
The technology is licensed by the Massachusetts Institute of
Technology, Rutgers State University of New Jersey, and the Research
Foundation of State University of New York at Stony Brook under
United States of America Serial No. 11/760379 and to the patents
and/or patent applications resulting from it.
PATENT MARKING NOTICE:
This software is covered by US Patent No. 8,185,551.
This software is covered by US Patent No. 8,489,638.
PATENT RIGHTS GRANT:
"THIS IMPLEMENTATION" means the copyrightable works distributed by
Tokutek as part of the Fractal Tree project.
"PATENT CLAIMS" means the claims of patents that are owned or
licensable by Tokutek, both currently or in the future; and that in
the absence of this license would be infringed by THIS
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
"PATENT CHALLENGE" shall mean a challenge to the validity,
patentability, enforceability and/or non-infringement of any of the
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
Tokutek hereby grants to you, for the term and geographical scope of
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
irrevocable (except as stated in this section) patent license to
make, have made, use, offer to sell, sell, import, transfer, and
otherwise run, modify, and propagate the contents of THIS
IMPLEMENTATION, where such license applies only to the PATENT
CLAIMS. This grant does not include claims that would be infringed
only as a consequence of further modifications of THIS
IMPLEMENTATION. If you or your agent or licensee institute or order
or agree to the institution of patent litigation against any entity
(including a cross-claim or counterclaim in a lawsuit) alleging that
THIS IMPLEMENTATION constitutes direct or contributory patent
infringement, or inducement of patent infringement, then any rights
granted to you under this License shall terminate as of the date
such litigation is filed. If you or your agent or exclusive
licensee institute or order or agree to the institution of a PATENT
CHALLENGE, then Tokutek may terminate any rights granted to you
under this License.
*/
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include <stdio.h>
#include "locktree.h"
#include "test.h"
// One client locks 1,2,3...
// The other client locks -1,-2,-3...
// Eventually lock escalation runs.
using namespace toku;
static int verbose = 0;
static int killed = 0;
static void locktree_release_lock(locktree *lt, TXNID txn_id, int64_t left_k, int64_t right_k) {
range_buffer buffer;
buffer.create();
DBT left; toku_fill_dbt(&left, &left_k, sizeof left_k);
DBT right; toku_fill_dbt(&right, &right_k, sizeof right_k);
buffer.append(&left, &right);
lt->release_locks(txn_id, &buffer);
buffer.destroy();
}
// grab a write range lock on int64 keys bounded by left_k and right_k
static int locktree_write_lock(locktree *lt, TXNID txn_id, int64_t left_k, int64_t right_k, bool big_txn) {
DBT left; toku_fill_dbt(&left, &left_k, sizeof left_k);
DBT right; toku_fill_dbt(&right, &right_k, sizeof right_k);
return lt->acquire_write_lock(txn_id, &left, &right, nullptr, big_txn);
}
static void run_big_txn(locktree::manager *mgr UU(), locktree *lt, TXNID txn_id, int64_t start_i) {
fprintf(stderr, "%u run_big_txn %p %" PRIu64 " %" PRId64 "\n", toku_os_gettid(), lt, txn_id, start_i);
int64_t last_i = -1;
for (int64_t i = start_i; !killed; i++) {
if (0)
printf("%u %" PRId64 "\n", toku_os_gettid(), i);
uint64_t t_start = toku_current_time_microsec();
int r = locktree_write_lock(lt, txn_id, i, i, true);
if (r != 0)
break;
last_i = i;
uint64_t t_end = toku_current_time_microsec();
uint64_t t_duration = t_end - t_start;
if (t_duration > 100000) {
printf("%u %s %" PRId64 " %" PRIu64 "\n", toku_os_gettid(), __FUNCTION__, i, t_duration);
}
toku_pthread_yield();
}
if (last_i != -1)
locktree_release_lock(lt, txn_id, start_i, last_i); // release the range start_i .. last_i
}
struct arg {
locktree::manager *mgr;
locktree *lt;
TXNID txn_id;
int64_t start_i;
};
static void *big_f(void *_arg) {
struct arg *arg = (struct arg *) _arg;
run_big_txn(arg->mgr, arg->lt, arg->txn_id, arg->start_i);
return arg;
}
static void e_callback(TXNID txnid, const locktree *lt, const range_buffer &buffer, void *extra) {
if (verbose)
printf("%u %s %" PRIu64 " %p %d %p\n", toku_os_gettid(), __FUNCTION__, txnid, lt, buffer.get_num_ranges(), extra);
}
static uint64_t get_escalation_count(locktree::manager &mgr) {
LTM_STATUS_S ltm_status;
mgr.get_status(&ltm_status);
TOKU_ENGINE_STATUS_ROW key_status = NULL;
// lookup keyname in status
for (int i = 0; ; i++) {
TOKU_ENGINE_STATUS_ROW status = &ltm_status.status[i];
if (status->keyname == NULL)
break;
if (strcmp(status->keyname, "LTM_ESCALATION_COUNT") == 0) {
key_status = status;
break;
}
}
assert(key_status);
return key_status->value.num;
}
int main(int argc, const char *argv[]) {
const int n_big = 2;
int n_lt = 1;
uint64_t stalls = 1;
uint64_t max_lock_memory = 1000000;
for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--verbose") == 0) {
verbose++;
continue;
}
if (strcmp(argv[i], "--stalls") == 0 && i+1 < argc) {
stalls = atoll(argv[++i]);
continue;
}
if (strcmp(argv[i], "--n_lt") == 0 && i+1 < argc) {
n_lt = atoi(argv[++i]);
continue;
}
if (strcmp(argv[i], "--max_lock_memory") == 0 && i+1 < argc) {
max_lock_memory = atoll(argv[++i]);
continue;
}
}
int r;
// create a manager
locktree::manager mgr;
mgr.create(nullptr, nullptr, e_callback, nullptr);
mgr.set_max_lock_memory(max_lock_memory);
// create lock trees
DESCRIPTOR desc[n_lt];
DICTIONARY_ID dict_id[n_lt];
locktree *lt[n_big];
for (int i = 0; i < n_lt; i++) {
desc[i] = nullptr;
dict_id[i] = { (uint64_t)i };
lt[i] = mgr.get_lt(dict_id[i], desc[i], compare_dbts, nullptr);
assert(lt[i]);
}
// create the worker threads
struct arg big_arg[n_big];
pthread_t big_ids[n_big];
for (int i = 0; i < n_big; i++) {
big_arg[i] = { &mgr, lt[i % n_lt], (TXNID)(1000+i), i == 0 ? 1 : -1000000000 };
r = toku_pthread_create(&big_ids[i], nullptr, big_f, &big_arg[i]);
assert(r == 0);
}
// wait for some escalations to occur
while (get_escalation_count(mgr) < stalls) {
sleep(1);
}
killed = 1;
// cleanup
for (int i = 0; i < n_big; i++) {
void *ret;
r = toku_pthread_join(big_ids[i], &ret);
assert(r == 0);
}
for (int i = 0; i < n_lt ; i++) {
mgr.release_lt(lt[i]);
}
mgr.destroy();
return 0;
}
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id$"
/*
COPYING CONDITIONS NOTICE:
This program is free software; you can redistribute it and/or modify
it under the terms of version 2 of the GNU General Public License as
published by the Free Software Foundation, and provided that the
following conditions are met:
* Redistributions of source code must retain this COPYING
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
GRANT (below).
* Redistributions in binary form must reproduce this COPYING
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
GRANT (below) in the documentation and/or other materials
provided with the distribution.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
COPYRIGHT NOTICE:
TokuDB, Tokutek Fractal Tree Indexing Library.
Copyright (C) 2007-2013 Tokutek, Inc.
DISCLAIMER:
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
UNIVERSITY PATENT NOTICE:
The technology is licensed by the Massachusetts Institute of
Technology, Rutgers State University of New Jersey, and the Research
Foundation of State University of New York at Stony Brook under
United States of America Serial No. 11/760379 and to the patents
and/or patent applications resulting from it.
PATENT MARKING NOTICE:
This software is covered by US Patent No. 8,185,551.
This software is covered by US Patent No. 8,489,638.
PATENT RIGHTS GRANT:
"THIS IMPLEMENTATION" means the copyrightable works distributed by
Tokutek as part of the Fractal Tree project.
"PATENT CLAIMS" means the claims of patents that are owned or
licensable by Tokutek, both currently or in the future; and that in
the absence of this license would be infringed by THIS
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
"PATENT CHALLENGE" shall mean a challenge to the validity,
patentability, enforceability and/or non-infringement of any of the
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
Tokutek hereby grants to you, for the term and geographical scope of
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
irrevocable (except as stated in this section) patent license to
make, have made, use, offer to sell, sell, import, transfer, and
otherwise run, modify, and propagate the contents of THIS
IMPLEMENTATION, where such license applies only to the PATENT
CLAIMS. This grant does not include claims that would be infringed
only as a consequence of further modifications of THIS
IMPLEMENTATION. If you or your agent or licensee institute or order
or agree to the institution of patent litigation against any entity
(including a cross-claim or counterclaim in a lawsuit) alleging that
THIS IMPLEMENTATION constitutes direct or contributory patent
infringement, or inducement of patent infringement, then any rights
granted to you under this License shall terminate as of the date
such litigation is filed. If you or your agent or exclusive
licensee institute or order or agree to the institution of a PATENT
CHALLENGE, then Tokutek may terminate any rights granted to you
under this License.
*/
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include <stdio.h>
#include "locktree.h"
#include "test.h"
// One client locks 1,2,3...
// The other client locks -1,-2,-3...
// Eventually lock escalation runs.
using namespace toku;
static int verbose = 0;
static int killed = 0;
static void locktree_release_lock(locktree *lt, TXNID txn_id, int64_t left_k, int64_t right_k) {
range_buffer buffer;
buffer.create();
DBT left; toku_fill_dbt(&left, &left_k, sizeof left_k);
DBT right; toku_fill_dbt(&right, &right_k, sizeof right_k);
buffer.append(&left, &right);
lt->release_locks(txn_id, &buffer);
buffer.destroy();
}
// grab a write range lock on int64 keys bounded by left_k and right_k
static int locktree_write_lock(locktree *lt, TXNID txn_id, int64_t left_k, int64_t right_k, bool big_txn) {
DBT left; toku_fill_dbt(&left, &left_k, sizeof left_k);
DBT right; toku_fill_dbt(&right, &right_k, sizeof right_k);
return lt->acquire_write_lock(txn_id, &left, &right, nullptr, big_txn);
}
static void run_big_txn(locktree::manager *mgr UU(), locktree *lt, TXNID txn_id, int64_t start_i) {
fprintf(stderr, "%u run_big_txn %p %" PRIu64 " %" PRId64 "\n", toku_os_gettid(), lt, txn_id, start_i);
int64_t last_i = -1;
for (int64_t i = start_i; !killed; i++) {
if (0)
printf("%u %" PRId64 "\n", toku_os_gettid(), i);
uint64_t t_start = toku_current_time_microsec();
int r = locktree_write_lock(lt, txn_id, i, i, true);
if (r != 0)
break;
last_i = i;
uint64_t t_end = toku_current_time_microsec();
uint64_t t_duration = t_end - t_start;
if (t_duration > 100000) {
printf("%u %s %" PRId64 " %" PRIu64 "\n", toku_os_gettid(), __FUNCTION__, i, t_duration);
}
toku_pthread_yield();
}
if (last_i != -1)
locktree_release_lock(lt, txn_id, start_i, last_i); // release the range start_i .. last_i
}
struct arg {
locktree::manager *mgr;
locktree *lt;
TXNID txn_id;
int64_t start_i;
};
static void *big_f(void *_arg) {
struct arg *arg = (struct arg *) _arg;
run_big_txn(arg->mgr, arg->lt, arg->txn_id, arg->start_i);
return arg;
}
static void e_callback(TXNID txnid, const locktree *lt, const range_buffer &buffer, void *extra) {
if (verbose)
printf("%u %s %" PRIu64 " %p %d %p\n", toku_os_gettid(), __FUNCTION__, txnid, lt, buffer.get_num_ranges(), extra);
}
static uint64_t get_escalation_count(locktree::manager &mgr) {
LTM_STATUS_S ltm_status;
mgr.get_status(&ltm_status);
TOKU_ENGINE_STATUS_ROW key_status = NULL;
// lookup keyname in status
for (int i = 0; ; i++) {
TOKU_ENGINE_STATUS_ROW status = &ltm_status.status[i];
if (status->keyname == NULL)
break;
if (strcmp(status->keyname, "LTM_ESCALATION_COUNT") == 0) {
key_status = status;
break;
}
}
assert(key_status);
return key_status->value.num;
}
int main(int argc, const char *argv[]) {
const int n_big = 2;
int n_lt = 2;
uint64_t stalls = 1;
uint64_t max_lock_memory = 1000000;
for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--verbose") == 0) {
verbose++;
continue;
}
if (strcmp(argv[i], "--stalls") == 0 && i+1 < argc) {
stalls = atoll(argv[++i]);
continue;
}
if (strcmp(argv[i], "--n_lt") == 0 && i+1 < argc) {
n_lt = atoi(argv[++i]);
continue;
}
if (strcmp(argv[i], "--max_lock_memory") == 0 && i+1 < argc) {
max_lock_memory = atoll(argv[++i]);
continue;
}
}
int r;
// create a manager
locktree::manager mgr;
mgr.create(nullptr, nullptr, e_callback, nullptr);
mgr.set_max_lock_memory(max_lock_memory);
// create lock trees
DESCRIPTOR desc[n_lt];
DICTIONARY_ID dict_id[n_lt];
locktree *lt[n_big];
for (int i = 0; i < n_lt; i++) {
desc[i] = nullptr;
dict_id[i] = { (uint64_t)i };
lt[i] = mgr.get_lt(dict_id[i], desc[i], compare_dbts, nullptr);
assert(lt[i]);
}
// create the worker threads
struct arg big_arg[n_big];
pthread_t big_ids[n_big];
for (int i = 0; i < n_big; i++) {
big_arg[i] = { &mgr, lt[i % n_lt], (TXNID)(1000+i), i == 0 ? 1 : -1000000000 };
r = toku_pthread_create(&big_ids[i], nullptr, big_f, &big_arg[i]);
assert(r == 0);
}
// wait for some escalations to occur
while (get_escalation_count(mgr) < stalls) {
sleep(1);
}
killed = 1;
// cleanup
for (int i = 0; i < n_big; i++) {
void *ret;
r = toku_pthread_join(big_ids[i], &ret);
assert(r == 0);
}
for (int i = 0; i < n_lt ; i++) {
mgr.release_lt(lt[i]);
}
mgr.destroy();
return 0;
}
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id$"
/*
COPYING CONDITIONS NOTICE:
This program is free software; you can redistribute it and/or modify
it under the terms of version 2 of the GNU General Public License as
published by the Free Software Foundation, and provided that the
following conditions are met:
* Redistributions of source code must retain this COPYING
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
GRANT (below).
* Redistributions in binary form must reproduce this COPYING
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
GRANT (below) in the documentation and/or other materials
provided with the distribution.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
COPYRIGHT NOTICE:
TokuDB, Tokutek Fractal Tree Indexing Library.
Copyright (C) 2007-2013 Tokutek, Inc.
DISCLAIMER:
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
UNIVERSITY PATENT NOTICE:
The technology is licensed by the Massachusetts Institute of
Technology, Rutgers State University of New Jersey, and the Research
Foundation of State University of New York at Stony Brook under
United States of America Serial No. 11/760379 and to the patents
and/or patent applications resulting from it.
PATENT MARKING NOTICE:
This software is covered by US Patent No. 8,185,551.
This software is covered by US Patent No. 8,489,638.
PATENT RIGHTS GRANT:
"THIS IMPLEMENTATION" means the copyrightable works distributed by
Tokutek as part of the Fractal Tree project.
"PATENT CLAIMS" means the claims of patents that are owned or
licensable by Tokutek, both currently or in the future; and that in
the absence of this license would be infringed by THIS
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
"PATENT CHALLENGE" shall mean a challenge to the validity,
patentability, enforceability and/or non-infringement of any of the
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
Tokutek hereby grants to you, for the term and geographical scope of
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
irrevocable (except as stated in this section) patent license to
make, have made, use, offer to sell, sell, import, transfer, and
otherwise run, modify, and propagate the contents of THIS
IMPLEMENTATION, where such license applies only to the PATENT
CLAIMS. This grant does not include claims that would be infringed
only as a consequence of further modifications of THIS
IMPLEMENTATION. If you or your agent or licensee institute or order
or agree to the institution of patent litigation against any entity
(including a cross-claim or counterclaim in a lawsuit) alleging that
THIS IMPLEMENTATION constitutes direct or contributory patent
infringement, or inducement of patent infringement, then any rights
granted to you under this License shall terminate as of the date
such litigation is filed. If you or your agent or exclusive
licensee institute or order or agree to the institution of a PATENT
CHALLENGE, then Tokutek may terminate any rights granted to you
under this License.
*/
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include <stdio.h>
#include "locktree.h"
#include "test.h"
// Two big txn's grab alternating locks in a single lock tree.
// Eventually lock escalation runs.
// Since the locks can not be consolidated, the out of locks error should be returned.
using namespace toku;
static int verbose = 0;
static inline void locktree_release_lock(locktree *lt, TXNID txn_id, int64_t left_k, int64_t right_k) {
range_buffer buffer;
buffer.create();
DBT left; toku_fill_dbt(&left, &left_k, sizeof left_k);
DBT right; toku_fill_dbt(&right, &right_k, sizeof right_k);
buffer.append(&left, &right);
lt->release_locks(txn_id, &buffer);
buffer.destroy();
}
// grab a write range lock on int64 keys bounded by left_k and right_k
static int locktree_write_lock(locktree *lt, TXNID txn_id, int64_t left_k, int64_t right_k, bool big_txn) {
DBT left; toku_fill_dbt(&left, &left_k, sizeof left_k);
DBT right; toku_fill_dbt(&right, &right_k, sizeof right_k);
return lt->acquire_write_lock(txn_id, &left, &right, nullptr, big_txn);
}
static void e_callback(TXNID txnid, const locktree *lt, const range_buffer &buffer, void *extra) {
if (verbose)
printf("%u %s %" PRIu64 " %p %d %p\n", toku_os_gettid(), __FUNCTION__, txnid, lt, buffer.get_num_ranges(), extra);
}
static uint64_t get_escalation_count(locktree::manager &mgr) {
LTM_STATUS_S ltm_status;
mgr.get_status(&ltm_status);
TOKU_ENGINE_STATUS_ROW key_status = NULL;
// lookup keyname in status
for (int i = 0; ; i++) {
TOKU_ENGINE_STATUS_ROW status = &ltm_status.status[i];
if (status->keyname == NULL)
break;
if (strcmp(status->keyname, "LTM_ESCALATION_COUNT") == 0) {
key_status = status;
break;
}
}
assert(key_status);
return key_status->value.num;
}
int main(int argc, const char *argv[]) {
uint64_t max_lock_memory = 1000000;
for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--verbose") == 0) {
verbose++;
continue;
}
if (strcmp(argv[i], "--max_lock_memory") == 0 && i+1 < argc) {
max_lock_memory = atoll(argv[++i]);
continue;
}
}
int r;
// create a manager
locktree::manager mgr;
mgr.create(nullptr, nullptr, e_callback, nullptr);
mgr.set_max_lock_memory(max_lock_memory);
const TXNID txn_a = 10;
const TXNID txn_b = 100;
// create lock trees
DESCRIPTOR desc = nullptr;
DICTIONARY_ID dict_id = { 1 };
locktree *lt = mgr.get_lt(dict_id, desc, compare_dbts, nullptr);
int64_t last_i = -1;
for (int64_t i = 0; ; i++) {
if (verbose)
printf("%" PRId64 "\n", i);
int64_t k = 2*i;
r = locktree_write_lock(lt, txn_a, k, k, true);
if (r != 0) {
assert(r == TOKUDB_OUT_OF_LOCKS);
break;
}
last_i = i;
r = locktree_write_lock(lt, txn_b, k+1, k+1, true);
if (r != 0) {
assert(r == TOKUDB_OUT_OF_LOCKS);
break;
}
}
// wait for an escalation to occur
assert(get_escalation_count(mgr) > 0);
if (last_i != -1) {
locktree_release_lock(lt, txn_a, 0, 2*last_i);
locktree_release_lock(lt, txn_b, 0, 2*last_i+1);
}
mgr.release_lt(lt);
mgr.destroy();
return 0;
}
...@@ -89,6 +89,14 @@ PATENT RIGHTS GRANT: ...@@ -89,6 +89,14 @@ PATENT RIGHTS GRANT:
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved." #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
// This test verifies that small txn's do not get stalled for a long time by lock escalation.
// Two lock trees are used by the test: a big lock tree and a small lock tree.
// One big txn grabs lots of write locks on the big lock tree.
// Several small txn's grab a single write lock on the small lock tree.
// None of the locks conflict.
// Eventually, the locks for the big txn consume all of the lock tree memory, so lock escalation runs.
// The test measures the lock acquisition time and makes sure that the small txn's are not blocked for
// locktree_escalation_stalls -v --stalls 10 // locktree_escalation_stalls -v --stalls 10
// verify that only big txn's get tagged with > 1 second stalls // verify that only big txn's get tagged with > 1 second stalls
...@@ -112,17 +120,17 @@ static void locktree_release_lock(locktree *lt, TXNID txn_id, int64_t left_k, in ...@@ -112,17 +120,17 @@ static void locktree_release_lock(locktree *lt, TXNID txn_id, int64_t left_k, in
} }
// grab a write range lock on int64 keys bounded by left_k and right_k // grab a write range lock on int64 keys bounded by left_k and right_k
static int locktree_write_lock(locktree *lt, TXNID txn_id, int64_t left_k, int64_t right_k) { static int locktree_write_lock(locktree *lt, TXNID txn_id, int64_t left_k, int64_t right_k, bool big_txn) {
DBT left; toku_fill_dbt(&left, &left_k, sizeof left_k); DBT left; toku_fill_dbt(&left, &left_k, sizeof left_k);
DBT right; toku_fill_dbt(&right, &right_k, sizeof right_k); DBT right; toku_fill_dbt(&right, &right_k, sizeof right_k);
return lt->acquire_write_lock(txn_id, &left, &right, nullptr); return lt->acquire_write_lock(txn_id, &left, &right, nullptr, big_txn);
} }
static void run_big_txn(locktree::manager *mgr UU(), locktree *lt, TXNID txn_id) { static void run_big_txn(locktree::manager *mgr UU(), locktree *lt, TXNID txn_id) {
int64_t last_i = -1; int64_t last_i = -1;
for (int64_t i = 0; !killed; i++) { for (int64_t i = 0; !killed; i++) {
uint64_t t_start = toku_current_time_microsec(); uint64_t t_start = toku_current_time_microsec();
int r = locktree_write_lock(lt, txn_id, i, i); int r = locktree_write_lock(lt, txn_id, i, i, true);
assert(r == 0); assert(r == 0);
last_i = i; last_i = i;
uint64_t t_end = toku_current_time_microsec(); uint64_t t_end = toku_current_time_microsec();
...@@ -139,7 +147,7 @@ static void run_big_txn(locktree::manager *mgr UU(), locktree *lt, TXNID txn_id) ...@@ -139,7 +147,7 @@ static void run_big_txn(locktree::manager *mgr UU(), locktree *lt, TXNID txn_id)
static void run_small_txn(locktree::manager *mgr UU(), locktree *lt, TXNID txn_id, int64_t k) { static void run_small_txn(locktree::manager *mgr UU(), locktree *lt, TXNID txn_id, int64_t k) {
for (int64_t i = 0; !killed; i++) { for (int64_t i = 0; !killed; i++) {
uint64_t t_start = toku_current_time_microsec(); uint64_t t_start = toku_current_time_microsec();
int r = locktree_write_lock(lt, txn_id, k, k); int r = locktree_write_lock(lt, txn_id, k, k, false);
assert(r == 0); assert(r == 0);
uint64_t t_end = toku_current_time_microsec(); uint64_t t_end = toku_current_time_microsec();
uint64_t t_duration = t_end - t_start; uint64_t t_duration = t_end - t_start;
...@@ -196,6 +204,7 @@ static uint64_t get_escalation_count(locktree::manager &mgr) { ...@@ -196,6 +204,7 @@ static uint64_t get_escalation_count(locktree::manager &mgr) {
int main(int argc, const char *argv[]) { int main(int argc, const char *argv[]) {
uint64_t stalls = 0; uint64_t stalls = 0;
uint64_t max_lock_memory = 1000000000;
for (int i = 1; i < argc; i++) { for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--verbose") == 0) { if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--verbose") == 0) {
verbose++; verbose++;
...@@ -205,6 +214,10 @@ int main(int argc, const char *argv[]) { ...@@ -205,6 +214,10 @@ int main(int argc, const char *argv[]) {
stalls = atoll(argv[++i]); stalls = atoll(argv[++i]);
continue; continue;
} }
if (strcmp(argv[i], "--max_lock_memory") == 0 && i+1 < argc) {
max_lock_memory = atoll(argv[++i]);
continue;
}
} }
int r; int r;
...@@ -212,7 +225,7 @@ int main(int argc, const char *argv[]) { ...@@ -212,7 +225,7 @@ int main(int argc, const char *argv[]) {
// create a manager // create a manager
locktree::manager mgr; locktree::manager mgr;
mgr.create(nullptr, nullptr, e_callback, nullptr); mgr.create(nullptr, nullptr, e_callback, nullptr);
mgr.set_max_lock_memory(1000000000); mgr.set_max_lock_memory(max_lock_memory);
// create lock trees // create lock trees
DESCRIPTOR desc_0 = nullptr; DESCRIPTOR desc_0 = nullptr;
......
...@@ -112,54 +112,54 @@ void locktree_unit_test::test_infinity(void) { ...@@ -112,54 +112,54 @@ void locktree_unit_test::test_infinity(void) {
const DBT max_int = max_dbt(); const DBT max_int = max_dbt();
// txn A will lock -inf, 5. // txn A will lock -inf, 5.
r = lt->acquire_write_lock(txnid_a, toku_dbt_negative_infinity(), five, nullptr); r = lt->acquire_write_lock(txnid_a, toku_dbt_negative_infinity(), five, nullptr, false);
invariant(r == 0); invariant(r == 0);
// txn B will fail to get any lock <= 5, even min_int // txn B will fail to get any lock <= 5, even min_int
r = lt->acquire_write_lock(txnid_b, five, five, nullptr); r = lt->acquire_write_lock(txnid_b, five, five, nullptr, false);
invariant(r == DB_LOCK_NOTGRANTED); invariant(r == DB_LOCK_NOTGRANTED);
r = lt->acquire_write_lock(txnid_b, zero, one, nullptr); r = lt->acquire_write_lock(txnid_b, zero, one, nullptr, false);
invariant(r == DB_LOCK_NOTGRANTED); invariant(r == DB_LOCK_NOTGRANTED);
r = lt->acquire_write_lock(txnid_b, &min_int, &min_int, nullptr); r = lt->acquire_write_lock(txnid_b, &min_int, &min_int, nullptr, false);
invariant(r == DB_LOCK_NOTGRANTED); invariant(r == DB_LOCK_NOTGRANTED);
r = lt->acquire_write_lock(txnid_b, toku_dbt_negative_infinity(), &min_int, nullptr); r = lt->acquire_write_lock(txnid_b, toku_dbt_negative_infinity(), &min_int, nullptr, false);
invariant(r == DB_LOCK_NOTGRANTED); invariant(r == DB_LOCK_NOTGRANTED);
lt->remove_overlapping_locks_for_txnid(txnid_a, toku_dbt_negative_infinity(), five); lt->remove_overlapping_locks_for_txnid(txnid_a, toku_dbt_negative_infinity(), five);
// txn A will lock 1, +inf // txn A will lock 1, +inf
r = lt->acquire_write_lock(txnid_a, one, toku_dbt_positive_infinity(), nullptr); r = lt->acquire_write_lock(txnid_a, one, toku_dbt_positive_infinity(), nullptr, false);
invariant(r == 0); invariant(r == 0);
// txn B will fail to get any lock >= 1, even max_int // txn B will fail to get any lock >= 1, even max_int
r = lt->acquire_write_lock(txnid_b, one, one, nullptr); r = lt->acquire_write_lock(txnid_b, one, one, nullptr, false);
invariant(r == DB_LOCK_NOTGRANTED); invariant(r == DB_LOCK_NOTGRANTED);
r = lt->acquire_write_lock(txnid_b, two, five, nullptr); r = lt->acquire_write_lock(txnid_b, two, five, nullptr, false);
invariant(r == DB_LOCK_NOTGRANTED); invariant(r == DB_LOCK_NOTGRANTED);
r = lt->acquire_write_lock(txnid_b, &max_int, &max_int, nullptr); r = lt->acquire_write_lock(txnid_b, &max_int, &max_int, nullptr, false);
invariant(r == DB_LOCK_NOTGRANTED); invariant(r == DB_LOCK_NOTGRANTED);
r = lt->acquire_write_lock(txnid_b, &max_int, toku_dbt_positive_infinity(), nullptr); r = lt->acquire_write_lock(txnid_b, &max_int, toku_dbt_positive_infinity(), nullptr, false);
invariant(r == DB_LOCK_NOTGRANTED); invariant(r == DB_LOCK_NOTGRANTED);
lt->remove_overlapping_locks_for_txnid(txnid_a, toku_dbt_negative_infinity(), five); lt->remove_overlapping_locks_for_txnid(txnid_a, toku_dbt_negative_infinity(), five);
// txn A will lock -inf, +inf // txn A will lock -inf, +inf
r = lt->acquire_write_lock(txnid_a, toku_dbt_negative_infinity(), toku_dbt_positive_infinity(), nullptr); r = lt->acquire_write_lock(txnid_a, toku_dbt_negative_infinity(), toku_dbt_positive_infinity(), nullptr, false);
invariant(r == 0); invariant(r == 0);
// txn B will fail to get any lock // txn B will fail to get any lock
r = lt->acquire_write_lock(txnid_b, zero, one, nullptr); r = lt->acquire_write_lock(txnid_b, zero, one, nullptr, false);
invariant(r == DB_LOCK_NOTGRANTED); invariant(r == DB_LOCK_NOTGRANTED);
r = lt->acquire_write_lock(txnid_b, two, five, nullptr); r = lt->acquire_write_lock(txnid_b, two, five, nullptr, false);
invariant(r == DB_LOCK_NOTGRANTED); invariant(r == DB_LOCK_NOTGRANTED);
r = lt->acquire_write_lock(txnid_b, &min_int, &min_int, nullptr); r = lt->acquire_write_lock(txnid_b, &min_int, &min_int, nullptr, false);
invariant(r == DB_LOCK_NOTGRANTED); invariant(r == DB_LOCK_NOTGRANTED);
r = lt->acquire_write_lock(txnid_b, &min_int, &max_int, nullptr); r = lt->acquire_write_lock(txnid_b, &min_int, &max_int, nullptr, false);
invariant(r == DB_LOCK_NOTGRANTED); invariant(r == DB_LOCK_NOTGRANTED);
r = lt->acquire_write_lock(txnid_b, &max_int, &max_int, nullptr); r = lt->acquire_write_lock(txnid_b, &max_int, &max_int, nullptr, false);
invariant(r == DB_LOCK_NOTGRANTED); invariant(r == DB_LOCK_NOTGRANTED);
r = lt->acquire_write_lock(txnid_b, toku_dbt_negative_infinity(), toku_dbt_negative_infinity(), nullptr); r = lt->acquire_write_lock(txnid_b, toku_dbt_negative_infinity(), toku_dbt_negative_infinity(), nullptr, false);
invariant(r == DB_LOCK_NOTGRANTED); invariant(r == DB_LOCK_NOTGRANTED);
r = lt->acquire_write_lock(txnid_b, toku_dbt_negative_infinity(), toku_dbt_positive_infinity(), nullptr); r = lt->acquire_write_lock(txnid_b, toku_dbt_negative_infinity(), toku_dbt_positive_infinity(), nullptr, false);
invariant(r == DB_LOCK_NOTGRANTED); invariant(r == DB_LOCK_NOTGRANTED);
r = lt->acquire_write_lock(txnid_b, toku_dbt_positive_infinity(), toku_dbt_positive_infinity(), nullptr); r = lt->acquire_write_lock(txnid_b, toku_dbt_positive_infinity(), toku_dbt_positive_infinity(), nullptr, false);
invariant(r == DB_LOCK_NOTGRANTED); invariant(r == DB_LOCK_NOTGRANTED);
lt->remove_overlapping_locks_for_txnid(txnid_a, toku_dbt_negative_infinity(), toku_dbt_positive_infinity()); lt->remove_overlapping_locks_for_txnid(txnid_a, toku_dbt_negative_infinity(), toku_dbt_positive_infinity());
......
...@@ -121,15 +121,15 @@ void locktree_unit_test::test_overlapping_relock(void) { ...@@ -121,15 +121,15 @@ void locktree_unit_test::test_overlapping_relock(void) {
// do something. at the end of the test, we release 100, 100. // do something. at the end of the test, we release 100, 100.
const TXNID the_other_txnid = 9999; const TXNID the_other_txnid = 9999;
const DBT *hundred = get_dbt(100); const DBT *hundred = get_dbt(100);
r = lt->acquire_write_lock(the_other_txnid, hundred, hundred, nullptr); r = lt->acquire_write_lock(the_other_txnid, hundred, hundred, nullptr, false);
invariant(r == 0); invariant(r == 0);
for (int test_run = 0; test_run < 2; test_run++) { for (int test_run = 0; test_run < 2; test_run++) {
// test_run == 0 means test with read lock // test_run == 0 means test with read lock
// test_run == 1 means test with write lock // test_run == 1 means test with write lock
#define ACQUIRE_LOCK(txn, left, right, conflicts) \ #define ACQUIRE_LOCK(txn, left, right, conflicts) \
test_run == 0 ? lt->acquire_read_lock(txn, left, right, conflicts) \ test_run == 0 ? lt->acquire_read_lock(txn, left, right, conflicts, false) \
: lt->acquire_write_lock(txn, left, right, conflicts) : lt->acquire_write_lock(txn, left, right, conflicts, false)
// lock [1,1] and [2,2]. then lock [1,2]. // lock [1,1] and [2,2]. then lock [1,2].
// ensure only [1,2] exists in the tree // ensure only [1,2] exists in the tree
......
...@@ -115,8 +115,8 @@ void locktree_unit_test::test_simple_lock(void) { ...@@ -115,8 +115,8 @@ void locktree_unit_test::test_simple_lock(void) {
// test_run == 0 means test with read lock // test_run == 0 means test with read lock
// test_run == 1 means test with write lock // test_run == 1 means test with write lock
#define ACQUIRE_LOCK(txn, left, right, conflicts) \ #define ACQUIRE_LOCK(txn, left, right, conflicts) \
test_run == 0 ? lt->acquire_read_lock(txn, left, right, conflicts) \ test_run == 0 ? lt->acquire_read_lock(txn, left, right, conflicts, false) \
: lt->acquire_write_lock(txn, left, right, conflicts) : lt->acquire_write_lock(txn, left, right, conflicts, false)
// four txns, four points // four txns, four points
r = ACQUIRE_LOCK(txnid_a, one, one, nullptr); r = ACQUIRE_LOCK(txnid_a, one, one, nullptr);
...@@ -178,7 +178,7 @@ void locktree_unit_test::test_simple_lock(void) { ...@@ -178,7 +178,7 @@ void locktree_unit_test::test_simple_lock(void) {
for (int64_t i = 0; i < num_locks; i++) { for (int64_t i = 0; i < num_locks; i++) {
k.data = (void *) &keys[i]; k.data = (void *) &keys[i];
r = lt->acquire_read_lock(txnid_a, &k, &k, nullptr); r = lt->acquire_read_lock(txnid_a, &k, &k, nullptr, false);
invariant(r == 0); invariant(r == 0);
} }
......
...@@ -124,13 +124,13 @@ void locktree_unit_test::test_single_txnid_optimization(void) { ...@@ -124,13 +124,13 @@ void locktree_unit_test::test_single_txnid_optimization(void) {
buffer.create(); buffer.create();
#define lock_and_append_point_for_txnid_a(key) \ #define lock_and_append_point_for_txnid_a(key) \
r = lt->acquire_write_lock(txnid_a, key, key, nullptr); \ r = lt->acquire_write_lock(txnid_a, key, key, nullptr, false); \
invariant_zero(r); \ invariant_zero(r); \
buffer.append(key, key); buffer.append(key, key);
#define maybe_point_locks_for_txnid_b(i) \ #define maybe_point_locks_for_txnid_b(i) \
if (where == i) { \ if (where == i) { \
r = lt->acquire_write_lock(txnid_b, one, one, nullptr); \ r = lt->acquire_write_lock(txnid_b, one, one, nullptr, false); \
invariant_zero(r); \ invariant_zero(r); \
} }
......
...@@ -130,13 +130,13 @@ void manager_unit_test::test_status(void) { ...@@ -130,13 +130,13 @@ void manager_unit_test::test_status(void) {
const DBT *one = get_dbt(1); const DBT *one = get_dbt(1);
// txn a write locks one // txn a write locks one
r = lt->acquire_write_lock(txnid_a, one, one, nullptr); r = lt->acquire_write_lock(txnid_a, one, one, nullptr, false);
assert(r == 0); assert(r == 0);
// txn b tries to write lock one, conflicts, waits, and fails to lock one // txn b tries to write lock one, conflicts, waits, and fails to lock one
lock_request request_b; lock_request request_b;
request_b.create(); request_b.create();
request_b.set(lt, txnid_b, one, one, lock_request::type::WRITE); request_b.set(lt, txnid_b, one, one, lock_request::type::WRITE, false);
r = request_b.start(); r = request_b.start();
assert(r == DB_LOCK_NOTGRANTED); assert(r == DB_LOCK_NOTGRANTED);
r = request_b.wait(1000); r = request_b.wait(1000);
......
...@@ -106,6 +106,7 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS) ...@@ -106,6 +106,7 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS)
loader-stress-del loader-stress-del
loader-stress-test loader-stress-test
loader-tpch-load loader-tpch-load
locktree_escalation_stalls
lock-pressure lock-pressure
manyfiles manyfiles
maxsize-for-loader maxsize-for-loader
......
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
/*
COPYING CONDITIONS NOTICE:
This program is free software; you can redistribute it and/or modify
it under the terms of version 2 of the GNU General Public License as
published by the Free Software Foundation, and provided that the
following conditions are met:
* Redistributions of source code must retain this COPYING
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
GRANT (below).
* Redistributions in binary form must reproduce this COPYING
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
GRANT (below) in the documentation and/or other materials
provided with the distribution.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
COPYRIGHT NOTICE:
TokuDB, Tokutek Fractal Tree Indexing Library.
Copyright (C) 2007-2013 Tokutek, Inc.
DISCLAIMER:
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
UNIVERSITY PATENT NOTICE:
The technology is licensed by the Massachusetts Institute of
Technology, Rutgers State University of New Jersey, and the Research
Foundation of State University of New York at Stony Brook under
United States of America Serial No. 11/760379 and to the patents
and/or patent applications resulting from it.
PATENT MARKING NOTICE:
This software is covered by US Patent No. 8,185,551.
This software is covered by US Patent No. 8,489,638.
PATENT RIGHTS GRANT:
"THIS IMPLEMENTATION" means the copyrightable works distributed by
Tokutek as part of the Fractal Tree project.
"PATENT CLAIMS" means the claims of patents that are owned or
licensable by Tokutek, both currently or in the future; and that in
the absence of this license would be infringed by THIS
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
"PATENT CHALLENGE" shall mean a challenge to the validity,
patentability, enforceability and/or non-infringement of any of the
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
Tokutek hereby grants to you, for the term and geographical scope of
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
irrevocable (except as stated in this section) patent license to
make, have made, use, offer to sell, sell, import, transfer, and
otherwise run, modify, and propagate the contents of THIS
IMPLEMENTATION, where such license applies only to the PATENT
CLAIMS. This grant does not include claims that would be infringed
only as a consequence of further modifications of THIS
IMPLEMENTATION. If you or your agent or licensee institute or order
or agree to the institution of patent litigation against any entity
(including a cross-claim or counterclaim in a lawsuit) alleging that
THIS IMPLEMENTATION constitutes direct or contributory patent
infringement, or inducement of patent infringement, then any rights
granted to you under this License shall terminate as of the date
such litigation is filed. If you or your agent or exclusive
licensee institute or order or agree to the institution of a PATENT
CHALLENGE, then Tokutek may terminate any rights granted to you
under this License.
*/
#ident "Copyright (c) 2009-2013 Tokutek Inc. All rights reserved."
#ident "$Id$"
// This test ensures that lock escalation occurs on the big transaction thread.
// locktree_escalation_stalls --max_i 1000000000 --n_small 16 --verbose
#include "test.h"
#include <db.h>
#include "toku_time.h"
#include "toku_pthread.h"
// from #include "threaded_stress_test_helpers.h"
// For each line of engine status output, look for lines that contain substrings
// that match any of the strings in the pattern string. The pattern string contains
// 0 or more strings separated by the '|' character, kind of like a regex.
static void print_matching_engine_status_rows(DB_ENV *env, const char *pattern) {
uint64_t num_rows;
env->get_engine_status_num_rows(env, &num_rows);
uint64_t buf_size = num_rows * 128;
const char *row;
char *row_r;
char *pattern_copy = toku_xstrdup(pattern);
int num_patterns = 1;
for (char *p = pattern_copy; *p != '\0'; p++) {
if (*p == '|') {
*p = '\0';
num_patterns++;
}
}
char *XMALLOC_N(buf_size, buf);
int r = env->get_engine_status_text(env, buf, buf_size);
invariant_zero(r);
for (row = strtok_r(buf, "\n", &row_r); row != nullptr; row = strtok_r(nullptr, "\n", &row_r)) {
const char *p = pattern_copy;
for (int i = 0; i < num_patterns; i++, p += strlen(p) + 1) {
if (strstr(row, p) != nullptr) {
fprintf(stderr, "%s\n", row);
}
}
}
toku_free(pattern_copy);
toku_free(buf);
fflush(stderr);
}
static volatile int killed = 0;
// in a big transaction, insert a bunch of rows.
static void big_test(DB_ENV *env, DB *db, uint64_t max_i) {
if (verbose)
fprintf(stderr, "%u %s\n", toku_os_gettid(), __FUNCTION__);
int r;
DB_TXN *txn = NULL;
r = env->txn_begin(env, NULL, &txn, 0);
assert(r == 0);
for (uint64_t i = 0; !killed && i < max_i; i++) {
uint64_t k = htonl(i);
DBT key = { .data = &k, .size = sizeof k };
DBT val = { .data = &i, .size = sizeof i };
uint64_t t_start = toku_current_time_microsec();
r = db->put(db, txn, &key, &val, 0);
assert(r == 0);
uint64_t t_end = toku_current_time_microsec();
uint64_t t_delta = t_end - t_start;
if (t_delta >= 1000000) {
fprintf(stderr, "%u %s i=%" PRIu64 " %" PRIu64 "\n", toku_os_gettid(), __FUNCTION__, i, t_delta);
if (verbose)
print_matching_engine_status_rows(env, "locktree");
}
toku_pthread_yield();
}
r = txn->commit(txn, 0);
assert(r == 0);
}
// insert a row in a single transaction.
static void small_test(DB_ENV *env, DB *db, uint64_t max_i) {
if (verbose)
fprintf(stderr, "%u %s\n", toku_os_gettid(), __FUNCTION__);
int r;
uint64_t k = toku_os_gettid(); // get a unique number
for (uint64_t i = 0; !killed && i < max_i; i++) {
DB_TXN *txn = NULL;
r = env->txn_begin(env, NULL, &txn, 0);
assert(r == 0);
DBT key = { .data = &k, .size = sizeof k };
DBT val = { .data = &i, .size = sizeof i };
uint64_t t_start = toku_current_time_microsec();
r = db->put(db, txn, &key, &val, 0);
assert(r == 0);
uint64_t t_end = toku_current_time_microsec();
uint64_t t_delta = t_end - t_start;
if (t_delta >= 1000000) {
fprintf(stderr, "%u %s %" PRIu64 "\n", toku_os_gettid(), __FUNCTION__, t_delta);
assert(0);
}
r = txn->commit(txn, 0);
assert(r == 0);
toku_pthread_yield();
}
}
struct test_args {
DB_ENV *env;
DB *db;
uint64_t max_i;
void (*test_f)(DB_ENV *env, DB *db, uint64_t max_i);
};
static void *test_f(void *args) {
struct test_args *test_args = (struct test_args *) args;
test_args->test_f(test_args->env, test_args->db, test_args->max_i);
return args;
}
static void run_test(uint64_t max_i, int n_small) {
int r;
DB_ENV *env = NULL;
r = db_env_create(&env, 0);
assert(r == 0);
env->set_errfile(env, stderr);
r = env->set_cachesize(env, 8, 0, 1);
assert(r == 0);
r = env->set_lk_max_memory(env, 1000000000);
assert(r == 0);
r = env->open(env, TOKU_TEST_FILENAME, DB_INIT_LOCK+DB_INIT_MPOOL+DB_INIT_TXN+DB_INIT_LOG + DB_CREATE + DB_PRIVATE, S_IRWXU+S_IRWXG+S_IRWXO);
assert(r == 0);
DB *big_db = NULL;
r = db_create(&big_db, env, 0);
assert(r == 0);
r = big_db->open(big_db, NULL, "big", NULL, DB_BTREE, DB_AUTO_COMMIT+DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO);
assert(r == 0);
DB *small_db = NULL;
r = db_create(&small_db, env, 0);
assert(r == 0);
r = small_db->open(small_db, NULL, "small", NULL, DB_BTREE, DB_AUTO_COMMIT+DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO);
assert(r == 0);
struct test_args big_test_args = {
env, big_db, max_i, big_test,
};
toku_pthread_t big_id;
r = toku_pthread_create(&big_id, NULL, test_f, &big_test_args);
assert(r == 0);
struct test_args small_test_args[n_small];
toku_pthread_t small_id[n_small];
for (int i = 0; i < n_small; i++) {
small_test_args[i] = { env, small_db, max_i, small_test };
r = toku_pthread_create(&small_id[i], NULL, test_f, &small_test_args[i]);
assert(r == 0);
}
void *big_ret;
r = toku_pthread_join(big_id, &big_ret);
assert(r == 0);
killed = 1;
for (int i = 0; i < n_small; i++) {
void *small_ret;
r = toku_pthread_join(small_id[i], &small_ret);
assert(r == 0);
}
r = small_db->close(small_db, 0);
assert(r == 0);
r = big_db->close(big_db, 0);
assert(r == 0);
r = env->close(env, 0);
assert(r == 0);
}
int test_main (int argc, char * const argv[]) {
int r;
uint64_t max_i = 10000;
int n_small = 1;
// parse_args(argc, argv);
for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--verbose") == 0) {
verbose++;
continue;
}
if (strcmp(argv[i], "-q") == 0) {
if (verbose > 0) verbose--;
continue;
}
if (strcmp(argv[i], "--max_i") == 0 && i+1 < argc) {
max_i = atoll(argv[++i]);
continue;
}
if (strcmp(argv[i], "--n_small") == 0 && i+1 < argc) {
n_small = atoi(argv[++i]);
continue;
}
}
toku_os_recursive_delete(TOKU_TEST_FILENAME);
r = toku_os_mkdir(TOKU_TEST_FILENAME, S_IRWXU+S_IRWXG+S_IRWXO);
assert(r == 0);
run_test(max_i, n_small);
return 0;
}
...@@ -247,7 +247,7 @@ int toku_db_start_range_lock(DB *db, DB_TXN *txn, const DBT *left_key, const DBT ...@@ -247,7 +247,7 @@ int toku_db_start_range_lock(DB *db, DB_TXN *txn, const DBT *left_key, const DBT
toku::lock_request::type lock_type, toku::lock_request *request) { toku::lock_request::type lock_type, toku::lock_request *request) {
DB_TXN *txn_anc = txn_oldest_ancester(txn); DB_TXN *txn_anc = txn_oldest_ancester(txn);
TXNID txn_anc_id = txn_anc->id64(txn_anc); TXNID txn_anc_id = txn_anc->id64(txn_anc);
request->set(db->i->lt, txn_anc_id, left_key, right_key, lock_type); request->set(db->i->lt, txn_anc_id, left_key, right_key, lock_type, toku_is_big_txn(txn_anc));
const int r = request->start(); const int r = request->start();
if (r == 0) { if (r == 0) {
...@@ -297,7 +297,7 @@ void toku_db_grab_write_lock (DB *db, DBT *key, TOKUTXN tokutxn) { ...@@ -297,7 +297,7 @@ void toku_db_grab_write_lock (DB *db, DBT *key, TOKUTXN tokutxn) {
// This lock request must succeed, so we do not want to wait // This lock request must succeed, so we do not want to wait
toku::lock_request request; toku::lock_request request;
request.create(); request.create();
request.set(db->i->lt, txn_anc_id, key, key, toku::lock_request::type::WRITE); request.set(db->i->lt, txn_anc_id, key, key, toku::lock_request::type::WRITE, toku_is_big_txn(txn_anc));
int r = request.start(); int r = request.start();
invariant_zero(r); invariant_zero(r);
db_txn_note_row_lock(db, txn_anc, key, key); db_txn_note_row_lock(db, txn_anc, key, key);
......
...@@ -323,7 +323,7 @@ static int locked_txn_commit_with_progress(DB_TXN *txn, uint32_t flags, ...@@ -323,7 +323,7 @@ static int locked_txn_commit_with_progress(DB_TXN *txn, uint32_t flags,
if (!toku_txn_is_read_only(tokutxn)) { if (!toku_txn_is_read_only(tokutxn)) {
// A readonly transaction does no logging, and therefore does not need the MO lock. // A readonly transaction does no logging, and therefore does not need the MO lock.
holds_mo_lock = true; holds_mo_lock = true;
if (toku_txn_has_spilled_rollback(tokutxn)) { if (toku_is_big_tokutxn(tokutxn)) {
low_priority = true; low_priority = true;
toku_low_priority_multi_operation_client_lock(); toku_low_priority_multi_operation_client_lock();
} else { } else {
...@@ -351,7 +351,7 @@ static int locked_txn_abort_with_progress(DB_TXN *txn, ...@@ -351,7 +351,7 @@ static int locked_txn_abort_with_progress(DB_TXN *txn,
if (!toku_txn_is_read_only(tokutxn)) { if (!toku_txn_is_read_only(tokutxn)) {
// A readonly transaction does no logging, and therefore does not need the MO lock. // A readonly transaction does no logging, and therefore does not need the MO lock.
holds_mo_lock = true; holds_mo_lock = true;
if (toku_txn_has_spilled_rollback(tokutxn)) { if (toku_is_big_tokutxn(tokutxn)) {
low_priority = true; low_priority = true;
toku_low_priority_multi_operation_client_lock(); toku_low_priority_multi_operation_client_lock();
} else { } else {
...@@ -602,3 +602,11 @@ void toku_keep_prepared_txn_callback (DB_ENV *env, TOKUTXN tokutxn) { ...@@ -602,3 +602,11 @@ void toku_keep_prepared_txn_callback (DB_ENV *env, TOKUTXN tokutxn) {
void toku_increase_last_xid(DB_ENV *env, uint64_t increment) { void toku_increase_last_xid(DB_ENV *env, uint64_t increment) {
toku_txn_manager_increase_last_xid(toku_logger_get_txn_manager(env->i->logger), increment); toku_txn_manager_increase_last_xid(toku_logger_get_txn_manager(env->i->logger), increment);
} }
bool toku_is_big_txn(DB_TXN *txn) {
return toku_is_big_tokutxn(db_txn_struct_i(txn)->tokutxn);
}
bool toku_is_big_tokutxn(TOKUTXN tokutxn) {
return toku_txn_has_spilled_rollback(tokutxn);
}
...@@ -107,6 +107,9 @@ int locked_txn_abort(DB_TXN *txn); ...@@ -107,6 +107,9 @@ int locked_txn_abort(DB_TXN *txn);
void toku_keep_prepared_txn_callback(DB_ENV *env, TOKUTXN tokutxn); void toku_keep_prepared_txn_callback(DB_ENV *env, TOKUTXN tokutxn);
bool toku_is_big_txn(DB_TXN *txn);
bool toku_is_big_tokutxn(TOKUTXN tokutxn);
// Test-only function // Test-only function
extern "C" void toku_increase_last_xid(DB_ENV *env, uint64_t increment) __attribute__((__visibility__("default"))); extern "C" void toku_increase_last_xid(DB_ENV *env, uint64_t increment) __attribute__((__visibility__("default")));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment