diff --git a/src/lock_tree/Makefile b/src/lock_tree/Makefile
index 85223c569046405f747243cb35ce566d6e1e4f44..caa42253d16911916e76a7f60a89b19a79cb41a6 100644
--- a/src/lock_tree/Makefile
+++ b/src/lock_tree/Makefile
@@ -34,8 +34,8 @@ locktree.$(OEXT) locktree_nooverlap.$(OEXT) $(OBJS): CPPFLAGS+=-I$(TOKUROOT)incl
 $(LOCKTREE): $(LOCKTREE_TLOG)
 	cp $< $@
 
-.PHONY: build check
-build: $(LIBRARIES)
+.PHONY: build local check
+build local: $(LIBRARIES)
 
 locktree_nooverlap.$(OEXT): CPPFLAGS+=-DTOKU_RT_NOOVERLAPS
 locktree_nooverlap.$(OEXT): locktree.c
 	$(DEPEND_COMPILE)
diff --git a/src/lock_tree/locktree-internal.h b/src/lock_tree/locktree-internal.h
new file mode 100644
index 0000000000000000000000000000000000000000..cbd16145a58e6fb87a278a21cb2cc0cc0f823ab6
--- /dev/null
+++ b/src/lock_tree/locktree-internal.h
@@ -0,0 +1,130 @@
+#if !defined(TOKU_LOCKTREE_INTERNAL_H)
+#define TOKU_LOCKTREE_INTERNAL_H
+
+#include <rangetree.h>
+#include <lth.h>
+#include <rth.h>
+#include <idlth.h>
+#include <omt.h>
+
+#define TOKU_LT_USE_BORDERWRITE 1
+
+struct __toku_ltm {
+    /** The maximum number of locks allowed for the environment. */
+    uint64_t locks_limit;
+    /** The current number of locks for the environment. */
+    uint64_t curr_locks;
+    /** The maximum amount of memory for locks allowed for the environment. */
+    uint64_t lock_memory_limit;
+    /** The current amount of memory for locks for the environment. */
+    uint64_t curr_lock_memory;
+    /** Status / accountability information */
+    LTM_STATUS_S status;
+    /** The list of lock trees it manages. */
+    toku_lth* lth;
+    /** List of lock-tree DB mappings. Upon a request for a lock tree given
+        a DB, if an object for that DB exists in this list, then the lock tree
+        is retrieved from this list; otherwise, a new lock tree is created
+        and the new mapping of DB and lock tree is stored here. */
+    toku_idlth* idlth;
+    /** The panic function */
+    int (*panic)(DB*, int);
+
+    toku_pthread_mutex_t mutex;
+    bool mutex_locked;
+
+    struct timeval lock_wait_time;
+};
+
+/** \brief The lock tree structure */
+struct __toku_lock_tree {
+    /** Lock tree manager */
+    toku_ltm* mgr;
+    /** The database for which this locktree will be handling locks */
+    DB* db;
+#if TOKU_LT_USE_BORDERWRITE
+    toku_range_tree* borderwrite; /**< See design document */
+#endif
+    toku_rth* rth; /**< Stores local(read|write)set tables */
+    /** Whether lock escalation is allowed. */
+    bool lock_escalation_allowed;
+    /** Function to retrieve the key compare function from the database. */
+    toku_dbt_cmp compare_fun;
+    /** The number of references held by DB instances and transactions to this lock tree */
+    uint32_t ref_count;
+    /** DICTIONARY_ID associated with the lock tree */
+    DICTIONARY_ID dict_id;
+    OMT dbs; // The extant dbs using this lock tree.
+    OMT lock_requests;
+    toku_rth* txns_to_unlock; // set of txn's that could not release their locks because there was no db for the comparison function
+
+    toku_pthread_mutex_t mutex;
+    bool mutex_locked;
+
+    /** A temporary area where we store the results of various finds on
+        the range trees that this lock tree owns.
+        Memory ownership:
+        - tree->buf is an array of toku_range's, which the lt owns.
+          The contents of tree->buf are volatile (this is a buffer space
+          that we pass around to various functions, and every time we
+          invoke a new function, its previous contents may become
+          meaningless).
+        - tree->buf[i].left, .right are toku_points (ultimately a struct),
+          also owned by lt.
+          We gave a pointer only to this memory to the
+          range tree earlier when we inserted a range, but the range tree
+          does not own it!
+        - tree->buf[i].{left,right}.key_payload is owned by
+          the lt; we made copies from the DB at some point.
+     */
+    toku_range* buf;
+    uint32_t buflen;    /**< The length of buf */
+    toku_range* bw_buf;
+    uint32_t bw_buflen;
+    toku_range* verify_buf;
+    uint32_t verify_buflen;
+};
+
+toku_range_tree* toku_lt_ifexist_selfread(toku_lock_tree* tree, TXNID txn);
+
+toku_range_tree* toku_lt_ifexist_selfwrite(toku_lock_tree* tree, TXNID txn);
+
+#include "txnid_set.h"
+
+// internal function that finds all transactions that conflict with a given lock request
+// for read lock requests
+// conflicts = all transactions in the BWT that conflict with the lock request
+// for write lock requests
+// conflicts = all transactions in the GRT that conflict with the lock request UNION
+// all transactions in the BWT that conflict with the lock request
+// adds all of the conflicting transactions to the conflicts transaction set
+// returns an error code (0 == success)
+int toku_lt_get_lock_request_conflicts(toku_lock_tree *tree, toku_lock_request *lock_request, txnid_set *conflicts);
+
+// returns the lock request state
+toku_lock_request_state toku_lock_request_get_state(toku_lock_request *lock_request);
+
+/**
+
+   \brief A 2D BDB-inspired point.
+
+   Observe the toku_point, and marvel!
+   It makes the pair (key, data) into a 1-dimensional point,
+   on which a total order is defined by toku_lt_point_cmp.
+   Additionally, we have points at +infty and -infty as
+   key_payload = (void*) toku_lt_infinity or
+   key_payload = (void*) toku_lt_neg_infinity.
+ */
+struct __toku_point {
+    toku_lock_tree* lt; /**< The lock tree, where toku_lt_point_cmp
+                             is defined */
+    void* key_payload;  /**< The key ... */
+    uint32_t key_len;   /**< and its length */
+};
+#if !defined(__TOKU_POINT)
+#define __TOKU_POINT
+typedef struct __toku_point toku_point;
+#endif
+
+int toku_lt_point_cmp(const toku_point* x, const toku_point* y);
+
+#endif
diff --git a/src/lock_tree/locktree.c b/src/lock_tree/locktree.c
index 599b72d871aec91fff53640cd369494848e18853..76d1e9fe57d9033f4518502e43e0c2a2f617355b 100644
--- a/src/lock_tree/locktree.c
+++ b/src/lock_tree/locktree.c
@@ -11,6 +11,7 @@
 #include <toku_portability.h>
 #include "memory.h"
 #include <locktree.h>
+#include <locktree-internal.h>
 #include <ydb-internal.h>
 #include <brt-internal.h>
 #include <toku_stdint.h>
@@ -87,16 +88,17 @@ toku_ltm_get_status(toku_ltm* mgr, LTM_STATUS statp) {
     *statp = mgr->status;
 }
 
-static inline int lt_panic(toku_lock_tree *tree, int r) {
+static inline int
+lt_panic(toku_lock_tree *tree, int r) {
     return tree->mgr->panic(tree->db, r);
 }
 
 // forward defs of lock request tree functions
-static void toku_lock_request_tree_init(toku_lock_tree *tree);
-static void toku_lock_request_tree_destroy(toku_lock_tree *tree);
-static void toku_lock_request_tree_insert(toku_lock_tree *tree, toku_lock_request *lock_request);
-static void toku_lock_request_tree_delete(toku_lock_tree *tree, toku_lock_request *lock_request);
-static toku_lock_request *toku_lock_request_tree_find(toku_lock_tree *tree, TXNID id);
+static void lock_request_tree_init(toku_lock_tree *tree);
+static void lock_request_tree_destroy(toku_lock_tree *tree);
+static void lock_request_tree_insert(toku_lock_tree *tree, toku_lock_request *lock_request);
+static void lock_request_tree_delete(toku_lock_tree *tree, toku_lock_request *lock_request);
+static toku_lock_request *lock_request_tree_find(toku_lock_tree *tree, TXNID id);
 
 const uint32_t __toku_default_buflen = 2;
 
@@ -390,8 +392,8 @@ ltm_incr_lock_memory(toku_ltm *mgr, size_t s) {
     (void) __sync_add_and_fetch(&mgr->curr_lock_memory, s);
 }
 
-void
-toku_ltm_incr_lock_memory(void *extra, size_t s) {
+static void
+ltm_incr_lock_memory_callback(void *extra, size_t s) {
     toku_ltm *mgr = (toku_ltm *) extra;
     ltm_incr_lock_memory(mgr, s);
 }
@@ -402,8 +404,8 @@ ltm_decr_lock_memory(toku_ltm *mgr, size_t s) {
     (void) __sync_sub_and_fetch(&mgr->curr_lock_memory, s);
 }
 
-void
-toku_ltm_decr_lock_memory(void *extra, size_t s) {
+static void
+ltm_decr_lock_memory_callback(void *extra, size_t s) {
     toku_ltm *mgr = (toku_ltm *) extra;
     ltm_decr_lock_memory(mgr, s);
 }
@@ -503,7 +505,7 @@ lt_selfread(toku_lock_tree* tree, TXNID txn, toku_range_tree** pselfread) {
     assert(forest);
     if (!forest->self_read) {
         r = toku_rt_create(&forest->self_read, toku_lt_point_cmp, lt_txn_cmp, FALSE,
-                           toku_ltm_incr_lock_memory, toku_ltm_decr_lock_memory, tree->mgr);
+                           ltm_incr_lock_memory_callback, ltm_decr_lock_memory_callback, tree->mgr);
         if (r != 0)
             goto cleanup;
         assert(forest->self_read);
@@ -532,7 +534,7 @@ lt_selfwrite(toku_lock_tree* tree, TXNID txn, toku_range_tree** pselfwrite) {
     assert(forest);
     if (!forest->self_write) {
         r = toku_rt_create(&forest->self_write, toku_lt_point_cmp, lt_txn_cmp, FALSE,
-                           toku_ltm_incr_lock_memory, toku_ltm_decr_lock_memory, tree->mgr);
+                           ltm_incr_lock_memory_callback, ltm_decr_lock_memory_callback, tree->mgr);
         if (r != 0)
             goto cleanup;
         assert(forest->self_write);
@@ -589,6 +591,8 @@ lt_rt_dominates(toku_lock_tree* tree, toku_interval* query, toku_range_tree* rt,
     return 0;
 }
 
+#if TOKU_LT_USE_BORDERWRITE
+
 typedef enum {TOKU_NO_CONFLICT, TOKU_MAYBE_CONFLICT, TOKU_YES_CONFLICT} toku_conflict;
 
 /*
@@ -629,6 +633,7 @@ lt_borderwrite_conflict(toku_lock_tree* tree, TXNID self,
     }
     return 0;
 }
+#endif
 
 /*
     Determines whether 'query' meets 'rt'.
@@ -1358,7 +1363,7 @@ toku_lt_create(toku_lock_tree** ptree,
     tmp_tree->compare_fun = compare_fun;
     tmp_tree->lock_escalation_allowed = TRUE;
     r = toku_rt_create(&tmp_tree->borderwrite, toku_lt_point_cmp, lt_txn_cmp, FALSE,
-                       toku_ltm_incr_lock_memory, toku_ltm_decr_lock_memory, mgr);
+                       ltm_incr_lock_memory_callback, ltm_decr_lock_memory_callback, mgr);
     if (r != 0)
         goto cleanup;
     r = toku_rth_create(&tmp_tree->rth);
@@ -1381,7 +1386,7 @@ toku_lt_create(toku_lock_tree** ptree,
     r = toku_omt_create(&tmp_tree->dbs);
     if (r != 0)
         goto cleanup;
-    toku_lock_request_tree_init(tmp_tree);
+    lock_request_tree_init(tmp_tree);
     toku_mutex_init(&tmp_tree->mutex, NULL);
     tmp_tree->mutex_locked = false;
     tmp_tree->ref_count = 1;
@@ -1421,7 +1426,7 @@ toku_ltm_invalidate_lt(toku_ltm* mgr, DICTIONARY_ID dict_id) {
 }
 
 static inline void
-toku_lt_set_dict_id(toku_lock_tree* lt, DICTIONARY_ID dict_id) {
+lt_set_dict_id(toku_lock_tree* lt, DICTIONARY_ID dict_id) {
     assert(lt && dict_id.dictid != DICTIONARY_ID_NONE.dictid);
     lt->dict_id = dict_id;
 }
@@ -1458,7 +1463,7 @@ toku_ltm_get_lt(toku_ltm* mgr, toku_lock_tree** ptree, DICTIONARY_ID dict_id, DB
     r = toku_lt_create(&tree, mgr, compare_fun);
     if (r != 0)
         goto cleanup;
-    toku_lt_set_dict_id(tree, dict_id);
+    lt_set_dict_id(tree, dict_id);
     /* add tree to ltm */
     r = ltm_add_lt(mgr, tree);
     if (r != 0)
@@ -1515,7 +1520,7 @@ toku_lt_close(toku_lock_tree* tree) {
     }
     tree->mgr->STATUS_VALUE(LTM_LT_DESTROY)++;
     tree->mgr->STATUS_VALUE(LTM_LT_NUM)--;
-    toku_lock_request_tree_destroy(tree);
+    lock_request_tree_destroy(tree);
     r = toku_rt_close(tree->borderwrite);
     if (!first_error && r != 0)
         first_error = r;
@@ -2050,6 +2055,7 @@ toku_lt_acquire_write_lock(toku_lock_tree* tree, DB* db, TXNID txn, const DBT* k
     return toku_lt_acquire_range_write_lock(tree, db, txn, key, key);
 }
 
+#if TOKU_LT_USE_BORDERWRITE
 static inline int
 sweep_border(toku_lock_tree* tree, toku_range* range) {
     assert(tree && range);
@@ -2153,6 +2159,7 @@ lt_border_delete(toku_lock_tree* tree, toku_range_tree* rt) {
     return 0;
 }
+#endif
 
 static inline int
 lt_unlock_txn(toku_lock_tree* tree, TXNID txn) {
@@ -2246,7 +2253,7 @@ toku_lt_add_ref(toku_lock_tree* tree) {
 }
 
 static void
-toku_ltm_stop_managing_lt(toku_ltm* mgr, toku_lock_tree* tree) {
+ltm_stop_managing_lt(toku_ltm* mgr, toku_lock_tree* tree) {
     ltm_mutex_lock(mgr);
     ltm_remove_lt(mgr, tree);
     toku_lt_map* map = toku_idlth_find(mgr->idlth, tree->dict_id);
@@ -2266,7 +2273,7 @@ toku_lt_remove_ref(toku_lock_tree* tree) {
         r = 0; goto cleanup;
     }
     assert(tree->dict_id.dictid != DICTIONARY_ID_NONE.dictid);
-    toku_ltm_stop_managing_lt(tree->mgr, tree);
+    ltm_stop_managing_lt(tree->mgr, tree);
     r = toku_lt_close(tree);
     if (r != 0)
         goto cleanup;
@@ -2327,7 +2334,7 @@ toku_lt_remove_db_ref(toku_lock_tree* tree, DB *db) {
 }
 
 static void
-toku_lock_request_init_wait(toku_lock_request *lock_request) {
+lock_request_init_wait(toku_lock_request *lock_request) {
     if (!lock_request->wait_initialized) {
         int r = toku_pthread_cond_init(&lock_request->wait, NULL); assert_zero(r);
         lock_request->wait_initialized = true;
@@ -2335,7 +2342,7 @@ toku_lock_request_init_wait(toku_lock_request *lock_request) {
 }
 
 static void
-toku_lock_request_destroy_wait(toku_lock_request *lock_request) {
+lock_request_destroy_wait(toku_lock_request *lock_request) {
     if (lock_request->wait_initialized) {
         int r = toku_pthread_cond_destroy(&lock_request->wait); assert_zero(r);
         lock_request->wait_initialized = false;
@@ -2378,16 +2385,16 @@ toku_lock_request_destroy(toku_lock_request *lock_request) {
     if (lock_request->state == LOCK_REQUEST_PENDING) {
         toku_lock_tree *tree = lock_request->tree;
         lt_mutex_lock(tree);
-        toku_lock_request_tree_delete(lock_request->tree, lock_request);
+        lock_request_tree_delete(lock_request->tree, lock_request);
         lt_mutex_unlock(tree);
     }
-    toku_lock_request_destroy_wait(lock_request);
+    lock_request_destroy_wait(lock_request);
     toku_free(lock_request->key_left_copy.data);
     toku_free(lock_request->key_right_copy.data);
 }
 
 static void
-toku_lock_request_complete(toku_lock_request *lock_request, int complete_r) {
+lock_request_complete(toku_lock_request *lock_request, int complete_r) {
     lock_request->state = LOCK_REQUEST_COMPLETE;
     lock_request->complete_r = complete_r;
 }
@@ -2395,7 +2402,7 @@ toku_lock_request_complete(toku_lock_request *lock_request, int complete_r) {
 static const struct timeval max_timeval = { ~0, 0 };
 
 static int
-toku_lock_request_wait_internal(toku_lock_request *lock_request, toku_lock_tree *tree, struct timeval *wait_time, bool tree_locked) {
+lock_request_wait(toku_lock_request *lock_request, toku_lock_tree *tree, struct timeval *wait_time, bool tree_locked) {
 #if TOKU_LT_DEBUG
     if (toku_lt_debug)
         printf("%s:%u %lu\n", __FUNCTION__, __LINE__, lock_request->txnid);
@@ -2411,21 +2418,21 @@ toku_lock_request_wait_internal(toku_lock_request *lock_request, toku_lock_tree
         struct timespec ts = { sec + d_sec, d_usec * 1000 };
         if (!tree_locked) lt_mutex_lock(tree);
         while (lock_request->state == LOCK_REQUEST_PENDING) {
-            toku_lock_request_init_wait(lock_request);
+            lock_request_init_wait(lock_request);
             tree->mutex_locked = false;
             r = pthread_cond_timedwait(&lock_request->wait, &tree->mutex, &ts);
             tree->mutex_locked = true;
             assert(r == 0 || r == ETIMEDOUT);
             if (r == ETIMEDOUT && lock_request->state == LOCK_REQUEST_PENDING) {
-                toku_lock_request_tree_delete(tree, lock_request);
-                toku_lock_request_complete(lock_request, DB_LOCK_NOTGRANTED);
+                lock_request_tree_delete(tree, lock_request);
+                lock_request_complete(lock_request, DB_LOCK_NOTGRANTED);
             }
         }
         if (!tree_locked) lt_mutex_unlock(tree);
     } else {
         if (!tree_locked) lt_mutex_lock(tree);
         while (lock_request->state == LOCK_REQUEST_PENDING) {
-            toku_lock_request_init_wait(lock_request);
+            lock_request_init_wait(lock_request);
             tree->mutex_locked = false;
             r = toku_pthread_cond_wait(&lock_request->wait, &tree->mutex); assert_zero(r);
             tree->mutex_locked = true;
@@ -2438,16 +2445,16 @@ toku_lock_request_wait_internal(toku_lock_request *lock_request, toku_lock_tree
 
 int
 toku_lock_request_wait(toku_lock_request *lock_request, toku_lock_tree *tree, struct timeval *wait_time) {
-    return toku_lock_request_wait_internal(lock_request, tree, wait_time, false);
+    return lock_request_wait(lock_request, tree, wait_time, false);
 }
 
 int
 toku_lock_request_wait_with_default_timeout(toku_lock_request *lock_request, toku_lock_tree *tree) {
-    return toku_lock_request_wait_internal(lock_request, tree, &tree->mgr->lock_wait_time, false);
+    return lock_request_wait(lock_request, tree, &tree->mgr->lock_wait_time, false);
 }
 
-void
-toku_lock_request_wakeup(toku_lock_request *lock_request, toku_lock_tree *tree UU()) {
+static void
+lock_request_wakeup(toku_lock_request *lock_request, toku_lock_tree *tree UU()) {
     if (lock_request->wait_initialized) {
         int r = toku_pthread_cond_broadcast(&lock_request->wait); assert_zero(r);
     }
@@ -2456,14 +2463,14 @@ toku_lock_request_wakeup(lock_request, toku_lock_tree *tree U
 
 // a lock request tree contains pending lock requests.
 // initialize a lock request tree.
 static void
-toku_lock_request_tree_init(toku_lock_tree *tree) {
+lock_request_tree_init(toku_lock_tree *tree) {
     int r = toku_omt_create(&tree->lock_requests); assert_zero(r);
 }
 
 // destroy a lock request tree.
 // the tree must be empty when destroyed.
 static void
-toku_lock_request_tree_destroy(toku_lock_tree *tree) {
+lock_request_tree_destroy(toku_lock_tree *tree) {
     assert(toku_omt_size(tree->lock_requests) == 0);
     toku_omt_destroy(&tree->lock_requests);
 }
@@ -2481,7 +2488,7 @@ compare_lock_request(OMTVALUE a, void *b) {
 
 // insert a lock request into the tree.
 static void
-toku_lock_request_tree_insert(toku_lock_tree *tree, toku_lock_request *lock_request) {
+lock_request_tree_insert(toku_lock_tree *tree, toku_lock_request *lock_request) {
     lock_request->tree = tree;
     int r;
     OMTVALUE v;
@@ -2492,7 +2499,7 @@ toku_lock_request_tree_insert(toku_lock_tree *tree, toku_lock_requ
 
 // delete a lock request from the tree.
 static void
-toku_lock_request_tree_delete(toku_lock_tree *tree, toku_lock_request *lock_request) {
+lock_request_tree_delete(toku_lock_tree *tree, toku_lock_request *lock_request) {
     int r;
     OMTVALUE v;
     u_int32_t idx;
@@ -2504,7 +2511,7 @@ toku_lock_request_tree_delete(toku_lock_tree *tree, toku_lock_requ
 
 // find a lock request for a given transaction id.
 static toku_lock_request *
-toku_lock_request_tree_find(toku_lock_tree *tree, TXNID id) {
+lock_request_tree_find(toku_lock_tree *tree, TXNID id) {
     int r;
     OMTVALUE v;
     u_int32_t idx;
@@ -2526,7 +2533,8 @@ copy_dbt(DBT *dest, const DBT *src) {
 
 #if TOKU_LT_DEBUG
 #include <ctype.h>
-static void print_key(const char *sp, const DBT *k) {
+static void
+print_key(const char *sp, const DBT *k) {
     printf("%s", sp);
     if (k == toku_lt_neg_infinity)
         printf("-inf");
@@ -2546,10 +2554,10 @@ static void print_key(const char *sp, const DBT *k) {
 }
 #endif
 
-static void toku_lt_check_deadlock(toku_lock_tree *tree, toku_lock_request *a_lock_request);
+static void lt_check_deadlock(toku_lock_tree *tree, toku_lock_request *a_lock_request);
 
 static int
-toku_lock_request_start_locked(toku_lock_request *lock_request, toku_lock_tree *tree, bool copy_keys_if_not_granted, bool do_escalation) {
+lock_request_start(toku_lock_request *lock_request, toku_lock_tree *tree, bool copy_keys_if_not_granted, bool do_escalation) {
     assert(lock_request->state == LOCK_REQUEST_INIT);
     assert(tree->mutex_locked);
     int r = 0;
@@ -2580,14 +2588,14 @@ toku_lock_request_start_locked(toku_lock_request *lock_request, toku_lock_tree *
             if (!lt_is_infinite(lock_request->key_right))
                 lock_request->key_right = &lock_request->key_right_copy;
         }
-        toku_lock_request_tree_insert(tree, lock_request);
+        lock_request_tree_insert(tree, lock_request);
 
         // check for deadlock
-        toku_lt_check_deadlock(tree, lock_request);
+        lt_check_deadlock(tree, lock_request);
         if (lock_request->state == LOCK_REQUEST_COMPLETE)
             r = lock_request->complete_r;
     } else
-        toku_lock_request_complete(lock_request, r);
+        lock_request_complete(lock_request, r);
 
     return r;
 }
@@ -2595,23 +2603,23 @@ toku_lock_request_start_locked(toku_lock_request *lock_request, toku_lock_tree *
 int
 toku_lock_request_start(toku_lock_request *lock_request, toku_lock_tree *tree, bool copy_keys_if_not_granted) {
     lt_mutex_lock(tree);
-    int r = toku_lock_request_start_locked(lock_request, tree, copy_keys_if_not_granted, true);
+    int r = lock_request_start(lock_request, tree, copy_keys_if_not_granted, true);
     lt_mutex_unlock(tree);
     return r;
 }
 
 static int
-toku_lt_acquire_lock_request_with_timeout_locked(toku_lock_tree *tree, toku_lock_request *lock_request, struct timeval *wait_time) {
-    int r = toku_lock_request_start_locked(lock_request, tree, false, true);
+lt_acquire_lock_request_with_timeout_locked(toku_lock_tree *tree, toku_lock_request *lock_request, struct timeval *wait_time) {
+    int r = lock_request_start(lock_request, tree, false, true);
     if (r == DB_LOCK_NOTGRANTED)
-        r = toku_lock_request_wait_internal(lock_request, tree, wait_time, true);
+        r = lock_request_wait(lock_request, tree, wait_time, true);
     return r;
 }
 
 int
 toku_lt_acquire_lock_request_with_timeout(toku_lock_tree *tree, toku_lock_request *lock_request, struct timeval *wait_time) {
     lt_mutex_lock(tree);
-    int r = toku_lt_acquire_lock_request_with_timeout_locked(tree, lock_request, wait_time);
+    int r = lt_acquire_lock_request_with_timeout_locked(tree, lock_request, wait_time);
     lt_mutex_unlock(tree);
     return r;
 }
@@ -2634,9 +2642,9 @@ lt_retry_lock_requests(toku_lock_tree *tree) {
         assert(lock_request->state == LOCK_REQUEST_PENDING);
         lock_request->state = LOCK_REQUEST_INIT;
         toku_omt_delete_at(tree->lock_requests, i);
-        r = toku_lock_request_start_locked(lock_request, tree, false, false);
+        r = lock_request_start(lock_request, tree, false, false);
         if (lock_request->state == LOCK_REQUEST_COMPLETE) {
-            toku_lock_request_wakeup(lock_request, tree);
+            lock_request_wakeup(lock_request, tree);
         } else {
             assert(lock_request->state == LOCK_REQUEST_PENDING);
             i++;
@@ -2644,13 +2652,6 @@ lt_retry_lock_requests(toku_lock_tree *tree) {
     }
 }
 
-void
-toku_lt_retry_lock_requests(toku_lock_tree *tree) {
-    lt_mutex_lock(tree);
-    lt_retry_lock_requests(tree);
-    lt_mutex_unlock(tree);
-}
-
 #include <stdbool.h>
 #include "wfg.h"
 
@@ -2665,7 +2666,7 @@ build_wfg_for_a_lock_request(toku_lock_tree *tree, struct wfg *wfg, toku_lock_re
     size_t n_conflicts = txnid_set_size(&conflicts);
     for (size_t i = 0; i < n_conflicts; i++) {
         TXNID b = txnid_set_get(&conflicts, i);
-        toku_lock_request *b_lock_request = toku_lock_request_tree_find(tree, b);
+        toku_lock_request *b_lock_request = lock_request_tree_find(tree, b);
         if (b_lock_request) {
             bool b_exists = wfg_node_exists(wfg, b);
             wfg_add_edge(wfg, a_lock_request->txnid, b);
@@ -2678,7 +2679,7 @@ build_wfg_for_a_lock_request(toku_lock_tree *tree, struct wfg *wfg, toku_lock_re
 
 // check if a given lock request could deadlock with any granted locks.
 static void
-toku_lt_check_deadlock(toku_lock_tree *tree, toku_lock_request *a_lock_request) {
+lt_check_deadlock(toku_lock_tree *tree, toku_lock_request *a_lock_request) {
     // init the wfg
     struct wfg wfg_static; struct wfg *wfg = &wfg_static; wfg_init(wfg);
@@ -2695,9 +2696,9 @@ toku_lt_check_deadlock(toku_lock_tree *tree, toku_lock_request *a_lock_request)
     // wakeup T's lock request
     if (wfg_exist_cycle_from_txnid(wfg, a_lock_request->txnid)) {
         assert(a_lock_request->state == LOCK_REQUEST_PENDING);
-        toku_lock_request_complete(a_lock_request, DB_LOCK_DEADLOCK);
-        toku_lock_request_tree_delete(tree, a_lock_request);
-        toku_lock_request_wakeup(a_lock_request, tree);
+        lock_request_complete(a_lock_request, DB_LOCK_DEADLOCK);
+        lock_request_tree_delete(tree, a_lock_request);
+        lock_request_wakeup(a_lock_request, tree);
     }
 
     // destroy the wfg
diff --git a/src/lock_tree/locktree.h b/src/lock_tree/locktree.h
index 96073718edef58eb24e18d27f9b1a74fb94c0505..984d6b6412a7613e2e5d46d04705ae56f2f7fcb5 100644
--- a/src/lock_tree/locktree.h
+++ b/src/lock_tree/locktree.h
@@ -6,6 +6,10 @@
 #if !defined(TOKU_LOCKTREE_H)
 #define TOKU_LOCKTREE_H
 
+#include <stdbool.h>
+#include <db.h>
+#include <brttypes.h>
+
 /**
    \file locktree.h
    \brief Lock trees: header and comments
@@ -17,16 +21,6 @@
    each other, due to some system error like failed malloc,
    we defer to the db panic handler. Pass in another parameter to do this.
 */
-#include <stdbool.h>
-#include <db.h>
-#include <brttypes.h>
-#include <rangetree.h>
-#include <lth.h>
-#include <rth.h>
-#include <idlth.h>
-#include <omt.h>
-#include "toku_pthread.h"
-#include "toku_assert.h"
 
 #if defined(__cplusplus)
 extern "C" {
@@ -53,135 +47,77 @@ typedef struct __toku_lock_tree toku_lock_tree;
 typedef struct __toku_lth toku_lth;
 #endif
 
-#define TOKU_LT_USE_BORDERWRITE 1
-
 typedef struct __toku_ltm toku_ltm;
 
-/** \brief The lock tree structure */
-struct __toku_lock_tree {
-    /** Lock tree manager */
-    toku_ltm* mgr;
-    /** The database for which this locktree will be handling locks */
-    DB* db;
-    toku_range_tree* borderwrite; /**< See design document */
-    toku_rth* rth; /**< Stores local(read|write)set tables */
-    /** Whether lock escalation is allowed. */
-    bool lock_escalation_allowed;
-    /** Function to retrieve the key compare function from the database. */
-    toku_dbt_cmp compare_fun;
-    /** The number of references held by DB instances and transactions to this lock tree*/
-    uint32_t ref_count;
-    /** DICTIONARY_ID associated with the lock tree */
-    DICTIONARY_ID dict_id;
-    OMT dbs; //The extant dbs using this lock tree.
-    OMT lock_requests;
-    toku_rth* txns_to_unlock; // set of txn's that could not release their locks because there was no db for the comparison function
-
-    toku_pthread_mutex_t mutex;
-    bool mutex_locked;
-
-    /** A temporary area where we store the results of various find on
-        the range trees that this lock tree owns
-        Memory ownership:
-        - tree->buf is an array of toku_range's, which the lt owns
-          The contents of tree->buf are volatile (this is a buffer space
-          that we pass around to various functions, and every time we
-          invoke a new function, its previous contents may become
-          meaningless)
-        - tree->buf[i].left, .right are toku_points (ultimately a struct),
-          also owned by lt. We gave a pointer only to this memory to the
-          range tree earlier when we inserted a range, but the range tree
-          does not own it!
-        - tree->buf[i].{left,right}.key_payload is owned by
-          the lt, we made copies from the DB at some point
-     */
-    toku_range* buf;
-    uint32_t buflen;    /**< The length of buf */
-    toku_range* bw_buf;
-    uint32_t bw_buflen;
-    toku_range* verify_buf;
-    uint32_t verify_buflen;
-};
+/* Lock tree manager functions begin here */
 
-typedef enum {
-    LTM_LOCKS_LIMIT,               // number of locks allowed (obsolete)
-    LTM_LOCKS_CURR,                // number of locks in existence
-    LTM_LOCK_MEMORY_LIMIT,         // maximum amount of memory allowed for locks
-    LTM_LOCK_MEMORY_CURR,          // maximum amount of memory allowed for locks
-    LTM_LOCK_ESCALATION_SUCCESSES, // number of times lock escalation succeeded
-    LTM_LOCK_ESCALATION_FAILURES,  // number of times lock escalation failed
-    LTM_READ_LOCK,                 // number of times read lock taken successfully
-    LTM_READ_LOCK_FAIL,            // number of times read lock denied
-    LTM_OUT_OF_READ_LOCKS,         // number of times read lock denied for out_of_locks
-    LTM_WRITE_LOCK,                // number of times write lock taken successfully
-    LTM_WRITE_LOCK_FAIL,           // number of times write lock denied
-    LTM_OUT_OF_WRITE_LOCKS,        // number of times write lock denied for out_of_locks
-    LTM_LT_CREATE,                 // number of locktrees created
-    LTM_LT_CREATE_FAIL,            // number of locktrees unable to be created
-    LTM_LT_DESTROY,                // number of locktrees destroyed
-    LTM_LT_NUM,                    // number of locktrees (should be created - destroyed)
-    LTM_LT_NUM_MAX,                // max number of locktrees that have existed simultaneously
-    LTM_STATUS_NUM_ROWS
-} ltm_status_entry;
+/**
+   Creates a lock tree manager.
+
+   \param pmgr        A buffer for the new lock tree manager.
+   \param locks_limit The maximum number of locks.
 
-typedef struct {
-    BOOL initialized;
-    TOKU_ENGINE_STATUS_ROW_S status[LTM_STATUS_NUM_ROWS];
-} LTM_STATUS_S, *LTM_STATUS;
+   \return
+   - 0 on success.
+   - EINVAL if any pointer parameter is NULL.
+   - May return other errors due to system calls.
+*/
+int toku_ltm_create(toku_ltm** pmgr,
+                    uint32_t locks_limit,
+                    uint64_t lock_memory_limit,
+                    int (*panic)(DB*, int));
+
+/** Open the lock tree manager */
+int toku_ltm_open(toku_ltm *mgr);
+
+/**
+   Closes and frees a lock tree manager.
+
+   \param mgr The lock tree manager.
 
-struct __toku_ltm {
-    /** The maximum number of locks allowed for the environment. */
-    uint64_t locks_limit;
-    /** The current number of locks for the environment. */
-    uint64_t curr_locks;
-    /** The maximum amount of memory for locks allowed for the environment. */
-    uint64_t lock_memory_limit;
-    /** The current amount of memory for locks for the environment. */
-    uint64_t curr_lock_memory;
-    /** Status / accountability information */
-    LTM_STATUS_S status;
-    /** The list of lock trees it manages. */
-    toku_lth* lth;
-    /** List of lock-tree DB mappings. Upon a request for a lock tree given
-        a DB, if an object for that DB exists in this list, then the lock tree
-        is retrieved from this list, otherwise, a new lock tree is created
-        and the new mapping of DB and Lock tree is stored here */
-    toku_idlth* idlth;
-    /** The panic function */
-    int (*panic)(DB*, int);
-
-    toku_pthread_mutex_t mutex;
-    bool mutex_locked;
-
-    struct timeval lock_wait_time;
-};
-
-extern const DBT* const toku_lt_infinity;     /**< Special value denoting
-                                                   +infty */
-extern const DBT* const toku_lt_neg_infinity; /**< Special value denoting
-                                                   -infty */
+   \return
+   - 0 on success.
+   - EINVAL if any pointer parameter is NULL.
+   - May return other errors due to system calls.
+*/
+int toku_ltm_close(toku_ltm* mgr);
 
 /**
+   Sets the maximum number of locks on the lock tree manager.
+
+   \param mgr         The lock tree manager to which to set locks_limit.
+   \param locks_limit The new maximum number of locks.
+
+   \return
+   - 0 on success.
+   - EINVAL if mgr is NULL or locks_limit is 0.
+   - EDOM if locks_limit is less than the number of locks held by any lock tree
+     held by the manager.
+*/
+int toku_ltm_set_max_locks(toku_ltm* mgr, uint32_t locks_limit);
-
-   \brief A 2D BDB-inspired point.
+int toku_ltm_get_max_locks(toku_ltm* mgr, uint32_t* locks_limit);
-
-   Observe the toku_point, and marvel!
-   It makes the pair (key, data) into a 1-dimensional point,
-   on which a total order is defined by toku_lt_point_cmp.
-   Additionally, we have points at +infty and -infty as
-   key_payload = (void*) toku_lt_infinity or
-   key_payload = (void*) toku_lt_neg infinity
- */
-struct __toku_point {
-    toku_lock_tree* lt; /**< The lock tree, where toku_lt_point_cmp
-                             is defined */
-    void* key_payload;  /**< The key ... */
-    uint32_t key_len;   /**< and its length */
-};
-#if !defined(__TOKU_POINT)
-#define __TOKU_POINT
-typedef struct __toku_point toku_point;
-#endif
+int toku_ltm_set_max_lock_memory(toku_ltm* mgr, uint64_t lock_memory_limit);
+
+int toku_ltm_get_max_lock_memory(toku_ltm* mgr, uint64_t* lock_memory_limit);
+
+// set the default lock timeout. units are milliseconds
+void toku_ltm_set_lock_wait_time(toku_ltm *mgr, uint64_t lock_wait_time_msec);
+
+// get the default lock timeout
+void toku_ltm_get_lock_wait_time(toku_ltm *mgr, uint64_t *lock_wait_time_msec);
+
+/**
+   Gets a lock tree for a given DB with id dict_id
+*/
+int toku_ltm_get_lt(toku_ltm* mgr, toku_lock_tree** ptree, DICTIONARY_ID dict_id, DB *dbp, toku_dbt_cmp compare_fun);
+
+void toku_ltm_invalidate_lt(toku_ltm* mgr, DICTIONARY_ID dict_id);
+
+extern const DBT* const toku_lt_infinity;     /**< Special value denoting +infty */
+
+extern const DBT* const toku_lt_neg_infinity; /**< Special value denoting -infty */
 
 /**
    Create a lock tree. Should be called only inside DB->open.
@@ -371,93 +307,14 @@ int toku_lt_acquire_range_write_lock(toku_lock_tree* tree, DB* db, TXNID txn,
 */
 int toku_lt_unlock_txn(toku_lock_tree* tree, TXNID txn);
 
-void toku_lt_retry_lock_requests(toku_lock_tree *tree);
-
 void toku_lt_add_ref(toku_lock_tree* tree);
 
 int toku_lt_remove_ref(toku_lock_tree* tree);
 
 void toku_lt_remove_db_ref(toku_lock_tree* tree, DB *db);
 
-toku_range_tree* toku_lt_ifexist_selfread(toku_lock_tree* tree, TXNID txn);
-
-toku_range_tree* toku_lt_ifexist_selfwrite(toku_lock_tree* tree, TXNID txn);
-
 void toku_lt_verify(toku_lock_tree *tree, DB *db);
 
-int toku_lt_point_cmp(const toku_point* x, const toku_point* y);
-
-/* Lock tree manager functions begin here */
-
-/**
-   Creates a lock tree manager.
-
-   \param pmgr        A buffer for the new lock tree manager.
-   \param locks_limit The maximum number of locks.
-
-   \return
-   - 0 on success.
-   - EINVAL if any pointer parameter is NULL.
-   - May return other errors due to system calls.
-*/
-int toku_ltm_create(toku_ltm** pmgr,
-                    uint32_t locks_limit,
-                    uint64_t lock_memory_limit,
-                    int (*panic)(DB*, int));
-
-/** Open the lock tree manager */
-int toku_ltm_open(toku_ltm *mgr);
-
-/**
-   Closes and frees a lock tree manager..
-
-   \param mgr The lock tree manager.
-
-   \return
-   - 0 on success.
-   - EINVAL if any pointer parameter is NULL.
-   - May return other errors due to system calls.
-*/
-int toku_ltm_close(toku_ltm* mgr);
-
-/**
-   Sets the maximum number of locks on the lock tree manager.
-
-   \param mgr         The lock tree manager to which to set locks_limit.
-   \param locks_limit The new maximum number of locks.
-
-   \return
-   - 0 on success.
-   - EINVAL if tree is NULL or locks_limit is 0
-   - EDOM if locks_limit is less than the number of locks held by any lock tree
-     held by the manager
-*/
-int toku_ltm_set_max_locks(toku_ltm* mgr, uint32_t locks_limit);
-
-int toku_ltm_get_max_locks(toku_ltm* mgr, uint32_t* locks_limit);
-
-int toku_ltm_set_max_lock_memory(toku_ltm* mgr, uint64_t lock_memory_limit);
-
-int toku_ltm_get_max_lock_memory(toku_ltm* mgr, uint64_t* lock_memory_limit);
-
-void toku_ltm_get_status(toku_ltm* mgr, LTM_STATUS s);
-
-// set the default lock timeout. units are milliseconds
-void toku_ltm_set_lock_wait_time(toku_ltm *mgr, uint64_t lock_wait_time_msec);
-
-// get the default lock timeout
-void toku_ltm_get_lock_wait_time(toku_ltm *mgr, uint64_t *lock_wait_time_msec);
-
-/**
-   Gets a lock tree for a given DB with id dict_id
-*/
-int toku_ltm_get_lt(toku_ltm* mgr, toku_lock_tree** ptree, DICTIONARY_ID dict_id, DB *dbp, toku_dbt_cmp compare_fun);
-
-void toku_ltm_invalidate_lt(toku_ltm* mgr, DICTIONARY_ID dict_id);
-
-void toku_ltm_incr_lock_memory(void *extra, size_t s);
-void toku_ltm_decr_lock_memory(void *extra, size_t s);
-
 typedef enum {
     LOCK_REQUEST_INIT = 0,
     LOCK_REQUEST_PENDING = 1,
@@ -471,6 +328,19 @@ typedef enum {
     LOCK_REQUEST_WRITE = 2,
 } toku_lock_type;
 
+#include "toku_pthread.h"
+
+// a lock request contains the db, the key range, the lock type, and the transaction id that describes a potential row range lock.
+// the typical use case is:
+// - initialize a lock request
+// - start to try to acquire the lock
+// - do something else
+// - wait for the lock request to be resolved on the wait condition variable and a timeout.
+// - destroy the lock request
+// a lock request is resolved when its state is no longer pending: it has been granted, timed out, or deadlocked.
+// when resolved, the state of the lock request is changed and any waiting threads are awakened.
+
+// this is exposed so that we can allocate these as local variables. don't touch
 typedef struct {
     DB *db;
     TXNID txnid;
@@ -484,16 +354,6 @@ typedef struct {
     bool wait_initialized;
 } toku_lock_request;
 
-// a lock request contains the db, the key range, the lock type, and the transaction id that describes a potential row range lock.
-// the typical use case is:
-// - initialize a lock request
-// - start to try to acquire the lock
-// - do something else
-// - wait for the lock request to be resolved on the wait condition variable and a timeout.
-// - destroy the lock request
-// a lock request is resolved when its state is no longer pending, or when it becomes granted, or timedout, or deadlocked.
-// when resolved, the state of the lock request is changed and any waiting threads are awakened.
-
 // initialize a lock request (default initializer).
 void toku_lock_request_default_init(toku_lock_request *lock_request);
 
@@ -519,12 +379,6 @@ int toku_lock_request_wait(toku_lock_request *lock_request, toku_lock_tree *tree
 
 int toku_lock_request_wait_with_default_timeout(toku_lock_request *lock_request, toku_lock_tree *tree);
 
-// wakeup any threads that are waiting on a lock request.
-void toku_lock_request_wakeup(toku_lock_request *lock_request, toku_lock_tree *tree);
-
-// returns the lock request state
-toku_lock_request_state toku_lock_request_get_state(toku_lock_request *lock_request);
-
 // try to acquire a lock described by a lock request. if the lock is granted then return success.
 // otherwise wait on the lock request until the lock request is resolved (either granted or
 // deadlocks), or the given timer has expired.
@@ -533,17 +387,33 @@ int toku_lt_acquire_lock_request_with_timeout(toku_lock_tree *tree, toku_lock_re
 
 int toku_lt_acquire_lock_request_with_default_timeout(toku_lock_tree *tree, toku_lock_request *lock_request);
 
-#include "txnid_set.h"
-
-// internal function that finds all transactions that conflict with a given lock request
-// for read lock requests
-// conflicts = all transactions in the BWT that conflict with the lock request
-// for write lock requests
-// conflicts = all transactions in the GRT that conflict with the lock request UNION
-// all transactions in the BWT that conflict with the lock request
-// adds all of the conflicting transactions to the conflicts transaction set
-// returns an error code (0 == success)
-int toku_lt_get_lock_request_conflicts(toku_lock_tree *tree, toku_lock_request *lock_request, txnid_set *conflicts);
+typedef enum {
+    LTM_LOCKS_LIMIT,               // number of locks allowed (obsolete)
+    LTM_LOCKS_CURR,                // number of locks in existence
+    LTM_LOCK_MEMORY_LIMIT,         // maximum amount of memory allowed for locks
+    LTM_LOCK_MEMORY_CURR,          // current amount of memory used by locks
+    LTM_LOCK_ESCALATION_SUCCESSES, // number of times lock escalation succeeded
+    LTM_LOCK_ESCALATION_FAILURES,  // number of times lock escalation failed
+    LTM_READ_LOCK,                 // number of times read lock taken successfully
+    LTM_READ_LOCK_FAIL,            // number of times read lock denied
+    LTM_OUT_OF_READ_LOCKS,         // number of times read lock denied for out_of_locks
+    LTM_WRITE_LOCK,                // number of times write lock taken successfully
+    LTM_WRITE_LOCK_FAIL,           // number of times write lock denied
+    LTM_OUT_OF_WRITE_LOCKS,        // number of times write lock denied for out_of_locks
+    LTM_LT_CREATE,                 // number of locktrees created
+    LTM_LT_CREATE_FAIL,            // number of locktrees unable to be created
+    LTM_LT_DESTROY,                // number of locktrees destroyed
+    LTM_LT_NUM,                    // number of locktrees (should be created - destroyed)
+    LTM_LT_NUM_MAX,                // max number of locktrees that have existed simultaneously
+    LTM_STATUS_NUM_ROWS
+} ltm_status_entry;
+
+typedef struct {
+    BOOL initialized;
+    TOKU_ENGINE_STATUS_ROW_S status[LTM_STATUS_NUM_ROWS];
+} LTM_STATUS_S, *LTM_STATUS;
+
+void toku_ltm_get_status(toku_ltm* mgr, LTM_STATUS s);
 
 #if defined(__cplusplus)
 }
diff --git a/src/lock_tree/lth.h b/src/lock_tree/lth.h
index c9f089b234ea841005cf676775c954cda98b765f..5d3f871139d7bafd4d2f7fafd2e644fa212b4f3f 100644
--- a/src/lock_tree/lth.h
+++ b/src/lock_tree/lth.h
@@ -15,7 +15,6 @@
 //Defines bool data type.
 #include <db.h>
 #include <brttypes.h>
-#include <locktree.h>
 
 #if defined(__cplusplus)
 extern "C" {
diff --git a/src/lock_tree/tests/test.h b/src/lock_tree/tests/test.h
index 821437a49ccdaa397d53f3d668e1ae9a2d8b1f66..e55ce691e277027e7f65af65a3976dfb8ec16fd5 100644
--- a/src/lock_tree/tests/test.h
+++ b/src/lock_tree/tests/test.h
@@ -1,6 +1,7 @@
 #include <toku_portability.h>
 #include <string.h>
 #include <locktree.h>
+#include <locktree-internal.h>
 #include <db.h>
 #include <brttypes.h>
 #include <stdlib.h>
diff --git a/src/ydb_row_lock.c b/src/ydb_row_lock.c
index 6a600a3ca4c0b8680e2b281513ba278385fb2faa..0597e03f20b1bded7620282f920f3d6c9713b53f 100755
--- a/src/ydb_row_lock.c
+++ b/src/ydb_row_lock.c
@@ -6,12 +6,12 @@
 #include <db.h>
 #include "ydb-internal.h"
 #include "ydb_row_lock.h"
+#include "lth.h"
 
 static int
 toku_txn_add_lt(DB_TXN* txn, toku_lock_tree* lt) {
     int r = ENOSYS;
     assert(txn && lt);
-    toku_mutex_lock(&lt->mgr->mutex);
 
     toku_lth* lth = db_txn_struct_i(txn)->lth;
     // we used to initialize the transaction's lth during begin.
     // Now we initialize the lth only if the transaction needs the lth, here
@@ -33,7 +33,6 @@ toku_txn_add_lt(DB_TXN* txn, toku_lock_tree* lt) {
     toku_lt_add_ref(lt);
     r = 0;
 cleanup:
-    toku_mutex_unlock(&lt->mgr->mutex);
     return r;
 }
diff --git a/src/ydb_txn.c b/src/ydb_txn.c
index 41abf574fbe0ae53bf0e1ba64dc639112444eb1d..37c79caa249e2d8b864400c3ad1fb11ff1dbb053 100644
--- a/src/ydb_txn.c
+++ b/src/ydb_txn.c
@@ -8,6 +8,7 @@
 #include "checkpoint.h"
 #include "log_header.h"
 #include "ydb_txn.h"
+#include "lth.h"
 #include <valgrind/helgrind.h>
 
 static int
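
Reviewer note (illustrative, not part of the patch): after this reorganization, the public surface left in locktree.h composes roughly as sketched below. This is a minimal sketch under stated assumptions: db, txnid, dict_id, my_compare_fun, and my_panic are placeholders for values the caller already has, and how the request's key range and lock type get populated is elided, since that setter is not shown in this diff.

    // Hypothetical driver for the manager + lock-request lifecycle, per the
    // comments kept in locktree.h.
    toku_ltm *mgr;
    int r = toku_ltm_create(&mgr, 1000 /* locks_limit */, 1 << 20 /* lock_memory_limit */, my_panic);
    assert(r == 0);
    r = toku_ltm_open(mgr); assert(r == 0);

    toku_lock_tree *tree;
    r = toku_ltm_get_lt(mgr, &tree, dict_id, db, my_compare_fun); assert(r == 0);

    toku_lock_request request;
    toku_lock_request_default_init(&request);
    // ... fill in the request's db, txnid, key range, and lock type ...
    r = toku_lock_request_start(&request, tree, true /* copy keys if not granted */);
    if (r == DB_LOCK_NOTGRANTED) {
        // Block until the request resolves: granted (0), timed out
        // (DB_LOCK_NOTGRANTED), or deadlocked (DB_LOCK_DEADLOCK).
        r = toku_lock_request_wait_with_default_timeout(&request, tree);
    }
    toku_lock_request_destroy(&request);

    // When the last DB/txn reference is dropped, the tree is closed and
    // unmanaged (see toku_lt_remove_ref); then the manager can be closed.
    r = toku_lt_remove_ref(tree); assert(r == 0);
    r = toku_ltm_close(mgr); assert(r == 0);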