Commit e3a289e8 authored by Yoni Fogel

[t:4844] closes #4844 Fix deadlock issue with row locks, add minor optimizations, and cleanup

Deleted duplicate function, added some optimizations to make 'no lock conflict' faster
since it's the common case.

Cleaned up the way lock tree row lock conflicts are tested (only for the new test and one existing one)


git-svn-id: file:///svn/toku/tokudb@43303 c7de825b-a66e-492c-adef-691d508d4ae1
parent cadf4f16
...@@ -572,6 +572,13 @@ lt_rt_dominates(toku_lock_tree* tree, toku_interval* query, toku_range_tree* rt, ...@@ -572,6 +572,13 @@ lt_rt_dominates(toku_lock_tree* tree, toku_interval* query, toku_range_tree* rt,
#if TOKU_LT_USE_BORDERWRITE #if TOKU_LT_USE_BORDERWRITE
/*
    Returns true iff 'query' lies strictly inside 'to': query->left compares
    greater than to->left AND query->right compares less than to->right
    (both comparisons done with toku_lt_point_cmp).
    A query that touches or crosses either endpoint of 'to' is NOT strictly
    internal.
*/
static inline bool
interval_strictly_internal(toku_interval* query, toku_interval* to) {
    assert(query && to);
    // Left edge first: if it is not strictly past to->left we are done, and
    // the right edge is never compared.
    if (toku_lt_point_cmp(query->left, to->left) <= 0)
        return false;
    return (bool)(toku_lt_point_cmp(query->right, to->right) < 0);
}
typedef enum {TOKU_NO_CONFLICT, TOKU_MAYBE_CONFLICT, TOKU_YES_CONFLICT} toku_conflict; typedef enum {TOKU_NO_CONFLICT, TOKU_MAYBE_CONFLICT, TOKU_YES_CONFLICT} toku_conflict;
/* /*
...@@ -580,9 +587,12 @@ typedef enum {TOKU_NO_CONFLICT, TOKU_MAYBE_CONFLICT, TOKU_YES_CONFLICT} toku_con ...@@ -580,9 +587,12 @@ typedef enum {TOKU_NO_CONFLICT, TOKU_MAYBE_CONFLICT, TOKU_YES_CONFLICT} toku_con
If >= 2 ranges overlap the query then, by definition of borderwrite, If >= 2 ranges overlap the query then, by definition of borderwrite,
at least one overlapping regions must not be 'self'. Design document at least one overlapping regions must not be 'self'. Design document
explains why this MUST cause a conflict. explains why this MUST cause a conflict.
If exactly one range overlaps and its data == self, there is no conflict. If exactly one border_range overlaps and its data == self, there is no conflict.
If exactly one range overlaps and its data != self, there might be a If exactly one border_range overlaps and its data != self:
conflict. We need to check the 'peer'write table to verify. - If the query range overlaps one of the endpoints of border_range,
there must be a conflict
- Otherwise (query range is strictly internal to border_range),
we need to check the 'peer'write table to determine if there is a conflict or not.
*/ */
static inline int static inline int
lt_borderwrite_conflict(toku_lock_tree* tree, TXNID self, lt_borderwrite_conflict(toku_lock_tree* tree, TXNID self,
...@@ -602,13 +612,28 @@ lt_borderwrite_conflict(toku_lock_tree* tree, TXNID self, ...@@ -602,13 +612,28 @@ lt_borderwrite_conflict(toku_lock_tree* tree, TXNID self,
if (r != 0) if (r != 0)
return r; return r;
assert(numfound <= query_size); assert(numfound <= query_size);
if (numfound == 2) if (numfound == 0)
*conflict = TOKU_YES_CONFLICT;
else if (numfound == 0 || !lt_txn_cmp(buf[0].data, self))
*conflict = TOKU_NO_CONFLICT; *conflict = TOKU_NO_CONFLICT;
else { else if (numfound == 1) {
toku_interval* border_range = &buf[0].ends;
TXNID border_txn = buf[0].data;
if (!lt_txn_cmp(border_txn, self))
*conflict = TOKU_NO_CONFLICT;
else if (interval_strictly_internal(query, border_range)) {
// Only the end-points of border_range are known to be locked.
// We need to look at the self_write tree to determine
// if there is a conflict or not.
*conflict = TOKU_MAYBE_CONFLICT; *conflict = TOKU_MAYBE_CONFLICT;
*peer = buf[0].data; *peer = border_txn;
}
else
*conflict = TOKU_YES_CONFLICT;
}
else {
// query overlaps >= 2 border ranges and therefore overlaps end points
// of >= 2 border_ranges with different transactions (at least one must
// conflict).
*conflict = TOKU_YES_CONFLICT;
} }
return 0; return 0;
} }
...@@ -646,35 +671,6 @@ lt_meets(toku_lock_tree* tree, toku_interval* query, toku_range_tree* rt, bool* ...@@ -646,35 +671,6 @@ lt_meets(toku_lock_tree* tree, toku_interval* query, toku_range_tree* rt, bool*
return 0; return 0;
} }
/*
    Reports, through '*met', whether 'query' meets 'rt' at some transaction
    other than 'self' ('meets' as defined in the design document).

    Works for any range tree, with one restriction that is asserted below:
    either the query is a single point (left == right) or the caller declares
    the tree homogeneous via 'is_homogenous'.

    Returns 0 on success; otherwise the error code from toku_rt_find, in which
    case '*met' is left untouched.
*/
static inline int
lt_meets_peer(toku_lock_tree* tree, toku_interval* query,
              toku_range_tree* rt, bool is_homogenous,
              TXNID self, bool* met) {
    assert(tree && query && rt && met);
    assert(query->left == query->right || is_homogenous);

    // Ask for at most one hit in a homogeneous tree, at most two otherwise.
    const uint32_t query_size = is_homogenous ? 1 : 2;

    // Results land in a fixed two-element stack array; 'capacity' tells
    // toku_rt_find how many slots it may use.
    toku_range storage[2];
    toku_range* found = storage;
    uint32_t capacity = query_size;
    uint32_t numfound;

    int r = toku_rt_find(rt, query, query_size, &found, &capacity, &numfound);
    if (r != 0)
        return r;
    assert(numfound <= query_size);

    // Two hits count as a peer unconditionally; a single hit counts only
    // when its owner differs from 'self'; zero hits means no peer.
    bool peer_present = (bool) (numfound == 2);
    if (numfound == 1)
        peer_present = (bool) (lt_txn_cmp(found[0].data, self) != 0);
    *met = peer_present;
    return 0;
}
/* Checks for if a write range conflicts with reads. /* Checks for if a write range conflicts with reads.
Supports ranges. */ Supports ranges. */
static inline int static inline int
...@@ -686,7 +682,7 @@ lt_write_range_conflicts_reads(toku_lock_tree* tree, TXNID txn, toku_interval* q ...@@ -686,7 +682,7 @@ lt_write_range_conflicts_reads(toku_lock_tree* tree, TXNID txn, toku_interval* q
while ((forest = toku_rth_next(tree->rth)) != NULL) { while ((forest = toku_rth_next(tree->rth)) != NULL) {
if (forest->self_read != NULL && lt_txn_cmp(forest->hash_key, txn)) { if (forest->self_read != NULL && lt_txn_cmp(forest->hash_key, txn)) {
r = lt_meets_peer(tree, query, forest->self_read, TRUE, txn, &met); r = lt_meets(tree, query, forest->self_read, &met);
if (r != 0) if (r != 0)
goto cleanup; goto cleanup;
if (met) { if (met) {
...@@ -710,7 +706,7 @@ lt_write_range_conflicts_writes(toku_lock_tree* tree, TXNID txn, toku_interval* ...@@ -710,7 +706,7 @@ lt_write_range_conflicts_writes(toku_lock_tree* tree, TXNID txn, toku_interval*
while ((forest = toku_rth_next(tree->rth)) != NULL) { while ((forest = toku_rth_next(tree->rth)) != NULL) {
if (forest->self_write != NULL && lt_txn_cmp(forest->hash_key, txn)) { if (forest->self_write != NULL && lt_txn_cmp(forest->hash_key, txn)) {
r = lt_meets_peer(tree, query, forest->self_write, TRUE, txn, &met); r = lt_meets(tree, query, forest->self_write, &met);
if (r != 0) if (r != 0)
goto cleanup; goto cleanup;
if (met) { if (met) {
...@@ -752,10 +748,10 @@ lt_check_borderwrite_conflict(toku_lock_tree* tree, TXNID txn, toku_interval* qu ...@@ -752,10 +748,10 @@ lt_check_borderwrite_conflict(toku_lock_tree* tree, TXNID txn, toku_interval* qu
return r; return r;
conflict = met ? TOKU_YES_CONFLICT : TOKU_NO_CONFLICT; conflict = met ? TOKU_YES_CONFLICT : TOKU_NO_CONFLICT;
} }
if (conflict == TOKU_YES_CONFLICT) if (conflict == TOKU_NO_CONFLICT)
return DB_LOCK_NOTGRANTED;
assert(conflict == TOKU_NO_CONFLICT);
return 0; return 0;
assert(conflict == TOKU_YES_CONFLICT);
return DB_LOCK_NOTGRANTED;
#else #else
int r = lt_write_range_conflicts_writes(tree, txn, query); int r = lt_write_range_conflicts_writes(tree, txn, query);
return r; return r;
...@@ -2551,7 +2547,8 @@ find_read_conflicts(toku_lock_tree *tree, toku_interval *query, TXNID id, txnid_ ...@@ -2551,7 +2547,8 @@ find_read_conflicts(toku_lock_tree *tree, toku_interval *query, TXNID id, txnid_
while ((forest = toku_rth_next(tree->rth)) != NULL) { while ((forest = toku_rth_next(tree->rth)) != NULL) {
if (forest->self_read != NULL && lt_txn_cmp(forest->hash_key, id)) { if (forest->self_read != NULL && lt_txn_cmp(forest->hash_key, id)) {
numfound = 0; numfound = 0;
int r = toku_rt_find(forest->self_read, query, 0, range_ptr, n_expected_ranges_ptr, &numfound); // All ranges in a self_read tree have the same txn
int r = toku_rt_find(forest->self_read, query, 1, range_ptr, n_expected_ranges_ptr, &numfound);
if (r == 0) if (r == 0)
add_conflicts(conflicts, *range_ptr, numfound, id); add_conflicts(conflicts, *range_ptr, numfound, id);
} }
...@@ -2585,11 +2582,28 @@ toku_lt_get_lock_request_conflicts(toku_lock_tree *tree, toku_lock_request *lock ...@@ -2585,11 +2582,28 @@ toku_lt_get_lock_request_conflicts(toku_lock_tree *tree, toku_lock_request *lock
uint32_t numfound = 0; uint32_t numfound = 0;
r = toku_rt_find(tree->borderwrite, &query, 0, &ranges, &n_expected_ranges, &numfound); r = toku_rt_find(tree->borderwrite, &query, 0, &ranges, &n_expected_ranges, &numfound);
if (r == 0) { if (r == 0) {
bool false_positive = false;
if (numfound == 1 && interval_strictly_internal(&query, &ranges[0].ends)) {
toku_range_tree* peer_selfwrite = toku_lt_ifexist_selfwrite(tree, ranges[0].data);
if (!peer_selfwrite) {
r = lt_panic(tree, TOKU_LT_INCONSISTENT);
goto cleanup;
}
bool met;
r = lt_meets(tree, &query, peer_selfwrite, &met);
if (r != 0)
goto cleanup;
false_positive = !met;
}
if (!false_positive) {
for (uint32_t i = 0; i < numfound; i++) for (uint32_t i = 0; i < numfound; i++)
if (ranges[i].data != lock_request->txnid) if (ranges[i].data != lock_request->txnid)
txnid_set_add(conflicts, ranges[i].data); txnid_set_add(conflicts, ranges[i].data);
} }
}
cleanup:
if (ranges) if (ranges)
toku_free(ranges); toku_free(ranges);
......
...@@ -102,3 +102,79 @@ static inline void init_point(toku_point* point, toku_lock_tree* tree) { ...@@ -102,3 +102,79 @@ static inline void init_point(toku_point* point, toku_lock_tree* tree) {
point->lt = tree; point->lt = tree;
} }
/*
    READ_REQUEST(TXN, KEY) / WRITE_REQUEST(TXN, KEY)

    Declare a toku_lock_request named <TXN>_r_<KEY> (read) or <TXN>_w_<KEY>
    (write) and initialise it as a single-point request on key_<KEY> owned by
    txn_<TXN>. Both txn_<TXN> and key_<KEY> must already be in scope.

    Each macro expands to a declaration followed by a call and deliberately
    does NOT end in a semicolon: the caller writes `READ_REQUEST(a, 1);`.
    Because the expansion contains a declaration, these can only be used where
    a declaration is legal (not as the sole body of an if/for/while).
*/
#define READ_REQUEST(TXN, KEY) \
    toku_lock_request TXN ## _r_ ## KEY; \
    toku_lock_request_init(&TXN ## _r_ ## KEY, txn_ ## TXN, &key_ ## KEY, &key_ ## KEY, LOCK_REQUEST_READ)
#define WRITE_REQUEST(TXN, KEY) \
    toku_lock_request TXN ## _w_ ## KEY; \
    toku_lock_request_init(&TXN ## _w_ ## KEY, txn_ ## TXN, &key_ ## KEY, &key_ ## KEY, LOCK_REQUEST_WRITE)
// Asserts that the ids in 'txns' are in strictly increasing order when read
// by index: every element must compare greater than its predecessor.
// Sets with fewer than two elements pass trivially.
static inline void
verify_txnid_set_sorted(txnid_set *txns) {
    const size_t n = txnid_set_size(txns);
    size_t i = 1;
    while (i < n) {
        assert(txnid_set_get(txns, i) > txnid_set_get(txns, i-1));
        i++;
    }
}
// Checks that 'request' has completed successfully (state COMPLETE with
// complete_r == 0), that the lock tree reports an empty conflict set for it,
// and then destroys the request.
static inline void
verify_and_clean_finished_request(toku_lock_tree *lt, toku_lock_request *request) {
    txnid_set conflicts;

    // The request itself must have finished without error.
    assert(request->state == LOCK_REQUEST_COMPLETE);
    assert(request->complete_r == 0);

    // A finished request must have no conflicting transactions.
    txnid_set_init(&conflicts);
    int r = toku_lt_get_lock_request_conflicts(lt, request, &conflicts);
    assert(r == 0);
    assert(txnid_set_size(&conflicts) == 0);
    txnid_set_destroy(&conflicts);

    toku_lock_request_destroy(request);
}
// Starts 'request' on 'lt', requires the start call to return 0 (CKERR), then
// verifies the finished request and destroys it via
// verify_and_clean_finished_request.
static inline void
do_request_and_succeed(toku_lock_tree *lt, toku_lock_request *request) {
    int r = toku_lock_request_start(request, lt, false);
    CKERR(r);
    verify_and_clean_finished_request(lt, request);
}
// Checks that 'request' is still pending and that the lock tree reports
// exactly the conflicting transactions listed in 'conflicting_txns'
// (num_conflicts entries, compared position by position, so the caller must
// list them in the same strictly increasing order the conflict set is
// verified to have). The request is left untouched.
static inline void
request_still_blocked(
    toku_lock_tree *lt,
    toku_lock_request *request,
    size_t num_conflicts,
    TXNID conflicting_txns[num_conflicts]) {
    txnid_set conflicts;

    assert(request->state == LOCK_REQUEST_PENDING);

    txnid_set_init(&conflicts);
    int r = toku_lt_get_lock_request_conflicts(lt, request, &conflicts);
    CKERR(r);

    // Same count, sorted, and element-wise equal to the expected list.
    assert(txnid_set_size(&conflicts) == num_conflicts);
    verify_txnid_set_sorted(&conflicts);
    size_t i = 0;
    while (i < num_conflicts) {
        assert(txnid_set_get(&conflicts, i) == conflicting_txns[i]);
        i++;
    }

    txnid_set_destroy(&conflicts);
}
// Starts 'request' on 'lt', requires the start call to return
// DB_LOCK_NOTGRANTED (CKERR2), then checks via request_still_blocked that the
// request is pending with exactly the expected conflicting transactions.
static inline void
do_request_that_blocks(
    toku_lock_tree *lt,
    toku_lock_request *request,
    int num_conflicts,
    TXNID conflicting_txns[num_conflicts]) {
    int r = toku_lock_request_start(request, lt, false);
    CKERR2(r, DB_LOCK_NOTGRANTED);
    // request_still_blocked takes the count as size_t; make the conversion visible.
    request_still_blocked(lt, request, (size_t) num_conflicts, conflicting_txns);
}
// See #4844
//
// T(A) gets R(1)
// T(B) gets W(3)
// T(B) gets W(7)
// T(C) gets R(5)
// T(A) tries W(5) blocked
// T(A) gets conflicts { C }
// T(B) tries W(1) blocked
// T(B) gets conflicts { A }
// T(C) releases locks
// T(A) gets W(5)
// T(A) releases locks
// T(B) gets W(1)
#include "test.h"
// Regression test for #4844.
// Three transactions (A, B, C) take point locks on keys "1", "3", "5", "7";
// two write requests are then expected to block with specific conflict sets
// and to complete once the conflicting transaction releases its locks.
// Returns 0 on success; any failed check aborts through assert/CKERR.
int main(int argc, const char *argv[]) {
int r;
uint32_t max_locks = 4;
uint64_t max_lock_memory = 4096;
// Command line: -v/--verbose, -q/--quiet, --max_locks N, --max_lock_memory N.
// Any other argument (including an option missing its value) hits assert(0).
for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--verbose") == 0) {
verbose++;
continue;
}
if (strcmp(argv[i], "-q") == 0 || strcmp(argv[i], "--quiet") == 0) {
// Never let verbosity go below zero.
if (verbose > 0) verbose--;
continue;
}
if (strcmp(argv[i], "--max_locks") == 0 && i+1 < argc) {
// NOTE(review): atoi gives no error indication for non-numeric input.
max_locks = atoi(argv[++i]);
continue;
}
if (strcmp(argv[i], "--max_lock_memory") == 0 && i+1 < argc) {
max_lock_memory = atoi(argv[++i]);
continue;
}
assert(0);
}
// setup
// Create the lock tree manager and obtain one lock tree from it.
toku_ltm *ltm = NULL;
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
assert(r == 0 && ltm);
toku_lock_tree *lt = NULL;
r = toku_ltm_get_lt(ltm, &lt, (DICTIONARY_ID){1}, NULL, dbcmp);
assert(r == 0 && lt);
// Transaction ids; the txn_<x> / key_<n> names are what the
// READ_REQUEST / WRITE_REQUEST macros paste together.
const TXNID txn_a = 1;
const TXNID txn_b = 2;
const TXNID txn_c = 3;
// One-byte keys "1", "3", "5", "7".
DBT key_1; dbt_init(&key_1, "1", 1);
DBT key_3; dbt_init(&key_3, "3", 1);
DBT key_5; dbt_init(&key_5, "5", 1);
DBT key_7; dbt_init(&key_7, "7", 1);
// Declare and initialise every request used below
// (a_r_1, b_w_3, b_w_7, c_r_5, a_w_5, b_w_1).
READ_REQUEST(a, 1);
WRITE_REQUEST(b, 3);
WRITE_REQUEST(b, 7);
READ_REQUEST(c, 5);
WRITE_REQUEST(a, 5);
WRITE_REQUEST(b, 1);
// Uncontended locks: A reads 1, B writes 3 and 7, C reads 5.
do_request_and_succeed(lt, &a_r_1);
do_request_and_succeed(lt, &b_w_3);
do_request_and_succeed(lt, &b_w_7);
do_request_and_succeed(lt, &c_r_5);
// A's write on 5 must block with conflict set { C } only.
do_request_that_blocks(lt, &a_w_5, 1, (TXNID[]){ txn_c });
// B's write on 1 must block with conflict set { A }.
do_request_that_blocks(lt, &b_w_1, 1, (TXNID[]){ txn_a });
// Releasing C's locks is expected to complete A's pending write on 5.
r = toku_lt_unlock_txn(lt, txn_c);
CKERR(r);
verify_and_clean_finished_request(lt, &a_w_5);
// Releasing A's locks is expected to complete B's pending write on 1.
r = toku_lt_unlock_txn(lt, txn_a);
CKERR(r);
verify_and_clean_finished_request(lt, &b_w_1);
r = toku_lt_unlock_txn(lt, txn_b);
CKERR(r);
// shutdown
toku_lt_remove_db_ref(lt);
r = toku_ltm_close(ltm); assert(r == 0);
return 0;
}
// T(A) gets R(TABLE) // T(A) gets R(TABLE)
// T(B) gets R(TABLE) // T(B) gets R(L)
// T(C) trys W(L) blocked // T(C) trys W(L) blocked
// T(C) gets conflicts { A, B } // T(C) gets conflicts { A, B }
// T(A) releases locks // T(A) releases locks
...@@ -57,52 +57,24 @@ int main(int argc, const char *argv[]) { ...@@ -57,52 +57,24 @@ int main(int argc, const char *argv[]) {
const TXNID txn_a = 1; const TXNID txn_a = 1;
toku_lock_request a_r_t; toku_lock_request_init(&a_r_t, txn_a, toku_lt_neg_infinity, toku_lt_infinity, LOCK_REQUEST_READ); toku_lock_request a_r_t; toku_lock_request_init(&a_r_t, txn_a, toku_lt_neg_infinity, toku_lt_infinity, LOCK_REQUEST_READ);
r = toku_lock_request_start(&a_r_t, lt, false); assert(r == 0);
assert(a_r_t.state == LOCK_REQUEST_COMPLETE && a_r_t.complete_r == 0); do_request_and_succeed(lt, &a_r_t);
txnid_set_init(&conflicts);
r = toku_lt_get_lock_request_conflicts(lt, &a_r_t, &conflicts);
assert(r == 0);
assert(txnid_set_size(&conflicts) == 0);
txnid_set_destroy(&conflicts);
toku_lock_request_destroy(&a_r_t);
const TXNID txn_b = 2; const TXNID txn_b = 2;
toku_lock_request b_r_l; toku_lock_request_init(&b_r_l, txn_b, &key_l, &key_l, LOCK_REQUEST_READ); READ_REQUEST(b, l);
r = toku_lock_request_start(&b_r_l, lt, false); assert(r == 0); do_request_and_succeed(lt, &b_r_l);
assert(b_r_l.state == LOCK_REQUEST_COMPLETE && b_r_l.complete_r == 0);
txnid_set_init(&conflicts);
r = toku_lt_get_lock_request_conflicts(lt, &b_r_l, &conflicts);
assert(r == 0);
assert(txnid_set_size(&conflicts) == 0);
txnid_set_destroy(&conflicts);
toku_lock_request_destroy(&b_r_l);
const TXNID txn_c = 3; const TXNID txn_c = 3;
toku_lock_request c_w_l; toku_lock_request_init(&c_w_l, txn_c, &key_l, &key_l, LOCK_REQUEST_WRITE); WRITE_REQUEST(c, l);
r = toku_lock_request_start(&c_w_l, lt, false); assert(r != 0); do_request_that_blocks(lt, &c_w_l, 2, (TXNID[]){ txn_a, txn_b });
assert(c_w_l.state == LOCK_REQUEST_PENDING);
txnid_set_init(&conflicts);
r = toku_lt_get_lock_request_conflicts(lt, &c_w_l, &conflicts);
assert(r == 0);
assert(txnid_set_size(&conflicts) == 2);
sortit(&conflicts);
assert(txnid_set_get(&conflicts, 0) == txn_a);
assert(txnid_set_get(&conflicts, 1) == txn_b);
txnid_set_destroy(&conflicts);
r = toku_lt_unlock_txn(lt, txn_a); assert(r == 0); r = toku_lt_unlock_txn(lt, txn_a); assert(r == 0);
assert(c_w_l.state == LOCK_REQUEST_PENDING); request_still_blocked(lt, &c_w_l, 1, (TXNID[]){ txn_b });
txnid_set_init(&conflicts);
r = toku_lt_get_lock_request_conflicts(lt, &c_w_l, &conflicts);
assert(r == 0);
assert(txnid_set_size(&conflicts) == 1);
assert(txnid_set_get(&conflicts, 0) == txn_b);
txnid_set_destroy(&conflicts);
r = toku_lt_unlock_txn(lt, txn_b); assert(r == 0); r = toku_lt_unlock_txn(lt, txn_b); assert(r == 0);
assert(c_w_l.state == LOCK_REQUEST_COMPLETE && c_w_l.complete_r == 0);
toku_lock_request_destroy(&c_w_l); verify_and_clean_finished_request(lt, &c_w_l);
r = toku_lt_unlock_txn(lt, txn_c); assert(r == 0); r = toku_lt_unlock_txn(lt, txn_c); assert(r == 0);
// shutdown // shutdown
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment