Commit 6711cd60 authored by Yoni Fogel

Addresses #293

Read lock appears to be finished for now.

git-svn-id: file:///svn/tokudb@1854 c7de825b-a66e-492c-adef-691d508d4ae1
parent 6c59b5d4
......@@ -13,22 +13,23 @@ static int __toku_lt_panic(toku_lock_tree *tree, int r) {
return r;
}
DBT __toku_lt_infinity;
DBT __toku_lt_neg_infinity;
const DBT* toku_lt_infinity = &__toku_lt_infinity;
const DBT* toku_lt_neg_infinity = &__toku_lt_neg_infinity;
static int __toku_infinity_compare(void* a, void* b) {
if (a == b) return 0;
else if (a == toku_lt_infinity && b != toku_lt_infinity) return 1;
else if (b == toku_lt_infinity && a != toku_lt_infinity) return -1;
else if (a == toku_lt_neg_infinity && b != toku_lt_neg_infinity) return -1;
else if (b == toku_lt_neg_infinity && a != toku_lt_neg_infinity) return 1;
else return 0;
static const DBT __toku_lt_infinity;
static const DBT __toku_lt_neg_infinity;
const DBT* const toku_lt_infinity = &__toku_lt_infinity;
const DBT* const toku_lt_neg_infinity = &__toku_lt_neg_infinity;
/*
 * Compare two payloads, at least one of which must be an infinity sentinel
 * (toku_lt_infinity or toku_lt_neg_infinity).  The comparison is done purely
 * on pointer identity; the payload bytes are never read.
 *
 * Returns 0 when both arguments are the same pointer, 1 when 'a' sorts after
 * 'b', and -1 when 'a' sorts before 'b'.
 */
static int __toku_infinite_compare(void* a, void* b) {
    if (a == b) return 0;
    if (a == toku_lt_infinity) return 1;
    if (b == toku_lt_infinity) return -1;
    if (a == toku_lt_neg_infinity) return -1;
    if (b == toku_lt_neg_infinity) return 1;
    /* Precondition violated: neither payload is infinite. */
    assert(FALSE);
    /* Keep the function well defined when assert() is compiled out: flowing
       off the end of a non-void function whose value is used is undefined
       behavior. */
    return 0;
}
static BOOL __toku_lt_is_infinity(void* p) {
/* Reports whether 'p' is one of the two infinity sentinels.  Only the address
   is examined. */
static BOOL __toku_lt_is_infinite(void* p) {
    BOOL is_pos_inf = (p == toku_lt_infinity);
    BOOL is_neg_inf = (p == toku_lt_neg_infinity);
    return is_pos_inf || is_neg_inf;
}
......@@ -51,9 +52,16 @@ int __toku_lt_point_cmp(void* a, void* b) {
toku_point* x = (toku_point*)a;
toku_point* y = (toku_point*)b;
partial_result = __toku_infinity_compare(x->key_payload, y->key_payload);
if (partial_result) return partial_result;
if (__toku_lt_is_infinite(x->key_payload) ||
__toku_lt_is_infinite(y->key_payload)) {
/* If either payload is infinite, then:
- if duplicates are allowed, the data must be the same
infinite value.
- if duplicates are not allowed, the data is irrelevant
In either case, we do not have to compare data: the key will
be the sole determinant of the comparison */
return __toku_infinite_compare(x->key_payload, y->key_payload);
}
partial_result = x->lt->db->i->brt->compare_fun(x->lt->db,
__toku_recreate_DBT(&point_1, x->key_payload, x->key_len),
__toku_recreate_DBT(&point_2, y->key_payload, y->key_len));
......@@ -61,9 +69,10 @@ int __toku_lt_point_cmp(void* a, void* b) {
if (!x->lt->duplicates) return 0;
partial_result = __toku_infinity_compare(x->data_payload, y->data_payload);
if (partial_result) return partial_result;
if (__toku_lt_is_infinite(x->data_payload) ||
__toku_lt_is_infinite(y->data_payload)) {
return __toku_infinite_compare(x->data_payload, y->data_payload);
}
return x->lt->db->i->brt->dup_compare(x->lt->db,
__toku_recreate_DBT(&point_1, x->data_payload, x->data_len),
__toku_recreate_DBT(&point_2, y->data_payload, y->data_len));
......@@ -71,6 +80,12 @@ int __toku_lt_point_cmp(void* a, void* b) {
/*
 * Release a point and the key/data payloads it refers to.  A payload that is
 * one of the infinity sentinels is skipped, since the sentinels are shared
 * objects and must not be freed.  Always returns 0.
 */
static int __toku_p_free(toku_point* point) {
    assert(point);
    void* key  = point->key_payload;
    void* data = point->data_payload;
    if (!__toku_lt_is_infinite(key))  toku_free(key);
    if (!__toku_lt_is_infinite(data)) toku_free(data);
    toku_free(point);
    return 0;
}
......@@ -78,52 +93,55 @@ static int __toku_p_free(toku_point* point) {
/*
 * Produce an owned copy of a payload.
 *
 *  - An infinity sentinel is passed through by pointer and never copied.
 *  - An empty payload (NULL pointer or zero length) becomes NULL / 0.
 *  - Anything else is duplicated into freshly allocated memory.
 *
 * Returns 0 on success.  If the allocation fails, returns errno and leaves
 * *payload_out == NULL and *len_out == 0.
 */
static int __toku_payload_copy(void** payload_out, u_int32_t* len_out,
                               void* payload_in, u_int32_t len_in) {
    assert(payload_out && len_out);
    if (__toku_lt_is_infinite(payload_in)) {
        assert(!len_in);
        *payload_out = payload_in;
        *len_out     = 0;
    }
    else if (!len_in || !payload_in) {
        *payload_out = NULL;
        *len_out     = 0;
    }
    else {
        /* Allocate with toku_malloc: the copy is released with toku_free
           in __toku_p_free, so the allocator must match. */
        *payload_out = toku_malloc(len_in);
        if (!*payload_out) {
            *len_out = 0;
            return errno;
        }
        *len_out = len_in;
        /* Copy into the new buffer, not over the pointer slot itself. */
        memcpy(*payload_out, payload_in, len_in);
    }
    return 0;
}
//TODO: Do the void*'s need to be aligned at all?
// If alignment is necessary, this code needs updating a bit.
/*
 * Replace *ppoint (a toku_point*, typically a caller-owned temporary whose
 * payloads are borrowed) with a heap-allocated copy that owns its payloads.
 *
 * Returns 0 on success, with *ppoint updated to the new point.
 * On failure returns a non-zero error code; everything allocated here has
 * been released and *ppoint is left untouched.
 */
static int __toku_p_makecopy(void** ppoint) {
    assert(ppoint);
    toku_point* point      = *(toku_point**)ppoint;
    toku_point* temp_point = NULL;
    int r;

    temp_point = (toku_point*)toku_malloc(sizeof(toku_point));
    if (!temp_point) return errno;
    /* Start from a bitwise copy; the payload pointers and lengths are
       overwritten by the payload copies below. */
    memcpy(temp_point, point, sizeof(toku_point));

    r = __toku_payload_copy(&temp_point->key_payload, &temp_point->key_len,
                            point->key_payload, point->key_len);
    if (r != 0) goto died1;

    /* The result must be captured here, otherwise a failed data copy would
       go unnoticed and the key copy could never be cleaned up. */
    r = __toku_payload_copy(&temp_point->data_payload, &temp_point->data_len,
                            point->data_payload, point->data_len);
    if (r != 0) goto died2;

    *ppoint = temp_point;
    return 0;

died2:
    /* Undo the key copy; infinity sentinels were never allocated. */
    if (!__toku_lt_is_infinite(temp_point->key_payload)) {
        toku_free(temp_point->key_payload);
    }
died1:
    toku_free(temp_point);
    return r;
}
......@@ -258,7 +276,7 @@ static int __toku_lt_borderwrite_conflict(toku_lock_tree* tree, DB_TXN* self,
This function supports only non-overlapping trees.
Uses the standard definition of 'query' meets 'tree' at 'data' from the
design document.
Determines whether 'query' meets 'rt' and if so says AT what data..
Determines whether 'query' meets 'rt'.
*/
static int __toku_lt_meets(toku_lock_tree* tree, DB_TXN* self,
toku_range* query, toku_range_tree* rt, BOOL* met) {
......@@ -316,19 +334,25 @@ static int __toku_lt_check_borderwrite_conflict(toku_lock_tree* tree,
/*
 * Derive the (payload, len) pair for a point from a DBT.
 * Nothing is copied: the resulting payload is either the sentinel DBT itself,
 * NULL, or dbt->data.
 */
static void __toku_payload_from_dbt(void** payload, u_int32_t* len, DBT* dbt) {
    assert(payload && len && dbt);
    if (__toku_lt_is_infinite(dbt)) {
        /* Infinity sentinels are identified by address and carry no bytes. */
        *payload = dbt;
        *len     = 0;
    }
    else if (!dbt->data || !dbt->size) {
        /* Normalize every empty DBT to NULL / 0. */
        *len     = 0;
        *payload = NULL;
    }
    else {
        *len     = dbt->size;
        *payload = dbt->data;
    }
}
static void __toku_init_point(toku_point* point, toku_lock_tree* tree,
DBT* key, DBT* data) {
point->lt = tree;
assert(point && tree && key && data);
memset(point, 0, sizeof(toku_point));
point->lt = tree;
__toku_payload_from_dbt(&point->key_payload, &point->key_len, key);
if (tree->duplicates) {
......@@ -363,6 +387,198 @@ static BOOL __toku_db_is_dupsort(DB* db) {
return (brtflags & TOKU_DB_DUPSORT) != 0;
}
/*
 * Widen 'to_insert' so that it covers the first 'numfound' ranges held in
 * tree->buf.  Whenever an end-point of a found range is adopted, the matching
 * alloc flag is cleared so the caller knows that end-point already exists and
 * does not need to be copied.
 */
static void __toku_lt_extend_extreme(toku_lock_tree* tree,toku_range* to_insert,
                                     BOOL* alloc_left, BOOL* alloc_right,
                                     unsigned numfound) {
    assert(to_insert && tree && alloc_left && alloc_right);
    unsigned idx;
    for (idx = 0; idx < numfound; idx++) {
        toku_range* found = &tree->buf[idx];

        /* Adopt the found left end-point if it is at or before ours. */
        int cmp_left = __toku_lt_point_cmp(found->left, to_insert->left);
        if (cmp_left <= 0) {
            /* A tie is only expected against a not-yet-adopted end-point. */
            assert(*alloc_left || cmp_left < 0);
            assert(found->left != to_insert->left);
            assert(found->left != to_insert->right);
            *alloc_left     = FALSE;
            to_insert->left = found->left;
        }

        /* Adopt the found right end-point if it is at or after ours. */
        int cmp_right = __toku_lt_point_cmp(found->right, to_insert->right);
        if (cmp_right >= 0) {
            assert(*alloc_right || cmp_right > 0);
            assert(found->right != to_insert->left ||
                   (found->left == to_insert->left &&
                    found->left == found->right));
            assert(found->right != to_insert->right);
            *alloc_right     = FALSE;
            to_insert->right = found->right;
        }
    }
}
/*
 * Give 'to_insert' ownership of whichever end-points still need it.
 *
 * alloc_left   - TRUE if to_insert->left must be replaced by an owned copy.
 * alloc_right  - in:  TRUE if to_insert->right must be replaced by a copy;
 *                out: cleared when both end-points compare equal, in which
 *                     case the right end-point shares the left copy.
 *
 * Returns 0 on success or the error from __toku_p_makecopy.  On failure any
 * copy made here has already been freed.
 */
static int __toku_lt_alloc_extreme(toku_range* to_insert,
                                   BOOL alloc_left, BOOL* alloc_right) {
    assert(to_insert && alloc_right);
    BOOL copy_left = FALSE;
    int r;

    /* Test the flag itself: the pointer is always non-NULL (asserted above),
       so testing it would make this condition ignore the caller's request. */
    if (alloc_left && *alloc_right &&
        __toku_lt_point_cmp(to_insert->left, to_insert->right) == 0) {
        /* Equal end-points share one allocation. */
        *alloc_right = FALSE;
        copy_left    = TRUE;
    }
    if (alloc_left) {
        r = __toku_p_makecopy(&to_insert->left);
        if (r != 0) return r;
    }
    if (*alloc_right) {
        assert(!copy_left);
        r = __toku_p_makecopy(&to_insert->right);
        if (r != 0) {
            /* Roll back the left copy made just above. */
            if (alloc_left) __toku_p_free(to_insert->left);
            return r;
        }
    }
    else if (copy_left) to_insert->right = to_insert->left;
    return 0;
}
/*
 * Remove the first 'numfound' ranges held in tree->buf from range tree 'rt'.
 * Each deletion is asserted to succeed.
 */
static void __toku_lt_delete_overlapping_ranges(toku_lock_tree* tree,
                                                toku_range_tree* rt,
                                                unsigned numfound) {
    assert(tree && rt);
    unsigned idx = 0;
    while (idx < numfound) {
        int rc = toku_rt_delete(rt, &tree->buf[idx]);
        assert(rc==0);
        idx++;
    }
}
/*
 * Free the end-points of the first 'numfound' ranges in tree->buf, except
 * those that 'to_insert' has adopted.
 *
 * Invariant maintained (separately for the read and write environments):
 *   (__toku_lt_point_cmp(a, b) == 0 && a.txn == b.txn) => a == b
 */
static void __toku_lt_free_points(toku_lock_tree* tree, toku_range* to_insert,
                                  unsigned numfound) {
    assert(tree && to_insert);
    unsigned idx;
    for (idx = 0; idx < numfound; idx++) {
        void* lo = tree->buf[idx].left;
        void* hi = tree->buf[idx].right;

        /* Skip the right point when it aliases the left one (freed below, if
           at all) or when to_insert now refers to it: no double free. */
        BOOL keep_hi = (hi == lo) ||
                       (hi == to_insert->left) ||
                       (hi == to_insert->right);
        BOOL keep_lo = (lo == to_insert->left) ||
                       (lo == to_insert->right);

        if (!keep_hi) __toku_p_free(hi);
        if (!keep_lo) __toku_p_free(lo);
    }
}
/*
 * Consolidate the new range with every range of the same transaction that
 * overlaps 'query', then store the merged range in both the transaction's
 * self-read tree and the main read tree.
 *
 * Memory ownership (important enough to belong in doxygen eventually):
 *  - tree->buf is an array of toku_range owned by the lock tree.  It is
 *    scratch space: its contents may become meaningless after any call
 *    that uses it.
 *  - tree->buf[i].left / .right are toku_points owned by the lock tree.  The
 *    range tree was only handed pointers to them; it does not own them.
 *  - tree->buf[i].{left,right}.{key_payload,data_payload} are owned by the
 *    lock tree; they are copies made from the DB earlier.
 *  - to_insert itself belongs to the caller.
 *  - to_insert->left / ->right are owned here.  End-points adopted from an
 *    existing range were allocated earlier; the others are gained now by
 *    allocating the points and copying their payloads.
 *
 * Returns 0 on success, otherwise a non-zero error code.
 */
static int __toku_consolidate(toku_lock_tree* tree,
                              toku_range* query, toku_range* to_insert,
                              DB_TXN* txn) {
    int r;
    int r2;
    BOOL alloc_left  = TRUE;
    BOOL alloc_right = TRUE;
    toku_range_tree* selfread;
    unsigned numfound;

    assert(tree && to_insert && txn);
    toku_range_tree* mainread = tree->mainread;
    assert(mainread);

    /* Locate this transaction's self-read tree. */
    r = __toku_lt_selfread(tree, txn, &selfread);
    if (r != 0) return r;
    assert(selfread);

    /* Collect every self-read range overlapping the query into tree->buf. */
    r = toku_rt_find(selfread, query, 0, &tree->buf, &tree->buflen,
                     &numfound);
    if (r != 0) return r;

    /* Stretch to_insert to the extreme left/right points found ... */
    __toku_lt_extend_extreme(tree, to_insert, &alloc_left, &alloc_right,
                             numfound);
    /* ... and allocate whichever end-points are still not owned. */
    r = __toku_lt_alloc_extreme(to_insert, alloc_left, &alloc_right);
    if (r != 0) return r;

    /* Drop the overlapped ranges from selfread and from mainread.
       Growth direction: if there were no overlaps, the mainread deletion
       would be unnecessary. */
    __toku_lt_delete_overlapping_ranges(tree, selfread, numfound);
    __toku_lt_delete_overlapping_ranges(tree, mainread, numfound);
    /* Free the points of tree->buf[0]..tree->buf[numfound-1] that were not
       adopted by to_insert. */
    __toku_lt_free_points(tree, to_insert, numfound);

    /* Insert the consolidated range into selfread. */
    r = toku_rt_insert(selfread, to_insert);
    if (r != 0) {
        r = __toku_lt_panic(tree, r);
        goto free_endpoints;
    }

    /* Insert the consolidated range into mainread. */
    assert(tree->mainread);
    r = toku_rt_insert(tree->mainread, to_insert);
    if (r != 0) {
        r = __toku_lt_panic(tree, r);
        goto undo_selfread_insert;
    }
    return 0;

undo_selfread_insert:
    r2 = toku_rt_delete(selfread, to_insert);
    assert(r2==0);
free_endpoints:
    /* Only end-points allocated by this call are released here. */
    if (alloc_left)  __toku_p_free(to_insert->left);
    if (alloc_right) __toku_p_free(to_insert->right);
    return r;
}
int toku_lt_create(toku_lock_tree** ptree, DB* db) {
if (!ptree || !db) return EINVAL;
int r;
......@@ -440,10 +656,9 @@ int toku_lt_acquire_range_read_lock(toku_lock_tree* tree, DB_TXN* txn,
if (!tree->duplicates && ( data_left || data_right)) return EINVAL;
if (tree->duplicates && (!data_left || !data_right)) return EINVAL;
if (tree->duplicates && key_left != data_left &&
__toku_lt_is_infinity(key_left)) return EINVAL;
__toku_lt_is_infinite(key_left)) return EINVAL;
if (tree->duplicates && key_right != data_right &&
__toku_lt_is_infinity(key_right)) return EINVAL;
assert(FALSE); //Not implemented yet.
__toku_lt_is_infinite(key_right)) return EINVAL;
int r;
toku_point left;
......@@ -470,7 +685,6 @@ int toku_lt_acquire_range_read_lock(toku_lock_tree* tree, DB_TXN* txn,
r = __toku_lt_dominated(tree, &query,
__toku_lt_ifexist_selfwrite(tree, txn), &dominated);
if (r || dominated) return r;
/* else if 'K' is dominated by selfread('txn') then return success. */
r = __toku_lt_dominated(tree, &query,
......@@ -485,140 +699,12 @@ int toku_lt_acquire_range_read_lock(toku_lock_tree* tree, DB_TXN* txn,
if (r!=0) return r;
/* Now need to merge, copy the memory and insert. */
BOOL alloc_left = TRUE;
BOOL alloc_right = TRUE;
BOOL copy_left = FALSE;
toku_range to_insert;
__toku_init_insert(&to_insert, &left, &right, txn);
toku_range_tree* mainread = tree->mainread;
assert(mainread);
toku_range_tree* selfread = __toku_lt_ifexist_selfread(tree, txn);
if (selfread) {
unsigned numfound;
r = toku_rt_find(selfread, &query, 0, &tree->buf, &tree->buflen,
&numfound);
if (r!=0) return r;
/* Consolidate the new range and all the overlapping ranges */
/** This is so important that it should go into doxygen at some point,
either here or in the .h file
Memory ownership:
- tree->buf is an array of toku_range's, which the lt owns
The contents of tree->buf are volatile (this is a buffer space
that we pass around to various functions, and every time we
invoke a new function, its previous contents may become
meaningless)
- tree->buf[i].left, .right are toku_points (ultimately a struct),
also owned by lt. We gave a pointer only to this memory to the
range tree earlier when we inserted a range, but the range tree
does not own it!
- tree->buf[i].{left,right}.{key_payload,data_payload} is owned by
the lt, we made copies from the DB at some point
- to_insert we own (it's static)
- to_insert.left, .right are toku_point's, and we own them.
If we have consolidated, we own them because we had allocated
them earlier, but
if we have not consolidated we need to gain ownership now:
we will gain ownership by copying all payloads and
allocating the points.
-to_insert.{left,right}.{key_payload, data_payload} are owned by lt,
we made copies from the DB at consolidation time
*/
unsigned i;
//TODO: If we panic, is it alright to have a memory leak here?
// What we should have, is some flags in 'lock_tree'
// to tell it that the buffer is the only way to access
// some things, and so it can delete it later?
for (i = 0; i < numfound; i++) {
/* Delete overlapping ranges from selfread ... */
r = toku_rt_delete(selfread, &(tree->buf[i]));
if (r!=0) return __toku_lt_panic(tree,r);
/* ... and mainread.
Growth direction: if we had no overlaps, the next two lines
should be commented out */
r = toku_rt_delete(mainread, &(tree->buf[i]));
if (r!=0) return __toku_lt_panic(tree,r);
}
for (i = 0; i < numfound; i++) {
/* Find the extreme left end-point among overlapping ranges */
if (__toku_lt_point_cmp(tree->buf[i].left,to_insert.left)
<= 0) {
assert(tree->buf[i].left != to_insert.left);
assert(tree->buf[i].left != to_insert.right);
alloc_left = FALSE;
to_insert.left = tree->buf[i].left;
}
/* Find the extreme right end-point */
if (__toku_lt_point_cmp(tree->buf[i].right,to_insert.right)
>= 0) {
assert(tree->buf[i].right != to_insert.left ||
(tree->buf[i].left == to_insert.left &&
tree->buf[i].left == tree->buf[i].right));
assert(tree->buf[i].right != to_insert.right);
alloc_right = FALSE;
to_insert.right = tree->buf[i].right;
}
}
for (i = 0; i < numfound; i++) {
/*
We will maintain the invariant: (separately for read and write
environments)
(__toku_lt_point_cmp(a, b) == 0 && a.txn == b.txn) => a == b
*/
/* Do not double-free. */
if (tree->buf[i].right != tree->buf[i].left &&
tree->buf[i].right != to_insert.left &&
tree->buf[i].right != to_insert.right) {
__toku_p_free(tree->buf[i].right);
}
if (tree->buf[i].left != to_insert.left &&
tree->buf[i].left != to_insert.right) {
__toku_p_free(tree->buf[i].left);
}
}
}
if (alloc_left && alloc_right && __toku_lt_point_cmp(&left, &right) == 0) {
alloc_right = FALSE;
copy_left = TRUE;
}
if (alloc_left) {
r = __toku_p_makecopy(&to_insert.left);
if (0) {
died1:
if (alloc_left) __toku_p_free(to_insert.left);
return __toku_lt_panic(tree, r);
}
if (r!=0) return __toku_lt_panic(tree,r);
}
if (alloc_right) {
assert(!copy_left);
r = __toku_p_makecopy(&to_insert.right);
if (0) {
died2:
if (alloc_right) __toku_p_free(to_insert.right);
return __toku_lt_panic(tree, r);
}
if (r!=0) goto died1;
}
else if (copy_left) to_insert.right = to_insert.left;
if (!selfread) {
r = __toku_lt_selfread(tree, txn, &selfread);
if (r!=0) return __toku_lt_panic(tree, r);
assert(selfread);
}
r = toku_rt_insert(selfread, &to_insert);
if (r!=0) goto died2;
assert(tree->mainread);
r = toku_rt_insert(tree->mainread, &to_insert);
if (r!=0) goto died2;
return 0;
/* Consolidate the new range and all the overlapping ranges */
return __toku_consolidate(tree, &query, &to_insert, txn);
}
int toku_lt_acquire_write_lock(toku_lock_tree* tree, DB_TXN* txn,
......@@ -627,7 +713,7 @@ int toku_lt_acquire_write_lock(toku_lock_tree* tree, DB_TXN* txn,
if (!tree->duplicates && data) return EINVAL;
if (tree->duplicates && !data) return EINVAL;
if (tree->duplicates && key != data &&
__toku_lt_is_infinity(key)) return EINVAL;
__toku_lt_is_infinite(key)) return EINVAL;
int r;
toku_point left;
toku_point right;
......@@ -778,9 +864,9 @@ int toku_lt_acquire_range_write_lock(toku_lock_tree* tree, DB_TXN* txn,
if (!tree->duplicates && ( data_left || data_right)) return EINVAL;
if (tree->duplicates && (!data_left || !data_right)) return EINVAL;
if (tree->duplicates && key_left != data_left &&
__toku_lt_is_infinity(key_left)) return EINVAL;
__toku_lt_is_infinite(key_left)) return EINVAL;
if (tree->duplicates && key_right != data_right &&
__toku_lt_is_infinity(key_right)) return EINVAL;
__toku_lt_is_infinite(key_right)) return EINVAL;
assert(FALSE);
//We are not ready for this.
//Not needed for Feb 1 release.
......
......@@ -23,8 +23,8 @@ typedef struct {
} toku_lock_tree;
#warning TODO: Handle 'panicked' variable in every api call.
extern const DBT* toku_lt_infinity;
extern const DBT* toku_lt_neg_infinity;
extern const DBT* const toku_lt_infinity;
extern const DBT* const toku_lt_neg_infinity;
const unsigned __toku_default_buflen = 2;
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment