Commit a346f6c7 authored by Yoni Fogel

[t:2808] Merge 2808 onto main, remove dev and merge branches

git-svn-id: file:///svn/toku/tokudb@23201 c7de825b-a66e-492c-adef-691d508d4ae1
parent 83b56c3c
...@@ -105,8 +105,9 @@ typedef struct __toku_engine_status { ...@@ -105,8 +105,9 @@ typedef struct __toku_engine_status {
int64_t local_checkpoint_files; /* number of files subjec to local checkpoint is taken for commit */ int64_t local_checkpoint_files; /* number of files subjec to local checkpoint is taken for commit */
int64_t local_checkpoint_during_checkpoint; /* number of times a local checkpoint happens during normal checkpoint */ int64_t local_checkpoint_during_checkpoint; /* number of times a local checkpoint happens during normal checkpoint */
u_int32_t range_locks_max; /* max total number of range locks */ u_int32_t range_locks_max; /* max total number of range locks */
u_int32_t range_locks_max_per_index; /* max range locks per dictionary */
u_int32_t range_locks_curr; /* total range locks currently in use */ u_int32_t range_locks_curr; /* total range locks currently in use */
u_int64_t range_locks_max_memory; /* max total bytes of range locks */
u_int64_t range_locks_curr_memory; /* total bytes of range locks currently in use */
u_int32_t range_lock_escalation_successes; /* number of times range locks escalation succeeded */ u_int32_t range_lock_escalation_successes; /* number of times range locks escalation succeeded */
u_int32_t range_lock_escalation_failures; /* number of times range locks escalation failed */ u_int32_t range_lock_escalation_failures; /* number of times range locks escalation failed */
u_int64_t range_read_locks; /* total range read locks taken */ u_int64_t range_read_locks; /* total range read locks taken */
...@@ -266,7 +267,9 @@ struct __toku_db_env { ...@@ -266,7 +267,9 @@ struct __toku_db_env {
void *extra)); void *extra));
int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */; int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */;
int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */; int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */;
void* __toku_dummy0[18]; int (*set_lk_max_memory) (DB_ENV *env, uint64_t max);
int (*get_lk_max_memory) (DB_ENV *env, uint64_t *max);
void* __toku_dummy0[16];
char __toku_dummy1[64]; char __toku_dummy1[64];
void *api1_internal; /* 32-bit offset=212 size=4, 64=bit offset=360 size=8 */ void *api1_internal; /* 32-bit offset=212 size=4, 64=bit offset=360 size=8 */
void* __toku_dummy2[7]; void* __toku_dummy2[7];
......
...@@ -105,8 +105,9 @@ typedef struct __toku_engine_status { ...@@ -105,8 +105,9 @@ typedef struct __toku_engine_status {
int64_t local_checkpoint_files; /* number of files subjec to local checkpoint is taken for commit */ int64_t local_checkpoint_files; /* number of files subjec to local checkpoint is taken for commit */
int64_t local_checkpoint_during_checkpoint; /* number of times a local checkpoint happens during normal checkpoint */ int64_t local_checkpoint_during_checkpoint; /* number of times a local checkpoint happens during normal checkpoint */
u_int32_t range_locks_max; /* max total number of range locks */ u_int32_t range_locks_max; /* max total number of range locks */
u_int32_t range_locks_max_per_index; /* max range locks per dictionary */
u_int32_t range_locks_curr; /* total range locks currently in use */ u_int32_t range_locks_curr; /* total range locks currently in use */
u_int64_t range_locks_max_memory; /* max total bytes of range locks */
u_int64_t range_locks_curr_memory; /* total bytes of range locks currently in use */
u_int32_t range_lock_escalation_successes; /* number of times range locks escalation succeeded */ u_int32_t range_lock_escalation_successes; /* number of times range locks escalation succeeded */
u_int32_t range_lock_escalation_failures; /* number of times range locks escalation failed */ u_int32_t range_lock_escalation_failures; /* number of times range locks escalation failed */
u_int64_t range_read_locks; /* total range read locks taken */ u_int64_t range_read_locks; /* total range read locks taken */
...@@ -268,7 +269,9 @@ struct __toku_db_env { ...@@ -268,7 +269,9 @@ struct __toku_db_env {
void *extra)); void *extra));
int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */; int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */;
int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */; int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */;
void* __toku_dummy0[18]; int (*set_lk_max_memory) (DB_ENV *env, uint64_t max);
int (*get_lk_max_memory) (DB_ENV *env, uint64_t *max);
void* __toku_dummy0[16];
char __toku_dummy1[96]; char __toku_dummy1[96];
void *api1_internal; /* 32-bit offset=244 size=4, 64=bit offset=392 size=8 */ void *api1_internal; /* 32-bit offset=244 size=4, 64=bit offset=392 size=8 */
void* __toku_dummy2[7]; void* __toku_dummy2[7];
......
...@@ -105,8 +105,9 @@ typedef struct __toku_engine_status { ...@@ -105,8 +105,9 @@ typedef struct __toku_engine_status {
int64_t local_checkpoint_files; /* number of files subjec to local checkpoint is taken for commit */ int64_t local_checkpoint_files; /* number of files subjec to local checkpoint is taken for commit */
int64_t local_checkpoint_during_checkpoint; /* number of times a local checkpoint happens during normal checkpoint */ int64_t local_checkpoint_during_checkpoint; /* number of times a local checkpoint happens during normal checkpoint */
u_int32_t range_locks_max; /* max total number of range locks */ u_int32_t range_locks_max; /* max total number of range locks */
u_int32_t range_locks_max_per_index; /* max range locks per dictionary */
u_int32_t range_locks_curr; /* total range locks currently in use */ u_int32_t range_locks_curr; /* total range locks currently in use */
u_int64_t range_locks_max_memory; /* max total bytes of range locks */
u_int64_t range_locks_curr_memory; /* total bytes of range locks currently in use */
u_int32_t range_lock_escalation_successes; /* number of times range locks escalation succeeded */ u_int32_t range_lock_escalation_successes; /* number of times range locks escalation succeeded */
u_int32_t range_lock_escalation_failures; /* number of times range locks escalation failed */ u_int32_t range_lock_escalation_failures; /* number of times range locks escalation failed */
u_int64_t range_read_locks; /* total range read locks taken */ u_int64_t range_read_locks; /* total range read locks taken */
...@@ -270,7 +271,9 @@ struct __toku_db_env { ...@@ -270,7 +271,9 @@ struct __toku_db_env {
void *extra)); void *extra));
int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */; int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */;
int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */; int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */;
void* __toku_dummy0[33]; int (*set_lk_max_memory) (DB_ENV *env, uint64_t max);
int (*get_lk_max_memory) (DB_ENV *env, uint64_t *max);
void* __toku_dummy0[31];
char __toku_dummy1[128]; char __toku_dummy1[128];
void *api1_internal; /* 32-bit offset=336 size=4, 64=bit offset=544 size=8 */ void *api1_internal; /* 32-bit offset=336 size=4, 64=bit offset=544 size=8 */
void* __toku_dummy2[7]; void* __toku_dummy2[7];
......
...@@ -105,8 +105,9 @@ typedef struct __toku_engine_status { ...@@ -105,8 +105,9 @@ typedef struct __toku_engine_status {
int64_t local_checkpoint_files; /* number of files subjec to local checkpoint is taken for commit */ int64_t local_checkpoint_files; /* number of files subjec to local checkpoint is taken for commit */
int64_t local_checkpoint_during_checkpoint; /* number of times a local checkpoint happens during normal checkpoint */ int64_t local_checkpoint_during_checkpoint; /* number of times a local checkpoint happens during normal checkpoint */
u_int32_t range_locks_max; /* max total number of range locks */ u_int32_t range_locks_max; /* max total number of range locks */
u_int32_t range_locks_max_per_index; /* max range locks per dictionary */
u_int32_t range_locks_curr; /* total range locks currently in use */ u_int32_t range_locks_curr; /* total range locks currently in use */
u_int64_t range_locks_max_memory; /* max total bytes of range locks */
u_int64_t range_locks_curr_memory; /* total bytes of range locks currently in use */
u_int32_t range_lock_escalation_successes; /* number of times range locks escalation succeeded */ u_int32_t range_lock_escalation_successes; /* number of times range locks escalation succeeded */
u_int32_t range_lock_escalation_failures; /* number of times range locks escalation failed */ u_int32_t range_lock_escalation_failures; /* number of times range locks escalation failed */
u_int64_t range_read_locks; /* total range read locks taken */ u_int64_t range_read_locks; /* total range read locks taken */
...@@ -270,7 +271,9 @@ struct __toku_db_env { ...@@ -270,7 +271,9 @@ struct __toku_db_env {
void *extra)); void *extra));
int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */; int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */;
int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */; int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */;
void* __toku_dummy0[33]; int (*set_lk_max_memory) (DB_ENV *env, uint64_t max);
int (*get_lk_max_memory) (DB_ENV *env, uint64_t *max);
void* __toku_dummy0[31];
char __toku_dummy1[128]; char __toku_dummy1[128];
void *api1_internal; /* 32-bit offset=336 size=4, 64=bit offset=544 size=8 */ void *api1_internal; /* 32-bit offset=336 size=4, 64=bit offset=544 size=8 */
void* __toku_dummy2[8]; void* __toku_dummy2[8];
......
...@@ -105,8 +105,9 @@ typedef struct __toku_engine_status { ...@@ -105,8 +105,9 @@ typedef struct __toku_engine_status {
int64_t local_checkpoint_files; /* number of files subjec to local checkpoint is taken for commit */ int64_t local_checkpoint_files; /* number of files subjec to local checkpoint is taken for commit */
int64_t local_checkpoint_during_checkpoint; /* number of times a local checkpoint happens during normal checkpoint */ int64_t local_checkpoint_during_checkpoint; /* number of times a local checkpoint happens during normal checkpoint */
u_int32_t range_locks_max; /* max total number of range locks */ u_int32_t range_locks_max; /* max total number of range locks */
u_int32_t range_locks_max_per_index; /* max range locks per dictionary */
u_int32_t range_locks_curr; /* total range locks currently in use */ u_int32_t range_locks_curr; /* total range locks currently in use */
u_int64_t range_locks_max_memory; /* max total bytes of range locks */
u_int64_t range_locks_curr_memory; /* total bytes of range locks currently in use */
u_int32_t range_lock_escalation_successes; /* number of times range locks escalation succeeded */ u_int32_t range_lock_escalation_successes; /* number of times range locks escalation succeeded */
u_int32_t range_lock_escalation_failures; /* number of times range locks escalation failed */ u_int32_t range_lock_escalation_failures; /* number of times range locks escalation failed */
u_int64_t range_read_locks; /* total range read locks taken */ u_int64_t range_read_locks; /* total range read locks taken */
...@@ -271,7 +272,9 @@ struct __toku_db_env { ...@@ -271,7 +272,9 @@ struct __toku_db_env {
void *extra)); void *extra));
int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */; int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */;
int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */; int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */;
void* __toku_dummy0[34]; int (*set_lk_max_memory) (DB_ENV *env, uint64_t max);
int (*get_lk_max_memory) (DB_ENV *env, uint64_t *max);
void* __toku_dummy0[32];
char __toku_dummy1[144]; char __toku_dummy1[144];
void *api1_internal; /* 32-bit offset=356 size=4, 64=bit offset=568 size=8 */ void *api1_internal; /* 32-bit offset=356 size=4, 64=bit offset=568 size=8 */
void* __toku_dummy2[8]; void* __toku_dummy2[8];
......
...@@ -468,8 +468,9 @@ int main (int argc __attribute__((__unused__)), char *const argv[] __attribute__ ...@@ -468,8 +468,9 @@ int main (int argc __attribute__((__unused__)), char *const argv[] __attribute__
printf(" int64_t local_checkpoint_files; /* number of files subjec to local checkpoint is taken for commit */ \n"); printf(" int64_t local_checkpoint_files; /* number of files subjec to local checkpoint is taken for commit */ \n");
printf(" int64_t local_checkpoint_during_checkpoint; /* number of times a local checkpoint happens during normal checkpoint */ \n"); printf(" int64_t local_checkpoint_during_checkpoint; /* number of times a local checkpoint happens during normal checkpoint */ \n");
printf(" u_int32_t range_locks_max; /* max total number of range locks */ \n"); printf(" u_int32_t range_locks_max; /* max total number of range locks */ \n");
printf(" u_int32_t range_locks_max_per_index; /* max range locks per dictionary */ \n");
printf(" u_int32_t range_locks_curr; /* total range locks currently in use */ \n"); printf(" u_int32_t range_locks_curr; /* total range locks currently in use */ \n");
printf(" u_int64_t range_locks_max_memory; /* max total bytes of range locks */ \n");
printf(" u_int64_t range_locks_curr_memory; /* total bytes of range locks currently in use */ \n");
printf(" u_int32_t range_lock_escalation_successes; /* number of times range locks escalation succeeded */ \n"); printf(" u_int32_t range_lock_escalation_successes; /* number of times range locks escalation succeeded */ \n");
printf(" u_int32_t range_lock_escalation_failures; /* number of times range locks escalation failed */ \n"); printf(" u_int32_t range_lock_escalation_failures; /* number of times range locks escalation failed */ \n");
printf(" u_int64_t range_read_locks; /* total range read locks taken */ \n"); printf(" u_int64_t range_read_locks; /* total range read locks taken */ \n");
...@@ -553,6 +554,8 @@ int main (int argc __attribute__((__unused__)), char *const argv[] __attribute__ ...@@ -553,6 +554,8 @@ int main (int argc __attribute__((__unused__)), char *const argv[] __attribute__
" void *extra))", " void *extra))",
"int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */", "int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */",
"int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */", "int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */",
"int (*set_lk_max_memory) (DB_ENV *env, uint64_t max)",
"int (*get_lk_max_memory) (DB_ENV *env, uint64_t *max)",
NULL}; NULL};
print_struct("db_env", 1, db_env_fields32, db_env_fields64, sizeof(db_env_fields32)/sizeof(db_env_fields32[0]), extra); print_struct("db_env", 1, db_env_fields32, db_env_fields64, sizeof(db_env_fields32)/sizeof(db_env_fields32[0]), extra);
} }
......
...@@ -105,8 +105,9 @@ typedef struct __toku_engine_status { ...@@ -105,8 +105,9 @@ typedef struct __toku_engine_status {
int64_t local_checkpoint_files; /* number of files subjec to local checkpoint is taken for commit */ int64_t local_checkpoint_files; /* number of files subjec to local checkpoint is taken for commit */
int64_t local_checkpoint_during_checkpoint; /* number of times a local checkpoint happens during normal checkpoint */ int64_t local_checkpoint_during_checkpoint; /* number of times a local checkpoint happens during normal checkpoint */
u_int32_t range_locks_max; /* max total number of range locks */ u_int32_t range_locks_max; /* max total number of range locks */
u_int32_t range_locks_max_per_index; /* max range locks per dictionary */
u_int32_t range_locks_curr; /* total range locks currently in use */ u_int32_t range_locks_curr; /* total range locks currently in use */
u_int64_t range_locks_max_memory; /* max total bytes of range locks */
u_int64_t range_locks_curr_memory; /* total bytes of range locks currently in use */
u_int32_t range_lock_escalation_successes; /* number of times range locks escalation succeeded */ u_int32_t range_lock_escalation_successes; /* number of times range locks escalation succeeded */
u_int32_t range_lock_escalation_failures; /* number of times range locks escalation failed */ u_int32_t range_lock_escalation_failures; /* number of times range locks escalation failed */
u_int64_t range_read_locks; /* total range read locks taken */ u_int64_t range_read_locks; /* total range read locks taken */
...@@ -271,6 +272,8 @@ struct __toku_db_env { ...@@ -271,6 +272,8 @@ struct __toku_db_env {
void *extra)); void *extra));
int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */; int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */;
int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */; int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */;
int (*set_lk_max_memory) (DB_ENV *env, uint64_t max);
int (*get_lk_max_memory) (DB_ENV *env, uint64_t *max);
void *api1_internal; void *api1_internal;
int (*close) (DB_ENV *, u_int32_t); int (*close) (DB_ENV *, u_int32_t);
int (*dbremove) (DB_ENV *, DB_TXN *, const char *, const char *, u_int32_t); int (*dbremove) (DB_ENV *, DB_TXN *, const char *, const char *, u_int32_t);
......
...@@ -105,8 +105,9 @@ typedef struct __toku_engine_status { ...@@ -105,8 +105,9 @@ typedef struct __toku_engine_status {
int64_t local_checkpoint_files; /* number of files subjec to local checkpoint is taken for commit */ int64_t local_checkpoint_files; /* number of files subjec to local checkpoint is taken for commit */
int64_t local_checkpoint_during_checkpoint; /* number of times a local checkpoint happens during normal checkpoint */ int64_t local_checkpoint_during_checkpoint; /* number of times a local checkpoint happens during normal checkpoint */
u_int32_t range_locks_max; /* max total number of range locks */ u_int32_t range_locks_max; /* max total number of range locks */
u_int32_t range_locks_max_per_index; /* max range locks per dictionary */
u_int32_t range_locks_curr; /* total range locks currently in use */ u_int32_t range_locks_curr; /* total range locks currently in use */
u_int64_t range_locks_max_memory; /* max total bytes of range locks */
u_int64_t range_locks_curr_memory; /* total bytes of range locks currently in use */
u_int32_t range_lock_escalation_successes; /* number of times range locks escalation succeeded */ u_int32_t range_lock_escalation_successes; /* number of times range locks escalation succeeded */
u_int32_t range_lock_escalation_failures; /* number of times range locks escalation failed */ u_int32_t range_lock_escalation_failures; /* number of times range locks escalation failed */
u_int64_t range_read_locks; /* total range read locks taken */ u_int64_t range_read_locks; /* total range read locks taken */
...@@ -271,6 +272,8 @@ struct __toku_db_env { ...@@ -271,6 +272,8 @@ struct __toku_db_env {
void *extra)); void *extra));
int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */; int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */;
int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */; int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */;
int (*set_lk_max_memory) (DB_ENV *env, uint64_t max);
int (*get_lk_max_memory) (DB_ENV *env, uint64_t *max);
void *api1_internal; void *api1_internal;
int (*close) (DB_ENV *, u_int32_t); int (*close) (DB_ENV *, u_int32_t);
int (*dbremove) (DB_ENV *, DB_TXN *, const char *, const char *, u_int32_t); int (*dbremove) (DB_ENV *, DB_TXN *, const char *, const char *, u_int32_t);
......
...@@ -17,9 +17,9 @@ ...@@ -17,9 +17,9 @@
/* TODO: investigate whether we can remove the user_memory functions */ /* TODO: investigate whether we can remove the user_memory functions */
/* TODO: reallocate the hash idlth if it grows too big. Perhaps, use toku_get_prime in newbrt/primes.c */ /* TODO: reallocate the hash idlth if it grows too big. Perhaps, use toku_get_prime in newbrt/primes.c */
const u_int32_t __toku_idlth_init_size = 521; const uint32_t __toku_idlth_init_size = 521;
/* Maps a dictionary id to a bucket index in the table:
   dict_id.dictid reduced modulo idlth->num_buckets.
   NOTE(review): assumes idlth->num_buckets is non-zero; nothing here checks it. */
static inline u_int32_t toku__idlth_hash(toku_idlth* idlth, DICTIONARY_ID dict_id) { static inline uint32_t toku__idlth_hash(toku_idlth* idlth, DICTIONARY_ID dict_id) {
uint32_t tmp = dict_id.dictid; uint32_t tmp = dict_id.dictid;
return tmp % idlth->num_buckets; return tmp % idlth->num_buckets;
} }
...@@ -66,7 +66,7 @@ int toku_idlth_create(toku_idlth** pidlth, ...@@ -66,7 +66,7 @@ int toku_idlth_create(toku_idlth** pidlth,
toku_lt_map* toku_idlth_find(toku_idlth* idlth, DICTIONARY_ID dict_id) { toku_lt_map* toku_idlth_find(toku_idlth* idlth, DICTIONARY_ID dict_id) {
assert(idlth); assert(idlth);
u_int32_t index = toku__idlth_hash(idlth, dict_id); uint32_t index = toku__idlth_hash(idlth, dict_id);
toku_idlth_elt* head = &idlth->buckets[index]; toku_idlth_elt* head = &idlth->buckets[index];
toku_idlth_elt* current = head->next_in_bucket; toku_idlth_elt* current = head->next_in_bucket;
while (current) { while (current) {
...@@ -105,7 +105,7 @@ void toku_idlth_delete(toku_idlth* idlth, DICTIONARY_ID dict_id) { ...@@ -105,7 +105,7 @@ void toku_idlth_delete(toku_idlth* idlth, DICTIONARY_ID dict_id) {
/* Must have elements. */ /* Must have elements. */
assert(idlth->num_keys); assert(idlth->num_keys);
u_int32_t index = toku__idlth_hash(idlth, dict_id); uint32_t index = toku__idlth_hash(idlth, dict_id);
toku_idlth_elt* head = &idlth->buckets[index]; toku_idlth_elt* head = &idlth->buckets[index];
toku_idlth_elt* prev = head; toku_idlth_elt* prev = head;
toku_idlth_elt* current = prev->next_in_bucket; toku_idlth_elt* current = prev->next_in_bucket;
...@@ -131,7 +131,7 @@ int toku_idlth_insert(toku_idlth* idlth, DICTIONARY_ID dict_id) { ...@@ -131,7 +131,7 @@ int toku_idlth_insert(toku_idlth* idlth, DICTIONARY_ID dict_id) {
assert(idlth); assert(idlth);
toku__invalidate_scan(idlth); toku__invalidate_scan(idlth);
u_int32_t index = toku__idlth_hash(idlth, dict_id); uint32_t index = toku__idlth_hash(idlth, dict_id);
/* Allocate a new one. */ /* Allocate a new one. */
toku_idlth_elt* element = (toku_idlth_elt*)idlth->malloc(sizeof(*element)); toku_idlth_elt* element = (toku_idlth_elt*)idlth->malloc(sizeof(*element));
......
...@@ -42,8 +42,8 @@ struct __toku_idlth_elt { ...@@ -42,8 +42,8 @@ struct __toku_idlth_elt {
typedef struct __toku_idlth toku_idlth; typedef struct __toku_idlth toku_idlth;
struct __toku_idlth { struct __toku_idlth {
toku_idlth_elt* buckets; toku_idlth_elt* buckets;
u_int32_t num_buckets; uint32_t num_buckets;
u_int32_t num_keys; uint32_t num_keys;
toku_idlth_elt iter_head; toku_idlth_elt iter_head;
toku_idlth_elt* iter_curr; toku_idlth_elt* iter_curr;
BOOL iter_is_valid; BOOL iter_is_valid;
......
...@@ -29,11 +29,11 @@ ...@@ -29,11 +29,11 @@
(return EINVAL if necessary) before making lock tree calls. */ (return EINVAL if necessary) before making lock tree calls. */
/* Forwards error code r to the tree's panic callback, passing tree->db,
   and returns whatever that callback returns. */
static inline int toku__lt_panic(toku_lock_tree *tree, int r) { static inline int lt_panic(toku_lock_tree *tree, int r) {
return tree->panic(tree->db, r); return tree->panic(tree->db, r);
} }
const u_int32_t __toku_default_buflen = 2; const uint32_t __toku_default_buflen = 2;
static const DBT __toku_lt_infinity; static const DBT __toku_lt_infinity;
static const DBT __toku_lt_neg_infinity; static const DBT __toku_lt_neg_infinity;
...@@ -49,18 +49,18 @@ char* toku_lt_strerror(TOKU_LT_ERROR r) { ...@@ -49,18 +49,18 @@ char* toku_lt_strerror(TOKU_LT_ERROR r) {
return "Unknown error in locking data structures.\n"; return "Unknown error in locking data structures.\n";
} }
/* Compare two payloads assuming that at least one of them is infinite */ /* Compare two payloads assuming that at least one of them is infinite */
/* The comparison is by pointer identity against the toku_lt_infinity and
   toku_lt_neg_infinity sentinels; the payload bytes are never read.
   Returns  0 when a and b are the same pointer,
            1 when a is +infinity or b is -infinity,
           -1 when b is +infinity or a is -infinity. */
static inline int toku__infinite_compare(const DBT* a, const DBT* b) { static inline int infinite_compare(const DBT* a, const DBT* b) {
if (a == b) return 0; if (a == b) return 0;
if (a == toku_lt_infinity) return 1; if (a == toku_lt_infinity) return 1;
if (b == toku_lt_infinity) return -1; if (b == toku_lt_infinity) return -1;
if (a == toku_lt_neg_infinity) return -1; if (a == toku_lt_neg_infinity) return -1;
/* Under the stated precondition the only case left is b == -infinity. */
assert(b == toku_lt_neg_infinity); return 1; invariant(b == toku_lt_neg_infinity); return 1;
} }
static inline BOOL toku__lt_is_infinite(const DBT* p) { static inline BOOL lt_is_infinite(const DBT* p) {
if (p == toku_lt_infinity || p == toku_lt_neg_infinity) { if (p == toku_lt_infinity || p == toku_lt_neg_infinity) {
DBT* dbt = (DBT*)p; DBT* dbt = (DBT*)p;
assert(!dbt->data && !dbt->size); invariant(!dbt->data && !dbt->size);
return TRUE; return TRUE;
} }
return FALSE; return FALSE;
...@@ -68,19 +68,19 @@ static inline BOOL toku__lt_is_infinite(const DBT* p) { ...@@ -68,19 +68,19 @@ static inline BOOL toku__lt_is_infinite(const DBT* p) {
/* Verifies that NULL data and size are consistent. /* Verifies that NULL data and size are consistent.
i.e. The size is 0 if and only if the data is NULL. */ i.e. The size is 0 if and only if the data is NULL. */
/* Returns EINVAL only when key is non-NULL, key->size is non-zero and
   key->data is NULL. Every other input returns 0, including key == NULL and
   a non-NULL data pointer with size 0.
   NOTE(review): that is just one direction of "size is 0 if and only if
   data is NULL" -- confirm the looser check is intended. */
static inline int toku__lt_verify_null_key(const DBT* key) { static inline int lt_verify_null_key(const DBT* key) {
if (key && key->size && !key->data) return EINVAL; if (key && key->size && !key->data) return EINVAL;
return 0; return 0;
} }
/* Re-initializes *dbt in place: zeroes the whole DBT, then sets data to
   payload and size to length. The payload pointer is stored as given, not
   copied. Returns dbt so the call can be used inline as an argument. */
static inline DBT* toku__recreate_DBT(DBT* dbt, void* payload, u_int32_t length) { static inline DBT* recreate_DBT(DBT* dbt, void* payload, uint32_t length) {
memset(dbt, 0, sizeof(DBT)); memset(dbt, 0, sizeof(DBT));
dbt->data = payload; dbt->data = payload;
dbt->size = length; dbt->size = length;
return dbt; return dbt;
} }
/* Three-way comparison of two transaction ids:
   -1 if a < b, 0 if a == b, 1 if a > b. */
static inline int toku__lt_txn_cmp(const TXNID a, const TXNID b) { static inline int lt_txn_cmp(const TXNID a, const TXNID b) {
/* When a is not less than b, (a != b) is 0 for equal ids and 1 otherwise. */
return a < b ? -1 : (a != b); return a < b ? -1 : (a != b);
} }
...@@ -94,7 +94,8 @@ static inline int toku_ltm_add_lt(toku_ltm* mgr, toku_lock_tree* lt) { ...@@ -94,7 +94,8 @@ static inline int toku_ltm_add_lt(toku_ltm* mgr, toku_lock_tree* lt) {
return toku_lth_insert(mgr->lth, lt); return toku_lth_insert(mgr->lth, lt);
} }
int toku__lt_point_cmp(const toku_point* x, const toku_point* y) { int
toku_lt_point_cmp(const toku_point* x, const toku_point* y) {
DBT point_1; DBT point_1;
DBT point_2; DBT point_2;
...@@ -102,24 +103,25 @@ int toku__lt_point_cmp(const toku_point* x, const toku_point* y) { ...@@ -102,24 +103,25 @@ int toku__lt_point_cmp(const toku_point* x, const toku_point* y) {
assert(x->lt); assert(x->lt);
assert(x->lt == y->lt); assert(x->lt == y->lt);
if (toku__lt_is_infinite(x->key_payload) || if (lt_is_infinite(x->key_payload) ||
toku__lt_is_infinite(y->key_payload)) { lt_is_infinite(y->key_payload)) {
/* If either payload is infinite, then: /* If either payload is infinite, then:
- if duplicates are allowed, the data must be the same - if duplicates are allowed, the data must be the same
infinite value. infinite value.
- if duplicates are not allowed, the data is irrelevant - if duplicates are not allowed, the data is irrelevant
In either case, we do not have to compare data: the key will In either case, we do not have to compare data: the key will
be the sole determinant of the comparison */ be the sole determinant of the comparison */
return toku__infinite_compare(x->key_payload, y->key_payload); return infinite_compare(x->key_payload, y->key_payload);
} }
return x->lt->compare_fun(x->lt->db, return x->lt->compare_fun(x->lt->db,
toku__recreate_DBT(&point_1, x->key_payload, x->key_len), recreate_DBT(&point_1, x->key_payload, x->key_len),
toku__recreate_DBT(&point_2, y->key_payload, y->key_len)); recreate_DBT(&point_2, y->key_payload, y->key_len));
} }
/* Lock tree manager functions begin here */ /* Lock tree manager functions begin here */
int toku_ltm_create(toku_ltm** pmgr, int toku_ltm_create(toku_ltm** pmgr,
u_int32_t max_locks, uint32_t max_locks,
uint64_t max_lock_memory,
int (*panic)(DB*, int), int (*panic)(DB*, int),
toku_dbt_cmp (*get_compare_fun_from_db)(DB*), toku_dbt_cmp (*get_compare_fun_from_db)(DB*),
void* (*user_malloc) (size_t), void* (*user_malloc) (size_t),
...@@ -136,13 +138,10 @@ int toku_ltm_create(toku_ltm** pmgr, ...@@ -136,13 +138,10 @@ int toku_ltm_create(toku_ltm** pmgr,
tmp_mgr = (toku_ltm*)user_malloc(sizeof(*tmp_mgr)); tmp_mgr = (toku_ltm*)user_malloc(sizeof(*tmp_mgr));
if (!tmp_mgr) { r = ENOMEM; goto cleanup; } if (!tmp_mgr) { r = ENOMEM; goto cleanup; }
memset(tmp_mgr, 0, sizeof(toku_ltm)); memset(tmp_mgr, 0, sizeof(toku_ltm));
/*
Temporarily set the maximum number of locks per environment to 'infinity'. r = toku_ltm_set_max_locks(tmp_mgr, max_locks);
See ticket #596 for when this will be reversed. if (r!=0) { goto cleanup; }
We will then use 'max_locks' instead of 'UINT32_MAX'. r = toku_ltm_set_max_lock_memory(tmp_mgr, max_lock_memory);
All of the 'per_db' functions and variables will then be deleted.
*/
r = toku_ltm_set_max_locks(tmp_mgr, UINT32_MAX);
if (r!=0) { goto cleanup; } if (r!=0) { goto cleanup; }
tmp_mgr->panic = panic; tmp_mgr->panic = panic;
tmp_mgr->malloc = user_malloc; tmp_mgr->malloc = user_malloc;
...@@ -158,9 +157,6 @@ int toku_ltm_create(toku_ltm** pmgr, ...@@ -158,9 +157,6 @@ int toku_ltm_create(toku_ltm** pmgr,
if (r!=0) { goto cleanup; } if (r!=0) { goto cleanup; }
if (!tmp_mgr->idlth) { r = ENOMEM; goto cleanup; } if (!tmp_mgr->idlth) { r = ENOMEM; goto cleanup; }
r = toku_ltm_set_max_locks_per_db(tmp_mgr, max_locks);
if (r!=0) { goto cleanup; }
r = 0; r = 0;
*pmgr = tmp_mgr; *pmgr = tmp_mgr;
cleanup: cleanup:
...@@ -197,15 +193,18 @@ int toku_ltm_close(toku_ltm* mgr) { ...@@ -197,15 +193,18 @@ int toku_ltm_close(toku_ltm* mgr) {
/* Copies the manager's lock limits and current usage into the caller's
   out-parameters and assigns mgr->status through s.
   No argument is NULL-checked: every pointer is dereferenced unconditionally. */
void void
toku_ltm_get_status(toku_ltm* mgr, uint32_t * max_locks, uint32_t * curr_locks, uint32_t * max_locks_per_db, LTM_STATUS s) { toku_ltm_get_status(toku_ltm* mgr, uint32_t * max_locks, uint32_t * curr_locks,
uint64_t *max_lock_memory, uint64_t *curr_lock_memory,
LTM_STATUS s) {
*max_locks = mgr->max_locks; *max_locks = mgr->max_locks;
*curr_locks = mgr->curr_locks; *curr_locks = mgr->curr_locks;
*max_locks_per_db = mgr->max_locks_per_db; *max_lock_memory = mgr->max_lock_memory;
*curr_lock_memory = mgr->curr_lock_memory;
*s = mgr->status; *s = mgr->status;
} }
int toku_ltm_get_max_locks(toku_ltm* mgr, u_int32_t* max_locks) { int toku_ltm_get_max_locks(toku_ltm* mgr, uint32_t* max_locks) {
int r = ENOSYS; int r = ENOSYS;
if (!mgr || !max_locks) { r = EINVAL; goto cleanup; } if (!mgr || !max_locks) { r = EINVAL; goto cleanup; }
...@@ -215,17 +214,7 @@ int toku_ltm_get_max_locks(toku_ltm* mgr, u_int32_t* max_locks) { ...@@ -215,17 +214,7 @@ int toku_ltm_get_max_locks(toku_ltm* mgr, u_int32_t* max_locks) {
return r; return r;
} }
int toku_ltm_get_max_locks_per_db(toku_ltm* mgr, u_int32_t* max_locks) { int toku_ltm_set_max_locks(toku_ltm* mgr, uint32_t max_locks) {
int r = ENOSYS;
if (!mgr || !max_locks) { r = EINVAL; goto cleanup; }
*max_locks = mgr->max_locks_per_db;
r = 0;
cleanup:
return r;
}
int toku_ltm_set_max_locks(toku_ltm* mgr, u_int32_t max_locks) {
int r = ENOSYS; int r = ENOSYS;
if (!mgr || !max_locks) { if (!mgr || !max_locks) {
r = EINVAL; goto cleanup; r = EINVAL; goto cleanup;
...@@ -240,22 +229,26 @@ int toku_ltm_set_max_locks(toku_ltm* mgr, u_int32_t max_locks) { ...@@ -240,22 +229,26 @@ int toku_ltm_set_max_locks(toku_ltm* mgr, u_int32_t max_locks) {
return r; return r;
} }
/* toku_ltm_get_max_lock_memory: stores mgr->max_lock_memory in
   *max_lock_memory.
   Returns EINVAL if mgr or max_lock_memory is NULL, otherwise 0. */
int toku_ltm_set_max_locks_per_db(toku_ltm* mgr, u_int32_t max_locks) { int toku_ltm_get_max_lock_memory(toku_ltm* mgr, uint64_t* max_lock_memory) {
int r = ENOSYS; int r = ENOSYS;
if (!mgr || !max_locks) {
if (!mgr || !max_lock_memory) { r = EINVAL; goto cleanup; }
*max_lock_memory = mgr->max_lock_memory;
r = 0;
cleanup:
return r;
}
int toku_ltm_set_max_lock_memory(toku_ltm* mgr, uint64_t max_lock_memory) {
int r = ENOSYS;
if (!mgr || !max_lock_memory) {
r = EINVAL; goto cleanup; r = EINVAL; goto cleanup;
} }
toku_lth_start_scan(mgr->lth); if (max_lock_memory < mgr->curr_locks) {
toku_lock_tree* lt; r = EDOM; goto cleanup;
while ((lt = toku_lth_next(mgr->lth)) != NULL) {
if (max_locks < lt->curr_locks) {
r = EDOM; goto cleanup; }
}
toku_lth_start_scan(mgr->lth);
while ((lt = toku_lth_next(mgr->lth)) != NULL) {
lt->max_locks = max_locks;
} }
mgr->max_locks_per_db = max_locks;
mgr->max_lock_memory = max_lock_memory;
r = 0; r = 0;
cleanup: cleanup:
return r; return r;
...@@ -263,102 +256,113 @@ int toku_ltm_set_max_locks_per_db(toku_ltm* mgr, u_int32_t max_locks) { ...@@ -263,102 +256,113 @@ int toku_ltm_set_max_locks_per_db(toku_ltm* mgr, u_int32_t max_locks) {
/* Functions to update the range count and compare it with the /* Functions to update the range count and compare it with the
maximum number of ranges */ maximum number of ranges */
//See ticket #596 static inline BOOL ltm_lock_test_incr(toku_ltm* tree_mgr,
static inline BOOL toku__ltm_lock_test_incr(toku_ltm* tree_mgr, uint32_t replace_locks) {
u_int32_t replace_locks) {
assert(tree_mgr); assert(tree_mgr);
assert(replace_locks <= tree_mgr->curr_locks); assert(replace_locks <= tree_mgr->curr_locks);
return (BOOL)(tree_mgr->curr_locks - replace_locks < tree_mgr->max_locks); return (BOOL)(tree_mgr->curr_locks - replace_locks < tree_mgr->max_locks);
} }
static inline void toku__ltm_lock_incr(toku_ltm* tree_mgr, u_int32_t replace_locks) { static inline void ltm_lock_incr(toku_ltm* tree_mgr, uint32_t replace_locks) {
assert(toku__ltm_lock_test_incr(tree_mgr, replace_locks)); assert(ltm_lock_test_incr(tree_mgr, replace_locks));
tree_mgr->curr_locks -= replace_locks; tree_mgr->curr_locks -= replace_locks;
tree_mgr->curr_locks += 1; tree_mgr->curr_locks += 1;
} }
static inline void toku__ltm_lock_decr(toku_ltm* tree_mgr, u_int32_t locks) { static inline void ltm_lock_decr(toku_ltm* tree_mgr, uint32_t locks) {
assert(tree_mgr); assert(tree_mgr);
assert(tree_mgr->curr_locks >= locks); assert(tree_mgr->curr_locks >= locks);
tree_mgr->curr_locks -= locks; tree_mgr->curr_locks -= locks;
} }
/* The following 3 are temporary functions. See #596 */ static inline void
static inline BOOL toku__lt_lock_test_incr_per_db(toku_lock_tree* tree, ltm_note_free_memory(toku_ltm *mgr, size_t mem) {
u_int32_t replace_locks) { assert(mgr->curr_lock_memory >= mem);
assert(tree); mgr->curr_lock_memory -= mem;
assert(replace_locks <= tree->curr_locks);
return (BOOL)((tree->curr_locks - replace_locks < tree->max_locks) &&
toku__ltm_lock_test_incr(tree->mgr, replace_locks));
} }
static inline void toku__lt_lock_incr_per_db(toku_lock_tree* tree, u_int32_t replace_locks) { static inline int
assert(toku__lt_lock_test_incr_per_db(tree, replace_locks)); ltm_note_allocate_memory(toku_ltm *mgr, size_t mem) {
tree->curr_locks -= replace_locks; int r = TOKUDB_OUT_OF_LOCKS;
tree->curr_locks += 1; if (mgr->curr_lock_memory + mem <= mgr->max_lock_memory) {
toku__ltm_lock_incr(tree->mgr, replace_locks); mgr->curr_lock_memory += mem;
} r = 0;
}
static inline void toku__lt_lock_decr_per_db(toku_lock_tree* tree, u_int32_t locks) { return r;
assert(tree);
assert(tree->curr_locks >= locks);
tree->curr_locks -= locks;
toku__ltm_lock_decr(tree->mgr, locks);
} }
static inline void toku__p_free(toku_lock_tree* tree, toku_point* point) { static inline void p_free(toku_lock_tree* tree, toku_point* point) {
assert(point); assert(point);
if (!toku__lt_is_infinite(point->key_payload)) { size_t freeing = sizeof(*point);
if (!lt_is_infinite(point->key_payload)) {
freeing += point->key_len;
tree->free(point->key_payload); tree->free(point->key_payload);
} }
tree->free(point); tree->free(point);
ltm_note_free_memory(tree->mgr, freeing);
} }
/* /*
Allocate and copy the payload. Allocate and copy the payload.
*/ */
static inline int toku__payload_copy(toku_lock_tree* tree, static inline int
void** payload_out, u_int32_t* len_out, payload_copy(toku_lock_tree* tree,
void* payload_in, u_int32_t len_in) { void** payload_out, uint32_t* len_out,
void* payload_in, uint32_t len_in) {
int r = 0;
assert(payload_out && len_out); assert(payload_out && len_out);
if (!len_in) { if (!len_in) {
assert(!payload_in || toku__lt_is_infinite(payload_in)); assert(!payload_in || lt_is_infinite(payload_in));
*payload_out = payload_in; *payload_out = payload_in;
*len_out = len_in; *len_out = len_in;
} }
else { else {
r = ltm_note_allocate_memory(tree->mgr, len_in);
if (r==0) {
assert(payload_in); assert(payload_in);
*payload_out = tree->malloc((size_t)len_in); *payload_out = tree->malloc((size_t)len_in); //2808
if (!*payload_out) return errno; resource_assert(*payload_out);
*len_out = len_in; *len_out = len_in;
memcpy(*payload_out, payload_in, (size_t)len_in); memcpy(*payload_out, payload_in, (size_t)len_in);
} }
return 0; }
return r;
} }
static inline int toku__p_makecopy(toku_lock_tree* tree, toku_point** ppoint) { static inline int
p_makecopy(toku_lock_tree* tree, toku_point** ppoint) {
assert(ppoint); assert(ppoint);
toku_point* point = *ppoint; toku_point* point = *ppoint;
toku_point* temp_point = NULL; toku_point* temp_point = NULL;
int r; int r;
temp_point = (toku_point*)tree->malloc(sizeof(toku_point)); r = ltm_note_allocate_memory(tree->mgr, sizeof(toku_point));
if (r!=0) goto done;
temp_point = (toku_point*)tree->malloc(sizeof(toku_point)); //2808
resource_assert(temp_point);
if (0) { if (0) {
died1: tree->free(temp_point); return r; } died1:
if (!temp_point) return errno; tree->free(temp_point);
memcpy(temp_point, point, sizeof(toku_point)); ltm_note_free_memory(tree->mgr, sizeof(toku_point));
goto done;
}
*temp_point = *point;
r = toku__payload_copy(tree, r = payload_copy(tree,
&temp_point->key_payload, &temp_point->key_len, &temp_point->key_payload, &temp_point->key_len,
point->key_payload, point->key_len); point->key_payload, point->key_len);
if (r!=0) goto died1; if (r!=0) goto died1;
*ppoint = temp_point; *ppoint = temp_point;
return 0; done:
return r;
} }
/* Provides access to a selfread tree for a particular transaction. /* Provides access to a selfread tree for a particular transaction.
Returns NULL if it does not exist yet. */ Returns NULL if it does not exist yet. */
toku_range_tree* toku__lt_ifexist_selfread(toku_lock_tree* tree, TXNID txn) { toku_range_tree*
toku_lt_ifexist_selfread(toku_lock_tree* tree, TXNID txn) {
assert(tree); assert(tree);
rt_forest* forest = toku_rth_find(tree->rth, txn); rt_forest* forest = toku_rth_find(tree->rth, txn);
return forest ? forest->self_read : NULL; return forest ? forest->self_read : NULL;
...@@ -366,13 +370,14 @@ toku_range_tree* toku__lt_ifexist_selfread(toku_lock_tree* tree, TXNID txn) { ...@@ -366,13 +370,14 @@ toku_range_tree* toku__lt_ifexist_selfread(toku_lock_tree* tree, TXNID txn) {
/* Provides access to a selfwrite tree for a particular transaction. /* Provides access to a selfwrite tree for a particular transaction.
Returns NULL if it does not exist yet. */ Returns NULL if it does not exist yet. */
toku_range_tree* toku__lt_ifexist_selfwrite(toku_lock_tree* tree, TXNID txn) { toku_range_tree*
toku_lt_ifexist_selfwrite(toku_lock_tree* tree, TXNID txn) {
assert(tree); assert(tree);
rt_forest* forest = toku_rth_find(tree->rth, txn); rt_forest* forest = toku_rth_find(tree->rth, txn);
return forest ? forest->self_write : NULL; return forest ? forest->self_write : NULL;
} }
static inline int toku__lt_add_locked_txn(toku_lock_tree* tree, TXNID txn) { static inline int lt_add_locked_txn(toku_lock_tree* tree, TXNID txn) {
int r = ENOSYS; int r = ENOSYS;
BOOL half_done = FALSE; BOOL half_done = FALSE;
...@@ -389,7 +394,7 @@ static inline int toku__lt_add_locked_txn(toku_lock_tree* tree, TXNID txn) { ...@@ -389,7 +394,7 @@ static inline int toku__lt_add_locked_txn(toku_lock_tree* tree, TXNID txn) {
/* Provides access to a selfread tree for a particular transaction. /* Provides access to a selfread tree for a particular transaction.
Creates it if it does not exist. */ Creates it if it does not exist. */
static inline int toku__lt_selfread(toku_lock_tree* tree, TXNID txn, static inline int lt_selfread(toku_lock_tree* tree, TXNID txn,
toku_range_tree** pselfread) { toku_range_tree** pselfread) {
int r = ENOSYS; int r = ENOSYS;
assert(tree && pselfread); assert(tree && pselfread);
...@@ -397,14 +402,14 @@ static inline int toku__lt_selfread(toku_lock_tree* tree, TXNID txn, ...@@ -397,14 +402,14 @@ static inline int toku__lt_selfread(toku_lock_tree* tree, TXNID txn,
rt_forest* forest = toku_rth_find(tree->rth, txn); rt_forest* forest = toku_rth_find(tree->rth, txn);
if (!forest) { if (!forest) {
/* Neither selfread nor selfwrite exist. */ /* Neither selfread nor selfwrite exist. */
r = toku__lt_add_locked_txn(tree, txn); r = lt_add_locked_txn(tree, txn);
if (r!=0) { goto cleanup; } if (r!=0) { goto cleanup; }
forest = toku_rth_find(tree->rth, txn); forest = toku_rth_find(tree->rth, txn);
} }
assert(forest); assert(forest);
if (!forest->self_read) { if (!forest->self_read) {
r = toku_rt_create(&forest->self_read, r = toku_rt_create(&forest->self_read,
toku__lt_point_cmp, toku__lt_txn_cmp, toku_lt_point_cmp, lt_txn_cmp,
FALSE, FALSE,
tree->malloc, tree->free, tree->realloc); tree->malloc, tree->free, tree->realloc);
if (r!=0) { goto cleanup; } if (r!=0) { goto cleanup; }
...@@ -418,7 +423,7 @@ static inline int toku__lt_selfread(toku_lock_tree* tree, TXNID txn, ...@@ -418,7 +423,7 @@ static inline int toku__lt_selfread(toku_lock_tree* tree, TXNID txn,
/* Provides access to a selfwrite tree for a particular transaction. /* Provides access to a selfwrite tree for a particular transaction.
Creates it if it does not exist. */ Creates it if it does not exist. */
static inline int toku__lt_selfwrite(toku_lock_tree* tree, TXNID txn, static inline int lt_selfwrite(toku_lock_tree* tree, TXNID txn,
toku_range_tree** pselfwrite) { toku_range_tree** pselfwrite) {
int r = ENOSYS; int r = ENOSYS;
assert(tree && pselfwrite); assert(tree && pselfwrite);
...@@ -426,14 +431,14 @@ static inline int toku__lt_selfwrite(toku_lock_tree* tree, TXNID txn, ...@@ -426,14 +431,14 @@ static inline int toku__lt_selfwrite(toku_lock_tree* tree, TXNID txn,
rt_forest* forest = toku_rth_find(tree->rth, txn); rt_forest* forest = toku_rth_find(tree->rth, txn);
if (!forest) { if (!forest) {
/* Neither selfread nor selfwrite exist. */ /* Neither selfread nor selfwrite exist. */
r = toku__lt_add_locked_txn(tree, txn); r = lt_add_locked_txn(tree, txn);
if (r!=0) { goto cleanup; } if (r!=0) { goto cleanup; }
forest = toku_rth_find(tree->rth, txn); forest = toku_rth_find(tree->rth, txn);
} }
assert(forest); assert(forest);
if (!forest->self_write) { if (!forest->self_write) {
r = toku_rt_create(&forest->self_write, r = toku_rt_create(&forest->self_write,
toku__lt_point_cmp, toku__lt_txn_cmp, toku_lt_point_cmp, lt_txn_cmp,
FALSE, FALSE,
tree->malloc, tree->free, tree->realloc); tree->malloc, tree->free, tree->realloc);
if (r!=0) { goto cleanup; } if (r!=0) { goto cleanup; }
...@@ -445,10 +450,10 @@ static inline int toku__lt_selfwrite(toku_lock_tree* tree, TXNID txn, ...@@ -445,10 +450,10 @@ static inline int toku__lt_selfwrite(toku_lock_tree* tree, TXNID txn,
return r; return r;
} }
static inline BOOL toku__dominated(toku_interval* query, toku_interval* by) { static inline BOOL interval_dominated(toku_interval* query, toku_interval* by) {
assert(query && by); assert(query && by);
return (BOOL)(toku__lt_point_cmp(query->left, by->left) >= 0 && return (BOOL)(toku_lt_point_cmp(query->left, by->left) >= 0 &&
toku__lt_point_cmp(query->right, by->right) <= 0); toku_lt_point_cmp(query->right, by->right) <= 0);
} }
/* /*
...@@ -456,7 +461,7 @@ static inline BOOL toku__dominated(toku_interval* query, toku_interval* by) { ...@@ -456,7 +461,7 @@ static inline BOOL toku__dominated(toku_interval* query, toku_interval* by) {
Uses the standard definition of dominated from the design document. Uses the standard definition of dominated from the design document.
Determines whether 'query' is dominated by 'rt'. Determines whether 'query' is dominated by 'rt'.
*/ */
static inline int toku__lt_rt_dominates(toku_lock_tree* tree, toku_interval* query, static inline int lt_rt_dominates(toku_lock_tree* tree, toku_interval* query,
toku_range_tree* rt, BOOL* dominated) { toku_range_tree* rt, BOOL* dominated) {
assert(tree && query && dominated); assert(tree && query && dominated);
if (!rt) { if (!rt) {
...@@ -465,11 +470,11 @@ static inline int toku__lt_rt_dominates(toku_lock_tree* tree, toku_interval* que ...@@ -465,11 +470,11 @@ static inline int toku__lt_rt_dominates(toku_lock_tree* tree, toku_interval* que
} }
BOOL allow_overlaps; BOOL allow_overlaps;
const u_int32_t query_size = 1; const uint32_t query_size = 1;
toku_range buffer[query_size]; toku_range buffer[query_size];
u_int32_t buflen = query_size; uint32_t buflen = query_size;
toku_range* buf = &buffer[0]; toku_range* buf = &buffer[0];
u_int32_t numfound; uint32_t numfound;
int r; int r;
/* Sanity check. (Function only supports non-overlap range trees.) */ /* Sanity check. (Function only supports non-overlap range trees.) */
...@@ -484,7 +489,7 @@ static inline int toku__lt_rt_dominates(toku_lock_tree* tree, toku_interval* que ...@@ -484,7 +489,7 @@ static inline int toku__lt_rt_dominates(toku_lock_tree* tree, toku_interval* que
return 0; return 0;
} }
assert(numfound == 1); assert(numfound == 1);
*dominated = toku__dominated(query, &buf[0].ends); *dominated = interval_dominated(query, &buf[0].ends);
return 0; return 0;
} }
...@@ -500,25 +505,25 @@ typedef enum ...@@ -500,25 +505,25 @@ typedef enum
If exactly one range overlaps and its data != self, there might be a If exactly one range overlaps and its data != self, there might be a
conflict. We need to check the 'peer'write table to verify. conflict. We need to check the 'peer'write table to verify.
*/ */
static inline int toku__lt_borderwrite_conflict(toku_lock_tree* tree, TXNID self, static inline int lt_borderwrite_conflict(toku_lock_tree* tree, TXNID self,
toku_interval* query, toku_interval* query,
toku_conflict* conflict, TXNID* peer) { toku_conflict* conflict, TXNID* peer) {
assert(tree && query && conflict && peer); assert(tree && query && conflict && peer);
toku_range_tree* rt = tree->borderwrite; toku_range_tree* rt = tree->borderwrite;
assert(rt); assert(rt);
const u_int32_t query_size = 2; const uint32_t query_size = 2;
toku_range buffer[query_size]; toku_range buffer[query_size];
u_int32_t buflen = query_size; uint32_t buflen = query_size;
toku_range* buf = &buffer[0]; toku_range* buf = &buffer[0];
u_int32_t numfound; uint32_t numfound;
int r; int r;
r = toku_rt_find(rt, query, query_size, &buf, &buflen, &numfound); r = toku_rt_find(rt, query, query_size, &buf, &buflen, &numfound);
if (r!=0) return r; if (r!=0) return r;
assert(numfound <= query_size); assert(numfound <= query_size);
if (numfound == 2) *conflict = TOKU_YES_CONFLICT; if (numfound == 2) *conflict = TOKU_YES_CONFLICT;
else if (numfound == 0 || !toku__lt_txn_cmp(buf[0].data, self)) *conflict = TOKU_NO_CONFLICT; else if (numfound == 0 || !lt_txn_cmp(buf[0].data, self)) *conflict = TOKU_NO_CONFLICT;
else { else {
*conflict = TOKU_MAYBE_CONFLICT; *conflict = TOKU_MAYBE_CONFLICT;
*peer = buf[0].data; *peer = buf[0].data;
...@@ -533,14 +538,14 @@ static inline int toku__lt_borderwrite_conflict(toku_lock_tree* tree, TXNID self ...@@ -533,14 +538,14 @@ static inline int toku__lt_borderwrite_conflict(toku_lock_tree* tree, TXNID self
Uses the standard definition of 'query' meets 'tree' at 'data' from the Uses the standard definition of 'query' meets 'tree' at 'data' from the
design document. design document.
*/ */
static inline int toku__lt_meets(toku_lock_tree* tree, toku_interval* query, static inline int lt_meets(toku_lock_tree* tree, toku_interval* query,
toku_range_tree* rt, BOOL* met) { toku_range_tree* rt, BOOL* met) {
assert(tree && query && rt && met); assert(tree && query && rt && met);
const u_int32_t query_size = 1; const uint32_t query_size = 1;
toku_range buffer[query_size]; toku_range buffer[query_size];
u_int32_t buflen = query_size; uint32_t buflen = query_size;
toku_range* buf = &buffer[0]; toku_range* buf = &buffer[0];
u_int32_t numfound; uint32_t numfound;
int r; int r;
BOOL allow_overlaps; BOOL allow_overlaps;
...@@ -563,23 +568,23 @@ static inline int toku__lt_meets(toku_lock_tree* tree, toku_interval* query, ...@@ -563,23 +568,23 @@ static inline int toku__lt_meets(toku_lock_tree* tree, toku_interval* query,
Uses the standard definition of 'query' meets 'tree' at 'data' from the Uses the standard definition of 'query' meets 'tree' at 'data' from the
design document. design document.
*/ */
static inline int toku__lt_meets_peer(toku_lock_tree* tree, toku_interval* query, static inline int lt_meets_peer(toku_lock_tree* tree, toku_interval* query,
toku_range_tree* rt, BOOL is_homogenous, toku_range_tree* rt, BOOL is_homogenous,
TXNID self, BOOL* met) { TXNID self, BOOL* met) {
assert(tree && query && rt && met); assert(tree && query && rt && met);
assert(query->left == query->right || is_homogenous); assert(query->left == query->right || is_homogenous);
const u_int32_t query_size = is_homogenous ? 1 : 2; const uint32_t query_size = is_homogenous ? 1 : 2;
toku_range buffer[2]; toku_range buffer[2];
u_int32_t buflen = query_size; uint32_t buflen = query_size;
toku_range* buf = &buffer[0]; toku_range* buf = &buffer[0];
u_int32_t numfound; uint32_t numfound;
int r; int r;
r = toku_rt_find(rt, query, query_size, &buf, &buflen, &numfound); r = toku_rt_find(rt, query, query_size, &buf, &buflen, &numfound);
if (r!=0) return r; if (r!=0) return r;
assert(numfound <= query_size); assert(numfound <= query_size);
*met = (BOOL) (numfound == 2 || (numfound == 1 && toku__lt_txn_cmp(buf[0].data, self))); *met = (BOOL) (numfound == 2 || (numfound == 1 && lt_txn_cmp(buf[0].data, self)));
return 0; return 0;
} }
...@@ -587,7 +592,7 @@ static inline int toku__lt_meets_peer(toku_lock_tree* tree, toku_interval* query ...@@ -587,7 +592,7 @@ static inline int toku__lt_meets_peer(toku_lock_tree* tree, toku_interval* query
Utility function to implement: (from design document) Utility function to implement: (from design document)
if K meets E at v'!=t and K meets W_v' then return failure. if K meets E at v'!=t and K meets W_v' then return failure.
*/ */
static inline int toku__lt_check_borderwrite_conflict(toku_lock_tree* tree, static inline int lt_check_borderwrite_conflict(toku_lock_tree* tree,
TXNID txn, toku_interval* query) { TXNID txn, toku_interval* query) {
assert(tree && query); assert(tree && query);
toku_conflict conflict; toku_conflict conflict;
...@@ -595,14 +600,14 @@ static inline int toku__lt_check_borderwrite_conflict(toku_lock_tree* tree, ...@@ -595,14 +600,14 @@ static inline int toku__lt_check_borderwrite_conflict(toku_lock_tree* tree,
toku_range_tree* peer_selfwrite; toku_range_tree* peer_selfwrite;
int r; int r;
r = toku__lt_borderwrite_conflict(tree, txn, query, &conflict, &peer); r = lt_borderwrite_conflict(tree, txn, query, &conflict, &peer);
if (r!=0) return r; if (r!=0) return r;
if (conflict == TOKU_MAYBE_CONFLICT) { if (conflict == TOKU_MAYBE_CONFLICT) {
peer_selfwrite = toku__lt_ifexist_selfwrite(tree, peer); peer_selfwrite = toku_lt_ifexist_selfwrite(tree, peer);
if (!peer_selfwrite) return toku__lt_panic(tree, TOKU_LT_INCONSISTENT); if (!peer_selfwrite) return lt_panic(tree, TOKU_LT_INCONSISTENT);
BOOL met; BOOL met;
r = toku__lt_meets(tree, query, peer_selfwrite, &met); r = lt_meets(tree, query, peer_selfwrite, &met);
if (r!=0) return r; if (r!=0) return r;
conflict = met ? TOKU_YES_CONFLICT : TOKU_NO_CONFLICT; conflict = met ? TOKU_YES_CONFLICT : TOKU_NO_CONFLICT;
} }
...@@ -611,10 +616,10 @@ static inline int toku__lt_check_borderwrite_conflict(toku_lock_tree* tree, ...@@ -611,10 +616,10 @@ static inline int toku__lt_check_borderwrite_conflict(toku_lock_tree* tree,
return 0; return 0;
} }
static inline void toku__payload_from_dbt(void** payload, u_int32_t* len, static inline void payload_from_dbt(void** payload, uint32_t* len,
const DBT* dbt) { const DBT* dbt) {
assert(payload && len && dbt); assert(payload && len && dbt);
if (toku__lt_is_infinite(dbt)) *payload = (void*)dbt; if (lt_is_infinite(dbt)) *payload = (void*)dbt;
else if (!dbt->size) { else if (!dbt->size) {
*payload = NULL; *payload = NULL;
*len = 0; *len = 0;
...@@ -625,16 +630,16 @@ static inline void toku__payload_from_dbt(void** payload, u_int32_t* len, ...@@ -625,16 +630,16 @@ static inline void toku__payload_from_dbt(void** payload, u_int32_t* len,
} }
} }
static inline void toku__init_point(toku_point* point, toku_lock_tree* tree, static inline void init_point(toku_point* point, toku_lock_tree* tree,
const DBT* key) { const DBT* key) {
assert(point && tree && key); assert(point && tree && key);
memset(point, 0, sizeof(toku_point)); memset(point, 0, sizeof(toku_point));
point->lt = tree; point->lt = tree;
toku__payload_from_dbt(&point->key_payload, &point->key_len, key); payload_from_dbt(&point->key_payload, &point->key_len, key);
} }
static inline void toku__init_query(toku_interval* query, static inline void init_query(toku_interval* query,
toku_point* left, toku_point* right) { toku_point* left, toku_point* right) {
query->left = left; query->left = left;
query->right = right; query->right = right;
...@@ -653,7 +658,7 @@ static inline void toku__init_query(toku_interval* query, ...@@ -653,7 +658,7 @@ static inline void toku__init_query(toku_interval* query,
we made copies from the DB at consolidation time we made copies from the DB at consolidation time
*/ */
static inline void toku__init_insert(toku_range* to_insert, static inline void init_insert(toku_range* to_insert,
toku_point* left, toku_point* right, toku_point* left, toku_point* right,
TXNID txn) { TXNID txn) {
to_insert->ends.left = left; to_insert->ends.left = left;
...@@ -663,38 +668,38 @@ static inline void toku__init_insert(toku_range* to_insert, ...@@ -663,38 +668,38 @@ static inline void toku__init_insert(toku_range* to_insert,
/* Returns whether the point already exists /* Returns whether the point already exists
as an endpoint of the given range. */ as an endpoint of the given range. */
static inline BOOL toku__lt_p_independent(toku_point* point, toku_interval* range) { static inline BOOL lt_p_independent(toku_point* point, toku_interval* range) {
assert(point && range); assert(point && range);
return (BOOL)(point != range->left && point != range->right); return (BOOL)(point != range->left && point != range->right);
} }
static inline int toku__lt_determine_extreme(toku_lock_tree* tree, static inline int lt_determine_extreme(toku_lock_tree* tree,
toku_range* to_insert, toku_range* to_insert,
BOOL* alloc_left, BOOL* alloc_right, BOOL* alloc_left, BOOL* alloc_right,
u_int32_t numfound, uint32_t numfound,
u_int32_t start_at) { uint32_t start_at) {
assert(to_insert && tree && alloc_left && alloc_right); assert(to_insert && tree && alloc_left && alloc_right);
u_int32_t i; uint32_t i;
assert(numfound <= tree->buflen); assert(numfound <= tree->buflen);
for (i = start_at; i < numfound; i++) { for (i = start_at; i < numfound; i++) {
int c; int c;
/* Find the extreme left end-point among overlapping ranges */ /* Find the extreme left end-point among overlapping ranges */
if ((c = toku__lt_point_cmp(tree->buf[i].ends.left, to_insert->ends.left)) if ((c = toku_lt_point_cmp(tree->buf[i].ends.left, to_insert->ends.left))
<= 0) { <= 0) {
if ((!*alloc_left && c == 0) || if ((!*alloc_left && c == 0) ||
!toku__lt_p_independent(tree->buf[i].ends.left, &to_insert->ends)) { !lt_p_independent(tree->buf[i].ends.left, &to_insert->ends)) {
return toku__lt_panic(tree, TOKU_LT_INCONSISTENT); } return lt_panic(tree, TOKU_LT_INCONSISTENT); }
*alloc_left = FALSE; *alloc_left = FALSE;
to_insert->ends.left = tree->buf[i].ends.left; to_insert->ends.left = tree->buf[i].ends.left;
} }
/* Find the extreme right end-point */ /* Find the extreme right end-point */
if ((c = toku__lt_point_cmp(tree->buf[i].ends.right, to_insert->ends.right)) if ((c = toku_lt_point_cmp(tree->buf[i].ends.right, to_insert->ends.right))
>= 0) { >= 0) {
if ((!*alloc_right && c == 0) || if ((!*alloc_right && c == 0) ||
(tree->buf[i].ends.right == to_insert->ends.left && (tree->buf[i].ends.right == to_insert->ends.left &&
tree->buf[i].ends.left != to_insert->ends.left) || tree->buf[i].ends.left != to_insert->ends.left) ||
tree->buf[i].ends.right == to_insert->ends.right) { tree->buf[i].ends.right == to_insert->ends.right) {
return toku__lt_panic(tree, TOKU_LT_INCONSISTENT); } return lt_panic(tree, TOKU_LT_INCONSISTENT); }
*alloc_right = FALSE; *alloc_right = FALSE;
to_insert->ends.right = tree->buf[i].ends.right; to_insert->ends.right = tree->buf[i].ends.right;
} }
...@@ -703,26 +708,26 @@ static inline int toku__lt_determine_extreme(toku_lock_tree* tree, ...@@ -703,26 +708,26 @@ static inline int toku__lt_determine_extreme(toku_lock_tree* tree,
} }
/* Find extreme given a starting point. */ /* Find extreme given a starting point. */
static inline int toku__lt_extend_extreme(toku_lock_tree* tree,toku_range* to_insert, static inline int lt_extend_extreme(toku_lock_tree* tree,toku_range* to_insert,
BOOL* alloc_left, BOOL* alloc_right, BOOL* alloc_left, BOOL* alloc_right,
u_int32_t numfound) { uint32_t numfound) {
return toku__lt_determine_extreme(tree, to_insert, alloc_left, alloc_right, return lt_determine_extreme(tree, to_insert, alloc_left, alloc_right,
numfound, 0); numfound, 0);
} }
/* Has no starting point. */ /* Has no starting point. */
static inline int toku__lt_find_extreme(toku_lock_tree* tree, static inline int lt_find_extreme(toku_lock_tree* tree,
toku_range* to_insert, toku_range* to_insert,
u_int32_t numfound) { uint32_t numfound) {
assert(numfound > 0); assert(numfound > 0);
*to_insert = tree->buf[0]; *to_insert = tree->buf[0];
BOOL ignore_left = TRUE; BOOL ignore_left = TRUE;
BOOL ignore_right = TRUE; BOOL ignore_right = TRUE;
return toku__lt_determine_extreme(tree, to_insert, &ignore_left, return lt_determine_extreme(tree, to_insert, &ignore_left,
&ignore_right, numfound, 1); &ignore_right, numfound, 1);
} }
static inline int toku__lt_alloc_extreme(toku_lock_tree* tree, toku_range* to_insert, static inline int lt_alloc_extreme(toku_lock_tree* tree, toku_range* to_insert,
BOOL alloc_left, BOOL* alloc_right) { BOOL alloc_left, BOOL* alloc_right) {
assert(to_insert && alloc_right); assert(to_insert && alloc_right);
BOOL copy_left = FALSE; BOOL copy_left = FALSE;
...@@ -732,32 +737,32 @@ static inline int toku__lt_alloc_extreme(toku_lock_tree* tree, toku_range* to_in ...@@ -732,32 +737,32 @@ static inline int toku__lt_alloc_extreme(toku_lock_tree* tree, toku_range* to_in
but it is not strictly needed */ but it is not strictly needed */
if (alloc_left && alloc_right && if (alloc_left && alloc_right &&
(to_insert->ends.left == to_insert->ends.right || (to_insert->ends.left == to_insert->ends.right ||
toku__lt_point_cmp(to_insert->ends.left, to_insert->ends.right) == 0)) { toku_lt_point_cmp(to_insert->ends.left, to_insert->ends.right) == 0)) {
*alloc_right = FALSE; *alloc_right = FALSE;
copy_left = TRUE; copy_left = TRUE;
} }
if (alloc_left) { if (alloc_left) {
r = toku__p_makecopy(tree, &to_insert->ends.left); r = p_makecopy(tree, &to_insert->ends.left);
if (0) { died1: if (0) { died1:
if (alloc_left) toku__p_free(tree, to_insert->ends.left); return r; } if (alloc_left) p_free(tree, to_insert->ends.left); return r; }
if (r!=0) return r; if (r!=0) return r;
} }
if (*alloc_right) { if (*alloc_right) {
assert(!copy_left); assert(!copy_left);
r = toku__p_makecopy(tree, &to_insert->ends.right); r = p_makecopy(tree, &to_insert->ends.right);
if (r!=0) goto died1; if (r!=0) goto died1;
} }
else if (copy_left) to_insert->ends.right = to_insert->ends.left; else if (copy_left) to_insert->ends.right = to_insert->ends.left;
return 0; return 0;
} }
static inline int toku__lt_delete_overlapping_ranges(toku_lock_tree* tree, static inline int lt_delete_overlapping_ranges(toku_lock_tree* tree,
toku_range_tree* rt, toku_range_tree* rt,
u_int32_t numfound) { uint32_t numfound) {
assert(tree && rt); assert(tree && rt);
int r; int r;
u_int32_t i; uint32_t i;
assert(numfound <= tree->buflen); assert(numfound <= tree->buflen);
for (i = 0; i < numfound; i++) { for (i = 0; i < numfound; i++) {
r = toku_rt_delete(rt, &tree->buf[i]); r = toku_rt_delete(rt, &tree->buf[i]);
...@@ -766,32 +771,32 @@ static inline int toku__lt_delete_overlapping_ranges(toku_lock_tree* tree, ...@@ -766,32 +771,32 @@ static inline int toku__lt_delete_overlapping_ranges(toku_lock_tree* tree,
return 0; return 0;
} }
static inline int toku__lt_free_points(toku_lock_tree* tree, static inline int lt_free_points(toku_lock_tree* tree,
toku_interval* to_insert, toku_interval* to_insert,
u_int32_t numfound, uint32_t numfound,
toku_range_tree *rt) { toku_range_tree *rt) {
assert(tree && to_insert); assert(tree && to_insert);
assert(numfound <= tree->buflen); assert(numfound <= tree->buflen);
int r; int r;
u_int32_t i; uint32_t i;
for (i = 0; i < numfound; i++) { for (i = 0; i < numfound; i++) {
if (rt != NULL) { if (rt != NULL) {
r = toku_rt_delete(rt, &tree->buf[i]); r = toku_rt_delete(rt, &tree->buf[i]);
if (r!=0) return toku__lt_panic(tree, r); if (r!=0) return lt_panic(tree, r);
} }
/* /*
We will maintain the invariant: (separately for read and write We will maintain the invariant: (separately for read and write
environments) environments)
(toku__lt_point_cmp(a, b) == 0 && a.txn == b.txn) => a == b (toku_lt_point_cmp(a, b) == 0 && a.txn == b.txn) => a == b
*/ */
/* Do not double-free. */ /* Do not double-free. */
if (tree->buf[i].ends.right != tree->buf[i].ends.left && if (tree->buf[i].ends.right != tree->buf[i].ends.left &&
toku__lt_p_independent(tree->buf[i].ends.right, to_insert)) { lt_p_independent(tree->buf[i].ends.right, to_insert)) {
toku__p_free(tree, tree->buf[i].ends.right); p_free(tree, tree->buf[i].ends.right);
} }
if (toku__lt_p_independent(tree->buf[i].ends.left, to_insert)) { if (lt_p_independent(tree->buf[i].ends.left, to_insert)) {
toku__p_free(tree, tree->buf[i].ends.left); p_free(tree, tree->buf[i].ends.left);
} }
} }
return 0; return 0;
...@@ -804,26 +809,25 @@ static inline int toku__lt_free_points(toku_lock_tree* tree, ...@@ -804,26 +809,25 @@ static inline int toku__lt_free_points(toku_lock_tree* tree,
If found_only is TRUE, we're only consolidating existing ranges in the interval If found_only is TRUE, we're only consolidating existing ranges in the interval
specified inside of to_insert. specified inside of to_insert.
*/ */
static inline int toku__consolidate(toku_lock_tree* tree, BOOL found_only, static inline int consolidate(toku_lock_tree* tree, BOOL found_only,
toku_range* to_insert, toku_range* to_insert,
TXNID txn, BOOL* out_of_locks) { TXNID txn) {
int r; int r;
BOOL alloc_left = TRUE; BOOL alloc_left = TRUE;
BOOL alloc_right = TRUE; BOOL alloc_right = TRUE;
toku_range_tree* selfread; toku_range_tree* selfread;
assert(tree && to_insert && out_of_locks); assert(tree && to_insert);
toku_interval* query = &to_insert->ends; toku_interval* query = &to_insert->ends;
*out_of_locks = FALSE;
#if !defined(TOKU_RT_NOOVERLAPS) #if !defined(TOKU_RT_NOOVERLAPS)
toku_range_tree* mainread = tree->mainread; toku_range_tree* mainread = tree->mainread;
assert(mainread); assert(mainread);
#endif #endif
/* Find the self read tree */ /* Find the self read tree */
r = toku__lt_selfread(tree, txn, &selfread); r = lt_selfread(tree, txn, &selfread);
if (r!=0) return r; if (r!=0) return r;
assert(selfread); assert(selfread);
/* Find all overlapping ranges in the self-read */ /* Find all overlapping ranges in the self-read */
u_int32_t numfound; uint32_t numfound;
r = toku_rt_find(selfread, query, 0, &tree->buf, &tree->buflen, &numfound); r = toku_rt_find(selfread, query, 0, &tree->buf, &tree->buflen, &numfound);
if (r!=0) return r; if (r!=0) return r;
assert(numfound <= tree->buflen); assert(numfound <= tree->buflen);
...@@ -831,39 +835,41 @@ static inline int toku__consolidate(toku_lock_tree* tree, BOOL found_only, ...@@ -831,39 +835,41 @@ static inline int toku__consolidate(toku_lock_tree* tree, BOOL found_only,
/* If there is 0 or 1 found, it is already consolidated. */ /* If there is 0 or 1 found, it is already consolidated. */
if (numfound < 2) { return 0; } if (numfound < 2) { return 0; }
/* Copy the first one, so we only consolidate existing entries. */ /* Copy the first one, so we only consolidate existing entries. */
r = toku__lt_find_extreme(tree, to_insert, numfound); r = lt_find_extreme(tree, to_insert, numfound);
if (r!=0) return r; if (r!=0) return r;
alloc_left = FALSE;
alloc_right = FALSE;
} }
else { else {
/* Find the extreme left and right point of the consolidated interval */ /* Find the extreme left and right point of the consolidated interval */
r = toku__lt_extend_extreme(tree, to_insert, &alloc_left, &alloc_right, r = lt_extend_extreme(tree, to_insert, &alloc_left, &alloc_right,
numfound); numfound);
if (r!=0) return r; if (r!=0) return r;
if (!ltm_lock_test_incr(tree->mgr, numfound)) {
return TOKUDB_OUT_OF_LOCKS;
} }
if (found_only) { alloc_left = FALSE; alloc_right = FALSE; }
if (!toku__lt_lock_test_incr_per_db(tree, numfound)) {
*out_of_locks = TRUE;
return 0;
} }
/* Allocate the consolidated range */ /* Allocate the consolidated range */
r = toku__lt_alloc_extreme(tree, to_insert, alloc_left, &alloc_right); r = lt_alloc_extreme(tree, to_insert, alloc_left, &alloc_right);
if (0) { died1: if (0) { died1:
if (alloc_left) toku__p_free(tree, to_insert->ends.left); if (alloc_left) p_free(tree, to_insert->ends.left);
if (alloc_right) toku__p_free(tree, to_insert->ends.right); return r; } if (alloc_right) p_free(tree, to_insert->ends.right); return r; }
if (r!=0) return r; if (r!=0) {
return r;
}
/* From this point on we have to panic if we cannot finish. */ /* From this point on we have to panic if we cannot finish. */
/* Delete overlapping ranges from selfread ... */ /* Delete overlapping ranges from selfread ... */
r = toku__lt_delete_overlapping_ranges(tree, selfread, numfound); r = lt_delete_overlapping_ranges(tree, selfread, numfound);
if (r!=0) return toku__lt_panic(tree, r); if (r!=0) return lt_panic(tree, r);
/* ... and mainread. /* ... and mainread.
Growth direction: if we had no overlaps, the next line Growth direction: if we had no overlaps, the next line
should be commented out */ should be commented out */
#if !defined(TOKU_RT_NOOVERLAPS) #if !defined(TOKU_RT_NOOVERLAPS)
r = toku__lt_delete_overlapping_ranges(tree, mainread, numfound); r = lt_delete_overlapping_ranges(tree, mainread, numfound);
if (r!=0) return toku__lt_panic(tree, r); if (r!=0) return lt_panic(tree, r);
#endif #endif
/* Free all the points from ranges in tree->buf[0]..tree->buf[numfound-1] */ /* Free all the points from ranges in tree->buf[0]..tree->buf[numfound-1] */
toku__lt_free_points(tree, &to_insert->ends, numfound, NULL); lt_free_points(tree, &to_insert->ends, numfound, NULL);
/* We don't necessarily need to panic after here unless numfound > 0 /* We don't necessarily need to panic after here unless numfound > 0
Which indicates we deleted something. */ Which indicates we deleted something. */
/* Insert extreme range into selfread. */ /* Insert extreme range into selfread. */
...@@ -872,11 +878,11 @@ static inline int toku__consolidate(toku_lock_tree* tree, BOOL found_only, ...@@ -872,11 +878,11 @@ static inline int toku__consolidate(toku_lock_tree* tree, BOOL found_only,
#if !defined(TOKU_RT_NOOVERLAPS) #if !defined(TOKU_RT_NOOVERLAPS)
int r2; int r2;
if (0) { died2: r2 = toku_rt_delete(selfread, to_insert); if (0) { died2: r2 = toku_rt_delete(selfread, to_insert);
if (r2!=0) return toku__lt_panic(tree, r2); goto died1; } if (r2!=0) return lt_panic(tree, r2); goto died1; }
#endif #endif
if (r!=0) { if (r!=0) {
/* If we deleted/merged anything, this is a panic situation. */ /* If we deleted/merged anything, this is a panic situation. */
if (numfound) return toku__lt_panic(tree, TOKU_LT_INCONSISTENT); if (numfound) return lt_panic(tree, TOKU_LT_INCONSISTENT);
goto died1; } goto died1; }
#if !defined(TOKU_RT_NOOVERLAPS) #if !defined(TOKU_RT_NOOVERLAPS)
/* Insert extreme range into mainread. */ /* Insert extreme range into mainread. */
...@@ -884,18 +890,18 @@ static inline int toku__consolidate(toku_lock_tree* tree, BOOL found_only, ...@@ -884,18 +890,18 @@ static inline int toku__consolidate(toku_lock_tree* tree, BOOL found_only,
r = toku_rt_insert(tree->mainread, to_insert); r = toku_rt_insert(tree->mainread, to_insert);
if (r!=0) { if (r!=0) {
/* If we deleted/merged anything, this is a panic situation. */ /* If we deleted/merged anything, this is a panic situation. */
if (numfound) return toku__lt_panic(tree, TOKU_LT_INCONSISTENT); if (numfound) return lt_panic(tree, TOKU_LT_INCONSISTENT);
goto died2; } goto died2; }
#endif #endif
toku__lt_lock_incr_per_db(tree, numfound); ltm_lock_incr(tree->mgr, numfound);
return 0; return 0;
} }
static inline void toku__lt_init_full_query(toku_lock_tree* tree, toku_interval* query, static inline void lt_init_full_query(toku_lock_tree* tree, toku_interval* query,
toku_point* left, toku_point* right) { toku_point* left, toku_point* right) {
toku__init_point(left, tree, (DBT*)toku_lt_neg_infinity); init_point(left, tree, (DBT*)toku_lt_neg_infinity);
toku__init_point(right, tree, (DBT*)toku_lt_infinity); init_point(right, tree, (DBT*)toku_lt_infinity);
toku__init_query(query, left, right); init_query(query, left, right);
} }
typedef struct { typedef struct {
...@@ -910,18 +916,18 @@ static int free_contents_helper(toku_range* value, void* extra) { ...@@ -910,18 +916,18 @@ static int free_contents_helper(toku_range* value, void* extra) {
int r = ENOSYS; int r = ENOSYS;
*info->store_value = *value; *info->store_value = *value;
if ((r=toku__lt_free_points(info->lt, info->query, 1, info->rtdel))) { if ((r=lt_free_points(info->lt, info->query, 1, info->rtdel))) {
return toku__lt_panic(info->lt, r); return lt_panic(info->lt, r);
} }
return 0; return 0;
} }
/* /*
TODO: Refactor. TODO: Refactor.
toku__lt_free_points should be replaced (or supplanted) with a lt_free_points should be replaced (or supplanted) with a
toku__lt_free_point (singular) lt_free_point (singular)
*/ */
static inline int toku__lt_free_contents(toku_lock_tree* tree, toku_range_tree* rt, static inline int lt_free_contents(toku_lock_tree* tree, toku_range_tree* rt,
toku_range_tree *rtdel, BOOL doclose) { toku_range_tree *rtdel, BOOL doclose) {
assert(tree); assert(tree);
if (!rt) return 0; if (!rt) return 0;
...@@ -931,7 +937,7 @@ static inline int toku__lt_free_contents(toku_lock_tree* tree, toku_range_tree* ...@@ -931,7 +937,7 @@ static inline int toku__lt_free_contents(toku_lock_tree* tree, toku_range_tree*
toku_interval query; toku_interval query;
toku_point left; toku_point left;
toku_point right; toku_point right;
toku__lt_init_full_query(tree, &query, &left, &right); lt_init_full_query(tree, &query, &left, &right);
free_contents_info info; free_contents_info info;
info.lt = tree; info.lt = tree;
info.rtdel = rtdel; info.rtdel = rtdel;
...@@ -948,7 +954,7 @@ static inline int toku__lt_free_contents(toku_lock_tree* tree, toku_range_tree* ...@@ -948,7 +954,7 @@ static inline int toku__lt_free_contents(toku_lock_tree* tree, toku_range_tree*
return r; return r;
} }
static inline BOOL toku__r_backwards(toku_interval* range) { static inline BOOL r_backwards(toku_interval* range) {
assert(range && range->left && range->right); assert(range && range->left && range->right);
toku_point* left = (toku_point*)range->left; toku_point* left = (toku_point*)range->left;
toku_point* right = (toku_point*)range->right; toku_point* right = (toku_point*)range->right;
...@@ -956,12 +962,12 @@ static inline BOOL toku__r_backwards(toku_interval* range) { ...@@ -956,12 +962,12 @@ static inline BOOL toku__r_backwards(toku_interval* range) {
/* Optimization: if all the pointers are equal, clearly left == right. */ /* Optimization: if all the pointers are equal, clearly left == right. */
return (BOOL) return (BOOL)
((left->key_payload != right->key_payload) && ((left->key_payload != right->key_payload) &&
toku__lt_point_cmp(left, right) > 0); toku_lt_point_cmp(left, right) > 0);
} }
static inline int toku__lt_unlock_deferred_txns(toku_lock_tree* tree); static inline int lt_unlock_deferred_txns(toku_lock_tree* tree);
static inline void toku__lt_set_comparison_functions(toku_lock_tree* tree, static inline void lt_set_comparison_functions(toku_lock_tree* tree,
DB* db) { DB* db) {
assert(!tree->db && !tree->compare_fun); assert(!tree->db && !tree->compare_fun);
tree->db = db; tree->db = db;
...@@ -969,56 +975,55 @@ static inline void toku__lt_set_comparison_functions(toku_lock_tree* tree, ...@@ -969,56 +975,55 @@ static inline void toku__lt_set_comparison_functions(toku_lock_tree* tree,
assert(tree->compare_fun); assert(tree->compare_fun);
} }
static inline void toku__lt_clear_comparison_functions(toku_lock_tree* tree) { static inline void lt_clear_comparison_functions(toku_lock_tree* tree) {
assert(tree); assert(tree);
tree->db = NULL; tree->db = NULL;
tree->compare_fun = NULL; tree->compare_fun = NULL;
} }
/* Preprocess step for acquire functions. */ /* Preprocess step for acquire functions. */
static inline int toku__lt_preprocess(toku_lock_tree* tree, DB* db, static inline int lt_preprocess(toku_lock_tree* tree, DB* db,
__attribute__((unused)) TXNID txn, __attribute__((unused)) TXNID txn,
const DBT* key_left, const DBT* key_left,
const DBT* key_right, const DBT* key_right,
toku_point* left, toku_point* right, toku_point* left, toku_point* right,
toku_interval* query, BOOL* out_of_locks) { toku_interval* query) {
int r = ENOSYS; int r = ENOSYS;
if (!tree || !db || if (!tree || !db ||
!key_left || !key_right || !out_of_locks) {r = EINVAL; goto cleanup; } !key_left || !key_right) {r = EINVAL; goto cleanup; }
/* Verify that NULL keys have payload and size that are mutually /* Verify that NULL keys have payload and size that are mutually
consistent*/ consistent*/
if ((r = toku__lt_verify_null_key(key_left)) != 0) { goto cleanup; } if ((r = lt_verify_null_key(key_left)) != 0) { goto cleanup; }
if ((r = toku__lt_verify_null_key(key_right)) != 0) { goto cleanup; } if ((r = lt_verify_null_key(key_right)) != 0) { goto cleanup; }
toku__init_point(left, tree, key_left); init_point(left, tree, key_left);
toku__init_point(right, tree, key_right); init_point(right, tree, key_right);
toku__init_query(query, left, right); init_query(query, left, right);
toku__lt_set_comparison_functions(tree, db); lt_set_comparison_functions(tree, db);
/* Verify left <= right, otherwise return EDOM. */ /* Verify left <= right, otherwise return EDOM. */
if (toku__r_backwards(query)) { r = EDOM; goto cleanup; } if (r_backwards(query)) { r = EDOM; goto cleanup; }
*out_of_locks = FALSE;
r = 0; r = 0;
cleanup: cleanup:
if (r == 0) { if (r == 0) {
assert(tree->db && tree->compare_fun); assert(tree->db && tree->compare_fun);
/* Cleanup all existing deleted transactions */ /* Cleanup all existing deleted transactions */
if (!toku_rth_is_empty(tree->txns_to_unlock)) { if (!toku_rth_is_empty(tree->txns_to_unlock)) {
r = toku__lt_unlock_deferred_txns(tree); r = lt_unlock_deferred_txns(tree);
} }
} }
return r; return r;
} }
/* Postprocess step for acquire functions. */ /* Postprocess step for acquire functions. */
static inline void toku__lt_postprocess(toku_lock_tree* tree) { static inline void lt_postprocess(toku_lock_tree* tree) {
toku__lt_clear_comparison_functions(tree); lt_clear_comparison_functions(tree);
} }
static inline int toku__lt_get_border(toku_lock_tree* tree, BOOL in_borderwrite, static inline int lt_get_border(toku_lock_tree* tree, BOOL in_borderwrite,
toku_range* pred, toku_range* succ, toku_range* pred, toku_range* succ,
BOOL* found_p, BOOL* found_s, BOOL* found_p, BOOL* found_s,
toku_range* to_insert) { toku_range* to_insert) {
...@@ -1026,8 +1031,8 @@ static inline int toku__lt_get_border(toku_lock_tree* tree, BOOL in_borderwrite, ...@@ -1026,8 +1031,8 @@ static inline int toku__lt_get_border(toku_lock_tree* tree, BOOL in_borderwrite,
int r; int r;
toku_range_tree* rt; toku_range_tree* rt;
rt = in_borderwrite ? tree->borderwrite : rt = in_borderwrite ? tree->borderwrite :
toku__lt_ifexist_selfwrite(tree, tree->buf[0].data); toku_lt_ifexist_selfwrite(tree, tree->buf[0].data);
if (!rt) return toku__lt_panic(tree, TOKU_LT_INCONSISTENT); if (!rt) return lt_panic(tree, TOKU_LT_INCONSISTENT);
r = toku_rt_predecessor(rt, to_insert->ends.left, pred, found_p); r = toku_rt_predecessor(rt, to_insert->ends.left, pred, found_p);
if (r!=0) return r; if (r!=0) return r;
r = toku_rt_successor (rt, to_insert->ends.right, succ, found_s); r = toku_rt_successor (rt, to_insert->ends.right, succ, found_s);
...@@ -1035,17 +1040,17 @@ static inline int toku__lt_get_border(toku_lock_tree* tree, BOOL in_borderwrite, ...@@ -1035,17 +1040,17 @@ static inline int toku__lt_get_border(toku_lock_tree* tree, BOOL in_borderwrite,
return 0; return 0;
} }
static inline int toku__lt_expand_border(toku_lock_tree* tree, toku_range* to_insert, static inline int lt_expand_border(toku_lock_tree* tree, toku_range* to_insert,
toku_range* pred, toku_range* succ, toku_range* pred, toku_range* succ,
BOOL found_p, BOOL found_s) { BOOL found_p, BOOL found_s) {
assert(tree && to_insert && pred && succ); assert(tree && to_insert && pred && succ);
int r; int r;
if (found_p && !toku__lt_txn_cmp(pred->data, to_insert->data)) { if (found_p && !lt_txn_cmp(pred->data, to_insert->data)) {
r = toku_rt_delete(tree->borderwrite, pred); r = toku_rt_delete(tree->borderwrite, pred);
if (r!=0) return r; if (r!=0) return r;
to_insert->ends.left = pred->ends.left; to_insert->ends.left = pred->ends.left;
} }
else if (found_s && !toku__lt_txn_cmp(succ->data, to_insert->data)) { else if (found_s && !lt_txn_cmp(succ->data, to_insert->data)) {
r = toku_rt_delete(tree->borderwrite, succ); r = toku_rt_delete(tree->borderwrite, succ);
if (r!=0) return r; if (r!=0) return r;
to_insert->ends.right = succ->ends.right; to_insert->ends.right = succ->ends.right;
...@@ -1053,26 +1058,26 @@ static inline int toku__lt_expand_border(toku_lock_tree* tree, toku_range* to_in ...@@ -1053,26 +1058,26 @@ static inline int toku__lt_expand_border(toku_lock_tree* tree, toku_range* to_in
return 0; return 0;
} }
static inline int toku__lt_split_border(toku_lock_tree* tree, toku_range* to_insert, static inline int lt_split_border(toku_lock_tree* tree, toku_range* to_insert,
toku_range* pred, toku_range* succ, toku_range* pred, toku_range* succ,
BOOL found_p, BOOL found_s) { BOOL found_p, BOOL found_s) {
assert(tree && to_insert && pred && succ); assert(tree && to_insert && pred && succ);
int r; int r;
assert(toku__lt_txn_cmp(tree->buf[0].data, to_insert->data)); assert(lt_txn_cmp(tree->buf[0].data, to_insert->data));
if (!found_s || !found_p) return toku__lt_panic(tree, TOKU_LT_INCONSISTENT); if (!found_s || !found_p) return lt_panic(tree, TOKU_LT_INCONSISTENT);
r = toku_rt_delete(tree->borderwrite, &tree->buf[0]); r = toku_rt_delete(tree->borderwrite, &tree->buf[0]);
if (r!=0) return toku__lt_panic(tree, r); if (r!=0) return lt_panic(tree, r);
pred->ends.left = tree->buf[0].ends.left; pred->ends.left = tree->buf[0].ends.left;
succ->ends.right = tree->buf[0].ends.right; succ->ends.right = tree->buf[0].ends.right;
if (toku__r_backwards(&pred->ends) || toku__r_backwards(&succ->ends)) { if (r_backwards(&pred->ends) || r_backwards(&succ->ends)) {
return toku__lt_panic(tree, TOKU_LT_INCONSISTENT);} return lt_panic(tree, TOKU_LT_INCONSISTENT);}
r = toku_rt_insert(tree->borderwrite, pred); r = toku_rt_insert(tree->borderwrite, pred);
if (r!=0) return toku__lt_panic(tree, r); if (r!=0) return lt_panic(tree, r);
r = toku_rt_insert(tree->borderwrite, succ); r = toku_rt_insert(tree->borderwrite, succ);
if (r!=0) return toku__lt_panic(tree, r); if (r!=0) return lt_panic(tree, r);
return 0; return 0;
} }
...@@ -1105,22 +1110,22 @@ static inline int toku__lt_split_border(toku_lock_tree* tree, toku_range* to_ins ...@@ -1105,22 +1110,22 @@ static inline int toku__lt_split_border(toku_lock_tree* tree, toku_range* to_ins
done with borderwrite. done with borderwrite.
insert point,point into selfwrite. insert point,point into selfwrite.
*/ */
static inline int toku__lt_borderwrite_insert(toku_lock_tree* tree, static inline int lt_borderwrite_insert(toku_lock_tree* tree,
toku_interval* query, toku_interval* query,
toku_range* to_insert) { toku_range* to_insert) {
assert(tree && query && to_insert); assert(tree && query && to_insert);
int r; int r;
toku_range_tree* borderwrite = tree->borderwrite; assert(borderwrite); toku_range_tree* borderwrite = tree->borderwrite; assert(borderwrite);
const u_int32_t query_size = 1; const uint32_t query_size = 1;
u_int32_t numfound; uint32_t numfound;
r = toku_rt_find(borderwrite, query, query_size, &tree->buf, &tree->buflen, r = toku_rt_find(borderwrite, query, query_size, &tree->buf, &tree->buflen,
&numfound); &numfound);
if (r!=0) return toku__lt_panic(tree, r); if (r!=0) return lt_panic(tree, r);
assert(numfound <= query_size); assert(numfound <= query_size);
/* No updated needed in borderwrite: we return right away. */ /* No updated needed in borderwrite: we return right away. */
if (numfound == 1 && !toku__lt_txn_cmp(tree->buf[0].data, to_insert->data)) return 0; if (numfound == 1 && !lt_txn_cmp(tree->buf[0].data, to_insert->data)) return 0;
/* Find predecessor and successors */ /* Find predecessor and successors */
toku_range pred; toku_range pred;
...@@ -1128,24 +1133,24 @@ static inline int toku__lt_borderwrite_insert(toku_lock_tree* tree, ...@@ -1128,24 +1133,24 @@ static inline int toku__lt_borderwrite_insert(toku_lock_tree* tree,
BOOL found_p = FALSE; BOOL found_p = FALSE;
BOOL found_s = FALSE; BOOL found_s = FALSE;
r = toku__lt_get_border(tree, (BOOL)(numfound == 0), &pred, &succ, r = lt_get_border(tree, (BOOL)(numfound == 0), &pred, &succ,
&found_p, &found_s, to_insert); &found_p, &found_s, to_insert);
if (r!=0) return toku__lt_panic(tree, r); if (r!=0) return lt_panic(tree, r);
if (numfound == 0) { if (numfound == 0) {
if (found_p && found_s && !toku__lt_txn_cmp(pred.data, succ.data)) { if (found_p && found_s && !lt_txn_cmp(pred.data, succ.data)) {
return toku__lt_panic(tree, TOKU_LT_INCONSISTENT); } return lt_panic(tree, TOKU_LT_INCONSISTENT); }
r = toku__lt_expand_border(tree, to_insert, &pred, &succ, r = lt_expand_border(tree, to_insert, &pred, &succ,
found_p, found_s); found_p, found_s);
if (r!=0) return toku__lt_panic(tree, r); if (r!=0) return lt_panic(tree, r);
} }
else { else {
r = toku__lt_split_border( tree, to_insert, &pred, &succ, r = lt_split_border( tree, to_insert, &pred, &succ,
found_p, found_s); found_p, found_s);
if (r!=0) return toku__lt_panic(tree, r); if (r!=0) return lt_panic(tree, r);
} }
r = toku_rt_insert(borderwrite, to_insert); r = toku_rt_insert(borderwrite, to_insert);
if (r!=0) return toku__lt_panic(tree, r); if (r!=0) return lt_panic(tree, r);
return 0; return 0;
} }
...@@ -1175,16 +1180,14 @@ int toku_lt_create(toku_lock_tree** ptree, ...@@ -1175,16 +1180,14 @@ int toku_lt_create(toku_lock_tree** ptree,
tmp_tree->realloc = user_realloc; tmp_tree->realloc = user_realloc;
tmp_tree->get_compare_fun_from_db = get_compare_fun_from_db; tmp_tree->get_compare_fun_from_db = get_compare_fun_from_db;
tmp_tree->lock_escalation_allowed = TRUE; tmp_tree->lock_escalation_allowed = TRUE;
r = toku_ltm_get_max_locks_per_db(mgr, &tmp_tree->max_locks);
if (r!=0) { goto cleanup; }
#if !defined(TOKU_RT_NOOVERLAPS) #if !defined(TOKU_RT_NOOVERLAPS)
r = toku_rt_create(&tmp_tree->mainread, r = toku_rt_create(&tmp_tree->mainread,
toku__lt_point_cmp, toku__lt_txn_cmp, TRUE, toku_lt_point_cmp, lt_txn_cmp, TRUE,
user_malloc, user_free, user_realloc); user_malloc, user_free, user_realloc);
if (r!=0) { goto cleanup; } if (r!=0) { goto cleanup; }
#endif #endif
r = toku_rt_create(&tmp_tree->borderwrite, r = toku_rt_create(&tmp_tree->borderwrite,
toku__lt_point_cmp, toku__lt_txn_cmp, FALSE, toku_lt_point_cmp, lt_txn_cmp, FALSE,
user_malloc, user_free, user_realloc); user_malloc, user_free, user_realloc);
if (r!=0) { goto cleanup; } if (r!=0) { goto cleanup; }
r = toku_rth_create(&tmp_tree->rth, user_malloc, user_free, user_realloc); r = toku_rth_create(&tmp_tree->rth, user_malloc, user_free, user_realloc);
...@@ -1197,6 +1200,8 @@ int toku_lt_create(toku_lock_tree** ptree, ...@@ -1197,6 +1200,8 @@ int toku_lt_create(toku_lock_tree** ptree,
tmp_tree->buf = (toku_range*) tmp_tree->buf = (toku_range*)
user_malloc(tmp_tree->buflen * sizeof(toku_range)); user_malloc(tmp_tree->buflen * sizeof(toku_range));
if (!tmp_tree->buf) { r = ENOMEM; goto cleanup; } if (!tmp_tree->buf) { r = ENOMEM; goto cleanup; }
r = toku_omt_create(&tmp_tree->dbs);
if (r!=0) { goto cleanup; }
tmp_tree->ref_count = 1; tmp_tree->ref_count = 1;
*ptree = tmp_tree; *ptree = tmp_tree;
...@@ -1210,6 +1215,7 @@ int toku_lt_create(toku_lock_tree** ptree, ...@@ -1210,6 +1215,7 @@ int toku_lt_create(toku_lock_tree** ptree,
if (tmp_tree->rth) { toku_rth_close(tmp_tree->rth); } if (tmp_tree->rth) { toku_rth_close(tmp_tree->rth); }
if (tmp_tree->txns_to_unlock) { toku_rth_close(tmp_tree->txns_to_unlock); } if (tmp_tree->txns_to_unlock) { toku_rth_close(tmp_tree->txns_to_unlock); }
if (tmp_tree->buf) { user_free(tmp_tree->buf); } if (tmp_tree->buf) { user_free(tmp_tree->buf); }
if (tmp_tree->dbs) { toku_omt_destroy(&tmp_tree->dbs); }
user_free(tmp_tree); user_free(tmp_tree);
} }
} }
...@@ -1231,8 +1237,11 @@ static inline void toku_lt_set_dict_id(toku_lock_tree* lt, DICTIONARY_ID dict_id ...@@ -1231,8 +1237,11 @@ static inline void toku_lt_set_dict_id(toku_lock_tree* lt, DICTIONARY_ID dict_id
lt->dict_id = dict_id; lt->dict_id = dict_id;
} }
static void lt_add_db(toku_lock_tree* tree, DB *db);
static void lt_remove_db(toku_lock_tree* tree, DB *db);
int toku_ltm_get_lt(toku_ltm* mgr, toku_lock_tree** ptree, int toku_ltm_get_lt(toku_ltm* mgr, toku_lock_tree** ptree,
DICTIONARY_ID dict_id) { DICTIONARY_ID dict_id, DB *db) {
/* first look in hash table to see if lock tree exists for that db, /* first look in hash table to see if lock tree exists for that db,
if so return it */ if so return it */
int r = ENOSYS; int r = ENOSYS;
...@@ -1240,13 +1249,16 @@ int toku_ltm_get_lt(toku_ltm* mgr, toku_lock_tree** ptree, ...@@ -1240,13 +1249,16 @@ int toku_ltm_get_lt(toku_ltm* mgr, toku_lock_tree** ptree,
toku_lock_tree* tree = NULL; toku_lock_tree* tree = NULL;
BOOL added_to_ltm = FALSE; BOOL added_to_ltm = FALSE;
BOOL added_to_idlth = FALSE; BOOL added_to_idlth = FALSE;
BOOL added_extant_db = FALSE;
map = toku_idlth_find(mgr->idlth, dict_id); map = toku_idlth_find(mgr->idlth, dict_id);
if (map != NULL) { if (map != NULL) {
/* Load already existing lock tree. */ /* Load already existing lock tree. */
assert (map->tree != NULL); tree = map->tree;
*ptree = map->tree; assert (tree != NULL);
toku_lt_add_ref(*ptree); toku_lt_add_ref(tree);
lt_add_db(tree, db);
*ptree = tree;
r = 0; r = 0;
goto cleanup; goto cleanup;
} }
...@@ -1266,6 +1278,9 @@ int toku_ltm_get_lt(toku_ltm* mgr, toku_lock_tree** ptree, ...@@ -1266,6 +1278,9 @@ int toku_ltm_get_lt(toku_ltm* mgr, toku_lock_tree** ptree,
if (r != 0) { goto cleanup; } if (r != 0) { goto cleanup; }
added_to_idlth = TRUE; added_to_idlth = TRUE;
lt_add_db(tree, db);
added_extant_db = TRUE;
map = toku_idlth_find(mgr->idlth, dict_id); map = toku_idlth_find(mgr->idlth, dict_id);
assert(map); assert(map);
map->tree = tree; map->tree = tree;
...@@ -1278,6 +1293,7 @@ int toku_ltm_get_lt(toku_ltm* mgr, toku_lock_tree** ptree, ...@@ -1278,6 +1293,7 @@ int toku_ltm_get_lt(toku_ltm* mgr, toku_lock_tree** ptree,
if (tree != NULL) { if (tree != NULL) {
if (added_to_ltm) { toku_ltm_remove_lt(mgr, tree); } if (added_to_ltm) { toku_ltm_remove_lt(mgr, tree); }
if (added_to_idlth) { toku_idlth_delete(mgr->idlth, dict_id); } if (added_to_idlth) { toku_idlth_delete(mgr->idlth, dict_id); }
if (added_extant_db) { lt_remove_db(tree, db); }
toku_lt_close(tree); toku_lt_close(tree);
} }
} }
...@@ -1299,14 +1315,15 @@ int toku_lt_close(toku_lock_tree* tree) { ...@@ -1299,14 +1315,15 @@ int toku_lt_close(toku_lock_tree* tree) {
rt_forest* forest; rt_forest* forest;
while ((forest = toku_rth_next(tree->rth)) != NULL) { while ((forest = toku_rth_next(tree->rth)) != NULL) {
r = toku__lt_free_contents(tree, forest->self_read, NULL, TRUE); r = lt_free_contents(tree, forest->self_read, NULL, TRUE);
if (!first_error && r!=0) { first_error = r; } if (!first_error && r!=0) { first_error = r; }
r = toku__lt_free_contents(tree, forest->self_write, NULL, TRUE); r = lt_free_contents(tree, forest->self_write, NULL, TRUE);
if (!first_error && r!=0) { first_error = r; } if (!first_error && r!=0) { first_error = r; }
} }
toku_rth_close(tree->rth); toku_rth_close(tree->rth);
toku_rth_close(tree->txns_to_unlock); toku_rth_close(tree->txns_to_unlock);
toku_rth_close(tree->txns_still_locked); toku_rth_close(tree->txns_still_locked);
toku_omt_destroy(&tree->dbs);
tree->free(tree->buf); tree->free(tree->buf);
tree->free(tree); tree->free(tree);
...@@ -1323,23 +1340,21 @@ int toku_lt_acquire_read_lock(toku_lock_tree* tree, ...@@ -1323,23 +1340,21 @@ int toku_lt_acquire_read_lock(toku_lock_tree* tree,
} }
static int toku__lt_try_acquire_range_read_lock(toku_lock_tree* tree, static int lt_try_acquire_range_read_lock(toku_lock_tree* tree,
DB* db, TXNID txn, DB* db, TXNID txn,
const DBT* key_left, const DBT* key_left,
const DBT* key_right, const DBT* key_right) {
BOOL* out_of_locks) {
int r; int r;
toku_point left; toku_point left;
toku_point right; toku_point right;
toku_interval query; toku_interval query;
BOOL dominated; BOOL dominated;
if (!out_of_locks) { return EINVAL; } r = lt_preprocess(tree, db, txn,
r = toku__lt_preprocess(tree, db, txn,
key_left, key_left,
key_right, key_right,
&left, &right, &left, &right,
&query, out_of_locks); &query);
if (r!=0) { goto cleanup; } if (r!=0) { goto cleanup; }
/* /*
...@@ -1358,36 +1373,36 @@ static int toku__lt_try_acquire_range_read_lock(toku_lock_tree* tree, ...@@ -1358,36 +1373,36 @@ static int toku__lt_try_acquire_range_read_lock(toku_lock_tree* tree,
} }
/* if 'K' is dominated by selfwrite('txn') then return success. */ /* if 'K' is dominated by selfwrite('txn') then return success. */
r = toku__lt_rt_dominates(tree, &query, r = lt_rt_dominates(tree, &query,
toku__lt_ifexist_selfwrite(tree, txn), &dominated); toku_lt_ifexist_selfwrite(tree, txn), &dominated);
if (r || dominated) { goto cleanup; } if (r || dominated) { goto cleanup; }
/* else if 'K' is dominated by selfread('txn') then return success. */ /* else if 'K' is dominated by selfread('txn') then return success. */
r = toku__lt_rt_dominates(tree, &query, r = lt_rt_dominates(tree, &query,
toku__lt_ifexist_selfread(tree, txn), &dominated); toku_lt_ifexist_selfread(tree, txn), &dominated);
if (r || dominated) { goto cleanup; } if (r || dominated) { goto cleanup; }
/* /*
else if 'K' meets borderwrite at 'peer' ('peer'!='txn') && else if 'K' meets borderwrite at 'peer' ('peer'!='txn') &&
'K' meets selfwrite('peer') then return failure. 'K' meets selfwrite('peer') then return failure.
*/ */
r = toku__lt_check_borderwrite_conflict(tree, txn, &query); r = lt_check_borderwrite_conflict(tree, txn, &query);
if (r!=0) { goto cleanup; } if (r!=0) { goto cleanup; }
/* Now need to merge, copy the memory and insert. */ /* Now need to merge, copy the memory and insert. */
toku_range to_insert; toku_range to_insert;
toku__init_insert(&to_insert, &left, &right, txn); init_insert(&to_insert, &left, &right, txn);
/* Consolidate the new range and all the overlapping ranges */ /* Consolidate the new range and all the overlapping ranges */
r = toku__consolidate(tree, FALSE, &to_insert, txn, out_of_locks); r = consolidate(tree, FALSE, &to_insert, txn);
if (r!=0) { goto cleanup; } if (r!=0) { goto cleanup; }
r = 0; r = 0;
cleanup: cleanup:
if (tree) { toku__lt_postprocess(tree); } if (tree) { lt_postprocess(tree); }
return r; return r;
} }
/* Checks for if a write range conflicts with reads. /* Checks for if a write range conflicts with reads.
Supports ranges. */ Supports ranges. */
static inline int toku__lt_write_range_conflicts_reads(toku_lock_tree* tree, static inline int lt_write_range_conflicts_reads(toku_lock_tree* tree,
TXNID txn, toku_interval* query) { TXNID txn, toku_interval* query) {
int r = 0; int r = 0;
BOOL met = FALSE; BOOL met = FALSE;
...@@ -1395,8 +1410,8 @@ static inline int toku__lt_write_range_conflicts_reads(toku_lock_tree* tree, ...@@ -1395,8 +1410,8 @@ static inline int toku__lt_write_range_conflicts_reads(toku_lock_tree* tree,
rt_forest* forest; rt_forest* forest;
while ((forest = toku_rth_next(tree->rth)) != NULL) { while ((forest = toku_rth_next(tree->rth)) != NULL) {
if (forest->self_read != NULL && toku__lt_txn_cmp(forest->hash_key, txn)) { if (forest->self_read != NULL && lt_txn_cmp(forest->hash_key, txn)) {
r = toku__lt_meets_peer(tree, query, forest->self_read, TRUE, txn, r = lt_meets_peer(tree, query, forest->self_read, TRUE, txn,
&met); &met);
if (r!=0) { goto cleanup; } if (r!=0) { goto cleanup; }
if (met) { r = DB_LOCK_NOTGRANTED; goto cleanup; } if (met) { r = DB_LOCK_NOTGRANTED; goto cleanup; }
...@@ -1411,7 +1426,7 @@ static inline int toku__lt_write_range_conflicts_reads(toku_lock_tree* tree, ...@@ -1411,7 +1426,7 @@ static inline int toku__lt_write_range_conflicts_reads(toku_lock_tree* tree,
Tests whether a range from BorderWrite is trivially escalatable. Tests whether a range from BorderWrite is trivially escalatable.
i.e. No read locks from other transactions overlap the range. i.e. No read locks from other transactions overlap the range.
*/ */
static inline int toku__border_escalation_trivial(toku_lock_tree* tree, static inline int border_escalation_trivial(toku_lock_tree* tree,
toku_range* border_range, toku_range* border_range,
BOOL* trivial) { BOOL* trivial) {
assert(tree && border_range && trivial); assert(tree && border_range && trivial);
...@@ -1419,7 +1434,7 @@ static inline int toku__border_escalation_trivial(toku_lock_tree* tree, ...@@ -1419,7 +1434,7 @@ static inline int toku__border_escalation_trivial(toku_lock_tree* tree,
toku_interval query = border_range->ends; toku_interval query = border_range->ends;
r = toku__lt_write_range_conflicts_reads(tree, border_range->data, &query); r = lt_write_range_conflicts_reads(tree, border_range->data, &query);
if (r == DB_LOCK_NOTGRANTED || r == DB_LOCK_DEADLOCK) { *trivial = FALSE; } if (r == DB_LOCK_NOTGRANTED || r == DB_LOCK_DEADLOCK) { *trivial = FALSE; }
else if (r!=0) { goto cleanup; } else if (r!=0) { goto cleanup; }
else { *trivial = TRUE; } else { *trivial = TRUE; }
...@@ -1436,7 +1451,7 @@ static int lt_global_lock(toku_lock_tree* tree, TXNID txn) { ...@@ -1436,7 +1451,7 @@ static int lt_global_lock(toku_lock_tree* tree, TXNID txn) {
//Create the self write table if it does not exist. //Create the self write table if it does not exist.
//This saves the fact that txn is still locked. //This saves the fact that txn is still locked.
toku_range_tree* selfwrite; toku_range_tree* selfwrite;
if ((r = toku__lt_selfwrite(tree, txn, &selfwrite))) return r; if ((r = lt_selfwrite(tree, txn, &selfwrite))) return r;
//Clear out the borderwrite, selfwrite, selfread, and mainread tables. //Clear out the borderwrite, selfwrite, selfread, and mainread tables.
//The selfread and selfwrite tables also need to free memory. //The selfread and selfwrite tables also need to free memory.
...@@ -1446,27 +1461,27 @@ static int lt_global_lock(toku_lock_tree* tree, TXNID txn) { ...@@ -1446,27 +1461,27 @@ static int lt_global_lock(toku_lock_tree* tree, TXNID txn) {
toku_rt_clear(tree->mainread); toku_rt_clear(tree->mainread);
#endif #endif
u_int32_t ranges; uint32_t ranges;
r = toku_rt_get_size(selfwrite, &ranges); r = toku_rt_get_size(selfwrite, &ranges);
if ((r = toku__lt_free_contents(tree, selfwrite, NULL, FALSE))) { if ((r = lt_free_contents(tree, selfwrite, NULL, FALSE))) {
r = toku__lt_panic(tree, r); r = lt_panic(tree, r);
goto cleanup; goto cleanup;
} }
toku_range_tree* selfread = toku__lt_ifexist_selfread(tree, txn); toku_range_tree* selfread = toku_lt_ifexist_selfread(tree, txn);
if (selfread) { if (selfread) {
u_int32_t size; uint32_t size;
r = toku_rt_get_size(selfread, &size); r = toku_rt_get_size(selfread, &size);
assert(r==0); assert(r==0);
ranges += size; ranges += size;
if ((r = toku__lt_free_contents(tree, selfread, NULL, FALSE))) { if ((r = lt_free_contents(tree, selfread, NULL, FALSE))) {
r = toku__lt_panic(tree, r); r = lt_panic(tree, r);
goto cleanup; goto cleanup;
} }
} }
toku__lt_lock_decr_per_db(tree, ranges); ltm_lock_decr(tree->mgr, ranges);
tree->table_lock_owner = txn; tree->table_lock_owner = txn;
tree->table_is_locked = TRUE; tree->table_is_locked = TRUE;
r = 0; r = 0;
...@@ -1475,15 +1490,15 @@ static int lt_global_lock(toku_lock_tree* tree, TXNID txn) { ...@@ -1475,15 +1490,15 @@ static int lt_global_lock(toku_lock_tree* tree, TXNID txn) {
} }
/* */ /* */
static inline int toku__escalate_writes_from_border_range(toku_lock_tree* tree, static inline int escalate_writes_from_border_range(toku_lock_tree* tree,
toku_range* border_range) { toku_range* border_range) {
int r = ENOSYS; int r = ENOSYS;
if (!tree || !border_range) { r = EINVAL; goto cleanup; } if (!tree || !border_range) { r = EINVAL; goto cleanup; }
TXNID txn = border_range->data; TXNID txn = border_range->data;
toku_range_tree* self_write = toku__lt_ifexist_selfwrite(tree, txn); toku_range_tree* self_write = toku_lt_ifexist_selfwrite(tree, txn);
assert(self_write); assert(self_write);
toku_interval query = border_range->ends; toku_interval query = border_range->ends;
u_int32_t numfound = 0; uint32_t numfound = 0;
/* /*
* Delete all overlapping ranges * Delete all overlapping ranges
...@@ -1502,27 +1517,27 @@ static inline int toku__escalate_writes_from_border_range(toku_lock_tree* tree, ...@@ -1502,27 +1517,27 @@ static inline int toku__escalate_writes_from_border_range(toku_lock_tree* tree,
if ((r = lt_global_lock(tree, txn))) goto cleanup; if ((r = lt_global_lock(tree, txn))) goto cleanup;
} }
else { else {
u_int32_t i; uint32_t i;
for (i = 0; i < numfound; i++) { for (i = 0; i < numfound; i++) {
r = toku_rt_delete(self_write, &tree->buf[i]); r = toku_rt_delete(self_write, &tree->buf[i]);
if (r != 0) { r = toku__lt_panic(tree, r); goto cleanup; } if (r != 0) { r = lt_panic(tree, r); goto cleanup; }
/* /*
* Clean up memory that is not referenced by border_range. * Clean up memory that is not referenced by border_range.
*/ */
if (tree->buf[i].ends.left != tree->buf[i].ends.right && if (tree->buf[i].ends.left != tree->buf[i].ends.right &&
toku__lt_p_independent(tree->buf[i].ends.left, &border_range->ends)) { lt_p_independent(tree->buf[i].ends.left, &border_range->ends)) {
/* Do not double free if left and right are same point. */ /* Do not double free if left and right are same point. */
toku__p_free(tree, tree->buf[i].ends.left); p_free(tree, tree->buf[i].ends.left);
} }
if (toku__lt_p_independent(tree->buf[i].ends.right, &border_range->ends)) { if (lt_p_independent(tree->buf[i].ends.right, &border_range->ends)) {
toku__p_free(tree, tree->buf[i].ends.right); p_free(tree, tree->buf[i].ends.right);
} }
} }
//Insert escalated range. //Insert escalated range.
r = toku_rt_insert(self_write, border_range); r = toku_rt_insert(self_write, border_range);
if (r != 0) { r = toku__lt_panic(tree, r); goto cleanup; } if (r != 0) { r = lt_panic(tree, r); goto cleanup; }
toku__lt_lock_incr_per_db(tree, numfound); ltm_lock_incr(tree->mgr, numfound);
} }
r = 0; r = 0;
...@@ -1530,15 +1545,14 @@ static inline int toku__escalate_writes_from_border_range(toku_lock_tree* tree, ...@@ -1530,15 +1545,14 @@ static inline int toku__escalate_writes_from_border_range(toku_lock_tree* tree,
return r; return r;
} }
static int toku__lt_escalate_read_locks_in_interval(toku_lock_tree* tree, static int lt_escalate_read_locks_in_interval(toku_lock_tree* tree,
toku_interval* query, toku_interval* query,
TXNID txn) { TXNID txn) {
int r = ENOSYS; int r = ENOSYS;
toku_range to_insert; toku_range to_insert;
BOOL ignore_out_of_locks;
toku__init_insert(&to_insert, query->left, query->right, txn); init_insert(&to_insert, query->left, query->right, txn);
r = toku__consolidate(tree, TRUE, &to_insert, txn, &ignore_out_of_locks); r = consolidate(tree, TRUE, &to_insert, txn);
if (r!=0) { goto cleanup; } if (r!=0) { goto cleanup; }
r = 0; r = 0;
cleanup: cleanup:
...@@ -1556,9 +1570,9 @@ static int escalate_read_locks_helper(toku_range* border_range, void* extra) { ...@@ -1556,9 +1570,9 @@ static int escalate_read_locks_helper(toku_range* border_range, void* extra) {
escalate_info* info = extra; escalate_info* info = extra;
int r = ENOSYS; int r = ENOSYS;
if (!toku__lt_txn_cmp(border_range->data, info->txn)) { r = 0; goto cleanup; } if (!lt_txn_cmp(border_range->data, info->txn)) { r = 0; goto cleanup; }
info->escalate_interval->right = border_range->ends.left; info->escalate_interval->right = border_range->ends.left;
r = toku__lt_escalate_read_locks_in_interval(info->lt, r = lt_escalate_read_locks_in_interval(info->lt,
info->escalate_interval, info->txn); info->escalate_interval, info->txn);
if (r!=0) goto cleanup; if (r!=0) goto cleanup;
info->escalate_interval->left = border_range->ends.right; info->escalate_interval->left = border_range->ends.right;
...@@ -1568,7 +1582,7 @@ static int escalate_read_locks_helper(toku_range* border_range, void* extra) { ...@@ -1568,7 +1582,7 @@ static int escalate_read_locks_helper(toku_range* border_range, void* extra) {
} }
//TODO: Whenever comparing TXNIDs use the comparison function INSTEAD of just '!= or ==' //TODO: Whenever comparing TXNIDs use the comparison function INSTEAD of just '!= or =='
static int toku__lt_escalate_read_locks(toku_lock_tree* tree, TXNID txn) { static int lt_escalate_read_locks(toku_lock_tree* tree, TXNID txn) {
int r = ENOSYS; int r = ENOSYS;
assert(tree); assert(tree);
assert(tree->lock_escalation_allowed); assert(tree->lock_escalation_allowed);
...@@ -1577,7 +1591,7 @@ static int toku__lt_escalate_read_locks(toku_lock_tree* tree, TXNID txn) { ...@@ -1577,7 +1591,7 @@ static int toku__lt_escalate_read_locks(toku_lock_tree* tree, TXNID txn) {
toku_point neg_infinite; toku_point neg_infinite;
toku_point infinite; toku_point infinite;
toku_interval query; toku_interval query;
toku__lt_init_full_query(tree, &query, &neg_infinite, &infinite); lt_init_full_query(tree, &query, &neg_infinite, &infinite);
toku_range_tree* border = tree->borderwrite; toku_range_tree* border = tree->borderwrite;
assert(border); assert(border);
...@@ -1589,7 +1603,7 @@ static int toku__lt_escalate_read_locks(toku_lock_tree* tree, TXNID txn) { ...@@ -1589,7 +1603,7 @@ static int toku__lt_escalate_read_locks(toku_lock_tree* tree, TXNID txn) {
if ((r = toku_rt_iterate(border, escalate_read_locks_helper, &info))) goto cleanup; if ((r = toku_rt_iterate(border, escalate_read_locks_helper, &info))) goto cleanup;
/* Special case for zero entries in border? Just do the 'after'? */ /* Special case for zero entries in border? Just do the 'after'? */
query.right = &infinite; query.right = &infinite;
r = toku__lt_escalate_read_locks_in_interval(tree, &query, txn); r = lt_escalate_read_locks_in_interval(tree, &query, txn);
if (r!=0) goto cleanup; if (r!=0) goto cleanup;
r = 0; r = 0;
cleanup: cleanup:
...@@ -1600,14 +1614,14 @@ static int escalate_write_locks_helper(toku_range* border_range, void* extra) { ...@@ -1600,14 +1614,14 @@ static int escalate_write_locks_helper(toku_range* border_range, void* extra) {
toku_lock_tree* tree = extra; toku_lock_tree* tree = extra;
int r = ENOSYS; int r = ENOSYS;
BOOL trivial; BOOL trivial;
if ((r = toku__border_escalation_trivial(tree, border_range, &trivial))) goto cleanup; if ((r = border_escalation_trivial(tree, border_range, &trivial))) goto cleanup;
if (!trivial) { r = 0; goto cleanup; } if (!trivial) { r = 0; goto cleanup; }
/* /*
* At this point, we determine that escalation is simple, * At this point, we determine that escalation is simple,
* Attempt escalation * Attempt escalation
*/ */
r = toku__escalate_writes_from_border_range(tree, border_range); r = escalate_writes_from_border_range(tree, border_range);
if (r!=0) { r = toku__lt_panic(tree, r); goto cleanup; } if (r!=0) { r = lt_panic(tree, r); goto cleanup; }
r = 0; r = 0;
cleanup: cleanup:
return r; return r;
...@@ -1619,7 +1633,7 @@ static int escalate_write_locks_helper(toku_range* border_range, void* extra) { ...@@ -1619,7 +1633,7 @@ static int escalate_write_locks_helper(toku_range* border_range, void* extra) {
* Replaces all writes that overlap with range * Replaces all writes that overlap with range
* Deletes all reads dominated by range * Deletes all reads dominated by range
*/ */
static int toku__lt_escalate_write_locks(toku_lock_tree* tree) { static int lt_escalate_write_locks(toku_lock_tree* tree) {
int r = ENOSYS; int r = ENOSYS;
assert(tree); assert(tree);
assert(tree->borderwrite); assert(tree->borderwrite);
...@@ -1630,100 +1644,79 @@ static int toku__lt_escalate_write_locks(toku_lock_tree* tree) { ...@@ -1630,100 +1644,79 @@ static int toku__lt_escalate_write_locks(toku_lock_tree* tree) {
return r; return r;
} }
static inline int toku__lt_do_escalation(toku_lock_tree* tree) { // run escalation algorithm on a given locktree
static int lt_do_escalation(toku_lock_tree* lt) {
invariant(lt);
int r = ENOSYS; int r = ENOSYS;
if (!tree->lock_escalation_allowed) { r = 0; goto cleanup; } DB* db; // extract db from lt
r = toku__lt_escalate_write_locks(tree); OMTVALUE dbv;
invariant(toku_omt_size(lt->dbs) > 0); // there is at least one db associated with this locktree
r = toku_omt_fetch(lt->dbs, 0, &dbv, NULL);
invariant(r==0);
db = dbv;
lt_set_comparison_functions(lt, db);
if (!lt->lock_escalation_allowed) { r = 0; goto cleanup; }
r = lt_escalate_write_locks(lt);
if (r!=0) { goto cleanup; } if (r!=0) { goto cleanup; }
rt_forest* forest; rt_forest* forest;
toku_rth_start_scan(tree->rth); toku_rth_start_scan(lt->rth);
while ((forest = toku_rth_next(tree->rth)) != NULL) { while ((forest = toku_rth_next(lt->rth)) != NULL) {
if (forest->self_read) { if (forest->self_read) {
r = toku__lt_escalate_read_locks(tree, forest->hash_key); r = lt_escalate_read_locks(lt, forest->hash_key);
if (r!=0) { goto cleanup; } if (r!=0) { goto cleanup; }
} }
} }
r = 0; r = 0;
cleanup: cleanup:
lt_clear_comparison_functions(lt);
return r; return r;
} }
/* TODO: Different error code for escalation failed vs not even happened. */ // run escalation algorithm on all locktrees
#if 0 //See ticket #596 static int ltm_do_escalation(toku_ltm* mgr) {
static int toku__ltm_do_escalation(toku_ltm* mgr, BOOL* locks_available) { invariant(mgr);
assert(mgr && locks_available);
int r = ENOSYS; int r = ENOSYS;
toku_lock_tree* lt = NULL; toku_lock_tree* lt = NULL;
toku_lth_start_scan(mgr->lth); toku_lth_start_scan(mgr->lth); // initialize iterator in mgr
while ((lt = toku_lth_next(mgr->lth)) != NULL) { while ((lt = toku_lth_next(mgr->lth)) != NULL) {
r = toku__lt_do_escalation(lt); r = lt_do_escalation(lt);
if (r!=0) { goto cleanup; } if (r!=0) { goto cleanup; }
} }
*locks_available = toku__ltm_lock_test_incr(mgr, 0);
#warning 'if this code is ever made real, add accountability counters here'
r = 0; r = 0;
cleanup: cleanup:
return r; return r;
} }
#endif
static int toku__lt_do_escalation_per_db(toku_lock_tree* lt, DB* db, BOOL* locks_available) {
assert(lt && locks_available);
int r = ENOSYS;
toku__lt_set_comparison_functions(lt, db);
r = toku__lt_do_escalation(lt);
if (r!=0) { goto cleanup; }
*locks_available = toku__lt_lock_test_incr_per_db(lt, 0);
if (*locks_available)
lt->mgr->status.lock_escalation_successes++;
else
lt->mgr->status.lock_escalation_failures++;
r = 0;
cleanup:
toku__lt_clear_comparison_functions(lt);
return r;
}
int toku_lt_acquire_range_read_lock(toku_lock_tree* tree, DB* db, TXNID txn, int toku_lt_acquire_range_read_lock(toku_lock_tree* tree, DB* db, TXNID txn,
const DBT* key_left, const DBT* key_left,
const DBT* key_right) { const DBT* key_right) {
BOOL out_of_locks = FALSE;
int r = ENOSYS; int r = ENOSYS;
r = toku__lt_try_acquire_range_read_lock(tree, db, txn, r = lt_try_acquire_range_read_lock(tree, db, txn,
key_left, key_left, key_right);
key_right, if (r==TOKUDB_OUT_OF_LOCKS) {
&out_of_locks); r = ltm_do_escalation(tree->mgr);
if (r != 0) { goto cleanup; } if (r == 0) {
r = lt_try_acquire_range_read_lock(tree, db, txn,
if (out_of_locks) { key_left, key_right);
BOOL locks_available = FALSE; if (r==0) {
r = toku__lt_do_escalation_per_db(tree, db, &locks_available); tree->mgr->status.lock_escalation_successes++;
if (r != 0) { goto cleanup; } }
else if (r==TOKUDB_OUT_OF_LOCKS) {
if (!locks_available) { tree->mgr->status.lock_escalation_failures++;
r = TOKUDB_OUT_OF_LOCKS;
goto cleanup;
} }
r = toku__lt_try_acquire_range_read_lock(tree, db, txn,
key_left,
key_right,
&out_of_locks);
if (r != 0) { goto cleanup; }
} }
if (out_of_locks) {
r = TOKUDB_OUT_OF_LOCKS;
goto cleanup;
} }
r = 0;
cleanup:
if (tree) { if (tree) {
LTM_STATUS s = &(tree->mgr->status); LTM_STATUS s = &(tree->mgr->status);
if (r == 0) { if (r == 0) {
...@@ -1743,16 +1736,16 @@ int toku_lt_acquire_range_read_lock(toku_lock_tree* tree, DB* db, TXNID txn, ...@@ -1743,16 +1736,16 @@ int toku_lt_acquire_range_read_lock(toku_lock_tree* tree, DB* db, TXNID txn,
(one in each selfread). (one in each selfread).
Does not support write ranges. Does not support write ranges.
*/ */
static int toku__lt_write_point_conflicts_reads(toku_lock_tree* tree, static int lt_write_point_conflicts_reads(toku_lock_tree* tree,
TXNID txn, toku_interval* query) { TXNID txn, toku_interval* query) {
int r = 0; int r = 0;
#if defined(TOKU_RT_NOOVERLAPS) #if defined(TOKU_RT_NOOVERLAPS)
r = toku__lt_write_range_conflicts_reads(tree, txn, query); r = lt_write_range_conflicts_reads(tree, txn, query);
if (r!=0) { goto cleanup; } if (r!=0) { goto cleanup; }
#else #else
BOOL met = FALSE; BOOL met = FALSE;
toku_range_tree* mainread = tree->mainread; assert(mainread); toku_range_tree* mainread = tree->mainread; assert(mainread);
r = toku__lt_meets_peer(tree, query, mainread, FALSE, txn, &met); r = lt_meets_peer(tree, query, mainread, FALSE, txn, &met);
if (r!=0) { goto cleanup; } if (r!=0) { goto cleanup; }
if (met) { r = DB_LOCK_NOTGRANTED; goto cleanup; } if (met) { r = DB_LOCK_NOTGRANTED; goto cleanup; }
#endif #endif
...@@ -1761,21 +1754,20 @@ static int toku__lt_write_point_conflicts_reads(toku_lock_tree* tree, ...@@ -1761,21 +1754,20 @@ static int toku__lt_write_point_conflicts_reads(toku_lock_tree* tree,
return r; return r;
} }
static int toku__lt_try_acquire_write_lock(toku_lock_tree* tree, static int lt_try_acquire_write_lock(toku_lock_tree* tree,
DB* db, TXNID txn, DB* db, TXNID txn,
const DBT* key, const DBT* key) {
BOOL* out_of_locks) {
int r = ENOSYS; int r = ENOSYS;
toku_point endpoint; toku_point endpoint;
toku_interval query; toku_interval query;
BOOL dominated; BOOL dominated;
BOOL free_left = FALSE; BOOL free_left = FALSE;
r = toku__lt_preprocess(tree, db, txn, r = lt_preprocess(tree, db, txn,
key, key,
key, key,
&endpoint, &endpoint, &endpoint, &endpoint,
&query, out_of_locks); &query);
if (r!=0) { goto cleanup; } if (r!=0) { goto cleanup; }
if (tree->table_is_locked) { if (tree->table_is_locked) {
...@@ -1783,17 +1775,17 @@ static int toku__lt_try_acquire_write_lock(toku_lock_tree* tree, ...@@ -1783,17 +1775,17 @@ static int toku__lt_try_acquire_write_lock(toku_lock_tree* tree,
goto cleanup; goto cleanup;
} }
/* if 'K' is dominated by selfwrite('txn') then return success. */ /* if 'K' is dominated by selfwrite('txn') then return success. */
r = toku__lt_rt_dominates(tree, &query, r = lt_rt_dominates(tree, &query,
toku__lt_ifexist_selfwrite(tree, txn), &dominated); toku_lt_ifexist_selfwrite(tree, txn), &dominated);
if (r || dominated) { goto cleanup; } if (r || dominated) { goto cleanup; }
/* else if K meets mainread at 'txn2' then return failure */ /* else if K meets mainread at 'txn2' then return failure */
r = toku__lt_write_point_conflicts_reads(tree, txn, &query); r = lt_write_point_conflicts_reads(tree, txn, &query);
if (r!=0) { goto cleanup; } if (r!=0) { goto cleanup; }
/* /*
else if 'K' meets borderwrite at 'peer' ('peer'!='txn') && else if 'K' meets borderwrite at 'peer' ('peer'!='txn') &&
'K' meets selfwrite('peer') then return failure. 'K' meets selfwrite('peer') then return failure.
*/ */
r = toku__lt_check_borderwrite_conflict(tree, txn, &query); r = lt_check_borderwrite_conflict(tree, txn, &query);
if (r!=0) { goto cleanup; } if (r!=0) { goto cleanup; }
/* Now need to copy the memory and insert. /* Now need to copy the memory and insert.
No merging required in selfwrite. No merging required in selfwrite.
...@@ -1801,93 +1793,89 @@ static int toku__lt_try_acquire_write_lock(toku_lock_tree* tree, ...@@ -1801,93 +1793,89 @@ static int toku__lt_try_acquire_write_lock(toku_lock_tree* tree,
dominated by selfwrite. dominated by selfwrite.
*/ */
toku_range to_insert; toku_range to_insert;
toku__init_insert(&to_insert, &endpoint, &endpoint, txn); init_insert(&to_insert, &endpoint, &endpoint, txn);
if (!toku__lt_lock_test_incr_per_db(tree, 0)) { if (!ltm_lock_test_incr(tree->mgr, 0)) {
*out_of_locks = TRUE; r = 0; goto cleanup; r = TOKUDB_OUT_OF_LOCKS;
goto cleanup;
} }
BOOL dummy = TRUE; BOOL dummy = TRUE;
r = toku__lt_alloc_extreme(tree, &to_insert, TRUE, &dummy); r = lt_alloc_extreme(tree, &to_insert, TRUE, &dummy);
if (r!=0) { goto cleanup; } if (r!=0) {
goto cleanup;
}
toku_range_tree* selfwrite; toku_range_tree* selfwrite;
r = toku__lt_selfwrite(tree, txn, &selfwrite); r = lt_selfwrite(tree, txn, &selfwrite);
if (r!=0) { free_left = TRUE; goto cleanup; } if (r!=0) { free_left = TRUE; goto cleanup; }
assert(selfwrite); assert(selfwrite);
r = toku_rt_insert(selfwrite, &to_insert); r = toku_rt_insert(selfwrite, &to_insert);
if (r!=0) { free_left = TRUE; goto cleanup; } if (r!=0) { free_left = TRUE; goto cleanup; }
/* Need to update borderwrite. */ /* Need to update borderwrite. */
r = toku__lt_borderwrite_insert(tree, &query, &to_insert); r = lt_borderwrite_insert(tree, &query, &to_insert);
if (r!=0) { r = toku__lt_panic(tree, r); goto cleanup; } if (r!=0) { r = lt_panic(tree, r); goto cleanup; }
toku__lt_lock_incr_per_db(tree, 0); ltm_lock_incr(tree->mgr, 0);
r = 0; r = 0;
cleanup: cleanup:
if (r!=0) { if (r!=0) {
if (free_left) { if (free_left) {
toku__p_free(tree, to_insert.ends.left); p_free(tree, to_insert.ends.left);
} }
} }
if (tree) { toku__lt_postprocess(tree); } if (tree) { lt_postprocess(tree); }
return r; return r;
} }
// toku_lt_acquire_write_lock() used only by test programs // toku_lt_acquire_write_lock() used only by test programs
int toku_lt_acquire_write_lock(toku_lock_tree* tree, DB* db, TXNID txn, int toku_lt_acquire_write_lock(toku_lock_tree* tree, DB* db, TXNID txn,
const DBT* key) { const DBT* key) {
BOOL out_of_locks = FALSE;
int r = ENOSYS; int r = ENOSYS;
r = toku__lt_try_acquire_write_lock(tree, db, txn, r = lt_try_acquire_write_lock(tree, db, txn, key);
key, if (r==TOKUDB_OUT_OF_LOCKS) {
&out_of_locks); r = ltm_do_escalation(tree->mgr);
if (r != 0) { goto cleanup; } if (r == 0) {
r = lt_try_acquire_write_lock(tree, db, txn, key);
if (out_of_locks) { if (r==0) {
BOOL locks_available = FALSE; tree->mgr->status.lock_escalation_successes++;
r = toku__lt_do_escalation_per_db(tree, db, &locks_available); }
if (r != 0) { goto cleanup; } else if (r==TOKUDB_OUT_OF_LOCKS) {
tree->mgr->status.lock_escalation_failures++;
if (!locks_available) {
r = TOKUDB_OUT_OF_LOCKS;
goto cleanup;
} }
r = toku__lt_try_acquire_write_lock(tree, db, txn,
key,
&out_of_locks);
if (r != 0) { goto cleanup; }
} }
if (out_of_locks) {
r = TOKUDB_OUT_OF_LOCKS;
goto cleanup;
} }
r = 0; if (tree) {
cleanup: LTM_STATUS s = &(tree->mgr->status);
if (r == 0) {
s->write_lock++;
}
else {
s->write_lock_fail++;
if (r == TOKUDB_OUT_OF_LOCKS)
s->out_of_write_locks++;
}
}
return r; return r;
} }
static int toku__lt_try_acquire_range_write_lock(toku_lock_tree* tree, static int lt_try_acquire_range_write_lock(toku_lock_tree* tree,
DB* db, TXNID txn, DB* db, TXNID txn,
const DBT* key_left, const DBT* key_left,
const DBT* key_right, const DBT* key_right) {
BOOL* out_of_locks) {
int r; int r;
toku_point left; toku_point left;
toku_point right; toku_point right;
toku_interval query; toku_interval query;
if (key_left == key_right) { if (key_left == key_right) {
return toku__lt_try_acquire_write_lock(tree, db, txn, return lt_try_acquire_write_lock(tree, db, txn, key_left);
key_left,
out_of_locks);
} }
r = toku__lt_preprocess(tree, db, txn, r = lt_preprocess(tree, db, txn,
key_left, key_left, key_right,
key_right,
&left, &right, &left, &right,
&query, out_of_locks); &query);
if (r!=0) { goto cleanup; } if (r!=0) { goto cleanup; }
if (tree->table_is_locked) { if (tree->table_is_locked) {
...@@ -1903,52 +1891,38 @@ static int toku__lt_try_acquire_range_write_lock(toku_lock_tree* tree, ...@@ -1903,52 +1891,38 @@ static int toku__lt_try_acquire_range_write_lock(toku_lock_tree* tree,
} }
// Acquire table write lock. // Acquire table write lock.
//If there are any other writes, we fail. //If there are any other writes, we fail.
if ((r = toku__lt_check_borderwrite_conflict(tree, txn, &query))) goto cleanup; if ((r = lt_check_borderwrite_conflict(tree, txn, &query))) goto cleanup;
//If there are any other reads, we fail. //If there are any other reads, we fail.
if ((r = toku__lt_write_point_conflicts_reads(tree, txn, &query))) goto cleanup; if ((r = lt_write_point_conflicts_reads(tree, txn, &query))) goto cleanup;
if ((r = lt_global_lock(tree, txn))) goto cleanup; if ((r = lt_global_lock(tree, txn))) goto cleanup;
r = 0; r = 0;
cleanup: cleanup:
if (tree) { toku__lt_postprocess(tree); } if (tree) { lt_postprocess(tree); }
return r; return r;
} }
int toku_lt_acquire_range_write_lock(toku_lock_tree* tree, DB* db, TXNID txn, int toku_lt_acquire_range_write_lock(toku_lock_tree* tree, DB* db, TXNID txn,
const DBT* key_left, const DBT* key_left,
const DBT* key_right) { const DBT* key_right) {
BOOL out_of_locks = FALSE;
int r = ENOSYS; int r = ENOSYS;
r = toku__lt_try_acquire_range_write_lock(tree, db, txn, r = lt_try_acquire_range_write_lock(tree, db, txn,
key_left, key_left, key_right);
key_right, if (r==TOKUDB_OUT_OF_LOCKS) {
&out_of_locks); r = ltm_do_escalation(tree->mgr);
if (r != 0) { goto cleanup; } if (r == 0) {
r = lt_try_acquire_range_write_lock(tree, db, txn,
if (out_of_locks) { key_left, key_right);
BOOL locks_available = FALSE; if (r==0) {
r = toku__lt_do_escalation_per_db(tree, db, &locks_available); tree->mgr->status.lock_escalation_successes++;
if (r != 0) { goto cleanup; } }
else if (r==TOKUDB_OUT_OF_LOCKS) {
if (!locks_available) { tree->mgr->status.lock_escalation_failures++;
r = TOKUDB_OUT_OF_LOCKS;
goto cleanup;
} }
r = toku__lt_try_acquire_range_write_lock(tree, db, txn,
key_left,
key_right,
&out_of_locks);
if (r != 0) { goto cleanup; }
} }
if (out_of_locks) {
r = TOKUDB_OUT_OF_LOCKS;
goto cleanup;
} }
r = 0;
cleanup:
if (tree) { if (tree) {
LTM_STATUS s = &(tree->mgr->status); LTM_STATUS s = &(tree->mgr->status);
if (r == 0) { if (r == 0) {
...@@ -1963,18 +1937,18 @@ int toku_lt_acquire_range_write_lock(toku_lock_tree* tree, DB* db, TXNID txn, ...@@ -1963,18 +1937,18 @@ int toku_lt_acquire_range_write_lock(toku_lock_tree* tree, DB* db, TXNID txn,
return r; return r;
} }
static inline int toku__sweep_border(toku_lock_tree* tree, toku_range* range) { static inline int sweep_border(toku_lock_tree* tree, toku_range* range) {
assert(tree && range); assert(tree && range);
toku_range_tree* borderwrite = tree->borderwrite; toku_range_tree* borderwrite = tree->borderwrite;
assert(borderwrite); assert(borderwrite);
/* Find overlapping range in borderwrite */ /* Find overlapping range in borderwrite */
int r; int r;
const u_int32_t query_size = 1; const uint32_t query_size = 1;
toku_range buffer[query_size]; toku_range buffer[query_size];
u_int32_t buflen = query_size; uint32_t buflen = query_size;
toku_range* buf = &buffer[0]; toku_range* buf = &buffer[0];
u_int32_t numfound; uint32_t numfound;
toku_interval query = range->ends; toku_interval query = range->ends;
r = toku_rt_find(borderwrite, &query, query_size, &buf, &buflen, &numfound); r = toku_rt_find(borderwrite, &query, query_size, &buf, &buflen, &numfound);
...@@ -1983,7 +1957,7 @@ static inline int toku__sweep_border(toku_lock_tree* tree, toku_range* range) { ...@@ -1983,7 +1957,7 @@ static inline int toku__sweep_border(toku_lock_tree* tree, toku_range* range) {
/* If none exists or data is not ours (we have already deleted the real /* If none exists or data is not ours (we have already deleted the real
overlapping range), continue to the end of the loop (i.e., return) */ overlapping range), continue to the end of the loop (i.e., return) */
if (!numfound || toku__lt_txn_cmp(buf[0].data, range->data)) return 0; if (!numfound || lt_txn_cmp(buf[0].data, range->data)) return 0;
assert(numfound == 1); assert(numfound == 1);
/* Delete s from borderwrite */ /* Delete s from borderwrite */
...@@ -1996,16 +1970,16 @@ static inline int toku__sweep_border(toku_lock_tree* tree, toku_range* range) { ...@@ -1996,16 +1970,16 @@ static inline int toku__sweep_border(toku_lock_tree* tree, toku_range* range) {
BOOL found_p = FALSE; BOOL found_p = FALSE;
BOOL found_s = FALSE; BOOL found_s = FALSE;
r = toku__lt_get_border(tree, TRUE, &pred, &succ, &found_p, &found_s, r = lt_get_border(tree, TRUE, &pred, &succ, &found_p, &found_s,
&buf[0]); &buf[0]);
if (r!=0) return r; if (r!=0) return r;
if (found_p && found_s && !toku__lt_txn_cmp(pred.data, succ.data) && if (found_p && found_s && !lt_txn_cmp(pred.data, succ.data) &&
!toku__lt_txn_cmp(pred.data, buf[0].data)) { !lt_txn_cmp(pred.data, buf[0].data)) {
return toku__lt_panic(tree, TOKU_LT_INCONSISTENT); } return lt_panic(tree, TOKU_LT_INCONSISTENT); }
/* If both found and pred.data=succ.data, merge pred and succ (expand?) /* If both found and pred.data=succ.data, merge pred and succ (expand?)
free_points */ free_points */
if (!found_p || !found_s || toku__lt_txn_cmp(pred.data, succ.data)) return 0; if (!found_p || !found_s || lt_txn_cmp(pred.data, succ.data)) return 0;
r = toku_rt_delete(borderwrite, &pred); r = toku_rt_delete(borderwrite, &pred);
if (r!=0) return r; if (r!=0) return r;
...@@ -2030,7 +2004,7 @@ static inline int toku__sweep_border(toku_lock_tree* tree, toku_range* range) { ...@@ -2030,7 +2004,7 @@ static inline int toku__sweep_border(toku_lock_tree* tree, toku_range* range) {
If both found and pred.data=succ.data, merge pred and succ (expand?) If both found and pred.data=succ.data, merge pred and succ (expand?)
free_points free_points
*/ */
static inline int toku__lt_border_delete(toku_lock_tree* tree, toku_range_tree* rt) { static inline int lt_border_delete(toku_lock_tree* tree, toku_range_tree* rt) {
int r; int r;
assert(tree); assert(tree);
if (!rt) return 0; if (!rt) return 0;
...@@ -2039,23 +2013,23 @@ static inline int toku__lt_border_delete(toku_lock_tree* tree, toku_range_tree* ...@@ -2039,23 +2013,23 @@ static inline int toku__lt_border_delete(toku_lock_tree* tree, toku_range_tree*
toku_interval query; toku_interval query;
toku_point left; toku_point left;
toku_point right; toku_point right;
toku__lt_init_full_query(tree, &query, &left, &right); lt_init_full_query(tree, &query, &left, &right);
u_int32_t numfound; uint32_t numfound;
r = toku_rt_find(rt, &query, 0, &tree->buf, &tree->buflen, &numfound); r = toku_rt_find(rt, &query, 0, &tree->buf, &tree->buflen, &numfound);
if (r!=0) return r; if (r!=0) return r;
assert(numfound <= tree->buflen); assert(numfound <= tree->buflen);
u_int32_t i; uint32_t i;
for (i = 0; i < numfound; i++) { for (i = 0; i < numfound; i++) {
r = toku__sweep_border(tree, &tree->buf[i]); r = sweep_border(tree, &tree->buf[i]);
if (r!=0) return r; if (r!=0) return r;
} }
return 0; return 0;
} }
static inline int toku__lt_defer_unlocking_txn(toku_lock_tree* tree, TXNID txnid) { static inline int lt_defer_unlocking_txn(toku_lock_tree* tree, TXNID txnid) {
int r = ENOSYS; int r = ENOSYS;
rt_forest* forest = toku_rth_find(tree->txns_to_unlock, txnid); rt_forest* forest = toku_rth_find(tree->txns_to_unlock, txnid);
...@@ -2071,49 +2045,49 @@ static inline int toku__lt_defer_unlocking_txn(toku_lock_tree* tree, TXNID txnid ...@@ -2071,49 +2045,49 @@ static inline int toku__lt_defer_unlocking_txn(toku_lock_tree* tree, TXNID txnid
return r; return r;
} }
static inline int toku__lt_unlock_txn(toku_lock_tree* tree, TXNID txn) { static inline int lt_unlock_txn(toku_lock_tree* tree, TXNID txn) {
if (!tree) return EINVAL; if (!tree) return EINVAL;
int r; int r;
toku_range_tree *selfwrite = toku__lt_ifexist_selfwrite(tree, txn); toku_range_tree *selfwrite = toku_lt_ifexist_selfwrite(tree, txn);
toku_range_tree *selfread = toku__lt_ifexist_selfread (tree, txn); toku_range_tree *selfread = toku_lt_ifexist_selfread (tree, txn);
u_int32_t ranges = 0; uint32_t ranges = 0;
if (selfread) { if (selfread) {
u_int32_t size; uint32_t size;
r = toku_rt_get_size(selfread, &size); r = toku_rt_get_size(selfread, &size);
assert(r==0); assert(r==0);
ranges += size; ranges += size;
r = toku__lt_free_contents(tree, selfread, tree->mainread, TRUE); r = lt_free_contents(tree, selfread, tree->mainread, TRUE);
if (r!=0) return toku__lt_panic(tree, r); if (r!=0) return lt_panic(tree, r);
} }
if (selfwrite) { if (selfwrite) {
u_int32_t size; uint32_t size;
r = toku_rt_get_size(selfwrite, &size); r = toku_rt_get_size(selfwrite, &size);
assert(r==0); assert(r==0);
ranges += size; ranges += size;
r = toku__lt_border_delete(tree, selfwrite); r = lt_border_delete(tree, selfwrite);
if (r!=0) return toku__lt_panic(tree, r); if (r!=0) return lt_panic(tree, r);
r = toku__lt_free_contents(tree, selfwrite, NULL, TRUE); r = lt_free_contents(tree, selfwrite, NULL, TRUE);
if (r!=0) return toku__lt_panic(tree, r); if (r!=0) return lt_panic(tree, r);
} }
if (tree->table_lock_owner==txn) tree->table_is_locked = FALSE; if (tree->table_lock_owner==txn) tree->table_is_locked = FALSE;
if (selfread || selfwrite) toku_rth_delete(tree->rth, txn); if (selfread || selfwrite) toku_rth_delete(tree->rth, txn);
toku__lt_lock_decr_per_db(tree, ranges); ltm_lock_decr(tree->mgr, ranges);
return 0; return 0;
} }
static inline int toku__lt_unlock_deferred_txns(toku_lock_tree* tree) { static inline int lt_unlock_deferred_txns(toku_lock_tree* tree) {
int r = ENOSYS; int r = ENOSYS;
toku_rth_start_scan(tree->txns_to_unlock); toku_rth_start_scan(tree->txns_to_unlock);
rt_forest* forest = NULL; rt_forest* forest = NULL;
while ((forest = toku_rth_next(tree->txns_to_unlock)) != NULL) { while ((forest = toku_rth_next(tree->txns_to_unlock)) != NULL) {
/* This can only fail with a panic so it is fine to quit immediately. */ /* This can only fail with a panic so it is fine to quit immediately. */
r = toku__lt_unlock_txn(tree, forest->hash_key); r = lt_unlock_txn(tree, forest->hash_key);
if (r!=0) { goto cleanup; } if (r!=0) { goto cleanup; }
} }
toku_rth_clear(tree->txns_to_unlock); toku_rth_clear(tree->txns_to_unlock);
...@@ -2122,7 +2096,7 @@ static inline int toku__lt_unlock_deferred_txns(toku_lock_tree* tree) { ...@@ -2122,7 +2096,7 @@ static inline int toku__lt_unlock_deferred_txns(toku_lock_tree* tree) {
return r; return r;
} }
static inline void toku__lt_clear(toku_lock_tree* tree) { static inline void lt_clear(toku_lock_tree* tree) {
int r; int r;
assert(tree); assert(tree);
#if !defined(TOKU_RT_NOOVERLAPS) #if !defined(TOKU_RT_NOOVERLAPS)
...@@ -2132,21 +2106,21 @@ static inline void toku__lt_clear(toku_lock_tree* tree) { ...@@ -2132,21 +2106,21 @@ static inline void toku__lt_clear(toku_lock_tree* tree) {
toku_rth_start_scan(tree->rth); toku_rth_start_scan(tree->rth);
rt_forest* forest; rt_forest* forest;
u_int32_t ranges = 0; uint32_t ranges = 0;
while ((forest = toku_rth_next(tree->rth)) != NULL) { while ((forest = toku_rth_next(tree->rth)) != NULL) {
u_int32_t size; uint32_t size;
if (forest->self_read) { if (forest->self_read) {
r = toku_rt_get_size(forest->self_read, &size); r = toku_rt_get_size(forest->self_read, &size);
assert(r==0); assert(r==0);
ranges += size; ranges += size;
r = toku__lt_free_contents(tree, forest->self_read, NULL, TRUE); r = lt_free_contents(tree, forest->self_read, NULL, TRUE);
assert(r==0); assert(r==0);
} }
if (forest->self_write) { if (forest->self_write) {
r = toku_rt_get_size(forest->self_write, &size); r = toku_rt_get_size(forest->self_write, &size);
assert(r==0); assert(r==0);
ranges += size; ranges += size;
r = toku__lt_free_contents(tree, forest->self_write, NULL, TRUE); r = lt_free_contents(tree, forest->self_write, NULL, TRUE);
assert(r==0); assert(r==0);
} }
...@@ -2154,16 +2128,16 @@ static inline void toku__lt_clear(toku_lock_tree* tree) { ...@@ -2154,16 +2128,16 @@ static inline void toku__lt_clear(toku_lock_tree* tree) {
toku_rth_clear(tree->rth); toku_rth_clear(tree->rth);
toku_rth_clear(tree->txns_to_unlock); toku_rth_clear(tree->txns_to_unlock);
/* tree->txns_still_locked is already empty, so we do not clear it. */ /* tree->txns_still_locked is already empty, so we do not clear it. */
toku__lt_lock_decr_per_db(tree, ranges); ltm_lock_decr(tree->mgr, ranges);
tree->table_is_locked = FALSE; tree->table_is_locked = FALSE;
} }
int toku_lt_unlock(toku_lock_tree* tree, TXNID txn) { int toku_lt_unlock(toku_lock_tree* tree, TXNID txn) {
int r = ENOSYS; int r = ENOSYS;
if (!tree) { r = EINVAL; goto cleanup; } if (!tree) { r = EINVAL; goto cleanup; }
r = toku__lt_defer_unlocking_txn(tree, txn); r = lt_defer_unlocking_txn(tree, txn);
if (r!=0) { goto cleanup; } if (r!=0) { goto cleanup; }
if (toku_rth_is_empty(tree->txns_still_locked)) { toku__lt_clear(tree); } if (toku_rth_is_empty(tree->txns_still_locked)) { lt_clear(tree); }
r = 0; r = 0;
cleanup: cleanup:
return r; return r;
...@@ -2199,3 +2173,49 @@ int toku_lt_remove_ref(toku_lock_tree* tree) { ...@@ -2199,3 +2173,49 @@ int toku_lt_remove_ref(toku_lock_tree* tree) {
return r; return r;
} }
/*
 * Heaviside function handed to toku_omt_find_zero() to locate a DB handle in
 * an OMT whose elements are DB pointers sorted by address.
 *
 * v   - the OMT element being examined (a DB pointer).
 * dbv - the DB pointer being searched for.
 *
 * Returns -1 if v sorts before dbv, +1 if it sorts after, and 0 if both are
 * the same pointer.
 *
 * The addresses are compared as uintptr_t integers rather than as pointers:
 * the relational operators are only defined for pointers into the same
 * object, and the handles compared here need not be related to each other.
 */
static int
find_db (OMTVALUE v, void *dbv) {
    uintptr_t db     = (uintptr_t) v;
    uintptr_t dbfind = (uintptr_t) dbv;
    if (db < dbfind) return -1;
    if (db > dbfind) return +1;
    return 0;
}
/*
 * Register `db` in tree->dbs.
 *
 * A NULL db is ignored.  Otherwise the OMT is searched with find_db; the
 * handle must not already be present (the search has to report DB_NOTFOUND),
 * and it is then inserted at the index the search returned.
 */
static void
lt_add_db(toku_lock_tree* tree, DB *db) {
    if (db == NULL) return;

    OMTVALUE get_dbv = NULL;
    uint32_t pos;
    int r = toku_omt_find_zero(tree->dbs, find_db, db, &get_dbv, &pos, NULL);
    invariant(r==DB_NOTFOUND);   /* the same handle is never added twice */
    r = toku_omt_insert_at(tree->dbs, db, pos);
    lazy_assert(r==0);
}
/*
 * Unregister `db` from tree->dbs.
 *
 * A NULL db is ignored.  Otherwise the handle must be present: the find_db
 * search has to succeed and return exactly `db`, after which the element at
 * the reported index is deleted.
 */
static void
lt_remove_db(toku_lock_tree* tree, DB *db) {
    if (db == NULL) return;

    OMTVALUE get_dbv = NULL;
    uint32_t pos;
    int r = toku_omt_find_zero(tree->dbs, find_db, db, &get_dbv, &pos, NULL);
    invariant(r==0);
    invariant(db==get_dbv);      /* the element found is the handle asked for */
    r = toku_omt_delete_at(tree->dbs, pos);
    invariant(r==0);
}
/*
 * Detach `db` from the lock tree and drop one reference on the tree.
 *
 * The handle is first removed from the tree's set of dbs (lt_remove_db
 * ignores a NULL db), then toku_lt_remove_ref() is called; that call is
 * asserted to return 0.
 */
void
toku_lt_remove_db_ref(toku_lock_tree* tree, DB *db) {
    lt_remove_db(tree, db);
    int r = toku_lt_remove_ref(tree);
    assert(r==0);
}
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include <lth.h> #include <lth.h>
#include <rth.h> #include <rth.h>
#include <idlth.h> #include <idlth.h>
#include <omt.h>
#include "toku_assert.h" #include "toku_assert.h"
...@@ -93,7 +94,7 @@ struct __toku_lock_tree { ...@@ -93,7 +94,7 @@ struct __toku_lock_tree {
the lt, we made copies from the DB at some point the lt, we made copies from the DB at some point
*/ */
toku_range* buf; toku_range* buf;
u_int32_t buflen; /**< The length of buf */ uint32_t buflen; /**< The length of buf */
/** Whether lock escalation is allowed. */ /** Whether lock escalation is allowed. */
BOOL lock_escalation_allowed; BOOL lock_escalation_allowed;
/** Lock tree manager */ /** Lock tree manager */
...@@ -110,16 +111,13 @@ struct __toku_lock_tree { ...@@ -110,16 +111,13 @@ struct __toku_lock_tree {
void (*free) (void*); void (*free) (void*);
/** The user realloc function */ /** The user realloc function */
void* (*realloc)(void*, size_t); void* (*realloc)(void*, size_t);
/** The maximum number of locks allowed for this lock tree. */
u_int32_t max_locks;
/** The current number of locks for this lock tree. */
u_int32_t curr_locks;
/** The number of references held by DB instances and transactions to this lock tree*/ /** The number of references held by DB instances and transactions to this lock tree*/
u_int32_t ref_count; uint32_t ref_count;
/** DICTIONARY_ID associated with the lock tree */ /** DICTIONARY_ID associated with the lock tree */
DICTIONARY_ID dict_id; DICTIONARY_ID dict_id;
TXNID table_lock_owner; TXNID table_lock_owner;
BOOL table_is_locked; BOOL table_is_locked;
OMT dbs; //The extant dbs using this lock tree.
}; };
...@@ -139,11 +137,13 @@ typedef struct ltm_status { ...@@ -139,11 +137,13 @@ typedef struct ltm_status {
struct __toku_ltm { struct __toku_ltm {
/** The maximum number of locks allowed for the environment. */ /** The maximum number of locks allowed for the environment. */
u_int32_t max_locks; uint32_t max_locks;
/** The current number of locks for the environment. */ /** The current number of locks for the environment. */
u_int32_t curr_locks; uint32_t curr_locks;
/** The maximum number of locks allowed for the db. */ /** The maximum amount of memory for locks allowed for the environment. */
u_int32_t max_locks_per_db; uint64_t max_lock_memory;
/** The current amount of memory for locks for the environment. */
uint64_t curr_lock_memory;
/** Status / accountability information */ /** Status / accountability information */
LTM_STATUS_S status; LTM_STATUS_S status;
/** The list of lock trees it manages. */ /** The list of lock trees it manages. */
...@@ -185,7 +185,7 @@ struct __toku_point { ...@@ -185,7 +185,7 @@ struct __toku_point {
toku_lock_tree* lt; /**< The lock tree, where toku_lt_point_cmp toku_lock_tree* lt; /**< The lock tree, where toku_lt_point_cmp
is defined */ is defined */
void* key_payload; /**< The key ... */ void* key_payload; /**< The key ... */
u_int32_t key_len; /**< and its length */ uint32_t key_len; /**< and its length */
}; };
#if !defined(__TOKU_POINT) #if !defined(__TOKU_POINT)
#define __TOKU_POINT #define __TOKU_POINT
...@@ -229,7 +229,7 @@ int toku_lt_create(toku_lock_tree** ptree, ...@@ -229,7 +229,7 @@ int toku_lt_create(toku_lock_tree** ptree,
Gets a lock tree for a given DB with id dict_id Gets a lock tree for a given DB with id dict_id
*/ */
int toku_ltm_get_lt(toku_ltm* mgr, toku_lock_tree** ptree, int toku_ltm_get_lt(toku_ltm* mgr, toku_lock_tree** ptree,
DICTIONARY_ID dict_id); DICTIONARY_ID dict_id, DB *db);
void toku_ltm_invalidate_lt(toku_ltm* mgr, DICTIONARY_ID dict_id); void toku_ltm_invalidate_lt(toku_ltm* mgr, DICTIONARY_ID dict_id);
...@@ -419,7 +419,8 @@ int toku_lt_unlock(toku_lock_tree* tree, TXNID txn); ...@@ -419,7 +419,8 @@ int toku_lt_unlock(toku_lock_tree* tree, TXNID txn);
- May return other errors due to system calls. - May return other errors due to system calls.
*/ */
int toku_ltm_create(toku_ltm** pmgr, int toku_ltm_create(toku_ltm** pmgr,
u_int32_t max_locks, uint32_t max_locks,
uint64_t max_lock_memory,
int (*panic)(DB*, int), int (*panic)(DB*, int),
toku_dbt_cmp (*get_compare_fun_from_db)(DB*), toku_dbt_cmp (*get_compare_fun_from_db)(DB*),
void* (*user_malloc) (size_t), void* (*user_malloc) (size_t),
...@@ -450,50 +451,29 @@ int toku_ltm_close(toku_ltm* mgr); ...@@ -450,50 +451,29 @@ int toku_ltm_close(toku_ltm* mgr);
- EDOM if max_locks is less than the number of locks held by any lock tree - EDOM if max_locks is less than the number of locks held by any lock tree
held by the manager held by the manager
*/ */
int toku_ltm_set_max_locks(toku_ltm* mgr, u_int32_t max_locks); int toku_ltm_set_max_locks(toku_ltm* mgr, uint32_t max_locks);
/** int toku_ltm_get_max_lock_memory(toku_ltm* mgr, uint64_t* max_lock_memory);
Sets the maximum number of locks for each lock tree.
This is a temporary function until we can complete ticket #596.
This will be used instead of toku_ltm_set_max_locks.
\param mgr The lock tree manager to which to set max_locks. int toku_ltm_set_max_lock_memory(toku_ltm* mgr, uint64_t max_lock_memory);
\param max_locks The new maximum number of locks.
\return void toku_ltm_get_status(toku_ltm* mgr, uint32_t * max_locks, uint32_t * curr_locks,
- 0 on success. uint64_t *max_lock_memory, uint64_t *curr_lock_memory,
- EINVAL if tree is NULL or max_locks is 0 LTM_STATUS s);
- EDOM if max_locks is less than the number of locks held by any lock tree
held by the manager
*/
int toku_ltm_set_max_locks_per_db(toku_ltm* mgr, u_int32_t max_locks);
/** int toku_ltm_get_max_locks(toku_ltm* mgr, uint32_t* max_locks);
Sets the maximum number of locks on the lock tree manager.
\param mgr The lock tree manager to which to set max_locks.
\param max_locks A buffer to return the number of max locks.
\return
- 0 on success.
- EINVAL if any parameter is NULL.
*/
void toku_ltm_get_status(toku_ltm* mgr, uint32_t * max_locks, uint32_t * curr_locks, uint32_t * max_locks_per_db, LTM_STATUS s);
int toku_ltm_get_max_locks(toku_ltm* mgr, u_int32_t* max_locks);
int toku_ltm_get_max_locks_per_db(toku_ltm* mgr, u_int32_t* max_locks);
void toku_lt_add_ref(toku_lock_tree* tree); void toku_lt_add_ref(toku_lock_tree* tree);
int toku_lt_remove_ref(toku_lock_tree* tree); int toku_lt_remove_ref(toku_lock_tree* tree);
int toku__lt_point_cmp(const toku_point* x, const toku_point* y); void toku_lt_remove_db_ref(toku_lock_tree* tree, DB *db);
int toku_lt_point_cmp(const toku_point* x, const toku_point* y);
toku_range_tree* toku__lt_ifexist_selfread(toku_lock_tree* tree, TXNID txn); toku_range_tree* toku_lt_ifexist_selfread(toku_lock_tree* tree, TXNID txn);
toku_range_tree* toku__lt_ifexist_selfwrite(toku_lock_tree* tree, TXNID txn); toku_range_tree* toku_lt_ifexist_selfwrite(toku_lock_tree* tree, TXNID txn);
#if defined(__cplusplus) #if defined(__cplusplus)
} }
......
...@@ -16,9 +16,9 @@ ...@@ -16,9 +16,9 @@
#include <string.h> #include <string.h>
/* TODO: reallocate the hash lth if it grows too big. Perhaps, use toku_get_prime in newbrt/primes.c */ /* TODO: reallocate the hash lth if it grows too big. Perhaps, use toku_get_prime in newbrt/primes.c */
const u_int32_t __toku_lth_init_size = 521; const uint32_t __toku_lth_init_size = 521;
static inline u_int32_t toku__lth_hash(toku_lth* lth, toku_lock_tree* key) { static inline uint32_t toku__lth_hash(toku_lth* lth, toku_lock_tree* key) {
size_t tmp = (size_t)key; size_t tmp = (size_t)key;
return tmp % lth->num_buckets; return tmp % lth->num_buckets;
} }
...@@ -65,7 +65,7 @@ int toku_lth_create(toku_lth** plth, ...@@ -65,7 +65,7 @@ int toku_lth_create(toku_lth** plth,
toku_lock_tree* toku_lth_find(toku_lth* lth, toku_lock_tree* key) { toku_lock_tree* toku_lth_find(toku_lth* lth, toku_lock_tree* key) {
assert(lth && key); assert(lth && key);
u_int32_t index = toku__lth_hash(lth, key); uint32_t index = toku__lth_hash(lth, key);
toku_lth_elt* head = &lth->buckets[index]; toku_lth_elt* head = &lth->buckets[index];
toku_lth_elt* current = head->next_in_bucket; toku_lth_elt* current = head->next_in_bucket;
while (current) { while (current) {
...@@ -104,7 +104,7 @@ void toku_lth_delete(toku_lth* lth, toku_lock_tree* key) { ...@@ -104,7 +104,7 @@ void toku_lth_delete(toku_lth* lth, toku_lock_tree* key) {
/* Must have elements. */ /* Must have elements. */
assert(lth->num_keys); assert(lth->num_keys);
u_int32_t index = toku__lth_hash(lth, key); uint32_t index = toku__lth_hash(lth, key);
toku_lth_elt* head = &lth->buckets[index]; toku_lth_elt* head = &lth->buckets[index];
toku_lth_elt* prev = head; toku_lth_elt* prev = head;
toku_lth_elt* current = prev->next_in_bucket; toku_lth_elt* current = prev->next_in_bucket;
...@@ -130,7 +130,7 @@ int toku_lth_insert(toku_lth* lth, toku_lock_tree* key) { ...@@ -130,7 +130,7 @@ int toku_lth_insert(toku_lth* lth, toku_lock_tree* key) {
assert(lth && key); assert(lth && key);
toku__invalidate_scan(lth); toku__invalidate_scan(lth);
u_int32_t index = toku__lth_hash(lth, key); uint32_t index = toku__lth_hash(lth, key);
/* Allocate a new one. */ /* Allocate a new one. */
toku_lth_elt* element = (toku_lth_elt*)lth->malloc(sizeof(*element)); toku_lth_elt* element = (toku_lth_elt*)lth->malloc(sizeof(*element));
......
...@@ -47,8 +47,8 @@ typedef struct __toku_lth toku_lth; ...@@ -47,8 +47,8 @@ typedef struct __toku_lth toku_lth;
struct __toku_lth { struct __toku_lth {
toku_lth_elt* buckets; toku_lth_elt* buckets;
u_int32_t num_buckets; uint32_t num_buckets;
u_int32_t num_keys; uint32_t num_keys;
toku_lth_elt iter_head; toku_lth_elt iter_head;
toku_lth_elt* iter_curr; toku_lth_elt* iter_curr;
BOOL iter_is_valid; BOOL iter_is_valid;
......
...@@ -16,11 +16,11 @@ ...@@ -16,11 +16,11 @@
#include <string.h> #include <string.h>
/* TODO: reallocate the hash rth if it grows too big. Perhaps, use toku_get_prime in newbrt/primes.c */ /* TODO: reallocate the hash rth if it grows too big. Perhaps, use toku_get_prime in newbrt/primes.c */
const u_int32_t __toku_rth_init_size = 521; const uint32_t __toku_rth_init_size = 521;
static inline u_int32_t toku__rth_hash(toku_rth* rth, TXNID key) { static inline uint32_t toku__rth_hash(toku_rth* rth, TXNID key) {
u_int64_t tmp = (u_int64_t)key; uint64_t tmp = (uint64_t)key;
return (u_int32_t)(tmp % rth->num_buckets); return (uint32_t)(tmp % rth->num_buckets);
} }
static inline void toku__invalidate_scan(toku_rth* rth) { static inline void toku__invalidate_scan(toku_rth* rth) {
...@@ -65,7 +65,7 @@ int toku_rth_create(toku_rth** prth, ...@@ -65,7 +65,7 @@ int toku_rth_create(toku_rth** prth,
rt_forest* toku_rth_find(toku_rth* rth, TXNID key) { rt_forest* toku_rth_find(toku_rth* rth, TXNID key) {
assert(rth); assert(rth);
u_int32_t index = toku__rth_hash(rth, key); uint32_t index = toku__rth_hash(rth, key);
toku_rth_elt* head = &rth->buckets[index]; toku_rth_elt* head = &rth->buckets[index];
toku_rth_elt* current = head->next_in_bucket; toku_rth_elt* current = head->next_in_bucket;
while (current) { while (current) {
...@@ -104,7 +104,7 @@ void toku_rth_delete(toku_rth* rth, TXNID key) { ...@@ -104,7 +104,7 @@ void toku_rth_delete(toku_rth* rth, TXNID key) {
/* Must have elements. */ /* Must have elements. */
assert(rth->num_keys); assert(rth->num_keys);
u_int32_t index = toku__rth_hash(rth, key); uint32_t index = toku__rth_hash(rth, key);
toku_rth_elt* head = &rth->buckets[index]; toku_rth_elt* head = &rth->buckets[index];
toku_rth_elt* prev = head; toku_rth_elt* prev = head;
toku_rth_elt* current = prev->next_in_bucket; toku_rth_elt* current = prev->next_in_bucket;
...@@ -130,7 +130,7 @@ int toku_rth_insert(toku_rth* rth, TXNID key) { ...@@ -130,7 +130,7 @@ int toku_rth_insert(toku_rth* rth, TXNID key) {
assert(rth); assert(rth);
toku__invalidate_scan(rth); toku__invalidate_scan(rth);
u_int32_t index = toku__rth_hash(rth, key); uint32_t index = toku__rth_hash(rth, key);
/* Allocate a new one. */ /* Allocate a new one. */
toku_rth_elt* element = (toku_rth_elt*)rth->malloc(sizeof(*element)); toku_rth_elt* element = (toku_rth_elt*)rth->malloc(sizeof(*element));
......
...@@ -23,8 +23,8 @@ extern "C" { ...@@ -23,8 +23,8 @@ extern "C" {
typedef struct __rt_forest rt_forest; typedef struct __rt_forest rt_forest;
struct __rt_forest { struct __rt_forest {
TXNID hash_key; TXNID hash_key;
toku_range_tree* self_read; toku_range_tree* self_read; //Set of range read locks held by txn 'hash_key'
toku_range_tree* self_write; toku_range_tree* self_write; //Set of range write locks held by txn 'hash_key'
}; };
typedef struct __toku_rth_elt toku_rth_elt; typedef struct __toku_rth_elt toku_rth_elt;
...@@ -38,8 +38,8 @@ struct __toku_rth_elt { ...@@ -38,8 +38,8 @@ struct __toku_rth_elt {
typedef struct __toku_rth toku_rth; typedef struct __toku_rth toku_rth;
struct __toku_rth { struct __toku_rth {
toku_rth_elt* buckets; toku_rth_elt* buckets;
u_int32_t num_buckets; uint32_t num_buckets;
u_int32_t num_keys; uint32_t num_keys;
toku_rth_elt iter_head; toku_rth_elt iter_head;
toku_rth_elt* iter_curr; toku_rth_elt* iter_curr;
BOOL iter_is_valid; BOOL iter_is_valid;
......
...@@ -33,16 +33,11 @@ static inline int dbcmp (DB *db __attribute__((__unused__)), const DBT *a, const ...@@ -33,16 +33,11 @@ static inline int dbcmp (DB *db __attribute__((__unused__)), const DBT *a, const
} }
toku_dbt_cmp compare_fun = dbcmp; toku_dbt_cmp compare_fun = dbcmp;
toku_dbt_cmp dup_compare = dbcmp;
static inline toku_dbt_cmp get_compare_fun_from_db(__attribute__((unused)) DB* db) { static inline toku_dbt_cmp get_compare_fun_from_db(__attribute__((unused)) DB* db) {
return compare_fun; return compare_fun;
} }
static inline toku_dbt_cmp get_dup_compare_from_db(__attribute__((unused)) DB* db) {
return dup_compare;
}
BOOL panicked = FALSE; BOOL panicked = FALSE;
static inline int dbpanic(DB* db, int r) { static inline int dbpanic(DB* db, int r) {
...@@ -78,17 +73,17 @@ static inline void parse_args (int argc, const char *argv[]) { ...@@ -78,17 +73,17 @@ static inline void parse_args (int argc, const char *argv[]) {
} }
// Simple LCG random number generator. Not high quality, but good enough. // Simple LCG random number generator. Not high quality, but good enough.
static u_int32_t rstate=1; static uint32_t rstate=1;
static inline void mysrandom (int s) { static inline void mysrandom (int s) {
rstate=s; rstate=s;
} }
static inline u_int32_t myrandom (void) { static inline uint32_t myrandom (void) {
rstate = (279470275ull*(u_int64_t)rstate)%4294967291ull; rstate = (279470275ull*(uint64_t)rstate)%4294967291ull;
return rstate; return rstate;
} }
static inline DBT *dbt_init(DBT *dbt, void *data, u_int32_t size) { static inline DBT *dbt_init(DBT *dbt, void *data, uint32_t size) {
memset(dbt, 0, sizeof *dbt); memset(dbt, 0, sizeof *dbt);
dbt->data = data; dbt->data = data;
dbt->size = size; dbt->size = size;
...@@ -98,12 +93,12 @@ static inline DBT *dbt_init(DBT *dbt, void *data, u_int32_t size) { ...@@ -98,12 +93,12 @@ static inline DBT *dbt_init(DBT *dbt, void *data, u_int32_t size) {
/** /**
A comparison function between toku_point's. A comparison function between toku_point's.
It is implemented as a wrapper of db compare and dup_compare functions, It is implemented as a wrapper of db compare functions,
but it checks whether the point is +/- infty. but it checks whether the point is +/- infty.
Parameters are of type toku_point. Parameters are of type toku_point.
Return values conform to cmp from qsort(3). Return values conform to cmp from qsort(3).
*/ */
// extern int toku__lt_point_cmp(void* a, void* b); // extern int toku_lt_point_cmp(void* a, void* b);
static inline void init_point(toku_point* point, toku_lock_tree* tree) { static inline void init_point(toku_point* point, toku_lock_tree* tree) {
assert(point && tree); assert(point && tree);
......
...@@ -4,9 +4,10 @@ int main(void) { ...@@ -4,9 +4,10 @@ int main(void) {
int r; int r;
toku_lock_tree* lt = NULL; toku_lock_tree* lt = NULL;
toku_ltm* mgr = NULL; toku_ltm* mgr = NULL;
u_int32_t max_locks = 1000; uint32_t max_locks = 1000;
uint64_t max_lock_memory = max_locks*64;
r = toku_ltm_create(&mgr, max_locks, dbpanic, r = toku_ltm_create(&mgr, max_locks, max_lock_memory, dbpanic,
get_compare_fun_from_db, get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc); toku_malloc, toku_free, toku_realloc);
CKERR(r); CKERR(r);
......
...@@ -4,7 +4,10 @@ ...@@ -4,7 +4,10 @@
static DBT _key; static DBT _key;
DBT* key; DBT* key;
u_int32_t max_locks = 1000;
enum { MAX_LT_LOCKS = 1000 };
uint32_t max_locks = MAX_LT_LOCKS;
uint64_t max_lock_memory = MAX_LT_LOCKS*64;
toku_ltm* ltm = NULL; toku_ltm* ltm = NULL;
static void do_range_test(int (*acquire)(toku_lock_tree*, DB*, TXNID, static void do_range_test(int (*acquire)(toku_lock_tree*, DB*, TXNID,
...@@ -97,34 +100,39 @@ int main(int argc, const char *argv[]) { ...@@ -97,34 +100,39 @@ int main(int argc, const char *argv[]) {
int r; int r;
toku_lock_tree* lt = NULL; toku_lock_tree* lt = NULL;
r = toku_ltm_create(NULL, max_locks, dbpanic, r = toku_ltm_create(NULL, max_locks, max_lock_memory, dbpanic,
get_compare_fun_from_db, get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc); toku_malloc, toku_free, toku_realloc);
CKERR2(r, EINVAL); CKERR2(r, EINVAL);
assert(ltm == NULL); assert(ltm == NULL);
r = toku_ltm_create(&ltm, 0, dbpanic, r = toku_ltm_create(&ltm, 0, max_lock_memory, dbpanic,
get_compare_fun_from_db, get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc); toku_malloc, toku_free, toku_realloc);
CKERR2(r, EINVAL); CKERR2(r, EINVAL);
assert(ltm == NULL); assert(ltm == NULL);
r = toku_ltm_create(&ltm, max_locks, dbpanic, r = toku_ltm_create(&ltm, max_locks, 0, dbpanic,
get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc);
CKERR2(r, EINVAL);
assert(ltm == NULL);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic,
get_compare_fun_from_db, get_compare_fun_from_db,
NULL, toku_free, toku_realloc); NULL, toku_free, toku_realloc);
CKERR2(r, EINVAL); CKERR2(r, EINVAL);
assert(ltm == NULL); assert(ltm == NULL);
r = toku_ltm_create(&ltm, max_locks, dbpanic, r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic,
get_compare_fun_from_db, get_compare_fun_from_db,
toku_malloc, NULL, toku_realloc); toku_malloc, NULL, toku_realloc);
CKERR2(r, EINVAL); CKERR2(r, EINVAL);
assert(ltm == NULL); assert(ltm == NULL);
r = toku_ltm_create(&ltm, max_locks, dbpanic, r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic,
get_compare_fun_from_db, get_compare_fun_from_db,
toku_malloc, toku_free, NULL); toku_malloc, toku_free, NULL);
CKERR2(r, EINVAL); CKERR2(r, EINVAL);
assert(ltm == NULL); assert(ltm == NULL);
/* Actually create it. */ /* Actually create it. */
r = toku_ltm_create(&ltm, max_locks, dbpanic, r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic,
get_compare_fun_from_db, get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc); toku_malloc, toku_free, toku_realloc);
CKERR(r); CKERR(r);
...@@ -137,7 +145,7 @@ int main(int argc, const char *argv[]) { ...@@ -137,7 +145,7 @@ int main(int argc, const char *argv[]) {
r = toku_ltm_set_max_locks(ltm, max_locks); r = toku_ltm_set_max_locks(ltm, max_locks);
CKERR(r); CKERR(r);
u_int32_t get_max = 73; //Some random number that isn't 0. uint32_t get_max = 73; //Some random number that isn't 0.
r = toku_ltm_get_max_locks(NULL, &get_max); r = toku_ltm_get_max_locks(NULL, &get_max);
CKERR2(r, EINVAL); CKERR2(r, EINVAL);
assert(get_max == 73); assert(get_max == 73);
...@@ -148,6 +156,24 @@ int main(int argc, const char *argv[]) { ...@@ -148,6 +156,24 @@ int main(int argc, const char *argv[]) {
CKERR(r); CKERR(r);
assert(get_max == max_locks); assert(get_max == max_locks);
r = toku_ltm_set_max_lock_memory(NULL, max_lock_memory);
CKERR2(r, EINVAL);
r = toku_ltm_set_max_lock_memory(ltm, 0);
CKERR2(r, EINVAL);
r = toku_ltm_set_max_lock_memory(ltm, max_lock_memory);
CKERR(r);
uint64_t get_max_memory = 73; //Some random number that isn't 0.
r = toku_ltm_get_max_lock_memory(NULL, &get_max_memory);
CKERR2(r, EINVAL);
assert(get_max_memory == 73);
r = toku_ltm_get_max_lock_memory(ltm, NULL);
CKERR2(r, EINVAL);
assert(get_max_memory == 73);
r = toku_ltm_get_max_lock_memory(ltm, &get_max_memory);
CKERR(r);
assert(get_max_memory == max_lock_memory);
/* create tests. */ /* create tests. */
{ {
r = toku_lt_create(NULL, dbpanic, ltm, r = toku_lt_create(NULL, dbpanic, ltm,
......
...@@ -7,7 +7,9 @@ toku_lock_tree* lt = NULL; ...@@ -7,7 +7,9 @@ toku_lock_tree* lt = NULL;
toku_ltm* ltm = NULL; toku_ltm* ltm = NULL;
DB* db = (DB*)1; DB* db = (DB*)1;
TXNID txn = (TXNID)1; TXNID txn = (TXNID)1;
u_int32_t max_locks = 1000; enum { MAX_LT_LOCKS = 1000 };
uint32_t max_locks = MAX_LT_LOCKS;
uint64_t max_lock_memory = MAX_LT_LOCKS*64;
BOOL duplicates = FALSE; BOOL duplicates = FALSE;
int nums[100]; int nums[100];
...@@ -40,7 +42,7 @@ static void init_query(void) { ...@@ -40,7 +42,7 @@ static void init_query(void) {
static void setup_tree(void) { static void setup_tree(void) {
assert(!lt && !ltm); assert(!lt && !ltm);
r = toku_ltm_create(&ltm, max_locks, dbpanic, r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic,
get_compare_fun_from_db, get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc); toku_malloc, toku_free, toku_realloc);
CKERR(r); CKERR(r);
...@@ -90,7 +92,7 @@ static void lt_insert(int key_l, int key_r) { ...@@ -90,7 +92,7 @@ static void lt_insert(int key_l, int key_r) {
CKERR(r); CKERR(r);
} }
static void setup_payload_len(void** payload, u_int32_t* len, int val) { static void setup_payload_len(void** payload, uint32_t* len, int val) {
assert(payload && len); assert(payload && len);
DBT temp; DBT temp;
...@@ -133,8 +135,8 @@ temporarily_fake_comparison_functions(); ...@@ -133,8 +135,8 @@ temporarily_fake_comparison_functions();
setup_payload_len(&right.key_payload, &right.key_len, key_r); setup_payload_len(&right.key_payload, &right.key_len, key_r);
unsigned i; unsigned i;
for (i = 0; i < numfound; i++) { for (i = 0; i < numfound; i++) {
if (toku__lt_point_cmp(buf[i].ends.left, &left ) == 0 && if (toku_lt_point_cmp(buf[i].ends.left, &left ) == 0 &&
toku__lt_point_cmp(buf[i].ends.right, &right) == 0 && toku_lt_point_cmp(buf[i].ends.right, &right) == 0 &&
buf[i].data == find_txn) { goto cleanup; } buf[i].data == find_txn) { goto cleanup; }
} }
assert(FALSE); //Crash since we didn't find it. assert(FALSE); //Crash since we didn't find it.
...@@ -193,7 +195,7 @@ static void runtest(void) { ...@@ -193,7 +195,7 @@ static void runtest(void) {
lt_insert(3, 7); lt_insert(3, 7);
lt_insert(4, 5); lt_insert(4, 5);
rt = toku__lt_ifexist_selfread(lt, txn); rt = toku_lt_ifexist_selfread(lt, txn);
assert(rt); assert(rt);
lt_find(rt, 1, lt_find(rt, 1,
...@@ -221,7 +223,7 @@ static void runtest(void) { ...@@ -221,7 +223,7 @@ static void runtest(void) {
lt_insert(4, 5); lt_insert(4, 5);
lt_insert(3, 7); lt_insert(3, 7);
rt = toku__lt_ifexist_selfread(lt, txn); assert(rt); rt = toku_lt_ifexist_selfread(lt, txn); assert(rt);
lt_find(rt, 1, lt_find(rt, 1,
3, 3,
...@@ -243,7 +245,7 @@ static void runtest(void) { ...@@ -243,7 +245,7 @@ static void runtest(void) {
lt_insert(3, 3); lt_insert(3, 3);
lt_insert(4, 4); lt_insert(4, 4);
lt_insert(3, 3); lt_insert(3, 3);
rt = toku__lt_ifexist_selfread(lt, txn); assert(rt); rt = toku_lt_ifexist_selfread(lt, txn); assert(rt);
lt_find(rt, 2, 3, 3, txn); lt_find(rt, 2, 3, 3, txn);
lt_find(rt, 2, 4, 4, txn); lt_find(rt, 2, 4, 4, txn);
#ifndef TOKU_RT_NOOVERLAPS #ifndef TOKU_RT_NOOVERLAPS
...@@ -258,7 +260,7 @@ static void runtest(void) { ...@@ -258,7 +260,7 @@ static void runtest(void) {
for (i = 0; i < 20; i += 2) { for (i = 0; i < 20; i += 2) {
lt_insert(i, i + 1); lt_insert(i, i + 1);
} }
rt = toku__lt_ifexist_selfread(lt, txn); rt = toku_lt_ifexist_selfread(lt, txn);
assert(rt); assert(rt);
for (i = 0; i < 20; i += 2) { for (i = 0; i < 20; i += 2) {
lt_find(rt, 10, i, i + 1, txn); lt_find(rt, 10, i, i + 1, txn);
...@@ -270,7 +272,7 @@ static void runtest(void) { ...@@ -270,7 +272,7 @@ static void runtest(void) {
} }
#endif #endif
lt_insert(0, 20); lt_insert(0, 20);
rt = toku__lt_ifexist_selfread(lt, txn); assert(rt); rt = toku_lt_ifexist_selfread(lt, txn); assert(rt);
lt_find( rt, 1, 0, 20, txn); lt_find( rt, 1, 0, 20, txn);
#ifndef TOKU_RT_NOOVERLAPS #ifndef TOKU_RT_NOOVERLAPS
rt = lt->mainread; assert(rt); rt = lt->mainread; assert(rt);
...@@ -286,7 +288,7 @@ static void runtest(void) { ...@@ -286,7 +288,7 @@ static void runtest(void) {
lt_insert(4, 5); lt_insert(4, 5);
lt_insert(3, 4); lt_insert(3, 4);
rt = toku__lt_ifexist_selfread(lt, txn); assert(rt); rt = toku_lt_ifexist_selfread(lt, txn); assert(rt);
lt_find(rt, 2, 0, 2, txn); lt_find(rt, 2, 0, 2, txn);
lt_find(rt, 2, 3, 5, txn); lt_find(rt, 2, 3, 5, txn);
#ifndef TOKU_RT_NOOVERLAPS #ifndef TOKU_RT_NOOVERLAPS
...@@ -297,7 +299,7 @@ static void runtest(void) { ...@@ -297,7 +299,7 @@ static void runtest(void) {
lt_insert(2, 3); lt_insert(2, 3);
rt = toku__lt_ifexist_selfread(lt, txn); assert(rt); rt = toku_lt_ifexist_selfread(lt, txn); assert(rt);
lt_find(rt, 1, 0, 5, txn); lt_find(rt, 1, 0, 5, txn);
#ifndef TOKU_RT_NOOVERLAPS #ifndef TOKU_RT_NOOVERLAPS
rt = lt->mainread; assert(rt); rt = lt->mainread; assert(rt);
...@@ -310,7 +312,7 @@ static void runtest(void) { ...@@ -310,7 +312,7 @@ static void runtest(void) {
lt_insert(1, 3); lt_insert(1, 3);
lt_insert(4, 6); lt_insert(4, 6);
lt_insert(2, 5); lt_insert(2, 5);
rt = toku__lt_ifexist_selfread(lt, txn); assert(rt); rt = toku_lt_ifexist_selfread(lt, txn); assert(rt);
lt_find(rt, 1, 1, 6, txn); lt_find(rt, 1, 1, 6, txn);
#ifndef TOKU_RT_NOOVERLAPS #ifndef TOKU_RT_NOOVERLAPS
rt = lt->mainread; assert(rt); rt = lt->mainread; assert(rt);
...@@ -323,7 +325,7 @@ static void runtest(void) { ...@@ -323,7 +325,7 @@ static void runtest(void) {
lt_insert( 4, 5); lt_insert( 4, 5);
lt_insert( 6, 8); lt_insert( 6, 8);
lt_insert( 2, 7); lt_insert( 2, 7);
rt = toku__lt_ifexist_selfread(lt, txn); assert(rt); rt = toku_lt_ifexist_selfread(lt, txn); assert(rt);
lt_find(rt, 1, neg_infinite, 8, txn); lt_find(rt, 1, neg_infinite, 8, txn);
#ifndef TOKU_RT_NOOVERLAPS #ifndef TOKU_RT_NOOVERLAPS
rt = lt->mainread; assert(rt); rt = lt->mainread; assert(rt);
...@@ -335,7 +337,7 @@ static void runtest(void) { ...@@ -335,7 +337,7 @@ static void runtest(void) {
lt_insert(1, 2); lt_insert(1, 2);
lt_insert(3, infinite); lt_insert(3, infinite);
lt_insert(2, 3); lt_insert(2, 3);
rt = toku__lt_ifexist_selfread(lt, txn); assert(rt); rt = toku_lt_ifexist_selfread(lt, txn); assert(rt);
lt_find(rt, 1, 1, infinite, txn); lt_find(rt, 1, 1, infinite, txn);
#ifndef TOKU_RT_NOOVERLAPS #ifndef TOKU_RT_NOOVERLAPS
rt = lt->mainread; assert(rt); rt = lt->mainread; assert(rt);
...@@ -348,7 +350,7 @@ static void runtest(void) { ...@@ -348,7 +350,7 @@ static void runtest(void) {
lt_insert(3, 4); lt_insert(3, 4);
lt_insert(5, 6); lt_insert(5, 6);
lt_insert(2, 5); lt_insert(2, 5);
rt = toku__lt_ifexist_selfread(lt, txn); assert(rt); rt = toku_lt_ifexist_selfread(lt, txn); assert(rt);
lt_find(rt, 1, 1, 6, txn); lt_find(rt, 1, 1, 6, txn);
#ifndef TOKU_RT_NOOVERLAPS #ifndef TOKU_RT_NOOVERLAPS
rt = lt->mainread; assert(rt); rt = lt->mainread; assert(rt);
...@@ -360,7 +362,7 @@ static void runtest(void) { ...@@ -360,7 +362,7 @@ static void runtest(void) {
lt_insert(1, 2); lt_insert(1, 2);
lt_insert(3, 5); lt_insert(3, 5);
lt_insert(2, 4); lt_insert(2, 4);
rt = toku__lt_ifexist_selfread(lt, txn); assert(rt); rt = toku_lt_ifexist_selfread(lt, txn); assert(rt);
lt_find(rt, 1, 1, 5, txn); lt_find(rt, 1, 1, 5, txn);
#ifndef TOKU_RT_NOOVERLAPS #ifndef TOKU_RT_NOOVERLAPS
rt = lt->mainread; assert(rt); rt = lt->mainread; assert(rt);
......
...@@ -6,7 +6,9 @@ int r; ...@@ -6,7 +6,9 @@ int r;
toku_lock_tree* lt = NULL; toku_lock_tree* lt = NULL;
toku_ltm* ltm = NULL; toku_ltm* ltm = NULL;
DB* db = (DB*)1; DB* db = (DB*)1;
u_int32_t max_locks = 1000; enum { MAX_LT_LOCKS = 1000 };
uint32_t max_locks = MAX_LT_LOCKS;
uint64_t max_lock_memory = MAX_LT_LOCKS*64;
int nums[100]; int nums[100];
DBT _keys_left[2]; DBT _keys_left[2];
...@@ -34,7 +36,7 @@ static void init_query(void) { ...@@ -34,7 +36,7 @@ static void init_query(void) {
static void setup_tree(void) { static void setup_tree(void) {
assert(!lt && !ltm); assert(!lt && !ltm);
r = toku_ltm_create(&ltm, max_locks, dbpanic, r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic,
get_compare_fun_from_db, get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc); toku_malloc, toku_free, toku_realloc);
CKERR(r); CKERR(r);
......
...@@ -6,7 +6,9 @@ int r; ...@@ -6,7 +6,9 @@ int r;
toku_lock_tree* lt = NULL; toku_lock_tree* lt = NULL;
toku_ltm* ltm = NULL; toku_ltm* ltm = NULL;
DB* db = (DB*)1; DB* db = (DB*)1;
u_int32_t max_locks = 10; enum { MAX_LT_LOCKS = 10 };
uint32_t max_locks = MAX_LT_LOCKS;
uint64_t max_lock_memory = MAX_LT_LOCKS*64;
BOOL duplicates = FALSE; BOOL duplicates = FALSE;
int nums[10000]; int nums[10000];
...@@ -35,14 +37,15 @@ static void init_query(void) { ...@@ -35,14 +37,15 @@ static void init_query(void) {
static void setup_tree(void) { static void setup_tree(void) {
assert(!lt && !ltm); assert(!lt && !ltm);
r = toku_ltm_create(&ltm, max_locks, dbpanic, r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic,
get_compare_fun_from_db, get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc); toku_malloc, toku_free, toku_realloc);
CKERR(r); CKERR(r);
assert(ltm); assert(ltm);
r = toku_lt_create(&lt, dbpanic, ltm, //ask ltm for lock tree
get_compare_fun_from_db, DICTIONARY_ID dict_id = {0x1234};
toku_malloc, toku_free, toku_realloc); r = toku_ltm_get_lt(ltm, &lt, dict_id, db);
CKERR(r); CKERR(r);
assert(lt); assert(lt);
init_query(); init_query();
...@@ -50,8 +53,8 @@ static void setup_tree(void) { ...@@ -50,8 +53,8 @@ static void setup_tree(void) {
static void close_tree(void) { static void close_tree(void) {
assert(lt && ltm); assert(lt && ltm);
r = toku_lt_close(lt);
CKERR(r); toku_lt_remove_db_ref(lt, db);
r = toku_ltm_close(ltm); r = toku_ltm_close(ltm);
CKERR(r); CKERR(r);
lt = NULL; lt = NULL;
...@@ -354,7 +357,6 @@ static void init_test(void) { ...@@ -354,7 +357,6 @@ static void init_test(void) {
buflen = 64; buflen = 64;
buf = (toku_range*) toku_malloc(buflen*sizeof(toku_range)); buf = (toku_range*) toku_malloc(buflen*sizeof(toku_range));
compare_fun = intcmp; compare_fun = intcmp;
dup_compare = intcmp;
} }
static void close_test(void) { static void close_test(void) {
......
...@@ -10,12 +10,14 @@ int r; ...@@ -10,12 +10,14 @@ int r;
toku_lock_tree* lt [10] = {0}; toku_lock_tree* lt [10] = {0};
toku_ltm* ltm = NULL; toku_ltm* ltm = NULL;
DB* db = (DB*)1; DB* db = (DB*)1;
u_int32_t max_locks = 10; enum { MAX_LT_LOCKS = 10 };
uint32_t max_locks = MAX_LT_LOCKS;
uint64_t max_lock_memory = MAX_LT_LOCKS*64;
int nums[10000]; int nums[10000];
static void setup_ltm(void) { static void setup_ltm(void) {
assert(!ltm); assert(!ltm);
r = toku_ltm_create(&ltm, max_locks, dbpanic, r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic,
get_compare_fun_from_db, get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc); toku_malloc, toku_free, toku_realloc);
CKERR(r); CKERR(r);
...@@ -24,7 +26,7 @@ static void setup_ltm(void) { ...@@ -24,7 +26,7 @@ static void setup_ltm(void) {
static void setup_tree(size_t index, DICTIONARY_ID dict_id) { static void setup_tree(size_t index, DICTIONARY_ID dict_id) {
assert(!lt[index] && ltm); assert(!lt[index] && ltm);
r = toku_ltm_get_lt(ltm, &lt[index], dict_id); r = toku_ltm_get_lt(ltm, &lt[index], dict_id, NULL);
CKERR(r); CKERR(r);
assert(lt[index]); assert(lt[index]);
} }
...@@ -34,7 +36,7 @@ static void close_ltm(void) { ...@@ -34,7 +36,7 @@ static void close_ltm(void) {
assert(ltm); assert(ltm);
r = toku_ltm_close(ltm); r = toku_ltm_close(ltm);
CKERR(r); CKERR(r);
u_int32_t i = 0; uint32_t i = 0;
for (i = 0; i < sizeof(lt)/sizeof(*lt); i++) { lt[i] = NULL; } for (i = 0; i < sizeof(lt)/sizeof(*lt); i++) { lt[i] = NULL; }
ltm = NULL; ltm = NULL;
} }
...@@ -68,7 +70,6 @@ static void run_test(void) { ...@@ -68,7 +70,6 @@ static void run_test(void) {
int main(int argc, const char *argv[]) { int main(int argc, const char *argv[]) {
parse_args(argc, argv); parse_args(argc, argv);
compare_fun = intcmp; compare_fun = intcmp;
dup_compare = intcmp;
r = system("rm -rf " TESTDIR); r = system("rm -rf " TESTDIR);
CKERR(r); CKERR(r);
......
...@@ -9,16 +9,18 @@ ...@@ -9,16 +9,18 @@
static void initial_setup(void); static void initial_setup(void);
static int r; static int r;
static u_int32_t lt_refs[100]; static uint32_t lt_refs[100];
static toku_lock_tree* lts [100]; static toku_lock_tree* lts [100];
static toku_ltm* ltm = NULL; static toku_ltm* ltm = NULL;
static DICTIONARY_ID dict_ids[100]; static DICTIONARY_ID dict_ids[100];
static u_int32_t max_locks = 10; enum { MAX_LT_LOCKS = 10 };
uint32_t max_locks = MAX_LT_LOCKS;
uint64_t max_lock_memory = MAX_LT_LOCKS*64;
int nums[10000]; int nums[10000];
static void setup_ltm(void) { static void setup_ltm(void) {
assert(!ltm); assert(!ltm);
r = toku_ltm_create(&ltm, max_locks, dbpanic, r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic,
get_compare_fun_from_db, get_compare_fun_from_db,
toku_malloc, toku_free, toku_realloc); toku_malloc, toku_free, toku_realloc);
CKERR(r); CKERR(r);
...@@ -30,7 +32,7 @@ static void db_open_tree(size_t index, size_t db_id_index) { ...@@ -30,7 +32,7 @@ static void db_open_tree(size_t index, size_t db_id_index) {
(lt_refs[index] > 0 && lts[index])); (lt_refs[index] > 0 && lts[index]));
assert(ltm); assert(ltm);
lt_refs[index]++; lt_refs[index]++;
r = toku_ltm_get_lt(ltm, &lts[index], dict_ids[db_id_index]); r = toku_ltm_get_lt(ltm, &lts[index], dict_ids[db_id_index], NULL);
CKERR(r); CKERR(r);
assert(lts[index]); assert(lts[index]);
} }
...@@ -112,7 +114,7 @@ static void run_test(void) { ...@@ -112,7 +114,7 @@ static void run_test(void) {
} }
static void initial_setup(void) { static void initial_setup(void) {
u_int32_t i; uint32_t i;
ltm = NULL; ltm = NULL;
assert(sizeof(dict_ids) / sizeof(dict_ids[0]) == sizeof(lts) / sizeof(lts[0])); assert(sizeof(dict_ids) / sizeof(dict_ids[0]) == sizeof(lts) / sizeof(lts[0]));
...@@ -127,7 +129,7 @@ static void initial_setup(void) { ...@@ -127,7 +129,7 @@ static void initial_setup(void) {
} }
static void close_test(void) { static void close_test(void) {
u_int32_t i; uint32_t i;
for (i = 0; i < sizeof(lts) / sizeof(lts[0]); i++) { for (i = 0; i < sizeof(lts) / sizeof(lts[0]); i++) {
assert(lt_refs[i]==0); //The internal reference isn't counted. assert(lt_refs[i]==0); //The internal reference isn't counted.
assert(dict_ids[i].dictid != DICTIONARY_ID_NONE.dictid); assert(dict_ids[i].dictid != DICTIONARY_ID_NONE.dictid);
...@@ -137,7 +139,6 @@ static void close_test(void) { ...@@ -137,7 +139,6 @@ static void close_test(void) {
int main(int argc, const char *argv[]) { int main(int argc, const char *argv[]) {
parse_args(argc, argv); parse_args(argc, argv);
compare_fun = intcmp; compare_fun = intcmp;
dup_compare = intcmp;
r = system("rm -rf " TESTDIR); r = system("rm -rf " TESTDIR);
CKERR(r); CKERR(r);
......
...@@ -22,6 +22,8 @@ test_setup (void) { ...@@ -22,6 +22,8 @@ test_setup (void) {
r=db_env_create(&env, 0); CKERR(r); r=db_env_create(&env, 0); CKERR(r);
env->set_errfile(env, stderr); env->set_errfile(env, stderr);
multiply_locks_for_n_dbs(env, NFILES);
r=env->open(env, ENVDIR, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r=env->open(env, ENVDIR, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
r=env->txn_begin(env, 0, &txn, 0); assert(r==0); r=env->txn_begin(env, 0, &txn, 0); assert(r==0);
...@@ -64,7 +66,7 @@ doit (void) { ...@@ -64,7 +66,7 @@ doit (void) {
dbt_init(&data, str, 1+strlen(str)); dbt_init(&data, str, 1+strlen(str));
for (i=0; i<NFILES; i++) { for (i=0; i<NFILES; i++) {
r = dbs[i]->put(dbs[i], txn, &key, &data, DB_YESOVERWRITE); r = dbs[i]->put(dbs[i], txn, &key, &data, DB_YESOVERWRITE);
assert(r==0); CKERR(r);
} }
} }
r=txn->commit(txn, 0); assert(r==0); r=txn->commit(txn, 0); assert(r==0);
......
...@@ -287,6 +287,23 @@ toku_hard_crash_on_purpose(void) { ...@@ -287,6 +287,23 @@ toku_hard_crash_on_purpose(void) {
fflush(stderr); fflush(stderr);
} }
/*
 * Scale the environment's lock limits by num_dbs.
 *
 * Reads the current max-locks setting from env and writes it back multiplied
 * by num_dbs.  When built with USE_TDB, the max lock memory setting is read
 * and scaled the same way (NOTE(review): presumably because the memory limit
 * is a TokuDB-only extension -- confirm).  Every env call is checked with
 * CKERR.
 */
static void UU()
multiply_locks_for_n_dbs(DB_ENV *env, int num_dbs) {
    uint32_t lock_limit;
    int rc = env->get_lk_max_locks(env, &lock_limit);
    CKERR(rc);
    uint32_t scaled_locks = lock_limit * num_dbs;
    rc = env->set_lk_max_locks(env, scaled_locks);
    CKERR(rc);
#if defined(USE_TDB)
    uint64_t memory_limit;
    rc = env->get_lk_max_memory(env, &memory_limit);
    CKERR(rc);
    uint64_t scaled_memory = memory_limit * num_dbs;
    rc = env->set_lk_max_memory(env, scaled_memory);
    CKERR(rc);
#endif
}
#if defined(__cilkplusplus) || defined(__cplusplus) #if defined(__cilkplusplus) || defined(__cplusplus)
} }
#endif #endif
......
...@@ -95,6 +95,7 @@ single_process_unlock(int *lockfd) { ...@@ -95,6 +95,7 @@ single_process_unlock(int *lockfd) {
/** The default maximum number of persistent locks in a lock tree */ /** The default maximum number of persistent locks in a lock tree */
const u_int32_t __toku_env_default_max_locks = 1000; const u_int32_t __toku_env_default_max_locks = 1000;
/** The default maximum lock memory handed to toku_ltm_create when an environment is created (NOTE(review): presumably a byte count, i.e. 1000 KiB -- confirm) */
const uint64_t __toku_env_default_max_lock_memory = 1000*1024;
static inline DBT* static inline DBT*
init_dbt_realloc(DBT *dbt) { init_dbt_realloc(DBT *dbt) {
...@@ -1103,7 +1104,7 @@ static int toku_env_set_lk_max_locks(DB_ENV *dbenv, u_int32_t max) { ...@@ -1103,7 +1104,7 @@ static int toku_env_set_lk_max_locks(DB_ENV *dbenv, u_int32_t max) {
int r = ENOSYS; int r = ENOSYS;
HANDLE_PANICKED_ENV(dbenv); HANDLE_PANICKED_ENV(dbenv);
if (env_opened(dbenv)) { return EINVAL; } if (env_opened(dbenv)) { return EINVAL; }
r = toku_ltm_set_max_locks_per_db(dbenv->i->ltm, max); r = toku_ltm_set_max_locks(dbenv->i->ltm, max);
return r; return r;
} }
...@@ -1119,17 +1120,38 @@ static int locked_env_set_lk_max(DB_ENV * env, u_int32_t lk_max) { ...@@ -1119,17 +1120,38 @@ static int locked_env_set_lk_max(DB_ENV * env, u_int32_t lk_max) {
static int toku_env_get_lk_max_locks(DB_ENV *dbenv, u_int32_t *lk_maxp) { static int toku_env_get_lk_max_locks(DB_ENV *dbenv, u_int32_t *lk_maxp) {
HANDLE_PANICKED_ENV(dbenv); HANDLE_PANICKED_ENV(dbenv);
return toku_ltm_get_max_locks_per_db(dbenv->i->ltm, lk_maxp); return toku_ltm_get_max_locks(dbenv->i->ltm, lk_maxp);
} }
static int locked_env_set_lk_max_locks(DB_ENV *dbenv, u_int32_t max) { static int locked_env_set_lk_max_locks(DB_ENV *dbenv, u_int32_t max) {
toku_ydb_lock(); int r = toku_env_set_lk_max_locks(dbenv, max); toku_ydb_unlock(); return r; toku_ydb_lock(); int r = toku_env_set_lk_max_locks(dbenv, max); toku_ydb_unlock(); return r;
} }
static int __attribute__((unused)) locked_env_get_lk_max_locks(DB_ENV *dbenv, u_int32_t *lk_maxp) { static int locked_env_get_lk_max_locks(DB_ENV *dbenv, u_int32_t *lk_maxp) {
toku_ydb_lock(); int r = toku_env_get_lk_max_locks(dbenv, lk_maxp); toku_ydb_unlock(); return r; toku_ydb_lock(); int r = toku_env_get_lk_max_locks(dbenv, lk_maxp); toku_ydb_unlock(); return r;
} }
/*
 * Set the maximum lock memory on this environment's lock tree manager.
 *
 * HANDLE_PANICKED_ENV (macro defined elsewhere) runs first.  Returns EINVAL
 * if the environment has already been opened; otherwise returns the result
 * of toku_ltm_set_max_lock_memory.
 */
static int toku_env_set_lk_max_memory(DB_ENV *dbenv, uint64_t max) {
    HANDLE_PANICKED_ENV(dbenv);
    if (env_opened(dbenv)) {
        return EINVAL;
    }
    return toku_ltm_set_max_lock_memory(dbenv->i->ltm, max);
}
/*
 * Fetch the maximum lock memory from this environment's lock tree manager
 * into *lk_maxp.  HANDLE_PANICKED_ENV (macro defined elsewhere) runs first;
 * the return value is whatever toku_ltm_get_max_lock_memory reports.
 */
static int toku_env_get_lk_max_memory(DB_ENV *dbenv, uint64_t *lk_maxp) {
    HANDLE_PANICKED_ENV(dbenv);
    int rc = toku_ltm_get_max_lock_memory(dbenv->i->ltm, lk_maxp);
    return rc;
}
/*
 * Wrapper around toku_env_set_lk_max_memory that brackets the call with
 * toku_ydb_lock()/toku_ydb_unlock() and passes the result through unchanged.
 */
static int locked_env_set_lk_max_memory(DB_ENV *dbenv, uint64_t max) {
    int rc;
    toku_ydb_lock();
    rc = toku_env_set_lk_max_memory(dbenv, max);
    toku_ydb_unlock();
    return rc;
}
/*
 * Wrapper around toku_env_get_lk_max_memory that brackets the call with
 * toku_ydb_lock()/toku_ydb_unlock() and passes the result through unchanged.
 */
static int locked_env_get_lk_max_memory(DB_ENV *dbenv, uint64_t *lk_maxp) {
    int rc;
    toku_ydb_lock();
    rc = toku_env_get_lk_max_memory(dbenv, lk_maxp);
    toku_ydb_unlock();
    return rc;
}
//void toku__env_set_noticecall (DB_ENV *env, void (*noticecall)(DB_ENV *, db_notices)) { //void toku__env_set_noticecall (DB_ENV *env, void (*noticecall)(DB_ENV *, db_notices)) {
// env->i->noticecall = noticecall; // env->i->noticecall = noticecall;
//} //}
...@@ -1524,11 +1546,15 @@ env_get_engine_status(DB_ENV * env, ENGINE_STATUS * engstat) { ...@@ -1524,11 +1546,15 @@ env_get_engine_status(DB_ENV * env, ENGINE_STATUS * engstat) {
{ {
toku_ltm* ltm = env->i->ltm; toku_ltm* ltm = env->i->ltm;
LTM_STATUS_S ltmstat; LTM_STATUS_S ltmstat;
uint32_t max_locks, curr_locks, max_locks_per_db; uint32_t max_locks, curr_locks;
toku_ltm_get_status(ltm, &max_locks, &curr_locks, &max_locks_per_db, &ltmstat); uint64_t max_lock_memory, curr_lock_memory;
toku_ltm_get_status(ltm, &max_locks, &curr_locks,
&max_lock_memory, &curr_lock_memory,
&ltmstat);
engstat->range_locks_max = max_locks; engstat->range_locks_max = max_locks;
engstat->range_locks_max_per_index = max_locks_per_db;
engstat->range_locks_curr = curr_locks; engstat->range_locks_curr = curr_locks;
engstat->range_locks_max_memory = max_lock_memory;
engstat->range_locks_curr_memory = curr_lock_memory;
engstat->range_lock_escalation_successes = ltmstat.lock_escalation_successes; engstat->range_lock_escalation_successes = ltmstat.lock_escalation_successes;
engstat->range_lock_escalation_failures = ltmstat.lock_escalation_failures; engstat->range_lock_escalation_failures = ltmstat.lock_escalation_failures;
engstat->range_read_locks = ltmstat.read_lock; engstat->range_read_locks = ltmstat.read_lock;
...@@ -1657,8 +1683,9 @@ env_get_engine_status_text(DB_ENV * env, char * buff, int bufsiz) { ...@@ -1657,8 +1683,9 @@ env_get_engine_status_text(DB_ENV * env, char * buff, int bufsiz) {
n += snprintf(buff + n, bufsiz - n, "local_checkpoint_files %"PRId64"\n", engstat.local_checkpoint_files); n += snprintf(buff + n, bufsiz - n, "local_checkpoint_files %"PRId64"\n", engstat.local_checkpoint_files);
n += snprintf(buff + n, bufsiz - n, "local_checkpoint_during_checkpoint %"PRId64"\n", engstat.local_checkpoint_during_checkpoint); n += snprintf(buff + n, bufsiz - n, "local_checkpoint_during_checkpoint %"PRId64"\n", engstat.local_checkpoint_during_checkpoint);
n += snprintf(buff + n, bufsiz - n, "range_locks_max %"PRIu32"\n", engstat.range_locks_max); n += snprintf(buff + n, bufsiz - n, "range_locks_max %"PRIu32"\n", engstat.range_locks_max);
n += snprintf(buff + n, bufsiz - n, "range_locks_max_per_index %"PRIu32"\n", engstat.range_locks_max_per_index);
n += snprintf(buff + n, bufsiz - n, "range_locks_curr %"PRIu32"\n", engstat.range_locks_curr); n += snprintf(buff + n, bufsiz - n, "range_locks_curr %"PRIu32"\n", engstat.range_locks_curr);
n += snprintf(buff + n, bufsiz - n, "range_locks_max_memory %"PRIu64"\n", engstat.range_locks_max_memory);
n += snprintf(buff + n, bufsiz - n, "range_locks_curr_memory %"PRIu64"\n", engstat.range_locks_curr_memory);
n += snprintf(buff + n, bufsiz - n, "range_locks_escalation_successes %"PRIu32"\n", engstat.range_lock_escalation_successes); n += snprintf(buff + n, bufsiz - n, "range_locks_escalation_successes %"PRIu32"\n", engstat.range_lock_escalation_successes);
n += snprintf(buff + n, bufsiz - n, "range_locks_escalation_failures %"PRIu32"\n", engstat.range_lock_escalation_failures); n += snprintf(buff + n, bufsiz - n, "range_locks_escalation_failures %"PRIu32"\n", engstat.range_lock_escalation_failures);
n += snprintf(buff + n, bufsiz - n, "range_read_locks %"PRIu64"\n", engstat.range_read_locks); n += snprintf(buff + n, bufsiz - n, "range_read_locks %"PRIu64"\n", engstat.range_read_locks);
...@@ -1756,6 +1783,8 @@ static int toku_env_create(DB_ENV ** envp, u_int32_t flags) { ...@@ -1756,6 +1783,8 @@ static int toku_env_create(DB_ENV ** envp, u_int32_t flags) {
SENV(get_lg_max); SENV(get_lg_max);
SENV(set_lk_max_locks); SENV(set_lk_max_locks);
SENV(get_lk_max_locks); SENV(get_lk_max_locks);
SENV(set_lk_max_memory);
SENV(get_lk_max_memory);
SENV(set_cachesize); SENV(set_cachesize);
#if DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 3 #if DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 3
SENV(get_cachesize); SENV(get_cachesize);
...@@ -1781,7 +1810,8 @@ static int toku_env_create(DB_ENV ** envp, u_int32_t flags) { ...@@ -1781,7 +1810,8 @@ static int toku_env_create(DB_ENV ** envp, u_int32_t flags) {
env_init_open_txn(result); env_init_open_txn(result);
env_fs_init(result); env_fs_init(result);
r = toku_ltm_create(&result->i->ltm, __toku_env_default_max_locks, r = toku_ltm_create(&result->i->ltm,
__toku_env_default_max_locks, __toku_env_default_max_lock_memory,
toku_db_lt_panic, toku_db_lt_panic,
toku_db_get_compare_fun, toku_db_get_compare_fun,
toku_malloc, toku_free, toku_realloc); toku_malloc, toku_free, toku_realloc);
...@@ -2176,11 +2206,7 @@ db_close_before_brt(DB *db, u_int32_t UU(flags)) { ...@@ -2176,11 +2206,7 @@ db_close_before_brt(DB *db, u_int32_t UU(flags)) {
assert(error_string==0); assert(error_string==0);
int r2 = 0; int r2 = 0;
if (db->i->lt) { if (db->i->lt) {
r2 = toku_lt_remove_ref(db->i->lt); toku_lt_remove_db_ref(db->i->lt, db);
if (r2) {
db->dbenv->i->is_panicked = r2; // Panicking the whole environment may be overkill, but I'm not sure what else to do.
db->dbenv->i->panic_string = 0;
}
} }
// printf("%s:%d %d=__toku_db_close(%p)\n", __FILE__, __LINE__, r, db); // printf("%s:%d %d=__toku_db_close(%p)\n", __FILE__, __LINE__, r, db);
// Even if panicked, let's close as much as we can. // Even if panicked, let's close as much as we can.
...@@ -3780,7 +3806,7 @@ db_open_iname(DB * db, DB_TXN * txn, const char *iname_in_env, u_int32_t flags, ...@@ -3780,7 +3806,7 @@ db_open_iname(DB * db, DB_TXN * txn, const char *iname_in_env, u_int32_t flags,
db->i->opened = 1; db->i->opened = 1;
if (need_locktree) { if (need_locktree) {
db->i->dict_id = toku_brt_get_dictionary_id(db->i->brt); db->i->dict_id = toku_brt_get_dictionary_id(db->i->brt);
r = toku_ltm_get_lt(db->dbenv->i->ltm, &db->i->lt, db->i->dict_id); r = toku_ltm_get_lt(db->dbenv->i->ltm, &db->i->lt, db->i->dict_id, db);
if (r!=0) { goto error_cleanup; } if (r!=0) { goto error_cleanup; }
} }
//Add to transaction's list of 'must close' if necessary. //Add to transaction's list of 'must close' if necessary.
...@@ -3796,7 +3822,7 @@ db_open_iname(DB * db, DB_TXN * txn, const char *iname_in_env, u_int32_t flags, ...@@ -3796,7 +3822,7 @@ db_open_iname(DB * db, DB_TXN * txn, const char *iname_in_env, u_int32_t flags,
db->i->dict_id = DICTIONARY_ID_NONE; db->i->dict_id = DICTIONARY_ID_NONE;
db->i->opened = 0; db->i->opened = 0;
if (db->i->lt) { if (db->i->lt) {
toku_lt_remove_ref(db->i->lt); toku_lt_remove_db_ref(db->i->lt, db);
db->i->lt = NULL; db->i->lt = NULL;
} }
return r; return r;
...@@ -4754,11 +4780,21 @@ char *db_strerror(int error) { ...@@ -4754,11 +4780,21 @@ char *db_strerror(int error) {
return errorstr; return errorstr;
} }
if (error==DB_BADFORMAT) { switch (error) {
case DB_BADFORMAT:
return "Database Bad Format (probably a corrupted database)"; return "Database Bad Format (probably a corrupted database)";
} case DB_NOTFOUND:
if (error==DB_NOTFOUND) {
return "Not found"; return "Not found";
case TOKUDB_OUT_OF_LOCKS:
return "Out of locks";
case TOKUDB_DICTIONARY_TOO_OLD:
return "Dictionary too old for this version of TokuDB";
case TOKUDB_DICTIONARY_TOO_NEW:
return "Dictionary too new for this version of TokuDB";
case TOKUDB_CANCELED:
return "User cancelled operation";
case TOKUDB_NO_DATA:
return "Ran out of data (not EOF)";
} }
static char unknown_result[100]; // Race condition if two threads call this at the same time. However even in a bad case, it should be some sort of null-terminated string. static char unknown_result[100]; // Race condition if two threads call this at the same time. However even in a bad case, it should be some sort of null-terminated string.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment