Commit 4c4950f3 authored by Zardosht Kasheff's avatar Zardosht Kasheff Committed by Yoni Fogel

[t:3117], merge fractal tree piece to main

git-svn-id: file:///svn/toku/tokudb@26451 c7de825b-a66e-492c-adef-691d508d4ae1
parent ea0f0ef0
......@@ -251,6 +251,7 @@ typedef enum {
#define DB_RMW 1073741824
#define DB_PRELOCKED 0x00800000
#define DB_PRELOCKED_WRITE 0x00400000
#define DB_PRELOCKED_FILE_READ 0x00200000
#define DB_DBT_APPMALLOC 1
#define DB_DBT_DUPOK 64
#define DB_DBT_MALLOC 4
......@@ -406,6 +407,7 @@ struct __toku_db {
void *app_private; /* 32-bit offset=16 size=4, 64=bit offset=32 size=8 */
DB_ENV *dbenv; /* 32-bit offset=20 size=4, 64=bit offset=40 size=8 */
int (*pre_acquire_fileops_lock)(DB*, DB_TXN*);
int (*pre_acquire_fileops_shared_lock)(DB*, DB_TXN*);
const DBT* (*dbt_pos_infty)(void) /* Return the special DBT that refers to positive infinity in the lock table.*/;
const DBT* (*dbt_neg_infty)(void)/* Return the special DBT that refers to negative infinity in the lock table.*/;
int (*row_size_supported) (DB*, u_int32_t) /* Test whether a row size is supported. */;
......@@ -417,7 +419,7 @@ struct __toku_db {
int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION);
int (*set_indexer)(DB*, DB_INDEXER*);
void (*get_indexer)(DB*, DB_INDEXER**);
void* __toku_dummy0[17];
void* __toku_dummy0[16];
char __toku_dummy1[96];
void *api_internal; /* 32-bit offset=236 size=4, 64=bit offset=376 size=8 */
void* __toku_dummy2[5];
......
......@@ -252,6 +252,7 @@ typedef enum {
#define DB_RMW 536870912
#define DB_PRELOCKED 0x00800000
#define DB_PRELOCKED_WRITE 0x00400000
#define DB_PRELOCKED_FILE_READ 0x00200000
#define DB_DBT_APPMALLOC 1
#define DB_DBT_DUPOK 64
#define DB_DBT_MALLOC 4
......@@ -416,6 +417,7 @@ struct __toku_db {
void *app_private; /* 32-bit offset=16 size=4, 64=bit offset=32 size=8 */
DB_ENV *dbenv; /* 32-bit offset=20 size=4, 64=bit offset=40 size=8 */
int (*pre_acquire_fileops_lock)(DB*, DB_TXN*);
int (*pre_acquire_fileops_shared_lock)(DB*, DB_TXN*);
const DBT* (*dbt_pos_infty)(void) /* Return the special DBT that refers to positive infinity in the lock table.*/;
const DBT* (*dbt_neg_infty)(void)/* Return the special DBT that refers to negative infinity in the lock table.*/;
int (*row_size_supported) (DB*, u_int32_t) /* Test whether a row size is supported. */;
......@@ -427,7 +429,7 @@ struct __toku_db {
int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION);
int (*set_indexer)(DB*, DB_INDEXER*);
void (*get_indexer)(DB*, DB_INDEXER**);
void* __toku_dummy0[20];
void* __toku_dummy0[19];
char __toku_dummy1[96];
void *api_internal; /* 32-bit offset=248 size=4, 64=bit offset=400 size=8 */
void* __toku_dummy2[5];
......
......@@ -252,6 +252,7 @@ typedef enum {
#define DB_RMW 536870912
#define DB_PRELOCKED 0x00800000
#define DB_PRELOCKED_WRITE 0x00400000
#define DB_PRELOCKED_FILE_READ 0x00200000
#define DB_DBT_APPMALLOC 1
#define DB_DBT_DUPOK 64
#define DB_DBT_MALLOC 4
......@@ -418,6 +419,7 @@ struct __toku_db {
void *app_private; /* 32-bit offset=16 size=4, 64=bit offset=32 size=8 */
DB_ENV *dbenv; /* 32-bit offset=20 size=4, 64=bit offset=40 size=8 */
int (*pre_acquire_fileops_lock)(DB*, DB_TXN*);
int (*pre_acquire_fileops_shared_lock)(DB*, DB_TXN*);
const DBT* (*dbt_pos_infty)(void) /* Return the special DBT that refers to positive infinity in the lock table.*/;
const DBT* (*dbt_neg_infty)(void)/* Return the special DBT that refers to negative infinity in the lock table.*/;
int (*row_size_supported) (DB*, u_int32_t) /* Test whether a row size is supported. */;
......@@ -429,7 +431,7 @@ struct __toku_db {
int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION);
int (*set_indexer)(DB*, DB_INDEXER*);
void (*get_indexer)(DB*, DB_INDEXER**);
void* __toku_dummy0[22];
void* __toku_dummy0[21];
char __toku_dummy1[96];
void *api_internal; /* 32-bit offset=256 size=4, 64=bit offset=416 size=8 */
void* __toku_dummy2[5];
......
......@@ -252,6 +252,7 @@ typedef enum {
#define DB_RMW 1073741824
#define DB_PRELOCKED 0x00800000
#define DB_PRELOCKED_WRITE 0x00400000
#define DB_PRELOCKED_FILE_READ 0x00200000
#define DB_DBT_APPMALLOC 1
#define DB_DBT_DUPOK 128
#define DB_DBT_MALLOC 4
......@@ -418,6 +419,7 @@ struct __toku_db {
void *app_private; /* 32-bit offset=16 size=4, 64=bit offset=32 size=8 */
DB_ENV *dbenv; /* 32-bit offset=20 size=4, 64=bit offset=40 size=8 */
int (*pre_acquire_fileops_lock)(DB*, DB_TXN*);
int (*pre_acquire_fileops_shared_lock)(DB*, DB_TXN*);
const DBT* (*dbt_pos_infty)(void) /* Return the special DBT that refers to positive infinity in the lock table.*/;
const DBT* (*dbt_neg_infty)(void)/* Return the special DBT that refers to negative infinity in the lock table.*/;
int (*row_size_supported) (DB*, u_int32_t) /* Test whether a row size is supported. */;
......@@ -429,7 +431,7 @@ struct __toku_db {
int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION);
int (*set_indexer)(DB*, DB_INDEXER*);
void (*get_indexer)(DB*, DB_INDEXER**);
void* __toku_dummy0[25];
void* __toku_dummy0[24];
char __toku_dummy1[96];
void *api_internal; /* 32-bit offset=268 size=4, 64=bit offset=440 size=8 */
void* __toku_dummy2[5];
......
......@@ -252,6 +252,7 @@ typedef enum {
#define DB_RMW 1073741824
#define DB_PRELOCKED 0x00800000
#define DB_PRELOCKED_WRITE 0x00400000
#define DB_PRELOCKED_FILE_READ 0x00200000
#define DB_DBT_APPMALLOC 1
#define DB_DBT_DUPOK 2
#define DB_DBT_MALLOC 8
......@@ -421,6 +422,7 @@ struct __toku_db {
DB_ENV *dbenv; /* 32-bit offset=24 size=4, 64=bit offset=40 size=8 */
int (*pre_acquire_table_lock)(DB*, DB_TXN*);
int (*pre_acquire_fileops_lock)(DB*, DB_TXN*);
int (*pre_acquire_fileops_shared_lock)(DB*, DB_TXN*);
const DBT* (*dbt_pos_infty)(void) /* Return the special DBT that refers to positive infinity in the lock table.*/;
const DBT* (*dbt_neg_infty)(void)/* Return the special DBT that refers to negative infinity in the lock table.*/;
int (*row_size_supported) (DB*, u_int32_t) /* Test whether a row size is supported. */;
......@@ -432,7 +434,7 @@ struct __toku_db {
int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION);
int (*set_indexer)(DB*, DB_INDEXER*);
void (*get_indexer)(DB*, DB_INDEXER**);
void* __toku_dummy1[29];
void* __toku_dummy1[28];
char __toku_dummy2[80];
void *api_internal; /* 32-bit offset=276 size=4, 64=bit offset=464 size=8 */
void* __toku_dummy3[5];
......
......@@ -156,6 +156,7 @@ static void print_defines (void) {
dodefine(DB_RMW);
printf("#define DB_PRELOCKED 0x00800000\n"); // private tokudb
printf("#define DB_PRELOCKED_WRITE 0x00400000\n"); // private tokudb
printf("#define DB_PRELOCKED_FILE_READ 0x00200000\n"); // private tokudb
{
//dbt flags
......@@ -667,6 +668,7 @@ int main (int argc __attribute__((__unused__)), char *const argv[] __attribute__
"int (*stat64)(DB *, DB_TXN *, DB_BTREE_STAT64 *)",
"int (*pre_acquire_table_lock)(DB*, DB_TXN*)",
"int (*pre_acquire_fileops_lock)(DB*, DB_TXN*)",
"int (*pre_acquire_fileops_shared_lock)(DB*, DB_TXN*)",
"const DBT* (*dbt_pos_infty)(void) /* Return the special DBT that refers to positive infinity in the lock table.*/",
"const DBT* (*dbt_neg_infty)(void)/* Return the special DBT that refers to negative infinity in the lock table.*/",
"int (*row_size_supported) (DB*, u_int32_t) /* Test whether a row size is supported. */",
......
......@@ -252,6 +252,7 @@ typedef enum {
#define DB_RMW 1073741824
#define DB_PRELOCKED 0x00800000
#define DB_PRELOCKED_WRITE 0x00400000
#define DB_PRELOCKED_FILE_READ 0x00200000
#define DB_DBT_APPMALLOC 1
#define DB_DBT_DUPOK 2
#define DB_DBT_MALLOC 8
......@@ -390,6 +391,7 @@ struct __toku_db {
DB_ENV *dbenv;
int (*pre_acquire_table_lock)(DB*, DB_TXN*);
int (*pre_acquire_fileops_lock)(DB*, DB_TXN*);
int (*pre_acquire_fileops_shared_lock)(DB*, DB_TXN*);
const DBT* (*dbt_pos_infty)(void) /* Return the special DBT that refers to positive infinity in the lock table.*/;
const DBT* (*dbt_neg_infty)(void)/* Return the special DBT that refers to negative infinity in the lock table.*/;
int (*row_size_supported) (DB*, u_int32_t) /* Test whether a row size is supported. */;
......
......@@ -252,6 +252,7 @@ typedef enum {
#define DB_RMW 1073741824
#define DB_PRELOCKED 0x00800000
#define DB_PRELOCKED_WRITE 0x00400000
#define DB_PRELOCKED_FILE_READ 0x00200000
#define DB_DBT_APPMALLOC 1
#define DB_DBT_DUPOK 2
#define DB_DBT_MALLOC 8
......@@ -390,6 +391,7 @@ struct __toku_db {
DB_ENV *dbenv;
int (*pre_acquire_table_lock)(DB*, DB_TXN*);
int (*pre_acquire_fileops_lock)(DB*, DB_TXN*);
int (*pre_acquire_fileops_shared_lock)(DB*, DB_TXN*);
const DBT* (*dbt_pos_infty)(void) /* Return the special DBT that refers to positive infinity in the lock table.*/;
const DBT* (*dbt_neg_infty)(void)/* Return the special DBT that refers to negative infinity in the lock table.*/;
int (*row_size_supported) (DB*, u_int32_t) /* Test whether a row size is supported. */;
......
......@@ -39,6 +39,10 @@ static void verify_shared_ops_fail(DB_ENV* env, DB* db) {
dbt_init(&key, "a", 4);
dbt_init(&val, "a", 4);
r = env->txn_begin(env, NULL, &txn, 0); CKERR(r);
r = db->pre_acquire_fileops_shared_lock(db, txn); CKERR2(r,DB_LOCK_NOTGRANTED);
r = txn->commit(txn,0); CKERR(r);
r = env->txn_begin(env, NULL, &txn, 0); CKERR(r);
r = db->cursor(db, txn, &c, 0); CKERR2(r,DB_LOCK_NOTGRANTED);
r = txn->commit(txn,0); CKERR(r);
......@@ -207,6 +211,14 @@ int test_main (int argc, char * const argv[]) {
r = txna->commit(txna, 0); CKERR(r);
r = env->txn_begin(env, NULL, &txna, 0); CKERR(r);
r = env->txn_begin(env, NULL, &txnb, 0); CKERR(r);
r = db->pre_acquire_fileops_shared_lock(db, txna); CKERR(r);
r = db->pre_acquire_fileops_shared_lock(db, txnb); CKERR(r);
verify_excl_ops_fail(env,db);
r = txna->abort(txna); CKERR(r);
r = txnb->abort(txnb); CKERR(r);
r = env->txn_begin(env, NULL, &txna, 0); CKERR(r);
r = env->txn_begin(env, NULL, &txnb, 0); CKERR(r);
r = db->cursor(db, txna, &c1, 0); CKERR(r);
......
......@@ -2980,13 +2980,13 @@ typedef struct {
static inline u_int32_t
get_prelocked_flags(u_int32_t flags) {
u_int32_t lock_flags = flags & (DB_PRELOCKED | DB_PRELOCKED_WRITE);
u_int32_t lock_flags = flags & (DB_PRELOCKED | DB_PRELOCKED_WRITE | DB_PRELOCKED_FILE_READ);
return lock_flags;
}
static inline u_int32_t
get_cursor_prelocked_flags(u_int32_t flags, DBC* dbc) {
u_int32_t lock_flags = flags & (DB_PRELOCKED | DB_PRELOCKED_WRITE);
u_int32_t lock_flags = flags & (DB_PRELOCKED | DB_PRELOCKED_WRITE | DB_PRELOCKED_FILE_READ);
//DB_READ_UNCOMMITTED and DB_READ_COMMITTED transactions 'own' all read locks for user-data dictionaries.
if (dbc_struct_i(dbc)->iso != TOKU_ISO_SERIALIZABLE)
......@@ -3876,12 +3876,13 @@ toku_db_del(DB *db, DB_TXN *txn, DBT *key, u_int32_t flags) {
u_int32_t lock_flags = get_prelocked_flags(flags);
unchecked_flags &= ~lock_flags;
BOOL do_locking = (BOOL)(db->i->lt && !(lock_flags&DB_PRELOCKED_WRITE));
BOOL do_dir_locking = !(lock_flags&DB_PRELOCKED_FILE_READ);
int r = 0;
if (unchecked_flags!=0)
r = EINVAL;
if (r == 0) {
if (r == 0 && do_dir_locking) {
r = toku_grab_read_lock_on_directory(db, txn);
}
if (r == 0 && error_if_missing) {
......@@ -4039,9 +4040,10 @@ env_del_multiple(
}
//Do locking if necessary.
r = toku_grab_read_lock_on_directory(db, txn);
if (r != 0) goto cleanup;
if (!(lock_flags[which_db] & DB_PRELOCKED_FILE_READ)) {
r = toku_grab_read_lock_on_directory(db, txn);
if (r != 0) goto cleanup;
}
if (db->i->lt && !(lock_flags[which_db] & DB_PRELOCKED_WRITE)) {
//Needs locking
r = get_point_lock(db, txn, &del_keys[which_db]);
......@@ -4199,7 +4201,7 @@ toku_db_get (DB * db, DB_TXN * txn, DBT * key, DBT * data, u_int32_t flags) {
if ((db->i->open_flags & DB_THREAD) && db_thread_need_flags(data))
return EINVAL;
u_int32_t lock_flags = flags & (DB_PRELOCKED | DB_PRELOCKED_WRITE);
u_int32_t lock_flags = flags & (DB_PRELOCKED | DB_PRELOCKED_WRITE | DB_PRELOCKED_FILE_READ);
flags &= ~lock_flags;
flags &= ~DB_ISOLATION_FLAGS;
// And DB_GET_BOTH is no longer supported. #2862.
......@@ -4598,13 +4600,15 @@ static int
toku_db_put(DB *db, DB_TXN *txn, DBT *key, DBT *val, u_int32_t flags) {
HANDLE_PANICKED_DB(db);
HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN(db, txn);
int r;
int r = 0;
u_int32_t lock_flags = get_prelocked_flags(flags);
flags &= ~lock_flags;
r = toku_grab_read_lock_on_directory(db, txn);
if (!(lock_flags & DB_PRELOCKED_FILE_READ)) {
r = toku_grab_read_lock_on_directory(db, txn);
}
if (r == 0)
r = db_put_check_size_constraints(db, key, val);
if (r == 0) {
......@@ -4751,9 +4755,10 @@ env_put_multiple(
}
//Do locking if necessary.
r = toku_grab_read_lock_on_directory(db, txn);
if (r != 0) goto cleanup;
if (!(lock_flags[which_db] & DB_PRELOCKED_FILE_READ)) {
r = toku_grab_read_lock_on_directory(db, txn);
if (r != 0) goto cleanup;
}
if (db->i->lt && !(lock_flags[which_db] & DB_PRELOCKED_WRITE)) {
//Needs locking
r = get_point_lock(db, txn, &put_keys[which_db]);
......@@ -4857,9 +4862,10 @@ env_update_multiple(DB_ENV *env, DB *src_db, DB_TXN *txn,
toku_dbt_cmp cmpfun = toku_db_get_compare_fun(db);
BOOL key_eq = cmpfun(db, &curr_old_key, &curr_new_key) == 0;
if (!key_eq) {
r = toku_grab_read_lock_on_directory(db, txn);
if (r != 0) goto cleanup;
if (!(lock_flags[which_db] & DB_PRELOCKED_FILE_READ)) {
r = toku_grab_read_lock_on_directory(db, txn);
if (r != 0) goto cleanup;
}
//Check overwrite constraints only in the case where
// the keys are not equal.
// If the keys are equal, then we do not care of the flag is DB_NOOVERWRITE or DB_YESOVERWRITE
......@@ -4892,9 +4898,10 @@ env_update_multiple(DB_ENV *env, DB *src_db, DB_TXN *txn,
r = db_put_check_size_constraints(db, &curr_new_key, &curr_new_val);
if (r != 0) goto cleanup;
r = toku_grab_read_lock_on_directory(db, txn);
if (r != 0) goto cleanup;
if (!(lock_flags[which_db] & DB_PRELOCKED_FILE_READ)) {
r = toku_grab_read_lock_on_directory(db, txn);
if (r != 0) goto cleanup;
}
// lock new key
if (db->i->lt) {
r = get_point_lock(db, txn, &curr_new_key);
......@@ -5489,6 +5496,13 @@ static int locked_db_pre_acquire_fileops_lock(DB *db, DB_TXN *txn) {
return r;
}
static int locked_db_pre_acquire_fileops_shared_lock(DB *db, DB_TXN *txn) {
toku_ydb_lock();
int r = toku_grab_read_lock_on_directory(db, txn);
toku_ydb_unlock();
return r;
}
// truncate a database
// effect: remove all of the rows from a database
static int
......@@ -5774,6 +5788,7 @@ toku_db_create(DB ** db, DB_ENV * env, u_int32_t flags) {
SDB(fd);
SDB(pre_acquire_table_lock);
SDB(pre_acquire_fileops_lock);
SDB(pre_acquire_fileops_shared_lock);
SDB(truncate);
SDB(row_size_supported);
SDB(getf_set);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment