Commit 6c60261a authored by Zardosht Kasheff, committed by Yoni Fogel

[t:3347], copy the handlerton over

git-svn-id: file:///svn/mysql/tokudb-engine/tokudb-engine@29148 c7de825b-a66e-492c-adef-691d508d4ae1
parent 5eba0d9d
This diff is collapsed.
......@@ -15,6 +15,7 @@
class ha_tokudb;
typedef struct loader_context {
THD* thd;
char write_status_msg[200];
......@@ -129,6 +130,14 @@ int generate_row_for_put(
const DBT *src_key,
const DBT *src_val
);
//
// Update callback handed to the environment with db_env->set_update()
// in tokudb_init_func.
// Parameters:
//      db - dictionary the update applies to (presumably; confirm against the definition)
//      key - key of the element being updated
//      old_val - value currently stored for key
//      extra - caller-supplied message describing the update
//      set_val - function the callback invokes to hand back the new value
//      set_extra - opaque argument to pass through to set_val
// NOTE(review): the definition is not visible here; the parameter roles above
// are inferred from their names and should be checked against it.
//
int tokudb_update_fun(
DB* db,
const DBT *key,
const DBT *old_val,
const DBT *extra,
void (*set_val)(const DBT *new_val, void *set_extra),
void *set_extra
);
class ha_tokudb : public handler {
......@@ -221,7 +230,7 @@ class ha_tokudb : public handler {
// transaction used by ha_tokudb's cursor
//
DB_TXN *transaction;
bool is_fast_alter_running;
//
// instance of cursor being used for init_xxx and rnd_xxx functions
//
......@@ -363,6 +372,15 @@ class ha_tokudb : public handler {
int insert_row_to_main_dictionary(uchar* record, DBT* pk_key, DBT* pk_val, DB_TXN* txn);
int insert_rows_to_dictionaries_mult(DBT* pk_key, DBT* pk_val, DB_TXN* txn, THD* thd);
void test_row_packing(uchar* record, DBT* pk_key, DBT* pk_val);
u_int32_t fill_row_mutator(
uchar* buf,
u_int32_t* dropped_columns,
u_int32_t num_dropped_columns,
TABLE* altered_table,
KEY_AND_COL_INFO* altered_kc_info,
u_int32_t keynr,
bool is_add
);
public:
......@@ -497,9 +515,50 @@ class ha_tokudb : public handler {
bool check_if_incompatible_data(HA_CREATE_INFO * info, uint table_changes);
int add_index(TABLE *table_arg, KEY *key_info, uint num_of_keys);
int tokudb_add_index(
TABLE *table_arg,
KEY *key_info,
uint num_of_keys,
DB_TXN* txn,
bool* inc_num_DBs,
bool* modified_DB
);
void restore_add_index(TABLE* table_arg, uint num_of_keys, bool incremented_numDBs, bool modified_DBs);
int drop_indexes(TABLE *table_arg, uint *key_num, uint num_of_keys, DB_TXN* txn);
int prepare_drop_index(TABLE *table_arg, uint *key_num, uint num_of_keys);
void restore_drop_indexes(TABLE *table_arg, uint *key_num, uint num_of_keys);
int final_drop_index(TABLE *table_arg);
void print_alter_info(
TABLE *altered_table,
HA_CREATE_INFO *create_info,
HA_ALTER_FLAGS *alter_flags,
uint table_changes
);
int check_if_supported_alter(TABLE *altered_table,
HA_CREATE_INFO *create_info,
HA_ALTER_FLAGS *alter_flags,
uint table_changes
);
//
// First phase of the alter table interface.
// This implementation does no work: every argument is ignored and 0 is returned.
// NOTE(review): 0 presumably signals success to the caller - confirm against the handler API.
//
int alter_table_phase1(THD *thd,
TABLE *altered_table,
HA_CREATE_INFO *create_info,
HA_ALTER_INFO *alter_info,
HA_ALTER_FLAGS *alter_flags)
{
return 0;
}
int alter_table_phase2(THD *thd,
TABLE *altered_table,
HA_CREATE_INFO *create_info,
HA_ALTER_INFO *alter_info,
HA_ALTER_FLAGS *alter_flags);
//
// Third phase of the alter table interface.
// This implementation does no work: both arguments are ignored and 0 is returned.
// NOTE(review): 0 presumably signals success to the caller - confirm against the handler API.
//
int alter_table_phase3(THD *thd, TABLE *table)
{
return 0;
}
// delete all rows from the table
// effect: all dictionaries, including the main and indexes, should be empty
int discard_or_import_tablespace(my_bool discard);
......
......@@ -10,12 +10,12 @@ extern "C" {
#error "WORDS_BIGENDIAN not supported"
#endif
void get_var_field_info(
u_int32_t* field_len,
u_int32_t* start_offset,
u_int32_t var_field_index,
const uchar* var_field_offset_ptr,
u_int32_t num_offset_bytes
inline void get_var_field_info(
u_int32_t* field_len, // output: length of field
u_int32_t* start_offset, // output, length of offset where data starts
u_int32_t var_field_index, //input, index of var field we want info on
const uchar* var_field_offset_ptr, //input, pointer to where offset information for all var fields begins
u_int32_t num_offset_bytes //input, number of bytes used to store offsets starting at var_field_offset_ptr
)
{
u_int32_t data_start_offset = 0;
......@@ -2075,11 +2075,11 @@ u_int32_t pack_clustering_val_from_desc(
//
null_bytes_src_ptr = (uchar *)pk_val->data;
fixed_src_ptr = null_bytes_src_ptr + num_null_bytes;
var_src_offset_ptr = fixed_src_ptr + src_mcp_info.var_len_offset;
var_src_offset_ptr = fixed_src_ptr + src_mcp_info.fixed_field_size;
var_src_data_ptr = var_src_offset_ptr + src_mcp_info.len_of_offsets;
fixed_dest_ptr = buf + num_null_bytes;
var_dest_offset_ptr = fixed_dest_ptr + dest_mcp_info.var_len_offset;
var_dest_offset_ptr = fixed_dest_ptr + dest_mcp_info.fixed_field_size;
var_dest_data_ptr = var_dest_offset_ptr + dest_mcp_info.len_of_offsets;
orig_var_dest_data_ptr = var_dest_data_ptr;
......@@ -2585,7 +2585,7 @@ u_int32_t pack_key_from_desc(
}
null_bytes_ptr = (uchar *)pk_val->data;
fixed_field_ptr = null_bytes_ptr + num_null_bytes;
var_field_offset_ptr = fixed_field_ptr + mcp_info.var_len_offset;
var_field_offset_ptr = fixed_field_ptr + mcp_info.fixed_field_size;
var_field_data_ptr = var_field_offset_ptr + mcp_info.len_of_offsets;
while ( (u_int32_t)(desc_pos - (uchar *)row_desc) < row_desc_size) {
uchar col_fix_val;
......@@ -2852,4 +2852,149 @@ u_int32_t pack_key_from_desc(
return (u_int32_t)(packed_key_pos - buf); //
}
//
// Returns true if the two fields carry exactly the same name.
// The comparison is a plain strcmp on field_name, so it is case sensitive.
// Parameters:
//      a - first field
//      b - second field
//
bool fields_have_same_name(
    Field* a,
    Field* b
    )
{
    const int name_cmp = strcmp(a->field_name, b->field_name);
    return !name_cmp;
}
//
// Decides whether two fields describe the same column.
// Two fields are considered the same when all of the following hold:
//  - they have the same name (strcmp on field_name)
//  - they have the same real_type()
//  - either both are nullable or both are not nullable
//  - the type specific attributes match:
//      fixed size types: same pack_length(), and same signedness when the
//          type maps to toku_type_int
//      blobs: same charset number and same row_pack_length()
//      MYSQL_TYPE_STRING: same pack_length(), same binary-ness, and same
//          charset number when not binary
//      MYSQL_TYPE_VARCHAR: same field_length, same binary-ness, and same
//          charset number when not binary
// Parameters:
//      a - first field
//      b - second field
// Returns:
//      true if the fields are the same, false otherwise.
//      Types that are not supported (geometry, old decimal, var_string, and
//      anything unknown) trip an assert; if asserts are compiled out, false
//      is returned so that callers never treat such fields as unchanged.
//
bool are_two_fields_same(
    Field* a,
    Field* b
    )
{
    enum_field_types a_mysql_type = a->real_type();
    enum_field_types b_mysql_type = b->real_type();

    // make sure have same names
    if (strcmp(a->field_name, b->field_name) != 0) {
        return false;
    }
    // make sure have same types
    if (a_mysql_type != b_mysql_type) {
        return false;
    }
    // make sure that either both are nullable, or both not nullable
    if ((a->null_bit != 0) != (b->null_bit != 0)) {
        return false;
    }

    switch (a_mysql_type) {
    case MYSQL_TYPE_LONG:
    case MYSQL_TYPE_LONGLONG:
    case MYSQL_TYPE_TINY:
    case MYSQL_TYPE_SHORT:
    case MYSQL_TYPE_INT24:
    case MYSQL_TYPE_DATE:
    case MYSQL_TYPE_DATETIME:
    case MYSQL_TYPE_YEAR:
    case MYSQL_TYPE_NEWDATE:
    case MYSQL_TYPE_TIME:
    case MYSQL_TYPE_TIMESTAMP:
    case MYSQL_TYPE_ENUM:
    case MYSQL_TYPE_SET:
    case MYSQL_TYPE_DOUBLE:
    case MYSQL_TYPE_FLOAT:
    case MYSQL_TYPE_NEWDECIMAL:
    case MYSQL_TYPE_BIT:
        {
            // the types are already known to be equal, so looking at a is enough
            TOKU_TYPE toku_type = mysql_to_toku_type(a);
            if (toku_type == toku_type_int) {
                // integers must agree on signed vs. unsigned
                if ( ((a->flags & UNSIGNED_FLAG) == 0) != ((b->flags & UNSIGNED_FLAG) == 0) ) {
                    return false;
                }
            }
            if (a->pack_length() != b->pack_length()) {
                return false;
            }
        }
        break;
    case MYSQL_TYPE_TINY_BLOB:
    case MYSQL_TYPE_MEDIUM_BLOB:
    case MYSQL_TYPE_BLOB:
    case MYSQL_TYPE_LONG_BLOB:
        // test the charset
        if (a->charset()->number != b->charset()->number) {
            return false;
        }
        if (a->row_pack_length() != b->row_pack_length()) {
            return false;
        }
        break;
    case MYSQL_TYPE_STRING:
        if (a->pack_length() != b->pack_length()) {
            return false;
        }
        // one is binary and the other is not, so not the same
        if (a->binary() != b->binary()) {
            return false;
        }
        // if both are binary, the equal pack lengths are all we need;
        // otherwise the charsets must match as well
        if (!a->binary() && a->charset()->number != b->charset()->number) {
            return false;
        }
        break;
    case MYSQL_TYPE_VARCHAR:
        if (a->field_length != b->field_length) {
            return false;
        }
        // one is binary and the other is not, so not the same
        if (a->binary() != b->binary()) {
            return false;
        }
        // if both are binary, the equal field lengths are all we need;
        // otherwise the charsets must match as well
        if (!a->binary() && a->charset()->number != b->charset()->number) {
            return false;
        }
        break;
    //
    // I believe these are old types that are no longer
    // in any 5.1 tables, so tokudb does not need
    // to worry about them
    // Putting in this assert in case I am wrong.
    // Do not support geometry yet.
    //
    case MYSQL_TYPE_GEOMETRY:
    case MYSQL_TYPE_DECIMAL:
    case MYSQL_TYPE_VAR_STRING:
    default:
        assert(false);
        // if the assert is compiled out, do not claim that fields of an
        // unsupported type are the same
        return false;
    }
    return true;
}
......@@ -10,24 +10,106 @@ extern "C" {
#include <db.h>
//
// A MySQL row is encoded in TokuDB, as follows:
// Keys:
// Keys pack the defined columns in the order that they are declared.
// The primary key contains only the columns listed
// If no primary key is defined, then an eight byte hidden primary key is autogenerated (like an auto increment) and used
// Secondary keys contains the defined key and the primary key.
// Two examples:
// 1) table foo (a int, b int, c int, d int, key(b))
// The key of the main dictionary contains an eight byte autogenerated hidden primary key
// The key of key-b is the column 'b' followed by the hidden primary key
// 2) table foo (a int, b int, c int, d int, primary key(a), key(b))
// The key of the main dictionary contains 'a'
// The key of key-b is the column 'b' followed by 'a'
// Vals:
// For secondary keys they are empty.
// For the main dictionary and clustering keys, they contain all columns that do not show up in the dictionary's key
// Two examples:
// 1) table foo (a int, b int, c int, d varchar(100), primary key(a), clustering key d(d), clustering key d2(d(20)))
// the val of the main dictionary contains (b,c,d)
// the val of d contains (b,c)
// the val of d2 contains (b,c,d). d is there because the entire column does not show up in the key (only its first 20 characters do)
// Vals are encoded as follows. They have four components:
// 1) Null bytes: contains a bit field that states what columns are NULL.
// 2) Fixed fields: all fixed fields are then packed together. If a fixed field is NULL, its data is considered junk
// 3) varchars and varbinaries: stored in two pieces, first all the offsets and then all the data. If a var field is NULL, its data is considered junk
// 4) blobs: stored in (length, data) pairs. If a blob is NULL, its data is considered junk
// An example:
// Table: (a int, b varchar(20), c blob, d bigint, e varbinary(10), f largeblob, g varchar(10)) <-- no primary key defined
// Row inserted: (1, "bbb", "cc", 100, "eeeee", "ffff", "g")
// The packed format of the val looks like:
// NULL byte <-- 1 byte to encode nothing is NULL
// 1 <-- four bytes for 'a'
// 100 <-- eight bytes for 'd'
// 3,8,9 <--offsets for location of data fields, note offsets point to where data ENDS
// "bbbeeeeeg" <-- data for variable length stuff
// 2,"cc",4,"ffff"<-- data that stores the blobs
// The structures below are used to describe the TokuDB encoding of a row
//
//
// used for queries
// Holds one value per column: col_pack_val is an offset when the column is
// a fixed size field, and a pack index when the column is a variable size field.
// NOTE(review): presumably the offset is relative to the start of the fixed
// fields in the val and the pack index is the position among the var fields - confirm.
//
typedef struct st_col_pack_info {
u_int32_t col_pack_val; //offset if fixed, pack_index if var
} COL_PACK_INFO;
//
// used to define a couple of characteristics of a packed val for the main dictionary or a clustering dictionary
// fixed_field_size is the size of the fixed fields in the val.
// len_of_offsets is the size of the bytes that make up the offsets of variable size columns
// Some notes:
// If the val has no fixed fields, fixed_field_size is 0
// If the val has no variable fields, len_of_offsets is 0
// The number of null bytes at the beginning of a row is not saved, it is derived from table_share->null_bytes
// The pointer to where the variable data in a val starts is table_share->null_bytes + fixed_field_size + len_of_offsets
// To figure out where the blobs start, find the last offset listed (if offsets exist)
//
typedef struct st_multi_col_pack_info {
u_int32_t var_len_offset; //where the fixed length stuff ends and the offsets for var stuff begins
u_int32_t fixed_field_size; //size of the fixed length fields in the val (0 if there are none); the offsets for var fields begin right after them
u_int32_t len_of_offsets; //length of the offset bytes in a packed row (0 if there are no var fields)
} MULTI_COL_PACK_INFO;
//
// Describes, for one table, which columns are stored in the val of each
// dictionary and how the columns are packed there.
//
typedef struct st_key_and_col_info {
//
// bitmaps for each key. key_filters[i] is associated with the i'th dictionary
// States what columns are not stored in the vals of each key, because
// the column is stored in the key. So, for example, the table (a int, b int, c int, d int, primary key (b,d)) will
// have the second and fourth bits set in the main dictionary's bitmap,
// because 'b' and 'd' do not show up in the val
//
MY_BITMAP key_filters[MAX_KEY+1];
u_int16_t* field_lengths; //stores the field lengths of fixed size fields (1<<16 - 1 max)
//
// the following three arrays are used to identify the type of each field in the row
// If table->field[i] is a fixed field:
// field_lengths[i] stores the field length, which is fixed
// length_bytes[i] is 0
// 'i' does not show up in the array blob_fields
// If table->field[i] is a varchar or varbinary:
// field_lengths[i] is 0
// length_bytes[i] stores the number of bytes MySQL uses to encode the length of the field in table->record[0]
// 'i' does not show up in the array blob_fields
// If table->field[i] is a blob:
// field_lengths[i] is 0
// length_bytes[i] is 0
// 'i' shows up in blob_fields
//
u_int16_t* field_lengths; //stores the field lengths of fixed size fields (1<<16 - 1 max),
uchar* length_bytes; // for varchars and varbinaries, the number of bytes MySQL uses to encode the field's length; 0 for all other fields
u_int32_t* blob_fields; // list of indexes of blob fields
u_int32_t num_blobs;
u_int32_t* blob_fields; // list of indexes of blob fields,
u_int32_t num_blobs; // number of blobs in the table
//
// val packing info for all dictionaries. i'th one represents info for i'th dictionary
//
MULTI_COL_PACK_INFO mcp_info[MAX_KEY+1];
COL_PACK_INFO* cp_info[MAX_KEY+1];
//
// number of bytes used to represent an offset in a val. Can be 1 or 2.
// The number of var fields in a val for dictionary i can be evaluated by
// mcp_info[i].len_of_offsets/num_offset_bytes.
//
u_int32_t num_offset_bytes; //number of bytes needed to encode the offset
} KEY_AND_COL_INFO;
......@@ -279,6 +361,15 @@ u_int32_t pack_key_from_desc(
const DBT* pk_val
);
//
// Returns true if a and b have the same field_name (exact strcmp match).
//
bool fields_have_same_name(
Field* a,
Field* b
);
//
// Returns true if a and b have the same name, the same real_type(), the same
// nullability, and matching type specific attributes (pack length, signedness,
// charset or field length, depending on the type). Returns false otherwise.
//
bool are_two_fields_same(
Field* a,
Field* b
);
#endif
......@@ -37,6 +37,7 @@ extern ulong tokudb_debug;
#define TOKUDB_DEBUG_LOCKRETRY 512
#define TOKUDB_DEBUG_CHECK_KEY 1024
#define TOKUDB_DEBUG_HIDE_DDL_LOCK_ERRORS 2048
#define TOKUDB_DEBUG_ALTER_TABLE_INFO 4096
//
// Prints a trace message with printf. The output is prefixed with the value
// of my_tid(), the source file and the source line, each followed by ':',
// and then the caller's format string f with its arguments.
// f must be a string literal, because it is pasted onto the prefix literal.
// NOTE(review): the expansion already ends with ';', so writing
// TOKUDB_TRACE(...); produces an extra empty statement - take care when
// using it as the sole body of an unbraced if/else.
//
#define TOKUDB_TRACE(f, ...) \
printf("%d:%s:%d:" f, my_tid(), __FILE__, __LINE__, ##__VA_ARGS__);
......
......@@ -97,6 +97,13 @@ static MYSQL_THDVAR_BOOL(load_save_space,
NULL,
FALSE
);
//
// Boolean per-THD variable disable_slow_alter; read through get_disable_slow_alter().
// Per its description string, turning it on disables alter tables that require a copy.
// NOTE(review): arguments presumably follow the MYSQL_THDVAR_BOOL order
// (name, options, description, check function, update function, default),
// which would make FALSE the default - confirm against plugin.h.
//
static MYSQL_THDVAR_BOOL(disable_slow_alter,
0,
"if on, alter tables that require copy are disabled",
NULL,
NULL,
FALSE
);
static MYSQL_THDVAR_BOOL(create_index_online,
0,
"if on, create index done online",
......@@ -386,6 +393,7 @@ static int tokudb_init_func(void *p) {
assert(!r);
r = db_env->set_generate_row_callback_for_del(db_env,generate_row_for_del);
assert(!r);
db_env->set_update(db_env, tokudb_update_fun);
r = db_env->open(db_env, tokudb_home, tokudb_init_flags, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH);
......@@ -560,6 +568,10 @@ bool get_load_save_space(THD* thd) {
return (THDVAR(thd, load_save_space) != 0);
}
//
// Reports whether the disable_slow_alter variable is set for the given thd.
// Any non-zero value of the variable counts as "on".
//
bool get_disable_slow_alter(THD* thd) {
    if (THDVAR(thd, disable_slow_alter) != 0) {
        return true;
    }
    return false;
}
//
// Reports whether the create_index_online variable is set for the given thd.
// Any non-zero value of the variable counts as "on".
//
bool get_create_index_online(THD* thd) {
    if (THDVAR(thd, create_index_online) != 0) {
        return true;
    }
    return false;
}
......@@ -1056,6 +1068,14 @@ static bool tokudb_show_engine_status(THD * thd, stat_print_fn * stat_print) {
STATPRINT("dictionary updates", buf);
snprintf(buf, bufsiz, "%" PRIu64, engstat.updates_fail);
STATPRINT("dictionary updates fail", buf);
snprintf(buf, bufsiz, "%" PRIu64, engstat.updates_broadcast);
STATPRINT("dictionary broadcast updates", buf);
snprintf(buf, bufsiz, "%" PRIu64, engstat.updates_broadcast_fail);
STATPRINT("dictionary broadcast updates fail", buf);
snprintf(buf, bufsiz, "%" PRIu64, engstat.le_updates);
STATPRINT("leafentry updates", buf);
snprintf(buf, bufsiz, "%" PRIu64, engstat.le_updates_broadcast);
STATPRINT("leafentry broadcast updates", buf);
snprintf(buf, bufsiz, "%" PRIu64, engstat.multi_inserts);
STATPRINT("dictionary inserts multi", buf);
snprintf(buf, bufsiz, "%" PRIu64, engstat.multi_inserts_fail);
......@@ -1380,7 +1400,7 @@ void tokudb_cleanup_log_files(void) {
static uint tokudb_alter_table_flags(uint flags)
{
return (HA_ONLINE_ADD_INDEX_NO_WRITES| HA_ONLINE_DROP_INDEX_NO_WRITES |
HA_ONLINE_ADD_UNIQUE_INDEX_NO_WRITES| HA_ONLINE_DROP_UNIQUE_INDEX_NO_WRITES);
HA_ONLINE_ADD_UNIQUE_INDEX_NO_WRITES| HA_ONLINE_DROP_UNIQUE_INDEX_NO_WRITES|HA_GENERAL_ONLINE);
}
......@@ -1429,6 +1449,7 @@ static struct st_mysql_sys_var *tokudb_system_variables[] = {
MYSQL_SYSVAR(read_lock_wait),
MYSQL_SYSVAR(pk_insert_mode),
MYSQL_SYSVAR(load_save_space),
MYSQL_SYSVAR(disable_slow_alter),
MYSQL_SYSVAR(create_index_online),
MYSQL_SYSVAR(version),
MYSQL_SYSVAR(init_flags),
......
......@@ -15,6 +15,7 @@ ulonglong get_write_lock_wait_time (THD* thd);
ulonglong get_read_lock_wait_time (THD* thd);
uint get_pk_insert_mode(THD* thd);
bool get_load_save_space(THD* thd);
bool get_disable_slow_alter(THD* thd);
bool get_create_index_online(THD* thd);
bool get_prelock_empty(THD* thd);
uint get_tokudb_block_size(THD* thd);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment