Commit af54fc04 authored by Rich Prohaska's avatar Rich Prohaska Committed by Yoni Fogel

refs #5513 fix hot column expansion code review issues on the mainline

git-svn-id: file:///svn/mysql/tokudb-engine/tokudb-engine@48080 c7de825b-a66e-492c-adef-691d508d4ae1
parent 49f4651c
......@@ -16,16 +16,16 @@
// later when the alter operation is executed.
class tokudb_alter_ctx : public inplace_alter_handler_ctx {
public:
tokudb_alter_ctx() {
handler_flags = 0;
alter_txn = NULL;
add_index_changed = false;
drop_index_changed = false;
compression_changed = false;
table_kc_info = NULL;
altered_table_kc_info = NULL;
expand_varchar_update_needed = false;
expand_fixed_update_needed = false;
tokudb_alter_ctx() :
handler_flags(0),
alter_txn(NULL),
add_index_changed(false),
drop_index_changed(false),
compression_changed(false),
expand_varchar_update_needed(false),
expand_fixed_update_needed(false),
table_kc_info(NULL),
altered_table_kc_info(NULL) {
}
~tokudb_alter_ctx() {
if (altered_table_kc_info)
......@@ -165,8 +165,7 @@ is_disjoint_add_drop(Alter_inplace_info *ha_alter_info) {
}
// Return true if some bit in mask is set and no bit in ~mask is set, otherwise return false.
static bool
only_flags(ulong bits, ulong mask) {
static bool only_flags(ulong bits, ulong mask) {
return (bits & mask) != 0 && (bits & ~mask) == 0;
}
......@@ -266,9 +265,9 @@ ha_tokudb::check_if_supported_inplace_alter(TABLE *altered_table, Alter_inplace_
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
}
} else
// change varchar length
// change column length
if ((ctx->handler_flags & Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH) &&
only_flags(ctx->handler_flags, Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH + Alter_inplace_info::ALTER_COLUMN_DEFAULT + Alter_inplace_info::ALTER_COLUMN_NAME)) {
only_flags(ctx->handler_flags, Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH + Alter_inplace_info::ALTER_COLUMN_DEFAULT)) {
find_changed_fields(table, altered_table, ha_alter_info, ctx->changed_fields);
ctx->table_kc_info = &share->kc_info;
......@@ -283,7 +282,7 @@ ha_tokudb::check_if_supported_inplace_alter(TABLE *altered_table, Alter_inplace_
} else
// change column type
if ((ctx->handler_flags & Alter_inplace_info::ALTER_COLUMN_TYPE) &&
only_flags(ctx->handler_flags, Alter_inplace_info::ALTER_COLUMN_TYPE + Alter_inplace_info::ALTER_COLUMN_DEFAULT + Alter_inplace_info::ALTER_COLUMN_NAME)) {
only_flags(ctx->handler_flags, Alter_inplace_info::ALTER_COLUMN_TYPE + Alter_inplace_info::ALTER_COLUMN_DEFAULT)) {
find_changed_fields(table, altered_table, ha_alter_info, ctx->changed_fields);
ctx->table_kc_info = &share->kc_info;
ctx->altered_table_kc_info = &ctx->altered_table_kc_info_base;
......@@ -669,13 +668,13 @@ ha_tokudb::alter_table_expand_varchar_offsets(TABLE *altered_table, Alter_inplac
if (error)
break;
// make and broadcast the update offsets message to all keys that have values
// for all trees that have values, make an update variable offsets message and broadcast it into the tree
if (i == primary_key || (table_share->key_info[i].flags & HA_CLUSTERING)) {
uint32_t offset_start = table_share->null_bytes + share->kc_info.mcp_info[i].fixed_field_size;
uint32_t offset_end = offset_start + share->kc_info.mcp_info[i].len_of_offsets;
uint32_t number_of_offsets = offset_end - offset_start;
// make the expand varchar offsets message
// make the expand variable offsets message
DBT expand; memset(&expand, 0, sizeof expand);
expand.size = sizeof (uchar) + sizeof offset_start + sizeof offset_end;
expand.data = my_malloc(expand.size, MYF(MY_WME));
......@@ -684,7 +683,7 @@ ha_tokudb::alter_table_expand_varchar_offsets(TABLE *altered_table, Alter_inplac
break;
}
uchar *expand_ptr = (uchar *)expand.data;
expand_ptr[0] = UPDATE_OP_EXPAND_VARCHAR_OFFSETS;
expand_ptr[0] = UPDATE_OP_EXPAND_VARIABLE_OFFSETS;
expand_ptr += sizeof (uchar);
memcpy(expand_ptr, &number_of_offsets, sizeof number_of_offsets);
......@@ -717,7 +716,7 @@ field_in_key(KEY *key, Field *field) {
// Return true if a field is part of any key
static bool
field_in_key(TABLE *table, Field *field) {
field_in_key_of_table(TABLE *table, Field *field) {
for (uint i = 0; i < table->s->keys; i++) {
if (field_in_key(&table->key_info[i], field))
return true;
......@@ -728,14 +727,14 @@ field_in_key(TABLE *table, Field *field) {
// Return true if all changed varchar/varbinary field lengths can be changed inplace, otherwise return false
static bool
change_varchar_length_is_supported(Field *old_field, Field *new_field, TABLE *table, TABLE *altered_table, Alter_inplace_info *ha_alter_info, tokudb_alter_ctx *ctx) {
enum_field_types old_type = old_field->real_type();
enum_field_types new_type = new_field->real_type();
if (old_type != MYSQL_TYPE_VARCHAR || new_type != MYSQL_TYPE_VARCHAR || old_field->binary() != new_field->binary() || old_field->charset() != new_field->charset())
return false;
if (old_field->max_display_length() > new_field->max_display_length())
if (old_field->real_type() != MYSQL_TYPE_VARCHAR ||
new_field->real_type() != MYSQL_TYPE_VARCHAR ||
old_field->binary() != new_field->binary() ||
old_field->charset()->number != new_field->charset()->number ||
old_field->field_length > new_field->field_length)
return false;
if (ctx->table_kc_info->num_offset_bytes > ctx->altered_table_kc_info->num_offset_bytes)
return false; // something is wrong
return false; // shrink is not supported
if (ctx->table_kc_info->num_offset_bytes < ctx->altered_table_kc_info->num_offset_bytes)
ctx->expand_varchar_update_needed = true; // sum of varchar lengths changed from 1 to 2
return true;
......@@ -746,15 +745,20 @@ static bool
change_length_is_supported(TABLE *table, TABLE *altered_table, Alter_inplace_info *ha_alter_info, tokudb_alter_ctx *ctx) {
if (table->s->fields != altered_table->s->fields)
return false;
if (table->s->null_bytes != altered_table->s->null_bytes)
return false;
if (ctx->changed_fields.elements() > 1)
return false; // only support one field change
for (int ai = 0; ai < ctx->changed_fields.elements(); ai++) {
uint i = ctx->changed_fields.at(ai);
Field *old_field = table->field[i];
Field *new_field = altered_table->field[i];
if (field_in_key(table, old_field) || field_in_key(altered_table, new_field))
return false;
// varchar(X) -> varchar(Y)
if (old_field->real_type() != new_field->real_type())
return false; // no type conversions
if (old_field->real_type() != MYSQL_TYPE_VARCHAR)
return false; // only varchar
if (field_in_key_of_table(table, old_field) || field_in_key_of_table(altered_table, new_field))
return false; // not in any key
if (!change_varchar_length_is_supported(old_field, new_field, table, altered_table, ha_alter_info, ctx))
return false;
}
......@@ -790,8 +794,7 @@ ha_tokudb::alter_table_expand_columns(TABLE *altered_table, Alter_inplace_info *
// Return the starting offset in the value for a particular index (selected by idx) of a
// particular field (selected by expand_field_num)
static uint32_t
field_offset(uint32_t null_bytes, KEY_AND_COL_INFO *kc_info, int idx, int expand_field_num) {
static uint32_t field_offset(uint32_t null_bytes, KEY_AND_COL_INFO *kc_info, int idx, int expand_field_num) {
uint32_t offset = null_bytes;
for (int i = 0; i < expand_field_num; i++) {
if (bitmap_is_set(&kc_info->key_filters[idx], i)) // skip key fields
......@@ -802,24 +805,8 @@ field_offset(uint32_t null_bytes, KEY_AND_COL_INFO *kc_info, int idx, int expand
}
// Return true of the field is an unsigned int
static bool
is_unsigned(Field *f) {
switch (f->key_type()) {
case HA_KEYTYPE_BINARY:
case HA_KEYTYPE_USHORT_INT:
case HA_KEYTYPE_UINT24:
case HA_KEYTYPE_ULONG_INT:
case HA_KEYTYPE_ULONGLONG:
return true;
case HA_KEYTYPE_INT8:
case HA_KEYTYPE_SHORT_INT:
case HA_KEYTYPE_INT24:
case HA_KEYTYPE_LONG_INT:
case HA_KEYTYPE_LONGLONG:
return false;
default:
assert(0);
}
static bool is_unsigned(Field *f) {
return (f->flags & UNSIGNED_FLAG) != 0;
}
// Send an expand message into all clustered indexes including the primary
......@@ -835,6 +822,7 @@ ha_tokudb::alter_table_expand_one_column(TABLE *altered_table, Alter_inplace_inf
assert(old_field_type == new_field_type);
uchar operation;
uchar pad_char;
switch (old_field_type) {
case toku_type_int:
assert(is_unsigned(old_field) == is_unsigned(new_field));
......@@ -845,9 +833,11 @@ ha_tokudb::alter_table_expand_one_column(TABLE *altered_table, Alter_inplace_inf
break;
case toku_type_fixstring:
operation = UPDATE_OP_EXPAND_CHAR;
pad_char = old_field->charset()->pad_char;
break;
case toku_type_fixbinary:
operation = UPDATE_OP_EXPAND_BINARY;
pad_char = 0;
break;
default:
assert(0);
......@@ -865,10 +855,22 @@ ha_tokudb::alter_table_expand_one_column(TABLE *altered_table, Alter_inplace_inf
if (error)
break;
// make and broadcast the expand update message to all keys that have values
// for all trees that have values, make an expand update message and broadcast it into the tree
if (i == primary_key || (table_share->key_info[i].flags & HA_CLUSTERING)) {
uint32_t old_offset = field_offset(table_share->null_bytes, ctx->table_kc_info, i, expand_field_num);
uint32_t new_offset = field_offset(table_share->null_bytes, ctx->altered_table_kc_info, i, expand_field_num);
assert(old_offset <= new_offset);
uint32_t old_length = ctx->table_kc_info->field_lengths[expand_field_num];
assert(old_length == old_field->pack_length());
uint32_t new_length = ctx->altered_table_kc_info->field_lengths[expand_field_num];
assert(new_length == new_field->pack_length());
DBT expand; memset(&expand, 0, sizeof expand);
expand.size = 1+4+4+4;
expand.size = sizeof operation + sizeof new_offset + sizeof old_length + sizeof new_length;
if (operation == UPDATE_OP_EXPAND_CHAR || operation == UPDATE_OP_EXPAND_BINARY)
expand.size += sizeof pad_char;
expand.data = my_malloc(expand.size, MYF(MY_WME));
if (!expand.data) {
error = ENOMEM;
......@@ -876,27 +878,24 @@ ha_tokudb::alter_table_expand_one_column(TABLE *altered_table, Alter_inplace_inf
}
uchar *expand_ptr = (uchar *)expand.data;
expand_ptr[0] = operation;
expand_ptr += sizeof (uchar);
uint32_t old_offset = field_offset(table_share->null_bytes, ctx->table_kc_info, i, expand_field_num);
uint32_t new_offset = field_offset(table_share->null_bytes, ctx->altered_table_kc_info, i, expand_field_num);
assert(old_offset <= new_offset);
expand_ptr += sizeof operation;
// for the first altered field, old_offset == new_offset. for the subsequent altered fields, the new_offset
// should be used as it includes the length changes from the previous altered fields.
memcpy(expand_ptr, &new_offset, sizeof new_offset);
expand_ptr += sizeof new_offset;
uint32_t old_length = ctx->table_kc_info->field_lengths[expand_field_num];
assert(old_length == old_field->pack_length());
memcpy(expand_ptr, &old_length, sizeof old_length);
expand_ptr += sizeof old_length;
uint32_t new_length = ctx->altered_table_kc_info->field_lengths[expand_field_num];
assert(new_length == new_field->pack_length());
memcpy(expand_ptr, &new_length, sizeof new_length);
expand_ptr += sizeof new_length;
if (operation == UPDATE_OP_EXPAND_CHAR || operation == UPDATE_OP_EXPAND_BINARY) {
memcpy(expand_ptr, &pad_char, sizeof pad_char);
expand_ptr += sizeof pad_char;
}
assert(expand_ptr == (uchar *)expand.data + expand.size);
// and broadcast it into the tree
......@@ -940,7 +939,7 @@ change_fixed_length_is_supported(TABLE *table, TABLE *altered_table, Field *old_
// Return true if two field types can be changed inplace
static bool
change_type_is_supported(Field *old_field, Field *new_field, TABLE *table, TABLE *altered_table, Alter_inplace_info *ha_alter_info, tokudb_alter_ctx *ctx) {
change_field_type_is_supported(Field *old_field, Field *new_field, TABLE *table, TABLE *altered_table, Alter_inplace_info *ha_alter_info, tokudb_alter_ctx *ctx) {
enum_field_types old_type = old_field->real_type();
enum_field_types new_type = new_field->real_type();
if (is_int_type(old_type)) {
......@@ -951,7 +950,9 @@ change_type_is_supported(Field *old_field, Field *new_field, TABLE *table, TABLE
return false;
} else if (old_type == MYSQL_TYPE_STRING) {
// char(X) -> char(Y) and binary(X) -> binary(Y) expansion
if (new_type == MYSQL_TYPE_STRING && old_field->binary() == new_field->binary() && old_field->charset() == new_field->charset())
if (new_type == MYSQL_TYPE_STRING &&
old_field->binary() == new_field->binary() &&
old_field->charset()->number == new_field->charset()->number)
return change_fixed_length_is_supported(table, altered_table, old_field, new_field, ctx);
else
return false;
......@@ -966,6 +967,8 @@ change_type_is_supported(Field *old_field, Field *new_field, TABLE *table, TABLE
// Return true if all changed field types can be changed inplace
static bool
change_type_is_supported(TABLE *table, TABLE *altered_table, Alter_inplace_info *ha_alter_info, tokudb_alter_ctx *ctx) {
if (table->s->null_bytes != altered_table->s->null_bytes)
return false;
if (table->s->fields != altered_table->s->fields)
return false;
if (ctx->changed_fields.elements() > 1)
......@@ -974,9 +977,9 @@ change_type_is_supported(TABLE *table, TABLE *altered_table, Alter_inplace_info
uint i = ctx->changed_fields.at(ai);
Field *old_field = table->field[i];
Field *new_field = altered_table->field[i];
if (field_in_key(table, old_field) || field_in_key(altered_table, new_field))
if (field_in_key_of_table(table, old_field) || field_in_key_of_table(altered_table, new_field))
return false;
if (!change_type_is_supported(old_field, new_field, table, altered_table, ha_alter_info, ctx))
if (!change_field_type_is_supported(old_field, new_field, table, altered_table, ha_alter_info, ctx))
return false;
}
return true;
......
......@@ -4,7 +4,7 @@
// is expanded beyond 1 byte.
enum {
UPDATE_OP_COL_ADD_OR_DROP = 0,
UPDATE_OP_EXPAND_VARCHAR_OFFSETS = 1,
UPDATE_OP_EXPAND_VARIABLE_OFFSETS = 1,
UPDATE_OP_EXPAND_INT = 2,
UPDATE_OP_EXPAND_UINT = 3,
UPDATE_OP_EXPAND_CHAR = 4,
......@@ -61,7 +61,7 @@ enum {
// So, upperbound is num_blobs(1+4+1+4) = num_columns*10
// The expand varchar offsets message is used to expand the size of an offset from 1 to 2 bytes.
// operation 1 == UPDATE_OP_EXPAND_VARCHAR_OFFSETS
// operation 1 == UPDATE_OP_EXPAND_VARIABLE_OFFSETS
// n_offsets 4 number of offsets
// offset_start 4 starting offset of the variable length field offsets
......@@ -72,6 +72,12 @@ enum {
// old length 4 the old length of the field's value
// new length 4 the new length of the field's value
// operation 1 == UPDATE_OP_EXPAND_CHAR, UPDATE_OP_EXPAND_BINARY
// offset 4 starting offset of the field in the row's value
// old length 4 the old length of the field's value
// new length 4 the new length of the field's value
// pad char 1
// The int add and sub update messages are used to add or subtract a constant to or from an integer field.
// operation 1 == UPDATE_OP_INT_ADD, UPDATE_OP_INT_SUB, UPDATE_OP_UINT_ADD, UPDATE_OP_UINT_SUB
// offset 4 starting offset of the int type field
......@@ -617,10 +623,9 @@ cleanup:
return error;
}
// Decode the expand varchar offsets message and expand the varchar offsets array in the old
// val to a new val. Call the set_val callback with the new val.
// Expand the variable offset array in the old row given the update mesage in the extra.
static int
tokudb_expand_varchar_offsets(
tokudb_expand_variable_offsets(
DB* db,
const DBT *key,
const DBT *old_val,
......@@ -634,7 +639,7 @@ tokudb_expand_varchar_offsets(
// decode the operation
uchar operation = extra_pos[0];
assert(operation == UPDATE_OP_EXPAND_VARCHAR_OFFSETS);
assert(operation == UPDATE_OP_EXPAND_VARIABLE_OFFSETS);
extra_pos += sizeof operation;
// decode number of offsets
......@@ -700,10 +705,9 @@ cleanup:
return error;
}
// Given a description of a fixed length field and the old value of a row, build a new value for the row and
// update it in the fractal tree.
// Expand an int field in a old row given the expand message in the extra.
static int
tokudb_expand_field(
tokudb_expand_int_field(
DB* db,
const DBT *key,
const DBT *old_val,
......@@ -716,8 +720,7 @@ tokudb_expand_field(
uchar *extra_pos = (uchar *)extra->data;
uchar operation = extra_pos[0];
assert(operation == UPDATE_OP_EXPAND_INT || operation == UPDATE_OP_EXPAND_UINT ||
operation == UPDATE_OP_EXPAND_CHAR || operation == UPDATE_OP_EXPAND_BINARY);
assert(operation == UPDATE_OP_EXPAND_INT || operation == UPDATE_OP_EXPAND_UINT);
extra_pos += sizeof operation;
uint32_t the_offset;
......@@ -755,32 +758,109 @@ tokudb_expand_field(
new_val_ptr += the_offset;
old_val_ptr += the_offset;
// read the old field, expand it, write to the new offset
switch (operation) {
case UPDATE_OP_EXPAND_INT:
if (old_val_ptr[old_length-1] & 0x80) // if sign bit on then sign extend
memset(new_val_ptr, 0xff, new_length);
else
memset(new_val_ptr, 0, new_length);
memcpy(new_val_ptr, old_val_ptr, old_length);
// fill the entire new value with ones or zeros depending on the sign bit
// the encoding is little endian
memset(new_val_ptr, (old_val_ptr[old_length-1] & 0x80) ? 0xff : 0x00, new_length);
memcpy(new_val_ptr, old_val_ptr, old_length); // overlay the low bytes of the new value with the old value
new_val_ptr += new_length;
old_val_ptr += old_length;
break;
case UPDATE_OP_EXPAND_UINT:
memset(new_val_ptr, 0, new_length);
memcpy(new_val_ptr, old_val_ptr, old_length);
memset(new_val_ptr, 0, new_length); // fill the entire new value with zeros
memcpy(new_val_ptr, old_val_ptr, old_length); // overlay the low bytes of the new value with the old value
new_val_ptr += new_length;
old_val_ptr += old_length;
break;
default:
assert(0);
}
// copy the rest
size_t n = old_val->size - (old_val_ptr - (uchar *)old_val->data);
memcpy(new_val_ptr, old_val_ptr, n);
new_val_ptr += n;
old_val_ptr += n;
new_val.size = new_val_ptr - (uchar *)new_val.data;
assert(new_val_ptr == (uchar *)new_val.data + new_val.size);
assert(old_val_ptr == (uchar *)old_val->data + old_val->size);
// set the new val
set_val(&new_val, set_extra);
}
error = 0;
cleanup:
my_free(new_val.data, MYF(MY_ALLOW_ZERO_PTR));
return error;
}
// Expand a char field in a old row given the expand message in the extra.
static int
tokudb_expand_char_field(
DB* db,
const DBT *key,
const DBT *old_val,
const DBT *extra,
void (*set_val)(const DBT *new_val, void *set_extra),
void *set_extra
)
{
int error = 0;
uchar *extra_pos = (uchar *)extra->data;
uchar operation = extra_pos[0];
assert(operation == UPDATE_OP_EXPAND_CHAR || operation == UPDATE_OP_EXPAND_BINARY);
extra_pos += sizeof operation;
uint32_t the_offset;
memcpy(&the_offset, extra_pos, sizeof the_offset);
extra_pos += sizeof the_offset;
uint32_t old_length;
memcpy(&old_length, extra_pos, sizeof old_length);
extra_pos += sizeof old_length;
uint32_t new_length;
memcpy(&new_length, extra_pos, sizeof new_length);
extra_pos += sizeof new_length;
uchar pad_char = 0;
memcpy(&pad_char, extra_pos, sizeof pad_char);
extra_pos += sizeof pad_char;
assert(extra_pos == (uchar *)extra->data + extra->size); // consumed the entire message
assert(new_length >= old_length); // expand only
assert(the_offset + old_length <= old_val->size); // old field within the old val
DBT new_val; memset(&new_val, 0, sizeof new_val);
if (old_val != NULL) {
// compute the new val from the old val
uchar *old_val_ptr = (uchar *)old_val->data;
// allocate space for the new val's data
uchar *new_val_ptr = (uchar *)my_malloc(old_val->size + (new_length - old_length), MYF(MY_FAE));
if (!new_val_ptr) {
error = ENOMEM;
goto cleanup;
}
new_val.data = new_val_ptr;
// copy up to the old offset
memcpy(new_val_ptr, old_val_ptr, the_offset);
new_val_ptr += the_offset;
old_val_ptr += the_offset;
switch (operation) {
case UPDATE_OP_EXPAND_CHAR:
memset(new_val_ptr, ' ', new_length); // expand the field with the char pad
memcpy(new_val_ptr, old_val_ptr, old_length);
new_val_ptr += new_length;
old_val_ptr += old_length;
break;
case UPDATE_OP_EXPAND_BINARY:
memset(new_val_ptr, 0, new_length); // expand the field with the binary pad
memcpy(new_val_ptr, old_val_ptr, old_length);
memset(new_val_ptr, pad_char, new_length); // fill the entire new value with the pad char
memcpy(new_val_ptr, old_val_ptr, old_length); // overlay the low bytes of the new value with the old value
new_val_ptr += new_length;
old_val_ptr += old_length;
break;
......@@ -827,14 +907,16 @@ tokudb_update_fun(
case UPDATE_OP_COL_ADD_OR_DROP:
error = tokudb_hcad_update_fun(db, key, old_val, extra, set_val, set_extra);
break;
case UPDATE_OP_EXPAND_VARCHAR_OFFSETS:
error = tokudb_expand_varchar_offsets(db, key, old_val, extra, set_val, set_extra);
case UPDATE_OP_EXPAND_VARIABLE_OFFSETS:
error = tokudb_expand_variable_offsets(db, key, old_val, extra, set_val, set_extra);
break;
case UPDATE_OP_EXPAND_INT:
case UPDATE_OP_EXPAND_UINT:
error = tokudb_expand_int_field(db, key, old_val, extra, set_val, set_extra);
break;
case UPDATE_OP_EXPAND_CHAR:
case UPDATE_OP_EXPAND_BINARY:
error = tokudb_expand_field(db, key, old_val, extra, set_val, set_extra);
error = tokudb_expand_char_field(db, key, old_val, extra, set_val, set_extra);
break;
default:
error = EINVAL;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment