Commit ba8070f8 authored by Rich Prohaska, committed by Yoni Fogel

refs #5728 merge varchar fast updates to mainline

git-svn-id: file:///svn/mysql/tokudb-engine/tokudb-engine@51917 c7de825b-a66e-492c-adef-691d508d4ae1
parent 15ef9684
......@@ -115,16 +115,31 @@ static Field *find_field_by_name(TABLE *table, Item *item) {
// Return the starting offset in the value for a particular index (selected by idx) of a
// particular field (selected by expand_field_num).
// This only works for fixed length fields
static uint32_t update_field_offset(uint32_t null_bytes, KEY_AND_COL_INFO *kc_info, int idx, int expand_field_num) {
static uint32_t fixed_field_offset(uint32_t null_bytes, KEY_AND_COL_INFO *kc_info, uint idx, uint expand_field_num) {
uint32_t offset = null_bytes;
for (int i = 0; i < expand_field_num; i++) {
if (bitmap_is_set(&kc_info->key_filters[idx], i)) // skip key fields
for (uint i = 0; i < expand_field_num; i++) {
if (bitmap_is_set(&kc_info->key_filters[idx], i))
continue;
offset += kc_info->field_lengths[i];
}
return offset;
}
// Return the position of field field_num among the variable length fields that are
// stored in the value of the dictionary selected by idx.
// Fields whose bit is set in key_filters[idx] are skipped, and only fields with a
// non-zero length_bytes entry are counted (presumably these are the variable length
// fields -- confirm against KEY_AND_COL_INFO).
// If field_num is never matched (it is filtered out or its length_bytes entry is zero),
// the result is the total number of counted fields.
static uint32_t var_field_index(TABLE *table, KEY_AND_COL_INFO *kc_info, uint idx, uint field_num) {
    assert(field_num < table->s->fields);
    uint position = 0;
    for (uint f = 0; f < table->s->fields; f++) {
        // not stored in this dictionary's value, or not a counted field
        if (bitmap_is_set(&kc_info->key_filters[idx], f) || !kc_info->length_bytes[f])
            continue;
        if (f == field_num)
            return position;
        position++;
    }
    return position;
}
// Determine if an update operation can be offloaded to the storage engine.
// The update operation consists of a list of update expressions (fields[i] = values[i]), and a list
// of where conditions (conds). The function returns 0 if the update is handled in the storage engine.
......@@ -287,6 +302,10 @@ static bool check_simple_update_expression(Item *lhs_item, Item *rhs_item, TABLE
if (rhs_type == Item::INT_ITEM || rhs_type == Item::STRING_ITEM)
return true;
break;
case MYSQL_TYPE_VARCHAR:
if (rhs_type == Item::STRING_ITEM)
return true;
break;
default:
break;
}
......@@ -427,7 +446,7 @@ static bool is_strict_mode(THD *thd) {
// Check if an update operation can be handled by this storage engine. Return true if it can.
bool ha_tokudb::check_fast_update(THD *thd, List<Item> &fields, List<Item> &values, Item *conds) {
// fast upserts disabled
// fast updates disabled
if (!get_enable_fast_update(thd))
return false;
......@@ -450,10 +469,6 @@ bool ha_tokudb::check_fast_update(THD *thd, List<Item> &fields, List<Item> &valu
if (clustering_keys_exist(table))
return false;
// fast updates enabled with session variable
if (!get_enable_fast_update(thd))
return false;
if (!check_all_update_expressions(fields, values, table))
return false;
......@@ -465,11 +480,11 @@ bool ha_tokudb::check_fast_update(THD *thd, List<Item> &fields, List<Item> &valu
// Marshall a simple row descriptor to a buffer.
static void marshall_simple_descriptor(tokudb::buffer &b, TABLE *table, KEY_AND_COL_INFO &kc_info, uint key_num) {
Simple_row_descriptor sd;
tokudb::simple_row_descriptor sd;
sd.m_fixed_field_offset = table->s->null_bytes;
sd.m_var_field_offset = sd.m_fixed_field_offset + kc_info.mcp_info[key_num].fixed_field_size;
sd.m_var_offset_bytes = kc_info.mcp_info[key_num].len_of_offsets;
sd.m_num_var_fields = sd.m_var_offset_bytes == 0 ? 0 : kc_info.mcp_info[key_num].len_of_offsets / sd.m_var_offset_bytes;
sd.m_var_offset_bytes = kc_info.mcp_info[key_num].len_of_offsets; // total length of the var offsets
sd.m_bytes_per_offset = sd.m_var_offset_bytes == 0 ? 0 : kc_info.num_offset_bytes; // bytes per var offset
sd.append(b);
}
......@@ -483,11 +498,12 @@ static void marshall_simple_update(tokudb::buffer &b, Item *lhs_item, Item *rhs_
// compute the update info
uint32_t field_type;
uint32_t field_num = lhs_field->field_index;
uint32_t field_null_num = 0;
if (lhs_field->real_maybe_null())
if (lhs_field->real_maybe_null()) {
uint32_t field_num = lhs_field->field_index;
field_null_num = (1<<31) + (field_num/8)*8 + get_null_bit_position(lhs_field->null_bit);
uint32_t offset = update_field_offset(table->s->null_bytes, &share->kc_info, table->s->primary_key, lhs_field->field_index);
}
uint32_t offset;
void *v_ptr = NULL;
uint32_t v_length;
uint32_t update_operation;
......@@ -502,6 +518,7 @@ static void marshall_simple_update(tokudb::buffer &b, Item *lhs_item, Item *rhs_
case MYSQL_TYPE_LONGLONG: {
Field_num *lhs_num = static_cast<Field_num*>(lhs_field);
field_type = lhs_num->unsigned_flag ? UPDATE_TYPE_UINT : UPDATE_TYPE_INT;
offset = fixed_field_offset(table->s->null_bytes, &share->kc_info, table->s->primary_key, lhs_field->field_index);
switch (rhs_item->type()) {
case Item::INT_ITEM: {
update_operation = '=';
......@@ -535,7 +552,8 @@ static void marshall_simple_update(tokudb::buffer &b, Item *lhs_item, Item *rhs_
case MYSQL_TYPE_STRING: {
update_operation = '=';
field_type = lhs_field->binary() ? UPDATE_TYPE_BINARY : UPDATE_TYPE_CHAR;
field_type = lhs_field->binary() ? UPDATE_TYPE_BINARY : UPDATE_TYPE_CHAR;
offset = fixed_field_offset(table->s->null_bytes, &share->kc_info, table->s->primary_key, lhs_field->field_index);
v_str = *rhs_item->val_str(&v_str);
v_length = v_str.length();
if (v_length >= lhs_field->pack_length()) {
......@@ -549,6 +567,20 @@ static void marshall_simple_update(tokudb::buffer &b, Item *lhs_item, Item *rhs_
v_ptr = v_str.c_ptr();
break;
}
case MYSQL_TYPE_VARCHAR: {
update_operation = '=';
field_type = lhs_field->binary() ? UPDATE_TYPE_VARBINARY : UPDATE_TYPE_VARCHAR;
offset = var_field_index(table, &share->kc_info, table->s->primary_key, lhs_field->field_index);
v_str = *rhs_item->val_str(&v_str);
v_length = v_str.length();
if (v_length >= lhs_field->row_pack_length()) {
v_length = lhs_field->row_pack_length();
v_str.length(v_length); // truncate
}
v_ptr = v_str.c_ptr();
break;
}
default:
assert(0);
}
......@@ -556,7 +588,8 @@ static void marshall_simple_update(tokudb::buffer &b, Item *lhs_item, Item *rhs_
// marshall the update fields into the buffer
b.append(&update_operation, sizeof update_operation);
b.append(&field_type, sizeof field_type);
b.append(&field_num, sizeof field_num);
uint32_t unused = 0;
b.append(&unused, sizeof unused);
b.append(&field_null_num, sizeof field_null_num);
b.append(&offset, sizeof offset);
b.append(&v_length, sizeof v_length);
......
......@@ -24,6 +24,8 @@ enum {
UPDATE_TYPE_BINARY = 4,
UPDATE_TYPE_VARCHAR = 5,
UPDATE_TYPE_VARBINARY = 6,
UPDATE_TYPE_TEXT = 7,
UPDATE_TYPE_BLOB = 8,
};
#define UP_COL_ADD_OR_DROP UPDATE_OP_COL_ADD_OR_DROP
......@@ -97,14 +99,14 @@ enum {
// fixed field offset 4 offset of the beginning of the fixed fields
// var field offset 4 offset of the variable length offsets
// var_offset_bytes 1 total length in bytes of the variable length offsets
// num_var_fields 4 number of variable length offsets
// bytes_per_offset 4 number of bytes per offset
// Field descriptor:
// field type 4 see field types above
// field num 4 unused for fixed length fields
// unused 4 unused
// field null num 4 bit 31 is 1 if the field is nullable and the remaining bits contain the null bit number
// field offset 4 for fixed fields, this is the offset of the field from the beginning of the row
// field length 4 for fixed fields, this is the length of the field
// for variable length fields, the field offset is the index of the variable length field in the dictionary
// Simple update operation:
// update operation 4 == { '=', '+', '-' }
......@@ -112,7 +114,7 @@ enum {
// x = x + k
// x = x - k
// field descriptor
// optional value:
// value:
// value length 4 == N, length of the value
// value N value to assign, add, or subtract
......@@ -917,42 +919,149 @@ static int tokudb_expand_char_field(
return error;
}
class Simple_row_descriptor {
// Update a fixed field: new_val@offset = extra_val
//   the_offset      offset of the field from the beginning of new_val
//   length          number of bytes to overwrite; extra_val must supply that many bytes
//   field_null_num  0 if the field is not nullable; otherwise bit 31 is set and the
//                   remaining bits hold the null bit number of the field
//   new_val         row buffer that is modified in place
//   extra_val       the new field value
static void set_fixed_field(uint32_t the_offset, uint32_t length, uint32_t field_null_num,
                            tokudb::buffer &new_val, void *extra_val) {
    // Written so that the_offset + length cannot wrap around in 32 bits and slip past the check.
    assert(the_offset <= new_val.size() && length <= new_val.size() - the_offset);
    new_val.replace(the_offset, length, extra_val, length);
    if (field_null_num) {
        // Strip the "nullable" flag (bit 31) to recover the null bit number. The mask is
        // built from an unsigned constant: shifting a signed 1 into the sign bit is not portable.
        const uint32_t null_bit_number = field_null_num & ~(1U << 31);
        // The field now holds a value, so mark it not null.
        set_overall_null_position((uchar *) new_val.data(), null_bit_number, false);
    }
}
namespace tokudb {
class simple_row_descriptor {
public:
Simple_row_descriptor() : m_fixed_field_offset(0), m_var_field_offset(0), m_var_offset_bytes(0), m_num_var_fields(0) {
simple_row_descriptor() : m_fixed_field_offset(0), m_var_field_offset(0), m_var_offset_bytes(0), m_bytes_per_offset(0) {
}
~Simple_row_descriptor() {
~simple_row_descriptor() {
}
void consume(tokudb::buffer &b) {
b.consume(&m_fixed_field_offset, sizeof m_fixed_field_offset);
b.consume(&m_var_field_offset, sizeof m_var_field_offset);
b.consume(&m_var_offset_bytes, sizeof m_var_offset_bytes);
b.consume(&m_num_var_fields, sizeof m_num_var_fields);
b.consume(&m_bytes_per_offset, sizeof m_bytes_per_offset);
}
void append(tokudb::buffer &b) {
b.append(&m_fixed_field_offset, sizeof m_fixed_field_offset);
b.append(&m_var_field_offset, sizeof m_var_field_offset);
b.append(&m_var_offset_bytes, sizeof m_var_offset_bytes);
b.append(&m_num_var_fields, sizeof m_num_var_fields);
b.append(&m_bytes_per_offset, sizeof m_bytes_per_offset);
}
public:
uint32_t m_fixed_field_offset;
uint32_t m_var_field_offset;
uint8_t m_var_offset_bytes;
uint32_t m_num_var_fields;
uint32_t m_bytes_per_offset;
};
// Update a fixed field: new_val@offset = extra_val
static void set_fixed_field(uint32_t the_offset, uint32_t length, uint32_t field_num, uint32_t field_null_num,
tokudb::buffer &new_val, void *extra_val) {
assert(the_offset + length <= new_val.size());
new_val.replace(the_offset, length, extra_val, length);
// Accessor for the variable length fields of a row.
// The row holds an array of offsets (1 or 2 bytes each) starting at var_offset, followed
// immediately by the field values. Offset i is the end of value i measured from the start
// of the values, so value 0 starts at the values base and value i starts at offset i-1.
// Every method takes the base address of the row because the row buffer may move.
class var_fields {
public:
    // var_offset:       offset from the row base of the first variable length offset
    // offset_bytes:     total size in bytes of the offset array
    // bytes_per_offset: size of one offset, must be 1 or 2
    var_fields(uint32_t var_offset, uint32_t offset_bytes, uint32_t bytes_per_offset)
        : m_var_offset(var_offset),
          m_val_offset(var_offset + offset_bytes),
          m_bytes_per_offset(bytes_per_offset) {
        // validate before dividing by bytes_per_offset
        assert(bytes_per_offset == 1 || bytes_per_offset == 2);
        m_max_fields = offset_bytes / bytes_per_offset;
    }

    // Offset from the row base of the value of the given variable length field.
    uint32_t value_offset(uint32_t var_index, void *base);
    // Length in bytes of the value of the given variable length field.
    uint32_t value_length(uint32_t var_index, void *base);
    // Adjust the stored offsets after the given field changed size from old_s to new_s.
    void update_offsets(uint32_t var_index, uint32_t old_s, uint32_t new_s, void *base);

private:
    uint32_t read_offset(uint32_t var_index, void *base);
    void write_offset(uint32_t var_index, uint32_t v, void *base);

    uint32_t m_var_offset;        // where the offset array starts
    uint32_t m_val_offset;        // where the values start (right after the offset array)
    uint32_t m_bytes_per_offset;  // 1 or 2
    uint32_t m_max_fields;        // number of entries in the offset array
};
// Read the stored offset of variable length field var_index from the row at base.
// The offset is 1 or 2 bytes wide (m_bytes_per_offset) and is read in host byte order.
uint32_t var_fields::read_offset(uint32_t var_index, void *base) {
    const char *slot = (const char *)base + m_var_offset + var_index * m_bytes_per_offset;
    if (m_bytes_per_offset != 1) {
        uint16_t wide;
        memcpy(&wide, slot, sizeof wide);
        return wide;
    }
    uint8_t narrow;
    memcpy(&narrow, slot, sizeof narrow);
    return narrow;
}
// Store new_offset as the offset of variable length field var_index in the row at base.
// The value must fit in the configured offset width (1 or 2 bytes); this is asserted.
void var_fields::write_offset(uint32_t var_index, uint32_t new_offset, void *base) {
    if (m_bytes_per_offset != 1) {
        assert(new_offset < (1<<16));
        uint16_t wide = new_offset;
        char *slot = (char *)base + m_var_offset + var_index * m_bytes_per_offset;
        memcpy(slot, &wide, sizeof wide);
        return;
    }
    assert(new_offset < (1<<8));
    uint8_t narrow = new_offset;
    char *slot = (char *)base + m_var_offset + var_index * m_bytes_per_offset;
    memcpy(slot, &narrow, sizeof narrow);
}
// Return the offset, from the row base, of the value of variable length field var_index.
// The first value starts right after the offset array; every other value starts where
// the previous one ends.
uint32_t var_fields::value_offset(uint32_t var_index, void *base) {
    assert(var_index < m_max_fields);
    uint32_t start = m_val_offset;
    if (var_index != 0)
        start += read_offset(var_index-1, base);
    return start;
}
// Return the length in bytes of the value of variable length field var_index:
// the distance between its end offset and the end offset of the previous field
// (or the start of the values for the first field).
uint32_t var_fields::value_length(uint32_t var_index, void *base) {
    assert(var_index < m_max_fields);
    const uint32_t end = read_offset(var_index, base);
    if (var_index == 0)
        return end;
    return end - read_offset(var_index-1, base);
}
// The value of variable length field var_index changed size from old_s to new_s bytes.
// Shift the stored end offset of that field and of every field after it by the difference.
void var_fields::update_offsets(uint32_t var_index, uint32_t old_s, uint32_t new_s, void *base) {
    assert(var_index < m_max_fields);
    if (old_s == new_s)
        return;
    const bool grew = new_s > old_s;
    const uint32_t delta = grew ? new_s - old_s : old_s - new_s;
    for (uint32_t field = var_index; field < m_max_fields; field++) {
        const uint32_t current = read_offset(field, base);
        write_offset(field, grew ? current + delta : current - delta, base);
    }
}
}
// Update a variable length field: new_val[i] = extra_val, where i is the ith variable length field.
// Compute the offset from the var index
// Replace the var value with the extra val
// Update the var offsets
// Reset the null bit
static void set_var_field(uint32_t var_index, uint32_t length, uint32_t field_null_num,
tokudb::buffer &new_val, void *extra_val, const tokudb::simple_row_descriptor &sd) {
tokudb::var_fields var_fields(sd.m_var_field_offset, sd.m_var_offset_bytes, sd.m_bytes_per_offset);
// replace the new val with the extra val
uint32_t the_offset = var_fields.value_offset(var_index, new_val.data());
uint32_t old_s = var_fields.value_length(var_index, new_val.data());
uint32_t new_s = length;
new_val.replace(the_offset, old_s, extra_val, new_s);
// update the var offsets
var_fields.update_offsets(var_index, old_s, new_s, new_val.data());
// reset null bit
if (field_null_num)
set_overall_null_position((uchar *) new_val.data(), field_null_num & ~(1<<31), false);
}
// Update an int field: signed newval@offset = old_val@offset OP extra_val
static void int_op(uint32_t operation, uint32_t the_offset, uint32_t length, uint32_t field_num, uint32_t field_null_num,
static void int_op(uint32_t operation, uint32_t the_offset, uint32_t length, uint32_t field_null_num,
tokudb::buffer &new_val, tokudb::buffer &old_val, void *extra_val) {
assert(the_offset + length <= new_val.size());
assert(the_offset + length <= old_val.size());
......@@ -1001,7 +1110,7 @@ static void int_op(uint32_t operation, uint32_t the_offset, uint32_t length, uin
}
// Update an unsigned field: unsigned newval@offset = old_val@offset OP extra_val
static void uint_op(uint32_t operation, uint32_t the_offset, uint32_t length, uint32_t field_num, uint32_t field_null_num,
static void uint_op(uint32_t operation, uint32_t the_offset, uint32_t length, uint32_t field_null_num,
tokudb::buffer &new_val, tokudb::buffer &old_val, void *extra_val) {
assert(the_offset + length <= new_val.size());
assert(the_offset + length <= old_val.size());
......@@ -1043,7 +1152,7 @@ static void uint_op(uint32_t operation, uint32_t the_offset, uint32_t length, ui
// Decode and apply a sequence of update operations defined in the extra to the old value and put the result
// in the new value.
static void apply_updates(tokudb::buffer &new_val, tokudb::buffer &old_val, tokudb::buffer &extra_val, const Simple_row_descriptor &sd) {
static void apply_updates(tokudb::buffer &new_val, tokudb::buffer &old_val, tokudb::buffer &extra_val, const tokudb::simple_row_descriptor &sd) {
uint32_t num_updates;
extra_val.consume(&num_updates, sizeof num_updates);
for ( ; num_updates > 0; num_updates--) {
......@@ -1052,8 +1161,8 @@ static void apply_updates(tokudb::buffer &new_val, tokudb::buffer &old_val, toku
extra_val.consume(&update_operation, sizeof update_operation);
uint32_t field_type;
extra_val.consume(&field_type, sizeof field_type);
uint32_t field_num;
extra_val.consume(&field_num, sizeof field_num);
uint32_t unused;
extra_val.consume(&unused, sizeof unused);
uint32_t field_null_num;
extra_val.consume(&field_null_num, sizeof field_null_num);
uint32_t the_offset;
......@@ -1066,23 +1175,30 @@ static void apply_updates(tokudb::buffer &new_val, tokudb::buffer &old_val, toku
switch (field_type) {
case UPDATE_TYPE_INT:
if (update_operation == '=')
set_fixed_field(the_offset, length, field_num, field_null_num, new_val, extra_val_ptr);
set_fixed_field(the_offset, length, field_null_num, new_val, extra_val_ptr);
else
int_op(update_operation, the_offset, length, field_num, field_null_num, new_val, old_val, extra_val_ptr);
int_op(update_operation, the_offset, length, field_null_num, new_val, old_val, extra_val_ptr);
break;
case UPDATE_TYPE_UINT:
if (update_operation == '=')
set_fixed_field(the_offset, length, field_num, field_null_num, new_val, extra_val_ptr);
set_fixed_field(the_offset, length, field_null_num, new_val, extra_val_ptr);
else
uint_op(update_operation, the_offset, length, field_num, field_null_num, new_val, old_val, extra_val_ptr);
uint_op(update_operation, the_offset, length, field_null_num, new_val, old_val, extra_val_ptr);
break;
case UPDATE_TYPE_CHAR:
case UPDATE_TYPE_BINARY:
if (update_operation == '=')
set_fixed_field(the_offset, length, field_num, field_null_num, new_val, extra_val_ptr);
set_fixed_field(the_offset, length, field_null_num, new_val, extra_val_ptr);
else
assert(0);
break;
case UPDATE_TYPE_VARBINARY:
case UPDATE_TYPE_VARCHAR:
if (update_operation == '=')
set_var_field(the_offset, length, field_null_num, new_val, extra_val_ptr, sd);
else
assert(0);
break;
default:
assert(0);
break;
......@@ -1110,7 +1226,7 @@ static int tokudb_simple_update_fun(
if (old_val_dbt != NULL) {
// get the simple descriptor
Simple_row_descriptor sd;
tokudb::simple_row_descriptor sd;
sd.consume(extra_val);
tokudb::buffer old_val(old_val_dbt->data, old_val_dbt->size, old_val_dbt->size);
......@@ -1161,7 +1277,7 @@ static int tokudb_simple_upsert_fun(
set_val(&new_val_dbt, set_extra);
} else {
// decode the simple descriptor
Simple_row_descriptor sd;
tokudb::simple_row_descriptor sd;
sd.consume(extra_val);
tokudb::buffer old_val(old_val_dbt->data, old_val_dbt->size, old_val_dbt->size);
......
......@@ -145,7 +145,7 @@ static MYSQL_THDVAR_BOOL(enable_fast_update,
"enable fast update",
NULL, // check
NULL, // update
false // default
true // default
);
static MYSQL_THDVAR_BOOL(disable_slow_update,
PLUGIN_VAR_THDLOCAL,
......@@ -159,7 +159,7 @@ static MYSQL_THDVAR_BOOL(enable_fast_upsert,
"enable fast upsert",
NULL, // check
NULL, // update
false // default
true // default
);
static MYSQL_THDVAR_BOOL(disable_slow_upsert,
PLUGIN_VAR_THDLOCAL,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment