Commit 7b063137 authored by unknown

InnoDB: Fixed bugs in the padding and trimming of trailing spaces that affected the UCS2 character set. (Bug #7350)


innobase/data/data0type.c:
  Added dtype_get_charset_coll_noninline()
innobase/include/data0type.h:
  Added dtype_get_charset_coll_noninline()
innobase/include/row0mysql.h:
  Added charset field to mysql_row_templ_struct.
innobase/include/row0mysql.ic:
  row_mysql_store_col_in_innobase_format():
  When removing trailing spaces, treat the UCS2 character set properly.
innobase/rem/rem0cmp.c:
  cmp_whole_field(): Do not remove trailing 0x20 bytes, as
  innobase_mysql_cmp() implicitly pads the strings with trailing
  spaces as necessary.
innobase/row/row0sel.c:
  row_sel_field_store_in_mysql_format(): Do not pad with 0x20 bytes.
  row_sel_store_mysql_rec(): Pad VARCHARs with trailing spaces
  (0x20, or 0x0020 in UCS2).
sql/ha_innodb.cc:
  build_template(): Initialize templ->charset
parent 7b592c9e
...@@ -165,6 +165,17 @@ dtype_is_non_binary_string_type( ...@@ -165,6 +165,17 @@ dtype_is_non_binary_string_type(
return(FALSE); return(FALSE);
} }
/*************************************************************************
Non-inlined wrapper around dtype_get_charset_coll(): forwards the precise
type to it and hands back the resulting MySQL charset-collation code. */
ulint
dtype_get_charset_coll_noninline(
/*=============================*/
			/* out: the value returned by
			dtype_get_charset_coll() for prtype */
	ulint	prtype)	/* in: precise data type */
{
	return dtype_get_charset_coll(prtype);
}
/************************************************************************* /*************************************************************************
Forms a precise type from the < 4.1.2 format precise type plus the Forms a precise type from the < 4.1.2 format precise type plus the
charset-collation code. */ charset-collation code. */
......
...@@ -234,6 +234,13 @@ dtype_get_prtype( ...@@ -234,6 +234,13 @@ dtype_get_prtype(
dtype_t* type); dtype_t* type);
/************************************************************************* /*************************************************************************
Gets the MySQL charset-collation code for MySQL string types. */ Gets the MySQL charset-collation code for MySQL string types. */
ulint
dtype_get_charset_coll_noninline(
/*=============================*/
ulint prtype);/* in: precise data type */
/*************************************************************************
Gets the MySQL charset-collation code for MySQL string types. */
UNIV_INLINE UNIV_INLINE
ulint ulint
dtype_get_charset_coll( dtype_get_charset_coll(
......
...@@ -454,6 +454,8 @@ struct mysql_row_templ_struct { ...@@ -454,6 +454,8 @@ struct mysql_row_templ_struct {
zero if column cannot be NULL */ zero if column cannot be NULL */
ulint type; /* column type in Innobase mtype ulint type; /* column type in Innobase mtype
numbers DATA_CHAR... */ numbers DATA_CHAR... */
ulint charset; /* MySQL charset-collation code
of the column, or zero */
ulint is_unsigned; /* if a column type is an integer ulint is_unsigned; /* if a column type is an integer
type and this field is != 0, then type and this field is != 0, then
it is an unsigned integer type */ it is an unsigned integer type */
......
...@@ -91,12 +91,33 @@ row_mysql_store_col_in_innobase_format( ...@@ -91,12 +91,33 @@ row_mysql_store_col_in_innobase_format(
} }
} else if (type == DATA_VARCHAR || type == DATA_VARMYSQL } else if (type == DATA_VARCHAR || type == DATA_VARMYSQL
|| type == DATA_BINARY) { || type == DATA_BINARY) {
/* Remove trailing spaces. */
/* Handle UCS2 strings differently. As no new
collations will be introduced in 4.1, we hardcode the
charset-collation codes here. In 5.0, the logic will
be based on mbminlen. */
ulint cset = dtype_get_charset_coll(
dtype_get_prtype(dfield_get_type(dfield)));
ptr = row_mysql_read_var_ref(&col_len, mysql_data); ptr = row_mysql_read_var_ref(&col_len, mysql_data);
if (cset == 35/*ucs2_general_ci*/
|| cset == 90/*ucs2_bin*/
|| (cset >= 128/*ucs2_unicode_ci*/
&& cset <= 144/*ucs2_persian_ci*/)) {
/* space=0x0020 */
/* Trim "half-chars", just in case. */
col_len &= ~1;
/* Remove trailing spaces */ while (col_len >= 2 && ptr[col_len - 2] == 0x00
while (col_len > 0 && ptr[col_len - 1] == ' ') { && ptr[col_len - 1] == 0x20) {
col_len -= 2;
}
} else {
/* space=0x20 */
while (col_len > 0 && ptr[col_len - 1] == 0x20) {
col_len--; col_len--;
} }
}
} else if (type == DATA_BLOB) { } else if (type == DATA_BLOB) {
ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len); ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len);
} }
......
...@@ -261,22 +261,6 @@ cmp_whole_field( ...@@ -261,22 +261,6 @@ cmp_whole_field(
"InnoDB: comparison!\n"); "InnoDB: comparison!\n");
} }
/* MySQL does not pad the ends of strings with spaces in a
comparison. That would cause a foreign key check to fail for
non-latin1 character sets if we have different length columns.
To prevent that we remove trailing spaces here before doing
the comparison. NOTE that if we in the future map more MySQL
types to DATA_MYSQL or DATA_VARMYSQL, we have to change this
code. */
while (a_length > 0 && a[a_length - 1] == ' ') {
a_length--;
}
while (b_length > 0 && b[b_length - 1] == ' ') {
b_length--;
}
return(innobase_mysql_cmp( return(innobase_mysql_cmp(
(int)(type->prtype & DATA_MYSQL_TYPE_MASK), (int)(type->prtype & DATA_MYSQL_TYPE_MASK),
(uint)dtype_get_charset_coll(type->prtype), (uint)dtype_get_charset_coll(type->prtype),
......
...@@ -2204,9 +2204,6 @@ row_sel_field_store_in_mysql_format( ...@@ -2204,9 +2204,6 @@ row_sel_field_store_in_mysql_format(
dest = row_mysql_store_var_len(dest, len); dest = row_mysql_store_var_len(dest, len);
ut_memcpy(dest, data, len); ut_memcpy(dest, data, len);
/* Pad with trailing spaces */
memset(dest + len, ' ', col_len - len);
/* ut_ad(col_len >= len + 2); No real var implemented in /* ut_ad(col_len >= len + 2); No real var implemented in
MySQL yet! */ MySQL yet! */
...@@ -2335,6 +2332,44 @@ row_sel_store_mysql_rec( ...@@ -2335,6 +2332,44 @@ row_sel_store_mysql_rec(
templ->mysql_col_len, data, len, templ->mysql_col_len, data, len,
templ->type, templ->is_unsigned); templ->type, templ->is_unsigned);
if (templ->type == DATA_VARCHAR
|| templ->type == DATA_VARMYSQL
|| templ->type == DATA_BINARY) {
/* Pad with trailing spaces */
data = mysql_rec + templ->mysql_col_offset;
/* Handle UCS2 strings differently. As no new
collations will be introduced in 4.1, we
hardcode the charset-collation codes here.
5.0 will use a different approach. */
if (templ->charset == 35
|| templ->charset == 90
|| (templ->charset >= 128
&& templ->charset <= 144)) {
/* space=0x0020 */
ulint col_len = templ->mysql_col_len;
ut_a(!(col_len & 1));
if (len & 1) {
/* A 0x20 has been stripped
from the column.
Pad it back. */
goto pad_0x20;
}
/* Pad the rest of the string
with 0x0020 */
while (len < col_len) {
data[len++] = 0x00;
pad_0x20:
data[len++] = 0x20;
}
} else {
/* space=0x20 */
memset(data + len, 0x20,
templ->mysql_col_len - len);
}
}
/* Cleanup */ /* Cleanup */
if (extern_field_heap) { if (extern_field_heap) {
mem_heap_free(extern_field_heap); mem_heap_free(extern_field_heap);
...@@ -2368,8 +2403,29 @@ row_sel_store_mysql_rec( ...@@ -2368,8 +2403,29 @@ row_sel_store_mysql_rec(
pad_char = '\0'; pad_char = '\0';
} }
memset(mysql_rec + templ->mysql_col_offset, pad_char, /* Handle UCS2 strings differently. As no new
templ->mysql_col_len); collations will be introduced in 4.1,
we hardcode the charset-collation codes here.
5.0 will use a different approach. */
if (templ->charset == 35
|| templ->charset == 90
|| (templ->charset >= 128
&& templ->charset <= 144)) {
/* There are two bytes per char, so the length
has to be an even number. */
ut_a(!(templ->mysql_col_len & 1));
data = mysql_rec + templ->mysql_col_offset;
len = templ->mysql_col_len;
/* Pad with 0x0020. */
while (len >= 2) {
*data++ = 0x00;
*data++ = 0x20;
len -= 2;
}
} else {
memset(mysql_rec + templ->mysql_col_offset,
pad_char, templ->mysql_col_len);
}
} }
} }
......
...@@ -2248,6 +2248,8 @@ build_template( ...@@ -2248,6 +2248,8 @@ build_template(
templ->mysql_col_len = (ulint) field->pack_length(); templ->mysql_col_len = (ulint) field->pack_length();
templ->type = get_innobase_type_from_mysql_type(field); templ->type = get_innobase_type_from_mysql_type(field);
templ->charset = dtype_get_charset_coll_noninline(
index->table->cols[i].type.prtype);
templ->is_unsigned = (ulint) (field->flags & UNSIGNED_FLAG); templ->is_unsigned = (ulint) (field->flags & UNSIGNED_FLAG);
if (templ->type == DATA_BLOB) { if (templ->type == DATA_BLOB) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment