Commit e2f92010 authored by marko@hundin.mysql.fi

InnoDB: Fixed bugs in the padding and trimming of trailing spaces that affected the UCS2 character set. (Bug #7350)
parent 9311c8f5
...@@ -165,6 +165,17 @@ dtype_is_non_binary_string_type( ...@@ -165,6 +165,17 @@ dtype_is_non_binary_string_type(
return(FALSE); return(FALSE);
} }
/*************************************************************************
Out-of-line entry point for dtype_get_charset_coll(): hands prtype to that
function and returns the MySQL charset-collation code it produces for
MySQL string types. */
ulint
dtype_get_charset_coll_noninline(
/*=============================*/
				/* out: value returned by
				dtype_get_charset_coll() */
	ulint	prtype)		/* in: precise data type */
{
	ulint	charset_coll;

	/* Pure delegation: the code is computed entirely by the
	UNIV_INLINE function; nothing is added or altered here. */
	charset_coll = dtype_get_charset_coll(prtype);

	return(charset_coll);
}
/************************************************************************* /*************************************************************************
Forms a precise type from the < 4.1.2 format precise type plus the Forms a precise type from the < 4.1.2 format precise type plus the
charset-collation code. */ charset-collation code. */
......
...@@ -234,6 +234,13 @@ dtype_get_prtype( ...@@ -234,6 +234,13 @@ dtype_get_prtype(
dtype_t* type); dtype_t* type);
/************************************************************************* /*************************************************************************
Gets the MySQL charset-collation code for MySQL string types. */ Gets the MySQL charset-collation code for MySQL string types. */
ulint
dtype_get_charset_coll_noninline(
/*=============================*/
/* out: MySQL charset-collation code */
ulint prtype);/* in: precise data type */
/*************************************************************************
Gets the MySQL charset-collation code for MySQL string types. */
UNIV_INLINE UNIV_INLINE
ulint ulint
dtype_get_charset_coll( dtype_get_charset_coll(
......
...@@ -454,6 +454,8 @@ struct mysql_row_templ_struct { ...@@ -454,6 +454,8 @@ struct mysql_row_templ_struct {
zero if column cannot be NULL */ zero if column cannot be NULL */
ulint type; /* column type in Innobase mtype ulint type; /* column type in Innobase mtype
numbers DATA_CHAR... */ numbers DATA_CHAR... */
ulint charset; /* MySQL charset-collation code
of the column, or zero */
ulint is_unsigned; /* if a column type is an integer ulint is_unsigned; /* if a column type is an integer
type and this field is != 0, then type and this field is != 0, then
it is an unsigned integer type */ it is an unsigned integer type */
......
...@@ -91,12 +91,33 @@ row_mysql_store_col_in_innobase_format( ...@@ -91,12 +91,33 @@ row_mysql_store_col_in_innobase_format(
} }
} else if (type == DATA_VARCHAR || type == DATA_VARMYSQL } else if (type == DATA_VARCHAR || type == DATA_VARMYSQL
|| type == DATA_BINARY) { || type == DATA_BINARY) {
/* Remove trailing spaces. */
/* Handle UCS2 strings differently. As no new
collations will be introduced in 4.1, we hardcode the
charset-collation codes here. In 5.0, the logic will
be based on mbminlen. */
ulint cset = dtype_get_charset_coll(
dtype_get_prtype(dfield_get_type(dfield)));
ptr = row_mysql_read_var_ref(&col_len, mysql_data); ptr = row_mysql_read_var_ref(&col_len, mysql_data);
if (cset == 35/*ucs2_general_ci*/
/* Remove trailing spaces */ || cset == 90/*ucs2_bin*/
while (col_len > 0 && ptr[col_len - 1] == ' ') { || (cset >= 128/*ucs2_unicode_ci*/
col_len--; && cset <= 144/*ucs2_persian_ci*/)) {
} /* space=0x0020 */
/* Trim "half-chars", just in case. */
col_len &= ~1;
while (col_len >= 2 && ptr[col_len - 2] == 0x00
&& ptr[col_len - 1] == 0x20) {
col_len -= 2;
}
} else {
/* space=0x20 */
while (col_len > 0 && ptr[col_len - 1] == 0x20) {
col_len--;
}
}
} else if (type == DATA_BLOB) { } else if (type == DATA_BLOB) {
ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len); ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len);
} }
......
...@@ -261,22 +261,6 @@ cmp_whole_field( ...@@ -261,22 +261,6 @@ cmp_whole_field(
"InnoDB: comparison!\n"); "InnoDB: comparison!\n");
} }
/* MySQL does not pad the ends of strings with spaces in a
comparison. That would cause a foreign key check to fail for
non-latin1 character sets if we have different length columns.
To prevent that we remove trailing spaces here before doing
the comparison. NOTE that if we in the future map more MySQL
types to DATA_MYSQL or DATA_VARMYSQL, we have to change this
code. */
while (a_length > 0 && a[a_length - 1] == ' ') {
a_length--;
}
while (b_length > 0 && b[b_length - 1] == ' ') {
b_length--;
}
return(innobase_mysql_cmp( return(innobase_mysql_cmp(
(int)(type->prtype & DATA_MYSQL_TYPE_MASK), (int)(type->prtype & DATA_MYSQL_TYPE_MASK),
(uint)dtype_get_charset_coll(type->prtype), (uint)dtype_get_charset_coll(type->prtype),
......
...@@ -2204,9 +2204,6 @@ row_sel_field_store_in_mysql_format( ...@@ -2204,9 +2204,6 @@ row_sel_field_store_in_mysql_format(
dest = row_mysql_store_var_len(dest, len); dest = row_mysql_store_var_len(dest, len);
ut_memcpy(dest, data, len); ut_memcpy(dest, data, len);
/* Pad with trailing spaces */
memset(dest + len, ' ', col_len - len);
/* ut_ad(col_len >= len + 2); No real var implemented in /* ut_ad(col_len >= len + 2); No real var implemented in
MySQL yet! */ MySQL yet! */
...@@ -2334,7 +2331,45 @@ row_sel_store_mysql_rec( ...@@ -2334,7 +2331,45 @@ row_sel_store_mysql_rec(
mysql_rec + templ->mysql_col_offset, mysql_rec + templ->mysql_col_offset,
templ->mysql_col_len, data, len, templ->mysql_col_len, data, len,
templ->type, templ->is_unsigned); templ->type, templ->is_unsigned);
if (templ->type == DATA_VARCHAR
|| templ->type == DATA_VARMYSQL
|| templ->type == DATA_BINARY) {
/* Pad with trailing spaces */
data = mysql_rec + templ->mysql_col_offset;
/* Handle UCS2 strings differently. As no new
collations will be introduced in 4.1, we
hardcode the charset-collation codes here.
5.0 will use a different approach. */
if (templ->charset == 35
|| templ->charset == 90
|| (templ->charset >= 128
&& templ->charset <= 144)) {
/* space=0x0020 */
ulint col_len = templ->mysql_col_len;
ut_a(!(col_len & 1));
if (len & 1) {
/* A 0x20 has been stripped
from the column.
Pad it back. */
goto pad_0x20;
}
/* Pad the rest of the string
with 0x0020 */
while (len < col_len) {
data[len++] = 0x00;
pad_0x20:
data[len++] = 0x20;
}
} else {
/* space=0x20 */
memset(data + len, 0x20,
templ->mysql_col_len - len);
}
}
/* Cleanup */ /* Cleanup */
if (extern_field_heap) { if (extern_field_heap) {
mem_heap_free(extern_field_heap); mem_heap_free(extern_field_heap);
...@@ -2368,8 +2403,29 @@ row_sel_store_mysql_rec( ...@@ -2368,8 +2403,29 @@ row_sel_store_mysql_rec(
pad_char = '\0'; pad_char = '\0';
} }
memset(mysql_rec + templ->mysql_col_offset, pad_char, /* Handle UCS2 strings differently. As no new
templ->mysql_col_len); collations will be introduced in 4.1,
we hardcode the charset-collation codes here.
5.0 will use a different approach. */
if (templ->charset == 35
|| templ->charset == 90
|| (templ->charset >= 128
&& templ->charset <= 144)) {
/* There are two bytes per char, so the length
has to be an even number. */
ut_a(!(templ->mysql_col_len & 1));
data = mysql_rec + templ->mysql_col_offset;
len = templ->mysql_col_len;
/* Pad with 0x0020. */
while (len >= 2) {
*data++ = 0x00;
*data++ = 0x20;
len -= 2;
}
} else {
memset(mysql_rec + templ->mysql_col_offset,
pad_char, templ->mysql_col_len);
}
} }
} }
......
...@@ -2248,6 +2248,8 @@ build_template( ...@@ -2248,6 +2248,8 @@ build_template(
templ->mysql_col_len = (ulint) field->pack_length(); templ->mysql_col_len = (ulint) field->pack_length();
templ->type = get_innobase_type_from_mysql_type(field); templ->type = get_innobase_type_from_mysql_type(field);
templ->charset = dtype_get_charset_coll_noninline(
index->table->cols[i].type.prtype);
templ->is_unsigned = (ulint) (field->flags & UNSIGNED_FLAG); templ->is_unsigned = (ulint) (field->flags & UNSIGNED_FLAG);
if (templ->type == DATA_BLOB) { if (templ->type == DATA_BLOB) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment