InnoDB: Fixed bugs in the padding and trimming of trailing spaces
that affected the UCS2 character set. (Bug #7350)

InnoDB: Fixed bugs in the padding and trimming of trailing spaces
that affected the UCS2 character set. (Bug #7350)
e2f92010 · marko@hundin.mysql.fi · 9311c8f5 · e2f92010 · e2f92010 · e2f92010
Commit e2f92010 authored Dec 17, 2004 by marko@hundin.mysql.fi
7 changed files
--- a/innobase/data/data0type.c
+++ b/innobase/data/data0type.c
@@ -165,6 +165,17 @@ dtype_is_non_binary_string_type(
 	return(FALSE);
 }

+/*************************************************************************
+Gets the MySQL charset-collation code for MySQL string types (non-inline). */
+
+ulint
+dtype_get_charset_coll_noninline(
+/*=============================*/
+	ulint	prtype)	/* in: precise data type */
+{
+	return(dtype_get_charset_coll(prtype));	/* forward to UNIV_INLINE one */
+}
+
 /*************************************************************************
 Forms a precise type from the < 4.1.2 format precise type plus the
 charset-collation code. */

--- a/innobase/include/data0type.h
+++ b/innobase/include/data0type.h
@@ -234,6 +234,13 @@ dtype_get_prtype(
 	dtype_t*	type);
 /*************************************************************************
 Gets the MySQL charset-collation code for MySQL string types. */
+
+ulint
+dtype_get_charset_coll_noninline(
+/*=============================*/
+	ulint	prtype);/* in: precise data type */
+/*************************************************************************
+Gets the MySQL charset-collation code for MySQL string types. */
 UNIV_INLINE
 ulint
 dtype_get_charset_coll(

--- a/innobase/include/row0mysql.h
+++ b/innobase/include/row0mysql.h
@@ -454,6 +454,8 @@ struct mysql_row_templ_struct {
 					zero if column cannot be NULL */
 	ulint	type;			/* column type in Innobase mtype
 					numbers DATA_CHAR... */
+	ulint	charset;		/* MySQL charset-collation code
+					of the column, or zero */
 	ulint	is_unsigned;		/* if a column type is an integer
 					type and this field is != 0, then
 					it is an unsigned integer type */

--- a/innobase/include/row0mysql.ic
+++ b/innobase/include/row0mysql.ic
@@ -91,12 +91,33 @@ row_mysql_store_col_in_innobase_format(
 		}
 	} else if (type == DATA_VARCHAR || type == DATA_VARMYSQL
 						|| type == DATA_BINARY) {
+		/* Remove trailing spaces. */
+
+		/* Handle UCS2 strings differently.  As no new
+		collations will be introduced in 4.1, we hardcode the
+		charset-collation codes here.  In 5.0, the logic will
+		be based on mbminlen. */
+		ulint	cset	= dtype_get_charset_coll(
+				dtype_get_prtype(dfield_get_type(dfield)));
 		ptr = row_mysql_read_var_ref(&col_len, mysql_data);
-		
-		/* Remove trailing spaces */
-		while (col_len > 0 && ptr[col_len - 1] == ' ') {
-			col_len--;
-		} 
+		if (cset == 35/*ucs2_general_ci*/
+				|| cset == 90/*ucs2_bin*/
+				|| (cset >= 128/*ucs2_unicode_ci*/
+				&& cset <= 144/*ucs2_persian_ci*/)) {
+			/* space=0x0020 */
+			/* Trim "half-chars", just in case. */
+			col_len &= ~1;
+
+			while (col_len >= 2 && ptr[col_len - 2] == 0x00
+					&& ptr[col_len - 1] == 0x20) {
+				col_len -= 2;
+			}
+		} else {
+			/* space=0x20 */
+			while (col_len > 0 && ptr[col_len - 1] == 0x20) {
+				col_len--;
+			}
+		}
 	} else if (type == DATA_BLOB) {
 		ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len);
 	}

--- a/innobase/rem/rem0cmp.c
+++ b/innobase/rem/rem0cmp.c
@@ -261,22 +261,6 @@ cmp_whole_field(
 "InnoDB: comparison!\n");
 		}		   

-		/* MySQL does not pad the ends of strings with spaces in a
-		comparison. That would cause a foreign key check to fail for
-		non-latin1 character sets if we have different length columns.
-		To prevent that we remove trailing spaces here before doing
-		the comparison. NOTE that if we in the future map more MySQL
-		types to DATA_MYSQL or DATA_VARMYSQL, we have to change this
-		code. */
-
-		while (a_length > 0 && a[a_length - 1] == ' ') {
-		      a_length--;
-		}
-
-		while (b_length > 0 && b[b_length - 1] == ' ') {
-		      b_length--;
-		}
-
 		return(innobase_mysql_cmp(
 				(int)(type->prtype & DATA_MYSQL_TYPE_MASK),
 				(uint)dtype_get_charset_coll(type->prtype),

--- a/innobase/row/row0sel.c
+++ b/innobase/row/row0sel.c
@@ -2204,9 +2204,6 @@ row_sel_field_store_in_mysql_format(
 		dest = row_mysql_store_var_len(dest, len);
 		ut_memcpy(dest, data, len);

-		/* Pad with trailing spaces */
-		memset(dest + len, ' ', col_len - len); 
-
 		/* ut_ad(col_len >= len + 2); No real var implemented in
 		MySQL yet! */
 		
@@ -2334,7 +2331,45 @@ row_sel_store_mysql_rec(
 				mysql_rec + templ->mysql_col_offset,
 				templ->mysql_col_len, data, len,
 				templ->type, templ->is_unsigned);
-				
+
+			if (templ->type == DATA_VARCHAR
+					|| templ->type == DATA_VARMYSQL
+					|| templ->type == DATA_BINARY) {
+				/* Pad with trailing spaces */
+				data = mysql_rec + templ->mysql_col_offset;
+
+				/* Handle UCS2 strings differently.  As no new
+				collations will be introduced in 4.1, we
+				hardcode the charset-collation codes here.
+				5.0 will use a different approach. */
+				if (templ->charset == 35
+						|| templ->charset == 90
+						|| (templ->charset >= 128
+						&& templ->charset <= 144)) {
+					/* space=0x0020 */
+					ulint	col_len = templ->mysql_col_len;
+
+					ut_a(!(col_len & 1));
+					if (len & 1) {
+						/* A 0x20 has been stripped
+						from the column.
+						Pad it back. */
+						goto pad_0x20;
+					}
+					/* Pad the rest of the string
+					with 0x0020 */
+					while (len < col_len) {
+						data[len++] = 0x00;
+					pad_0x20:
+						data[len++] = 0x20;
+					}
+				} else {
+					/* space=0x20 */
+					memset(data + len, 0x20,
+						templ->mysql_col_len - len);
+				}
+			}
+
 			/* Cleanup */
 			if (extern_field_heap) {
 				mem_heap_free(extern_field_heap);
@@ -2368,8 +2403,29 @@ row_sel_store_mysql_rec(
 				pad_char = '\0';
 			}

-			memset(mysql_rec + templ->mysql_col_offset, pad_char,
-							templ->mysql_col_len);
+			/* Handle UCS2 strings differently.  As no new
+			collations will be introduced in 4.1,
+			we hardcode the charset-collation codes here.
+			5.0 will use a different approach. */
+			if (templ->charset == 35
+					|| templ->charset == 90
+					|| (templ->charset >= 128
+					&& templ->charset <= 144)) {
+				/* There are two bytes per char, so the length
+				has to be an even number. */
+				ut_a(!(templ->mysql_col_len & 1));
+				data = mysql_rec + templ->mysql_col_offset;
+				len = templ->mysql_col_len;
+				/* Pad with 0x0020. */
+				while (len >= 2) {
+					*data++ = 0x00;
+					*data++ = 0x20;
+					len -= 2;
+				}
+			} else {
+				memset(mysql_rec + templ->mysql_col_offset,
+					pad_char, templ->mysql_col_len);
+			}
 		}
 	} 


--- a/sql/ha_innodb.cc
+++ b/sql/ha_innodb.cc
@@ -2248,6 +2248,8 @@ build_template(

 		templ->mysql_col_len = (ulint) field->pack_length();
 		templ->type = get_innobase_type_from_mysql_type(field);
+		templ->charset = dtype_get_charset_coll_noninline(
+				index->table->cols[i].type.prtype);
 		templ->is_unsigned = (ulint) (field->flags & UNSIGNED_FLAG);

 		if (templ->type == DATA_BLOB) {