Fixed bug #14896.

This bug in Field_string::cmp resulted in wrong comparisons with keys in partial indexes over multi-byte character fields. For example, if field a is declared as varchar(16) collate utf8_unicode_ci, then INDEX(a(4)) is such an index. Wrong key comparisons could lead to wrong result sets if the selected query execution plan used a range scan by a partial index over a utf8 character field. This also caused wrong results in many other cases.

Fixed bug #14896.
This bug in Field_string::cmp resulted in wrong comparisons with keys in partial indexes over multi-byte character fields. For example, if field a is declared as varchar(16) collate utf8_unicode_ci, then INDEX(a(4)) is such an index. Wrong key comparisons could lead to wrong result sets if the selected query execution plan used a range scan by a partial index over a utf8 character field. This also caused wrong results in many other cases.
355753ba · igor@rurik.mysql.com · f1afd178 · 355753ba · 355753ba · 355753ba
Commit 355753ba authored Jun 13, 2006 by igor@rurik.mysql.com
Show whitespace changes
Inline Side-by-side

Showing with 73 additions and 13 deletions

mysql-test/r/ctype_utf8.result mysql-test/r/ctype_utf8.result +40 -0

mysql-test/t/ctype_utf8.test mysql-test/t/ctype_utf8.test +26 -0

sql/field.cc sql/field.cc +7 -13

No files found.
--- a/mysql-test/r/ctype_utf8.result
+++ b/mysql-test/r/ctype_utf8.result
@@ -1124,3 +1124,43 @@ check table t1;
 Table	Op	Msg_type	Msg_text
 test.t1	check	status	OK
 drop table t1;
+SET NAMES utf8;
+CREATE TABLE t1 (id int PRIMARY KEY,
+a varchar(16) collate utf8_unicode_ci NOT NULL default '',
+b int,
+f varchar(128) default 'XXX',
+INDEX (a(4))
+) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
+INSERT INTO t1(id, a, b) VALUES
+(1, 'cccc', 50), (2, 'cccc', 70), (3, 'cccc', 30),
+(4, 'cccc', 30), (5, 'cccc', 20), (6, 'bbbbbb', 40),
+(7, 'dddd', 30), (8, 'aaaa', 10), (9, 'aaaa', 50),
+(10, 'eeeee', 40), (11, 'bbbbbb', 60);
+SELECT id, a, b FROM t1;
+id	a	b
+1	cccc	50
+2	cccc	70
+3	cccc	30
+4	cccc	30
+5	cccc	20
+6	bbbbbb	40
+7	dddd	30
+8	aaaa	10
+9	aaaa	50
+10	eeeee	40
+11	bbbbbb	60
+SELECT id, a, b FROM t1 WHERE a BETWEEN 'aaaa' AND 'bbbbbb';
+id	a	b
+8	aaaa	10
+9	aaaa	50
+6	bbbbbb	40
+11	bbbbbb	60
+SELECT id, a FROM t1 WHERE a='bbbbbb';
+id	a
+6	bbbbbb
+11	bbbbbb
+SELECT id, a FROM t1 WHERE a='bbbbbb' ORDER BY b;
+id	a
+6	bbbbbb
+11	bbbbbb
+DROP TABLE t1;
--- a/mysql-test/t/ctype_utf8.test
+++ b/mysql-test/t/ctype_utf8.test
@@ -926,4 +926,30 @@ INSERT INTO t1 VALUES('uUABCDEFGHIGKLMNOPRSTUVWXYZ̈bbbbbbbbbbbbbbbbbbbbbbbbbbbb
 check table t1;
 drop table t1;
+#
+# Bug#14896: Comparison with a key in a partial index over mb character field
+#
+SET NAMES utf8;
+CREATE TABLE t1 (id int PRIMARY KEY,
+                 a varchar(16) collate utf8_unicode_ci NOT NULL default '',
+                 b int,
+                 f varchar(128) default 'XXX',
+                 INDEX (a(4))
+) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
+INSERT INTO t1(id, a, b) VALUES
+  (1, 'cccc', 50), (2, 'cccc', 70), (3, 'cccc', 30),
+  (4, 'cccc', 30), (5, 'cccc', 20), (6, 'bbbbbb', 40),
+  (7, 'dddd', 30), (8, 'aaaa', 10), (9, 'aaaa', 50),
+  (10, 'eeeee', 40), (11, 'bbbbbb', 60);
+SELECT id, a, b FROM t1;
+SELECT id, a, b FROM t1 WHERE a BETWEEN 'aaaa' AND 'bbbbbb';
+SELECT id, a FROM t1 WHERE a='bbbbbb';
+SELECT id, a FROM t1 WHERE a='bbbbbb' ORDER BY b;
+DROP TABLE t1;
 # End of 4.1 tests
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -5072,17 +5072,6 @@ int Field_string::cmp(const char *a_ptr, const char *b_ptr)
 {
  uint a_len, b_len;
-  if (field_charset->strxfrm_multiply > 1)
-  {
-    /*
-      We have to remove end space to be able to compare multi-byte-characters
-      like in latin_de 'ae' and 0xe4
-    */
-    return field_charset->coll->strnncollsp(field_charset,
-                                            (const uchar*) a_ptr, field_length,
-                                            (const uchar*) b_ptr,
-                                            field_length);
-  }
  if (field_charset->mbmaxlen != 1)
  {
    uint char_len= field_length/field_charset->mbmaxlen;
@@ -5091,7 +5080,12 @@ int Field_string::cmp(const char *a_ptr, const char *b_ptr)
  }
  else
    a_len= b_len= field_length;
-  return my_strnncoll(field_charset,(const uchar*) a_ptr, a_len,
+  /*
+    We have to remove end space to be able to compare multi-byte-characters
+    like in latin_de 'ae' and 0xe4
+  */
+  return field_charset->coll->strnncollsp(field_charset,
+                                          (const uchar*) a_ptr, a_len,
                                          (const uchar*) b_ptr, b_len);
 }