Bug#29333 myisam corruption with character set cp932 collate cp932_japanese_ci

Problem: wrong comparison of strings with trailing spaces. This problem was fixed for all other character sets as part of Bug#7788 ("'Table is full' occurs during a multitable update"), but ctype-cp932.c was forgotten. Fix: apply the same fix to ctype-cp932.c (see ctype-sjis.c for an example of a file that was previously fixed correctly).

Bug#29333 myisam corruption with character set cp932 collate cp932_japanese_ci
Problem: wrong comparison of strings with trailing spaces. This problem was fixed for all other character sets as part of Bug#7788 ("'Table is full' occurs during a multitable update"), but ctype-cp932.c was forgotten. Fix: apply the same fix to ctype-cp932.c (see ctype-sjis.c for an example of a file that was previously fixed correctly).
fd3cdea3 · bar@mysql.com/bar.myoffice.izhnet.ru · 825570f5 · fd3cdea3 · fd3cdea3 · fd3cdea3
Commit fd3cdea3 authored Jul 04, 2007 by bar@mysql.com/bar.myoffice.izhnet.ru
4 changed files
--- a/mysql-test/r/ctype_cp932.result
+++ b/mysql-test/r/ctype_cp932.result
@@ -11335,6 +11335,22 @@ cp932_bin	6109
 cp932_bin	61
 cp932_bin	6120
 drop table t1;
+create table t2 (a char(1));
+insert into t2 values ('0'),('1'),('2'),('3'),('4'),('5'),('6'),('7');
+insert into t2 values ('8'),('9'),('A'),('B'),('C'),('D'),('E'),('F');
+create table t1 (
+a varchar(2) character set cp932
+) engine=myisam;
+insert into t1
+select unhex(concat(t24.a, t23.a, t22.a, t21.a))
+from t2 t21, t2 t22, t2 t23, t2 t24;
+delete from t1 where a='';
+alter table t1 add key(a);
+check table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+drop table t1;
+drop table t2;
 create table t1 (col1 varchar(1)) character set cp932;
 insert into t1 values ('a');
 insert into t1 values ('ab');

--- a/mysql-test/t/ctype_cp932.test
+++ b/mysql-test/t/ctype_cp932.test
@@ -403,6 +403,28 @@ SET collation_connection='cp932_japanese_ci';
 SET collation_connection='cp932_bin';
 -- source include/ctype_filesort.inc

+#
+# Bug#29333 myisam corruption with
+# character set cp932 collate cp932_japanese_ci
+#
+create table t2 (a char(1));
+insert into t2 values ('0'),('1'),('2'),('3'),('4'),('5'),('6'),('7');
+insert into t2 values ('8'),('9'),('A'),('B'),('C'),('D'),('E'),('F');
+create table t1 (
+  a varchar(2) character set cp932
+) engine=myisam;
+--disable_warnings
+insert into t1
+select unhex(concat(t24.a, t23.a, t22.a, t21.a))
+from t2 t21, t2 t22, t2 t23, t2 t24;
+--enable_warnings
+delete from t1 where a='';
+alter table t1 add key(a);
+check table t1;
+drop table t1;
+drop table t2;
+
+
 #
 # Bug#12547: Inserting long string into varchar causes table crash in cp932
 #

--- a/strings/ctype-cp932.c
+++ b/strings/ctype-cp932.c
@@ -250,9 +250,16 @@ static int my_strnncollsp_cp932(CHARSET_INFO *cs __attribute__((unused)),
  const uchar *a_end= a + a_length;
  const uchar *b_end= b + b_length;
  int res= my_strnncoll_cp932_internal(cs, &a, a_length, &b, b_length);
+
+#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
+  diff_if_only_endspace_difference= 0;
+#endif
+
  if (!res && (a != a_end || b != b_end))
  {
-    int swap= 0;
+    int swap= 1;
+    if (diff_if_only_endspace_difference)
+      res= 1;                                   /* Assume 'a' is bigger */
    /*
      Check the next not space character of the longer key. If it's < ' ',
      then it's smaller than the other key.
@@ -263,11 +270,12 @@ static int my_strnncollsp_cp932(CHARSET_INFO *cs __attribute__((unused)),
      a_end= b_end;
      a= b;
      swap= -1;				/* swap sign of result */
+      res= -res;
    }
    for (; a < a_end ; a++)
    {
-      if (*a != ' ')
-	return ((int) *a - (int) ' ') ^ swap;
+      if (*a != (uchar) ' ')
+	return (*a < (uchar) ' ') ? -swap : swap;
    }
  }
  return res;

--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -2802,16 +2802,19 @@ static int my_strnncoll_utf8_cs(CHARSET_INFO *cs,
 static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs, 
                                  const uchar *s, uint slen,
                                  const uchar *t, uint tlen,
-                                  my_bool diff_if_only_endspace_difference
-                                  __attribute__((unused)))
+                                  my_bool diff_if_only_endspace_difference)
 {
-  int s_res,t_res;
-  my_wc_t s_wc,t_wc;
-  const uchar *se= s+slen;
-  const uchar *te= t+tlen;
-  int save_diff = 0;
+  int s_res, t_res, res;
+  my_wc_t s_wc, t_wc;
+  const uchar *se= s + slen;
+  const uchar *te= t + tlen;
+  int save_diff= 0;
  MY_UNICASE_INFO **uni_plane= cs->caseinfo;

+#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
+  diff_if_only_endspace_difference= 0;
+#endif
+    
  while ( s < se && t < te )
  {
    int plane;
@@ -2843,16 +2846,20 @@ static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs,
  
  slen= se-s;
  tlen= te-t;
+  res= 0;
  
  if (slen != tlen)
  {
-    int swap= 0;
+    int swap= 1;
+    if (diff_if_only_endspace_difference)
+      res= 1;                                   /* Assume 'a' is bigger */
    if (slen < tlen)
    {
      slen= tlen;
      s= t;
      se= te;
      swap= -1;
+      res= -res;
    }
    /*
      This following loop uses the fact that in UTF-8
@@ -2866,8 +2873,8 @@ static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs,
    */
    for ( ; s < se; s++)
    {
-      if (*s != ' ')
-        return ((int)*s -  (int) ' ') ^ swap;
+      if (*s != (uchar) ' ')
+        return (*s < (uchar) ' ') ? -swap : swap;
    }
  }
  return save_diff;