Merge tulin@bk-internal.mysql.com:/home/bk/mysql-4.1

into poseidon.ndb.mysql.com:/home/tomas/mysql-4.1

Merge tulin@bk-internal.mysql.com:/home/bk/mysql-4.1
into poseidon.ndb.mysql.com:/home/tomas/mysql-4.1
656d21a7 · unknown · 3c9d7379 · f9300c29 · 656d21a7 · 656d21a7
Commit 656d21a7 authored Oct 21, 2004 by unknown
4 changed files
--- a/mysql-test/r/ctype_uca.result
+++ b/mysql-test/r/ctype_uca.result
 DROP TABLE IF EXISTS t1;
 set names utf8;
+set collation_connection=utf8_unicode_ci;
+select 'a' = 'a', 'a' = 'a ', 'a ' = 'a';
+'a' = 'a'	'a' = 'a '	'a ' = 'a'
+1	1	1
+select 'a\t' = 'a' , 'a\t' < 'a' , 'a\t' > 'a';
+'a\t' = 'a'	'a\t' < 'a'	'a\t' > 'a'
+0	1	0
+select 'a\t' = 'a ', 'a\t' < 'a ', 'a\t' > 'a ';
+'a\t' = 'a '	'a\t' < 'a '	'a\t' > 'a '
+0	1	0
+select 'a' = 'a\t', 'a' < 'a\t', 'a' > 'a\t';
+'a' = 'a\t'	'a' < 'a\t'	'a' > 'a\t'
+0	0	1
+select 'a ' = 'a\t', 'a ' < 'a\t', 'a ' > 'a\t';
+'a ' = 'a\t'	'a ' < 'a\t'	'a ' > 'a\t'
+0	0	1
+select 'a  a' > 'a', 'a  \t' < 'a';
+'a  a' > 'a'	'a  \t' < 'a'
+1	1
+CREATE TABLE t (
+c char(20) NOT NULL
+) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
+INSERT INTO t VALUES ('a'),('ab'),('aba');
+ALTER TABLE t ADD INDEX (c);
+SELECT c FROM t WHERE c LIKE 'a%';
+c
+a
+ab
+aba
+DROP TABLE t;
 create table t1 (c1 char(10) character set utf8 collate utf8_bin);
 insert into t1 values ('A'),('a');
 insert into t1 values ('B'),('b');

--- a/mysql-test/t/ctype_uca.test
+++ b/mysql-test/t/ctype_uca.test
@@ -7,8 +7,35 @@ DROP TABLE IF EXISTS t1;
 #
 # Test Unicode collations.
 #
-
 set names utf8; 
+
+#
+# Check trailing spaces
+#
+set collation_connection=utf8_unicode_ci;
+
+select 'a' = 'a', 'a' = 'a ', 'a ' = 'a';
+
+select 'a\t' = 'a' , 'a\t' < 'a' , 'a\t' > 'a';
+select 'a\t' = 'a ', 'a\t' < 'a ', 'a\t' > 'a ';
+
+select 'a' = 'a\t', 'a' < 'a\t', 'a' > 'a\t';
+select 'a ' = 'a\t', 'a ' < 'a\t', 'a ' > 'a\t';
+
+select 'a  a' > 'a', 'a  \t' < 'a';
+
+#
+# Bug #5679 utf8_unicode_ci LIKE--trailing % doesn't equal zero characters
+#
+CREATE TABLE t (
+  c char(20) NOT NULL
+) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
+INSERT INTO t VALUES ('a'),('ab'),('aba');
+ALTER TABLE t ADD INDEX (c);
+SELECT c FROM t WHERE c LIKE 'a%';
+#should find 3 rows but only found 2
+DROP TABLE t;
+
 create table t1 (c1 char(10) character set utf8 collate utf8_bin);

 #

--- a/strings/CHARSET_INFO.txt
+++ b/strings/CHARSET_INFO.txt
@@ -74,7 +74,16 @@ Conversion tables
  ctype      - pointer to array[257] of "type of characters"
               bit mask for each character, e.g. if a 
               character is a digit or a letter or a separator, etc.
-  to_lower   - pointer to arrat[256] used in LCASE()
+
+               Monty 2004-10-21:
+                 If you look at the macros, we use ctype[(char)+1].
+                 ctype[0] is traditionally in most ctype libraries
+                 reserved for EOF (-1). The idea is that you can use
+                 the result from fgetc() directly with ctype[]. As
+                 we have to be compatible with external ctype[] versions,
+                 it's better to do it the same way as they do...
+
+  to_lower   - pointer to array[256] used in LCASE()
  to_upper   - pointer to array[256] used in UCASE()
  sort_order - pointer to array[256] used for strings comparison

@@ -137,7 +146,7 @@ following set of functions:
 Multibyte routines
 ------------------
 ismbchar()  - detects if the given string is a multibyte sequence
-mbcharlen() - retuturns length of multibyte sequence starting with
+mbcharlen() - returns length of multibyte sequence starting with
              the given character
 numchars()  - returns number of characters in the given string, e.g.
              in SQL function CHAR_LENGTH().

--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -7053,6 +7053,28 @@ static int my_strnncoll_uca(CHARSET_INFO *cs,
    Works exactly the same as my_strnncoll_uca(),
    but ignores trailing spaces.

+    In the while() comparison these situations are possible:
+    1. (s_res>0) and (t_res>0) and (s_res == t_res)
+       Weights are the same so far, continue comparison
+    2. (s_res>0) and (t_res>0) and (s_res!=t_res)
+       A difference has been found, return.
+    3. (s_res>0) and (t_res<0)
+       We have reached the end of the second string, or found
+       an illegal multibyte sequence in the second string.
+       Compare the first string to an infinite array of
+       space characters until difference is found, or until
+       the end of the first string.
+    4. (s_res<0) and (t_res>0)   
+       We have reached the end of the first string, or found
+       an illegal multibyte sequence in the first string.
+       Compare the second string to an infinite array of
+       space characters until difference is found or until
+       the end of the second string.
+    5. (s_res<0) and (t_res<0)
+       Both scanners returned -1. It means we have reached
+       the end-of-string or illegal-sequence in both strings
+       at the same time. Return 0, strings are equal.
+  
  RETURN
    Difference between two strings, according to the collation:
    0               - means strings are equal
@@ -7070,9 +7092,6 @@ static int my_strnncollsp_uca(CHARSET_INFO *cs,
  int s_res;
  int t_res;
  
-  slen= cs->cset->lengthsp(cs, (char*) s, slen);
-  tlen= cs->cset->lengthsp(cs, (char*) t, tlen);
-  
  scanner_handler->init(&sscanner, cs, s, slen);
  scanner_handler->init(&tscanner, cs, t, tlen);
  
@@ -7080,6 +7099,37 @@ static int my_strnncollsp_uca(CHARSET_INFO *cs,
  {
    s_res= scanner_handler->next(&sscanner);
    t_res= scanner_handler->next(&tscanner);
+
+    if (s_res > 0 && t_res < 0)
+    { 
+      /* Calculate weight for SPACE character */
+      t_res= cs->sort_order_big[0][0x20 * cs->sort_order[0]];
+      
+      /* compare the first string to spaces */
+      do
+      {
+        if (s_res != t_res)
+          return (s_res - t_res);
+        s_res= scanner_handler->next(&sscanner);
+      } while (s_res > 0);
+      return 0;
+    }
+    
+    if (s_res < 0 && t_res > 0)
+    {
+      /* Calculate weight for SPACE character */
+      s_res= cs->sort_order_big[0][0x20 * cs->sort_order[0]];
+      
+      /* compare the second string to spaces */
+      do
+      {
+        if (s_res != t_res)
+          return (s_res - t_res);
+        t_res= scanner_handler->next(&tscanner);
+      } while (t_res > 0);
+      return 0;
+    }
+    
  } while ( s_res == t_res && s_res >0);
  
  return ( s_res - t_res );