Backport of Bug#37129: LDML lacks <i> rule

7e8b208d · Alexander Barkov · aea5f15e · 7e8b208d · 7e8b208d · 7e8b208d
Commit 7e8b208d authored Nov 09, 2009 by Alexander Barkov
5 changed files
--- a/mysql-test/r/ctype_ldml.result
+++ b/mysql-test/r/ctype_ldml.result
--- a/mysql-test/std_data/Index.xml
+++ b/mysql-test/std_data/Index.xml
 <charsets>

  <charset name="utf8">
+    <collation name="utf8_phone_ci" id="352">
+      <rules>
+        <reset>\u0000</reset> <!-- reset anchor: every <i> entry below is tailored as identical to this character -->
+          <i>\u0020</i> <!-- space -->
+          <i>\u0028</i> <!-- left parenthesis -->
+          <i>\u0029</i> <!-- right parenthesis -->
+          <i>\u002B</i> <!-- plus -->
+          <i>\u002D</i> <!-- hyphen -->
+      </rules>
+    </collation>
    <collation name="utf8_test_ci" id="353">
      <rules>
        <reset>a</reset>
-        <s>b</s>
+        <i>b</i> <!-- "b" is tailored as identical to the reset anchor "a" -->
      </rules>
    </collation>


--- a/mysql-test/t/ctype_ldml.test
+++ b/mysql-test/t/ctype_ldml.test
@@ -4,11 +4,30 @@
 drop table if exists t1;
 --enable_warnings

+--echo In the following tests we change the order of letter "b"
+--echo making it equal to letter "a", and check that it works
+--echo with all Unicode character sets
 set names utf8;

 --replace_result $MYSQL_TEST_DIR MYSQL_TEST_DIR
 show variables like 'character_sets_dir%';

+show collation like 'utf8_phone_ci';
+CREATE TABLE t1 (
+ name VARCHAR(64),
+ phone VARCHAR(64) CHARACTER SET utf8 COLLATE utf8_phone_ci
+);
+INSERT INTO t1 VALUES ('Svoj','+7 912 800 80 02');
+INSERT INTO t1 VALUES ('Hf','+7 (912) 800 80 04');
+INSERT INTO t1 VALUES ('Bar','+7-912-800-80-01');
+INSERT INTO t1 VALUES ('Ramil','(7912) 800 80 03');
+INSERT INTO t1 VALUES ('Sanja','+380 (912) 8008005');
+SELECT * FROM t1 ORDER BY phone;
+SELECT * FROM t1 WHERE phone='+7(912)800-80-01';
+SELECT * FROM t1 WHERE phone='79128008001';
+SELECT * FROM t1 WHERE phone='7 9 1 2 8 0 0 8 0 0 1';
+DROP TABLE t1;
+
 show collation like 'utf8_test_ci';
 create table t1 (c1 char(1) character set utf8 collate utf8_test_ci);
 insert into t1 values ('a');

--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -7661,6 +7661,13 @@ static my_coll_lexem_num my_coll_lexem_next(MY_COLL_LEXEM *lexem)
      goto ex;
    }
    
+    if (beg[0] == '=')
+    {
+      beg++;
+      rc= MY_COLL_LEXEM_DIFF;
+      goto ex;
+    }
+    
    if (beg[0] == '<')
    {
      for (beg++, lexem->diff= 1;
@@ -7821,6 +7828,10 @@ static int my_coll_rule_parse(MY_COLL_RULE *rule, size_t mitems,
          item.diff[1]= 0;
          item.diff[2]= 0;
        }
+        else if (lexem.diff == 0)
+        {
+          item.diff[0]= item.diff[1]= item.diff[2]= 0;
+        }
        if (nitems >= mitems)
        {
          my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Too many rules");

--- a/strings/ctype.c
+++ b/strings/ctype.c
@@ -74,6 +74,7 @@ struct my_cs_file_section_st
 #define	_CS_DIFF1	19
 #define	_CS_DIFF2	20
 #define	_CS_DIFF3	21
+#define	_CS_IDENTICAL	22


 static struct my_cs_file_section_st sec[] =
@@ -108,6 +109,7 @@ static struct my_cs_file_section_st sec[] =
  {_CS_DIFF1,		"charsets/charset/collation/rules/p"},
  {_CS_DIFF2,		"charsets/charset/collation/rules/s"},
  {_CS_DIFF3,		"charsets/charset/collation/rules/t"},
+  {_CS_IDENTICAL,	"charsets/charset/collation/rules/i"},
  {0,	NULL}
 };

@@ -269,6 +271,7 @@ static int cs_value(MY_XML_PARSER *st,const char *attr, size_t len)
  case _CS_DIFF1:
  case _CS_DIFF2:
  case _CS_DIFF3:
+  case _CS_IDENTICAL:
    {
      /*
        Convert collation description from
@@ -276,7 +279,7 @@ static int cs_value(MY_XML_PARSER *st,const char *attr, size_t len)
        into ICU Collation Customization expression.
      */
      char arg[16];
-      const char *cmd[]= {"&","<","<<","<<<"};
+      const char *cmd[]= {"&","<","<<","<<<","="};
      i->cs.tailoring= i->tailoring;
      mstr(arg,attr,len,sizeof(arg)-1);
      if (i->tailoring_length + 20 < sizeof(i->tailoring))