MDEV-17064 LIKE function has error behavior on the fields in which the collation is xxx_unicode_xx

Synchronizing the sources of two functions:
- my_wildcmp_uca_impl(), which handles utf8_unicode_ci
- my_wildcmp_unicode_impl(), which handles utf8_general_ci

The latter already had a fix for a similar MySQL bug in utf8_general_ci:
  Bug#11754 SET NAMES utf8 followed by SELECT "A\\" LIKE "A\\" returns 0

So the fix is now propagated to utf8_unicode_ci.

MDEV-17064 LIKE function has error behavior on the fields in which the collation is xxx_unicode_xx
Synchronizing the sources of two functions:
- my_wildcmp_uca_impl(), which handles utf8_unicode_ci
- my_wildcmp_unicode_impl(), which handles utf8_general_ci

The latter already had a fix for a similar MySQL bug in utf8_general_ci:
  Bug#11754 SET NAMES utf8 followed by SELECT "A\\" LIKE "A\\" returns 0

So the fix is now propagated to utf8_unicode_ci.
34f8a407 · Alexander Barkov · ae3fe14c · 34f8a407 · 34f8a407 · 34f8a407
Commit 34f8a407 authored Oct 15, 2018 by Alexander Barkov
4 changed files
--- a/mysql-test/r/ctype_uca.result
+++ b/mysql-test/r/ctype_uca.result
@@ -13571,5 +13571,26 @@ Warnings:
 Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where ((`test`.`t1`.`a` = 'oe') and (`test`.`t1`.`a` = 'oe'))
 DROP TABLE t1;
 #
+# MDEV-17064 LIKE function has error behavior on the fields in which the collation is xxx_unicode_xx
+#
+CREATE TABLE t1 (name VARCHAR(20) CHARACTER SET utf8 COLLATE utf8_unicode_ci);
+INSERT INTO t1 VALUES ('radio! test');
+SELECT * FROM t1 WHERE name LIKE '%!!%' ESCAPE '!';
+name
+radio! test
+ALTER TABLE t1 CHANGE COLUMN name name VARCHAR(20) CHARACTER SET 'utf8' COLLATE 'utf8_general_ci';
+SELECT * FROM t1 WHERE name LIKE '%!!%' ESCAPE '!';
+name
+radio! test
+DROP TABLE t1;
+CREATE TABLE t1 (name VARCHAR(20) CHARACTER SET utf8 COLLATE utf8_unicode_ci);
+INSERT INTO t1 VALUES ('radio! test');
+SELECT name LIKE '%!!%' ESCAPE '!' AS c1,
+name LIKE '%!!%' COLLATE utf8_general_ci ESCAPE '!' AS c2
+FROM t1;
+c1	c2
+1	1
+DROP TABLE t1;
+#
 # End of MariaDB-10.0 tests
 #
--- a/mysql-test/t/ctype_uca.test
+++ b/mysql-test/t/ctype_uca.test
@@ -617,6 +617,24 @@ EXPLAIN EXTENDED SELECT * FROM t1 WHERE a='oe' AND a='oe' COLLATE utf8_german2_c
 EXPLAIN EXTENDED SELECT * FROM t1 WHERE a='oe' COLLATE utf8_german2_ci AND a='oe';
 DROP TABLE t1;
+--echo #
+--echo # MDEV-17064 LIKE function has error behavior on the fields in which the collation is xxx_unicode_xx
+--echo #
+CREATE TABLE t1 (name VARCHAR(20) CHARACTER SET utf8 COLLATE utf8_unicode_ci);
+INSERT INTO t1 VALUES ('radio! test');
+SELECT * FROM t1 WHERE name LIKE '%!!%' ESCAPE '!';
+ALTER TABLE t1 CHANGE COLUMN name name VARCHAR(20) CHARACTER SET 'utf8' COLLATE 'utf8_general_ci';
+SELECT * FROM t1 WHERE name LIKE '%!!%' ESCAPE '!';
+DROP TABLE t1;
+CREATE TABLE t1 (name VARCHAR(20) CHARACTER SET utf8 COLLATE utf8_unicode_ci);
+INSERT INTO t1 VALUES ('radio! test');
+SELECT name LIKE '%!!%' ESCAPE '!' AS c1,
+       name LIKE '%!!%' COLLATE utf8_general_ci ESCAPE '!' AS c2
+FROM t1;
+DROP TABLE t1;
 --echo #
 --echo # End of MariaDB-10.0 tests
 --echo #
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -21069,11 +21069,11 @@ int my_wildcmp_uca_impl(CHARSET_INFO *cs,
                        const char *wildstr,const char *wildend,
                        int escape, int w_one, int w_many, int recurse_level)
 {
-  int result= -1;			/* Not found, using wildcards */
+  int result= -1;                             /* Not found, using wildcards */
  my_wc_t s_wc, w_wc;
  int scan;
  my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
  if (my_string_stack_guard && my_string_stack_guard(recurse_level))
    return 1;
  while (wildstr != wildend)
@@ -21082,119 +21082,121 @@ int my_wildcmp_uca_impl(CHARSET_INFO *cs,
    {
      my_bool escaped= 0;
      if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
-		       (const uchar*)wildend)) <= 0)
+                       (const uchar*)wildend)) <= 0)
-	return 1;
+        return 1;
-      if (w_wc == (my_wc_t)w_many)
+      if (w_wc == (my_wc_t) w_many)
      {
-        result= 1;				/* Found an anchor char */
+        result= 1;                                /* Found an anchor char */
        break;
      }
      wildstr+= scan;
-      if (w_wc ==  (my_wc_t)escape)
+      if (w_wc ==  (my_wc_t) escape && wildstr < wildend)
      {
        if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
-			(const uchar*)wildend)) <= 0)
+                         (const uchar*)wildend)) <= 0)
          return 1;
        wildstr+= scan;
        escaped= 1;
      }
      if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
-      		       (const uchar*)str_end)) <= 0)
+                       (const uchar*)str_end)) <= 0)
        return 1;
      str+= scan;
-      if (!escaped && w_wc == (my_wc_t)w_one)
+      if (!escaped && w_wc == (my_wc_t) w_one)
      {
-        result= 1;				/* Found an anchor char */
+        result= 1;                                /* Found an anchor char */
      }
      else
      {
        if (my_uca_charcmp(cs,s_wc,w_wc))
-          return 1;
+          return 1;                               /* No match */
      }
      if (wildstr == wildend)
-	return (str != str_end);		/* Match if both are at end */
+        return (str != str_end);                  /* Match if both are at end */
    }
+    if (w_wc == (my_wc_t) w_many)
-    if (w_wc == (my_wc_t)w_many)
+    {                                             /* Found w_many */
-    {						/* Found w_many */
      /* Remove any '%' and '_' from the wild search string */
      for ( ; wildstr != wildend ; )
      {
        if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
-			 (const uchar*)wildend)) <= 0)
+                         (const uchar*)wildend)) <= 0)
          return 1;
-	if (w_wc == (my_wc_t)w_many)
+        if (w_wc == (my_wc_t) w_many)
-	{
+        {
-	  wildstr+= scan;
+          wildstr+= scan;
-	  continue;
+          continue;
-	} 
+        }
-	if (w_wc == (my_wc_t)w_one)
+        if (w_wc == (my_wc_t) w_one)
-	{
+        {
-	  wildstr+= scan;
+          wildstr+= scan;
-	  if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
+          if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
-			   (const uchar*)str_end)) <= 0)
+                           (const uchar*)str_end)) <= 0)
            return 1;
          str+= scan;
-	  continue;
+          continue;
-	}
+        }
-	break;					/* Not a wild character */
+        break;                                        /* Not a wild character */
      }
      if (wildstr == wildend)
-	return 0;				/* Ok if w_many is last */
+        return 0;                                /* Ok if w_many is last */
      if (str == str_end)
-	return -1;
+        return -1;
      if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
-		       (const uchar*)wildend)) <= 0)
+                       (const uchar*)wildend)) <= 0)
        return 1;
+      wildstr+= scan;
-      if (w_wc ==  (my_wc_t)escape)
+      if (w_wc ==  (my_wc_t) escape)
      {
-        wildstr+= scan;
+        if (wildstr < wildend)
-        if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
+        {
-			 (const uchar*)wildend)) <= 0)
+          if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
-          return 1;
+                           (const uchar*)wildend)) <= 0)
+            return 1;
+          wildstr+= scan;
+        }
      }
      while (1)
      {
        /* Skip until the first character from wildstr is found */
        while (str != str_end)
        {
          if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
-			   (const uchar*)str_end)) <= 0)
+                           (const uchar*)str_end)) <= 0)
            return 1;
          if (!my_uca_charcmp(cs,s_wc,w_wc))
            break;
          str+= scan;
        }
        if (str == str_end)
          return -1;
+        str+= scan;
        result= my_wildcmp_uca_impl(cs, str, str_end, wildstr, wildend,
-                                    escape, w_one, w_many, recurse_level+1);
+                                    escape, w_one, w_many,
+                                    recurse_level + 1);
        if (result <= 0)
          return result;
+      }
-        str+= scan;
-      } 
    }
  }
  return (str != str_end ? 1 : 0);
 }
 int my_wildcmp_uca(CHARSET_INFO *cs,
                   const char *str,const char *str_end,
                   const char *wildstr,const char *wildend,
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -4400,9 +4400,7 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs,
  int result= -1;                             /* Not found, using wildcards */
  my_wc_t s_wc, w_wc;
  int scan;
-  int (*mb_wc)(CHARSET_INFO *, my_wc_t *,
+  my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
-               const uchar *, const uchar *);
-  mb_wc= cs->cset->mb_wc;
  if (my_string_stack_guard && my_string_stack_guard(recurse_level))
    return 1;
@@ -4430,12 +4428,12 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs,
        wildstr+= scan;
        escaped= 1;
      }
      if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
                       (const uchar*)str_end)) <= 0)
        return 1;
      str+= scan;
      if (!escaped && w_wc == (my_wc_t) w_one)
      {
        result= 1;                                /* Found an anchor char */
@@ -4453,86 +4451,84 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs,
      if (wildstr == wildend)
        return (str != str_end);                  /* Match if both are at end */
    }
    if (w_wc == (my_wc_t) w_many)
    {                                             /* Found w_many */
      /* Remove any '%' and '_' from the wild search string */
      for ( ; wildstr != wildend ; )
      {
        if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
                         (const uchar*)wildend)) <= 0)
          return 1;
-        if (w_wc == (my_wc_t)w_many)
+        if (w_wc == (my_wc_t) w_many)
        {
          wildstr+= scan;
          continue;
        } 
-        if (w_wc == (my_wc_t)w_one)
+        if (w_wc == (my_wc_t) w_one)
        {
          wildstr+= scan;
          if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
-                           (const uchar*)str_end)) <=0)
+                           (const uchar*)str_end)) <= 0)
            return 1;
          str+= scan;
          continue;
        }
        break;                                        /* Not a wild character */
      }
      if (wildstr == wildend)
        return 0;                                /* Ok if w_many is last */
      if (str == str_end)
        return -1;
      if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
-                       (const uchar*)wildend)) <=0)
+                       (const uchar*)wildend)) <= 0)
        return 1;
      wildstr+= scan;
-      if (w_wc ==  (my_wc_t)escape)
+      if (w_wc ==  (my_wc_t) escape)
      {
        if (wildstr < wildend)
        {
          if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
-                           (const uchar*)wildend)) <=0)
+                           (const uchar*)wildend)) <= 0)
            return 1;
          wildstr+= scan;
        }
      }
      while (1)
      {
        /* Skip until the first character from wildstr is found */
        while (str != str_end)
        {
          if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
-                           (const uchar*)str_end)) <=0)
+                           (const uchar*)str_end)) <= 0)
            return 1;
          if (weights)
          {
            my_tosort_unicode(weights, &s_wc, cs->state);
            my_tosort_unicode(weights, &w_wc, cs->state);
          }
          if (s_wc == w_wc)
            break;
          str+= scan;
        }
        if (str == str_end)
          return -1;
        str+= scan;
        result= my_wildcmp_unicode_impl(cs, str, str_end, wildstr, wildend,
                                        escape, w_one, w_many,
                                        weights, recurse_level + 1);
        if (result <= 0)
          return result;
-      } 
+      }
    }
  }
  return (str != str_end ? 1 : 0);