Commit 34f8a407 authored by Alexander Barkov

MDEV-17064 LIKE function has error behavior on the fields in which the collation is xxx_unicode_xx

Synchronizing sources in:
- my_wildcmp_uca_impl() handling utf8_unicode_ci
- my_wildcmp_unicode_impl() handling utf8_general_ci

The latter already contains a fix for a similar MySQL bug in utf8_general_ci:
Bug#11754 SET NAMES utf8 followed by SELECT "A\\" LIKE "A\\" returns 0
So the fix is now propagated to utf8_unicode_ci.
parent ae3fe14c
...@@ -13571,5 +13571,26 @@ Warnings: ...@@ -13571,5 +13571,26 @@ Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where ((`test`.`t1`.`a` = 'oe') and (`test`.`t1`.`a` = 'oe')) Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where ((`test`.`t1`.`a` = 'oe') and (`test`.`t1`.`a` = 'oe'))
DROP TABLE t1; DROP TABLE t1;
# #
# MDEV-17064 LIKE function has error behavior on the fields in which the collation is xxx_unicode_xx
#
CREATE TABLE t1 (name VARCHAR(20) CHARACTER SET utf8 COLLATE utf8_unicode_ci);
INSERT INTO t1 VALUES ('radio! test');
SELECT * FROM t1 WHERE name LIKE '%!!%' ESCAPE '!';
name
radio! test
ALTER TABLE t1 CHANGE COLUMN name name VARCHAR(20) CHARACTER SET 'utf8' COLLATE 'utf8_general_ci';
SELECT * FROM t1 WHERE name LIKE '%!!%' ESCAPE '!';
name
radio! test
DROP TABLE t1;
CREATE TABLE t1 (name VARCHAR(20) CHARACTER SET utf8 COLLATE utf8_unicode_ci);
INSERT INTO t1 VALUES ('radio! test');
SELECT name LIKE '%!!%' ESCAPE '!' AS c1,
name LIKE '%!!%' COLLATE utf8_general_ci ESCAPE '!' AS c2
FROM t1;
c1 c2
1 1
DROP TABLE t1;
#
# End of MariaDB-10.0 tests # End of MariaDB-10.0 tests
# #
...@@ -617,6 +617,24 @@ EXPLAIN EXTENDED SELECT * FROM t1 WHERE a='oe' AND a='oe' COLLATE utf8_german2_c ...@@ -617,6 +617,24 @@ EXPLAIN EXTENDED SELECT * FROM t1 WHERE a='oe' AND a='oe' COLLATE utf8_german2_c
EXPLAIN EXTENDED SELECT * FROM t1 WHERE a='oe' COLLATE utf8_german2_ci AND a='oe'; EXPLAIN EXTENDED SELECT * FROM t1 WHERE a='oe' COLLATE utf8_german2_ci AND a='oe';
DROP TABLE t1; DROP TABLE t1;
--echo #
--echo # MDEV-17064 LIKE function has error behavior on the fields in which the collation is xxx_unicode_xx
--echo #
CREATE TABLE t1 (name VARCHAR(20) CHARACTER SET utf8 COLLATE utf8_unicode_ci);
INSERT INTO t1 VALUES ('radio! test');
SELECT * FROM t1 WHERE name LIKE '%!!%' ESCAPE '!';
ALTER TABLE t1 CHANGE COLUMN name name VARCHAR(20) CHARACTER SET 'utf8' COLLATE 'utf8_general_ci';
SELECT * FROM t1 WHERE name LIKE '%!!%' ESCAPE '!';
DROP TABLE t1;
CREATE TABLE t1 (name VARCHAR(20) CHARACTER SET utf8 COLLATE utf8_unicode_ci);
INSERT INTO t1 VALUES ('radio! test');
SELECT name LIKE '%!!%' ESCAPE '!' AS c1,
name LIKE '%!!%' COLLATE utf8_general_ci ESCAPE '!' AS c2
FROM t1;
DROP TABLE t1;
--echo # --echo #
--echo # End of MariaDB-10.0 tests --echo # End of MariaDB-10.0 tests
--echo # --echo #
...@@ -21069,11 +21069,11 @@ int my_wildcmp_uca_impl(CHARSET_INFO *cs, ...@@ -21069,11 +21069,11 @@ int my_wildcmp_uca_impl(CHARSET_INFO *cs,
const char *wildstr,const char *wildend, const char *wildstr,const char *wildend,
int escape, int w_one, int w_many, int recurse_level) int escape, int w_one, int w_many, int recurse_level)
{ {
int result= -1; /* Not found, using wildcards */ int result= -1; /* Not found, using wildcards */
my_wc_t s_wc, w_wc; my_wc_t s_wc, w_wc;
int scan; int scan;
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
if (my_string_stack_guard && my_string_stack_guard(recurse_level)) if (my_string_stack_guard && my_string_stack_guard(recurse_level))
return 1; return 1;
while (wildstr != wildend) while (wildstr != wildend)
...@@ -21082,119 +21082,121 @@ int my_wildcmp_uca_impl(CHARSET_INFO *cs, ...@@ -21082,119 +21082,121 @@ int my_wildcmp_uca_impl(CHARSET_INFO *cs,
{ {
my_bool escaped= 0; my_bool escaped= 0;
if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
(const uchar*)wildend)) <= 0) (const uchar*)wildend)) <= 0)
return 1; return 1;
if (w_wc == (my_wc_t)w_many) if (w_wc == (my_wc_t) w_many)
{ {
result= 1; /* Found an anchor char */ result= 1; /* Found an anchor char */
break; break;
} }
wildstr+= scan; wildstr+= scan;
if (w_wc == (my_wc_t)escape) if (w_wc == (my_wc_t) escape && wildstr < wildend)
{ {
if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
(const uchar*)wildend)) <= 0) (const uchar*)wildend)) <= 0)
return 1; return 1;
wildstr+= scan; wildstr+= scan;
escaped= 1; escaped= 1;
} }
if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
(const uchar*)str_end)) <= 0) (const uchar*)str_end)) <= 0)
return 1; return 1;
str+= scan; str+= scan;
if (!escaped && w_wc == (my_wc_t)w_one) if (!escaped && w_wc == (my_wc_t) w_one)
{ {
result= 1; /* Found an anchor char */ result= 1; /* Found an anchor char */
} }
else else
{ {
if (my_uca_charcmp(cs,s_wc,w_wc)) if (my_uca_charcmp(cs,s_wc,w_wc))
return 1; return 1; /* No match */
} }
if (wildstr == wildend) if (wildstr == wildend)
return (str != str_end); /* Match if both are at end */ return (str != str_end); /* Match if both are at end */
} }
if (w_wc == (my_wc_t) w_many)
if (w_wc == (my_wc_t)w_many) { /* Found w_many */
{ /* Found w_many */
/* Remove any '%' and '_' from the wild search string */ /* Remove any '%' and '_' from the wild search string */
for ( ; wildstr != wildend ; ) for ( ; wildstr != wildend ; )
{ {
if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
(const uchar*)wildend)) <= 0) (const uchar*)wildend)) <= 0)
return 1; return 1;
if (w_wc == (my_wc_t)w_many) if (w_wc == (my_wc_t) w_many)
{ {
wildstr+= scan; wildstr+= scan;
continue; continue;
} }
if (w_wc == (my_wc_t)w_one) if (w_wc == (my_wc_t) w_one)
{ {
wildstr+= scan; wildstr+= scan;
if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
(const uchar*)str_end)) <= 0) (const uchar*)str_end)) <= 0)
return 1; return 1;
str+= scan; str+= scan;
continue; continue;
} }
break; /* Not a wild character */ break; /* Not a wild character */
} }
if (wildstr == wildend) if (wildstr == wildend)
return 0; /* Ok if w_many is last */ return 0; /* Ok if w_many is last */
if (str == str_end) if (str == str_end)
return -1; return -1;
if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
(const uchar*)wildend)) <= 0) (const uchar*)wildend)) <= 0)
return 1; return 1;
wildstr+= scan;
if (w_wc == (my_wc_t)escape)
if (w_wc == (my_wc_t) escape)
{ {
wildstr+= scan; if (wildstr < wildend)
if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, {
(const uchar*)wildend)) <= 0) if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
return 1; (const uchar*)wildend)) <= 0)
return 1;
wildstr+= scan;
}
} }
while (1) while (1)
{ {
/* Skip until the first character from wildstr is found */ /* Skip until the first character from wildstr is found */
while (str != str_end) while (str != str_end)
{ {
if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
(const uchar*)str_end)) <= 0) (const uchar*)str_end)) <= 0)
return 1; return 1;
if (!my_uca_charcmp(cs,s_wc,w_wc)) if (!my_uca_charcmp(cs,s_wc,w_wc))
break; break;
str+= scan; str+= scan;
} }
if (str == str_end) if (str == str_end)
return -1; return -1;
str+= scan;
result= my_wildcmp_uca_impl(cs, str, str_end, wildstr, wildend, result= my_wildcmp_uca_impl(cs, str, str_end, wildstr, wildend,
escape, w_one, w_many, recurse_level+1); escape, w_one, w_many,
recurse_level + 1);
if (result <= 0) if (result <= 0)
return result; return result;
}
str+= scan;
}
} }
} }
return (str != str_end ? 1 : 0); return (str != str_end ? 1 : 0);
} }
int my_wildcmp_uca(CHARSET_INFO *cs, int my_wildcmp_uca(CHARSET_INFO *cs,
const char *str,const char *str_end, const char *str,const char *str_end,
const char *wildstr,const char *wildend, const char *wildstr,const char *wildend,
...@@ -4400,9 +4400,7 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs, ...@@ -4400,9 +4400,7 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs,
int result= -1; /* Not found, using wildcards */ int result= -1; /* Not found, using wildcards */
my_wc_t s_wc, w_wc; my_wc_t s_wc, w_wc;
int scan; int scan;
int (*mb_wc)(CHARSET_INFO *, my_wc_t *, my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
const uchar *, const uchar *);
mb_wc= cs->cset->mb_wc;
if (my_string_stack_guard && my_string_stack_guard(recurse_level)) if (my_string_stack_guard && my_string_stack_guard(recurse_level))
return 1; return 1;
...@@ -4430,12 +4428,12 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs, ...@@ -4430,12 +4428,12 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs,
wildstr+= scan; wildstr+= scan;
escaped= 1; escaped= 1;
} }
if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
(const uchar*)str_end)) <= 0) (const uchar*)str_end)) <= 0)
return 1; return 1;
str+= scan; str+= scan;
if (!escaped && w_wc == (my_wc_t) w_one) if (!escaped && w_wc == (my_wc_t) w_one)
{ {
result= 1; /* Found an anchor char */ result= 1; /* Found an anchor char */
...@@ -4453,86 +4451,84 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs, ...@@ -4453,86 +4451,84 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs,
if (wildstr == wildend) if (wildstr == wildend)
return (str != str_end); /* Match if both are at end */ return (str != str_end); /* Match if both are at end */
} }
if (w_wc == (my_wc_t) w_many) if (w_wc == (my_wc_t) w_many)
{ /* Found w_many */ { /* Found w_many */
/* Remove any '%' and '_' from the wild search string */ /* Remove any '%' and '_' from the wild search string */
for ( ; wildstr != wildend ; ) for ( ; wildstr != wildend ; )
{ {
if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
(const uchar*)wildend)) <= 0) (const uchar*)wildend)) <= 0)
return 1; return 1;
if (w_wc == (my_wc_t)w_many) if (w_wc == (my_wc_t) w_many)
{ {
wildstr+= scan; wildstr+= scan;
continue; continue;
} }
if (w_wc == (my_wc_t)w_one) if (w_wc == (my_wc_t) w_one)
{ {
wildstr+= scan; wildstr+= scan;
if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
(const uchar*)str_end)) <=0) (const uchar*)str_end)) <= 0)
return 1; return 1;
str+= scan; str+= scan;
continue; continue;
} }
break; /* Not a wild character */ break; /* Not a wild character */
} }
if (wildstr == wildend) if (wildstr == wildend)
return 0; /* Ok if w_many is last */ return 0; /* Ok if w_many is last */
if (str == str_end) if (str == str_end)
return -1; return -1;
if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
(const uchar*)wildend)) <=0) (const uchar*)wildend)) <= 0)
return 1; return 1;
wildstr+= scan; wildstr+= scan;
if (w_wc == (my_wc_t)escape) if (w_wc == (my_wc_t) escape)
{ {
if (wildstr < wildend) if (wildstr < wildend)
{ {
if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
(const uchar*)wildend)) <=0) (const uchar*)wildend)) <= 0)
return 1; return 1;
wildstr+= scan; wildstr+= scan;
} }
} }
while (1) while (1)
{ {
/* Skip until the first character from wildstr is found */ /* Skip until the first character from wildstr is found */
while (str != str_end) while (str != str_end)
{ {
if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
(const uchar*)str_end)) <=0) (const uchar*)str_end)) <= 0)
return 1; return 1;
if (weights) if (weights)
{ {
my_tosort_unicode(weights, &s_wc, cs->state); my_tosort_unicode(weights, &s_wc, cs->state);
my_tosort_unicode(weights, &w_wc, cs->state); my_tosort_unicode(weights, &w_wc, cs->state);
} }
if (s_wc == w_wc) if (s_wc == w_wc)
break; break;
str+= scan; str+= scan;
} }
if (str == str_end) if (str == str_end)
return -1; return -1;
str+= scan; str+= scan;
result= my_wildcmp_unicode_impl(cs, str, str_end, wildstr, wildend, result= my_wildcmp_unicode_impl(cs, str, str_end, wildstr, wildend,
escape, w_one, w_many, escape, w_one, w_many,
weights, recurse_level + 1); weights, recurse_level + 1);
if (result <= 0) if (result <= 0)
return result; return result;
} }
} }
} }
return (str != str_end ? 1 : 0); return (str != str_end ? 1 : 0);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment