Commit 5267ec8a authored by unknown's avatar unknown

Bug #6040 can't retrieve records with umlaut characters in case insensitive manner

parent 2310f00a
...@@ -365,6 +365,11 @@ uint my_instr_mb(struct charset_info_st *, ...@@ -365,6 +365,11 @@ uint my_instr_mb(struct charset_info_st *,
const char *s, uint s_length, const char *s, uint s_length,
my_match_t *match, uint nmatch); my_match_t *match, uint nmatch);
int my_wildcmp_unicode(CHARSET_INFO *cs,
const char *str, const char *str_end,
const char *wildstr, const char *wildend,
int escape, int w_one, int w_many,
MY_UNICASE_INFO **weights);
extern my_bool my_parse_charset_xml(const char *bug, uint len, extern my_bool my_parse_charset_xml(const char *bug, uint len,
int (*add)(CHARSET_INFO *cs)); int (*add)(CHARSET_INFO *cs));
......
...@@ -63,6 +63,15 @@ select 'A' like 'a' collate utf8_bin; ...@@ -63,6 +63,15 @@ select 'A' like 'a' collate utf8_bin;
select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%'); select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%');
_utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%') _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%')
1 1
select convert(_latin1'Gnter Andr' using utf8) like CONVERT(_latin1'GNTER%' USING utf8);
convert(_latin1'Gnter Andr' using utf8) like CONVERT(_latin1'GNTER%' USING utf8)
1
select CONVERT(_koi8r'' USING utf8) LIKE CONVERT(_koi8r'' USING utf8);
CONVERT(_koi8r'' USING utf8) LIKE CONVERT(_koi8r'' USING utf8)
1
select CONVERT(_koi8r'' USING utf8) LIKE CONVERT(_koi8r'' USING utf8);
CONVERT(_koi8r'' USING utf8) LIKE CONVERT(_koi8r'' USING utf8)
1
SELECT 'a' = 'a '; SELECT 'a' = 'a ';
'a' = 'a ' 'a' = 'a '
1 1
......
...@@ -33,6 +33,14 @@ select 'A' like 'a'; ...@@ -33,6 +33,14 @@ select 'A' like 'a';
select 'A' like 'a' collate utf8_bin; select 'A' like 'a' collate utf8_bin;
select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%'); select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%');
# Bug #6040: can't retrieve records with umlaut
# characters in case insensitive manner.
# Case insensitive search LIKE comparison
# was broken for multibyte characters:
select convert(_latin1'Gnter Andr' using utf8) like CONVERT(_latin1'GNTER%' USING utf8);
select CONVERT(_koi8r'' USING utf8) LIKE CONVERT(_koi8r'' USING utf8);
select CONVERT(_koi8r'' USING utf8) LIKE CONVERT(_koi8r'' USING utf8);
# #
# Check the following: # Check the following:
# "a" == "a " # "a" == "a "
......
...@@ -1231,172 +1231,14 @@ uint my_lengthsp_ucs2(CHARSET_INFO *cs __attribute__((unused)), ...@@ -1231,172 +1231,14 @@ uint my_lengthsp_ucs2(CHARSET_INFO *cs __attribute__((unused)),
} }
/*
** Compare string against string with wildcard
** 0 if matched
** -1 if not matched with wildcard
** 1 if matched with wildcard
*/
static
int my_wildcmp_ucs2(CHARSET_INFO *cs,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many,
MY_UNICASE_INFO **weights)
{
int result= -1; /* Not found, using wildcards */
my_wc_t s_wc, w_wc;
int scan, plane;
while (wildstr != wildend)
{
while (1)
{
scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr,
(const uchar*)wildend);
if (scan <= 0)
return 1;
if (w_wc == (my_wc_t)escape)
{
wildstr+= scan;
scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr,
(const uchar*)wildend);
if (scan <= 0)
return 1;
}
if (w_wc == (my_wc_t)w_many)
{
result= 1; /* Found an anchor char */
break;
}
wildstr+= scan;
scan= my_ucs2_uni(cs, &s_wc, (const uchar*)str, (const uchar*)str_end);
if (scan <=0)
return 1;
str+= scan;
if (w_wc == (my_wc_t)w_one)
{
result= 1; /* Found an anchor char */
}
else
{
if (weights)
{
plane=(s_wc>>8) & 0xFF;
s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc;
plane=(w_wc>>8) & 0xFF;
w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc;
}
if (s_wc != w_wc)
return 1; /* No match */
}
if (wildstr == wildend)
return (str != str_end); /* Match if both are at end */
}
if (w_wc == (my_wc_t)w_many)
{ /* Found w_many */
/* Remove any '%' and '_' from the wild search string */
for ( ; wildstr != wildend ; )
{
scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr,
(const uchar*)wildend);
if (scan <= 0)
return 1;
if (w_wc == (my_wc_t)w_many)
{
wildstr+= scan;
continue;
}
if (w_wc == (my_wc_t)w_one)
{
wildstr+= scan;
scan= my_ucs2_uni(cs, &s_wc, (const uchar*)str,
(const uchar*)str_end);
if (scan <=0)
return 1;
str+= scan;
continue;
}
break; /* Not a wild character */
}
if (wildstr == wildend)
return 0; /* Ok if w_many is last */
if (str == str_end)
return -1;
scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr,
(const uchar*)wildend);
if (scan <= 0)
return 1;
if (w_wc == (my_wc_t)escape)
{
wildstr+= scan;
scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr,
(const uchar*)wildend);
if (scan <= 0)
return 1;
}
while (1)
{
/* Skip until the first character from wildstr is found */
while (str != str_end)
{
scan= my_ucs2_uni(cs,&s_wc, (const uchar*)str,
(const uchar*)str_end);
if (scan <= 0)
return 1;
if (weights)
{
plane=(s_wc>>8) & 0xFF;
s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc;
plane=(w_wc>>8) & 0xFF;
w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc;
}
if (s_wc == w_wc)
break;
str+= scan;
}
if (str == str_end)
return -1;
result= my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend,escape,
w_one,w_many,weights);
if (result <= 0)
return result;
str+= scan;
}
}
}
return (str != str_end ? 1 : 0);
}
static static
int my_wildcmp_ucs2_ci(CHARSET_INFO *cs, int my_wildcmp_ucs2_ci(CHARSET_INFO *cs,
const char *str,const char *str_end, const char *str,const char *str_end,
const char *wildstr,const char *wildend, const char *wildstr,const char *wildend,
int escape, int w_one, int w_many) int escape, int w_one, int w_many)
{ {
return my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend, return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
escape,w_one,w_many,uni_plane); escape,w_one,w_many,uni_plane);
} }
...@@ -1406,8 +1248,8 @@ int my_wildcmp_ucs2_bin(CHARSET_INFO *cs, ...@@ -1406,8 +1248,8 @@ int my_wildcmp_ucs2_bin(CHARSET_INFO *cs,
const char *wildstr,const char *wildend, const char *wildstr,const char *wildend,
int escape, int w_one, int w_many) int escape, int w_one, int w_many)
{ {
return my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend, return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
escape,w_one,w_many,NULL); escape,w_one,w_many,NULL);
} }
......
...@@ -1518,6 +1518,161 @@ MY_UNICASE_INFO *uni_plane[256]={ ...@@ -1518,6 +1518,161 @@ MY_UNICASE_INFO *uni_plane[256]={
}; };
/*
** Compare string against string with wildcard
** This function is used in UTF8 and UCS2
**
** 0 if matched
** -1 if not matched with wildcard
** 1 if matched with wildcard
*/
int my_wildcmp_unicode(CHARSET_INFO *cs,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many,
MY_UNICASE_INFO **weights)
{
int result= -1; /* Not found, using wildcards */
my_wc_t s_wc, w_wc;
int scan, plane;
int (*mb_wc)(struct charset_info_st *cs, my_wc_t *wc,
const unsigned char *s,const unsigned char *e);
mb_wc= cs->cset->mb_wc;
while (wildstr != wildend)
{
while (1)
{
if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
(const uchar*)wildend)) <= 0)
return 1;
if (w_wc == (my_wc_t)escape)
{
wildstr+= scan;
if ((scan= mb_wc(cs,&w_wc, (const uchar*)wildstr,
(const uchar*)wildend)) <= 0)
return 1;
}
if (w_wc == (my_wc_t)w_many)
{
result= 1; /* Found an anchor char */
break;
}
wildstr+= scan;
if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
(const uchar*)str_end)) <=0)
return 1;
str+= scan;
if (w_wc == (my_wc_t)w_one)
{
result= 1; /* Found an anchor char */
}
else
{
if (weights)
{
plane=(s_wc>>8) & 0xFF;
s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc;
plane=(w_wc>>8) & 0xFF;
w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc;
}
if (s_wc != w_wc)
return 1; /* No match */
}
if (wildstr == wildend)
return (str != str_end); /* Match if both are at end */
}
if (w_wc == (my_wc_t)w_many)
{ /* Found w_many */
/* Remove any '%' and '_' from the wild search string */
for ( ; wildstr != wildend ; )
{
if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
(const uchar*)wildend)) <= 0)
return 1;
if (w_wc == (my_wc_t)w_many)
{
wildstr+= scan;
continue;
}
if (w_wc == (my_wc_t)w_one)
{
wildstr+= scan;
if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
(const uchar*)str_end)) <=0)
return 1;
str+= scan;
continue;
}
break; /* Not a wild character */
}
if (wildstr == wildend)
return 0; /* Ok if w_many is last */
if (str == str_end)
return -1;
if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
(const uchar*)wildend)) <=0)
return 1;
if (w_wc == (my_wc_t)escape)
{
wildstr+= scan;
if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
(const uchar*)wildend)) <=0)
return 1;
}
while (1)
{
/* Skip until the first character from wildstr is found */
while (str != str_end)
{
if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
(const uchar*)str_end)) <=0)
return 1;
if (weights)
{
plane=(s_wc>>8) & 0xFF;
s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc;
plane=(w_wc>>8) & 0xFF;
w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc;
}
if (s_wc == w_wc)
break;
str+= scan;
}
if (str == str_end)
return -1;
result= my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
escape, w_one, w_many,
weights);
if (result <= 0)
return result;
str+= scan;
}
}
}
return (str != str_end ? 1 : 0);
}
#endif #endif
...@@ -1992,6 +2147,17 @@ static int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t) ...@@ -1992,6 +2147,17 @@ static int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
return my_strncasecmp_utf8(cs, s, t, len); return my_strncasecmp_utf8(cs, s, t, len);
} }
static
int my_wildcmp_utf8(CHARSET_INFO *cs,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many)
{
return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
escape,w_one,w_many,uni_plane);
}
static int my_strnxfrm_utf8(CHARSET_INFO *cs, static int my_strnxfrm_utf8(CHARSET_INFO *cs,
uchar *dst, uint dstlen, uchar *dst, uint dstlen,
const uchar *src, uint srclen) const uchar *src, uint srclen)
...@@ -2060,7 +2226,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = ...@@ -2060,7 +2226,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
my_strnncollsp_utf8, my_strnncollsp_utf8,
my_strnxfrm_utf8, my_strnxfrm_utf8,
my_like_range_mb, my_like_range_mb,
my_wildcmp_mb, my_wildcmp_utf8,
my_strcasecmp_utf8, my_strcasecmp_utf8,
my_instr_mb, my_instr_mb,
my_hash_sort_utf8 my_hash_sort_utf8
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment