Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
mariadb
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
mariadb
Commits
1596d465
Commit
1596d465
authored
Oct 18, 2004
by
bar@mysql.com
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Bug #6040 can't retrieve records with umlaut characters in case insensitive manner
parent
a95c1173
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
193 additions
and
163 deletions
+193
-163
include/m_ctype.h
include/m_ctype.h
+5
-0
mysql-test/r/ctype_utf8.result
mysql-test/r/ctype_utf8.result
+9
-0
mysql-test/t/ctype_utf8.test
mysql-test/t/ctype_utf8.test
+8
-0
strings/ctype-ucs2.c
strings/ctype-ucs2.c
+4
-162
strings/ctype-utf8.c
strings/ctype-utf8.c
+167
-1
No files found.
include/m_ctype.h
View file @
1596d465
...
...
@@ -365,6 +365,11 @@ uint my_instr_mb(struct charset_info_st *,
const
char
*
s
,
uint
s_length
,
my_match_t
*
match
,
uint
nmatch
);
/*
  Wildcard (LIKE-style) comparison shared by the Unicode character sets.

  cs               character set; its cset->mb_wc decoder is used to read
                   code points from both the subject and the pattern
  str, str_end     subject string, as a [begin, end) byte range
  wildstr, wildend pattern, as a [begin, end) byte range
  escape           pattern escape character
  w_one            pattern character standing for exactly one character
  w_many           pattern character standing for any run of characters
  weights          per-plane table whose .sort values are compared instead
                   of the raw code points; NULL compares raw code points

  Returns 0 when the string matches the pattern and non-zero otherwise
  (the definition returns 1 for an ordinary mismatch and -1 when the
  subject ran out while searching after a w_many).
*/
int
my_wildcmp_unicode
(
CHARSET_INFO
*
cs
,
const
char
*
str
,
const
char
*
str_end
,
const
char
*
wildstr
,
const
char
*
wildend
,
int
escape
,
int
w_one
,
int
w_many
,
MY_UNICASE_INFO
**
weights
);
extern
my_bool
my_parse_charset_xml
(
const
char
*
bug
,
uint
len
,
int
(
*
add
)(
CHARSET_INFO
*
cs
));
...
...
mysql-test/r/ctype_utf8.result
View file @
1596d465
...
...
@@ -63,6 +63,15 @@ select 'A' like 'a' collate utf8_bin;
select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%');
_utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%')
1
select convert(_latin1'Gnter Andr' using utf8) like CONVERT(_latin1'GNTER%' USING utf8);
convert(_latin1'Gnter Andr' using utf8) like CONVERT(_latin1'GNTER%' USING utf8)
1
select CONVERT(_koi8r'' USING utf8) LIKE CONVERT(_koi8r'' USING utf8);
CONVERT(_koi8r'' USING utf8) LIKE CONVERT(_koi8r'' USING utf8)
1
select CONVERT(_koi8r'' USING utf8) LIKE CONVERT(_koi8r'' USING utf8);
CONVERT(_koi8r'' USING utf8) LIKE CONVERT(_koi8r'' USING utf8)
1
SELECT 'a' = 'a ';
'a' = 'a '
1
...
...
mysql-test/t/ctype_utf8.test
View file @
1596d465
...
...
@@ -33,6 +33,14 @@ select 'A' like 'a';
select
'A'
like
'a'
collate
utf8_bin
;
select
_utf8
0xD0B0D0B1D0B2
like
concat
(
_utf8
'%'
,
_utf8
0xD0B1
,
_utf8
'%'
);
# Bug #6040: can't retrieve records with umlaut
# characters in case insensitive manner.
# Case insensitive search LIKE comparison
# was broken for multibyte characters:
select
convert
(
_latin1
'Gnter Andr'
using
utf8
)
like
CONVERT
(
_latin1
'GNTER%'
USING
utf8
);
select
CONVERT
(
_koi8r
''
USING
utf8
)
LIKE
CONVERT
(
_koi8r
''
USING
utf8
);
select
CONVERT
(
_koi8r
''
USING
utf8
)
LIKE
CONVERT
(
_koi8r
''
USING
utf8
);
#
# Check the following:
# "a" == "a "
...
...
strings/ctype-ucs2.c
View file @
1596d465
...
...
@@ -1231,172 +1231,14 @@ uint my_lengthsp_ucs2(CHARSET_INFO *cs __attribute__((unused)),
}
/*
** Compare string against string with wildcard (UCS2 entry point)
**
** The matching algorithm used to be duplicated here, line for line, from
** my_wildcmp_unicode(); the only difference was that characters were
** decoded by calling my_ucs2_uni() directly instead of going through
** cs->cset->mb_wc. The duplicate is replaced by a call to the shared
** routine so that both character sets always agree on LIKE semantics.
**
** NOTE(review): this assumes that cs->cset->mb_wc of every charset passed
** in here is my_ucs2_uni - confirm against the UCS2 charset handler.
**
** weights  per-plane table whose .sort values are compared instead of the
**          raw code points, or NULL to compare raw code points
**
** Returns the result of my_wildcmp_unicode():
**    0 if matched
**    non-zero if not matched
*/

static int my_wildcmp_ucs2(CHARSET_INFO *cs,
                           const char *str, const char *str_end,
                           const char *wildstr, const char *wildend,
                           int escape, int w_one, int w_many,
                           MY_UNICASE_INFO **weights)
{
  return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
                            escape, w_one, w_many, weights);
}
/*
  Wildcard comparison for the UCS2 collation that compares by weight.

  Forwards to the shared my_wildcmp_unicode() routine, passing uni_plane
  as the weight table so that characters are compared through their
  .sort values rather than their raw code points.

  The block previously carried two return statements: a leftover call to
  the old UCS2-only matcher followed by the call below, which could never
  be reached. Only the shared-routine call is kept.

  Returns whatever my_wildcmp_unicode() returns: 0 on match, non-zero
  otherwise.
*/
static int my_wildcmp_ucs2_ci(CHARSET_INFO *cs,
                              const char *str, const char *str_end,
                              const char *wildstr, const char *wildend,
                              int escape, int w_one, int w_many)
{
  return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
                            escape, w_one, w_many, uni_plane);
}
...
...
@@ -1406,8 +1248,8 @@ int my_wildcmp_ucs2_bin(CHARSET_INFO *cs,
const
char
*
wildstr
,
const
char
*
wildend
,
int
escape
,
int
w_one
,
int
w_many
)
{
return
my_wildcmp_u
cs2
(
cs
,
str
,
str_end
,
wildstr
,
wildend
,
escape
,
w_one
,
w_many
,
NULL
);
return
my_wildcmp_u
nicode
(
cs
,
str
,
str_end
,
wildstr
,
wildend
,
escape
,
w_one
,
w_many
,
NULL
);
}
...
...
strings/ctype-utf8.c
View file @
1596d465
...
...
@@ -1518,6 +1518,161 @@ MY_UNICASE_INFO *uni_plane[256]={
};
/*
** Compare string against string with wildcard
** This function is used in UTF8 and UCS2
**
** 0 if matched
** -1 if not matched with wildcard
** 1 if matched with wildcard
*/
int
my_wildcmp_unicode
(
CHARSET_INFO
*
cs
,
const
char
*
str
,
const
char
*
str_end
,
const
char
*
wildstr
,
const
char
*
wildend
,
int
escape
,
int
w_one
,
int
w_many
,
MY_UNICASE_INFO
**
weights
)
{
int
result
=
-
1
;
/* Not found, using wildcards */
my_wc_t
s_wc
,
w_wc
;
int
scan
,
plane
;
int
(
*
mb_wc
)(
struct
charset_info_st
*
cs
,
my_wc_t
*
wc
,
const
unsigned
char
*
s
,
const
unsigned
char
*
e
);
mb_wc
=
cs
->
cset
->
mb_wc
;
while
(
wildstr
!=
wildend
)
{
while
(
1
)
{
if
((
scan
=
mb_wc
(
cs
,
&
w_wc
,
(
const
uchar
*
)
wildstr
,
(
const
uchar
*
)
wildend
))
<=
0
)
return
1
;
if
(
w_wc
==
(
my_wc_t
)
escape
)
{
wildstr
+=
scan
;
if
((
scan
=
mb_wc
(
cs
,
&
w_wc
,
(
const
uchar
*
)
wildstr
,
(
const
uchar
*
)
wildend
))
<=
0
)
return
1
;
}
if
(
w_wc
==
(
my_wc_t
)
w_many
)
{
result
=
1
;
/* Found an anchor char */
break
;
}
wildstr
+=
scan
;
if
((
scan
=
mb_wc
(
cs
,
&
s_wc
,
(
const
uchar
*
)
str
,
(
const
uchar
*
)
str_end
))
<=
0
)
return
1
;
str
+=
scan
;
if
(
w_wc
==
(
my_wc_t
)
w_one
)
{
result
=
1
;
/* Found an anchor char */
}
else
{
if
(
weights
)
{
plane
=
(
s_wc
>>
8
)
&
0xFF
;
s_wc
=
weights
[
plane
]
?
weights
[
plane
][
s_wc
&
0xFF
].
sort
:
s_wc
;
plane
=
(
w_wc
>>
8
)
&
0xFF
;
w_wc
=
weights
[
plane
]
?
weights
[
plane
][
w_wc
&
0xFF
].
sort
:
w_wc
;
}
if
(
s_wc
!=
w_wc
)
return
1
;
/* No match */
}
if
(
wildstr
==
wildend
)
return
(
str
!=
str_end
);
/* Match if both are at end */
}
if
(
w_wc
==
(
my_wc_t
)
w_many
)
{
/* Found w_many */
/* Remove any '%' and '_' from the wild search string */
for
(
;
wildstr
!=
wildend
;
)
{
if
((
scan
=
mb_wc
(
cs
,
&
w_wc
,
(
const
uchar
*
)
wildstr
,
(
const
uchar
*
)
wildend
))
<=
0
)
return
1
;
if
(
w_wc
==
(
my_wc_t
)
w_many
)
{
wildstr
+=
scan
;
continue
;
}
if
(
w_wc
==
(
my_wc_t
)
w_one
)
{
wildstr
+=
scan
;
if
((
scan
=
mb_wc
(
cs
,
&
s_wc
,
(
const
uchar
*
)
str
,
(
const
uchar
*
)
str_end
))
<=
0
)
return
1
;
str
+=
scan
;
continue
;
}
break
;
/* Not a wild character */
}
if
(
wildstr
==
wildend
)
return
0
;
/* Ok if w_many is last */
if
(
str
==
str_end
)
return
-
1
;
if
((
scan
=
mb_wc
(
cs
,
&
w_wc
,
(
const
uchar
*
)
wildstr
,
(
const
uchar
*
)
wildend
))
<=
0
)
return
1
;
if
(
w_wc
==
(
my_wc_t
)
escape
)
{
wildstr
+=
scan
;
if
((
scan
=
mb_wc
(
cs
,
&
w_wc
,
(
const
uchar
*
)
wildstr
,
(
const
uchar
*
)
wildend
))
<=
0
)
return
1
;
}
while
(
1
)
{
/* Skip until the first character from wildstr is found */
while
(
str
!=
str_end
)
{
if
((
scan
=
mb_wc
(
cs
,
&
s_wc
,
(
const
uchar
*
)
str
,
(
const
uchar
*
)
str_end
))
<=
0
)
return
1
;
if
(
weights
)
{
plane
=
(
s_wc
>>
8
)
&
0xFF
;
s_wc
=
weights
[
plane
]
?
weights
[
plane
][
s_wc
&
0xFF
].
sort
:
s_wc
;
plane
=
(
w_wc
>>
8
)
&
0xFF
;
w_wc
=
weights
[
plane
]
?
weights
[
plane
][
w_wc
&
0xFF
].
sort
:
w_wc
;
}
if
(
s_wc
==
w_wc
)
break
;
str
+=
scan
;
}
if
(
str
==
str_end
)
return
-
1
;
result
=
my_wildcmp_unicode
(
cs
,
str
,
str_end
,
wildstr
,
wildend
,
escape
,
w_one
,
w_many
,
weights
);
if
(
result
<=
0
)
return
result
;
str
+=
scan
;
}
}
}
return
(
str
!=
str_end
?
1
:
0
);
}
#endif
...
...
@@ -1992,6 +2147,17 @@ static int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
return
my_strncasecmp_utf8
(
cs
,
s
,
t
,
len
);
}
/*
  Wildcard comparison entry point for the UTF8 collation handler.

  Hands the arguments unchanged to the shared my_wildcmp_unicode()
  routine and supplies uni_plane as the weight table, so characters are
  compared through their .sort values.

  Returns the value produced by my_wildcmp_unicode().
*/
static int my_wildcmp_utf8(CHARSET_INFO *cs,
                           const char *str, const char *str_end,
                           const char *wildstr, const char *wildend,
                           int escape, int w_one, int w_many)
{
  int cmp_result;

  cmp_result= my_wildcmp_unicode(cs,
                                 str, str_end,
                                 wildstr, wildend,
                                 escape, w_one, w_many,
                                 uni_plane);
  return cmp_result;
}
static
int
my_strnxfrm_utf8
(
CHARSET_INFO
*
cs
,
uchar
*
dst
,
uint
dstlen
,
const
uchar
*
src
,
uint
srclen
)
...
...
@@ -2060,7 +2226,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
my_strnncollsp_utf8
,
my_strnxfrm_utf8
,
my_like_range_mb
,
my_wildcmp_
mb
,
my_wildcmp_
utf8
,
my_strcasecmp_utf8
,
my_instr_mb
,
my_hash_sort_utf8
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment