Commit e3b94d4e authored by bar@mysql.com's avatar bar@mysql.com

Bug #6737: REGEXP gives wrong result with case sensitive collation:

- A new flag MY_CS_CSSORT was introduced for case sensitivity.
- Item_func_regexp doesn't substiture ICASE not only
  for binary collations but for case sensitive collations as well. 
parent 7f5661ae
......@@ -63,7 +63,7 @@ typedef struct unicase_info_st
#define MY_CS_UNICODE 128 /* is a charset is full unicode */
#define MY_CS_READY 256 /* if a charset is initialized */
#define MY_CS_AVAILABLE 512 /* If either compiled-in or loaded*/
#define MY_CS_CSSORT 1024 /* if case sensitive sort order */
#define MY_CHARSET_UNDEFINED 0
......
......@@ -296,3 +296,12 @@ FD C3BD FD 1
FE C3BE FE 1
FF C3BF FF 1
DROP TABLE t1;
select 'a' regexp 'A' collate latin1_general_ci;
'a' regexp 'A' collate latin1_general_ci
1
select 'a' regexp 'A' collate latin1_general_cs;
'a' regexp 'A' collate latin1_general_cs
0
select 'a' regexp 'A' collate latin1_bin;
'a' regexp 'A' collate latin1_bin
0
......@@ -53,3 +53,10 @@ SELECT
hex(@l:=convert(@u using latin1)),
a=@l FROM t1;
DROP TABLE t1;
#
# Bug #6737: REGEXP gives wrong result with case sensitive collation
#
select 'a' regexp 'A' collate latin1_general_ci;
select 'a' regexp 'A' collate latin1_general_cs;
select 'a' regexp 'A' collate latin1_bin;
......@@ -228,6 +228,7 @@ static int add_collation(CHARSET_INFO *cs)
}
else
{
uchar *sort_order= all_charsets[cs->number]->sort_order;
simple_cs_init_functions(all_charsets[cs->number]);
new->mbminlen= 1;
new->mbmaxlen= 1;
......@@ -236,6 +237,16 @@ static int add_collation(CHARSET_INFO *cs)
all_charsets[cs->number]->state |= MY_CS_LOADED;
}
all_charsets[cs->number]->state|= MY_CS_AVAILABLE;
/*
Check if case sensitive sort order: A < a < B.
We need MY_CS_FLAG for regex library, and for
case sensitivity flag for 5.0 client protocol,
to support isCaseSensitive() method in JDBC driver
*/
if (sort_order && sort_order['A'] < sort_order['a'] &&
sort_order['a'] < sort_order['B'])
all_charsets[cs->number]->state|= MY_CS_CSSORT;
}
}
else
......
......@@ -2364,8 +2364,9 @@ Item_func_regex::fix_fields(THD *thd, TABLE_LIST *tables, Item **ref)
return 0;
}
int error;
if ((error=regcomp(&preg,res->c_ptr(),
(cmp_collation.collation->state & MY_CS_BINSORT) ?
if ((error= regcomp(&preg,res->c_ptr(),
((cmp_collation.collation->state & MY_CS_BINSORT) ||
(cmp_collation.collation->state & MY_CS_CSSORT)) ?
REG_EXTENDED | REG_NOSUB :
REG_EXTENDED | REG_NOSUB | REG_ICASE,
cmp_collation.collation)))
......@@ -2416,7 +2417,8 @@ longlong Item_func_regex::val_int()
regex_compiled=0;
}
if (regcomp(&preg,res2->c_ptr(),
(cmp_collation.collation->state & MY_CS_BINSORT) ?
((cmp_collation.collation->state & MY_CS_BINSORT) ||
(cmp_collation.collation->state & MY_CS_CSSORT)) ?
REG_EXTENDED | REG_NOSUB :
REG_EXTENDED | REG_NOSUB | REG_ICASE,
cmp_collation.collation))
......
......@@ -590,7 +590,7 @@ static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler =
CHARSET_INFO my_charset_latin2_czech_ci =
{
2,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM, /* state */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_CSSORT, /* state */
"latin2", /* cs name */
"latin2_czech_cs", /* name */
"", /* comment */
......
......@@ -625,7 +625,7 @@ static MY_COLLATION_HANDLER my_collation_czech_ci_handler =
CHARSET_INFO my_charset_cp1250_czech_ci =
{
34,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM, /* state */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_CSSORT, /* state */
"cp1250", /* cs name */
"cp1250_czech_cs", /* name */
"", /* comment */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment