Commit e3b94d4e authored by bar@mysql.com

Bug #6737: REGEXP gives wrong result with case sensitive collation:

- A new flag MY_CS_CSSORT was introduced for case sensitivity.
- Item_func_regex now omits REG_ICASE not only
  for binary collations but for case-sensitive collations as well.
parent 7f5661ae
...@@ -63,7 +63,7 @@ typedef struct unicase_info_st ...@@ -63,7 +63,7 @@ typedef struct unicase_info_st
#define MY_CS_UNICODE 128 /* is a charset is full unicode */ #define MY_CS_UNICODE 128 /* is a charset is full unicode */
#define MY_CS_READY 256 /* if a charset is initialized */ #define MY_CS_READY 256 /* if a charset is initialized */
#define MY_CS_AVAILABLE 512 /* If either compiled-in or loaded*/ #define MY_CS_AVAILABLE 512 /* If either compiled-in or loaded*/
#define MY_CS_CSSORT 1024 /* if case sensitive sort order */
#define MY_CHARSET_UNDEFINED 0 #define MY_CHARSET_UNDEFINED 0
......
...@@ -296,3 +296,12 @@ FD C3BD FD 1 ...@@ -296,3 +296,12 @@ FD C3BD FD 1
FE C3BE FE 1 FE C3BE FE 1
FF C3BF FF 1 FF C3BF FF 1
DROP TABLE t1; DROP TABLE t1;
select 'a' regexp 'A' collate latin1_general_ci;
'a' regexp 'A' collate latin1_general_ci
1
select 'a' regexp 'A' collate latin1_general_cs;
'a' regexp 'A' collate latin1_general_cs
0
select 'a' regexp 'A' collate latin1_bin;
'a' regexp 'A' collate latin1_bin
0
...@@ -53,3 +53,10 @@ SELECT ...@@ -53,3 +53,10 @@ SELECT
hex(@l:=convert(@u using latin1)), hex(@l:=convert(@u using latin1)),
a=@l FROM t1; a=@l FROM t1;
DROP TABLE t1; DROP TABLE t1;
#
# Bug #6737: REGEXP gives wrong result with case sensitive collation
#
select 'a' regexp 'A' collate latin1_general_ci;
select 'a' regexp 'A' collate latin1_general_cs;
select 'a' regexp 'A' collate latin1_bin;
...@@ -228,6 +228,7 @@ static int add_collation(CHARSET_INFO *cs) ...@@ -228,6 +228,7 @@ static int add_collation(CHARSET_INFO *cs)
} }
else else
{ {
uchar *sort_order= all_charsets[cs->number]->sort_order;
simple_cs_init_functions(all_charsets[cs->number]); simple_cs_init_functions(all_charsets[cs->number]);
new->mbminlen= 1; new->mbminlen= 1;
new->mbmaxlen= 1; new->mbmaxlen= 1;
...@@ -236,6 +237,16 @@ static int add_collation(CHARSET_INFO *cs) ...@@ -236,6 +237,16 @@ static int add_collation(CHARSET_INFO *cs)
all_charsets[cs->number]->state |= MY_CS_LOADED; all_charsets[cs->number]->state |= MY_CS_LOADED;
} }
all_charsets[cs->number]->state|= MY_CS_AVAILABLE; all_charsets[cs->number]->state|= MY_CS_AVAILABLE;
/*
Check if case sensitive sort order: A < a < B.
We need the MY_CS_CSSORT flag for the regex library, and as the
case sensitivity flag for the 5.0 client protocol,
to support the isCaseSensitive() method in the JDBC driver.
*/
if (sort_order && sort_order['A'] < sort_order['a'] &&
sort_order['a'] < sort_order['B'])
all_charsets[cs->number]->state|= MY_CS_CSSORT;
} }
} }
else else
......
...@@ -2364,11 +2364,12 @@ Item_func_regex::fix_fields(THD *thd, TABLE_LIST *tables, Item **ref) ...@@ -2364,11 +2364,12 @@ Item_func_regex::fix_fields(THD *thd, TABLE_LIST *tables, Item **ref)
return 0; return 0;
} }
int error; int error;
if ((error=regcomp(&preg,res->c_ptr(), if ((error= regcomp(&preg,res->c_ptr(),
(cmp_collation.collation->state & MY_CS_BINSORT) ? ((cmp_collation.collation->state & MY_CS_BINSORT) ||
REG_EXTENDED | REG_NOSUB : (cmp_collation.collation->state & MY_CS_CSSORT)) ?
REG_EXTENDED | REG_NOSUB | REG_ICASE, REG_EXTENDED | REG_NOSUB :
cmp_collation.collation))) REG_EXTENDED | REG_NOSUB | REG_ICASE,
cmp_collation.collation)))
{ {
(void) regerror(error,&preg,buff,sizeof(buff)); (void) regerror(error,&preg,buff,sizeof(buff));
my_printf_error(ER_REGEXP_ERROR,ER(ER_REGEXP_ERROR),MYF(0),buff); my_printf_error(ER_REGEXP_ERROR,ER(ER_REGEXP_ERROR),MYF(0),buff);
...@@ -2416,10 +2417,11 @@ longlong Item_func_regex::val_int() ...@@ -2416,10 +2417,11 @@ longlong Item_func_regex::val_int()
regex_compiled=0; regex_compiled=0;
} }
if (regcomp(&preg,res2->c_ptr(), if (regcomp(&preg,res2->c_ptr(),
(cmp_collation.collation->state & MY_CS_BINSORT) ? ((cmp_collation.collation->state & MY_CS_BINSORT) ||
REG_EXTENDED | REG_NOSUB : (cmp_collation.collation->state & MY_CS_CSSORT)) ?
REG_EXTENDED | REG_NOSUB | REG_ICASE, REG_EXTENDED | REG_NOSUB :
cmp_collation.collation)) REG_EXTENDED | REG_NOSUB | REG_ICASE,
cmp_collation.collation))
{ {
null_value=1; null_value=1;
return 0; return 0;
......
...@@ -589,12 +589,12 @@ static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler = ...@@ -589,12 +589,12 @@ static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler =
CHARSET_INFO my_charset_latin2_czech_ci = CHARSET_INFO my_charset_latin2_czech_ci =
{ {
2,0,0, /* number */ 2,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM, /* state */ MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_CSSORT, /* state */
"latin2", /* cs name */ "latin2", /* cs name */
"latin2_czech_cs", /* name */ "latin2_czech_cs", /* name */
"", /* comment */ "", /* comment */
NULL, /* tailoring */ NULL, /* tailoring */
ctype_czech, ctype_czech,
to_lower_czech, to_lower_czech,
to_upper_czech, to_upper_czech,
......
...@@ -624,12 +624,12 @@ static MY_COLLATION_HANDLER my_collation_czech_ci_handler = ...@@ -624,12 +624,12 @@ static MY_COLLATION_HANDLER my_collation_czech_ci_handler =
CHARSET_INFO my_charset_cp1250_czech_ci = CHARSET_INFO my_charset_cp1250_czech_ci =
{ {
34,0,0, /* number */ 34,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM, /* state */ MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_CSSORT, /* state */
"cp1250", /* cs name */ "cp1250", /* cs name */
"cp1250_czech_cs", /* name */ "cp1250_czech_cs", /* name */
"", /* comment */ "", /* comment */
NULL, /* tailoring */ NULL, /* tailoring */
ctype_win1250ch, ctype_win1250ch,
to_lower_win1250ch, to_lower_win1250ch,
to_upper_win1250ch, to_upper_win1250ch,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment