Commit d2f7fe35 authored by unknown's avatar unknown

Bug#20471 LIKE search fails with indexed utf8 char column

The main problem was already fixed by Igor as part of the fix for 16674.
Adding some additional minor fixes and tests.


include/m_ctype.h:
  Adding reference to CHARSET_INFO.txt
mysql-test/r/ctype_utf8.result:
  Adding test case
mysql-test/t/ctype_utf8.test:
  Adding test case
strings/CHARSET_INFO.txt:
  Adding comment about max_sort_char
strings/ctype-mb.c:
  Restoring that non-Unicode character sets use 0xFF as pad character
  for max_str. Only Unicode character sets use wc_mb.
strings/ctype-utf8.c:
  Changed max_sort_char for UTF8 from U+00FF to U+FFFF.
parent b53e47a1
...@@ -108,6 +108,8 @@ enum my_lex_states ...@@ -108,6 +108,8 @@ enum my_lex_states
struct charset_info_st; struct charset_info_st;
/* See strings/CHARSET_INFO.txt about information on this structure */
typedef struct my_collation_handler_st typedef struct my_collation_handler_st
{ {
my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint)); my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint));
...@@ -147,6 +149,7 @@ extern MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler; ...@@ -147,6 +149,7 @@ extern MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler;
extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler; extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler;
/* See strings/CHARSET_INFO.txt about information on this structure */
typedef struct my_charset_handler_st typedef struct my_charset_handler_st
{ {
my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint)); my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint));
...@@ -204,6 +207,7 @@ extern MY_CHARSET_HANDLER my_charset_8bit_handler; ...@@ -204,6 +207,7 @@ extern MY_CHARSET_HANDLER my_charset_8bit_handler;
extern MY_CHARSET_HANDLER my_charset_ucs2_handler; extern MY_CHARSET_HANDLER my_charset_ucs2_handler;
/* See strings/CHARSET_INFO.txt about information on this structure */
typedef struct charset_info_st typedef struct charset_info_st
{ {
uint number; uint number;
......
...@@ -1124,6 +1124,81 @@ check table t1; ...@@ -1124,6 +1124,81 @@ check table t1;
Table Op Msg_type Msg_text Table Op Msg_type Msg_text
test.t1 check status OK test.t1 check status OK
drop table t1; drop table t1;
set names utf8;
create table t1 (s1 char(5) character set utf8);
insert into t1 values
('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
create index it1 on t1 (s1);
select s1 as before_delete_general_ci from t1 where s1 like 'ペテ%';
before_delete_general_ci
ペテルグル
delete from t1 where s1 = 'Y';
select s1 as after_delete_general_ci from t1 where s1 like 'ペテ%';
after_delete_general_ci
ペテルグル
drop table t1;
set names utf8;
create table t1 (s1 char(5) character set utf8 collate utf8_unicode_ci);
insert into t1 values
('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
create index it1 on t1 (s1);
select s1 as before_delete_unicode_ci from t1 where s1 like 'ペテ%';
before_delete_unicode_ci
ペテルグル
delete from t1 where s1 = 'Y';
select s1 as after_delete_unicode_ci from t1 where s1 like 'ペテ%';
after_delete_unicode_ci
ペテルグル
drop table t1;
set names utf8;
create table t1 (s1 char(5) character set utf8 collate utf8_bin);
insert into t1 values
('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
create index it1 on t1 (s1);
select s1 as before_delete_bin from t1 where s1 like 'ペテ%';
before_delete_bin
ペテルグル
delete from t1 where s1 = 'Y';
select s1 as after_delete_bin from t1 where s1 like 'ペテ%';
after_delete_bin
ペテルグル
drop table t1;
set names utf8;
create table t1 (a varchar(30) not null primary key)
engine=innodb default character set utf8 collate utf8_general_ci;
insert into t1 values ('あいうえおかきくけこさしすせそ');
insert into t1 values ('さしすせそかきくけこあいうえお');
select a as gci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
gci1
さしすせそかきくけこあいうえお
select a as gci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
gci2
あいうえおかきくけこさしすせそ
drop table t1;
set names utf8;
create table t1 (a varchar(30) not null primary key)
engine=innodb default character set utf8 collate utf8_unicode_ci;
insert into t1 values ('あいうえおかきくけこさしすせそ');
insert into t1 values ('さしすせそかきくけこあいうえお');
select a as uci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
uci1
さしすせそかきくけこあいうえお
select a as uci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
uci2
あいうえおかきくけこさしすせそ
drop table t1;
set names utf8;
create table t1 (a varchar(30) not null primary key)
engine=innodb default character set utf8 collate utf8_bin;
insert into t1 values ('あいうえおかきくけこさしすせそ');
insert into t1 values ('さしすせそかきくけこあいうえお');
select a as bin1 from t1 where a like 'さしすせそかきくけこあいうえお%';
bin1
さしすせそかきくけこあいうえお
select a as bin2 from t1 where a like 'あいうえおかきくけこさしすせそ';
bin2
あいうえおかきくけこさしすせそ
drop table t1;
SET NAMES utf8; SET NAMES utf8;
CREATE TABLE t1 (id int PRIMARY KEY, CREATE TABLE t1 (id int PRIMARY KEY,
a varchar(16) collate utf8_unicode_ci NOT NULL default '', a varchar(16) collate utf8_unicode_ci NOT NULL default '',
......
...@@ -926,6 +926,76 @@ INSERT INTO t1 VALUES('uUABCDEFGHIGKLMNOPRSTUVWXYZ̈bbbbbbbbbbbbbbbbbbbbbbbbbbbb ...@@ -926,6 +926,76 @@ INSERT INTO t1 VALUES('uUABCDEFGHIGKLMNOPRSTUVWXYZ̈bbbbbbbbbbbbbbbbbbbbbbbbbbbb
check table t1; check table t1;
drop table t1; drop table t1;
#
# Bug#20471 LIKE search fails with indexed utf8 char column
#
set names utf8;
create table t1 (s1 char(5) character set utf8);
insert into t1 values
('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
create index it1 on t1 (s1);
select s1 as before_delete_general_ci from t1 where s1 like 'ペテ%';
delete from t1 where s1 = 'Y';
select s1 as after_delete_general_ci from t1 where s1 like 'ペテ%';
drop table t1;
set names utf8;
create table t1 (s1 char(5) character set utf8 collate utf8_unicode_ci);
insert into t1 values
('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
create index it1 on t1 (s1);
select s1 as before_delete_unicode_ci from t1 where s1 like 'ペテ%';
delete from t1 where s1 = 'Y';
select s1 as after_delete_unicode_ci from t1 where s1 like 'ペテ%';
drop table t1;
set names utf8;
create table t1 (s1 char(5) character set utf8 collate utf8_bin);
insert into t1 values
('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
create index it1 on t1 (s1);
select s1 as before_delete_bin from t1 where s1 like 'ペテ%';
delete from t1 where s1 = 'Y';
select s1 as after_delete_bin from t1 where s1 like 'ペテ%';
drop table t1;
# additional tests from duplicate bug#20744 MySQL return no result
set names utf8;
--disable_warnings
create table t1 (a varchar(30) not null primary key)
engine=innodb default character set utf8 collate utf8_general_ci;
--enable_warnings
insert into t1 values ('あいうえおかきくけこさしすせそ');
insert into t1 values ('さしすせそかきくけこあいうえお');
select a as gci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
select a as gci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
drop table t1;
set names utf8;
--disable_warnings
create table t1 (a varchar(30) not null primary key)
engine=innodb default character set utf8 collate utf8_unicode_ci;
--enable_warnings
insert into t1 values ('あいうえおかきくけこさしすせそ');
insert into t1 values ('さしすせそかきくけこあいうえお');
select a as uci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
select a as uci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
drop table t1;
set names utf8;
--disable_warnings
create table t1 (a varchar(30) not null primary key)
engine=innodb default character set utf8 collate utf8_bin;
--enable_warnings
insert into t1 values ('あいうえおかきくけこさしすせそ');
insert into t1 values ('さしすせそかきくけこあいうえお');
select a as bin1 from t1 where a like 'さしすせそかきくけこあいうえお%';
select a as bin2 from t1 where a like 'あいうえおかきくけこさしすせそ';
drop table t1;
# #
# Bug#14896: Comparison with a key in a partial index over mb character field # Bug#14896: Comparison with a key in a partial index over mb character field
# #
......
...@@ -33,7 +33,7 @@ typedef struct charset_info_st ...@@ -33,7 +33,7 @@ typedef struct charset_info_st
uint strxfrm_multiply; uint strxfrm_multiply;
uint mbminlen; uint mbminlen;
uint mbmaxlen; uint mbmaxlen;
char max_sort_char; /* For LIKE optimization */ uint16 max_sort_char; /* For LIKE optimization */
MY_CHARSET_HANDLER *cset; MY_CHARSET_HANDLER *cset;
MY_COLLATION_HANDLER *coll; MY_COLLATION_HANDLER *coll;
...@@ -134,7 +134,15 @@ Misc fields ...@@ -134,7 +134,15 @@ Misc fields
mbmaxlen - maximum multibyte sequence length. mbmaxlen - maximum multibyte sequence length.
1 for 8bit charsets. Can be also 2 or 3. 1 for 8bit charsets. Can be also 2 or 3.
max_sort_char - for LIKE range
in case of 8bit character sets - native code
of maximum character (max_str pad byte);
in case of UTF8 and UCS2 - Unicode code of the maximum
possible character (usually U+FFFF). This code is
converted to multibyte representation (usually 0xEFBFBF)
and then used as a pad sequence for max_str.
in case of other multibyte character sets -
max_str pad byte (usually 0xFF).
MY_CHARSET_HANDLER MY_CHARSET_HANDLER
================== ==================
......
...@@ -449,15 +449,28 @@ static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)), ...@@ -449,15 +449,28 @@ static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
/* /*
Write max key: create a buffer with multibyte Write max key:
- for non-Unicode character sets:
just set to 255.
- for Unicode character set (utf-8):
create a buffer with multibyte
representation of the max_sort_char character, representation of the max_sort_char character,
and copy it into max_str in a loop. and copy it into max_str in a loop.
*/ */
static void pad_max_char(CHARSET_INFO *cs, char *str, char *end) static void pad_max_char(CHARSET_INFO *cs, char *str, char *end)
{ {
char buf[10]; char buf[10];
char buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf, char buflen;
if (!(cs->state & MY_CS_UNICODE))
{
bfill(str, end - str, 255);
return;
}
buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf,
(uchar*) buf + sizeof(buf)); (uchar*) buf + sizeof(buf));
DBUG_ASSERT(buflen > 0); DBUG_ASSERT(buflen > 0);
do do
{ {
...@@ -894,7 +907,7 @@ MY_COLLATION_HANDLER my_collation_mb_bin_handler = ...@@ -894,7 +907,7 @@ MY_COLLATION_HANDLER my_collation_mb_bin_handler =
my_strnncoll_mb_bin, my_strnncoll_mb_bin,
my_strnncollsp_mb_bin, my_strnncollsp_mb_bin,
my_strnxfrm_mb_bin, my_strnxfrm_mb_bin,
my_like_range_simple, my_like_range_mb,
my_wildcmp_mb_bin, my_wildcmp_mb_bin,
my_strcasecmp_mb_bin, my_strcasecmp_mb_bin,
my_instr_mb, my_instr_mb,
......
...@@ -2373,7 +2373,7 @@ CHARSET_INFO my_charset_utf8_bin= ...@@ -2373,7 +2373,7 @@ CHARSET_INFO my_charset_utf8_bin=
1, /* mbminlen */ 1, /* mbminlen */
3, /* mbmaxlen */ 3, /* mbmaxlen */
0, /* min_sort_char */ 0, /* min_sort_char */
255, /* max_sort_char */ 0xFFFF, /* max_sort_char */
0, /* escape_with_backslash_is_dangerous */ 0, /* escape_with_backslash_is_dangerous */
&my_charset_utf8_handler, &my_charset_utf8_handler,
&my_collation_mb_bin_handler &my_collation_mb_bin_handler
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment