Commit 626dd5e8 authored by Michael Widenius's avatar Michael Widenius

Applied a patch from Alexander Barkov to fix some problems with the Croatian character set and LIKE queries

Author: Alexander Barkov
License: GPL

mysql-test/t/ctype_ucs.test:
  Added a test case for the Croatian character set
parent 65ca2521
...@@ -1211,3 +1211,47 @@ HEX(DAYNAME(19700101)) ...@@ -1211,3 +1211,47 @@ HEX(DAYNAME(19700101))
0427043504420432043504400433 0427043504420432043504400433
SET character_set_connection=latin1; SET character_set_connection=latin1;
End of 5.0 tests End of 5.0 tests
Start of 5.1 tests
SET NAMES utf8;
CREATE TABLE t1 (
a varchar(10) CHARACTER SET ucs2 COLLATE ucs2_czech_ci,
key(a)
);
INSERT INTO t1 VALUES
('aa'),('bb'),('cc'),('dd'),('ee'),('ff'),('gg'),('hh'),('ii'),
('jj'),('kk'),('ll'),('mm'),('nn'),('oo'),('pp'),('rr'),('ss'),
('tt'),('uu'),('vv'),('ww'),('xx'),('yy'),('zz');
INSERT INTO t1 VALUES ('ca'),('cz'),('ch');
INSERT INTO t1 VALUES ('da'),('dz'), (X'0064017E');
EXPLAIN SELECT * FROM t1 WHERE a LIKE 'b%';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 23 NULL 1 Using where; Using index
EXPLAIN SELECT * FROM t1 WHERE a LIKE 'c%';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 23 NULL 30 Using where; Using index
SELECT * FROM t1 WHERE a LIKE 'c%';
a
ca
cc
cz
ch
EXPLAIN SELECT * FROM t1 WHERE a LIKE 'ch%';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 23 NULL 1 Using where; Using index
SELECT * FROM t1 WHERE a LIKE 'ch%';
a
ch
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ucs2 COLLATE ucs2_croatian_ci;
EXPLAIN SELECT * FROM t1 WHERE a LIKE 'd%';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 23 NULL 1 Using where; Using index
SELECT hex(concat('d',_ucs2 0x017E,'%'));
hex(concat('d',_ucs2 0x017E,'%'))
0064017E0025
EXPLAIN SELECT * FROM t1 WHERE a LIKE concat('d',_ucs2 0x017E,'%');
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 23 NULL 1 Using where; Using index
SELECT hex(a) FROM t1 WHERE a LIKE concat('D',_ucs2 0x017E,'%');
hex(a)
0064017E
DROP TABLE t1;
...@@ -723,3 +723,34 @@ SELECT HEX(DAYNAME(19700101)); ...@@ -723,3 +723,34 @@ SELECT HEX(DAYNAME(19700101));
SET character_set_connection=latin1; SET character_set_connection=latin1;
--echo End of 5.0 tests --echo End of 5.0 tests
--echo Start of 5.1 tests
#
# Checking my_like_range_ucs2
#
# Runs LIKE prefix queries against an indexed ucs2 column, first under
# ucs2_czech_ci and then under ucs2_croatian_ci, and records both the
# EXPLAIN row estimates and the matching rows.
#
SET NAMES utf8;
CREATE TABLE t1 (
a varchar(10) CHARACTER SET ucs2 COLLATE ucs2_czech_ci,
key(a)
);
# Doubled-letter filler rows ('aa' .. 'zz') so the index has many distinct
# leading letters.
INSERT INTO t1 VALUES
('aa'),('bb'),('cc'),('dd'),('ee'),('ff'),('gg'),('hh'),('ii'),
('jj'),('kk'),('ll'),('mm'),('nn'),('oo'),('pp'),('rr'),('ss'),
('tt'),('uu'),('vv'),('ww'),('xx'),('yy'),('zz');
# Extra rows starting with 'c', including the two-letter sequence 'ch'.
INSERT INTO t1 VALUES ('ca'),('cz'),('ch');
# Extra rows starting with 'd'. X'0064017E' is 'd' followed by U+017E, i.e.
# a second character whose UCS-2 high byte is non-zero (0x01).
INSERT INTO t1 VALUES ('da'),('dz'), (X'0064017E');
# This one should scan only one row
EXPLAIN SELECT * FROM t1 WHERE a LIKE 'b%';
# This one should scan many rows: 'c' is a contraction head
EXPLAIN SELECT * FROM t1 WHERE a LIKE 'c%';
SELECT * FROM t1 WHERE a LIKE 'c%';
# Pattern that spells out the whole 'ch' sequence before the wildcard.
EXPLAIN SELECT * FROM t1 WHERE a LIKE 'ch%';
SELECT * FROM t1 WHERE a LIKE 'ch%';
# Switch the same column and index over to the Croatian collation.
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ucs2 COLLATE ucs2_croatian_ci;
EXPLAIN SELECT * FROM t1 WHERE a LIKE 'd%';
# Show the raw bytes of the pattern used below: 'd', U+017E, '%'.
SELECT hex(concat('d',_ucs2 0x017E,'%'));
EXPLAIN SELECT * FROM t1 WHERE a LIKE concat('d',_ucs2 0x017E,'%');
# Same pattern with an upper-case 'D'; the row stored with lower-case 'd'
# is expected to match.
SELECT hex(a) FROM t1 WHERE a LIKE concat('D',_ucs2 0x017E,'%');
DROP TABLE t1;
...@@ -1498,6 +1498,14 @@ void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)), ...@@ -1498,6 +1498,14 @@ void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)),
} }
} }
static inline my_wc_t
ucs2_to_wc(const uchar *ptr)
{
return (((uint) ptr[0]) << 8) + ptr[1];
}
/* /*
** Calculate min_str and max_str that ranges a LIKE string. ** Calculate min_str and max_str that ranges a LIKE string.
** Arguments: ** Arguments:
...@@ -1531,6 +1539,7 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs, ...@@ -1531,6 +1539,7 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs,
for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0 for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0
; ptr+=2, charlen--) ; ptr+=2, charlen--)
{ {
my_wc_t wc;
if (ptr[0] == '\0' && ptr[1] == escape && ptr + 1 < end) if (ptr[0] == '\0' && ptr[1] == escape && ptr + 1 < end)
{ {
ptr+=2; /* Skip escape */ ptr+=2; /* Skip escape */
...@@ -1567,9 +1576,9 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs, ...@@ -1567,9 +1576,9 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs,
} }
if (have_contractions && ptr + 3 < end && if (have_contractions && ptr + 3 < end &&
ptr[0] == '\0' && my_uca_can_be_contraction_head(cs, (wc= ucs2_to_wc((uchar*) ptr))))
my_uca_can_be_contraction_head(cs, (uchar) ptr[1]))
{ {
my_wc_t wc2;
/* Contraction head found */ /* Contraction head found */
if (ptr[2] == '\0' && (ptr[3] == w_one || ptr[3] == w_many)) if (ptr[2] == '\0' && (ptr[3] == w_one || ptr[3] == w_many))
{ {
...@@ -1581,9 +1590,8 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs, ...@@ -1581,9 +1590,8 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs,
Check if the second letter can be contraction part, Check if the second letter can be contraction part,
and if two letters really produce a contraction. and if two letters really produce a contraction.
*/ */
if (ptr[2] == '\0' && if (my_uca_can_be_contraction_tail(cs, (wc2= ucs2_to_wc((uchar*) ptr + 2))) &&
my_uca_can_be_contraction_tail(cs, (uchar) ptr[3]) && my_uca_contraction2_weight(cs, wc , wc2))
my_uca_contraction2_weight(cs,(uchar) ptr[1], (uchar) ptr[3]))
{ {
/* Contraction found */ /* Contraction found */
if (charlen == 1 || min_str + 2 >= min_end) if (charlen == 1 || min_str + 2 >= min_end)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment