Commit e3dee8a7 authored by Alexander Barkov's avatar Alexander Barkov

Bug#57737 Character sets: search fails with like, contraction, index

Problem: LIKE over an indexed column optimized away good results,
because my_like_range_utf32/utf16 returned wrong ranges for contractions.
Contraction related code was missing in my_like_range_utf32/utf16,
but did exist in my_like_range_ucs2/utf8.
It was forgotten in utf32/utf16 versions (during mysql-6.0 push/revert mess).

Fix:
The patch removes individual functions my_like_range_ucs2,
my_like_range_utf16, my_like_range_utf32 and introduces a single function
my_like_range_generic() instead. The new function handles contractions
correctly. It can handle any character set with cs->min_sort_char and
cs->max_sort_char represented in Unicode code points.

added:
  @ mysql-test/include/ctype_czech.inc
  @ mysql-test/include/ctype_like_ignorable.inc
  @ mysql-test/r/ctype_like_range.result
  @ mysql-test/t/ctype_like_range.test
  Adding tests


modified:

  @ include/m_ctype.h
  - Adding helper functions for contractions.
  - Prototypes: removing ucs2,utf16,utf32 functions, adding generic function.
  @ mysql-test/r/ctype_uca.result
  @ mysql-test/r/ctype_utf16_uca.result
  @ mysql-test/r/ctype_utf32_uca.result
  @ mysql-test/t/ctype_uca.test
  @ mysql-test/t/ctype_utf16_uca.test
  @ mysql-test/t/ctype_utf32_uca.test
  - Adding tests.

  @ strings/ctype-mb.c
  - Pad function did not put the last character.
  - Implementing my_like_range_generic() - an universal replacement
    for three separate functions
    my_like_range_ucs2(), my_like_range_utf16() and my_like_range_utf32(),
    with correct contraction handling.

  @ strings/ctype-ucs2.c
  - my_fill_mb2 did not put the high byte, as previously
    it was used to put only characters in ASCII range.
    Now it puts high byte as well
    (needed to pupulate cs->max_sort_char correctly).
  - Adding DBUG_ASSERT()
  - Removing character set specific functions:
    my_like_range_ucs2(), my_like_range_utf16() and my_like_range_utf32().
  - Using my_like_range_generic() instead of the old functions.

  @ strings/ctype-uca.c
  - Using generic function instead of the old character set specific ones.

  @ sql/item_create.cc
  @ sql/item_strfunc.cc
  @ sql/item_strfunc.h
  - Adding SQL functions LIKE_RANGE_MIN and LIKE_RANGE_MAX,
    available only in debug build to make sure like_range()
    works correctly for all character sets and collations. 
parent ce441751
......@@ -356,6 +356,32 @@ extern CHARSET_INFO my_charset_utf8mb4_unicode_ci;
#define MY_UTF8MB4 "utf8mb4"
/* Helper functions to handle contraction */
static inline my_bool
my_cs_have_contractions(CHARSET_INFO *cs)
{
return cs->contractions != NULL;
}
static inline my_bool
my_cs_can_be_contraction_head(CHARSET_INFO *cs, my_wc_t wc)
{
return ((const char *)cs->contractions)[0x40*0x40 + (wc & 0xFF)];
}
static inline my_bool
my_cs_can_be_contraction_tail(CHARSET_INFO *cs, my_wc_t wc)
{
return ((const char *)cs->contractions)[0x40*0x40 + (wc & 0xFF)];
}
static inline uint16*
my_cs_contraction2_weight(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2)
{
return &cs->contractions[(wc1 - 0x40) * 0x40 + wc2 - 0x40];
}
/* declarations for simple charsets */
extern size_t my_strnxfrm_simple(CHARSET_INFO *, uchar *, size_t,
const uchar *, size_t);
......@@ -430,6 +456,7 @@ ulonglong my_strntoull10rnd_ucs2(CHARSET_INFO *cs,
void my_fill_8bit(CHARSET_INFO *cs, char* to, size_t l, int fill);
/* For 8-bit character set */
my_bool my_like_range_simple(CHARSET_INFO *cs,
const char *ptr, size_t ptr_length,
pbool escape, pbool w_one, pbool w_many,
......@@ -437,6 +464,7 @@ my_bool my_like_range_simple(CHARSET_INFO *cs,
char *min_str, char *max_str,
size_t *min_length, size_t *max_length);
/* For ASCII-based multi-byte character sets with mbminlen=1 */
my_bool my_like_range_mb(CHARSET_INFO *cs,
const char *ptr, size_t ptr_length,
pbool escape, pbool w_one, pbool w_many,
......@@ -444,26 +472,13 @@ my_bool my_like_range_mb(CHARSET_INFO *cs,
char *min_str, char *max_str,
size_t *min_length, size_t *max_length);
my_bool my_like_range_ucs2(CHARSET_INFO *cs,
const char *ptr, size_t ptr_length,
pbool escape, pbool w_one, pbool w_many,
size_t res_length,
char *min_str, char *max_str,
size_t *min_length, size_t *max_length);
my_bool my_like_range_utf16(CHARSET_INFO *cs,
const char *ptr, size_t ptr_length,
pbool escape, pbool w_one, pbool w_many,
size_t res_length,
char *min_str, char *max_str,
size_t *min_length, size_t *max_length);
my_bool my_like_range_utf32(CHARSET_INFO *cs,
const char *ptr, size_t ptr_length,
pbool escape, pbool w_one, pbool w_many,
size_t res_length,
char *min_str, char *max_str,
size_t *min_length, size_t *max_length);
/* For other character sets, with arbitrary mbminlen and mbmaxlen numbers */
my_bool my_like_range_generic(CHARSET_INFO *cs,
const char *ptr, size_t ptr_length,
pbool escape, pbool w_one, pbool w_many,
size_t res_length,
char *min_str, char *max_str,
size_t *min_length, size_t *max_length);
int my_wildcmp_8bit(CHARSET_INFO *,
const char *str,const char *str_end,
......
SELECT @@collation_connection;
--echo #
--echo # Bug#57737 Character sets: search fails with like, contraction, index
--echo #
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
INSERT INTO t1 VALUES ('c'),('ce'),('cé'),('ch');
SELECT * FROM t1 WHERE s1 LIKE 'c%';
ALTER TABLE t1 ADD KEY s1 (s1);
SELECT * FROM t1 WHERE s1 LIKE 'c%';
ALTER TABLE t1 DROP KEY s1, ADD KEY(s1(1));
SELECT * FROM t1 WHERE s1 LIKE 'ch';
DROP TABLE t1;
SELECT @@collation_connection;
--echo #
--echo # Bug#57737 Character sets: search fails with like, contraction, index
--echo # Part#2 - ignorable characters
--echo #
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
INSERT INTO t1 VALUES ('a\0\0\0\0\0\t'),('a'),('b'),('c'),('d'),('e');
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
ALTER TABLE t1 ADD KEY s1 (s1);
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
DROP TABLE t1;
This diff is collapsed.
......@@ -2888,3 +2888,101 @@ a hex(b) c
DROP TABLE t1;
set names utf8;
End for 5.0 tests
#
# Start of 5.5 tests
#
SET collation_connection=utf8_czech_ci;
SELECT @@collation_connection;
@@collation_connection
utf8_czech_ci
#
# Bug#57737 Character sets: search fails with like, contraction, index
#
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
INSERT INTO t1 VALUES ('c'),('ce'),('cé'),('ch');
SELECT * FROM t1 WHERE s1 LIKE 'c%';
s1
c
ce
ch
ALTER TABLE t1 ADD KEY s1 (s1);
SELECT * FROM t1 WHERE s1 LIKE 'c%';
s1
c
ce
ch
ALTER TABLE t1 DROP KEY s1, ADD KEY(s1(1));
SELECT * FROM t1 WHERE s1 LIKE 'ch';
s1
ch
DROP TABLE t1;
SELECT @@collation_connection;
@@collation_connection
utf8_czech_ci
#
# Bug#57737 Character sets: search fails with like, contraction, index
# Part#2 - ignorable characters
#
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
INSERT INTO t1 VALUES ('a\0\0\0\0\0\t'),('a'),('b'),('c'),('d'),('e');
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
HEX(s1)
61000000000009
61
ALTER TABLE t1 ADD KEY s1 (s1);
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
HEX(s1)
61000000000009
61
DROP TABLE t1;
SET collation_connection=ucs2_czech_ci;
SELECT @@collation_connection;
@@collation_connection
ucs2_czech_ci
#
# Bug#57737 Character sets: search fails with like, contraction, index
#
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
INSERT INTO t1 VALUES ('c'),('ce'),('cé'),('ch');
SELECT * FROM t1 WHERE s1 LIKE 'c%';
s1
c
ce
ch
ALTER TABLE t1 ADD KEY s1 (s1);
SELECT * FROM t1 WHERE s1 LIKE 'c%';
s1
c
ce
ch
ALTER TABLE t1 DROP KEY s1, ADD KEY(s1(1));
SELECT * FROM t1 WHERE s1 LIKE 'ch';
s1
ch
DROP TABLE t1;
SELECT @@collation_connection;
@@collation_connection
ucs2_czech_ci
#
# Bug#57737 Character sets: search fails with like, contraction, index
# Part#2 - ignorable characters
#
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
INSERT INTO t1 VALUES ('a\0\0\0\0\0\t'),('a'),('b'),('c'),('d'),('e');
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
HEX(s1)
0061000000000000000000000009
0061
ALTER TABLE t1 ADD KEY s1 (s1);
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
HEX(s1)
0061000000000000000000000009
0061
DROP TABLE t1;
#
# End of 5.5 tests
#
......@@ -2368,6 +2368,52 @@ NULL
NULL
NULL
drop table t1;
SET collation_connection=utf16_czech_ci;
SELECT @@collation_connection;
@@collation_connection
utf16_czech_ci
#
# Bug#57737 Character sets: search fails with like, contraction, index
#
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
INSERT INTO t1 VALUES ('c'),('ce'),('cé'),('ch');
SELECT * FROM t1 WHERE s1 LIKE 'c%';
s1
c
ce
ch
ALTER TABLE t1 ADD KEY s1 (s1);
SELECT * FROM t1 WHERE s1 LIKE 'c%';
s1
c
ce
ch
ALTER TABLE t1 DROP KEY s1, ADD KEY(s1(1));
SELECT * FROM t1 WHERE s1 LIKE 'ch';
s1
ch
DROP TABLE t1;
SELECT @@collation_connection;
@@collation_connection
utf16_czech_ci
#
# Bug#57737 Character sets: search fails with like, contraction, index
# Part#2 - ignorable characters
#
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
INSERT INTO t1 VALUES ('a\0\0\0\0\0\t'),('a'),('b'),('c'),('d'),('e');
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
HEX(s1)
0061000000000000000000000009
0061
ALTER TABLE t1 ADD KEY s1 (s1);
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
HEX(s1)
0061000000000000000000000009
0061
DROP TABLE t1;
#
# End of 5.5 tests
#
......@@ -2368,6 +2368,52 @@ NULL
NULL
NULL
drop table t1;
SET collation_connection=utf32_czech_ci;
SELECT @@collation_connection;
@@collation_connection
utf32_czech_ci
#
# Bug#57737 Character sets: search fails with like, contraction, index
#
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
INSERT INTO t1 VALUES ('c'),('ce'),('cé'),('ch');
SELECT * FROM t1 WHERE s1 LIKE 'c%';
s1
c
ce
ch
ALTER TABLE t1 ADD KEY s1 (s1);
SELECT * FROM t1 WHERE s1 LIKE 'c%';
s1
c
ce
ch
ALTER TABLE t1 DROP KEY s1, ADD KEY(s1(1));
SELECT * FROM t1 WHERE s1 LIKE 'ch';
s1
ch
DROP TABLE t1;
SELECT @@collation_connection;
@@collation_connection
utf32_czech_ci
#
# Bug#57737 Character sets: search fails with like, contraction, index
# Part#2 - ignorable characters
#
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
INSERT INTO t1 VALUES ('a\0\0\0\0\0\t'),('a'),('b'),('c'),('d'),('e');
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
HEX(s1)
00000061000000000000000000000000000000000000000000000009
00000061
ALTER TABLE t1 ADD KEY s1 (s1);
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
HEX(s1)
00000061000000000000000000000000000000000000000000000009
00000061
DROP TABLE t1;
#
# End of 5.5 tests
#
--source include/have_debug.inc
--source include/have_ucs2.inc
--source include/have_utf16.inc
--source include/have_utf32.inc
--disable_warnings
DROP TABLE IF EXISTS t1;
DROP VIEW IF EXISTS v1;
--enable_warnings
CREATE TABLE t1 (id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, a VARBINARY(32));
INSERT INTO t1 (a) VALUES (''),('_'),('%'),('\_'),('\%'),('\\');
INSERT INTO t1 (a) VALUES ('a'),('c');
INSERT INTO t1 (a) VALUES ('a_'),('c_');
INSERT INTO t1 (a) VALUES ('a%'),('c%');
INSERT INTO t1 (a) VALUES ('aa'),('cc'),('ch');
INSERT INTO t1 (a) VALUES ('aa_'),('cc_'),('ch_');
INSERT INTO t1 (a) VALUES ('aa%'),('cc%'),('ch%');
INSERT INTO t1 (a) VALUES ('aaa'),('ccc'),('cch');
INSERT INTO t1 (a) VALUES ('aaa_'),('ccc_'),('cch_');
INSERT INTO t1 (a) VALUES ('aaa%'),('ccc%'),('cch%');
INSERT INTO t1 (a) VALUES ('aaaaaaaaaaaaaaaaaaaa');
CREATE VIEW v1 AS
SELECT id, 'a' AS name, a AS val FROM t1
UNION
SELECT id, 'mn', HEX(LIKE_RANGE_MIN(a, 16)) AS min FROM t1
UNION
SELECT id, 'mx', HEX(LIKE_RANGE_MAX(a, 16)) AS max FROM t1
UNION
SELECT id, 'sp', REPEAT('-', 32) AS sep FROM t1
ORDER BY id, name;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET latin1;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf8;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf8 COLLATE utf8_unicode_ci;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf8 COLLATE utf8_czech_ci;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf8 COLLATE utf8_danish_ci;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET ucs2;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET ucs2 COLLATE ucs2_unicode_ci;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET ucs2 COLLATE ucs2_czech_ci;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET ucs2 COLLATE ucs2_danish_ci;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf16;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf16 COLLATE utf16_unicode_ci;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf16 COLLATE utf16_czech_ci;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf16 COLLATE utf16_danish_ci;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf32;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf32 COLLATE utf32_unicode_ci;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf32 COLLATE utf32_czech_ci;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf32 COLLATE utf32_danish_ci;
SELECT * FROM v1;
DROP VIEW v1;
DROP TABLE t1;
......@@ -545,3 +545,19 @@ set collation_connection=ucs2_unicode_ci;
set names utf8;
-- echo End for 5.0 tests
--echo #
--echo # Start of 5.5 tests
--echo #
#
# Test my_like_range and contractions
#
SET collation_connection=utf8_czech_ci;
--source include/ctype_czech.inc
--source include/ctype_like_ignorable.inc
SET collation_connection=ucs2_czech_ci;
--source include/ctype_czech.inc
--source include/ctype_like_ignorable.inc
--echo #
--echo # End of 5.5 tests
--echo #
......@@ -284,6 +284,13 @@ DROP TABLE IF EXISTS t1;
set collation_connection=utf16_unicode_ci;
--source include/ctype_regex.inc
#
# Test my_like_range and contractions
#
SET collation_connection=utf16_czech_ci;
--source include/ctype_czech.inc
--source include/ctype_like_ignorable.inc
--echo #
--echo # End of 5.5 tests
......
......@@ -286,6 +286,14 @@ set collation_connection=utf32_unicode_ci;
--source include/ctype_regex.inc
#
# Test my_like_range and contractions
#
SET collation_connection=utf32_czech_ci;
--source include/ctype_czech.inc
--source include/ctype_like_ignorable.inc
--echo #
--echo # End of 5.5 tests
--echo #
......@@ -1330,6 +1330,34 @@ protected:
};
#ifndef DBUG_OFF
class Create_func_like_range_min : public Create_func_arg2
{
public:
virtual Item *create(THD *thd, Item *arg1, Item *arg2);
static Create_func_like_range_min s_singleton;
protected:
Create_func_like_range_min() {}
virtual ~Create_func_like_range_min() {}
};
class Create_func_like_range_max : public Create_func_arg2
{
public:
virtual Item *create(THD *thd, Item *arg1, Item *arg2);
static Create_func_like_range_max s_singleton;
protected:
Create_func_like_range_max() {}
virtual ~Create_func_like_range_max() {}
};
#endif
class Create_func_ln : public Create_func_arg1
{
public:
......@@ -3836,6 +3864,26 @@ Create_func_length::create(THD *thd, Item *arg1)
}
#ifndef DBUG_OFF
Create_func_like_range_min Create_func_like_range_min::s_singleton;
Item*
Create_func_like_range_min::create(THD *thd, Item *arg1, Item *arg2)
{
return new (thd->mem_root) Item_func_like_range_min(arg1, arg2);
}
Create_func_like_range_max Create_func_like_range_max::s_singleton;
Item*
Create_func_like_range_max::create(THD *thd, Item *arg1, Item *arg2)
{
return new (thd->mem_root) Item_func_like_range_max(arg1, arg2);
}
#endif
Create_func_ln Create_func_ln::s_singleton;
Item*
......@@ -4924,6 +4972,10 @@ static Native_func_registry func_array[] =
{ { C_STRING_WITH_LEN("LCASE") }, BUILDER(Create_func_lcase)},
{ { C_STRING_WITH_LEN("LEAST") }, BUILDER(Create_func_least)},
{ { C_STRING_WITH_LEN("LENGTH") }, BUILDER(Create_func_length)},
#ifndef DBUG_OFF
{ { C_STRING_WITH_LEN("LIKE_RANGE_MIN") }, BUILDER(Create_func_like_range_min)},
{ { C_STRING_WITH_LEN("LIKE_RANGE_MAX") }, BUILDER(Create_func_like_range_max)},
#endif
{ { C_STRING_WITH_LEN("LINEFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)},
{ { C_STRING_WITH_LEN("LINEFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)},
{ { C_STRING_WITH_LEN("LINESTRINGFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)},
......
......@@ -3128,6 +3128,41 @@ String *Item_func_unhex::val_str(String *str)
}
#ifndef DBUG_OFF
String *Item_func_like_range::val_str(String *str)
{
DBUG_ASSERT(fixed == 1);
longlong nbytes= args[1]->val_int();
String *res= args[0]->val_str(str);
size_t min_len, max_len;
CHARSET_INFO *cs= collation.collation;
if (!res || args[0]->null_value || args[1]->null_value ||
nbytes < 0 || nbytes > MAX_BLOB_WIDTH ||
min_str.alloc(nbytes) || max_str.alloc(nbytes))
goto err;
null_value=0;
if (cs->coll->like_range(cs, res->ptr(), res->length(),
'\\', '_', '%', nbytes,
(char*) min_str.ptr(), (char*) max_str.ptr(),
&min_len, &max_len))
goto err;
min_str.set_charset(collation.collation);
max_str.set_charset(collation.collation);
min_str.length(min_len);
max_str.length(max_len);
return is_min ? &min_str : &max_str;
err:
null_value= 1;
return 0;
}
#endif
void Item_func_binary::print(String *str, enum_query_type query_type)
{
str->append(STRING_WITH_LEN("cast("));
......
......@@ -657,6 +657,46 @@ public:
};
#ifndef DBUG_OFF
class Item_func_like_range :public Item_str_func
{
protected:
String min_str;
String max_str;
const bool is_min;
public:
Item_func_like_range(Item *a, Item *b, bool is_min_arg)
:Item_str_func(a, b), is_min(is_min_arg)
{ maybe_null= 1; }
String *val_str(String *);
void fix_length_and_dec()
{
collation.set(args[0]->collation);
decimals=0;
max_length= MAX_BLOB_WIDTH;
}
};
class Item_func_like_range_min :public Item_func_like_range
{
public:
Item_func_like_range_min(Item *a, Item *b)
:Item_func_like_range(a, b, true) { }
const char *func_name() const { return "like_range_min"; }
};
class Item_func_like_range_max :public Item_func_like_range
{
public:
Item_func_like_range_max(Item *a, Item *b)
:Item_func_like_range(a, b, false) { }
const char *func_name() const { return "like_range_max"; }
};
#endif
class Item_func_binary :public Item_str_func
{
public:
......
......@@ -636,7 +636,7 @@ static void pad_max_char(CHARSET_INFO *cs, char *str, char *end)
DBUG_ASSERT(buflen > 0);
do
{
if ((str + buflen) < end)
if ((str + buflen) <= end)
{
/* Enough space for the characer */
memcpy(str, buf, buflen);
......@@ -802,6 +802,192 @@ fill_max_and_min:
}
/**
Calculate min_str and max_str that ranges a LIKE string.
Generic function, currently used for ucs2, utf16, utf32,
but should be suitable for any other character sets with
cs->min_sort_char and cs->max_sort_char represented in
Unicode code points.
@param cs Character set and collation pointer
@param ptr Pointer to LIKE pattern.
@param ptr_length Length of LIKE pattern.
@param escape Escape character pattern, typically '\'.
@param w_one 'One character' pattern, typically '_'.
@param w_many 'Many characters' pattern, typically '%'.
@param res_length Length of min_str and max_str.
@param[out] min_str Smallest string that ranges LIKE.
@param[out] max_str Largest string that ranges LIKE.
@param[out] min_len Length of min_str
@param[out] max_len Length of max_str
@return Optimization status.
@retval FALSE if LIKE pattern can be optimized
@rerval TRUE if LIKE can't be optimized.
*/
my_bool
my_like_range_generic(CHARSET_INFO *cs,
const char *ptr, size_t ptr_length,
pbool escape, pbool w_one, pbool w_many,
size_t res_length,
char *min_str,char *max_str,
size_t *min_length,size_t *max_length)
{
const char *end= ptr + ptr_length;
const char *min_org= min_str;
const char *max_org= max_str;
char *min_end= min_str + res_length;
char *max_end= max_str + res_length;
size_t charlen= res_length / cs->mbmaxlen;
size_t res_length_diff;
my_bool have_contractions= my_cs_have_contractions(cs);
for ( ; charlen > 0; charlen--)
{
my_wc_t wc, wc2;
int res;
if ((res= cs->cset->mb_wc(cs, &wc, (uchar*) ptr, (uchar*) end)) <= 0)
{
if (res == MY_CS_ILSEQ) /* Bad sequence */
return TRUE; /* min_length and max_length are not important */
break; /* End of the string */
}
ptr+= res;
if (wc == (my_wc_t) escape)
{
if ((res= cs->cset->mb_wc(cs, &wc, (uchar*) ptr, (uchar*) end)) <= 0)
{
if (res == MY_CS_ILSEQ)
return TRUE; /* min_length and max_length are not important */
/*
End of the string: Escape is the last character.
Put escape as a normal character.
We'll will leave the loop on the next iteration.
*/
}
else
ptr+= res;
/* Put escape character to min_str and max_str */
if ((res= cs->cset->wc_mb(cs, wc,
(uchar*) min_str, (uchar*) min_end)) <= 0)
goto pad_set_lengths; /* No space */
min_str+= res;
if ((res= cs->cset->wc_mb(cs, wc,
(uchar*) max_str, (uchar*) max_end)) <= 0)
goto pad_set_lengths; /* No space */
max_str+= res;
continue;
}
else if (wc == (my_wc_t) w_one)
{
if ((res= cs->cset->wc_mb(cs, cs->min_sort_char,
(uchar*) min_str, (uchar*) min_end)) <= 0)
goto pad_set_lengths;
min_str+= res;
if ((res= cs->cset->wc_mb(cs, cs->max_sort_char,
(uchar*) max_str, (uchar*) max_end)) <= 0)
goto pad_set_lengths;
max_str+= res;
continue;
}
else if (wc == (my_wc_t) w_many)
{
/*
Calculate length of keys:
a\min\min... is the smallest possible string
a\max\max... is the biggest possible string
*/
*min_length= ((cs->state & MY_CS_BINSORT) ?
(size_t) (min_str - min_org) :
res_length);
*max_length= res_length;
goto pad_min_max;
}
if (have_contractions &&
my_cs_can_be_contraction_head(cs, wc) &&
(res= cs->cset->mb_wc(cs, &wc2, (uchar*) ptr, (uchar*) end)) > 0)
{
uint16 *weight;
if ((wc2 == (my_wc_t) w_one || wc2 == (my_wc_t) w_many))
{
/* Contraction head followed by a wildcard */
*min_length= *max_length= res_length;
goto pad_min_max;
}
if (my_cs_can_be_contraction_tail(cs, wc2) &&
(weight= my_cs_contraction2_weight(cs, wc, wc2)) && weight[0])
{
/* Contraction found */
if (charlen == 1)
{
/* contraction does not fit to result */
*min_length= *max_length= res_length;
goto pad_min_max;
}
ptr+= res;
charlen--;
/* Put contraction head */
if ((res= cs->cset->wc_mb(cs, wc,
(uchar*) min_str, (uchar*) min_end)) <= 0)
goto pad_set_lengths;
min_str+= res;
if ((res= cs->cset->wc_mb(cs, wc,
(uchar*) max_str, (uchar*) max_end)) <= 0)
goto pad_set_lengths;
max_str+= res;
wc= wc2; /* Prepare to put contraction tail */
}
}
/* Normal character, or contraction tail */
if ((res= cs->cset->wc_mb(cs, wc,
(uchar*) min_str, (uchar*) min_end)) <= 0)
goto pad_set_lengths;
min_str+= res;
if ((res= cs->cset->wc_mb(cs, wc,
(uchar*) max_str, (uchar*) max_end)) <= 0)
goto pad_set_lengths;
max_str+= res;
}
pad_set_lengths:
*min_length= (size_t) (min_str - min_org);
*max_length= (size_t) (max_str - max_org);
pad_min_max:
/*
Fill up max_str and min_str to res_length.
fill() cannot set incomplete characters and
requires that "length" argument is divisible to mbminlen.
Make sure to call fill() with proper "length" argument.
*/
res_length_diff= res_length % cs->mbminlen;
cs->cset->fill(cs, min_str, min_end - min_str - res_length_diff,
cs->min_sort_char);
cs->cset->fill(cs, max_str, max_end - max_str - res_length_diff,
cs->max_sort_char);
/* In case of incomplete characters set the remainder to 0x00's */
if (res_length_diff)
{
/* Example: odd res_length for ucs2 */
memset(min_end - res_length_diff, 0, res_length_diff);
memset(max_end - res_length_diff, 0, res_length_diff);
}
return FALSE;
}
int
my_wildcmp_mb_bin(CHARSET_INFO *cs,
const char *str,const char *str_end,
......
......@@ -8127,7 +8127,7 @@ MY_COLLATION_HANDLER my_collation_ucs2_uca_handler =
my_strnncollsp_ucs2_uca,
my_strnxfrm_ucs2_uca,
my_strnxfrmlen_simple,
my_like_range_ucs2,
my_like_range_generic,
my_wildcmp_uca,
NULL,
my_instr_mb,
......@@ -10134,7 +10134,7 @@ MY_COLLATION_HANDLER my_collation_utf32_uca_handler =
my_strnncollsp_any_uca,
my_strnxfrm_any_uca,
my_strnxfrmlen_simple,
my_like_range_utf32,
my_like_range_generic,
my_wildcmp_uca,
NULL,
my_instr_mb,
......@@ -10801,7 +10801,7 @@ MY_COLLATION_HANDLER my_collation_utf16_uca_handler =
my_strnncollsp_any_uca,
my_strnxfrm_any_uca,
my_strnxfrmlen_simple,
my_like_range_utf16,
my_like_range_generic,
my_wildcmp_uca,
NULL,
my_instr_mb,
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment