Bug#57737 Character sets: search fails with like, contraction, index

Problem: LIKE over an indexed column optimized away good results, because my_like_range_utf32/utf16 returned wrong ranges for contractions. Contraction related code was missing in my_like_range_utf32/utf16, but did exist in my_like_range_ucs2/utf8. It was forgotten in utf32/utf16 versions (during mysql-6.0 push/revert mess). Fix: The patch removes individual functions my_like_range_ucs2, my_like_range_utf16, my_like_range_utf32 and introduces a single function my_like_range_generic() instead. The new function handles contractions correctly. It can handle any character set with cs->min_sort_char and cs->max_sort_char represented in Unicode code points. added: @ mysql-test/include/ctype_czech.inc @ mysql-test/include/ctype_like_ignorable.inc @ mysql-test/r/ctype_like_range.result @ mysql-test/t/ctype_like_range.test Adding tests modified: @ include/m_ctype.h - Adding helper functions for contractions. - Prototypes: removing ucs2,utf16,utf32 functions, adding generic function. @ mysql-test/r/ctype_uca.result @ mysql-test/r/ctype_utf16_uca.result @ mysql-test/r/ctype_utf32_uca.result @ mysql-test/t/ctype_uca.test @ mysql-test/t/ctype_utf16_uca.test @ mysql-test/t/ctype_utf32_uca.test - Adding tests. @ strings/ctype-mb.c - Pad function did not put the last character. - Implementing my_like_range_generic() - a universal replacement for three separate functions my_like_range_ucs2(), my_like_range_utf16() and my_like_range_utf32(), with correct contraction handling. @ strings/ctype-ucs2.c - my_fill_mb2 did not put the high byte, as previously it was used to put only characters in ASCII range. Now it puts high byte as well (needed to populate cs->max_sort_char correctly). - Adding DBUG_ASSERT() - Removing character set specific functions: my_like_range_ucs2(), my_like_range_utf16() and my_like_range_utf32(). - Using my_like_range_generic() instead of the old functions. @ strings/ctype-uca.c - Using generic function instead of the old character set specific ones. 
@ sql/item_create.cc @ sql/item_strfunc.cc @ sql/item_strfunc.h - Adding SQL functions LIKE_RANGE_MIN and LIKE_RANGE_MAX, available only in debug build to make sure like_range() works correctly for all character sets and collations.

Bug#57737 Character sets: search fails with like, contraction, index
Problem: LIKE over an indexed column optimized away good results, because my_like_range_utf32/utf16 returned wrong ranges for contractions. Contraction related code was missing in my_like_range_utf32/utf16, but did exist in my_like_range_ucs2/utf8. It was forgotten in utf32/utf16 versions (during mysql-6.0 push/revert mess). Fix: The patch removes individual functions my_like_range_ucs2, my_like_range_utf16, my_like_range_utf32 and introduces a single function my_like_range_generic() instead. The new function handles contractions correctly. It can handle any character set with cs->min_sort_char and cs->max_sort_char represented in Unicode code points. added: @ mysql-test/include/ctype_czech.inc @ mysql-test/include/ctype_like_ignorable.inc @ mysql-test/r/ctype_like_range.result @ mysql-test/t/ctype_like_range.test Adding tests modified: @ include/m_ctype.h - Adding helper functions for contractions. - Prototypes: removing ucs2,utf16,utf32 functions, adding generic function. @ mysql-test/r/ctype_uca.result @ mysql-test/r/ctype_utf16_uca.result @ mysql-test/r/ctype_utf32_uca.result @ mysql-test/t/ctype_uca.test @ mysql-test/t/ctype_utf16_uca.test @ mysql-test/t/ctype_utf32_uca.test - Adding tests. @ strings/ctype-mb.c - Pad function did not put the last character. - Implementing my_like_range_generic() - a universal replacement for three separate functions my_like_range_ucs2(), my_like_range_utf16() and my_like_range_utf32(), with correct contraction handling. @ strings/ctype-ucs2.c - my_fill_mb2 did not put the high byte, as previously it was used to put only characters in ASCII range. Now it puts high byte as well (needed to populate cs->max_sort_char correctly). - Adding DBUG_ASSERT() - Removing character set specific functions: my_like_range_ucs2(), my_like_range_utf16() and my_like_range_utf32(). - Using my_like_range_generic() instead of the old functions. @ strings/ctype-uca.c - Using generic function instead of the old character set specific ones. 
@ sql/item_create.cc @ sql/item_strfunc.cc @ sql/item_strfunc.h - Adding SQL functions LIKE_RANGE_MIN and LIKE_RANGE_MAX, available only in debug build to make sure like_range() works correctly for all character sets and collations.
e3dee8a7 · Alexander Barkov · ce441751 · e3dee8a7 · e3dee8a7 · e3dee8a7
Commit e3dee8a7 authored Nov 26, 2010 by Alexander Barkov
17 changed files
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -356,6 +356,65 @@ extern CHARSET_INFO my_charset_utf8mb4_unicode_ci;
 #define MY_UTF8MB4                 "utf8mb4"


+/*
+  Helper functions to handle contractions.
+
+  cs->contractions is addressed in two ways by the helpers below:
+  - as uint16 weights of two-character contractions, at index
+    (wc1 - 0x40) * 0x40 + (wc2 - 0x40); rows are 0x40 entries wide,
+    so both characters must be in the range 0x40..0x7F;
+  - as per-character "can be a part of a contraction" byte flags,
+    read at byte offset 0x40*0x40 + character code.
+*/
+
+/* Return TRUE if the collation has contraction data. */
+static inline my_bool
+my_cs_have_contractions(CHARSET_INFO *cs)
+{
+  return cs->contractions != NULL;
+}
+
+/*
+  Return TRUE if wc can be the first character of a contraction.
+  The caller must check my_cs_have_contractions() first.
+
+  Characters outside of 0x40..0x7F are rejected instead of being folded
+  with (wc & 0xFF): a character such as U+0163 would otherwise share the
+  flag of 0x63 and make my_cs_contraction2_weight() compute an index
+  that does not belong to any character pair.
+*/
+static inline my_bool
+my_cs_can_be_contraction_head(CHARSET_INFO *cs, my_wc_t wc)
+{
+  return wc >= 0x40 && wc <= 0x7F &&
+         ((const char *)cs->contractions)[0x40*0x40 + wc];
+}
+
+/*
+  Return TRUE if wc can be the second character of a contraction.
+  Same contract and same range check as my_cs_can_be_contraction_head();
+  both helpers read the same flag byte.
+*/
+static inline my_bool
+my_cs_can_be_contraction_tail(CHARSET_INFO *cs, my_wc_t wc)
+{
+  return wc >= 0x40 && wc <= 0x7F &&
+         ((const char *)cs->contractions)[0x40*0x40 + wc];
+}
+
+/*
+  Return a pointer to the weight slot of the contraction wc1+wc2.
+  No range check is done here: call it only after
+  my_cs_can_be_contraction_head(wc1) and my_cs_can_be_contraction_tail(wc2)
+  have both returned TRUE.
+*/
+static inline uint16*
+my_cs_contraction2_weight(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2)
+{
+  return &cs->contractions[(wc1 - 0x40) * 0x40 + wc2 - 0x40];
+}
+
+
 /* declarations for simple charsets */
 extern size_t my_strnxfrm_simple(CHARSET_INFO *, uchar *, size_t,
                                 const uchar *, size_t); 
@@ -430,6 +456,7 @@ ulonglong my_strntoull10rnd_ucs2(CHARSET_INFO *cs,

 void my_fill_8bit(CHARSET_INFO *cs, char* to, size_t l, int fill);

+/* For 8-bit character set */
 my_bool  my_like_range_simple(CHARSET_INFO *cs,
 			      const char *ptr, size_t ptr_length,
 			      pbool escape, pbool w_one, pbool w_many,
@@ -437,6 +464,7 @@ my_bool  my_like_range_simple(CHARSET_INFO *cs,
 			      char *min_str, char *max_str,
 			      size_t *min_length, size_t *max_length);

+/* For ASCII-based multi-byte character sets with mbminlen=1 */
 my_bool  my_like_range_mb(CHARSET_INFO *cs,
 			  const char *ptr, size_t ptr_length,
 			  pbool escape, pbool w_one, pbool w_many,
@@ -444,26 +472,13 @@ my_bool  my_like_range_mb(CHARSET_INFO *cs,
 			  char *min_str, char *max_str,
 			  size_t *min_length, size_t *max_length);

-my_bool  my_like_range_ucs2(CHARSET_INFO *cs,
-			    const char *ptr, size_t ptr_length,
-			    pbool escape, pbool w_one, pbool w_many,
-			    size_t res_length,
-			    char *min_str, char *max_str,
-			    size_t *min_length, size_t *max_length);
-
-my_bool  my_like_range_utf16(CHARSET_INFO *cs,
-			     const char *ptr, size_t ptr_length,
-			     pbool escape, pbool w_one, pbool w_many,
-			     size_t res_length,
-			     char *min_str, char *max_str,
-			     size_t *min_length, size_t *max_length);
-
-my_bool  my_like_range_utf32(CHARSET_INFO *cs,
-			     const char *ptr, size_t ptr_length,
-			     pbool escape, pbool w_one, pbool w_many,
-			     size_t res_length,
-			     char *min_str, char *max_str,
-			     size_t *min_length, size_t *max_length);
+/* For other character sets, with arbitrary mbminlen and mbmaxlen numbers */
+my_bool  my_like_range_generic(CHARSET_INFO *cs,
+                               const char *ptr, size_t ptr_length,
+                               pbool escape, pbool w_one, pbool w_many,
+                               size_t res_length,
+                               char *min_str, char *max_str,
+                               size_t *min_length, size_t *max_length);

 int my_wildcmp_8bit(CHARSET_INFO *,
 		    const char *str,const char *str_end,

--- a/mysql-test/include/ctype_czech.inc
+++ b/mysql-test/include/ctype_czech.inc
+# Included by the collation tests after collation_connection has been set
+# (the including tests select a *_czech_ci collation first).
+# Checks that LIKE 'c%' returns the same rows with and without an index
+# on the column, and that LIKE 'ch' still finds its row when the index
+# covers only a one-character prefix.
+SELECT @@collation_connection;
+--echo #
+--echo # Bug#57737 Character sets: search fails with like, contraction, index
+--echo #
+CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
+INSERT INTO t1 VALUES ('c'),('ce'),('cé'),('ch');
+SELECT * FROM t1 WHERE s1 LIKE 'c%';
+ALTER TABLE t1 ADD KEY s1 (s1);
+SELECT * FROM t1 WHERE s1 LIKE 'c%';
+ALTER TABLE t1 DROP KEY s1, ADD KEY(s1(1));
+SELECT * FROM t1 WHERE s1 LIKE 'ch';
+DROP TABLE t1;
--- a/mysql-test/include/ctype_like_ignorable.inc
+++ b/mysql-test/include/ctype_like_ignorable.inc
+# Included by the collation tests after collation_connection has been set.
+# Checks that LIKE 'a%' finds both 'a' and 'a' followed by 0x00 characters
+# and a tab, and that adding an index on the column does not change the
+# result.
+SELECT @@collation_connection;
+--echo #
+--echo # Bug#57737 Character sets: search fails with like, contraction, index
+--echo # Part#2 - ignorable characters
+--echo #
+CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
+INSERT INTO t1 VALUES ('a\0\0\0\0\0\t'),('a'),('b'),('c'),('d'),('e');
+SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
+ALTER TABLE t1 ADD KEY s1 (s1);
+SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
+DROP TABLE t1;
--- a/mysql-test/r/ctype_like_range.result
+++ b/mysql-test/r/ctype_like_range.result
--- a/mysql-test/r/ctype_uca.result
+++ b/mysql-test/r/ctype_uca.result
@@ -2888,3 +2888,101 @@ a	hex(b)	c
 DROP TABLE t1;
 set names utf8;
 End for 5.0 tests
+#
+# Start of 5.5 tests
+#
+SET collation_connection=utf8_czech_ci;
+SELECT @@collation_connection;
+@@collation_connection
+utf8_czech_ci
+#
+# Bug#57737 Character sets: search fails with like, contraction, index
+#
+CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
+INSERT INTO t1 VALUES ('c'),('ce'),('cé'),('ch');
+SELECT * FROM t1 WHERE s1 LIKE 'c%';
+s1
+c
+ce
+cé
+ch
+ALTER TABLE t1 ADD KEY s1 (s1);
+SELECT * FROM t1 WHERE s1 LIKE 'c%';
+s1
+c
+ce
+cé
+ch
+ALTER TABLE t1 DROP KEY s1, ADD KEY(s1(1));
+SELECT * FROM t1 WHERE s1 LIKE 'ch';
+s1
+ch
+DROP TABLE t1;
+SELECT @@collation_connection;
+@@collation_connection
+utf8_czech_ci
+#
+# Bug#57737 Character sets: search fails with like, contraction, index
+# Part#2 - ignorable characters
+#
+CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
+INSERT INTO t1 VALUES ('a\0\0\0\0\0\t'),('a'),('b'),('c'),('d'),('e');
+SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
+HEX(s1)
+61000000000009
+61
+ALTER TABLE t1 ADD KEY s1 (s1);
+SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
+HEX(s1)
+61000000000009
+61
+DROP TABLE t1;
+SET collation_connection=ucs2_czech_ci;
+SELECT @@collation_connection;
+@@collation_connection
+ucs2_czech_ci
+#
+# Bug#57737 Character sets: search fails with like, contraction, index
+#
+CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
+INSERT INTO t1 VALUES ('c'),('ce'),('cé'),('ch');
+SELECT * FROM t1 WHERE s1 LIKE 'c%';
+s1
+c
+ce
+cé
+ch
+ALTER TABLE t1 ADD KEY s1 (s1);
+SELECT * FROM t1 WHERE s1 LIKE 'c%';
+s1
+c
+ce
+cé
+ch
+ALTER TABLE t1 DROP KEY s1, ADD KEY(s1(1));
+SELECT * FROM t1 WHERE s1 LIKE 'ch';
+s1
+ch
+DROP TABLE t1;
+SELECT @@collation_connection;
+@@collation_connection
+ucs2_czech_ci
+#
+# Bug#57737 Character sets: search fails with like, contraction, index
+# Part#2 - ignorable characters
+#
+CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
+INSERT INTO t1 VALUES ('a\0\0\0\0\0\t'),('a'),('b'),('c'),('d'),('e');
+SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
+HEX(s1)
+0061000000000000000000000009
+0061
+ALTER TABLE t1 ADD KEY s1 (s1);
+SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
+HEX(s1)
+0061000000000000000000000009
+0061
+DROP TABLE t1;
+#
+# End of 5.5 tests
+#
--- a/mysql-test/r/ctype_utf16_uca.result
+++ b/mysql-test/r/ctype_utf16_uca.result
@@ -2368,6 +2368,52 @@ NULL
 NULL
 NULL
 drop table t1;
+SET collation_connection=utf16_czech_ci;
+SELECT @@collation_connection;
+@@collation_connection
+utf16_czech_ci
+#
+# Bug#57737 Character sets: search fails with like, contraction, index
+#
+CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
+INSERT INTO t1 VALUES ('c'),('ce'),('cé'),('ch');
+SELECT * FROM t1 WHERE s1 LIKE 'c%';
+s1
+c
+ce
+cé
+ch
+ALTER TABLE t1 ADD KEY s1 (s1);
+SELECT * FROM t1 WHERE s1 LIKE 'c%';
+s1
+c
+ce
+cé
+ch
+ALTER TABLE t1 DROP KEY s1, ADD KEY(s1(1));
+SELECT * FROM t1 WHERE s1 LIKE 'ch';
+s1
+ch
+DROP TABLE t1;
+SELECT @@collation_connection;
+@@collation_connection
+utf16_czech_ci
+#
+# Bug#57737 Character sets: search fails with like, contraction, index
+# Part#2 - ignorable characters
+#
+CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
+INSERT INTO t1 VALUES ('a\0\0\0\0\0\t'),('a'),('b'),('c'),('d'),('e');
+SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
+HEX(s1)
+0061000000000000000000000009
+0061
+ALTER TABLE t1 ADD KEY s1 (s1);
+SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
+HEX(s1)
+0061000000000000000000000009
+0061
+DROP TABLE t1;
 #
 # End of 5.5 tests
 #
--- a/mysql-test/r/ctype_utf32_uca.result
+++ b/mysql-test/r/ctype_utf32_uca.result
@@ -2368,6 +2368,52 @@ NULL
 NULL
 NULL
 drop table t1;
+SET collation_connection=utf32_czech_ci;
+SELECT @@collation_connection;
+@@collation_connection
+utf32_czech_ci
+#
+# Bug#57737 Character sets: search fails with like, contraction, index
+#
+CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
+INSERT INTO t1 VALUES ('c'),('ce'),('cé'),('ch');
+SELECT * FROM t1 WHERE s1 LIKE 'c%';
+s1
+c
+ce
+cé
+ch
+ALTER TABLE t1 ADD KEY s1 (s1);
+SELECT * FROM t1 WHERE s1 LIKE 'c%';
+s1
+c
+ce
+cé
+ch
+ALTER TABLE t1 DROP KEY s1, ADD KEY(s1(1));
+SELECT * FROM t1 WHERE s1 LIKE 'ch';
+s1
+ch
+DROP TABLE t1;
+SELECT @@collation_connection;
+@@collation_connection
+utf32_czech_ci
+#
+# Bug#57737 Character sets: search fails with like, contraction, index
+# Part#2 - ignorable characters
+#
+CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
+INSERT INTO t1 VALUES ('a\0\0\0\0\0\t'),('a'),('b'),('c'),('d'),('e');
+SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
+HEX(s1)
+00000061000000000000000000000000000000000000000000000009
+00000061
+ALTER TABLE t1 ADD KEY s1 (s1);
+SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
+HEX(s1)
+00000061000000000000000000000000000000000000000000000009
+00000061
+DROP TABLE t1;
 #
 # End of 5.5 tests
 #
--- a/mysql-test/t/ctype_like_range.test
+++ b/mysql-test/t/ctype_like_range.test
+#
+# Dump the output of LIKE_RANGE_MIN() and LIKE_RANGE_MAX() for a fixed set
+# of LIKE patterns under a number of character sets and collations.
+# Both functions are compiled only into debug builds, hence have_debug.inc.
+#
+--source include/have_debug.inc
+--source include/have_ucs2.inc
+--source include/have_utf16.inc
+--source include/have_utf32.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+DROP VIEW IF EXISTS v1;
+--enable_warnings
+
+# The patterns: empty string, lone wildcards, escaped wildcards, and
+# strings of 'a'/'c'/'h' optionally followed by '_' or '%'.
+CREATE TABLE t1 (id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, a VARBINARY(32));
+INSERT INTO t1 (a) VALUES (''),('_'),('%'),('\_'),('\%'),('\\');
+INSERT INTO t1 (a) VALUES ('a'),('c');
+INSERT INTO t1 (a) VALUES ('a_'),('c_');
+INSERT INTO t1 (a) VALUES ('a%'),('c%');
+INSERT INTO t1 (a) VALUES ('aa'),('cc'),('ch');
+INSERT INTO t1 (a) VALUES ('aa_'),('cc_'),('ch_');
+INSERT INTO t1 (a) VALUES ('aa%'),('cc%'),('ch%');
+INSERT INTO t1 (a) VALUES ('aaa'),('ccc'),('cch');
+INSERT INTO t1 (a) VALUES ('aaa_'),('ccc_'),('cch_');
+INSERT INTO t1 (a) VALUES ('aaa%'),('ccc%'),('cch%');
+INSERT INTO t1 (a) VALUES ('aaaaaaaaaaaaaaaaaaaa');
+
+# For every pattern the view lists four rows: the pattern itself ('a'),
+# the hex dump of the 16-byte minimum ('mn') and maximum ('mx') strings,
+# and a separator line ('sp').
+CREATE VIEW v1 AS
+  SELECT id, 'a' AS name, a AS val FROM t1
+UNION
+  SELECT id, 'mn', HEX(LIKE_RANGE_MIN(a, 16)) AS min FROM t1
+UNION
+  SELECT id, 'mx', HEX(LIKE_RANGE_MAX(a, 16)) AS max FROM t1
+UNION
+  SELECT id, 'sp', REPEAT('-', 32) AS sep FROM t1
+ORDER BY id, name;
+SELECT * FROM v1;
+
+# Re-run the same dump after converting the column to each character set
+# and collation of interest.
+ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET latin1;
+SELECT * FROM v1;
+
+ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf8;
+SELECT * FROM v1;
+
+ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf8 COLLATE utf8_unicode_ci;
+SELECT * FROM v1;
+
+ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf8 COLLATE utf8_czech_ci;
+SELECT * FROM v1;
+
+ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf8 COLLATE utf8_danish_ci;
+SELECT * FROM v1;
+
+ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET ucs2;
+SELECT * FROM v1;
+
+ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET ucs2 COLLATE ucs2_unicode_ci;
+SELECT * FROM v1;
+
+ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET ucs2 COLLATE ucs2_czech_ci;
+SELECT * FROM v1;
+
+ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET ucs2 COLLATE ucs2_danish_ci;
+SELECT * FROM v1;
+
+ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf16;
+SELECT * FROM v1;
+
+ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf16 COLLATE utf16_unicode_ci;
+SELECT * FROM v1;
+
+ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf16 COLLATE utf16_czech_ci;
+SELECT * FROM v1;
+
+ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf16 COLLATE utf16_danish_ci;
+SELECT * FROM v1;
+
+ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf32;
+SELECT * FROM v1;
+
+ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf32 COLLATE utf32_unicode_ci;
+SELECT * FROM v1;
+
+ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf32 COLLATE utf32_czech_ci;
+SELECT * FROM v1;
+
+ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf32 COLLATE utf32_danish_ci;
+SELECT * FROM v1;
+
+DROP VIEW v1;
+DROP TABLE t1;
--- a/mysql-test/t/ctype_uca.test
+++ b/mysql-test/t/ctype_uca.test
@@ -545,3 +545,19 @@ set collation_connection=ucs2_unicode_ci;
 set names utf8;

 -- echo End for 5.0 tests
+
+--echo #
+--echo # Start of 5.5 tests
+--echo #
+#
+# Test my_like_range and contractions
+#
+SET collation_connection=utf8_czech_ci;
+--source include/ctype_czech.inc
+--source include/ctype_like_ignorable.inc
+SET collation_connection=ucs2_czech_ci;
+--source include/ctype_czech.inc
+--source include/ctype_like_ignorable.inc
+--echo #
+--echo # End of 5.5 tests
+--echo #
--- a/mysql-test/t/ctype_utf16_uca.test
+++ b/mysql-test/t/ctype_utf16_uca.test
@@ -284,6 +284,13 @@ DROP TABLE IF EXISTS t1;
 set collation_connection=utf16_unicode_ci;
 --source include/ctype_regex.inc

+#
+# Test my_like_range and contractions
+#
+SET collation_connection=utf16_czech_ci;
+--source include/ctype_czech.inc
+--source include/ctype_like_ignorable.inc
+

 --echo #
 --echo # End of 5.5 tests

--- a/mysql-test/t/ctype_utf32_uca.test
+++ b/mysql-test/t/ctype_utf32_uca.test
@@ -286,6 +286,14 @@ set collation_connection=utf32_unicode_ci;
 --source include/ctype_regex.inc


+#
+# Test my_like_range and contractions
+#
+SET collation_connection=utf32_czech_ci;
+--source include/ctype_czech.inc
+--source include/ctype_like_ignorable.inc
+
+
 --echo #
 --echo # End of 5.5 tests
 --echo #
--- a/sql/item_create.cc
+++ b/sql/item_create.cc
@@ -1330,6 +1330,34 @@ class Create_func_length : public Create_func_arg1
 };


+#ifndef DBUG_OFF /* Debug builds only: builders for LIKE_RANGE_MIN/LIKE_RANGE_MAX */
+class Create_func_like_range_min : public Create_func_arg2
+{
+public:
+  virtual Item *create(THD *thd, Item *arg1, Item *arg2); /* Creates Item_func_like_range_min(arg1, arg2) */
+
+  static Create_func_like_range_min s_singleton;
+
+protected:
+  Create_func_like_range_min() {}
+  virtual ~Create_func_like_range_min() {}
+};
+
+/* Builder for LIKE_RANGE_MAX; mirrors Create_func_like_range_min */
+class Create_func_like_range_max : public Create_func_arg2
+{
+public:
+  virtual Item *create(THD *thd, Item *arg1, Item *arg2); /* Creates Item_func_like_range_max(arg1, arg2) */
+
+  static Create_func_like_range_max s_singleton;
+
+protected:
+  Create_func_like_range_max() {}
+  virtual ~Create_func_like_range_max() {}
+};
+#endif /* DBUG_OFF */
+
+
 class Create_func_ln : public Create_func_arg1
 {
 public:
@@ -3836,6 +3864,26 @@ Create_func_length::create(THD *thd, Item *arg1)
 }


+#ifndef DBUG_OFF /* Debug builds only: LIKE_RANGE_MIN/LIKE_RANGE_MAX builders */
+Create_func_like_range_min Create_func_like_range_min::s_singleton;
+/* Allocate the LIKE_RANGE_MIN item on thd->mem_root */
+Item*
+Create_func_like_range_min::create(THD *thd, Item *arg1, Item *arg2)
+{
+  return new (thd->mem_root) Item_func_like_range_min(arg1, arg2);
+}
+
+
+Create_func_like_range_max Create_func_like_range_max::s_singleton;
+/* Allocate the LIKE_RANGE_MAX item on thd->mem_root */
+Item*
+Create_func_like_range_max::create(THD *thd, Item *arg1, Item *arg2)
+{
+  return new (thd->mem_root) Item_func_like_range_max(arg1, arg2);
+}
+#endif /* DBUG_OFF */
+
+
 Create_func_ln Create_func_ln::s_singleton;

 Item*
@@ -4924,6 +4972,10 @@ static Native_func_registry func_array[] =
  { { C_STRING_WITH_LEN("LCASE") }, BUILDER(Create_func_lcase)},
  { { C_STRING_WITH_LEN("LEAST") }, BUILDER(Create_func_least)},
  { { C_STRING_WITH_LEN("LENGTH") }, BUILDER(Create_func_length)},
+#ifndef DBUG_OFF
+  { { C_STRING_WITH_LEN("LIKE_RANGE_MIN") }, BUILDER(Create_func_like_range_min)},
+  { { C_STRING_WITH_LEN("LIKE_RANGE_MAX") }, BUILDER(Create_func_like_range_max)},
+#endif
  { { C_STRING_WITH_LEN("LINEFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)},
  { { C_STRING_WITH_LEN("LINEFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)},
  { { C_STRING_WITH_LEN("LINESTRINGFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)},

--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@@ -3128,6 +3128,41 @@ String *Item_func_unhex::val_str(String *str)
 }


+#ifndef DBUG_OFF /* LIKE_RANGE_MIN()/LIKE_RANGE_MAX(): debug builds only */
+String *Item_func_like_range::val_str(String *str)
+{
+  DBUG_ASSERT(fixed == 1);
+  longlong nbytes= args[1]->val_int();  /* size of each result buffer */
+  String *res= args[0]->val_str(str);   /* the LIKE pattern */
+  size_t min_len, max_len;
+  CHARSET_INFO *cs= collation.collation;
+  /* Return NULL on NULL input, a size out of range, or if alloc() fails */
+  if (!res || args[0]->null_value || args[1]->null_value ||
+      nbytes < 0 || nbytes > MAX_BLOB_WIDTH ||
+      min_str.alloc(nbytes) || max_str.alloc(nbytes))
+    goto err;
+  null_value=0;
+  /* Compute both bounds; escape is '\', the wildcards are '_' and '%' */
+  if (cs->coll->like_range(cs, res->ptr(), res->length(),
+                           '\\', '_', '%', nbytes,
+                           (char*) min_str.ptr(), (char*) max_str.ptr(),
+                           &min_len, &max_len))
+    goto err;
+  /* like_range() wrote into the buffers directly: set charset and lengths */
+  min_str.set_charset(collation.collation);
+  max_str.set_charset(collation.collation);
+  min_str.length(min_len);
+  max_str.length(max_len);
+
+  return is_min ? &min_str : &max_str;
+
+err:
+  null_value= 1;
+  return 0;
+}
+#endif /* DBUG_OFF */
+
+
 void Item_func_binary::print(String *str, enum_query_type query_type)
 {
  str->append(STRING_WITH_LEN("cast("));

--- a/sql/item_strfunc.h
+++ b/sql/item_strfunc.h
@@ -657,6 +657,46 @@ class Item_func_unhex :public Item_str_func
 };


+#ifndef DBUG_OFF /* Debug builds only: LIKE_RANGE_MIN()/LIKE_RANGE_MAX() items */
+class Item_func_like_range :public Item_str_func
+{
+protected:
+  String min_str;      /* lower bound, filled by val_str() */
+  String max_str;      /* upper bound, filled by val_str() */
+  const bool is_min;   /* true: val_str() returns min_str; false: max_str */
+public:
+  Item_func_like_range(Item *a, Item *b, bool is_min_arg)
+    :Item_str_func(a, b), is_min(is_min_arg)
+  { maybe_null= 1; }   /* val_str() can return NULL */
+  String *val_str(String *);
+  void fix_length_and_dec()
+  {
+    collation.set(args[0]->collation);  /* result uses the first argument's collation */
+    decimals=0;
+    max_length= MAX_BLOB_WIDTH;
+  }
+};
+
+/* LIKE_RANGE_MIN(a, b): Item_func_like_range with is_min set to true */
+class Item_func_like_range_min :public Item_func_like_range
+{
+public:
+  Item_func_like_range_min(Item *a, Item *b) 
+    :Item_func_like_range(a, b, true) { }
+  const char *func_name() const { return "like_range_min"; }
+};
+
+/* LIKE_RANGE_MAX(a, b): Item_func_like_range with is_min set to false */
+class Item_func_like_range_max :public Item_func_like_range
+{
+public:
+  Item_func_like_range_max(Item *a, Item *b)
+    :Item_func_like_range(a, b, false) { }
+  const char *func_name() const { return "like_range_max"; }
+};
+#endif /* DBUG_OFF */
+
+
 class Item_func_binary :public Item_str_func
 {
 public:

--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@@ -636,7 +636,7 @@ static void pad_max_char(CHARSET_INFO *cs, char *str, char *end)
  DBUG_ASSERT(buflen > 0);
  do
  {
-    if ((str + buflen) < end)
+    if ((str + buflen) <= end)
    {
      /* Enough space for the characer */
      memcpy(str, buf, buflen);
@@ -802,6 +802,192 @@ my_bool my_like_range_mb(CHARSET_INFO *cs,
 }


+/**
+   Calculate min_str and max_str that bound the strings matching a LIKE pattern.
+   Generic function, currently used for ucs2, utf16, utf32,
+   but should be suitable for any other character sets with
+   cs->min_sort_char and cs->max_sort_char represented in
+   Unicode code points.
+
+   @param cs           Character set and collation pointer
+   @param ptr          Pointer to LIKE pattern.
+   @param ptr_length   Length of LIKE pattern, in bytes.
+   @param escape       Escape character pattern,  typically '\'.
+   @param w_one        'One character' pattern,   typically '_'.
+   @param w_many       'Many characters' pattern, typically '%'.
+   @param res_length   Length of min_str and max_str, in bytes.
+
+   @param[out] min_str    Smallest string that ranges LIKE.
+   @param[out] max_str    Largest string that ranges LIKE.
+   @param[out] min_length Length of min_str
+   @param[out] max_length Length of max_str
+
+   @return Optimization status.
+   @retval FALSE if LIKE pattern can be optimized
+   @retval TRUE if LIKE can't be optimized (bad byte sequence in the pattern).
+*/
+my_bool
+my_like_range_generic(CHARSET_INFO *cs,
+                      const char *ptr, size_t ptr_length,
+                      pbool escape, pbool w_one, pbool w_many,
+                      size_t res_length,
+                      char *min_str,char *max_str,
+                      size_t *min_length,size_t *max_length)
+{
+  const char *end= ptr + ptr_length;
+  const char *min_org= min_str;
+  const char *max_org= max_str;
+  char *min_end= min_str + res_length;
+  char *max_end= max_str + res_length;
+  size_t charlen= res_length / cs->mbmaxlen; /* characters left to put */
+  size_t res_length_diff;
+  my_bool have_contractions= my_cs_have_contractions(cs);
+
+  for ( ; charlen > 0; charlen--)
+  {
+    my_wc_t wc, wc2;
+    int res;
+    if ((res= cs->cset->mb_wc(cs, &wc, (uchar*) ptr, (uchar*) end)) <= 0)
+    {
+      if (res == MY_CS_ILSEQ) /* Bad sequence */
+        return TRUE; /* min_length and max_length are not important */
+      break; /* End of the string */
+    }
+    ptr+= res;
+
+    if (wc == (my_wc_t) escape)
+    {
+      if ((res= cs->cset->mb_wc(cs, &wc, (uchar*) ptr, (uchar*) end)) <= 0)
+      {
+        if (res == MY_CS_ILSEQ)
+          return TRUE; /* min_length and max_length are not important */
+        /*
+           End of the string: Escape is the last character.
+           Put escape as a normal character.
+           We will leave the loop on the next iteration.
+        */
+      }
+      else
+        ptr+= res;
+
+      /* Put the escaped character (or a trailing escape) to min_str and max_str */
+      if ((res= cs->cset->wc_mb(cs, wc,
+                                (uchar*) min_str, (uchar*) min_end)) <= 0)
+        goto pad_set_lengths; /* No space */
+      min_str+= res;
+
+      if ((res= cs->cset->wc_mb(cs, wc,
+                                (uchar*) max_str, (uchar*) max_end)) <= 0)
+        goto pad_set_lengths; /* No space */
+      max_str+= res;
+      continue;
+    }
+    else if (wc == (my_wc_t) w_one)
+    {
+      if ((res= cs->cset->wc_mb(cs, cs->min_sort_char,
+                                (uchar*) min_str, (uchar*) min_end)) <= 0)
+        goto pad_set_lengths;
+      min_str+= res;
+
+      if ((res= cs->cset->wc_mb(cs, cs->max_sort_char,
+                                (uchar*) max_str, (uchar*) max_end)) <= 0)
+        goto pad_set_lengths;
+      max_str+= res;
+      continue;
+    }
+    else if (wc == (my_wc_t) w_many)
+    {
+      /*
+        Calculate length of keys:
+        a\min\min... is the smallest possible string
+        a\max\max... is the biggest possible string
+      */
+      *min_length= ((cs->state & MY_CS_BINSORT) ?
+                    (size_t) (min_str - min_org) :
+                    res_length);
+      *max_length= res_length;
+      goto pad_min_max;
+    }
+
+    if (have_contractions &&
+        my_cs_can_be_contraction_head(cs, wc) &&
+        (res= cs->cset->mb_wc(cs, &wc2, (uchar*) ptr, (uchar*) end)) > 0)
+    {
+      uint16 *weight;
+      if ((wc2 == (my_wc_t) w_one || wc2 == (my_wc_t) w_many))
+      {
+        /* Contraction head followed by a wildcard */
+        *min_length= *max_length= res_length;
+        goto pad_min_max;
+      }
+
+      if (my_cs_can_be_contraction_tail(cs, wc2) &&
+          (weight= my_cs_contraction2_weight(cs, wc, wc2)) && weight[0])
+      {
+        /* Contraction found */
+        if (charlen == 1)
+        {
+          /* contraction does not fit to result */
+          *min_length= *max_length= res_length;
+          goto pad_min_max;
+        }
+
+        ptr+= res;
+        charlen--;
+
+        /* Put contraction head */
+        if ((res= cs->cset->wc_mb(cs, wc,
+                                  (uchar*) min_str, (uchar*) min_end)) <= 0)
+          goto pad_set_lengths;
+        min_str+= res;
+
+        if ((res= cs->cset->wc_mb(cs, wc,
+                                  (uchar*) max_str, (uchar*) max_end)) <= 0)
+          goto pad_set_lengths;
+        max_str+= res;
+        wc= wc2; /* Prepare to put contraction tail */
+      }
+    }
+
+    /* Normal character, or contraction tail */
+    if ((res= cs->cset->wc_mb(cs, wc,
+                              (uchar*) min_str, (uchar*) min_end)) <= 0)
+      goto pad_set_lengths;
+    min_str+= res;
+    if ((res= cs->cset->wc_mb(cs, wc,
+                              (uchar*) max_str, (uchar*) max_end)) <= 0)
+      goto pad_set_lengths;
+    max_str+= res;
+  }
+
+pad_set_lengths:
+  *min_length= (size_t) (min_str - min_org);
+  *max_length= (size_t) (max_str - max_org);
+
+pad_min_max:
+  /*
+    Fill up max_str and min_str to res_length.
+    fill() cannot set incomplete characters and
+    requires that "length" argument is divisible to mbminlen.
+    Make sure to call fill() with proper "length" argument.
+  */
+  res_length_diff= res_length % cs->mbminlen;
+  cs->cset->fill(cs, min_str, min_end - min_str - res_length_diff,
+                 cs->min_sort_char);
+  cs->cset->fill(cs, max_str, max_end - max_str - res_length_diff,
+                 cs->max_sort_char);
+
+  /* In case of incomplete characters set the remainder to 0x00's */
+  if (res_length_diff)
+  {
+    /* Example: odd res_length for ucs2 */
+    memset(min_end - res_length_diff, 0, res_length_diff);
+    memset(max_end - res_length_diff, 0, res_length_diff);
+  }
+  return FALSE;
+}
+
+
 int
 my_wildcmp_mb_bin(CHARSET_INFO *cs,
                  const char *str,const char *str_end,

--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -8127,7 +8127,7 @@ MY_COLLATION_HANDLER my_collation_ucs2_uca_handler =
  my_strnncollsp_ucs2_uca,
  my_strnxfrm_ucs2_uca,
  my_strnxfrmlen_simple,
-  my_like_range_ucs2,
+  my_like_range_generic,
  my_wildcmp_uca,
  NULL,
  my_instr_mb,
@@ -10134,7 +10134,7 @@ MY_COLLATION_HANDLER my_collation_utf32_uca_handler =
    my_strnncollsp_any_uca,
    my_strnxfrm_any_uca,
    my_strnxfrmlen_simple,
-    my_like_range_utf32,
+    my_like_range_generic,
    my_wildcmp_uca,
    NULL,
    my_instr_mb,
@@ -10801,7 +10801,7 @@ MY_COLLATION_HANDLER my_collation_utf16_uca_handler =
    my_strnncollsp_any_uca,
    my_strnxfrm_any_uca,
    my_strnxfrmlen_simple,
-    my_like_range_utf16,
+    my_like_range_generic,
    my_wildcmp_uca,
    NULL,
    my_instr_mb,

--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c