Commit 13c32771 authored by Alexander Barkov's avatar Alexander Barkov

Backporting WL#1213

parent b5936f73
......@@ -13,11 +13,11 @@ define(CHARSETS_AVAILABLE1,armscii8 ascii big5 cp1250 cp1251 cp1256 cp1257)
define(CHARSETS_AVAILABLE2,cp850 cp852 cp866 cp932 dec8 eucjpms euckr gb2312 gbk geostd8)
define(CHARSETS_AVAILABLE3,greek hebrew hp8 keybcs2 koi8r koi8u)
define(CHARSETS_AVAILABLE4,latin1 latin2 latin5 latin7 macce macroman)
define(CHARSETS_AVAILABLE5,sjis swe7 tis620 ucs2 ujis utf8)
define(CHARSETS_AVAILABLE5,sjis swe7 tis620 ucs2 ujis utf8mb4 utf8 utf16 utf32)
DEFAULT_CHARSET=latin1
CHARSETS_AVAILABLE="CHARSETS_AVAILABLE0 CHARSETS_AVAILABLE1 CHARSETS_AVAILABLE2 CHARSETS_AVAILABLE3 CHARSETS_AVAILABLE4 CHARSETS_AVAILABLE5"
CHARSETS_COMPLEX="big5 cp1250 cp932 eucjpms euckr gb2312 gbk latin1 latin2 sjis tis620 ucs2 ujis utf8"
CHARSETS_COMPLEX="big5 cp1250 cp932 eucjpms euckr gb2312 gbk latin1 latin2 sjis tis620 ucs2 ujis utf8mb4 utf8 utf16 utf32"
AC_DIVERT_POP
......@@ -50,7 +50,7 @@ AC_ARG_WITH(extra-charsets,
AC_MSG_CHECKING("character sets")
CHARSETS="$default_charset latin1 utf8"
CHARSETS="$default_charset latin1 utf8mb4 utf8"
if test "$extra_charsets" = no; then
CHARSETS="$CHARSETS"
......@@ -195,8 +195,23 @@ do
AC_DEFINE([USE_MB], [1], [Use multi-byte character routines])
AC_DEFINE(USE_MB_IDENT, 1)
;;
utf8mb4)
AC_DEFINE(HAVE_CHARSET_utf8mb4, 1, [Define to enable utf8mb4])
AC_DEFINE([USE_MB], 1, [Use multi-byte character routines])
AC_DEFINE(USE_MB_IDENT, 1)
;;
utf8)
AC_DEFINE(HAVE_CHARSET_utf8, 1, [Define to enable ut8])
AC_DEFINE(HAVE_CHARSET_utf8, 1, [Define to enable utf8])
AC_DEFINE([USE_MB], 1, [Use multi-byte character routines])
AC_DEFINE(USE_MB_IDENT, 1)
;;
utf16)
AC_DEFINE(HAVE_CHARSET_utf16, 1, [Define to enable utf16])
AC_DEFINE([USE_MB], 1, [Use multi-byte character routines])
AC_DEFINE(USE_MB_IDENT, 1)
;;
utf32)
AC_DEFINE(HAVE_CHARSET_utf32, 1, [Define to enable utf32])
AC_DEFINE([USE_MB], 1, [Use multi-byte character routines])
AC_DEFINE(USE_MB_IDENT, 1)
;;
......@@ -381,6 +396,48 @@ case $default_charset in
fi
default_charset_collations="$UTFC"
;;
utf8mb4)
default_charset_default_collation="utf8mb4_general_ci"
define(UTFC1, utf8mb4_general_ci utf8mb4_bin)
define(UTFC2, utf8mb4_czech_ci utf8mb4_danish_ci)
define(UTFC3, utf8mb4_esperanto_ci utf8mb4_estonian_ci utf8mb4_hungarian_ci)
define(UTFC4, utf8mb4_icelandic_ci utf8mb4_latvian_ci utf8mb4_lithuanian_ci)
define(UTFC5, utf8mb4_persian_ci utf8mb4_polish_ci utf8mb4_romanian_ci)
define(UTFC6, utf8mb4_sinhala_ci utf8mb4_slovak_ci utf8mb4_slovenian_ci)
define(UTFC7, utf8mb4_spanish2_ci utf8mb4_spanish_ci)
define(UTFC8, utf8mb4_swedish_ci utf8mb4_turkish_ci)
define(UTFC9, utf8mb4_unicode_ci)
UTFC="UTFC1 UTFC2 UTFC3 UTFC4 UTFC5 UTFC6 UTFC7 UTFC8 UTFC9"
default_charset_collations="$UTFC"
;;
utf16)
default_charset_default_collation="utf16_general_ci"
define(UTFC1, utf16_general_ci utf16_bin)
define(UTFC2, utf16_czech_ci utf16_danish_ci)
define(UTFC3, utf16_esperanto_ci utf16_estonian_ci utf16_hungarian_ci)
define(UTFC4, utf16_icelandic_ci utf16_latvian_ci utf16_lithuanian_ci)
define(UTFC5, utf16_persian_ci utf16_polish_ci utf16_romanian_ci)
define(UTFC6, utf16_sinhala_ci utf16_slovak_ci utf16_slovenian_ci)
define(UTFC7, utf16_spanish2_ci utf16_spanish_ci)
define(UTFC8, utf16_swedish_ci utf16_turkish_ci)
define(UTFC9, utf16_unicode_ci)
UTFC="UTFC1 UTFC2 UTFC3 UTFC4 UTFC5 UTFC6 UTFC7 UTFC8 UTFC9"
default_charset_collations="$UTFC"
;;
utf32)
default_charset_default_collation="utf32_general_ci"
define(UTFC1, utf32_general_ci utf32_bin)
define(UTFC2, utf32_czech_ci utf32_danish_ci)
define(UTFC3, utf32_esperanto_ci utf32_estonian_ci utf32_hungarian_ci)
define(UTFC4, utf32_icelandic_ci utf32_latvian_ci utf32_lithuanian_ci)
define(UTFC5, utf32_persian_ci utf32_polish_ci utf32_romanian_ci)
define(UTFC6, utf32_sinhala_ci utf32_slovak_ci utf32_slovenian_ci)
define(UTFC7, utf32_spanish2_ci utf32_spanish_ci)
define(UTFC8, utf32_swedish_ci utf32_turkish_ci)
define(UTFC9, utf32_unicode_ci)
UTFC="UTFC1 UTFC2 UTFC3 UTFC4 UTFC5 UTFC6 UTFC7 UTFC8 UTFC9"
default_charset_collations="$UTFC"
;;
*)
AC_MSG_ERROR([Charset $cs not available. (Available are: $CHARSETS_AVAILABLE).
See the Installation chapter in the Reference Manual.])
......
......@@ -432,6 +432,9 @@ inline ulonglong double2ulonglong(double d)
#define HAVE_CHARSET_ucs2 1
#define HAVE_CHARSET_ujis 1
#define HAVE_CHARSET_utf8 1
#define HAVE_CHARSET_utf8mb4 1
#define HAVE_CHARSET_utf16 1
#define HAVE_CHARSET_utf32 1
#define HAVE_UCA_COLLATIONS 1
#define HAVE_BOOL 1
......
......@@ -98,13 +98,14 @@ extern MY_UNI_CTYPE my_uni_ctype[256];
#define MY_CS_BINSORT 16 /* if binary sort order */
#define MY_CS_PRIMARY 32 /* if primary collation */
#define MY_CS_STRNXFRM 64 /* if strnxfrm is used for sort */
#define MY_CS_UNICODE 128 /* is a charset is full unicode */
#define MY_CS_UNICODE 128 /* is a charset is BMP Unicode */
#define MY_CS_READY 256 /* if a charset is initialized */
#define MY_CS_AVAILABLE 512 /* If either compiled-in or loaded*/
#define MY_CS_CSSORT 1024 /* if case sensitive sort order */
#define MY_CS_HIDDEN 2048 /* don't display in SHOW */
#define MY_CS_PUREASCII 4096 /* if a charset is pure ascii */
#define MY_CS_NONASCII 8192 /* if not ASCII-compatible */
#define MY_CS_UNICODE_SUPPLEMENT 16384 /* Non-BMP Unicode characters */
#define MY_CHARSET_UNDEFINED 0
/* Character repertoire flags */
......@@ -112,7 +113,6 @@ extern MY_UNI_CTYPE my_uni_ctype[256];
#define MY_REPERTOIRE_EXTENDED 2 /* Extended characters: U+0080..U+FFFF */
#define MY_REPERTOIRE_UNICODE30 3 /* ASCII | EXTENDED: U+0000..U+FFFF */
typedef struct my_uni_idx_st
{
uint16 from;
......@@ -304,10 +304,14 @@ typedef struct charset_info_st
extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_bin;
extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_latin1;
extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_filename;
extern CHARSET_INFO my_charset_big5_chinese_ci;
extern CHARSET_INFO my_charset_big5_bin;
extern CHARSET_INFO my_charset_cp932_japanese_ci;
extern CHARSET_INFO my_charset_cp932_bin;
extern CHARSET_INFO my_charset_cp1250_czech_ci;
extern CHARSET_INFO my_charset_eucjpms_japanese_ci;
extern CHARSET_INFO my_charset_eucjpms_bin;
extern CHARSET_INFO my_charset_euckr_korean_ci;
......@@ -316,7 +320,6 @@ extern CHARSET_INFO my_charset_gb2312_chinese_ci;
extern CHARSET_INFO my_charset_gb2312_bin;
extern CHARSET_INFO my_charset_gbk_chinese_ci;
extern CHARSET_INFO my_charset_gbk_bin;
extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_latin1;
extern CHARSET_INFO my_charset_latin1_german2_ci;
extern CHARSET_INFO my_charset_latin1_bin;
extern CHARSET_INFO my_charset_latin2_czech_ci;
......@@ -329,11 +332,22 @@ extern CHARSET_INFO my_charset_ucs2_bin;
extern CHARSET_INFO my_charset_ucs2_unicode_ci;
extern CHARSET_INFO my_charset_ujis_japanese_ci;
extern CHARSET_INFO my_charset_ujis_bin;
extern CHARSET_INFO my_charset_utf16_bin;
extern CHARSET_INFO my_charset_utf16_general_ci;
extern CHARSET_INFO my_charset_utf16_unicode_ci;
extern CHARSET_INFO my_charset_utf32_bin;
extern CHARSET_INFO my_charset_utf32_general_ci;
extern CHARSET_INFO my_charset_utf32_unicode_ci;
extern CHARSET_INFO my_charset_utf8_general_ci;
extern CHARSET_INFO my_charset_utf8_unicode_ci;
extern CHARSET_INFO my_charset_utf8_bin;
extern CHARSET_INFO my_charset_cp1250_czech_ci;
extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_filename;
extern CHARSET_INFO my_charset_utf8mb4_bin;
extern CHARSET_INFO my_charset_utf8mb4_general_ci;
extern CHARSET_INFO my_charset_utf8mb4_unicode_ci;
#define MY_UTF8MB3 "utf8"
#define MY_UTF8MB4 "utf8mb4"
/* declarations for simple charsets */
extern size_t my_strnxfrm_simple(CHARSET_INFO *, uchar *, size_t,
......@@ -430,6 +444,19 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs,
char *min_str, char *max_str,
size_t *min_length, size_t *max_length);
my_bool my_like_range_utf16(CHARSET_INFO *cs,
const char *ptr, size_t ptr_length,
pbool escape, pbool w_one, pbool w_many,
size_t res_length,
char *min_str, char *max_str,
size_t *min_length, size_t *max_length);
my_bool my_like_range_utf32(CHARSET_INFO *cs,
const char *ptr, size_t ptr_length,
pbool escape, pbool w_one, pbool w_many,
size_t res_length,
char *min_str, char *max_str,
size_t *min_length, size_t *max_length);
int my_wildcmp_8bit(CHARSET_INFO *,
const char *str,const char *str_end,
......@@ -480,6 +507,31 @@ uint my_instr_mb(struct charset_info_st *,
const char *s, size_t s_length,
my_match_t *match, uint nmatch);
int my_strnncoll_mb_bin(CHARSET_INFO * cs,
const uchar *s, size_t slen,
const uchar *t, size_t tlen,
my_bool t_is_prefix);
int my_strnncollsp_mb_bin(CHARSET_INFO *cs,
const uchar *a, size_t a_length,
const uchar *b, size_t b_length,
my_bool diff_if_only_endspace_difference);
int my_wildcmp_mb_bin(CHARSET_INFO *cs,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many);
int my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
const char *s, const char *t);
void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
const uchar *key, size_t len,ulong *nr1, ulong *nr2);
size_t my_strnxfrm_unicode(CHARSET_INFO *,
uchar *dst, size_t dstlen,
const uchar *src, size_t srclen);
int my_wildcmp_unicode(CHARSET_INFO *cs,
const char *str, const char *str_end,
const char *wildstr, const char *wildend,
......
#
# Bug#32390 Character sets: casting utf32 to/from date doesn't work
#
CREATE TABLE t1 AS SELECT repeat('a',20) AS s1 LIMIT 0;
SET timestamp=1216359724;
INSERT INTO t1 VALUES (current_date);
INSERT INTO t1 VALUES (current_time);
INSERT INTO t1 VALUES (current_timestamp);
SELECT s1, hex(s1) FROM t1;
DROP TABLE t1;
SET timestamp=0;
select @@collation_connection;
#
# Create a table with a nullable varchar(10) column
# using currect character_set_connection.
create table t1 as select repeat(' ',10) as a union select null;
alter table t1 add key(a);
show create table t1;
insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test");
explain select * from t1 where a like 'abc%';
explain select * from t1 where a like concat('abc','%');
select * from t1 where a like "abc%";
select * from t1 where a like concat("abc","%");
select * from t1 where a like "ABC%";
select * from t1 where a like "test%";
select * from t1 where a like "te_t";
select * from t1 where a like "%a%";
select * from t1 where a like "%abcd%";
select * from t1 where a like "%abc\d%";
drop table t1;
#
# Bug #2619 ucs2 LIKE comparison fails in some cases
#
select 'AA' like 'AA';
select 'AA' like 'A%A';
select 'AA' like 'A%%A';
select 'AA' like 'AA%';
select 'AA' like '%AA%';
select 'AA' like '%A';
select 'AA' like '%AA';
select 'AA' like 'A%A%';
select 'AA' like '_%_%';
select 'AA' like '%A%A';
select 'AAA'like 'A%A%A';
select 'AZ' like 'AZ';
select 'AZ' like 'A%Z';
select 'AZ' like 'A%%Z';
select 'AZ' like 'AZ%';
select 'AZ' like '%AZ%';
select 'AZ' like '%Z';
select 'AZ' like '%AZ';
select 'AZ' like 'A%Z%';
select 'AZ' like '_%_%';
select 'AZ' like '%A%Z';
select 'AZ' like 'A_';
select 'AZ' like '_Z';
select 'AMZ'like 'A%M%Z';
-- require r/have_utf16.require
disable_query_log;
show collation like 'utf16_general_ci';
enable_query_log;
-- require r/have_utf32.require
disable_query_log;
show collation like 'utf32_general_ci';
enable_query_log;
--require r/have_utf8mb4.require
--disable_query_log
SHOW COLLATION LIKE 'utf8mb4_general_ci';
--enable_query_log
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -1899,6 +1899,20 @@ CONVERT(a, CHAR) CONVERT(b, CHAR)
DROP TABLE t1;
End of 5.0 tests
Start of 5.4 tests
SET NAMES utf8mb3;
SHOW VARIABLES LIKE 'character_set_results%';
Variable_name Value
character_set_results utf8
CREATE TABLE t1 (a CHAR CHARACTER SET utf8mb3 COLLATE utf8mb3_bin);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(1) CHARACTER SET utf8 COLLATE utf8_bin DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
DROP TABLE t1;
SELECT _utf8mb3'test';
test
test
CREATE TABLE t1 (
clipid INT NOT NULL,
Tape TINYTEXT,
......
This diff is collapsed.
Collation Charset Id Default Compiled Sortlen
utf16_general_ci utf16 54 Yes Yes 1
Collation Charset Id Default Compiled Sortlen
utf32_general_ci utf32 60 Yes Yes 1
Collation Charset Id Default Compiled Sortlen
utf8mb4_general_ci utf8mb4 45 Yes Yes 1
......@@ -33,6 +33,36 @@
</collation>
</charset>
<charset name="utf8mb4">
<collation name="utf8mb4_test_ci" id="326">
<rules>
<reset>a</reset>
<s>b</s>
</rules>
</collation>
</charset>
<charset name="utf16">
<collation name="utf16_test_ci" id="327">
<rules>
<reset>a</reset>
<s>b</s>
</rules>
</collation>
</charset>
<charset name="utf32">
<collation name="utf32_test_ci" id="391">
<rules>
<reset>a</reset>
<s>b</s>
</rules>
</collation>
</charset>
<charset name="ucs2">
<collation name="ucs2_test_ci" id="358">
<rules>
......
......@@ -162,8 +162,16 @@ SET @@character_set_client = utf8;
SELECT @@character_set_client;
@@character_set_client
utf8
SET @@character_set_client = utf8mb4;
SELECT @@character_set_client;
@@character_set_client
utf8mb4
SET @@character_set_client = ucs2;
ERROR 42000: Variable 'character_set_client' can't be set to the value of 'ucs2'
SET @@character_set_client = utf16;
ERROR 42000: Variable 'character_set_client' can't be set to the value of 'utf16'
SET @@character_set_client = utf32;
ERROR 42000: Variable 'character_set_client' can't be set to the value of 'utf32'
SET @@character_set_client = cp866;
SELECT @@character_set_client;
@@character_set_client
......@@ -422,7 +430,7 @@ ERROR 42000: Unknown character set: '100'
SET @total_charset = (SELECT count(*) FROM INFORMATION_SCHEMA.CHARACTER_SETS);
SELECT @total_charset;
@total_charset
36
39
'#--------------------FN_DYNVARS_010_10-------------------------#'
SET @@character_set_client = abc;
ERROR 42000: Unknown character set: 'abc'
......
......@@ -424,7 +424,7 @@ ERROR 42000: Unknown character set: '100'
SET @total_charset = (SELECT count(*) FROM INFORMATION_SCHEMA.CHARACTER_SETS);
SELECT @total_charset;
@total_charset
36
39
'#--------------------FN_DYNVARS_011_10-------------------------#'
SET @@character_set_connection = abc;
ERROR 42000: Unknown character set: 'abc'
......
......@@ -424,7 +424,7 @@ ERROR 42000: Unknown character set: '100'
SET @total_charset = (SELECT count(*) FROM INFORMATION_SCHEMA.CHARACTER_SETS);
SELECT @total_charset;
@total_charset
36
39
'#--------------------FN_DYNVARS_012_10-------------------------#'
SET @@character_set_database = "grek";
ERROR 42000: Unknown character set: 'grek'
......
......@@ -402,7 +402,7 @@ ERROR 42000: Unknown character set: '100'
SET @total_charset = (SELECT count(*) FROM INFORMATION_SCHEMA.CHARACTER_SETS);
SELECT @total_charset;
@total_charset
36
39
'#--------------------FN_DYNVARS_008_10-------------------------#'
SET @@character_set_filesystem = abc;
ERROR 42000: Unknown character set: 'abc'
......
......@@ -27,6 +27,9 @@
--source include/have_sjis.inc
--source include/have_utf8.inc
--source include/have_ucs2.inc
--source include/have_utf8mb4.inc
--source include/have_utf16.inc
--source include/have_utf32.inc
--source include/load_sysvars.inc
###################################################
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -2776,7 +2776,7 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
sql_field->interval_list);
List_iterator<String> int_it(sql_field->interval_list);
String conv, *tmp;
char comma_buf[2];
char comma_buf[4]; /* 4 bytes for utf32 */
int comma_length= cs->cset->wc_mb(cs, ',', (uchar*) comma_buf,
(uchar*) comma_buf +
sizeof(comma_buf));
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment