Commit e56caa38 authored by Alexander Barkov's avatar Alexander Barkov

Bug#11752408 - 43593: DUMP/BACKUP/RESTORE/UPGRADE TOOLS FAILS BECAUSE OF UTF8_GENERAL_CI

Introducing new collations:
utf8_general_mysql500_ci and ucs2_general_mysql500_ci,
to reproduce behaviour of utf8_general_ci and ucs2_general_ci
from mysql-5.1.23 (and earlier).

The collations are added to simplify upgrade from mysql-5.1.23 and earlier.

Note: The patch does not make new server start over old data automatically.
Some manual upgrade procedures are assumed.

Paul: please get in touch with me to discuss upgrade procedures
when documenting this bug.

modified:
  include/m_ctype.h
  mysql-test/r/ctype_utf8.result
  mysql-test/t/ctype_utf8.test
  mysys/charset-def.c
  strings/ctype-ucs2.c
  strings/ctype-utf8.c
parent 429fdb3b
......@@ -50,6 +50,8 @@ typedef struct unicase_info_st
extern MY_UNICASE_INFO *my_unicase_default[256];
extern MY_UNICASE_INFO *my_unicase_turkish[256];
extern MY_UNICASE_INFO *my_unicase_mysql500[256];
typedef struct uni_ctype_st
{
......@@ -311,11 +313,13 @@ extern CHARSET_INFO my_charset_tis620_bin;
extern CHARSET_INFO my_charset_ucs2_general_ci;
extern CHARSET_INFO my_charset_ucs2_bin;
extern CHARSET_INFO my_charset_ucs2_unicode_ci;
extern CHARSET_INFO my_charset_ucs2_general_mysql500_ci;
extern CHARSET_INFO my_charset_ujis_japanese_ci;
extern CHARSET_INFO my_charset_ujis_bin;
extern CHARSET_INFO my_charset_utf8_general_ci;
extern CHARSET_INFO my_charset_utf8_unicode_ci;
extern CHARSET_INFO my_charset_utf8_bin;
extern CHARSET_INFO my_charset_utf8_general_mysql500_ci;
extern CHARSET_INFO my_charset_cp1250_czech_ci;
extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_filename;
......
......@@ -1928,4 +1928,28 @@ D120
SELECT HEX(INSERT(_utf8 0xD18FD18E, 2, 1, 0x20));
HEX(INSERT(_utf8 0xD18FD18E, 2, 1, 0x20))
D120D18E
#
# Bug#11752408 - 43593: DUMP/BACKUP/RESTORE/UPGRADE TOOLS FAILS BECAUSE OF UTF8_GENERAL_CI
#
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8 COLLATE utf8_general_mysql500_ci);
INSERT INTO t1 VALUES ('a'),('r'),('s'),(_latin1 0xDF),(_latin1 0xF7),('t'),('z');
SELECT * FROM t1 ORDER BY a;
a
a
r
s
t
z
ß
÷
SELECT a, COUNT(*) FROM t1 GROUP BY a;
a COUNT(*)
a 1
r 1
s 1
t 1
z 1
ß 1
÷ 1
DROP TABLE t1;
End of 5.1 tests
......@@ -1484,5 +1484,13 @@ SELECT HEX(LPAD(_utf8 0xD18F, 3, 0x20));
SELECT HEX(INSERT(_utf8 0xD18F, 2, 1, 0x20));
SELECT HEX(INSERT(_utf8 0xD18FD18E, 2, 1, 0x20));
--echo #
--echo # Bug#11752408 - 43593: DUMP/BACKUP/RESTORE/UPGRADE TOOLS FAILS BECAUSE OF UTF8_GENERAL_CI
--echo #
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8 COLLATE utf8_general_mysql500_ci);
INSERT INTO t1 VALUES ('a'),('r'),('s'),(_latin1 0xDF),(_latin1 0xF7),('t'),('z');
SELECT * FROM t1 ORDER BY a;
SELECT a, COUNT(*) FROM t1 GROUP BY a;
DROP TABLE t1;
--echo End of 5.1 tests
......@@ -132,6 +132,7 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
#ifdef HAVE_CHARSET_ucs2
add_compiled_collation(&my_charset_ucs2_general_ci);
add_compiled_collation(&my_charset_ucs2_bin);
add_compiled_collation(&my_charset_ucs2_general_mysql500_ci);
#ifdef HAVE_UCA_COLLATIONS
add_compiled_collation(&my_charset_ucs2_unicode_ci);
add_compiled_collation(&my_charset_ucs2_icelandic_uca_ci);
......@@ -163,6 +164,7 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
#ifdef HAVE_CHARSET_utf8
add_compiled_collation(&my_charset_utf8_general_ci);
add_compiled_collation(&my_charset_utf8_bin);
add_compiled_collation(&my_charset_utf8_general_mysql500_ci);
#ifdef HAVE_UTF8_GENERAL_CS
add_compiled_collation(&my_charset_utf8_general_cs);
#endif
......
......@@ -1732,6 +1732,40 @@ CHARSET_INFO my_charset_ucs2_general_ci=
&my_collation_ucs2_general_ci_handler
};
CHARSET_INFO my_charset_ucs2_general_mysql500_ci=
{
159, 0, 0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, /* state */
"ucs2", /* cs name */
"ucs2_general_mysql500_ci", /* name */
"", /* comment */
NULL, /* tailoring */
ctype_ucs2, /* ctype */
to_lower_ucs2, /* to_lower */
to_upper_ucs2, /* to_upper */
to_upper_ucs2, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_unicase_mysql500, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
2, /* mbminlen */
2, /* mbmaxlen */
0, /* min_sort_char */
0xFFFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
&my_charset_ucs2_handler,
&my_collation_ucs2_general_ci_handler
};
CHARSET_INFO my_charset_ucs2_bin=
{
90,0,0, /* number */
......
......@@ -177,6 +177,141 @@ static MY_UNICASE_INFO plane00[]={
};
/*
Almost similar to plane00, but maps sorting order
for U+00DF to 0x00DF instead of 0x0053.
*/
static MY_UNICASE_INFO plane00_mysql500[]={
{0x0000,0x0000,0x0000}, {0x0001,0x0001,0x0001},
{0x0002,0x0002,0x0002}, {0x0003,0x0003,0x0003},
{0x0004,0x0004,0x0004}, {0x0005,0x0005,0x0005},
{0x0006,0x0006,0x0006}, {0x0007,0x0007,0x0007},
{0x0008,0x0008,0x0008}, {0x0009,0x0009,0x0009},
{0x000A,0x000A,0x000A}, {0x000B,0x000B,0x000B},
{0x000C,0x000C,0x000C}, {0x000D,0x000D,0x000D},
{0x000E,0x000E,0x000E}, {0x000F,0x000F,0x000F},
{0x0010,0x0010,0x0010}, {0x0011,0x0011,0x0011},
{0x0012,0x0012,0x0012}, {0x0013,0x0013,0x0013},
{0x0014,0x0014,0x0014}, {0x0015,0x0015,0x0015},
{0x0016,0x0016,0x0016}, {0x0017,0x0017,0x0017},
{0x0018,0x0018,0x0018}, {0x0019,0x0019,0x0019},
{0x001A,0x001A,0x001A}, {0x001B,0x001B,0x001B},
{0x001C,0x001C,0x001C}, {0x001D,0x001D,0x001D},
{0x001E,0x001E,0x001E}, {0x001F,0x001F,0x001F},
{0x0020,0x0020,0x0020}, {0x0021,0x0021,0x0021},
{0x0022,0x0022,0x0022}, {0x0023,0x0023,0x0023},
{0x0024,0x0024,0x0024}, {0x0025,0x0025,0x0025},
{0x0026,0x0026,0x0026}, {0x0027,0x0027,0x0027},
{0x0028,0x0028,0x0028}, {0x0029,0x0029,0x0029},
{0x002A,0x002A,0x002A}, {0x002B,0x002B,0x002B},
{0x002C,0x002C,0x002C}, {0x002D,0x002D,0x002D},
{0x002E,0x002E,0x002E}, {0x002F,0x002F,0x002F},
{0x0030,0x0030,0x0030}, {0x0031,0x0031,0x0031},
{0x0032,0x0032,0x0032}, {0x0033,0x0033,0x0033},
{0x0034,0x0034,0x0034}, {0x0035,0x0035,0x0035},
{0x0036,0x0036,0x0036}, {0x0037,0x0037,0x0037},
{0x0038,0x0038,0x0038}, {0x0039,0x0039,0x0039},
{0x003A,0x003A,0x003A}, {0x003B,0x003B,0x003B},
{0x003C,0x003C,0x003C}, {0x003D,0x003D,0x003D},
{0x003E,0x003E,0x003E}, {0x003F,0x003F,0x003F},
{0x0040,0x0040,0x0040}, {0x0041,0x0061,0x0041},
{0x0042,0x0062,0x0042}, {0x0043,0x0063,0x0043},
{0x0044,0x0064,0x0044}, {0x0045,0x0065,0x0045},
{0x0046,0x0066,0x0046}, {0x0047,0x0067,0x0047},
{0x0048,0x0068,0x0048}, {0x0049,0x0069,0x0049},
{0x004A,0x006A,0x004A}, {0x004B,0x006B,0x004B},
{0x004C,0x006C,0x004C}, {0x004D,0x006D,0x004D},
{0x004E,0x006E,0x004E}, {0x004F,0x006F,0x004F},
{0x0050,0x0070,0x0050}, {0x0051,0x0071,0x0051},
{0x0052,0x0072,0x0052}, {0x0053,0x0073,0x0053},
{0x0054,0x0074,0x0054}, {0x0055,0x0075,0x0055},
{0x0056,0x0076,0x0056}, {0x0057,0x0077,0x0057},
{0x0058,0x0078,0x0058}, {0x0059,0x0079,0x0059},
{0x005A,0x007A,0x005A}, {0x005B,0x005B,0x005B},
{0x005C,0x005C,0x005C}, {0x005D,0x005D,0x005D},
{0x005E,0x005E,0x005E}, {0x005F,0x005F,0x005F},
{0x0060,0x0060,0x0060}, {0x0041,0x0061,0x0041},
{0x0042,0x0062,0x0042}, {0x0043,0x0063,0x0043},
{0x0044,0x0064,0x0044}, {0x0045,0x0065,0x0045},
{0x0046,0x0066,0x0046}, {0x0047,0x0067,0x0047},
{0x0048,0x0068,0x0048}, {0x0049,0x0069,0x0049},
{0x004A,0x006A,0x004A}, {0x004B,0x006B,0x004B},
{0x004C,0x006C,0x004C}, {0x004D,0x006D,0x004D},
{0x004E,0x006E,0x004E}, {0x004F,0x006F,0x004F},
{0x0050,0x0070,0x0050}, {0x0051,0x0071,0x0051},
{0x0052,0x0072,0x0052}, {0x0053,0x0073,0x0053},
{0x0054,0x0074,0x0054}, {0x0055,0x0075,0x0055},
{0x0056,0x0076,0x0056}, {0x0057,0x0077,0x0057},
{0x0058,0x0078,0x0058}, {0x0059,0x0079,0x0059},
{0x005A,0x007A,0x005A}, {0x007B,0x007B,0x007B},
{0x007C,0x007C,0x007C}, {0x007D,0x007D,0x007D},
{0x007E,0x007E,0x007E}, {0x007F,0x007F,0x007F},
{0x0080,0x0080,0x0080}, {0x0081,0x0081,0x0081},
{0x0082,0x0082,0x0082}, {0x0083,0x0083,0x0083},
{0x0084,0x0084,0x0084}, {0x0085,0x0085,0x0085},
{0x0086,0x0086,0x0086}, {0x0087,0x0087,0x0087},
{0x0088,0x0088,0x0088}, {0x0089,0x0089,0x0089},
{0x008A,0x008A,0x008A}, {0x008B,0x008B,0x008B},
{0x008C,0x008C,0x008C}, {0x008D,0x008D,0x008D},
{0x008E,0x008E,0x008E}, {0x008F,0x008F,0x008F},
{0x0090,0x0090,0x0090}, {0x0091,0x0091,0x0091},
{0x0092,0x0092,0x0092}, {0x0093,0x0093,0x0093},
{0x0094,0x0094,0x0094}, {0x0095,0x0095,0x0095},
{0x0096,0x0096,0x0096}, {0x0097,0x0097,0x0097},
{0x0098,0x0098,0x0098}, {0x0099,0x0099,0x0099},
{0x009A,0x009A,0x009A}, {0x009B,0x009B,0x009B},
{0x009C,0x009C,0x009C}, {0x009D,0x009D,0x009D},
{0x009E,0x009E,0x009E}, {0x009F,0x009F,0x009F},
{0x00A0,0x00A0,0x00A0}, {0x00A1,0x00A1,0x00A1},
{0x00A2,0x00A2,0x00A2}, {0x00A3,0x00A3,0x00A3},
{0x00A4,0x00A4,0x00A4}, {0x00A5,0x00A5,0x00A5},
{0x00A6,0x00A6,0x00A6}, {0x00A7,0x00A7,0x00A7},
{0x00A8,0x00A8,0x00A8}, {0x00A9,0x00A9,0x00A9},
{0x00AA,0x00AA,0x00AA}, {0x00AB,0x00AB,0x00AB},
{0x00AC,0x00AC,0x00AC}, {0x00AD,0x00AD,0x00AD},
{0x00AE,0x00AE,0x00AE}, {0x00AF,0x00AF,0x00AF},
{0x00B0,0x00B0,0x00B0}, {0x00B1,0x00B1,0x00B1},
{0x00B2,0x00B2,0x00B2}, {0x00B3,0x00B3,0x00B3},
{0x00B4,0x00B4,0x00B4}, {0x039C,0x00B5,0x039C},
{0x00B6,0x00B6,0x00B6}, {0x00B7,0x00B7,0x00B7},
{0x00B8,0x00B8,0x00B8}, {0x00B9,0x00B9,0x00B9},
{0x00BA,0x00BA,0x00BA}, {0x00BB,0x00BB,0x00BB},
{0x00BC,0x00BC,0x00BC}, {0x00BD,0x00BD,0x00BD},
{0x00BE,0x00BE,0x00BE}, {0x00BF,0x00BF,0x00BF},
{0x00C0,0x00E0,0x0041}, {0x00C1,0x00E1,0x0041},
{0x00C2,0x00E2,0x0041}, {0x00C3,0x00E3,0x0041},
{0x00C4,0x00E4,0x0041}, {0x00C5,0x00E5,0x0041},
{0x00C6,0x00E6,0x00C6}, {0x00C7,0x00E7,0x0043},
{0x00C8,0x00E8,0x0045}, {0x00C9,0x00E9,0x0045},
{0x00CA,0x00EA,0x0045}, {0x00CB,0x00EB,0x0045},
{0x00CC,0x00EC,0x0049}, {0x00CD,0x00ED,0x0049},
{0x00CE,0x00EE,0x0049}, {0x00CF,0x00EF,0x0049},
{0x00D0,0x00F0,0x00D0}, {0x00D1,0x00F1,0x004E},
{0x00D2,0x00F2,0x004F}, {0x00D3,0x00F3,0x004F},
{0x00D4,0x00F4,0x004F}, {0x00D5,0x00F5,0x004F},
{0x00D6,0x00F6,0x004F}, {0x00D7,0x00D7,0x00D7},
{0x00D8,0x00F8,0x00D8}, {0x00D9,0x00F9,0x0055},
{0x00DA,0x00FA,0x0055}, {0x00DB,0x00FB,0x0055},
{0x00DC,0x00FC,0x0055}, {0x00DD,0x00FD,0x0059},
{0x00DE,0x00FE,0x00DE}, {0x00DF,0x00DF,0x00DF},
{0x00C0,0x00E0,0x0041}, {0x00C1,0x00E1,0x0041},
{0x00C2,0x00E2,0x0041}, {0x00C3,0x00E3,0x0041},
{0x00C4,0x00E4,0x0041}, {0x00C5,0x00E5,0x0041},
{0x00C6,0x00E6,0x00C6}, {0x00C7,0x00E7,0x0043},
{0x00C8,0x00E8,0x0045}, {0x00C9,0x00E9,0x0045},
{0x00CA,0x00EA,0x0045}, {0x00CB,0x00EB,0x0045},
{0x00CC,0x00EC,0x0049}, {0x00CD,0x00ED,0x0049},
{0x00CE,0x00EE,0x0049}, {0x00CF,0x00EF,0x0049},
{0x00D0,0x00F0,0x00D0}, {0x00D1,0x00F1,0x004E},
{0x00D2,0x00F2,0x004F}, {0x00D3,0x00F3,0x004F},
{0x00D4,0x00F4,0x004F}, {0x00D5,0x00F5,0x004F},
{0x00D6,0x00F6,0x004F}, {0x00F7,0x00F7,0x00F7},
{0x00D8,0x00F8,0x00D8}, {0x00D9,0x00F9,0x0055},
{0x00DA,0x00FA,0x0055}, {0x00DB,0x00FB,0x0055},
{0x00DC,0x00FC,0x0055}, {0x00DD,0x00FD,0x0059},
{0x00DE,0x00FE,0x00DE}, {0x0178,0x00FF,0x0059}
};
static MY_UNICASE_INFO plane01[]={
{0x0100,0x0101,0x0041}, {0x0100,0x0101,0x0041},
......@@ -1525,6 +1660,47 @@ MY_UNICASE_INFO *my_unicase_default[256]={
};
/*
Reproduce old utf8_general_ci behaviour before we fixed Bug#27877.
*/
MY_UNICASE_INFO *my_unicase_mysql500[256]={
plane00_mysql500,
plane01, plane02, plane03, plane04, plane05, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, plane1E, plane1F,
NULL, plane21, NULL, NULL, plane24, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, planeFF
};
/*
Turkish lower/upper mapping:
1. LOWER(0x0049 LATIN CAPITAL LETTER I) ->
......@@ -2720,6 +2896,39 @@ CHARSET_INFO my_charset_utf8_general_ci=
};
CHARSET_INFO my_charset_utf8_general_mysql500_ci=
{
223,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, /* state */
"utf8", /* cs name */
"utf8_general_mysql500_ci", /* name */
"", /* comment */
NULL, /* tailoring */
ctype_utf8, /* ctype */
to_lower_utf8, /* to_lower */
to_upper_utf8, /* to_upper */
to_upper_utf8, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_unicase_mysql500, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
1, /* mbminlen */
3, /* mbmaxlen */
0, /* min_sort_char */
0xFFFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
&my_charset_utf8_handler,
&my_collation_ci_handler
};
CHARSET_INFO my_charset_utf8_bin=
{
83,0,0, /* number */
......@@ -4231,6 +4440,7 @@ CHARSET_INFO my_charset_filename=
&my_collation_filename_handler
};
#ifdef MY_TEST_UTF8
#include <stdio.h>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment