Commit 5f6c6309 authored by Alexander Barkov's avatar Alexander Barkov

WL#4583 Case conversion in Asian character sets

  modified:
  include/m_ctype.h
  - Changing the type of the toupper/tolower/sort members from uint16 to uint32, to store values > 0xFFFF.
  - Adding function prototypes

  mysql-test/r/ctype_big5.result
  mysql-test/r/ctype_cp932_binlog_stm.result
  mysql-test/r/ctype_eucjpms.result*
  mysql-test/r/ctype_euckr.result
  mysql-test/r/ctype_gb2312.result
  mysql-test/r/ctype_gbk.result
  mysql-test/r/ctype_sjis.result
  mysql-test/r/ctype_ujis.result
  mysql-test/t/ctype_big5.test
  mysql-test/t/ctype_cp932_binlog_stm.test
  mysql-test/t/ctype_eucjpms.test
  mysql-test/t/ctype_euckr.test
  mysql-test/t/ctype_gb2312.test
  mysql-test/t/ctype_gbk.test
  mysql-test/t/ctype_sjis.test
  mysql-test/t/ctype_ujis.test
  -  Adding tests

  strings/ctype-big5.c
  strings/ctype-cp932.c
  strings/ctype-euc_kr.c
  strings/ctype-eucjpms.c
  strings/ctype-gb2312.c
  strings/ctype-gbk.c
  strings/ctype-sjis.c
  - Adding upper/lower case conversion data

  strings/ctype-mb.c
  - Adding handling of upper/lower conversion for multi-byte characters.

  strings/ctype-ujis.c
  - Implementing shared upper/lower conversion
    functions  for ujis and eucjpms
  - Adding upper/lower case conversion data for ujis
parent 9c820e63
...@@ -40,9 +40,9 @@ extern "C" { ...@@ -40,9 +40,9 @@ extern "C" {
typedef struct unicase_info_st typedef struct unicase_info_st
{ {
uint16 toupper; uint32 toupper;
uint16 tolower; uint32 tolower;
uint16 sort; uint32 sort;
} MY_UNICASE_INFO; } MY_UNICASE_INFO;
...@@ -439,6 +439,14 @@ extern size_t my_caseup_mb(CHARSET_INFO *, char *src, size_t srclen, ...@@ -439,6 +439,14 @@ extern size_t my_caseup_mb(CHARSET_INFO *, char *src, size_t srclen,
char *dst, size_t dstlen); char *dst, size_t dstlen);
extern size_t my_casedn_mb(CHARSET_INFO *, char *src, size_t srclen, extern size_t my_casedn_mb(CHARSET_INFO *, char *src, size_t srclen,
char *dst, size_t dstlen); char *dst, size_t dstlen);
extern size_t my_caseup_mb_varlen(CHARSET_INFO *, char *src, size_t srclen,
char *dst, size_t dstlen);
extern size_t my_casedn_mb_varlen(CHARSET_INFO *, char *src, size_t srclen,
char *dst, size_t dstlen);
extern size_t my_caseup_ujis(CHARSET_INFO *, char *src, size_t srclen,
char *dst, size_t dstlen);
extern size_t my_casedn_ujis(CHARSET_INFO *, char *src, size_t srclen,
char *dst, size_t dstlen);
extern int my_strcasecmp_mb(CHARSET_INFO * cs,const char *, const char *); extern int my_strcasecmp_mb(CHARSET_INFO * cs,const char *, const char *);
int my_wildcmp_mb(CHARSET_INFO *, int my_wildcmp_mb(CHARSET_INFO *,
......
...@@ -277,3 +277,281 @@ hex(a) ...@@ -277,3 +277,281 @@ hex(a)
EE00 EE00
drop table t1; drop table t1;
End of 5.0 tests End of 5.0 tests
#
# Start of 5.5 tests
#
#
# Testing WL#4583 Case conversion in Asian character sets
#
SET NAMES utf8;
SET collation_connection=big5_chinese_ci;
CREATE TABLE t1 (b VARCHAR(2));
INSERT INTO t1 VALUES ('0'),('1'),('2'),('3'),('4'),('5'),('6'),('7');
INSERT INTO t1 VALUES ('8'),('9'),('A'),('B'),('C'),('D'),('E'),('F');
CREATE TEMPORARY TABLE head AS SELECT concat(b1.b, b2.b) AS head FROM t1 b1, t1 b2;
CREATE TEMPORARY TABLE tail AS SELECT concat(b1.b, b2.b) AS tail FROM t1 b1, t1 b2;
DROP TABLE t1;
CREATE TABLE t1 AS
SELECT concat(head, tail) AS code, ' ' AS a
FROM head, tail
WHERE (head BETWEEN '80' AND 'FF') AND (tail BETWEEN '20' AND 'FF')
ORDER BY head, tail;
DROP TEMPORARY TABLE head, tail;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`code` varchar(8) DEFAULT NULL,
`a` varchar(1) CHARACTER SET big5 NOT NULL DEFAULT ''
) ENGINE=MyISAM DEFAULT CHARSET=latin1
SELECT COUNT(*) FROM t1;
COUNT(*)
28672
UPDATE t1 SET a=unhex(code) ORDER BY code;
Warnings:
Warning 1366 Incorrect string value: '\x80 ' for column 'a' at row 1
Warning 1366 Incorrect string value: '\x80!' for column 'a' at row 2
Warning 1366 Incorrect string value: '\x80"' for column 'a' at row 3
Warning 1366 Incorrect string value: '\x80#' for column 'a' at row 4
Warning 1366 Incorrect string value: '\x80$' for column 'a' at row 5
Warning 1366 Incorrect string value: '\x80%' for column 'a' at row 6
Warning 1366 Incorrect string value: '\x80&' for column 'a' at row 7
Warning 1366 Incorrect string value: '\x80'' for column 'a' at row 8
Warning 1366 Incorrect string value: '\x80(' for column 'a' at row 9
Warning 1366 Incorrect string value: '\x80)' for column 'a' at row 10
Warning 1366 Incorrect string value: '\x80*' for column 'a' at row 11
Warning 1366 Incorrect string value: '\x80+' for column 'a' at row 12
Warning 1366 Incorrect string value: '\x80,' for column 'a' at row 13
Warning 1366 Incorrect string value: '\x80-' for column 'a' at row 14
Warning 1366 Incorrect string value: '\x80.' for column 'a' at row 15
Warning 1366 Incorrect string value: '\x80/' for column 'a' at row 16
Warning 1366 Incorrect string value: '\x800' for column 'a' at row 17
Warning 1366 Incorrect string value: '\x801' for column 'a' at row 18
Warning 1366 Incorrect string value: '\x802' for column 'a' at row 19
Warning 1366 Incorrect string value: '\x803' for column 'a' at row 20
Warning 1366 Incorrect string value: '\x804' for column 'a' at row 21
Warning 1366 Incorrect string value: '\x805' for column 'a' at row 22
Warning 1366 Incorrect string value: '\x806' for column 'a' at row 23
Warning 1366 Incorrect string value: '\x807' for column 'a' at row 24
Warning 1366 Incorrect string value: '\x808' for column 'a' at row 25
Warning 1366 Incorrect string value: '\x809' for column 'a' at row 26
Warning 1366 Incorrect string value: '\x80:' for column 'a' at row 27
Warning 1366 Incorrect string value: '\x80;' for column 'a' at row 28
Warning 1366 Incorrect string value: '\x80<' for column 'a' at row 29
Warning 1366 Incorrect string value: '\x80=' for column 'a' at row 30
Warning 1366 Incorrect string value: '\x80>' for column 'a' at row 31
Warning 1366 Incorrect string value: '\x80?' for column 'a' at row 32
Warning 1366 Incorrect string value: '\x80@' for column 'a' at row 33
Warning 1366 Incorrect string value: '\x80A' for column 'a' at row 34
Warning 1366 Incorrect string value: '\x80B' for column 'a' at row 35
Warning 1366 Incorrect string value: '\x80C' for column 'a' at row 36
Warning 1366 Incorrect string value: '\x80D' for column 'a' at row 37
Warning 1366 Incorrect string value: '\x80E' for column 'a' at row 38
Warning 1366 Incorrect string value: '\x80F' for column 'a' at row 39
Warning 1366 Incorrect string value: '\x80G' for column 'a' at row 40
Warning 1366 Incorrect string value: '\x80H' for column 'a' at row 41
Warning 1366 Incorrect string value: '\x80I' for column 'a' at row 42
Warning 1366 Incorrect string value: '\x80J' for column 'a' at row 43
Warning 1366 Incorrect string value: '\x80K' for column 'a' at row 44
Warning 1366 Incorrect string value: '\x80L' for column 'a' at row 45
Warning 1366 Incorrect string value: '\x80M' for column 'a' at row 46
Warning 1366 Incorrect string value: '\x80N' for column 'a' at row 47
Warning 1366 Incorrect string value: '\x80O' for column 'a' at row 48
Warning 1366 Incorrect string value: '\x80P' for column 'a' at row 49
Warning 1366 Incorrect string value: '\x80Q' for column 'a' at row 50
Warning 1366 Incorrect string value: '\x80R' for column 'a' at row 51
Warning 1366 Incorrect string value: '\x80S' for column 'a' at row 52
Warning 1366 Incorrect string value: '\x80T' for column 'a' at row 53
Warning 1366 Incorrect string value: '\x80U' for column 'a' at row 54
Warning 1366 Incorrect string value: '\x80V' for column 'a' at row 55
Warning 1366 Incorrect string value: '\x80W' for column 'a' at row 56
Warning 1366 Incorrect string value: '\x80X' for column 'a' at row 57
Warning 1366 Incorrect string value: '\x80Y' for column 'a' at row 58
Warning 1366 Incorrect string value: '\x80Z' for column 'a' at row 59
Warning 1366 Incorrect string value: '\x80[' for column 'a' at row 60
Warning 1366 Incorrect string value: '\x80\' for column 'a' at row 61
Warning 1366 Incorrect string value: '\x80]' for column 'a' at row 62
Warning 1366 Incorrect string value: '\x80^' for column 'a' at row 63
Warning 1366 Incorrect string value: '\x80_' for column 'a' at row 64
SELECT COUNT(*) FROM t1 WHERE a<>'';
COUNT(*)
13973
SELECT code, hex(upper(a)), hex(lower(a)),a, upper(a), lower(a) FROM t1 WHERE hex(a)<>hex(upper(a)) OR hex(a)<>hex(lower(a));
code hex(upper(a)) hex(lower(a)) a upper(a) lower(a)
A2CF A2CF A2E9 A A a
A2D0 A2D0 A2EA B B b
A2D1 A2D1 A2EB C C c
A2D2 A2D2 A2EC D D d
A2D3 A2D3 A2ED E E e
A2D4 A2D4 A2EE F F f
A2D5 A2D5 A2EF G G g
A2D6 A2D6 A2F0 H H h
A2D7 A2D7 A2F1 I I i
A2D8 A2D8 A2F2 J J j
A2D9 A2D9 A2F3 K K k
A2DA A2DA A2F4 L L l
A2DB A2DB A2F5 M M m
A2DC A2DC A2F6 N N n
A2DD A2DD A2F7 O O o
A2DE A2DE A2F8 P P p
A2DF A2DF A2F9 Q Q q
A2E0 A2E0 A2FA R R r
A2E1 A2E1 A2FB S S s
A2E2 A2E2 A2FC T T t
A2E3 A2E3 A2FD U U u
A2E4 A2E4 A2FE V V v
A2E5 A2E5 A340 W W w
A2E6 A2E6 A341 X X x
A2E7 A2E7 A342 Y Y y
A2E8 A2E8 A343 Z Z z
A2E9 A2CF A2E9 a A a
A2EA A2D0 A2EA b B b
A2EB A2D1 A2EB c C c
A2EC A2D2 A2EC d D d
A2ED A2D3 A2ED e E e
A2EE A2D4 A2EE f F f
A2EF A2D5 A2EF g G g
A2F0 A2D6 A2F0 h H h
A2F1 A2D7 A2F1 i I i
A2F2 A2D8 A2F2 j J j
A2F3 A2D9 A2F3 k K k
A2F4 A2DA A2F4 l L l
A2F5 A2DB A2F5 m M m
A2F6 A2DC A2F6 n N n
A2F7 A2DD A2F7 o O o
A2F8 A2DE A2F8 p P p
A2F9 A2DF A2F9 q Q q
A2FA A2E0 A2FA r R r
A2FB A2E1 A2FB s S s
A2FC A2E2 A2FC t T t
A2FD A2E3 A2FD u U u
A2FE A2E4 A2FE v V v
A340 A2E5 A340 w W w
A341 A2E6 A341 x X x
A342 A2E7 A342 y Y y
A343 A2E8 A343 z Z z
A344 A344 A35C Α Α α
A345 A345 A35D Β Β β
A346 A346 A35E Γ Γ γ
A347 A347 A35F Δ Δ δ
A348 A348 A360 Ε Ε ε
A349 A349 A361 Ζ Ζ ζ
A34A A34A A362 Η Η η
A34B A34B A363 Θ Θ θ
A34C A34C A364 Ι Ι ι
A34D A34D A365 Κ Κ κ
A34E A34E A366 Λ Λ λ
A34F A34F A367 Μ Μ μ
A350 A350 A368 Ν Ν ν
A351 A351 A369 Ξ Ξ ξ
A352 A352 A36A Ο Ο ο
A353 A353 A36B Π Π π
A354 A354 A36C Ρ Ρ ρ
A355 A355 A36D Σ Σ σ
A356 A356 A36E Τ Τ τ
A357 A357 A36F Υ Υ υ
A358 A358 A370 Φ Φ φ
A359 A359 A371 Χ Χ χ
A35A A35A A372 Ψ Ψ ψ
A35B A35B A373 Ω Ω ω
A35C A344 A35C α Α α
A35D A345 A35D β Β β
A35E A346 A35E γ Γ γ
A35F A347 A35F δ Δ δ
A360 A348 A360 ε Ε ε
A361 A349 A361 ζ Ζ ζ
A362 A34A A362 η Η η
A363 A34B A363 θ Θ θ
A364 A34C A364 ι Ι ι
A365 A34D A365 κ Κ κ
A366 A34E A366 λ Λ λ
A367 A34F A367 μ Μ μ
A368 A350 A368 ν Ν ν
A369 A351 A369 ξ Ξ ξ
A36A A352 A36A ο Ο ο
A36B A353 A36B π Π π
A36C A354 A36C ρ Ρ ρ
A36D A355 A36D σ Σ σ
A36E A356 A36E τ Τ τ
A36F A357 A36F υ Υ υ
A370 A358 A370 φ Φ φ
A371 A359 A371 χ Χ χ
A372 A35A A372 ψ Ψ ψ
A373 A35B A373 ω Ω ω
C7B1 C7B1 C7CC Д Д д
C7B2 C7B2 C7CD Е Е е
C7B3 C7B3 C7CE Ё Ё ё
C7B4 C7B4 C7CF Ж Ж ж
C7B5 C7B5 C7D0 З З з
C7B6 C7B6 C7D1 И И и
C7B7 C7B7 C7D2 Й Й й
C7B8 C7B8 C7D3 К К к
C7B9 C7B9 C7D4 Л Л л
C7BA C7BA C7D5 М М м
C7BB C7BB C7DC У У у
C7BC C7BC C7DD Ф Ф ф
C7BD C7BD C7DE Х Х х
C7BE C7BE C7DF Ц Ц ц
C7BF C7BF C7E0 Ч Ч ч
C7C0 C7C0 C7E1 Ш Ш ш
C7C1 C7C1 C7E2 Щ Щ щ
C7C2 C7C2 C7E3 Ъ Ъ ъ
C7C3 C7C3 C7E4 Ы Ы ы
C7C4 C7C4 C7E5 Ь Ь ь
C7C5 C7C5 C7E6 Э Э э
C7C6 C7C6 C7E7 Ю Ю ю
C7C7 C7C7 C7E8 Я Я я
C7CC C7B1 C7CC д Д д
C7CD C7B2 C7CD е Е е
C7CE C7B3 C7CE ё Ё ё
C7CF C7B4 C7CF ж Ж ж
C7D0 C7B5 C7D0 з З з
C7D1 C7B6 C7D1 и И и
C7D2 C7B7 C7D2 й Й й
C7D3 C7B8 C7D3 к К к
C7D4 C7B9 C7D4 л Л л
C7D5 C7BA C7D5 м М м
C7DC C7BB C7DC у У у
C7DD C7BC C7DD ф Ф ф
C7DE C7BD C7DE х Х х
C7DF C7BE C7DF ц Ц ц
C7E0 C7BF C7E0 ч Ч ч
C7E1 C7C0 C7E1 ш Ш ш
C7E2 C7C1 C7E2 щ Щ щ
C7E3 C7C2 C7E3 ъ Ъ ъ
C7E4 C7C3 C7E4 ы Ы ы
C7E5 C7C4 C7E5 ь Ь ь
C7E6 C7C5 C7E6 э Э э
C7E7 C7C6 C7E7 ю Ю ю
C7E8 C7C7 C7E8 я Я я
SELECT * FROM t1
WHERE HEX(CAST(LOWER(a) AS CHAR CHARACTER SET utf8)) <>
HEX(LOWER(CAST(a AS CHAR CHARACTER SET utf8))) ORDER BY code;
code a
A2B9 Ⅰ
A2BA Ⅱ
A2BB Ⅲ
A2BC Ⅳ
A2BD Ⅴ
A2BE Ⅵ
A2BF Ⅶ
A2C0 Ⅷ
A2C1 Ⅸ
A2C2 Ⅹ
SELECT * FROM t1
WHERE HEX(CAST(UPPER(a) AS CHAR CHARACTER SET utf8)) <>
HEX(UPPER(CAST(a AS CHAR CHARACTER SET utf8))) ORDER BY code;
code a
C7C8 а
C7C9 б
C7CA в
C7CB г
C7D6 н
C7D7 о
C7D8 п
C7D9 р
C7DA с
C7DB т
DROP TABLE t1;
#
# End of 5.5 tests
#
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -84,3 +84,64 @@ select hex(a) from t1; ...@@ -84,3 +84,64 @@ select hex(a) from t1;
drop table t1; drop table t1;
--echo End of 5.0 tests --echo End of 5.0 tests
--echo #
--echo # Start of 5.5 tests
--echo #
--echo #
--echo # Testing WL#4583 Case conversion in Asian character sets
--echo #
#
# Overview: enumerate every candidate two-byte code as hex text, store the
# decoded bytes in a big5 column, then check UPPER()/LOWER() on that column
# both directly and against the same conversion done in utf8.
#
#
# Populate t1 with all hex digits
#
SET NAMES utf8;
SET collation_connection=big5_chinese_ci;
CREATE TABLE t1 (b VARCHAR(2));
INSERT INTO t1 VALUES ('0'),('1'),('2'),('3'),('4'),('5'),('6'),('7');
INSERT INTO t1 VALUES ('8'),('9'),('A'),('B'),('C'),('D'),('E'),('F');
#
# Populate tables head and tail with values '00'-'FF'
# (the 16 hex digits cross-joined with themselves: 16x16=256 rows each)
#
CREATE TEMPORARY TABLE head AS SELECT concat(b1.b, b2.b) AS head FROM t1 b1, t1 b2;
CREATE TEMPORARY TABLE tail AS SELECT concat(b1.b, b2.b) AS tail FROM t1 b1, t1 b2;
DROP TABLE t1;
#
# Populate table t1 with all values [80..FF][20..FF] (128x224=28672 rows).
# Column "code" holds the hex text of the candidate byte pair; column "a"
# starts as a one-space placeholder and is filled in by the UPDATE below.
# Expected valid big5 codes: [A1..F9][40..7E,A1..FE] (89x157=13973)
#
CREATE TABLE t1 AS
SELECT concat(head, tail) AS code, ' ' AS a
FROM head, tail
WHERE (head BETWEEN '80' AND 'FF') AND (tail BETWEEN '20' AND 'FF')
ORDER BY head, tail;
DROP TEMPORARY TABLE head, tail;
SHOW CREATE TABLE t1;
SELECT COUNT(*) FROM t1;
#
# Decode each hex code into column a. Byte pairs that are not valid big5
# raise "Incorrect string value" warnings; the following COUNT is expected
# to see only the 13973 valid codes as non-empty values.
#
UPDATE t1 SET a=unhex(code) ORDER BY code;
SELECT COUNT(*) FROM t1 WHERE a<>'';
#
# Display all characters that have upper or lower case mapping.
# NOTE(review): this query has no ORDER BY, so the row order relies on the
# order in which t1 was populated. Adding ORDER BY code would require
# re-recording the result file, because the query text is echoed there.
#
SELECT code, hex(upper(a)), hex(lower(a)),a, upper(a), lower(a) FROM t1 WHERE hex(a)<>hex(upper(a)) OR hex(a)<>hex(lower(a));
#
# Make sure all possible conversions happened.
# Each query below lists the characters for which converting case in big5
# and then casting to utf8 differs from casting to utf8 first and then
# converting case, i.e. characters that utf8 maps but big5 does not.
#
# Expect U+2160 to U+2169 ROMAN NUMERAL ONE to ROMAN NUMERAL TEN
#
SELECT * FROM t1
WHERE HEX(CAST(LOWER(a) AS CHAR CHARACTER SET utf8)) <>
HEX(LOWER(CAST(a AS CHAR CHARACTER SET utf8))) ORDER BY code;
#
# Expect U+0430 to U+0433 CYRILLIC SMALL LETTER A, BE, VE, GHE
# Expect U+043D to U+0442 CYRILLIC SMALL LETTER EN, O, PE, ER, ES, TE
#
SELECT * FROM t1
WHERE HEX(CAST(UPPER(a) AS CHAR CHARACTER SET utf8)) <>
HEX(UPPER(CAST(a AS CHAR CHARACTER SET utf8))) ORDER BY code;
DROP TABLE t1;
--echo #
--echo # End of 5.5 tests
--echo #
...@@ -42,3 +42,79 @@ SELECT hex(a), hex(lower(a)), hex(upper(a)) FROM t1 ORDER BY binary(a); ...@@ -42,3 +42,79 @@ SELECT hex(a), hex(lower(a)), hex(upper(a)) FROM t1 ORDER BY binary(a);
DROP TABLE t1; DROP TABLE t1;
--echo End of 5.1 tests --echo End of 5.1 tests
--echo #
--echo # Start of 5.5 tests
--echo #
--echo #
--echo # Testing WL#4583 Case conversion in Asian character sets
--echo #
#
# Overview: enumerate every candidate cp932 code (two-byte codes plus the
# single-byte Half Width Kana) as hex text, decode it into column a, then
# check UPPER()/LOWER() both directly and against the conversion in utf8.
#
#
# Populate t1 with all hex digits
#
SET NAMES utf8;
SET collation_connection=cp932_japanese_ci;
CREATE TABLE t1 (b VARCHAR(2));
INSERT INTO t1 VALUES ('0'),('1'),('2'),('3'),('4'),('5'),('6'),('7');
INSERT INTO t1 VALUES ('8'),('9'),('A'),('B'),('C'),('D'),('E'),('F');
#
# Populate tables head and tail with values '00'-'FF'
# (the 16 hex digits cross-joined with themselves: 16x16=256 rows each)
#
CREATE TEMPORARY TABLE head AS SELECT concat(b1.b, b2.b) AS head FROM t1 b1, t1 b2;
CREATE TEMPORARY TABLE tail AS SELECT concat(b1.b, b2.b) AS tail FROM t1 b1, t1 b2;
DROP TABLE t1;
#
# Populate table t1 with all codes [80..FF][20..FF]
# excluding Half Width Kana [A1..DF] as the first byte.
# Column "code" holds the hex text of the candidate bytes; column "a"
# starts as a one-space placeholder and is filled in by the UPDATE below.
# Expected valid cp932 multibyte codes:
# [81..9F,E0..FC][40..7E,80..FC] (60x188=11280 characters)
#
CREATE TABLE t1 AS
SELECT concat(head, tail) AS code, ' ' AS a
FROM head, tail
WHERE (head BETWEEN '80' AND 'FF') AND (head NOT BETWEEN 'A1' AND 'DF')
AND (tail BETWEEN '20' AND 'FF')
ORDER BY head, tail;
#
# Populate t1 with Half Width Kana [A1..DF] as single-byte codes
#
INSERT t1 (code) SELECT head FROM head
WHERE (head BETWEEN 'A1' AND 'DF')
ORDER BY head;
DROP TEMPORARY TABLE head, tail;
SHOW CREATE TABLE t1;
#
# Decode each hex code into column a, then count all rows, the non-empty
# single-byte values and the non-empty two-byte values.
#
UPDATE t1 SET a=unhex(code) ORDER BY code;
SELECT COUNT(*) FROM t1;
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=1;
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=2;
#
# Display all characters that have upper or lower case mapping.
#
SELECT code, hex(upper(a)), hex(lower(a)),a, upper(a), lower(a) FROM t1
WHERE hex(a)<>hex(upper(a)) OR hex(a)<>hex(lower(a))
ORDER BY code;
#
# Make sure all possible conversions happened.
# Each query below lists the characters for which converting case in cp932
# and then casting to utf8 differs from casting to utf8 first and then
# converting case.
#
# Expect U+212B ANGSTROM SIGN
#
SELECT * FROM t1
WHERE HEX(CAST(LOWER(a) AS CHAR CHARACTER SET utf8)) <>
HEX(LOWER(CAST(a AS CHAR CHARACTER SET utf8))) ORDER BY code;
#
# Expect no results
#
SELECT * FROM t1
WHERE HEX(CAST(UPPER(a) AS CHAR CHARACTER SET utf8)) <>
HEX(UPPER(CAST(a AS CHAR CHARACTER SET utf8))) ORDER BY code;
DROP TABLE t1;
--echo #
--echo # End of 5.5 tests
--echo #
...@@ -381,3 +381,89 @@ select hex(convert(_eucjpms 0xA5FE41 using ucs2)); ...@@ -381,3 +381,89 @@ select hex(convert(_eucjpms 0xA5FE41 using ucs2));
# the next character, which is a single byte character 0x41. # the next character, which is a single byte character 0x41.
select hex(convert(_eucjpms 0x8FABF841 using ucs2)); select hex(convert(_eucjpms 0x8FABF841 using ucs2));
--echo #
--echo # Start of 5.5 tests
--echo #
--echo #
--echo # Testing WL#4583 Case conversion in Asian character sets
--echo #
#
# Overview: enumerate every candidate eucjpms code (two-byte 8E-prefixed
# kana, two-byte codes, three-byte 8F-prefixed codes) as hex text, decode it
# into column a, then check UPPER()/LOWER() both directly and against the
# same conversion done in utf8.
#
#
# Populate t1 with all hex digits
#
SET NAMES utf8;
SET collation_connection=eucjpms_japanese_ci;
CREATE TABLE t1 (b VARCHAR(2));
INSERT INTO t1 VALUES ('0'),('1'),('2'),('3'),('4'),('5'),('6'),('7');
INSERT INTO t1 VALUES ('8'),('9'),('A'),('B'),('C'),('D'),('E'),('F');
#
# Populate tables head and tail with values '00'-'FF'
# (the 16 hex digits cross-joined with themselves: 16x16=256 rows each)
#
CREATE TEMPORARY TABLE head AS SELECT concat(b1.b, b2.b) AS head FROM t1 b1, t1 b2;
CREATE TEMPORARY TABLE tail AS SELECT concat(b1.b, b2.b) AS tail FROM t1 b1, t1 b2;
DROP TABLE t1;
#
# Populate table t1 with all eucjpms codes.
# The table is created empty (LIMIT 0); the 'XXXXXX' literal only sizes the
# "code" column for the six hex digits of a three-byte code.
#
CREATE TABLE t1 AS SELECT 'XXXXXX' AS code, ' ' AS a LIMIT 0;
#
# Populate JIS-X-0201 range (Half Width Kana)
# Expected valid code range: [8E][A1..DF] (1x63 characters)
#
INSERT INTO t1 (code) SELECT concat('8E', head) FROM head
WHERE (head BETWEEN 'A1' AND 'DF') ORDER BY head;
#
# Populate JIS-X-0208 range
# First bytes 8E and 8F are skipped here because they are the prefixes
# used for the other two ranges.
# Expected valid codes: [A1..FE][A1..FE] (94x94=8836 characters)
#
INSERT INTO t1 (code) SELECT concat(head, tail)
FROM head, tail
WHERE (head BETWEEN '80' AND 'FF') AND (head NOT BETWEEN '8E' AND '8F')
AND (tail BETWEEN '20' AND 'FF')
ORDER BY head, tail;
#
# Populate JIS-X-0212 range
# Expected valid codes [8F][A1..FE][A1..FE] (1x94x94=8836 characters)
#
INSERT INTO t1 (code) SELECT concat('8F', head, tail)
FROM head, tail
WHERE (head BETWEEN '80' AND 'FF') AND (tail BETWEEN '20' AND 'FF')
ORDER BY head, tail;
DROP TEMPORARY TABLE head, tail;
SHOW CREATE TABLE t1;
#
# Decode each hex code into column a, then count all rows, the non-empty
# values, and the non-empty values by byte length (2 and 3 bytes).
# The CHAR_LENGTH(a)=2 query lists rows stored as two characters
# (presumably expected to return nothing -- TODO confirm against the
# recorded result).
#
UPDATE t1 SET a=unhex(code) ORDER BY code;
SELECT COUNT(*) FROM t1;
SELECT COUNT(*) FROM t1 WHERE a<>'';
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=2;
SELECT * FROM t1 WHERE CHAR_LENGTH(a)=2;
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=3;
#
# Display all characters that have upper or lower case mapping.
#
SELECT code, hex(upper(a)), hex(lower(a)),a, upper(a), lower(a) FROM t1 WHERE hex(a)<>hex(upper(a)) OR hex(a)<>hex(lower(a)) ORDER BY code;
#
# Make sure all possible conversions happened.
# Each query below lists the characters for which converting case in
# eucjpms and then casting to utf8 differs from casting to utf8 first and
# then converting case.
#
# Expect U+0122 LATIN CAPITAL LETTER G WITH CEDILLA
#
SELECT * FROM t1
WHERE HEX(CAST(LOWER(a) AS CHAR CHARACTER SET utf8)) <>
HEX(LOWER(CAST(a AS CHAR CHARACTER SET utf8))) ORDER BY code;
#
# Expect U+00F0 LATIN SMALL LETTER ETH
# Expect U+01F5 LATIN SMALL LETTER G WITH ACUTE
#
SELECT * FROM t1
WHERE HEX(CAST(UPPER(a) AS CHAR CHARACTER SET utf8)) <>
HEX(UPPER(CAST(a AS CHAR CHARACTER SET utf8))) ORDER BY code;
DROP TABLE t1;
--echo #
--echo # End of 5.5 tests
--echo #
...@@ -107,3 +107,68 @@ SELECT s, hex(a), hex(u), hex(a2) FROM t2 ORDER BY s; ...@@ -107,3 +107,68 @@ SELECT s, hex(a), hex(u), hex(a2) FROM t2 ORDER BY s;
DROP TABLE t1, t2; DROP TABLE t1, t2;
--echo End of 5.4 tests --echo End of 5.4 tests
--echo #
--echo # Start of 5.5 tests
--echo #
--echo #
--echo # Testing WL#4583 Case conversion in Asian character sets
--echo #
#
# Overview: enumerate every candidate two-byte code as hex text, decode it
# into column a, then check UPPER()/LOWER() both directly and against the
# same conversion done in utf8.
#
#
# Populate t1 with all hex digits
#
SET NAMES utf8;
SET collation_connection=euckr_korean_ci;
CREATE TABLE t1 (b VARCHAR(2));
INSERT INTO t1 VALUES ('0'),('1'),('2'),('3'),('4'),('5'),('6'),('7');
INSERT INTO t1 VALUES ('8'),('9'),('A'),('B'),('C'),('D'),('E'),('F');
#
# Populate tables head and tail with values '00'-'FF'
# (the 16 hex digits cross-joined with themselves: 16x16=256 rows each)
#
CREATE TEMPORARY TABLE head AS SELECT concat(b1.b, b2.b) AS head FROM t1 b1, t1 b2;
CREATE TEMPORARY TABLE tail AS SELECT concat(b1.b, b2.b) AS tail FROM t1 b1, t1 b2;
DROP TABLE t1;
#
# Populate table t1 with all codes [80..FF][20..FF]
# Column "code" holds the hex text of the candidate byte pair; column "a"
# starts as a one-space placeholder and is filled in by the UPDATE below.
# Expected valid euckr codes: [81..FE][41..5A,61..7A,81..FE]
#
CREATE TABLE t1 AS
SELECT concat(head, tail) AS code, ' ' AS a
FROM head, tail
WHERE (head BETWEEN '80' AND 'FF') AND (tail BETWEEN '20' AND 'FF')
ORDER BY head, tail;
DROP TEMPORARY TABLE head, tail;
SHOW CREATE TABLE t1;
#
# Decode each hex code into column a and count the non-empty values.
#
UPDATE t1 SET a=unhex(code) ORDER BY code;
SELECT COUNT(*) FROM t1 WHERE a<>'';
#
# Display all characters that have upper or lower case mapping.
# NOTE(review): this query has no ORDER BY, so the row order relies on the
# order in which t1 was populated. Adding ORDER BY code would require
# re-recording the result file, because the query text is echoed there.
#
SELECT code, hex(upper(a)), hex(lower(a)),a, upper(a), lower(a) FROM t1 WHERE hex(a)<>hex(upper(a)) OR hex(a)<>hex(lower(a));
#
# Make sure all possible conversions happened.
# Each query below lists the characters for which converting case in euckr
# and then casting to utf8 differs from casting to utf8 first and then
# converting case.
#
# Expect U+212B ANGSTROM SIGN
#
SELECT * FROM t1
WHERE HEX(CAST(LOWER(a) AS CHAR CHARACTER SET utf8)) <>
HEX(LOWER(CAST(a AS CHAR CHARACTER SET utf8))) ORDER BY code;
#
# Expect U+0111 LATIN SMALL LETTER D WITH STROKE
# Expect U+24D0 to U+24E9 CIRCLED LATIN SMALL LETTER A to Z
#
SELECT * FROM t1
WHERE HEX(CAST(UPPER(a) AS CHAR CHARACTER SET utf8)) <>
HEX(UPPER(CAST(a AS CHAR CHARACTER SET utf8))) ORDER BY code;
DROP TABLE t1;
--echo #
--echo # End of 5.5 tests
--echo #
...@@ -33,3 +33,94 @@ SELECT hex(a) FROM t1 ORDER BY a; ...@@ -33,3 +33,94 @@ SELECT hex(a) FROM t1 ORDER BY a;
DROP TABLE t1; DROP TABLE t1;
# End of 4.1 tests # End of 4.1 tests
--echo #
--echo # Start of 5.5 tests
--echo #
--echo #
--echo # Testing WL#4583 Case conversion in Asian character sets
--echo #
#
# Overview: enumerate every candidate two-byte code as hex text, decode it
# into column a, then check UPPER()/LOWER() both directly and against the
# same conversion done in utf8.
#
#
# Populate t1 with all hex digits
#
SET NAMES utf8;
SET collation_connection=gb2312_chinese_ci;
CREATE TABLE t1 (b VARCHAR(2));
INSERT INTO t1 VALUES ('0'),('1'),('2'),('3'),('4'),('5'),('6'),('7');
INSERT INTO t1 VALUES ('8'),('9'),('A'),('B'),('C'),('D'),('E'),('F');
#
# Populate tables head and tail with values '00'-'FF'
# (the 16 hex digits cross-joined with themselves: 16x16=256 rows each)
#
CREATE TEMPORARY TABLE head AS SELECT concat(b1.b, b2.b) AS head FROM t1 b1, t1 b2;
CREATE TEMPORARY TABLE tail AS SELECT concat(b1.b, b2.b) AS tail FROM t1 b1, t1 b2;
DROP TABLE t1;
#
# Populate table t1 with all codes [80..FF][20..FF]
# Column "code" holds the hex text of the candidate byte pair; column "a"
# starts as a one-space placeholder and is filled in by the UPDATE below.
# Expected valid gb2312 codes [A1..F7][A1..FE]
#
CREATE TABLE t1 AS
SELECT concat(head, tail) AS code, ' ' AS a
FROM head, tail
WHERE (head BETWEEN '80' AND 'FF') AND (tail BETWEEN '20' AND 'FF')
ORDER BY head, tail;
DROP TEMPORARY TABLE head, tail;
SHOW CREATE TABLE t1;
#
# Decode each hex code into column a and count the non-empty values.
#
UPDATE t1 SET a=unhex(code) ORDER BY code;
SELECT COUNT(*) FROM t1 WHERE a<>'';
#
# Display all characters that have upper or lower case mapping.
# NOTE(review): this query has no ORDER BY, so the row order relies on the
# order in which t1 was populated. Adding ORDER BY code would require
# re-recording the result file, because the query text is echoed there.
#
SELECT code, hex(upper(a)), hex(lower(a)),a, upper(a), lower(a) FROM t1 WHERE hex(a)<>hex(upper(a)) OR hex(a)<>hex(lower(a));
#
# Make sure all possible conversions happened.
# Each query below lists the characters for which converting case in gb2312
# and then casting to utf8 differs from casting to utf8 first and then
# converting case.
#
# Expect U+2160 to U+216B ROMAN NUMERAL ONE to ROMAN NUMERAL TWELVE
#
SELECT * FROM t1
WHERE HEX(CAST(LOWER(a) AS CHAR CHARACTER SET utf8)) <>
HEX(LOWER(CAST(a AS CHAR CHARACTER SET utf8))) ORDER BY code;
#
# Expect
# U+00E0 LATIN SMALL LETTER A WITH GRAVE
# U+00E1 LATIN SMALL LETTER A WITH ACUTE
# U+00E8 LATIN SMALL LETTER E WITH GRAVE
# U+00E9 LATIN SMALL LETTER E WITH ACUTE
# U+00EA LATIN SMALL LETTER E WITH CIRCUMFLEX
# U+00EC LATIN SMALL LETTER I WITH GRAVE
# U+00ED LATIN SMALL LETTER I WITH ACUTE
# U+00F2 LATIN SMALL LETTER O WITH GRAVE
# U+00F3 LATIN SMALL LETTER O WITH ACUTE
# U+00F9 LATIN SMALL LETTER U WITH GRAVE
# U+00FA LATIN SMALL LETTER U WITH ACUTE
# U+00FC LATIN SMALL LETTER U WITH DIAERESIS
# U+0101 LATIN SMALL LETTER A WITH MACRON
# U+0113 LATIN SMALL LETTER E WITH MACRON
# U+011B LATIN SMALL LETTER E WITH CARON
# U+012B LATIN SMALL LETTER I WITH MACRON
# U+0144 LATIN SMALL LETTER N WITH ACUTE
# U+0148 LATIN SMALL LETTER N WITH CARON
# U+014D LATIN SMALL LETTER O WITH MACRON
# U+016B LATIN SMALL LETTER U WITH MACRON
# U+01CE LATIN SMALL LETTER A WITH CARON
# U+01D0 LATIN SMALL LETTER I WITH CARON
# U+01D2 LATIN SMALL LETTER O WITH CARON
# U+01D4 LATIN SMALL LETTER U WITH CARON
# U+01D6 LATIN SMALL LETTER U WITH DIAERESIS AND MACRON
# U+01D8 LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE
# U+01DA LATIN SMALL LETTER U WITH DIAERESIS AND CARON
# U+01DC LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE
#
SELECT * FROM t1
WHERE HEX(CAST(UPPER(a) AS CHAR CHARACTER SET utf8)) <>
HEX(UPPER(CAST(a AS CHAR CHARACTER SET utf8))) ORDER BY code;
DROP TABLE t1;
--echo #
--echo # End of 5.5 tests
--echo #
...@@ -68,3 +68,94 @@ SELECT b FROM t1 GROUP BY 1 LIMIT 1 INTO @nullll; ...@@ -68,3 +68,94 @@ SELECT b FROM t1 GROUP BY 1 LIMIT 1 INTO @nullll;
DROP TABLES t1; DROP TABLES t1;
--echo End of 5.0 tests --echo End of 5.0 tests
--echo #
--echo # Start of 5.5 tests
--echo #
--echo #
--echo # Testing WL#4583 Case conversion in Asian character sets
--echo #
#
# Overview: enumerate every candidate two-byte code as hex text, decode it
# into column a, then check UPPER()/LOWER() both directly and against the
# same conversion done in utf8.
#
#
# Populate t1 with all hex digits
#
SET NAMES utf8;
SET collation_connection=gbk_chinese_ci;
CREATE TABLE t1 (b VARCHAR(2));
INSERT INTO t1 VALUES ('0'),('1'),('2'),('3'),('4'),('5'),('6'),('7');
INSERT INTO t1 VALUES ('8'),('9'),('A'),('B'),('C'),('D'),('E'),('F');
#
# Populate tables head and tail with values '00'-'FF'
# (the 16 hex digits cross-joined with themselves: 16x16=256 rows each)
#
CREATE TEMPORARY TABLE head AS SELECT concat(b1.b, b2.b) AS head FROM t1 b1, t1 b2;
CREATE TEMPORARY TABLE tail AS SELECT concat(b1.b, b2.b) AS tail FROM t1 b1, t1 b2;
DROP TABLE t1;
#
# Populate table t1 with all codes [80..FF][20..FF]
# Column "code" holds the hex text of the candidate byte pair; column "a"
# starts as a one-space placeholder and is filled in by the UPDATE below.
# Expected valid gbk codes [81..FE][40..7E,80..FE]
#
CREATE TABLE t1 AS
SELECT concat(head, tail) AS code, ' ' AS a
FROM head, tail
WHERE (head BETWEEN '80' AND 'FF') AND (tail BETWEEN '20' AND 'FF')
ORDER BY head, tail;
DROP TEMPORARY TABLE head, tail;
SHOW CREATE TABLE t1;
#
# Decode each hex code into column a and count the non-empty values.
#
UPDATE t1 SET a=unhex(code) ORDER BY code;
SELECT COUNT(*) FROM t1 WHERE a<>'';
#
# Display all characters that have upper or lower case mapping.
# NOTE(review): this query has no ORDER BY, so the row order relies on the
# order in which t1 was populated. Adding ORDER BY code would require
# re-recording the result file, because the query text is echoed there.
#
SELECT code, hex(upper(a)), hex(lower(a)),a, upper(a), lower(a) FROM t1 WHERE hex(a)<>hex(upper(a)) OR hex(a)<>hex(lower(a));
#
# Make sure all possible conversions happened.
# Each query below lists the characters for which converting case in gbk
# and then casting to utf8 differs from casting to utf8 first and then
# converting case.
#
# Expect U+216A to U+216B ROMAN NUMERAL ELEVEN to ROMAN NUMERAL TWELVE
#
SELECT * FROM t1
WHERE HEX(CAST(LOWER(a) AS CHAR CHARACTER SET utf8)) <>
HEX(LOWER(CAST(a AS CHAR CHARACTER SET utf8))) ORDER BY code;
#
# Expect
# U+00E0 LATIN SMALL LETTER A WITH GRAVE
# U+00E1 LATIN SMALL LETTER A WITH ACUTE
# U+00E8 LATIN SMALL LETTER E WITH GRAVE
# U+00E9 LATIN SMALL LETTER E WITH ACUTE
# U+00EA LATIN SMALL LETTER E WITH CIRCUMFLEX
# U+00EC LATIN SMALL LETTER I WITH GRAVE
# U+00ED LATIN SMALL LETTER I WITH ACUTE
# U+00F2 LATIN SMALL LETTER O WITH GRAVE
# U+00F3 LATIN SMALL LETTER O WITH ACUTE
# U+00F9 LATIN SMALL LETTER U WITH GRAVE
# U+00FA LATIN SMALL LETTER U WITH ACUTE
# U+00FC LATIN SMALL LETTER U WITH DIAERESIS
# U+0101 LATIN SMALL LETTER A WITH MACRON
# U+0113 LATIN SMALL LETTER E WITH MACRON
# U+011B LATIN SMALL LETTER E WITH CARON
# U+012B LATIN SMALL LETTER I WITH MACRON
# U+0144 LATIN SMALL LETTER N WITH ACUTE
# U+0148 LATIN SMALL LETTER N WITH CARON
# U+014D LATIN SMALL LETTER O WITH MACRON
# U+016B LATIN SMALL LETTER U WITH MACRON
# U+01CE LATIN SMALL LETTER A WITH CARON
# U+01D0 LATIN SMALL LETTER I WITH CARON
# U+01D2 LATIN SMALL LETTER O WITH CARON
# U+01D4 LATIN SMALL LETTER U WITH CARON
# U+01D6 LATIN SMALL LETTER U WITH DIAERESIS AND MACRON
# U+01D8 LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE
# U+01DA LATIN SMALL LETTER U WITH DIAERESIS AND CARON
# U+01DC LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE
#
SELECT * FROM t1
WHERE HEX(CAST(UPPER(a) AS CHAR CHARACTER SET utf8)) <>
HEX(UPPER(CAST(a AS CHAR CHARACTER SET utf8))) ORDER BY code;
DROP TABLE t1;
--echo #
--echo # End of 5.5 tests
--echo #
...@@ -93,3 +93,78 @@ SELECT hex(a), hex(lower(a)), hex(upper(a)) FROM t1 ORDER BY binary(a); ...@@ -93,3 +93,78 @@ SELECT hex(a), hex(lower(a)), hex(upper(a)) FROM t1 ORDER BY binary(a);
DROP TABLE t1; DROP TABLE t1;
--echo # End of 5.1 tests --echo # End of 5.1 tests
--echo #
--echo # Start of 5.5 tests
--echo #
--echo #
--echo # Testing WL#4583 Case conversion in Asian character sets
--echo #
#
# Overview: enumerate every candidate sjis code (two-byte codes plus the
# single-byte Half Width Kana) as hex text, decode it into column a, then
# check UPPER()/LOWER() both directly and against the conversion in utf8.
#
#
# Populate t1 with all hex digits
#
SET NAMES utf8;
SET collation_connection=sjis_japanese_ci;
CREATE TABLE t1 (b VARCHAR(2));
INSERT INTO t1 VALUES ('0'),('1'),('2'),('3'),('4'),('5'),('6'),('7');
INSERT INTO t1 VALUES ('8'),('9'),('A'),('B'),('C'),('D'),('E'),('F');
#
# Populate tables head and tail with values '00'-'FF'
# (the 16 hex digits cross-joined with themselves: 16x16=256 rows each)
#
CREATE TEMPORARY TABLE head AS SELECT concat(b1.b, b2.b) AS head FROM t1 b1, t1 b2;
CREATE TEMPORARY TABLE tail AS SELECT concat(b1.b, b2.b) AS tail FROM t1 b1, t1 b2;
DROP TABLE t1;
#
# Populate table t1 with all codes [80..FF][20..FF]
# excluding Half Width Kana [A1..DF] as the first byte.
# Column "code" holds the hex text of the candidate bytes; column "a"
# starts as a one-space placeholder and is filled in by the UPDATE below.
# Expected valid sjis codes:
# [81..9F,E0..FC][40..7E,80..FC] (60x188=11280 characters)
#
CREATE TABLE t1 AS
SELECT concat(head, tail) AS code, ' ' AS a
FROM head, tail
WHERE (head BETWEEN '80' AND 'FF') AND (head NOT BETWEEN 'A1' AND 'DF')
AND (tail BETWEEN '20' AND 'FF')
ORDER BY head, tail;
#
# Populate Half Width Kana: [A1..DF] as single-byte codes
#
INSERT t1 (code) SELECT head FROM head WHERE (head BETWEEN 'A1' AND 'DF');
DROP TEMPORARY TABLE head, tail;
SHOW CREATE TABLE t1;
#
# Decode each hex code into column a, then count all rows, the non-empty
# single-byte values and the non-empty two-byte values.
#
UPDATE t1 SET a=unhex(code) ORDER BY code;
SELECT COUNT(*) FROM t1;
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=1;
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=2;
#
# Display all characters that have upper or lower case mapping.
#
SELECT code, hex(upper(a)), hex(lower(a)),a, upper(a), lower(a)
FROM t1
WHERE hex(a)<>hex(upper(a)) OR hex(a)<>hex(lower(a))
ORDER BY code;
#
# Make sure all possible conversions happened.
# Each query below lists the characters for which converting case in sjis
# and then casting to utf8 differs from casting to utf8 first and then
# converting case.
#
# Expect U+212B ANGSTROM SIGN
#
SELECT * FROM t1
WHERE HEX(CAST(LOWER(a) AS CHAR CHARACTER SET utf8)) <>
HEX(LOWER(CAST(a AS CHAR CHARACTER SET utf8))) ORDER BY code;
#
# Expect no results
#
SELECT * FROM t1
WHERE HEX(CAST(UPPER(a) AS CHAR CHARACTER SET utf8)) <>
HEX(UPPER(CAST(a AS CHAR CHARACTER SET utf8))) ORDER BY code;
DROP TABLE t1;
--echo #
--echo # End of 5.5 tests
--echo #
...@@ -1212,3 +1212,88 @@ DROP TABLE t2; ...@@ -1212,3 +1212,88 @@ DROP TABLE t2;
set names default; set names default;
set character_set_database=default; set character_set_database=default;
set character_set_server=default; set character_set_server=default;
--echo #
--echo # Start of 5.5 tests
--echo #
--echo #
--echo # Testing WL#4583 Case conversion in Asian character sets
--echo #
#
# Overview: enumerate every candidate ujis code (two-byte 8E-prefixed kana,
# two-byte codes, three-byte 8F-prefixed codes) as hex text, decode it into
# column a, then check UPPER()/LOWER() both directly and against the same
# conversion done in utf8.
#
#
# Populate t1 with all hex digits
#
SET NAMES utf8;
SET collation_connection=ujis_japanese_ci;
CREATE TABLE t1 (b VARCHAR(2));
INSERT INTO t1 VALUES ('0'),('1'),('2'),('3'),('4'),('5'),('6'),('7');
INSERT INTO t1 VALUES ('8'),('9'),('A'),('B'),('C'),('D'),('E'),('F');
#
# Populate tables head and tail with values '00'-'FF'
# (the 16 hex digits cross-joined with themselves: 16x16=256 rows each)
#
CREATE TEMPORARY TABLE head AS SELECT concat(b1.b, b2.b) AS head FROM t1 b1, t1 b2;
CREATE TEMPORARY TABLE tail AS SELECT concat(b1.b, b2.b) AS tail FROM t1 b1, t1 b2;
DROP TABLE t1;
#
# Populate table t1 with all ujis codes.
# The table is created empty (LIMIT 0); the 'XXXXXX' literal only sizes the
# "code" column for the six hex digits of a three-byte code.
#
CREATE TABLE t1 AS SELECT 'XXXXXX' AS code, ' ' AS a LIMIT 0;
#
# Populate JIS-X-0201 range (Half Width Kana)
# Valid characters: [8E][A1-DF]
#
INSERT INTO t1 (code) SELECT concat('8E', head) FROM head
WHERE (head BETWEEN 'A1' AND 'DF') ORDER BY head;
#
# Populate JIS-X-0208 range
# First bytes 8E and 8F are skipped here because they are the prefixes
# used for the other two ranges.
# Expected valid range: [A1..FE][A1..FE]
#
INSERT INTO t1 (code) SELECT concat(head, tail)
FROM head, tail
WHERE (head BETWEEN '80' AND 'FF') AND (head NOT BETWEEN '8E' AND '8F')
AND (tail BETWEEN '20' AND 'FF')
ORDER BY head, tail;
#
# Populate JIS-X-0212 range
# Expected valid range: [8F][A1..FE][A1..FE]
# NOTE(review): the tail filter here starts at '80', while the JIS-X-0208
# insert above starts at '20'. Both cover the expected valid range, but
# changing either would alter the recorded row counts -- confirm whether
# the difference is intentional.
#
INSERT INTO t1 (code) SELECT concat('8F', head, tail)
FROM head, tail
WHERE (head BETWEEN '80' AND 'FF') AND (tail BETWEEN '80' AND 'FF')
ORDER BY head, tail;
DROP TEMPORARY TABLE head, tail;
SHOW CREATE TABLE t1;
#
# Decode each hex code into column a, then count all rows, the non-empty
# values, and the non-empty values by byte length (2 and 3 bytes).
#
UPDATE t1 SET a=unhex(code) ORDER BY code;
SELECT COUNT(*) FROM t1;
SELECT COUNT(*) FROM t1 WHERE a<>'';
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=2;
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=3;
#
# Display all characters that have upper or lower case mapping.
#
SELECT code, hex(upper(a)), hex(lower(a)),a, upper(a), lower(a) FROM t1 WHERE hex(a)<>hex(upper(a)) OR hex(a)<>hex(lower(a)) ORDER BY code;
#
# Make sure all possible conversions happened.
# Each query below lists the characters for which converting case in ujis
# and then casting to utf8 differs from casting to utf8 first and then
# converting case.
#
# Expect U+0122 LATIN CAPITAL LETTER G WITH CEDILLA
#
SELECT * FROM t1
WHERE HEX(CAST(LOWER(a) AS CHAR CHARACTER SET utf8)) <>
HEX(LOWER(CAST(a AS CHAR CHARACTER SET utf8))) ORDER BY code;
#
# Expect U+00F0 LATIN SMALL LETTER ETH
# Expect U+01F5 LATIN SMALL LETTER G WITH ACUTE
#
SELECT * FROM t1
WHERE HEX(CAST(UPPER(a) AS CHAR CHARACTER SET utf8)) <>
HEX(UPPER(CAST(a AS CHAR CHARACTER SET utf8))) ORDER BY code;
DROP TABLE t1;
--echo #
--echo # End of 5.5 tests
--echo #
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment