Commit 0c61e7ab authored by Alexander Barkov's avatar Alexander Barkov

WL#3090 Japanese Character Set adjustments

added:
  @ mysql-test/include/ctype_utf8_table.inc
    Adding a shared file to populate all utf8 values [U+0000..U+FFFF]
modified:
  @ include/m_ctype.h
    Introducing MB2 and MY_PUT_MB2 macros

  @ mysql-test/r/ctype_cp932_binlog_stm.result
  @ mysql-test/r/ctype_eucjpms.result
  @ mysql-test/r/ctype_sjis.result
  @ mysql-test/r/ctype_ujis.result
  @ mysql-test/t/ctype_cp932_binlog_stm.test
  @ mysql-test/t/ctype_eucjpms.test
  @ mysql-test/t/ctype_sjis.test
  @ mysql-test/t/ctype_ujis.test
    Adding test

  @ strings/ctype-cp932.c
  @ strings/ctype-eucjpms.c
  @ strings/ctype-sjis.c
  @ strings/ctype-ujis.c
    Adding new functions using Big-Table approach.
parent 2fa7509b
......@@ -38,6 +38,23 @@ extern "C" {
#define my_wc_t ulong
/*
On i386 we store Unicode->CS conversion tables for
some character sets using Big-endian order,
to copy two bytes at once.
This gives some performance improvement.

MB2(x)
  On i386: swaps the two low-order bytes of x.
  Elsewhere: yields x unchanged.

MY_PUT_MB2(s, code)
  On i386: stores code at s with a single uint16 store.
  Elsewhere: stores (code >> 8) into s[0] and (code & 0xFF) into s[1].
  The expansion is a braced compound statement, so it must be used as a
  statement, not inside an expression.
  In the non-i386 form "code" is evaluated twice; do not pass an argument
  with side effects.
  Both arguments are parenthesized in the expansion so that expressions
  such as "hi | lo" or "p + n" are handled correctly.
*/
#ifdef __i386__
#define MB2(x) (((x) >> 8) + (((x) & 0xFF) << 8))
#define MY_PUT_MB2(s, code) { *((uint16*)(s))= (code); }
#else
#define MB2(x) (x)
#define MY_PUT_MB2(s, code) { (s)[0]= (code) >> 8; (s)[1]= (code) & 0xFF; }
#endif
typedef struct unicase_info_st
{
uint32 toupper;
......
#
# Builds table t1 (one utf8 character per row) from the 1-, 2- and 3-byte
# byte sequences selected below, then reports the row count.
# The temporary tables head, tail and middle are not dropped in this file
# as shown, and neither is t1.
#
# Start with t1 as a scratch table holding the 16 hexadecimal digits.
#
CREATE TABLE t1 (a CHAR(1)) CHARACTER SET utf8;
INSERT INTO t1 VALUES ('0'),('1'),('2'),('3'),('4'),('5'),('6'),('7');
INSERT INTO t1 VALUES ('8'),('9'),('A'),('B'),('C'),('D'),('E'),('F');
#
# Populate tables head, middle and tail with values '00'-'FF'
# (each is the 16 x 16 cross join of the digit table with itself,
# i.e. 256 two-digit hex strings used as byte values)
#
CREATE TEMPORARY TABLE head AS SELECT concat(b1.a, b2.a) AS head FROM t1 b1, t1 b2;
CREATE TEMPORARY TABLE tail AS SELECT concat(b1.a, b2.a) AS tail FROM t1 b1, t1 b2;
CREATE TEMPORARY TABLE middle AS SELECT concat(b1.a, b2.a) AS middle FROM t1 b1, t1 b2;
#
# The digit table is no longer needed; recreate t1 as the result table.
#
DROP TABLE t1;
CREATE TABLE t1 (a varchar(1)) CHARACTER SET utf8;
#
# Populate single byte characters: [00-7F]
# (128 byte values selected)
#
INSERT INTO t1 SELECT UNHEX(head)
FROM head WHERE (head BETWEEN '00' AND '7F') ORDER BY head;
#
# Populate 2-byte characters: U+80..U+7FF: [C2-DF][80-BF]
# (30 x 64 = 1920 byte sequences selected)
#
INSERT INTO t1
SELECT UNHEX(CONCAT(head,tail))
FROM head, tail
WHERE (head BETWEEN 'C2' AND 'DF') AND (tail BETWEEN '80' AND 'BF')
ORDER BY head, tail;
#
# Populate 3-byte characters: U+800..U+FFFF: [E0-EF][80-BF][80-BF]
# excluding overlong [E0][80-9F][80-BF]
# (16 x 64 x 64 - 32 x 64 = 63488 byte sequences selected)
# NOTE(review): [ED][A0-BF][80-BF] (U+D800..U+DFFF, the surrogate range)
# is not excluded by this filter - confirm that is intended.
#
INSERT INTO t1
SELECT UNHEX(CONCAT(head, middle, tail))
FROM head, middle, tail
WHERE (head BETWEEN 'E0' AND 'EF')
AND (middle BETWEEN '80' AND 'BF')
AND (tail BETWEEN '80' AND 'BF')
AND NOT (head='E0' AND middle BETWEEN '80' AND '9F')
ORDER BY head, middle, tail;
#
# Report how many rows ended up in t1
# (the three filters above select 128 + 1920 + 63488 = 65536 sequences).
#
SELECT count(*) FROM t1;
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -109,9 +109,24 @@ SELECT * FROM t1
WHERE HEX(CAST(UPPER(a) AS CHAR CHARACTER SET utf8)) <>
HEX(UPPER(CAST(a AS CHAR CHARACTER SET utf8))) ORDER BY code;
# cp932 -> Unicode: list every non-empty value of t1.a together with the hex
# of its utf8 conversion, ordered by the code column of t1.
# HAVING is used for the b<>'3F' filter because it refers to the select-list
# alias b. Hex 3F is '?'; presumably that is what an unconvertible character
# turns into, so those rows are left out - TODO confirm.
--echo #
--echo # WL#3090 Japanese Character Set adjustments
--echo # Test cp932->Unicode conversion
--echo #
SELECT HEX(a), HEX(CONVERT(a USING utf8)) as b FROM t1
WHERE a<>'' HAVING b<>'3F' ORDER BY code;
DROP TABLE t1;
# Unicode -> cp932: the include recreates t1 with utf8 characters built from
# 1-, 2- and 3-byte sequences; list each one whose cp932 conversion is not
# hex 3F, ordered by the binary value of a. t1 is dropped afterwards.
--echo #
--echo # WL#3090 Japanese Character Set adjustments
--echo # Test Unicode->cp932 conversion
--echo #
--source include/ctype_utf8_table.inc
SELECT HEX(a), HEX(CONVERT(a using cp932)) as b FROM t1 HAVING b<>'3F' ORDER BY BINARY a;
DROP TABLE t1;
--echo #
......
......@@ -461,9 +461,26 @@ SELECT * FROM t1
WHERE HEX(CAST(UPPER(a) AS CHAR CHARACTER SET utf8)) <>
HEX(UPPER(CAST(a AS CHAR CHARACTER SET utf8))) ORDER BY code;
# Character set -> Unicode: list every non-empty value of t1.a together with
# the hex of its utf8 conversion, ordered by the code column of t1.
# HAVING is used for the b<>'3F' filter because it refers to the select-list
# alias b. Hex 3F is '?'; presumably that is what an unconvertible character
# turns into, so those rows are left out - TODO confirm.
# NOTE(review): the banner text and the CONVERT target below both say sjis.
# If this file is the eucjpms test, they were probably meant to name eucjpms.
# Confirm before changing: the --echo text and the query are both written to
# the recorded result file.
--echo #
--echo # WL#3090 Japanese Character Set adjustments
--echo # Test sjis->Unicode conversion
--echo #
SELECT HEX(a), HEX(CONVERT(a USING utf8)) as b FROM t1
WHERE a<>'' HAVING b<>'3F' ORDER BY code;
DROP TABLE t1;
# Unicode -> character set: the include recreates t1 with utf8 characters
# built from 1-, 2- and 3-byte sequences; list each one whose conversion is
# not hex 3F, ordered by the binary value of a. t1 is dropped afterwards.
--echo #
--echo # WL#3090 Japanese Character Set adjustments
--echo # Test Unicode->sjis conversion
--echo #
--source include/ctype_utf8_table.inc
SELECT HEX(a), HEX(CONVERT(a using sjis)) as b FROM t1 HAVING b<>'3F' ORDER BY BINARY a;
DROP TABLE t1;
--echo #
--echo # End of 5.5 tests
--echo #
......@@ -160,9 +160,23 @@ SELECT * FROM t1
WHERE HEX(CAST(UPPER(a) AS CHAR CHARACTER SET utf8)) <>
HEX(UPPER(CAST(a AS CHAR CHARACTER SET utf8))) ORDER BY code;
# sjis -> Unicode: list every non-empty value of t1.a together with the hex
# of its utf8 conversion, ordered by the code column of t1.
# HAVING is used for the b<>'3F' filter because it refers to the select-list
# alias b. Hex 3F is '?'; presumably that is what an unconvertible character
# turns into, so those rows are left out - TODO confirm.
--echo #
--echo # WL#3090 Japanese Character Set adjustments
--echo # Test sjis->Unicode conversion
--echo #
SELECT HEX(a), HEX(CONVERT(a USING utf8)) as b FROM t1
WHERE a<>'' HAVING b<>'3F' ORDER BY code;
DROP TABLE t1;
# Unicode -> sjis: the include recreates t1 with utf8 characters built from
# 1-, 2- and 3-byte sequences; list each one whose sjis conversion is not
# hex 3F, ordered by the binary value of a. t1 is dropped afterwards.
--echo #
--echo # WL#3090 Japanese Character Set adjustments
--echo # Test Unicode->sjis conversion
--echo #
--source include/ctype_utf8_table.inc
SELECT HEX(a), HEX(CONVERT(a using sjis)) as b FROM t1 HAVING b<>'3F' ORDER BY BINARY a;
DROP TABLE t1;
--echo #
......
......@@ -1291,9 +1291,26 @@ SELECT * FROM t1
WHERE HEX(CAST(UPPER(a) AS CHAR CHARACTER SET utf8)) <>
HEX(UPPER(CAST(a AS CHAR CHARACTER SET utf8))) ORDER BY code;
# Character set -> Unicode: list every non-empty value of t1.a together with
# the hex of its utf8 conversion, ordered by the code column of t1.
# HAVING is used for the b<>'3F' filter because it refers to the select-list
# alias b. Hex 3F is '?'; presumably that is what an unconvertible character
# turns into, so those rows are left out - TODO confirm.
# NOTE(review): the banner text and the CONVERT target below both say sjis.
# If this file is the ujis test, they were probably meant to name ujis.
# Confirm before changing: the --echo text and the query are both written to
# the recorded result file.
--echo #
--echo # WL#3090 Japanese Character Set adjustments
--echo # Test sjis->Unicode conversion
--echo #
SELECT HEX(a), HEX(CONVERT(a USING utf8)) as b FROM t1
WHERE a<>'' HAVING b<>'3F' ORDER BY code;
DROP TABLE t1;
# Unicode -> character set: the include recreates t1 with utf8 characters
# built from 1-, 2- and 3-byte sequences; list each one whose conversion is
# not hex 3F, ordered by the binary value of a. t1 is dropped afterwards.
--echo #
--echo # WL#3090 Japanese Character Set adjustments
--echo # Test Unicode->sjis conversion
--echo #
--source include/ctype_utf8_table.inc
SELECT HEX(a), HEX(CONVERT(a using sjis)) as b FROM t1 HAVING b<>'3F' ORDER BY BINARY a;
DROP TABLE t1;
--echo #
--echo # End of 5.5 tests
--echo #
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment