Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
63120090
Commit
63120090
authored
Jun 21, 2016
by
Alexander Barkov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
MDEV-10262 ucs2_thai_520_w2: wrong implicit weights on the secondary level
parent
61492ea5
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
150 additions
and
21 deletions
+150
-21
include/m_ctype.h
include/m_ctype.h
+1
-0
mysql-test/include/ctype_uca_w2.inc
mysql-test/include/ctype_uca_w2.inc
+7
-0
mysql-test/r/ctype_uca.result
mysql-test/r/ctype_uca.result
+26
-0
mysql-test/r/ctype_utf16_uca.result
mysql-test/r/ctype_utf16_uca.result
+13
-0
mysql-test/r/ctype_utf32_uca.result
mysql-test/r/ctype_utf32_uca.result
+13
-0
mysql-test/r/ctype_utf8mb4_uca.result
mysql-test/r/ctype_utf8mb4_uca.result
+13
-0
strings/ctype-uca.c
strings/ctype-uca.c
+77
-21
No files found.
include/m_ctype.h
View file @
63120090
...
...
@@ -131,6 +131,7 @@ typedef struct my_uca_level_info_st
uchar
*
lengths
;
uint16
**
weights
;
MY_CONTRACTIONS
contractions
;
uint
levelno
;
}
MY_UCA_WEIGHT_LEVEL
;
...
...
mysql-test/include/ctype_uca_w2.inc
View file @
63120090
...
...
@@ -22,6 +22,13 @@ SELECT HEX(a), HEX(WEIGHT_STRING(a AS CHAR(4) LEVEL 2)) FROM t1;
SELECT
HEX
(
a
),
HEX
(
WEIGHT_STRING
(
a
AS
CHAR
(
4
)
LEVEL
3
))
FROM
t1
;
DROP
TABLE
t1
;
CREATE
TABLE
t1
AS
SELECT
SPACE
(
10
)
AS
a
LIMIT
0
;
SHOW
CREATE
TABLE
t1
;
INSERT
INTO
t1
VALUES
(
_ucs2
0x3400
);
INSERT
INTO
t1
VALUES
(
_ucs2
0xF001
);
SELECT
HEX
(
CONVERT
(
a
USING
ucs2
))
AS
ucs2
,
HEX
(
a
),
HEX
(
WEIGHT_STRING
(
a
))
FROM
t1
;
DROP
TABLE
t1
;
CREATE
TABLE
t1
AS
SELECT
SPACE
(
10
)
AS
a
LIMIT
0
;
SHOW
CREATE
TABLE
t1
;
INSERT
INTO
t1
VALUES
(
'A'
),(
'À'
),(
'Á'
),(
'Â'
),(
'Ã'
),(
'Ä'
),(
'Å'
);
...
...
mysql-test/r/ctype_uca.result
View file @
63120090
...
...
@@ -14033,6 +14033,19 @@ Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(10) CHARACTER SET utf8 COLLATE utf8_thai_520_w2 NOT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
INSERT INTO t1 VALUES (_ucs2 0x3400);
INSERT INTO t1 VALUES (_ucs2 0xF001);
SELECT HEX(CONVERT(a USING ucs2)) AS ucs2, HEX(a), HEX(WEIGHT_STRING(a)) FROM t1;
ucs2 HEX(a) HEX(WEIGHT_STRING(a))
3400 E39080 FB80B4000020
F001 EF8081 FBC1F0010020
DROP TABLE t1;
CREATE TABLE t1 AS SELECT SPACE(10) AS a LIMIT 0;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(10) CHARACTER SET utf8 COLLATE utf8_thai_520_w2 NOT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
INSERT INTO t1 VALUES ('A'),('À'),('Á'),('Â'),('Ã'),('Ä'),('Å');
SELECT a, HEX(WEIGHT_STRING(a LEVEL 2)) FROM t1 ORDER BY a;
a HEX(WEIGHT_STRING(a LEVEL 2))
...
...
@@ -14714,6 +14727,19 @@ Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(10) CHARACTER SET ucs2 COLLATE ucs2_thai_520_w2 NOT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
INSERT INTO t1 VALUES (_ucs2 0x3400);
INSERT INTO t1 VALUES (_ucs2 0xF001);
SELECT HEX(CONVERT(a USING ucs2)) AS ucs2, HEX(a), HEX(WEIGHT_STRING(a)) FROM t1;
ucs2 HEX(a) HEX(WEIGHT_STRING(a))
3400 3400 FB80B4000020
F001 F001 FBC1F0010020
DROP TABLE t1;
CREATE TABLE t1 AS SELECT SPACE(10) AS a LIMIT 0;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(10) CHARACTER SET ucs2 COLLATE ucs2_thai_520_w2 NOT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
INSERT INTO t1 VALUES ('A'),('À'),('Á'),('Â'),('Ã'),('Ä'),('Å');
SELECT a, HEX(WEIGHT_STRING(a LEVEL 2)) FROM t1 ORDER BY a;
a HEX(WEIGHT_STRING(a LEVEL 2))
...
...
mysql-test/r/ctype_utf16_uca.result
View file @
63120090
...
...
@@ -6663,6 +6663,19 @@ Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(10) CHARACTER SET utf16 COLLATE utf16_thai_520_w2 NOT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
INSERT INTO t1 VALUES (_ucs2 0x3400);
INSERT INTO t1 VALUES (_ucs2 0xF001);
SELECT HEX(CONVERT(a USING ucs2)) AS ucs2, HEX(a), HEX(WEIGHT_STRING(a)) FROM t1;
ucs2 HEX(a) HEX(WEIGHT_STRING(a))
3400 3400 FB80B4000020
F001 F001 FBC1F0010020
DROP TABLE t1;
CREATE TABLE t1 AS SELECT SPACE(10) AS a LIMIT 0;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(10) CHARACTER SET utf16 COLLATE utf16_thai_520_w2 NOT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
INSERT INTO t1 VALUES ('A'),('À'),('Á'),('Â'),('Ã'),('Ä'),('Å');
SELECT a, HEX(WEIGHT_STRING(a LEVEL 2)) FROM t1 ORDER BY a;
a HEX(WEIGHT_STRING(a LEVEL 2))
...
...
mysql-test/r/ctype_utf32_uca.result
View file @
63120090
...
...
@@ -6683,6 +6683,19 @@ Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(10) CHARACTER SET utf32 COLLATE utf32_thai_520_w2 NOT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
INSERT INTO t1 VALUES (_ucs2 0x3400);
INSERT INTO t1 VALUES (_ucs2 0xF001);
SELECT HEX(CONVERT(a USING ucs2)) AS ucs2, HEX(a), HEX(WEIGHT_STRING(a)) FROM t1;
ucs2 HEX(a) HEX(WEIGHT_STRING(a))
3400 00003400 FB80B4000020
F001 0000F001 FBC1F0010020
DROP TABLE t1;
CREATE TABLE t1 AS SELECT SPACE(10) AS a LIMIT 0;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(10) CHARACTER SET utf32 COLLATE utf32_thai_520_w2 NOT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
INSERT INTO t1 VALUES ('A'),('À'),('Á'),('Â'),('Ã'),('Ä'),('Å');
SELECT a, HEX(WEIGHT_STRING(a LEVEL 2)) FROM t1 ORDER BY a;
a HEX(WEIGHT_STRING(a LEVEL 2))
...
...
mysql-test/r/ctype_utf8mb4_uca.result
View file @
63120090
...
...
@@ -5373,6 +5373,19 @@ Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_thai_520_w2 NOT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
INSERT INTO t1 VALUES (_ucs2 0x3400);
INSERT INTO t1 VALUES (_ucs2 0xF001);
SELECT HEX(CONVERT(a USING ucs2)) AS ucs2, HEX(a), HEX(WEIGHT_STRING(a)) FROM t1;
ucs2 HEX(a) HEX(WEIGHT_STRING(a))
3400 E39080 FB80B4000020
F001 EF8081 FBC1F0010020
DROP TABLE t1;
CREATE TABLE t1 AS SELECT SPACE(10) AS a LIMIT 0;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_thai_520_w2 NOT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
INSERT INTO t1 VALUES ('A'),('À'),('Á'),('Â'),('Ã'),('Ä'),('Å');
SELECT a, HEX(WEIGHT_STRING(a LEVEL 2)) FROM t1 ORDER BY a;
a HEX(WEIGHT_STRING(a LEVEL 2))
...
...
strings/ctype-uca.c
View file @
63120090
...
...
@@ -6539,7 +6539,8 @@ MY_UCA_INFO my_uca_v400=
0, /* nitems */
NULL, /* item */
NULL /* flags */
}
},
0 /* levelno */
},
},
...
...
@@ -30084,7 +30085,8 @@ MY_UCA_INFO my_uca_v520_th=
THAI_CONTRACTIONS, /* nitems */
thai_contractions, /* item */
NULL /* flags */
}
},
0 /* levelno */
},
{
0x10FFFF, /* maxchar */
...
...
@@ -30094,7 +30096,8 @@ MY_UCA_INFO my_uca_v520_th=
THAI_CONTRACTIONS_W2, /* nitems */
thai_contractions_w2, /* item */
NULL /* flags */
}
},
1 /* levelno */
},
},
...
...
@@ -30127,8 +30130,9 @@ MY_UCA_INFO my_uca_v520=
{ /* Contractions: */
0, /* nitems */
NULL, /* item */
NULL /* flags */
}
NULL /* flags */
},
0 /* levelno */
},
},
...
...
@@ -31529,37 +31533,88 @@ my_uca_previous_context_find(my_uca_scanner *scanner,
/****************************************************************/
/**
Implicit weights for a code CP are constructed as follows:
[.AAAA.0020.0002][.BBBB.0000.0000]
where:
AAAA= BASE + (CP >> 15);
BBBB= (CP & 0x7FFF) | 0x8000;
There are two weights in the primary level (AAAA followed by BBBB).
There is one weight on other levels:
- 0020 on the secondary level
- 0002 on the tertiary level
*/
/**
  Pick the BASE part of the leading implicit primary weight for a code point.

  UCA defines BASE as:
  - FB40 for Unified_Ideograph=True AND
    ((Block=CJK_Unified_Ideograph) OR
     (Block=CJK_Compatibility_Ideographs))
  - FB80 for Unified_Ideograph=True AND NOT
    ((Block=CJK_Unified_Ideograph) OR
     (Block=CJK_Compatibility_Ideographs))
  - FBC0 for any other code point

  This implementation approximates the rule with two fixed ranges:
  U+4E00..U+9FA5 gives FB40, U+3400..U+4DB5 gives FB80,
  everything else gives FBC0.

  TODO: it seems we're not handling BASE correctly:
  - check what are those blocks
  - there are more Unified Ideograph blocks in the latest Unicode versions

  @param code  The code point to classify
  @return      0xFB40, 0xFB80 or 0xFBC0
*/
static inline uint16
my_uca_implicit_weight_base(my_wc_t code)
{
  uint16 base= 0xFBC0;               /* Default: any other code point */

  if (code >= 0x4E00 && code <= 0x9FA5)
    base= 0xFB40;
  else if (code >= 0x3400 && code <= 0x4DB5)
    base= 0xFB80;

  return base;
}
/**
Return
implicit UCA weight
Return
an implicit UCA weight for the primary level.
Used for characters that do not have assigned UCA weights.
@param scanner UCA weight scanner
@return The leading implicit weight.
The second weight is stored in scanner->implicit[0]
and is later returned on the next my_uca_scanner_next_any() call.
*/
static inline int
my_uca_scanner_next_implicit(my_uca_scanner *scanner)
my_uca_scanner_next_implicit
_primary
(my_uca_scanner *scanner)
{
scanner->code
= (scanner->page << 8) + scanner->code;
scanner->implicit[0]= (
scanner->code & 0x7FFF) | 0x8000;
scanner->implicit[1]= 0;
my_wc_t wc
= (scanner->page << 8) + scanner->code;
scanner->implicit[0]= (
wc & 0x7FFF) | 0x8000; /* The second weight */
scanner->implicit[1]= 0;
/* 0 terminator */
scanner->wbeg= scanner->implicit;
scanner->page= scanner->page >> 7;
if (scanner->code >= 0x3400 && scanner->code <= 0x4DB5)
scanner->page+= 0xFB80;
else if (scanner->code >= 0x4E00 && scanner->code <= 0x9FA5)
scanner->page+= 0xFB40;
else
scanner->page+= 0xFBC0;
return scanner->page;
return my_uca_implicit_weight_base(wc) + (wc >> 15);
}
/**
  Produce the implicit weight that belongs to the level currently being
  scanned, as selected by scanner->level->levelno.

  - Level 0 (primary) is delegated to my_uca_scanner_next_implicit_primary().
  - Level 1 (secondary) yields the constant 0x0020.
  - Level 2 (tertiary) yields the constant 0x0002.

  For every level except the primary one, scanner->wbeg is pointed
  at nochar before returning.

  @param scanner  UCA weight scanner
  @return         The implicit weight for the current level; 0 (after
                  a debug assertion) for an unexpected level number.
*/
static inline int
my_uca_scanner_next_implicit(my_uca_scanner *scanner)
{
  /* Primary level: the helper sets scanner->wbeg on its own */
  if (scanner->level->levelno == 0)
    return my_uca_scanner_next_implicit_primary(scanner);

  /* All remaining levels share the same wbeg reset */
  scanner->wbeg= nochar;

  if (scanner->level->levelno == 1)
    return 0x0020;                   /* Secondary level */
  if (scanner->level->levelno == 2)
    return 0x0002;                   /* Tertiary level */

  /* Unknown level number: should never happen */
  DBUG_ASSERT(0);
  return 0;
}
/*
The same two functions for any character set
*/
...
...
@@ -33829,6 +33884,7 @@ init_weight_level(MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules, int level,
size_t i, npages= (src->maxchar + 1) / 256;
dst->maxchar= src->maxchar;
dst->levelno= src->levelno;
if (check_rules(loader, rules, dst, src))
return TRUE;
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment