Commit b99f9ab7 authored by bar@mysql.com's avatar bar@mysql.com

Bug#15377 Valid multibyte sequences are truncated on INSERT

ctype-euc_kr.c:
ctype-gb2312.c:
  Adding specific well_formed_length functions
  for gb2312 and euckr, to allow storing characters
  which are correct according to the character set
  specifications but just don't have Unicode mapping.
  Previously only those which have Unicode mapping
  could be stored, while unassigned characters lead
  to data truncation.
Many files:
  new file
parent dd7d2d0a
-- require r/have_euckr.require
disable_query_log;
show collation like "euckr_korean_ci";
enable_query_log;
-- require r/have_gb2312.require
disable_query_log;
show collation like "gb2312_chinese_ci";
enable_query_log;
drop table if exists t1;
SET @test_character_set= 'euckr';
SET @test_collation= 'euckr_korean_ci';
SET @safe_character_set_server= @@character_set_server;
SET @safe_collation_server= @@collation_server;
SET character_set_server= @test_character_set;
SET collation_server= @test_collation;
CREATE DATABASE d1;
USE d1;
CREATE TABLE t1 (c CHAR(10), KEY(c));
SHOW FULL COLUMNS FROM t1;
Field Type Collation Null Key Default Extra Privileges Comment
c char(10) euckr_korean_ci YES MUL NULL
INSERT INTO t1 VALUES ('aaa'),('aaaa'),('aaaaa');
SELECT c as want3results FROM t1 WHERE c LIKE 'aaa%';
want3results
aaa
aaaa
aaaaa
DROP TABLE t1;
CREATE TABLE t1 (c1 varchar(15), KEY c1 (c1(2)));
SHOW FULL COLUMNS FROM t1;
Field Type Collation Null Key Default Extra Privileges Comment
c1 varchar(15) euckr_korean_ci YES MUL NULL
INSERT INTO t1 VALUES ('location'),('loberge'),('lotre'),('boabab');
SELECT c1 as want3results from t1 where c1 like 'l%';
want3results
location
loberge
lotre
SELECT c1 as want3results from t1 where c1 like 'lo%';
want3results
location
loberge
lotre
SELECT c1 as want1result from t1 where c1 like 'loc%';
want1result
location
SELECT c1 as want1result from t1 where c1 like 'loca%';
want1result
location
SELECT c1 as want1result from t1 where c1 like 'locat%';
want1result
location
SELECT c1 as want1result from t1 where c1 like 'locati%';
want1result
location
SELECT c1 as want1result from t1 where c1 like 'locatio%';
want1result
location
SELECT c1 as want1result from t1 where c1 like 'location%';
want1result
location
DROP TABLE t1;
DROP DATABASE d1;
USE test;
SET character_set_server= @safe_character_set_server;
SET collation_server= @safe_collation_server;
SET NAMES euckr;
SET collation_connection='euckr_korean_ci';
create table t1 select repeat('a',4000) a;
delete from t1;
insert into t1 values ('a'), ('a '), ('a\t');
select collation(a),hex(a) from t1 order by a;
collation(a) hex(a)
euckr_korean_ci 6109
euckr_korean_ci 61
euckr_korean_ci 6120
drop table t1;
create table t1 engine=innodb select repeat('a',50) as c1;
alter table t1 add index(c1(5));
insert into t1 values ('abcdefg'),('abcde100'),('abcde110'),('abcde111');
select collation(c1) from t1 limit 1;
collation(c1)
euckr_korean_ci
select c1 from t1 where c1 like 'abcdef%' order by c1;
c1
abcdefg
select c1 from t1 where c1 like 'abcde1%' order by c1;
c1
abcde100
abcde110
abcde111
select c1 from t1 where c1 like 'abcde11%' order by c1;
c1
abcde110
abcde111
select c1 from t1 where c1 like 'abcde111%' order by c1;
c1
abcde111
drop table t1;
select @@collation_connection;
@@collation_connection
euckr_korean_ci
create table t1 ROW_FORMAT=DYNAMIC select repeat('a',50) as c1 ;
insert into t1 values('abcdef');
insert into t1 values('_bcdef');
insert into t1 values('a_cdef');
insert into t1 values('ab_def');
insert into t1 values('abc_ef');
insert into t1 values('abcd_f');
insert into t1 values('abcde_');
select c1 as c1u from t1 where c1 like 'ab\_def';
c1u
ab_def
select c1 as c2h from t1 where c1 like 'ab#_def' escape '#';
c2h
ab_def
drop table t1;
SET collation_connection='euckr_bin';
create table t1 select repeat('a',4000) a;
delete from t1;
insert into t1 values ('a'), ('a '), ('a\t');
select collation(a),hex(a) from t1 order by a;
collation(a) hex(a)
euckr_bin 6109
euckr_bin 61
euckr_bin 6120
drop table t1;
create table t1 engine=innodb select repeat('a',50) as c1;
alter table t1 add index(c1(5));
insert into t1 values ('abcdefg'),('abcde100'),('abcde110'),('abcde111');
select collation(c1) from t1 limit 1;
collation(c1)
euckr_bin
select c1 from t1 where c1 like 'abcdef%' order by c1;
c1
abcdefg
select c1 from t1 where c1 like 'abcde1%' order by c1;
c1
abcde100
abcde110
abcde111
select c1 from t1 where c1 like 'abcde11%' order by c1;
c1
abcde110
abcde111
select c1 from t1 where c1 like 'abcde111%' order by c1;
c1
abcde111
drop table t1;
select @@collation_connection;
@@collation_connection
euckr_bin
create table t1 ROW_FORMAT=DYNAMIC select repeat('a',50) as c1 ;
insert into t1 values('abcdef');
insert into t1 values('_bcdef');
insert into t1 values('a_cdef');
insert into t1 values('ab_def');
insert into t1 values('abc_ef');
insert into t1 values('abcd_f');
insert into t1 values('abcde_');
select c1 as c1u from t1 where c1 like 'ab\_def';
c1u
ab_def
select c1 as c2h from t1 where c1 like 'ab#_def' escape '#';
c2h
ab_def
drop table t1;
SET NAMES euckr;
CREATE TABLE t1 (a text) character set euckr;
INSERT INTO t1 VALUES (0xA2E6),(0xFEF7);
SELECT hex(a) FROM t1 ORDER BY a;
hex(a)
A2E6
FEF7
DROP TABLE t1;
drop table if exists t1;
SET @test_character_set= 'gb2312';
SET @test_collation= 'gb2312_chinese_ci';
SET @safe_character_set_server= @@character_set_server;
SET @safe_collation_server= @@collation_server;
SET character_set_server= @test_character_set;
SET collation_server= @test_collation;
CREATE DATABASE d1;
USE d1;
CREATE TABLE t1 (c CHAR(10), KEY(c));
SHOW FULL COLUMNS FROM t1;
Field Type Collation Null Key Default Extra Privileges Comment
c char(10) gb2312_chinese_ci YES MUL NULL
INSERT INTO t1 VALUES ('aaa'),('aaaa'),('aaaaa');
SELECT c as want3results FROM t1 WHERE c LIKE 'aaa%';
want3results
aaa
aaaa
aaaaa
DROP TABLE t1;
CREATE TABLE t1 (c1 varchar(15), KEY c1 (c1(2)));
SHOW FULL COLUMNS FROM t1;
Field Type Collation Null Key Default Extra Privileges Comment
c1 varchar(15) gb2312_chinese_ci YES MUL NULL
INSERT INTO t1 VALUES ('location'),('loberge'),('lotre'),('boabab');
SELECT c1 as want3results from t1 where c1 like 'l%';
want3results
location
loberge
lotre
SELECT c1 as want3results from t1 where c1 like 'lo%';
want3results
location
loberge
lotre
SELECT c1 as want1result from t1 where c1 like 'loc%';
want1result
location
SELECT c1 as want1result from t1 where c1 like 'loca%';
want1result
location
SELECT c1 as want1result from t1 where c1 like 'locat%';
want1result
location
SELECT c1 as want1result from t1 where c1 like 'locati%';
want1result
location
SELECT c1 as want1result from t1 where c1 like 'locatio%';
want1result
location
SELECT c1 as want1result from t1 where c1 like 'location%';
want1result
location
DROP TABLE t1;
DROP DATABASE d1;
USE test;
SET character_set_server= @safe_character_set_server;
SET collation_server= @safe_collation_server;
SET NAMES gb2312;
SET collation_connection='gb2312_chinese_ci';
create table t1 select repeat('a',4000) a;
delete from t1;
insert into t1 values ('a'), ('a '), ('a\t');
select collation(a),hex(a) from t1 order by a;
collation(a) hex(a)
gb2312_chinese_ci 6109
gb2312_chinese_ci 61
gb2312_chinese_ci 6120
drop table t1;
create table t1 engine=innodb select repeat('a',50) as c1;
alter table t1 add index(c1(5));
insert into t1 values ('abcdefg'),('abcde100'),('abcde110'),('abcde111');
select collation(c1) from t1 limit 1;
collation(c1)
gb2312_chinese_ci
select c1 from t1 where c1 like 'abcdef%' order by c1;
c1
abcdefg
select c1 from t1 where c1 like 'abcde1%' order by c1;
c1
abcde100
abcde110
abcde111
select c1 from t1 where c1 like 'abcde11%' order by c1;
c1
abcde110
abcde111
select c1 from t1 where c1 like 'abcde111%' order by c1;
c1
abcde111
drop table t1;
select @@collation_connection;
@@collation_connection
gb2312_chinese_ci
create table t1 ROW_FORMAT=DYNAMIC select repeat('a',50) as c1 ;
insert into t1 values('abcdef');
insert into t1 values('_bcdef');
insert into t1 values('a_cdef');
insert into t1 values('ab_def');
insert into t1 values('abc_ef');
insert into t1 values('abcd_f');
insert into t1 values('abcde_');
select c1 as c1u from t1 where c1 like 'ab\_def';
c1u
ab_def
select c1 as c2h from t1 where c1 like 'ab#_def' escape '#';
c2h
ab_def
drop table t1;
SET collation_connection='gb2312_bin';
create table t1 select repeat('a',4000) a;
delete from t1;
insert into t1 values ('a'), ('a '), ('a\t');
select collation(a),hex(a) from t1 order by a;
collation(a) hex(a)
gb2312_bin 6109
gb2312_bin 61
gb2312_bin 6120
drop table t1;
create table t1 engine=innodb select repeat('a',50) as c1;
alter table t1 add index(c1(5));
insert into t1 values ('abcdefg'),('abcde100'),('abcde110'),('abcde111');
select collation(c1) from t1 limit 1;
collation(c1)
gb2312_bin
select c1 from t1 where c1 like 'abcdef%' order by c1;
c1
abcdefg
select c1 from t1 where c1 like 'abcde1%' order by c1;
c1
abcde100
abcde110
abcde111
select c1 from t1 where c1 like 'abcde11%' order by c1;
c1
abcde110
abcde111
select c1 from t1 where c1 like 'abcde111%' order by c1;
c1
abcde111
drop table t1;
select @@collation_connection;
@@collation_connection
gb2312_bin
create table t1 ROW_FORMAT=DYNAMIC select repeat('a',50) as c1 ;
insert into t1 values('abcdef');
insert into t1 values('_bcdef');
insert into t1 values('a_cdef');
insert into t1 values('ab_def');
insert into t1 values('abc_ef');
insert into t1 values('abcd_f');
insert into t1 values('abcde_');
select c1 as c1u from t1 where c1 like 'ab\_def';
c1u
ab_def
select c1 as c2h from t1 where c1 like 'ab#_def' escape '#';
c2h
ab_def
drop table t1;
SET NAMES gb2312;
CREATE TABLE t1 (a text) character set gb2312;
INSERT INTO t1 VALUES (0xA2A1),(0xD7FE);
SELECT hex(a) FROM t1 ORDER BY a;
hex(a)
A2A1
D7FE
DROP TABLE t1;
Collation Charset Id Default Compiled Sortlen
euckr_korean_ci euckr 19 Yes Yes 1
Collation Charset Id Default Compiled Sortlen
gb2312_chinese_ci gb2312 24 Yes Yes 1
-- source include/have_euckr.inc
#
# Tests with the euckr character set
#
--disable_warnings
drop table if exists t1;
--enable_warnings
SET @test_character_set= 'euckr';
SET @test_collation= 'euckr_korean_ci';
-- source include/ctype_common.inc
SET NAMES euckr;
SET collation_connection='euckr_korean_ci';
-- source include/ctype_filesort.inc
-- source include/ctype_innodb_like.inc
-- source include/ctype_like_escape.inc
SET collation_connection='euckr_bin';
-- source include/ctype_filesort.inc
-- source include/ctype_innodb_like.inc
-- source include/ctype_like_escape.inc
#
# Bug#15377 Valid multibyte sequences are truncated on INSERT
#
SET NAMES euckr;
CREATE TABLE t1 (a text) character set euckr;
INSERT INTO t1 VALUES (0xA2E6),(0xFEF7);
SELECT hex(a) FROM t1 ORDER BY a;
DROP TABLE t1;
# End of 4.1 tests
-- source include/have_gb2312.inc
#
# Tests with the gb2312 character set
#
--disable_warnings
drop table if exists t1;
--enable_warnings
SET @test_character_set= 'gb2312';
SET @test_collation= 'gb2312_chinese_ci';
-- source include/ctype_common.inc
SET NAMES gb2312;
SET collation_connection='gb2312_chinese_ci';
-- source include/ctype_filesort.inc
-- source include/ctype_innodb_like.inc
-- source include/ctype_like_escape.inc
SET collation_connection='gb2312_bin';
-- source include/ctype_filesort.inc
-- source include/ctype_innodb_like.inc
-- source include/ctype_like_escape.inc
#
# Bug#15377 Valid multibyte sequences are truncated on INSERT
#
SET NAMES gb2312;
CREATE TABLE t1 (a text) character set gb2312;
INSERT INTO t1 VALUES (0xA2A1),(0xD7FE);
SELECT hex(a) FROM t1 ORDER BY a;
DROP TABLE t1;
# End of 4.1 tests
...@@ -8635,6 +8635,41 @@ my_mb_wc_euc_kr(CHARSET_INFO *cs __attribute__((unused)), ...@@ -8635,6 +8635,41 @@ my_mb_wc_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
} }
/*
Returns well formed length of a EUC-KR string.
*/
static uint
my_well_formed_len_euckr(CHARSET_INFO *cs __attribute__((unused)),
const char *b, const char *e,
uint pos, int *error)
{
const char *b0= b;
const char *emb= e - 1; /* Last possible end of an MB character */
*error= 0;
while (pos-- && b < e)
{
if ((uchar) b[0] < 128)
{
/* Single byte ascii character */
b++;
}
else if (b < emb && iseuc_kr(*b) && iseuc_kr(b[1]))
{
/* Double byte character */
b+= 2;
}
else
{
/* Wrong byte sequence */
*error= 1;
break;
}
}
return (uint) (b - b0);
}
static MY_COLLATION_HANDLER my_collation_ci_handler = static MY_COLLATION_HANDLER my_collation_ci_handler =
{ {
NULL, /* init */ NULL, /* init */
...@@ -8655,7 +8690,7 @@ static MY_CHARSET_HANDLER my_charset_handler= ...@@ -8655,7 +8690,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
mbcharlen_euc_kr, mbcharlen_euc_kr,
my_numchars_mb, my_numchars_mb,
my_charpos_mb, my_charpos_mb,
my_well_formed_len_mb, my_well_formed_len_euckr,
my_lengthsp_8bit, my_lengthsp_8bit,
my_numcells_8bit, my_numcells_8bit,
my_mb_wc_euc_kr, /* mb_wc */ my_mb_wc_euc_kr, /* mb_wc */
......
...@@ -5686,6 +5686,41 @@ my_mb_wc_gb2312(CHARSET_INFO *cs __attribute__((unused)), ...@@ -5686,6 +5686,41 @@ my_mb_wc_gb2312(CHARSET_INFO *cs __attribute__((unused)),
} }
/*
Returns well formed length of a EUC-KR string.
*/
static uint
my_well_formed_len_gb2312(CHARSET_INFO *cs __attribute__((unused)),
const char *b, const char *e,
uint pos, int *error)
{
const char *b0= b;
const char *emb= e - 1; /* Last possible end of an MB character */
*error= 0;
while (pos-- && b < e)
{
if ((uchar) b[0] < 128)
{
/* Single byte ascii character */
b++;
}
else if (b < emb && isgb2312head(*b) && isgb2312tail(b[1]))
{
/* Double byte character */
b+= 2;
}
else
{
/* Wrong byte sequence */
*error= 1;
break;
}
}
return (uint) (b - b0);
}
static MY_COLLATION_HANDLER my_collation_ci_handler = static MY_COLLATION_HANDLER my_collation_ci_handler =
{ {
NULL, /* init */ NULL, /* init */
...@@ -5706,7 +5741,7 @@ static MY_CHARSET_HANDLER my_charset_handler= ...@@ -5706,7 +5741,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
mbcharlen_gb2312, mbcharlen_gb2312,
my_numchars_mb, my_numchars_mb,
my_charpos_mb, my_charpos_mb,
my_well_formed_len_mb, my_well_formed_len_gb2312,
my_lengthsp_8bit, my_lengthsp_8bit,
my_numcells_8bit, my_numcells_8bit,
my_mb_wc_gb2312, /* mb_wc */ my_mb_wc_gb2312, /* mb_wc */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment