Commit d516a2ae authored by Alexander Barkov

MDEV-9823 LOAD DATA INFILE silently truncates incomplete byte sequences

parent bddd63cf
......@@ -33913,3 +33913,24 @@ DROP TABLE t1;
#
# End of 10.1 tests
#
#
# End of 10.2 tests
#
#
# MDEV-9823 LOAD DATA INFILE silently truncates incomplete byte sequences
#
CREATE TABLE t1 (a TEXT CHARACTER SET eucjpms);
LOAD DATA INFILE '../../std_data/loaddata/mdev9823.ujis.txt' INTO TABLE t1 CHARACTER SET eucjpms IGNORE 4 LINES;
SELECT HEX(a) FROM t1;
HEX(a)
3F
78787831
3F3F
78787832
8FA1A1
78787833
3F3F
DROP TABLE t1;
#
# End of 10.2 tests
#
......@@ -26218,3 +26218,24 @@ DROP TABLE t1;
#
# End of 10.1 tests
#
#
# End of 10.2 tests
#
#
# MDEV-9823 LOAD DATA INFILE silently truncates incomplete byte sequences
#
CREATE TABLE t1 (a TEXT CHARACTER SET ujis);
LOAD DATA INFILE '../../std_data/loaddata/mdev9823.ujis.txt' INTO TABLE t1 CHARACTER SET ujis IGNORE 4 LINES;
SELECT HEX(a) FROM t1;
HEX(a)
3F
78787831
3F3F
78787832
8FA1A1
78787833
3F3F
DROP TABLE t1;
#
# End of 10.2 tests
#
......@@ -10426,5 +10426,27 @@ b
c
DROP TABLE t1;
#
# MDEV-9823 LOAD DATA INFILE silently truncates incomplete byte sequences
#
CREATE TABLE t1 (a TEXT CHARACTER SET utf8);
LOAD DATA INFILE '../../std_data/loaddata/mdev9823.utf8mb4.txt' INTO TABLE t1 CHARACTER SET utf8 IGNORE 4 LINES;
Warnings:
Warning 1366 Incorrect string value: '\xD0' for column 'a' at row 1
Warning 1366 Incorrect string value: '\xE1\x80' for column 'a' at row 3
Warning 1366 Incorrect string value: '\xF0\x9F\x98' for column 'a' at row 5
Warning 1366 Incorrect string value: '\xF0\x9F\x98\x8E' for column 'a' at row 7
Warning 1366 Incorrect string value: '\xF0\x9F\x98' for column 'a' at row 8
SELECT HEX(a) FROM t1;
HEX(a)
3F
78787831
3F3F
78787832
3F3F3F
78787833
3F3F3F3F
3F3F3F
DROP TABLE t1;
#
# End of 10.2 tests
#
......@@ -3398,3 +3398,30 @@ DROP FUNCTION f1;
#
# End of 10.1 tests
#
#
# End of 10.2 tests
#
#
# MDEV-9823 LOAD DATA INFILE silently truncates incomplete byte sequences
#
CREATE TABLE t1 (a TEXT CHARACTER SET utf8mb4);
LOAD DATA INFILE '../../std_data/loaddata/mdev9823.utf8mb4.txt' INTO TABLE t1 CHARACTER SET utf8mb4 IGNORE 4 LINES;
Warnings:
Warning 1366 Incorrect string value: '\xD0' for column 'a' at row 1
Warning 1366 Incorrect string value: '\xE1\x80' for column 'a' at row 3
Warning 1366 Incorrect string value: '\xF0\x9F\x98' for column 'a' at row 5
Warning 1366 Incorrect string value: '\xF0\x9F\x98' for column 'a' at row 8
SELECT HEX(a) FROM t1;
HEX(a)
3F
78787831
3F3F
78787832
3F3F3F
78787833
F09F988E
3F3F3F
DROP TABLE t1;
#
# End of 10.2 tests
#
# This file has incomplete UJIS sequences {8F}, {8FA1},
# has a valid UJIS sequence {8FA1A1},
# and has no NL at the end:
# {8F} \n xxx1 \n {8FA1} \n xxx2 \n {8FA1A1} \n xxx3 \n {8FA1} EOF
xxx1
¡
xxx2
¡¡
xxx3
¡
\ No newline at end of file
# This file has incomplete utf8mb4 sequences {D0}, {E180}, {F09F98},
# has a valid utf8mb4 sequence {F09F988E}
# and has no NL at the end:
# {D0} \n xxx1 \n {E180} \n xxx2 \n {F09F98} \n xxx3 \n {F09F988E} \n {F09F98} EOF
xxx1
xxx2
xxx3
😎
\ No newline at end of file
......@@ -566,3 +566,19 @@ DROP TABLE t1;
--echo #
--echo # End of 10.1 tests
--echo #
--echo #
--echo # End of 10.2 tests
--echo #
--echo #
--echo # MDEV-9823 LOAD DATA INFILE silently truncates incomplete byte sequences
--echo #
# Load a fixture containing incomplete multi-byte sequences ({8F}, {8FA1}),
# one valid three-byte sequence ({8FA1A1}) and plain ASCII lines.
# The fixture begins with four descriptive lines starting with '#',
# hence IGNORE 4 LINES; it has no newline at the end.
CREATE TABLE t1 (a TEXT CHARACTER SET eucjpms);
LOAD DATA INFILE '../../std_data/loaddata/mdev9823.ujis.txt' INTO TABLE t1 CHARACTER SET eucjpms IGNORE 4 LINES;
# Dump the stored bytes in hex so that replaced bytes (3F) are visible.
SELECT HEX(a) FROM t1;
DROP TABLE t1;
--echo #
--echo # End of 10.2 tests
--echo #
......@@ -1396,3 +1396,20 @@ SELECT HEX(a) FROM t1 ORDER BY a;DROP TABLE t1;
--echo #
--echo # End of 10.1 tests
--echo #
--echo #
--echo # End of 10.2 tests
--echo #
--echo #
--echo # MDEV-9823 LOAD DATA INFILE silently truncates incomplete byte sequences
--echo #
# Load a fixture containing incomplete UJIS sequences ({8F}, {8FA1}),
# one valid three-byte sequence ({8FA1A1}) and plain ASCII lines.
# The fixture begins with four descriptive lines starting with '#',
# hence IGNORE 4 LINES; it has no newline at the end.
CREATE TABLE t1 (a TEXT CHARACTER SET ujis);
LOAD DATA INFILE '../../std_data/loaddata/mdev9823.ujis.txt' INTO TABLE t1 CHARACTER SET ujis IGNORE 4 LINES;
# Dump the stored bytes in hex so that replaced bytes (3F) are visible.
SELECT HEX(a) FROM t1;
DROP TABLE t1;
--echo #
--echo # End of 10.2 tests
--echo #
......@@ -1966,6 +1966,14 @@ LOAD DATA INFILE '../../std_data/loaddata/mdev9824.txt' INTO TABLE t1 CHARACTER
SELECT c1 FROM t1;
DROP TABLE t1;
--echo #
--echo # MDEV-9823 LOAD DATA INFILE silently truncates incomplete byte sequences
--echo #
# Load the utf8mb4 fixture into a utf8 column. The fixture contains
# incomplete sequences ({D0}, {E180}, {F09F98}) and the four-byte sequence
# {F09F988E}; the recorded result shows a 1366 warning for each of them,
# including the four-byte sequence, which is stored as 3F3F3F3F.
# The fixture begins with four descriptive lines starting with '#',
# hence IGNORE 4 LINES; it has no newline at the end.
CREATE TABLE t1 (a TEXT CHARACTER SET utf8);
LOAD DATA INFILE '../../std_data/loaddata/mdev9823.utf8mb4.txt' INTO TABLE t1 CHARACTER SET utf8 IGNORE 4 LINES;
# Dump the stored bytes in hex so that replaced bytes (3F) are visible.
SELECT HEX(a) FROM t1;
DROP TABLE t1;
--echo #
--echo # End of 10.2 tests
--echo #
......@@ -1919,3 +1919,20 @@ DROP FUNCTION f1;
--echo #
--echo # End of 10.1 tests
--echo #
--echo #
--echo # End of 10.2 tests
--echo #
--echo #
--echo # MDEV-9823 LOAD DATA INFILE silently truncates incomplete byte sequences
--echo #
# Load a fixture containing incomplete utf8mb4 sequences ({D0}, {E180},
# {F09F98}) and one valid four-byte sequence ({F09F988E}); the recorded
# result keeps the valid sequence as F09F988E and warns about the others.
# The fixture begins with four descriptive lines starting with '#',
# hence IGNORE 4 LINES; it has no newline at the end.
CREATE TABLE t1 (a TEXT CHARACTER SET utf8mb4);
LOAD DATA INFILE '../../std_data/loaddata/mdev9823.utf8mb4.txt' INTO TABLE t1 CHARACTER SET utf8mb4 IGNORE 4 LINES;
# Dump the stored bytes in hex so that replaced bytes (3F) are visible.
SELECT HEX(a) FROM t1;
DROP TABLE t1;
--echo #
--echo # End of 10.2 tests
--echo #
......@@ -1589,38 +1589,34 @@ int READ_INFO::read_field()
return 0;
}
}
#ifdef USE_MB
if (my_mbcharlen(read_charset, chr) > 1)
data.append(chr);
if (use_mb(read_charset))
{
uint32 length0= data.length();
int ml= my_mbcharlen(read_charset, chr);
data.append(chr);
for (int i= 1; i < ml; i++)
int chlen;
if ((chlen= my_charlen(read_charset, data.end() - 1,
data.end())) != 1)
{
chr= GET;
if (chr == my_b_EOF)
for (uint32 length0= data.length() - 1 ; MY_CS_IS_TOOSMALL(chlen); )
{
/*
Need to back up the bytes already read from the ill-formed
multi-byte char
*/
data.length(length0);
goto found_eof;
chr= GET;
if (chr == my_b_EOF)
goto found_eof;
data.append(chr);
chlen= my_charlen(read_charset, data.ptr() + length0, data.end());
if (chlen == MY_CS_ILSEQ)
{
/**
So far this has been an incomplete (but valid) prefix,
but the last byte turned it into a bad byte sequence.
Unget the very last byte.
*/
data.length(data.length() - 1);
PUSH(chr);
break;
}
}
data.append(chr);
}
if (my_ismbchar(read_charset,
(const char *) data.ptr() + length0,
(const char *) data.end()))
continue;
for (int i= 0; i < ml; i++)
PUSH(data.end()[-1 - i]);
data.length(length0);
chr= GET;
}
#endif
data.append(chr);
}
/*
** We come here if buffer is too small. Enlarge it and continue
......
......@@ -199,6 +199,7 @@ static const uchar sort_order_eucjpms[]=
#define IS_MB2_KATA(x,y) (iseucjpms_ss2(x) && iskata(y))
#define IS_MB2_CHAR(x,y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y))
#define IS_MB3_CHAR(x,y,z) (iseucjpms_ss3(x) && IS_MB2_JIS(y,z))
/*
  IS_MB_PREFIX2(x,y): true when x is an SS3 byte and y passes iseucjpms(),
  i.e. the two bytes can still start a three-byte character.
  The charlen() template (presumably the one in the included ctype-mb.ic)
  tests this macro, when defined, if fewer than three bytes are available:
  a non-prefix yields MY_CS_ILSEQ instead of MY_CS_TOOSMALLN(3).
*/
#define IS_MB_PREFIX2(x,y) (iseucjpms_ss3(x) && iseucjpms(y))
#define DEFINE_ASIAN_ROUTINES
#include "ctype-mb.ic"
......@@ -75,7 +75,13 @@ MY_FUNCTION_NAME(charlen)(CHARSET_INFO *cs __attribute__((unused)),
#ifdef IS_MB3_CHAR
if (b + 3 > e)
{
#ifdef IS_MB_PREFIX2
if (!IS_MB_PREFIX2(b[0], b[1]))
return MY_CS_ILSEQ;
#endif
return MY_CS_TOOSMALLN(3);
}
if (IS_MB3_CHAR(b[0], b[1], b[2]))
return 3; /* Three-byte character */
#endif
......
......@@ -198,6 +198,7 @@ static const uchar sort_order_ujis[]=
#define IS_MB2_KATA(x,y) (isujis_ss2(x) && iskata(y))
#define IS_MB2_CHAR(x, y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y))
#define IS_MB3_CHAR(x, y, z) (isujis_ss3(x) && IS_MB2_JIS(y,z))
/*
  IS_MB_PREFIX2(x,y): true when x is an SS3 byte and y passes isujis(),
  i.e. the two bytes can still start a three-byte character.
  The charlen() template (presumably the one in the included ctype-mb.ic)
  tests this macro, when defined, if fewer than three bytes are available:
  a non-prefix yields MY_CS_ILSEQ instead of MY_CS_TOOSMALLN(3).
*/
#define IS_MB_PREFIX2(x,y) (isujis_ss3(x) && isujis(y))
#define DEFINE_ASIAN_ROUTINES
#include "ctype-mb.ic"
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment