Commit 8286bcd7 authored by Alexander Barkov

MDEV-6752 Trailing incomplete characters are not replaced to question marks on conversion

parent 391fddf6
...@@ -7660,5 +7660,60 @@ DROP FUNCTION iswellformed; ...@@ -7660,5 +7660,60 @@ DROP FUNCTION iswellformed;
DROP TABLE allbytes; DROP TABLE allbytes;
# End of ctype_backslash.inc # End of ctype_backslash.inc
# #
# MDEV-6752 Trailing incomplete characters are not replaced to question marks on conversion
#
SET NAMES utf8, character_set_connection=latin1;
SELECT '';
?
?
SELECT HEX('');
HEX('')
3F
SELECT HEX(CAST('' AS CHAR CHARACTER SET utf8));
HEX(CAST('' AS CHAR CHARACTER SET utf8))
3F
SELECT HEX(CAST('' AS CHAR CHARACTER SET latin1));
HEX(CAST('' AS CHAR CHARACTER SET latin1))
3F
SELECT HEX(CONVERT('' USING utf8));
HEX(CONVERT('' USING utf8))
3F
SELECT HEX(CONVERT('' USING latin1));
HEX(CONVERT('' USING latin1))
3F
SELECT 'x';
?x
?x
SELECT HEX('x');
HEX('x')
3F78
SELECT HEX(CAST('x' AS CHAR CHARACTER SET utf8));
HEX(CAST('x' AS CHAR CHARACTER SET utf8))
3F78
SELECT HEX(CAST('x' AS CHAR CHARACTER SET latin1));
HEX(CAST('x' AS CHAR CHARACTER SET latin1))
3F78
SELECT HEX(CONVERT('x' USING utf8));
HEX(CONVERT('x' USING utf8))
3F78
SELECT HEX(CONVERT('x' USING latin1));
HEX(CONVERT('x' USING latin1))
3F78
SET NAMES utf8;
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1);
INSERT INTO t1 VALUES (''),('#');
Warnings:
Warning 1366 Incorrect string value: '\xC2' for column 'a' at row 1
Warning 1366 Incorrect string value: '\xC2#' for column 'a' at row 2
SHOW WARNINGS;
Level Code Message
Warning 1366 Incorrect string value: '\xC2' for column 'a' at row 1
Warning 1366 Incorrect string value: '\xC2#' for column 'a' at row 2
SELECT HEX(a),a FROM t1;
HEX(a) a
3F ?
3F23 ?#
DROP TABLE t1;
#
# End of 10.0 tests # End of 10.0 tests
# #
...@@ -30,7 +30,7 @@ SET @@session.character_set_client = utf8; ...@@ -30,7 +30,7 @@ SET @@session.character_set_client = utf8;
INSERT INTO t1 values('è'); INSERT INTO t1 values('è');
SELECT hex(a),CHAR_LENGTH(a) FROM t1; SELECT hex(a),CHAR_LENGTH(a) FROM t1;
hex(a) CHAR_LENGTH(a) hex(a) CHAR_LENGTH(a)
03 1 033F 2
DELETE FROM t1; DELETE FROM t1;
DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t1;
SET @@global.character_set_client = @global_character_set_client; SET @@global.character_set_client = @global_character_set_client;
......
...@@ -210,6 +210,29 @@ set names latin1; ...@@ -210,6 +210,29 @@ set names latin1;
let $ctype_unescape_combinations=selected; let $ctype_unescape_combinations=selected;
--source include/ctype_unescape.inc --source include/ctype_unescape.inc
--echo #
--echo # MDEV-6752 Trailing incomplete characters are not replaced to question marks on conversion
--echo #
SET NAMES utf8, character_set_connection=latin1;
SELECT '';
SELECT HEX('');
SELECT HEX(CAST('' AS CHAR CHARACTER SET utf8));
SELECT HEX(CAST('' AS CHAR CHARACTER SET latin1));
SELECT HEX(CONVERT('' USING utf8));
SELECT HEX(CONVERT('' USING latin1));
SELECT 'x';
SELECT HEX('x');
SELECT HEX(CAST('x' AS CHAR CHARACTER SET utf8));
SELECT HEX(CAST('x' AS CHAR CHARACTER SET latin1));
SELECT HEX(CONVERT('x' USING utf8));
SELECT HEX(CONVERT('x' USING latin1));
SET NAMES utf8;
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1);
INSERT INTO t1 VALUES (''),('#');
SHOW WARNINGS;
SELECT HEX(a),a FROM t1;
DROP TABLE t1;
--echo # --echo #
--echo # End of 10.0 tests --echo # End of 10.0 tests
--echo # --echo #
...@@ -1022,8 +1022,15 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs, ...@@ -1022,8 +1022,15 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs,
wc= '?'; wc= '?';
} }
else else
break; // Not enough characters {
if ((uchar *) from >= from_end)
break; // End of line
// Incomplete byte sequence
if (!*well_formed_error_pos)
*well_formed_error_pos= from;
from++;
wc= '?';
}
outp: outp:
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0) if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
to+= cnvres; to+= cnvres;
......
...@@ -1066,7 +1066,14 @@ my_convert_internal(char *to, uint32 to_length, ...@@ -1066,7 +1066,14 @@ my_convert_internal(char *to, uint32 to_length,
wc= '?'; wc= '?';
} }
else else
break; // Not enough characters {
if ((uchar *) from >= from_end)
break; /* End of line */
/* Incomplete byte sequence */
error_count++;
from++;
wc= '?';
}
outp: outp:
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0) if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment