From 0a91f285ad4a8d017a508e32a1c5d4deb7e0203a Mon Sep 17 00:00:00 2001 From: unknown <gshchepa/uchum@gleb.loc> Date: Tue, 20 Nov 2007 20:15:20 +0400 Subject: [PATCH] Fixed bug #32533. 8bit escape characters, termination and enclosed characters were silently ignored by SELECT INTO query, but LOAD DATA INFILE algorithm is 8bit-clean, so data was corrupted during encoding. sql/sql_class.cc: Fixed bug #32533. SELECT INTO OUTFILE encoding was not 8bit clean; it has been fixed for symmetry with the LOAD DATA INFILE decoding algorithm. mysql-test/t/outfile_loaddata.test: Added test case for bug #32533. mysql-test/r/outfile_loaddata.result: Added test case for bug #32533. --- mysql-test/r/outfile_loaddata.result | 18 ++++++++++++++++++ mysql-test/t/outfile_loaddata.test | 24 ++++++++++++++++++++++++ sql/sql_class.cc | 17 ++++++++++------- 3 files changed, 52 insertions(+), 7 deletions(-) diff --git a/mysql-test/r/outfile_loaddata.result b/mysql-test/r/outfile_loaddata.result index 1bcaf308b7..4a9bdcf412 100644 --- a/mysql-test/r/outfile_loaddata.result +++ b/mysql-test/r/outfile_loaddata.result @@ -82,4 +82,22 @@ c1 c2 -r- =raker= DROP TABLE t2; DROP TABLE t1; +# +# Bug#32533: SELECT INTO OUTFILE never escapes multibyte character +# +CREATE TABLE t1 (c1 VARCHAR(256)); +INSERT INTO t1 VALUES (0xC3); +SELECT HEX(c1) FROM t1; +HEX(c1) +C3 +SELECT * INTO OUTFILE 'MYSQLTEST_VARDIR/tmp/bug32533.txt' FIELDS ENCLOSED BY 0xC3 FROM t1; +TRUNCATE t1; +SELECT HEX(LOAD_FILE('MYSQLTEST_VARDIR/tmp/bug32533.txt')); +HEX(LOAD_FILE('MYSQLTEST_VARDIR/tmp/bug32533.txt')) +C35CC3C30A +LOAD DATA INFILE 'MYSQLTEST_VARDIR/tmp/bug32533.txt' INTO TABLE t1 FIELDS ENCLOSED BY 0xC3; +SELECT HEX(c1) FROM t1; +HEX(c1) +C3 +DROP TABLE t1; # End of 5.0 tests. 
diff --git a/mysql-test/t/outfile_loaddata.test b/mysql-test/t/outfile_loaddata.test index 2f6ac998b3..2a120871e7 100644 --- a/mysql-test/t/outfile_loaddata.test +++ b/mysql-test/t/outfile_loaddata.test @@ -86,4 +86,28 @@ DROP TABLE t2; DROP TABLE t1; +--echo # +--echo # Bug#32533: SELECT INTO OUTFILE never escapes multibyte character +--echo # + +CREATE TABLE t1 (c1 VARCHAR(256)); +INSERT INTO t1 VALUES (0xC3); +SELECT HEX(c1) FROM t1; + +--let $file=$MYSQLTEST_VARDIR/tmp/bug32533.txt + +--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR +--eval SELECT * INTO OUTFILE '$file' FIELDS ENCLOSED BY 0xC3 FROM t1 +TRUNCATE t1; + +--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR +--eval SELECT HEX(LOAD_FILE('$file')) + +--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR +--eval LOAD DATA INFILE '$file' INTO TABLE t1 FIELDS ENCLOSED BY 0xC3 +SELECT HEX(c1) FROM t1; + +--remove_file $file +DROP TABLE t1; + --echo # End of 5.0 tests. diff --git a/sql/sql_class.cc b/sql/sql_class.cc index ef199b6f88..93f5a34d5c 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -1219,16 +1219,18 @@ select_export::prepare(List<Item> &list, SELECT_LEX_UNIT *u) } } field_term_length=exchange->field_term->length(); - field_term_char= field_term_length ? (*exchange->field_term)[0] : INT_MAX; + field_term_char= field_term_length ? + (int) (uchar) (*exchange->field_term)[0] : INT_MAX; if (!exchange->line_term->length()) exchange->line_term=exchange->field_term; // Use this if it exists - field_sep_char= (exchange->enclosed->length() ? (*exchange->enclosed)[0] : - field_term_char); - escape_char= (exchange->escaped->length() ? (*exchange->escaped)[0] : -1); + field_sep_char= (exchange->enclosed->length() ? + (int) (uchar) (*exchange->enclosed)[0] : field_term_char); + escape_char= (exchange->escaped->length() ? 
+ (int) (uchar) (*exchange->escaped)[0] : -1); is_ambiguous_field_sep= test(strchr(ESCAPE_CHARS, field_sep_char)); is_unsafe_field_sep= test(strchr(NUMERIC_CHARS, field_sep_char)); line_sep_char= (exchange->line_term->length() ? - (*exchange->line_term)[0] : INT_MAX); + (int) (uchar) (*exchange->line_term)[0] : INT_MAX); if (!field_term_length) exchange->opt_enclosed=0; if (!exchange->enclosed->length()) @@ -1385,10 +1387,11 @@ bool select_export::send_data(List<Item> &items) Don't escape field_term_char by doubling - doubling is only valid for ENCLOSED BY characters: */ - (enclosed || !is_ambiguous_field_term || *pos != field_term_char)) + (enclosed || !is_ambiguous_field_term || + (int) (uchar) *pos != field_term_char)) { char tmp_buff[2]; - tmp_buff[0]= ((int) *pos == field_sep_char && + tmp_buff[0]= ((int) (uchar) *pos == field_sep_char && is_ambiguous_field_sep) ? field_sep_char : escape_char; tmp_buff[1]= *pos ? *pos : '0'; -- 2.30.9