Commit b4766fc3 authored by Georgi Kodinov's avatar Georgi Kodinov

Bug #51876: crash/memory underrun when loading data with ucs2

and reverse() function
      
3 problems fixed : 
1. The reported problem : caused by incorrect parsing of 
the file as ucs data resulting in wrong length of the parsed
string. Fixed by truncating the invalid trailing bytes 
(non-complete multibyte characters) when reading from the file
2. LOAD DATA when reading from a proper UCS2 file wasn't 
recognizing the new line characters. Fixed by first looking 
if a byte is a new line (or any other special) character before
reading it as a part of a multibyte character.
3. When using user variables to hold the column data in LOAD
DATA the character set of the user variable was set incorrectly
to the database charset. Fixed by setting it to the charset
specified by LOAD DATA (if any). 
parent 16a73088
...@@ -503,4 +503,33 @@ DROP TABLE t1; ...@@ -503,4 +503,33 @@ DROP TABLE t1;
CREATE TABLE t1 (id INT NOT NULL); CREATE TABLE t1 (id INT NOT NULL);
LOAD DATA LOCAL INFILE 'tb.txt' INTO TABLE t1; LOAD DATA LOCAL INFILE 'tb.txt' INTO TABLE t1;
DROP TABLE t1; DROP TABLE t1;
#
# Bug #51876 : crash/memory underrun when loading data with ucs2
# and reverse() function
#
# Problem # 1 (original report): wrong parsing of ucs2 data
SELECT '00' UNION SELECT '10' INTO OUTFILE 'tmpp.txt';
CREATE TABLE t1(a INT);
LOAD DATA INFILE 'tmpp.txt' INTO TABLE t1 CHARACTER SET ucs2
(@b) SET a=REVERSE(@b);
Warnings:
Warning 1366 Incorrect integer value: '00' for column 'a' at row 1
Warning 1366 Incorrect integer value: '10' for column 'a' at row 2
# should return 2 zeroes (as the value is truncated)
SELECT * FROM t1;
a
0
0
DROP TABLE t1;
# Problem # 2 : if you write and read ucs2 data to a file they're lost
SELECT '00' UNION SELECT '10' INTO OUTFILE 'tmpp2.txt' CHARACTER SET ucs2;
CREATE TABLE t1(a INT);
LOAD DATA INFILE 'tmpp2.txt' INTO TABLE t1 CHARACTER SET ucs2
(@b) SET a=REVERSE(@b);
# should return 0 and 1 (10 reversed)
SELECT * FROM t1;
a
0
1
DROP TABLE t1;
End of 5.1 tests End of 5.1 tests
...@@ -580,4 +580,36 @@ DROP TABLE t1; ...@@ -580,4 +580,36 @@ DROP TABLE t1;
connection default; connection default;
disconnect con1; disconnect con1;
--echo #
--echo # Bug #51876 : crash/memory underrun when loading data with ucs2
--echo # and reverse() function
--echo #
--echo # Problem # 1 (original report): wrong parsing of ucs2 data
SELECT '00' UNION SELECT '10' INTO OUTFILE 'tmpp.txt';
CREATE TABLE t1(a INT);
LOAD DATA INFILE 'tmpp.txt' INTO TABLE t1 CHARACTER SET ucs2
(@b) SET a=REVERSE(@b);
--echo # should return 2 zeroes (as the value is truncated)
SELECT * FROM t1;
DROP TABLE t1;
let $MYSQLD_DATADIR= `select @@datadir`;
remove_file $MYSQLD_DATADIR/test/tmpp.txt;
--echo # Problem # 2 : if you write and read ucs2 data to a file they're lost
SELECT '00' UNION SELECT '10' INTO OUTFILE 'tmpp2.txt' CHARACTER SET ucs2;
CREATE TABLE t1(a INT);
LOAD DATA INFILE 'tmpp2.txt' INTO TABLE t1 CHARACTER SET ucs2
(@b) SET a=REVERSE(@b);
--echo # should return 0 and 1 (10 reversed)
SELECT * FROM t1;
DROP TABLE t1;
let $MYSQLD_DATADIR= `select @@datadir`;
remove_file $MYSQLD_DATADIR/test/tmpp2.txt;
--echo End of 5.1 tests --echo End of 5.1 tests
...@@ -4715,6 +4715,7 @@ bool Item_func_get_user_var::set_value(THD *thd, ...@@ -4715,6 +4715,7 @@ bool Item_func_get_user_var::set_value(THD *thd,
bool Item_user_var_as_out_param::fix_fields(THD *thd, Item **ref) bool Item_user_var_as_out_param::fix_fields(THD *thd, Item **ref)
{ {
DBUG_ASSERT(fixed == 0); DBUG_ASSERT(fixed == 0);
DBUG_ASSERT(thd->lex->exchange);
if (Item::fix_fields(thd, ref) || if (Item::fix_fields(thd, ref) ||
!(entry= get_variable(&thd->user_vars, name, 1))) !(entry= get_variable(&thd->user_vars, name, 1)))
return TRUE; return TRUE;
...@@ -4724,7 +4725,9 @@ bool Item_user_var_as_out_param::fix_fields(THD *thd, Item **ref) ...@@ -4724,7 +4725,9 @@ bool Item_user_var_as_out_param::fix_fields(THD *thd, Item **ref)
of fields in LOAD DATA INFILE. of fields in LOAD DATA INFILE.
(Since Item_user_var_as_out_param is used only there). (Since Item_user_var_as_out_param is used only there).
*/ */
entry->collation.set(thd->variables.collation_database); entry->collation.set(thd->lex->exchange->cs ?
thd->lex->exchange->cs :
thd->variables.collation_database);
entry->update_query_id= thd->query_id; entry->update_query_id= thd->query_id;
return FALSE; return FALSE;
} }
......
...@@ -1208,29 +1208,6 @@ int READ_INFO::read_field() ...@@ -1208,29 +1208,6 @@ int READ_INFO::read_field()
while ( to < end_of_buff) while ( to < end_of_buff)
{ {
chr = GET; chr = GET;
#ifdef USE_MB
if ((my_mbcharlen(read_charset, chr) > 1) &&
to+my_mbcharlen(read_charset, chr) <= end_of_buff)
{
uchar* p = (uchar*)to;
*to++ = chr;
int ml = my_mbcharlen(read_charset, chr);
int i;
for (i=1; i<ml; i++) {
chr = GET;
if (chr == my_b_EOF)
goto found_eof;
*to++ = chr;
}
if (my_ismbchar(read_charset,
(const char *)p,
(const char *)to))
continue;
for (i=0; i<ml; i++)
PUSH((uchar) *--to);
chr = GET;
}
#endif
if (chr == my_b_EOF) if (chr == my_b_EOF)
goto found_eof; goto found_eof;
if (chr == escape_char) if (chr == escape_char)
...@@ -1314,6 +1291,39 @@ int READ_INFO::read_field() ...@@ -1314,6 +1291,39 @@ int READ_INFO::read_field()
return 0; return 0;
} }
} }
#ifdef USE_MB
if (my_mbcharlen(read_charset, chr) > 1 &&
to + my_mbcharlen(read_charset, chr) <= end_of_buff)
{
uchar* p= (uchar*) to;
int ml, i;
*to++ = chr;
ml= my_mbcharlen(read_charset, chr);
for (i= 1; i < ml; i++)
{
chr= GET;
if (chr == my_b_EOF)
{
/*
Need to back up the bytes already ready from illformed
multi-byte char
*/
to-= i;
goto found_eof;
}
*to++ = chr;
}
if (my_ismbchar(read_charset,
(const char *)p,
(const char *)to))
continue;
for (i= 0; i < ml; i++)
PUSH((uchar) *--to);
chr= GET;
}
#endif
*to++ = (uchar) chr; *to++ = (uchar) chr;
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment