Commit b326b9a3 authored by Alexander Barkov

Bug#11764503 (Bug#57341) Query in EXPLAIN EXTENDED shows wrong characters

  @ mysql-test/r/ctype_latin1.result
  @ mysql-test/r/ctype_utf8.result
  @ mysql-test/t/ctype_latin1.test
  @ mysql-test/t/ctype_utf8.test
  Adding tests

  @ sql/mysqld.h
  @ sql/item.cc
  @ sql/sql_parse.cc
  @ sql/sql_view.cc

  Refactoring (thanks to Guilhem for the idea):

  Item_string::print() was hard to understand because of the different
  QT_ constants: in "query_type==QT_x", QT_x is explicitly included
  but the other two QT_ are implicitly excluded. The combinations
  with '||' and '&&' make this even harder.
  - logic is now more "explicit" by changing QT_ constants to a bitmap of flags:
    QT_ORDINARY: no change,
    QT_IS -> QT_TO_SYSTEM_CHARSET | QT_WITHOUT_INTRODUCERS,
    QT_EXPLAIN -> QT_TO_SYSTEM_CHARSET
    (QT_EXPLAIN was introduced in the first version of the Bug#57341 patch)
  - Item_string::print() is rewritten using those flags

  Bugfix itself:

  When QT_TO_SYSTEM_CHARSET is used alone (with no QT_WITHOUT_INTRODUCERS),
  we print string literals as follows:

  - display introducers if they were in the original query
  - print ASCII characters as is
  - print non-ASCII characters using hex-escape
  Note: as "EXPLAIN" output is only for human readability purposes
  and does not need to be parsable SQL, using hex-escape is OK.
  The ErrConvString class is perfectly suited for hex escaping purposes.
parent 7e6d938f
...@@ -3246,5 +3246,20 @@ maketime(`a`,`a`,`a`) ...@@ -3246,5 +3246,20 @@ maketime(`a`,`a`,`a`)
DROP TABLE t1; DROP TABLE t1;
SET sql_mode=default; SET sql_mode=default;
# #
# Bug#11764503 (Bug#57341) Query in EXPLAIN EXTENDED shows wrong characters
#
SET NAMES utf8;
EXPLAIN EXTENDED SELECT 'abcdó', _latin1'abcdó', _utf8'abcdó';
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL NULL No tables used
Warnings:
Note 1003 select 'abcdó' AS `abcdó`,_latin1'abcd\xC3\xB3' AS `abcdó`,_utf8'abcd\xC3\xB3' AS `abcdó`
SET NAMES latin1;
EXPLAIN EXTENDED SELECT 'abcdó', _latin1'abcdó', _utf8'abcdó';
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL NULL No tables used
Warnings:
Note 1003 select 'abcdó' AS `abcdó`,_latin1'abcd\xC3\xB3' AS `abcdó`,_utf8'abcd\xC3\xB3' AS `abcd`
#
# End of 5.5 tests # End of 5.5 tests
# #
...@@ -4969,5 +4969,20 @@ GROUP BY INSERT('', t2.a, t1.a, (@@global.max_binlog_size)); ...@@ -4969,5 +4969,20 @@ GROUP BY INSERT('', t2.a, t1.a, (@@global.max_binlog_size));
ERROR 23000: Duplicate entry '107374182410737418241' for key 'group_key' ERROR 23000: Duplicate entry '107374182410737418241' for key 'group_key'
DROP TABLE t1; DROP TABLE t1;
# #
# Bug#11764503 (Bug#57341) Query in EXPLAIN EXTENDED shows wrong characters
#
SET NAMES latin1;
EXPLAIN EXTENDED SELECT 'abcdÁÂÃÄÅ', _latin1'abcdÁÂÃÄÅ', _utf8'abcdÁÂÃÄÅ' AS u;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL NULL No tables used
Warnings:
Note 1003 select 'abcdÁÂÃÄÅ' AS `abcdÁÂÃÄÅ`,_latin1'abcd\xC3\x81\xC3\x82\xC3\x83\xC3\x84\xC3\x85' AS `abcdÁÂÃÄÅ`,_utf8'abcd\xC3\x81\xC3\x82\xC3\x83\xC3\x84\xC3\x85' AS `u`
SET NAMES utf8;
EXPLAIN EXTENDED SELECT 'abcdÁÂÃÄÅ', _latin1'abcdÁÂÃÄÅ', _utf8'abcdÁÂÃÄÅ';
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL NULL No tables used
Warnings:
Note 1003 select 'abcdÁÂÃÄÅ' AS `abcdÁÂÃÄÅ`,_latin1'abcd\xC3\x81\xC3\x82\xC3\x83\xC3\x84\xC3\x85' AS `abcdÁÂÃÄÅ`,_utf8'abcd\xC3\x81\xC3\x82\xC3\x83\xC3\x84\xC3\x85' AS `abcdÁÂÃÄÅ`
#
# End of 5.5 tests # End of 5.5 tests
# #
...@@ -143,6 +143,21 @@ SELECT '' LIKE '' ESCAPE EXPORT_SET(1, 1, 1, 1, ''); ...@@ -143,6 +143,21 @@ SELECT '' LIKE '' ESCAPE EXPORT_SET(1, 1, 1, 1, '');
--source include/ctype_numconv.inc --source include/ctype_numconv.inc
--echo #
--echo # Bug#11764503 (Bug#57341) Query in EXPLAIN EXTENDED shows wrong characters
--echo #
# Test latin1 client erroneously started with --default-character-set=utf8
# EXPLAIN output should still be pretty readable.
# We're using 'ó' (\xC3\xB3) as a magic sequence:
# - it's "LATIN CAPITAL LETTER A WITH TILDE" + "SUPERSCRIPT THREE" in latin1
# - it's "LATIN SMALL LETTER O WITH ACUTE" in utf8.
SET NAMES utf8;
EXPLAIN EXTENDED SELECT 'abcdó', _latin1'abcdó', _utf8'abcdó';
# Test normal latin1
SET NAMES latin1;
EXPLAIN EXTENDED SELECT 'abcdó', _latin1'abcdó', _utf8'abcdó';
--echo # --echo #
--echo # End of 5.5 tests --echo # End of 5.5 tests
--echo # --echo #
...@@ -1550,6 +1550,16 @@ SELECT COUNT(*) FROM t1, t1 t2 ...@@ -1550,6 +1550,16 @@ SELECT COUNT(*) FROM t1, t1 t2
GROUP BY INSERT('', t2.a, t1.a, (@@global.max_binlog_size)); GROUP BY INSERT('', t2.a, t1.a, (@@global.max_binlog_size));
DROP TABLE t1; DROP TABLE t1;
--echo #
--echo # Bug#11764503 (Bug#57341) Query in EXPLAIN EXTENDED shows wrong characters
--echo #
# Emulate utf8 client erroneously started with --default-character-set=latin1,
# as in the bug report. EXPLAIN output should still be pretty readable
SET NAMES latin1;
EXPLAIN EXTENDED SELECT 'abcdÁÂÃÄÅ', _latin1'abcdÁÂÃÄÅ', _utf8'abcdÁÂÃÄÅ' AS u;
# Test normal utf8
SET NAMES utf8;
EXPLAIN EXTENDED SELECT 'abcdÁÂÃÄÅ', _latin1'abcdÁÂÃÄÅ', _utf8'abcdÁÂÃÄÅ';
--echo # --echo #
--echo # End of 5.5 tests --echo # End of 5.5 tests
......
...@@ -2514,7 +2514,9 @@ my_decimal *Item_float::val_decimal(my_decimal *decimal_value) ...@@ -2514,7 +2514,9 @@ my_decimal *Item_float::val_decimal(my_decimal *decimal_value)
void Item_string::print(String *str, enum_query_type query_type) void Item_string::print(String *str, enum_query_type query_type)
{ {
if (query_type == QT_ORDINARY && is_cs_specified()) const bool print_introducer=
!(query_type & QT_WITHOUT_INTRODUCERS) && is_cs_specified();
if (print_introducer)
{ {
str->append('_'); str->append('_');
str->append(collation.collation->csname); str->append(collation.collation->csname);
...@@ -2522,27 +2524,52 @@ void Item_string::print(String *str, enum_query_type query_type) ...@@ -2522,27 +2524,52 @@ void Item_string::print(String *str, enum_query_type query_type)
str->append('\''); str->append('\'');
if (query_type == QT_ORDINARY || if (query_type & QT_TO_SYSTEM_CHARSET)
my_charset_same(str_value.charset(), system_charset_info))
{ {
str_value.print(str); if (print_introducer)
} {
else /*
{ Because we wrote an introducer, we must print str_value in its
THD *thd= current_thd; charset, and the resulting bytes must not be changed until they
LEX_STRING utf8_lex_str; reach the end client.
But the caller is asking for system_charset_info, and may later
convert into character_set_results. That means two conversions: we
must ensure that they don't change our printed bytes.
So we print str_value in the least common denominator of the three
charsets involved: ASCII. Non-ASCII characters are printed as \xFF
sequences (which is ASCII too). This way, our bytes will not be
changed.
*/
ErrConvString tmp(str_value.ptr(), str_value.length(), &my_charset_bin);
str->append(tmp.ptr());
}
else
{
if (my_charset_same(str_value.charset(), system_charset_info))
str_value.print(str); // already in system_charset_info
else // need to convert
{
THD *thd= current_thd;
LEX_STRING utf8_lex_str;
thd->convert_string(&utf8_lex_str, thd->convert_string(&utf8_lex_str,
system_charset_info, system_charset_info,
str_value.c_ptr_safe(), str_value.c_ptr_safe(),
str_value.length(), str_value.length(),
str_value.charset()); str_value.charset());
String utf8_str(utf8_lex_str.str, String utf8_str(utf8_lex_str.str,
utf8_lex_str.length, utf8_lex_str.length,
system_charset_info); system_charset_info);
utf8_str.print(str); utf8_str.print(str);
}
}
}
else
{
// Caller wants a result in the charset of str_value.
str_value.print(str);
} }
str->append('\''); str->append('\'');
......
...@@ -399,16 +399,16 @@ enum options_mysqld ...@@ -399,16 +399,16 @@ enum options_mysqld
/** /**
Query type constants. Query type constants (usable as bitmap flags).
QT_ORDINARY -- ordinary SQL query.
QT_IS -- SQL query to be shown in INFORMATION_SCHEMA (in utf8 and without
character set introducers).
*/ */
enum enum_query_type enum enum_query_type
{ {
QT_ORDINARY, /// Nothing specific, ordinary SQL query.
QT_IS QT_ORDINARY= 0,
/// In utf8.
QT_TO_SYSTEM_CHARSET= (1 << 0),
/// Without character set introducers.
QT_WITHOUT_INTRODUCERS= (1 << 1)
}; };
/* query_id */ /* query_id */
......
...@@ -4439,7 +4439,11 @@ static bool execute_sqlcom_select(THD *thd, TABLE_LIST *all_tables) ...@@ -4439,7 +4439,11 @@ static bool execute_sqlcom_select(THD *thd, TABLE_LIST *all_tables)
char buff[1024]; char buff[1024];
String str(buff,(uint32) sizeof(buff), system_charset_info); String str(buff,(uint32) sizeof(buff), system_charset_info);
str.length(0); str.length(0);
thd->lex->unit.print(&str, QT_ORDINARY); /*
The warnings system requires input in utf8, @see
mysqld_show_warnings().
*/
thd->lex->unit.print(&str, QT_TO_SYSTEM_CHARSET);
str.append('\0'); str.append('\0');
push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
ER_YES, str.ptr()); ER_YES, str.ptr());
......
...@@ -841,7 +841,8 @@ static int mysql_register_view(THD *thd, TABLE_LIST *view, ...@@ -841,7 +841,8 @@ static int mysql_register_view(THD *thd, TABLE_LIST *view,
thd->variables.sql_mode&= ~MODE_ANSI_QUOTES; thd->variables.sql_mode&= ~MODE_ANSI_QUOTES;
lex->unit.print(&view_query, QT_ORDINARY); lex->unit.print(&view_query, QT_ORDINARY);
lex->unit.print(&is_query, QT_IS); lex->unit.print(&is_query,
enum_query_type(QT_TO_SYSTEM_CHARSET | QT_WITHOUT_INTRODUCERS));
thd->variables.sql_mode|= sql_mode; thd->variables.sql_mode|= sql_mode;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment