Commit d16a9a68 authored by Ramil Kalimullin's avatar Ramil Kalimullin

Fix for bug#36772: When using UTF8, CONVERT with GROUP BY

returns truncated results

Problem: performig conversion from {INT, DECIMAL, REAL} to CHAR
we incorrectly set its max length in some cases that may lead
to truncated results returned.

Fix: properly set CONVERT({INT, DECIMAL, REAL}, CHAR) result's
max length.


mysql-test/r/ctype_utf8.result:
  Fix for bug#36772: When using UTF8, CONVERT with GROUP BY
  returns truncated results
    - test result.
mysql-test/t/ctype_utf8.test:
  Fix for bug#36772: When using UTF8, CONVERT with GROUP BY
  returns truncated results
    - test case.
sql/item_timefunc.cc:
  Fix for bug#36772: When using UTF8, CONVERT with GROUP BY
  returns truncated results
    - calculating Item_char_typecast::max_length use initial
  argument's charset mbmaxlen instead of from_cs->mbmaxlen,
  as from_cs may differ in some case (see comment above).
parent 82ee1ccb
...@@ -1813,3 +1813,35 @@ select hex(_utf8 B'001111111111'); ...@@ -1813,3 +1813,35 @@ select hex(_utf8 B'001111111111');
ERROR HY000: Invalid utf8 character string: 'FF' ERROR HY000: Invalid utf8 character string: 'FF'
select (_utf8 X'616263FF'); select (_utf8 X'616263FF');
ERROR HY000: Invalid utf8 character string: 'FF' ERROR HY000: Invalid utf8 character string: 'FF'
CREATE TABLE t1 (a INT NOT NULL, b INT NOT NULL);
INSERT INTO t1 VALUES (70000, 1092), (70001, 1085), (70002, 1065);
SELECT CONVERT(a, CHAR), CONVERT(b, CHAR) FROM t1 GROUP BY b;
CONVERT(a, CHAR) CONVERT(b, CHAR)
70002 1065
70001 1085
70000 1092
SELECT CONVERT(a, CHAR), CONVERT(b, CHAR) FROM t1;
CONVERT(a, CHAR) CONVERT(b, CHAR)
70000 1092
70001 1085
70002 1065
ALTER TABLE t1 ADD UNIQUE (b);
SELECT CONVERT(a, CHAR), CONVERT(b, CHAR) FROM t1 GROUP BY b;
CONVERT(a, CHAR) CONVERT(b, CHAR)
70002 1065
70001 1085
70000 1092
DROP INDEX b ON t1;
SELECT CONVERT(a, CHAR), CONVERT(b, CHAR) FROM t1 GROUP BY b;
CONVERT(a, CHAR) CONVERT(b, CHAR)
70002 1065
70001 1085
70000 1092
ALTER TABLE t1 ADD INDEX (b);
SELECT CONVERT(a, CHAR), CONVERT(b, CHAR) from t1 GROUP BY b;
CONVERT(a, CHAR) CONVERT(b, CHAR)
70002 1065
70001 1085
70000 1092
DROP TABLE t1;
End of 5.0 tests
...@@ -1437,3 +1437,20 @@ select hex(_utf8 X'616263FF'); ...@@ -1437,3 +1437,20 @@ select hex(_utf8 X'616263FF');
select hex(_utf8 B'001111111111'); select hex(_utf8 B'001111111111');
--error ER_INVALID_CHARACTER_STRING --error ER_INVALID_CHARACTER_STRING
select (_utf8 X'616263FF'); select (_utf8 X'616263FF');
#
# Bug #36772: When using UTF8, CONVERT with GROUP BY returns truncated results
#
CREATE TABLE t1 (a INT NOT NULL, b INT NOT NULL);
INSERT INTO t1 VALUES (70000, 1092), (70001, 1085), (70002, 1065);
SELECT CONVERT(a, CHAR), CONVERT(b, CHAR) FROM t1 GROUP BY b;
SELECT CONVERT(a, CHAR), CONVERT(b, CHAR) FROM t1;
ALTER TABLE t1 ADD UNIQUE (b);
SELECT CONVERT(a, CHAR), CONVERT(b, CHAR) FROM t1 GROUP BY b;
DROP INDEX b ON t1;
SELECT CONVERT(a, CHAR), CONVERT(b, CHAR) FROM t1 GROUP BY b;
ALTER TABLE t1 ADD INDEX (b);
SELECT CONVERT(a, CHAR), CONVERT(b, CHAR) from t1 GROUP BY b;
DROP TABLE t1;
--echo End of 5.0 tests
...@@ -2550,6 +2550,8 @@ void Item_char_typecast::fix_length_and_dec() ...@@ -2550,6 +2550,8 @@ void Item_char_typecast::fix_length_and_dec()
and thus avoid unnecessary character set conversion. and thus avoid unnecessary character set conversion.
- If the argument is not a number, then from_cs is set to - If the argument is not a number, then from_cs is set to
the argument's charset. the argument's charset.
Note (TODO): we could use repertoire technique here.
*/ */
from_cs= (args[0]->result_type() == INT_RESULT || from_cs= (args[0]->result_type() == INT_RESULT ||
args[0]->result_type() == DECIMAL_RESULT || args[0]->result_type() == DECIMAL_RESULT ||
...@@ -2557,12 +2559,13 @@ void Item_char_typecast::fix_length_and_dec() ...@@ -2557,12 +2559,13 @@ void Item_char_typecast::fix_length_and_dec()
(cast_cs->mbminlen == 1 ? cast_cs : &my_charset_latin1) : (cast_cs->mbminlen == 1 ? cast_cs : &my_charset_latin1) :
args[0]->collation.collation; args[0]->collation.collation;
charset_conversion= (cast_cs->mbmaxlen > 1) || charset_conversion= (cast_cs->mbmaxlen > 1) ||
!my_charset_same(from_cs, cast_cs) && (!my_charset_same(from_cs, cast_cs) &&
from_cs != &my_charset_bin && from_cs != &my_charset_bin &&
cast_cs != &my_charset_bin; cast_cs != &my_charset_bin);
collation.set(cast_cs, DERIVATION_IMPLICIT); collation.set(cast_cs, DERIVATION_IMPLICIT);
char_length= (cast_length >= 0) ? cast_length : char_length= (cast_length >= 0) ?
args[0]->max_length/from_cs->mbmaxlen; cast_length :
args[0]->max_length / args[0]->collation.collation->mbmaxlen;
max_length= char_length * cast_cs->mbmaxlen; max_length= char_length * cast_cs->mbmaxlen;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment