Commit b6346b44 authored by Ramil Kalimullin

Fix for bug#57272: crash in rpad() when using utf8

Problem: if multibyte and binary string arguments are passed to
the RPAD(), LPAD() or INSERT() functions, they might return
wrong results or even crash the server because of a missing
character set conversion.

Fix: perform the conversion if necessary.


mysql-test/r/ctype_utf8.result:
  Fix for bug#57272: crash in rpad() when using utf8
    - test result.
mysql-test/t/ctype_utf8.test:
  Fix for bug#57272: crash in rpad() when using utf8
    - test case.
sql/item_strfunc.cc:
  Fix for bug#57272: crash in rpad() when using utf8
    - convert multibyte argument's character set to binary in case of
  FUNCTION(MULTIBYTE_ARG, .., BINARY_ARG,..) for RPAD(), LPAD() and 
  INSERT() functions.
parent e7631b0d
...@@ -1898,3 +1898,34 @@ CONVERT(a, CHAR) CONVERT(b, CHAR) ...@@ -1898,3 +1898,34 @@ CONVERT(a, CHAR) CONVERT(b, CHAR)
70000 1092 70000 1092
DROP TABLE t1; DROP TABLE t1;
End of 5.0 tests End of 5.0 tests
SELECT LENGTH(RPAD(0.0115E88, 61297, _utf8'яэюя'));
LENGTH(RPAD(0.0115E88, 61297, _utf8'яэюя'))
61297
SELECT LENGTH(RPAD(0.0115E88, 61297, _utf8'йцуя'));
LENGTH(RPAD(0.0115E88, 61297, _utf8'йцуя'))
61297
SELECT HEX(RPAD(0x20, 2, _utf8 0xD18F));
HEX(RPAD(0x20, 2, _utf8 0xD18F))
20D1
SELECT HEX(RPAD(0x20, 4, _utf8 0xD18F));
HEX(RPAD(0x20, 4, _utf8 0xD18F))
20D18FD1
SELECT HEX(LPAD(0x20, 2, _utf8 0xD18F));
HEX(LPAD(0x20, 2, _utf8 0xD18F))
D120
SELECT HEX(LPAD(0x20, 4, _utf8 0xD18F));
HEX(LPAD(0x20, 4, _utf8 0xD18F))
D18FD120
SELECT HEX(RPAD(_utf8 0xD18F, 3, 0x20));
HEX(RPAD(_utf8 0xD18F, 3, 0x20))
D18F20
SELECT HEX(LPAD(_utf8 0xD18F, 3, 0x20));
HEX(LPAD(_utf8 0xD18F, 3, 0x20))
20D18F
SELECT HEX(INSERT(_utf8 0xD18F, 2, 1, 0x20));
HEX(INSERT(_utf8 0xD18F, 2, 1, 0x20))
D120
SELECT HEX(INSERT(_utf8 0xD18FD18E, 2, 1, 0x20));
HEX(INSERT(_utf8 0xD18FD18E, 2, 1, 0x20))
D120D18E
End of 5.1 tests
...@@ -1466,3 +1466,23 @@ SELECT CONVERT(a, CHAR), CONVERT(b, CHAR) from t1 GROUP BY b; ...@@ -1466,3 +1466,23 @@ SELECT CONVERT(a, CHAR), CONVERT(b, CHAR) from t1 GROUP BY b;
DROP TABLE t1; DROP TABLE t1;
--echo End of 5.0 tests --echo End of 5.0 tests
#
# Bug #57272: crash in rpad() when using utf8
#
SELECT LENGTH(RPAD(0.0115E88, 61297, _utf8'яэюя'));
SELECT LENGTH(RPAD(0.0115E88, 61297, _utf8'йцуя'));
SELECT HEX(RPAD(0x20, 2, _utf8 0xD18F));
SELECT HEX(RPAD(0x20, 4, _utf8 0xD18F));
SELECT HEX(LPAD(0x20, 2, _utf8 0xD18F));
SELECT HEX(LPAD(0x20, 4, _utf8 0xD18F));
SELECT HEX(RPAD(_utf8 0xD18F, 3, 0x20));
SELECT HEX(LPAD(_utf8 0xD18F, 3, 0x20));
SELECT HEX(INSERT(_utf8 0xD18F, 2, 1, 0x20));
SELECT HEX(INSERT(_utf8 0xD18FD18E, 2, 1, 0x20));
--echo End of 5.1 tests
...@@ -1013,6 +1013,20 @@ String *Item_func_insert::val_str(String *str) ...@@ -1013,6 +1013,20 @@ String *Item_func_insert::val_str(String *str)
if ((length < 0) || (length > res->length())) if ((length < 0) || (length > res->length()))
length= res->length(); length= res->length();
/*
There is one exception not handled (intentionally) by the character set
aggregation code. If one string is the strong side and is binary, and
the other one is the weak side and is a multi-byte character string,
then we need to operate on the second string in terms of bytes when
calling ::numchars() and ::charpos(), rather than in terms of characters.
Let's substitute its character set with binary.
*/
if (collation.collation == &my_charset_bin)
{
res->set_charset(&my_charset_bin);
res2->set_charset(&my_charset_bin);
}
/* start and length are now sufficiently valid to pass to charpos function */ /* start and length are now sufficiently valid to pass to charpos function */
start= res->charpos((int) start); start= res->charpos((int) start);
length= res->charpos((int) length, (uint32) start); length= res->charpos((int) length, (uint32) start);
...@@ -2514,6 +2528,20 @@ String *Item_func_rpad::val_str(String *str) ...@@ -2514,6 +2528,20 @@ String *Item_func_rpad::val_str(String *str)
/* Set here so that rest of code sees out-of-bound value as such. */ /* Set here so that rest of code sees out-of-bound value as such. */
if ((ulonglong) count > INT_MAX32) if ((ulonglong) count > INT_MAX32)
count= INT_MAX32; count= INT_MAX32;
/*
There is one exception not handled (intentionally) by the character set
aggregation code. If one string is the strong side and is binary, and
the other one is the weak side and is a multi-byte character string,
then we need to operate on the second string in terms of bytes when
calling ::numchars() and ::charpos(), rather than in terms of characters.
Let's substitute its character set with binary.
*/
if (collation.collation == &my_charset_bin)
{
res->set_charset(&my_charset_bin);
rpad->set_charset(&my_charset_bin);
}
if (count <= (res_char_length= res->numchars())) if (count <= (res_char_length= res->numchars()))
{ // String to pad is big enough { // String to pad is big enough
res->length(res->charpos((int) count)); // Shorten result if longer res->length(res->charpos((int) count)); // Shorten result if longer
...@@ -2616,6 +2644,20 @@ String *Item_func_lpad::val_str(String *str) ...@@ -2616,6 +2644,20 @@ String *Item_func_lpad::val_str(String *str)
if ((ulonglong) count > INT_MAX32) if ((ulonglong) count > INT_MAX32)
count= INT_MAX32; count= INT_MAX32;
/*
There is one exception not handled (intentionally) by the character set
aggregation code. If one string is the strong side and is binary, and
the other one is the weak side and is a multi-byte character string,
then we need to operate on the second string in terms of bytes when
calling ::numchars() and ::charpos(), rather than in terms of characters.
Let's substitute its character set with binary.
*/
if (collation.collation == &my_charset_bin)
{
res->set_charset(&my_charset_bin);
pad->set_charset(&my_charset_bin);
}
res_char_length= res->numchars(); res_char_length= res->numchars();
if (count <= res_char_length) if (count <= res_char_length)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment