Commit 03c3dc63 authored by Alexander Barkov's avatar Alexander Barkov

MDEV-23210 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on ALTER...

MDEV-23210 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on ALTER TABLE, SELECT and INSERT

Problem:
Parse-time conversion from binary to tricky character sets like utf32
produced ill-formed strings. So, later a crash happened in debug builds,
or a wrong SHOW CREATE TABLE was returned in release builds.

Fix:

1. Backporting a few methods from 10.3:
  - THD::check_string_for_wellformedness()
  - THD::convert_string() overloads
  - THD::make_text_string_connection()

2. Adding a new method THD::reinterpret_string_from_binary(),
   which either returns a well-formed string
   (optionally prepending zero bytes), or returns an error.
parent ed6e271f
......@@ -2890,5 +2890,28 @@ HEX(c1)
0000006100000063
DROP TABLE t1;
#
# MDEV-23210 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on ALTER TABLE, SELECT and INSERT
#
CREATE TABLE t1 (a CHAR(1));
SET COLLATION_CONNECTION=utf32_general_ci, CHARACTER_SET_CLIENT=binary;
ALTER TABLE t1 CHANGE a a ENUM('a','a') CHARACTER SET utf32;
ERROR HY000: Column 'a' has duplicated value 'a' in ENUM
ALTER TABLE t1 CHANGE a a ENUM('aaa') CHARACTER SET utf32;
ERROR HY000: Invalid utf32 character string: '\x00aaa'
ALTER TABLE t1 CHANGE a a ENUM('aa') CHARACTER SET utf32;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` enum('慡') CHARACTER SET utf32 DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
ALTER TABLE t1 CHANGE a a ENUM('a','b') CHARACTER SET utf32;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` enum('a','b') CHARACTER SET utf32 DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
DROP TABLE t1;
SET NAMES utf8;
#
# End of 10.2 tests
#
......@@ -7941,5 +7941,20 @@ EXECUTE s;
DEALLOCATE PREPARE s;
SET NAMES utf8;
#
# MDEV-23210 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on ALTER TABLE, SELECT and INSERT
#
CREATE TABLE t1 (a CHAR(1));
SET COLLATION_CONNECTION=utf32_myanmar_ci, CHARACTER_SET_CLIENT=binary;
ALTER TABLE t1 CHANGE a a ENUM('a','a') CHARACTER SET utf32;
ERROR HY000: Column 'a' has duplicated value 'a' in ENUM
ALTER TABLE t1 CHANGE a a ENUM('a','b') CHARACTER SET utf32;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` enum('a','b') CHARACTER SET utf32 DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
DROP TABLE t1;
SET NAMES utf8;
#
# End of 10.2 tests
#
......@@ -1048,6 +1048,25 @@ INSERT INTO t1 (c1) VALUES (1),(2),(3);
SELECT HEX(c1) FROM t1 ORDER BY c1;
DROP TABLE t1;
--echo #
--echo # MDEV-23210 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on ALTER TABLE, SELECT and INSERT
--echo #
CREATE TABLE t1 (a CHAR(1));
SET COLLATION_CONNECTION=utf32_general_ci, CHARACTER_SET_CLIENT=binary;
--error ER_DUPLICATED_VALUE_IN_TYPE
ALTER TABLE t1 CHANGE a a ENUM('a','a') CHARACTER SET utf32;
--error ER_INVALID_CHARACTER_STRING
ALTER TABLE t1 CHANGE a a ENUM('aaa') CHARACTER SET utf32;
ALTER TABLE t1 CHANGE a a ENUM('aa') CHARACTER SET utf32;
SHOW CREATE TABLE t1;
ALTER TABLE t1 CHANGE a a ENUM('a','b') CHARACTER SET utf32;
SHOW CREATE TABLE t1;
DROP TABLE t1;
SET NAMES utf8;
--echo #
--echo # End of 10.2 tests
--echo #
......@@ -290,6 +290,19 @@ EXECUTE s;
DEALLOCATE PREPARE s;
SET NAMES utf8;
--echo #
--echo # MDEV-23210 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on ALTER TABLE, SELECT and INSERT
--echo #
CREATE TABLE t1 (a CHAR(1));
SET COLLATION_CONNECTION=utf32_myanmar_ci, CHARACTER_SET_CLIENT=binary;
--error ER_DUPLICATED_VALUE_IN_TYPE
ALTER TABLE t1 CHANGE a a ENUM('a','a') CHARACTER SET utf32;
ALTER TABLE t1 CHANGE a a ENUM('a','b') CHARACTER SET utf32;
SHOW CREATE TABLE t1;
DROP TABLE t1;
SET NAMES utf8;
--echo #
--echo # End of 10.2 tests
......
......@@ -2148,7 +2148,7 @@ void THD::cleanup_after_query()
*/
bool THD::convert_string(LEX_STRING *to, CHARSET_INFO *to_cs,
const char *from, uint from_length,
const char *from, size_t from_length,
CHARSET_INFO *from_cs)
{
DBUG_ENTER("THD::convert_string");
......@@ -2170,6 +2170,58 @@ bool THD::convert_string(LEX_STRING *to, CHARSET_INFO *to_cs,
}
/*
  Treat a binary string as a string in the character set "cs".

  If "length" is not a multiple of cs->mbminlen, the bytes are copied into
  a buffer obtained from alloc() with enough leading zero bytes to make the
  length a multiple of cs->mbminlen; the copy is '\0'-terminated.
  Otherwise "to" simply points at the caller's bytes, nothing is copied.
  In both cases the result is then passed through
  check_string_for_wellformedness().

  @param[OUT] to      Receives the resulting string: either "str" itself
                      or a newly allocated, zero-prefixed copy.
                      Set to {NULL, 0} if the allocation fails.
  @param      cs      The destination character set
  @param      str     The binary string
  @param      length  The length of the binary string, in bytes

  @return false  on success
  @return true   on error (allocation failure or a not well-formed result)
*/
bool THD::reinterpret_string_from_binary(LEX_CSTRING *to, CHARSET_INFO *cs,
                                         const char *str, size_t length)
{
  // Number of trailing bytes that do not make up a whole minimum-size unit
  const size_t tail_bytes= length % cs->mbminlen;

  if (!tail_bytes)
  {
    // Already aligned: reuse the source bytes as they are
    to->str= str;
    to->length= length;
    return check_string_for_wellformedness(to->str, to->length, cs);
  }

  /*
    Multi-byte-minimum character sets such as UCS2, UTF16 and UTF32 need
    the string to be left-padded with zero bytes, e.g. after:
      SET COLLATION_CONNECTION=utf32_general_ci, CHARACTER_SET_CLIENT=binary;
    The approach mirrors String::copy_aligned().
  */
  const size_t pad_bytes= cs->mbminlen - tail_bytes;
  const size_t padded_length= pad_bytes + length;
  char *buf= (char*) alloc(padded_length + 1);
  if (!buf)
  {
    to->str= NULL; // Safety
    to->length= 0;
    return true;
  }

  bzero(buf, pad_bytes);
  memcpy(buf + pad_bytes, str, length);
  buf[padded_length]= '\0';

  to->str= buf;
  to->length= padded_length;
  return check_string_for_wellformedness(to->str, to->length, cs);
}
/*
Convert a string between two character sets.
dstcs and srccs cannot be &my_charset_bin.
......@@ -2274,6 +2326,21 @@ bool THD::convert_string(String *s, CHARSET_INFO *from_cs, CHARSET_INFO *to_cs)
}
bool THD::check_string_for_wellformedness(const char *str,
size_t length,
CHARSET_INFO *cs) const
{
size_t wlen= Well_formed_prefix(cs, str, length).length();
if (wlen < length)
{
ErrConvString err(str, length, &my_charset_bin);
my_error(ER_INVALID_CHARACTER_STRING, MYF(0), cs->csname, err.ptr());
return true;
}
return false;
}
/*
Update some cache variables when character set changes
*/
......
......@@ -3503,8 +3503,31 @@ class THD :public Statement,
return true; // EOM
}
bool convert_string(LEX_STRING *to, CHARSET_INFO *to_cs,
const char *from, uint from_length,
const char *from, size_t from_length,
CHARSET_INFO *from_cs);
bool reinterpret_string_from_binary(LEX_CSTRING *to, CHARSET_INFO *to_cs,
const char *from, size_t from_length);
/*
  LEX_CSTRING flavour of convert_string(): runs the LEX_STRING overload
  on a temporary and copies its str/length into "to".
  The copy is made whatever the result of the conversion is.

  @return the value returned by the LEX_STRING overload
*/
bool convert_string(LEX_CSTRING *to, CHARSET_INFO *to_cs,
                    const char *from, size_t from_length,
                    CHARSET_INFO *from_cs)
{
  LEX_STRING converted;
  const bool failed= convert_string(&converted, to_cs,
                                    from, from_length, from_cs);
  to->str= converted.str;
  to->length= converted.length;
  return failed;
}
/*
  Produce "to" from "from", picking the cheapest applicable route:
  - simple copy not possible:        full conversion from fromcs to tocs
  - simple copy, fromcs is binary:   reinterpret_string_from_binary()
  - simple copy, any other fromcs:   "to" becomes a shallow copy of "from"

  @return false on success
  @return true  on error, as reported by the routine that was called
*/
bool convert_string(LEX_CSTRING *to, CHARSET_INFO *tocs,
                    const LEX_CSTRING *from, CHARSET_INFO *fromcs,
                    bool simple_copy_is_possible)
{
  if (simple_copy_is_possible)
  {
    if (fromcs != &my_charset_bin)
    {
      *to= *from;
      return false;
    }
    // Binary input still has to be aligned and checked for the target charset
    return reinterpret_string_from_binary(to, tocs, from->str, from->length);
  }
  return unlikely(convert_string(to, tocs, from->str, from->length, fromcs));
}
/*
Convert a strings between character sets.
Uses my_convert_fix(), which uses an mb_wc .. mc_mb loop internally.
......@@ -3540,6 +3563,44 @@ class THD :public Statement,
bool convert_string(String *s, CHARSET_INFO *from_cs, CHARSET_INFO *to_cs);
/*
Check if the string is wellformed, raise an error if not wellformed.
@param str - The string to check.
@param length - the string length.
@param cs - The character set the string is checked against.
@return false - the string is wellformed.
@return true - the string is not wellformed,
ER_INVALID_CHARACTER_STRING has been raised.
*/
bool check_string_for_wellformedness(const char *str,
size_t length,
CHARSET_INFO *cs) const;
/*
  Convert "from", given in the character set returned by charset(),
  to variables.collation_connection and store the result in "to".
  charset_is_collation_connection is passed on as the
  "simple copy is possible" flag of convert_string().

  @return false on success
  @return true  on error
*/
bool make_text_string_connection(LEX_CSTRING *to,
                                 const LEX_CSTRING *from)
{
  CHARSET_INFO *const target_cs= variables.collation_connection;
  CHARSET_INFO *const source_cs= charset();
  return convert_string(to, target_cs, from, source_cs,
                        charset_is_collation_connection);
}
#if MYSQL_VERSION_ID < 100300
/*
  A 10.2-only adapter around the LEX_CSTRING version of
  make_text_string_connection().
  Various fields in the bison %union still use LEX_STRING in 10.2;
  in 10.3 those fields are fixed to use LEX_CSTRING.
  Please remove this wrapper when merging to 10.3.

  @return the value returned by the LEX_CSTRING version
*/
bool make_text_string_connection(LEX_STRING *to,
                                 const LEX_STRING *from)
{
  LEX_CSTRING src;
  src.str= from->str;
  src.length= from->length;

  LEX_CSTRING dst;
  const bool failed= make_text_string_connection(&dst, &src);

  // LEX_STRING carries a non-const pointer, hence the cast
  to->str= (char*) dst.str;
  to->length= dst.length;
  return failed;
}
#else
#error Remove the above wrapper
#endif
void add_changed_table(TABLE *table);
void add_changed_table(const char *key, long key_length);
CHANGED_TABLE_LIST * changed_table_dup(const char *key, long key_length);
......
......@@ -14571,14 +14571,8 @@ TEXT_STRING_sys:
TEXT_STRING_literal:
TEXT_STRING
{
if (thd->charset_is_collation_connection)
$$= $1;
else
{
if (thd->convert_string(&$$, thd->variables.collation_connection,
$1.str, $1.length, thd->charset()))
MYSQL_YYABORT;
}
if (thd->make_text_string_connection(&$$, &$1))
MYSQL_YYABORT;
}
;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment