Commit 22fd31c5 authored by Alexander Barkov

MDEV-28078 Garbage on multiple equal ENUMs with tricky character sets

TYPELIBs for ENUM/SET columns could erroneously undergo redundant
hex-unescaping at table open time.

Fix:
- Prevent multiple unescaping of the same TYPELIB
- Prevent sharing TYPELIBs between columns with different mbminlen
parent 118826d1
......@@ -2913,5 +2913,30 @@ t1 CREATE TABLE `t1` (
DROP TABLE t1;
SET NAMES utf8;
#
# MDEV-28078 Garbage on multiple equal ENUMs with tricky character sets
#
CREATE TABLE t1 (
c1 ENUM ('a','b') CHARACTER SET utf32 DEFAULT 'a',
c2 ENUM ('a','b') CHARACTER SET utf32 DEFAULT 'a'
);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c1` enum('a','b') CHARACTER SET utf32 DEFAULT 'a',
`c2` enum('a','b') CHARACTER SET utf32 DEFAULT 'a'
) ENGINE=MyISAM DEFAULT CHARSET=latin1
DROP TABLE t1;
CREATE TABLE t1 (
c1 ENUM ('00000061','00000062') DEFAULT '00000061' COLLATE latin1_bin,
c2 ENUM ('a','b') DEFAULT 'a' COLLATE utf32_general_ci
);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c1` enum('00000061','00000062') CHARACTER SET latin1 COLLATE latin1_bin DEFAULT '00000061',
`c2` enum('a','b') CHARACTER SET utf32 DEFAULT 'a'
) ENGINE=MyISAM DEFAULT CHARSET=latin1
DROP TABLE t1;
#
# End of 10.2 tests
#
......@@ -1067,6 +1067,25 @@ DROP TABLE t1;
SET NAMES utf8;
--echo #
--echo # MDEV-28078 Garbage on multiple equal ENUMs with tricky character sets
--echo #
CREATE TABLE t1 (
c1 ENUM ('a','b') CHARACTER SET utf32 DEFAULT 'a',
c2 ENUM ('a','b') CHARACTER SET utf32 DEFAULT 'a'
);
SHOW CREATE TABLE t1;
DROP TABLE t1;
CREATE TABLE t1 (
c1 ENUM ('00000061','00000062') DEFAULT '00000061' COLLATE latin1_bin,
c2 ENUM ('a','b') DEFAULT 'a' COLLATE utf32_general_ci
);
SHOW CREATE TABLE t1;
DROP TABLE t1;
--echo #
--echo # End of 10.2 tests
--echo #
......@@ -1229,6 +1229,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
plugin_ref se_plugin= 0;
MEM_ROOT *old_root= thd->mem_root;
Virtual_column_info **table_check_constraints;
bool *interval_unescaped= NULL;
DBUG_ENTER("TABLE_SHARE::init_from_binary_frm_image");
keyinfo= &first_keyinfo;
......@@ -1686,6 +1687,13 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
goto err;
if (interval_count)
{
if (!(interval_unescaped= (bool*) my_alloca(interval_count * sizeof(bool))))
goto err;
bzero(interval_unescaped, interval_count * sizeof(bool));
}
field_ptr= share->field;
table_check_constraints= share->check_constraints;
read_length=(uint) (share->fields * field_pack_length +
......@@ -1956,11 +1964,17 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
if (share->mysql_version < 100200)
pack_flag&= ~FIELDFLAG_LONG_DECIMAL;
if (interval_nr && charset->mbminlen > 1)
if (interval_nr && charset->mbminlen > 1 &&
!interval_unescaped[interval_nr - 1])
{
/* Unescape UCS2 intervals from HEX notation */
/*
Unescape UCS2/UTF16/UTF32 intervals from HEX notation.
Note, ENUM/SET columns with equal value list share a single
copy of TYPELIB. Unescape every TYPELIB only once.
*/
TYPELIB *interval= share->intervals + interval_nr - 1;
unhex_type2(interval);
interval_unescaped[interval_nr - 1]= true;
}
#ifndef TO_BE_DELETED_ON_PRODUCTION
......@@ -2610,6 +2624,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
share->error= OPEN_FRM_OK;
thd->status_var.opened_shares++;
thd->mem_root= old_root;
my_afree(interval_unescaped);
DBUG_RETURN(0);
err:
......@@ -2623,6 +2638,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
open_table_error(share, OPEN_FRM_CORRUPTED, share->open_errno);
thd->mem_root= old_root;
my_afree(interval_unescaped);
DBUG_RETURN(HA_ERR_NOT_A_TABLE);
}
......
......@@ -757,7 +757,16 @@ static uint get_interval_id(uint *int_count,List<Create_field> &create_fields,
while ((field=it++) != last_field)
{
if (field->interval_id && field->interval->count == interval->count)
/*
ENUM/SET columns with equal value lists share a single
copy of the underlying TYPELIB.
Fields with different mbminlen can't reuse TYPELIBs, because:
- mbminlen==1 are written to FRM as is
- mbminlen>1 are written to FRM in hex-encoded format
*/
if (field->interval_id &&
field->interval->count == interval->count &&
field->charset->mbminlen == last_field->charset->mbminlen)
{
const char **a,**b;
for (a=field->interval->type_names, b=interval->type_names ;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment