Commit cb9c49a9 authored by Alexander Barkov's avatar Alexander Barkov

MDEV-22111 ERROR 1064 & 1033 and SIGSEGV on CREATE TABLE w/ various charsets...

MDEV-22111 ERROR 1064 & 1033 and SIGSEGV on CREATE TABLE w/ various charsets on 10.4/5 optimized builds | Assertion `(uint) (table_check_constraints - share->check_constraints) == (uint) (share->table_check_constraints - share->field_check_constraints)' failed

The code incorrectly assumed in multiple places that TYPELIB
values cannot have 0x00 bytes inside. In fact they can:

  CREATE TABLE t1 (a ENUM(0x61, 0x0062) CHARACTER SET BINARY);

Note, the TYPELIB value encoding used in FRM is ambiguous about 0x00.

So this fix is partial.

It fixes 0x00 bytes in many (but not all) places:

- In the middle or in the end of a value:
    CREATE TABLE t1 (a ENUM(0x6100) ...);
    CREATE TABLE t1 (a ENUM(0x610062) ...);

- In the beginning of the first value:
    CREATE TABLE t1 (a ENUM(0x0061));
    CREATE TABLE t1 (a ENUM(0x0061), b ENUM('b'));

- In the beginning of the second (and following) value of the *last* ENUM/SET
  in the table:

    CREATE TABLE t1 (a ENUM('a',0x0061));
    CREATE TABLE t1 (a ENUM('a'), b ENUM('b',0x0061));

However, it does not fix 0x00 when:

- 0x00 byte is in the beginning of a value of a non-last ENUM/SET
  causes an error:

   CREATE TABLE t1 (a ENUM('a',0x0061), b ENUM('b'));
   ERROR 1033 (HY000): Incorrect information in file: './test/t1.frm'

  This is an ambuguous case and will be fixed separately.
  We need a new TYPELIB encoding to fix this.

Details:

- unireg.cc

  The function pack_header() incorrectly used strlen() to detect
  a TYPELIB value length. Adding a new function typelib_values_packed_length()
  which uses TYPELIB::type_lengths[n] to detect the n-th value length,
  and reusing the new function in pack_header() and packed_fields_length()

- table.cc
  fix_type_pointers() assumed in multiple places that values cannot have
  0x00 inside and used strlen(TYPELIB::type_names[n]) to set
  the corresponding TYPELIB::type_lengths[n].

  Also, fix_type_pointers() did not check the encoded data for consistency.

  Rewriting fix_type_pointers() code to populate TYPELIB::type_names[n] and
  TYPELIB::type_lengths[n] at the same time, so no additional loop
  with strlen() is needed any more.

  Adding many data consistency tests.

  Fixing the main loop in fix_type_pointers() to use memchr() instead of
  strchr() to handle 0x00 properly.

  Fixing create_key_infos() to return the result in a LEX_STRING rather
  that in a char*.
parent 836d7089
......@@ -74,6 +74,66 @@ EXPLAIN EXTENDED SELECT * FROM t1 WHERE COERCIBILITY(a)=2 AND a='a';
EXPLAIN EXTENDED SELECT * FROM t1 WHERE WEIGHT_STRING(a)='a' AND a='a';
DROP TABLE t1;
--echo #
--echo # MDEV-22111 ERROR 1064 & 1033 and SIGSEGV on CREATE TABLE w/ various charsets on 10.4/5 optimized builds | Assertion `(uint) (table_check_constraints - share->check_constraints) == (uint) (share->table_check_constraints - share->field_check_constraints)' failed
--echo #
CREATE TABLE t1(a ENUM(0x6100,0x6200,0x6300) CHARACTER SET 'Binary');
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES (1),(2),(3);
SELECT HEX(a) FROM t1 ORDER BY a;
DROP TABLE t1;
--echo 0x00 in the middle or in the end of a value
CREATE TABLE t1 (a ENUM(0x6100));
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES (1);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (a ENUM(0x610062));
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES (1);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
--echo 0x00 in the beginning of the first value:
CREATE TABLE t1 (a ENUM(0x0061));
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES(1);
SELECT * FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (a ENUM(0x0061), b ENUM('b'));
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES (1,1);
SELECT HEX(a), HEX(b) FROM t1;
DROP TABLE t1;
--echo # 0x00 in the beginning of the second (and following) value of the *last* ENUM/SET in the table:
CREATE TABLE t1 (a ENUM('a',0x0061));
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES (1),(2);
SELECT HEX(a) FROM t1 ORDER BY a;
DROP TABLE t1;
CREATE TABLE t1 (a ENUM('a'), b ENUM('b',0x0061));
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES (1,1);
INSERT INTO t1 VALUES (1,2);
SELECT HEX(a), HEX(b) FROM t1 ORDER BY a, b;
DROP TABLE t1;
--echo 0x00 in the beginning of a value of a non-last ENUM/SET causes an error:
--replace_regex /'.*t1.frm'/'DIR\/t1.frm'/
--error ER_NOT_FORM_FILE
CREATE TABLE t1 (a ENUM('a',0x0061), b ENUM('b'));
--echo #
--echo # End of 10.1 tests
--echo #
This diff is collapsed.
......@@ -498,6 +498,18 @@ static uint pack_keys(uchar *keybuff, uint key_count, KEY *keyinfo,
} /* pack_keys */
static uint typelib_values_packed_length(const TYPELIB *t)
{
uint length= 0;
for (uint i= 0; t->type_names[i]; i++)
{
length+= t->type_lengths[i];
length++; /* Separator */
}
return length;
}
/* Make formheader */
static bool pack_header(THD *thd, uchar *forminfo,
......@@ -619,9 +631,8 @@ static bool pack_header(THD *thd, uchar *forminfo,
field->interval_id=get_interval_id(&int_count,create_fields,field);
if (old_int_count != int_count)
{
for (const char **pos=field->interval->type_names ; *pos ; pos++)
int_length+=(uint) strlen(*pos)+1; // field + suffix prefix
int_parts+=field->interval->count+1;
int_length+= typelib_values_packed_length(field->interval);
int_parts+= field->interval->count + 1;
}
}
if (f_maybe_null(field->pack_flag))
......@@ -710,11 +721,7 @@ static size_t packed_fields_length(List<Create_field> &create_fields)
{
int_count= field->interval_id;
length++;
for (int i=0; field->interval->type_names[i]; i++)
{
length+= field->interval->type_lengths[i];
length++;
}
length+= typelib_values_packed_length(field->interval);
length++;
}
if (field->vcol_info)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment