Commit e9aac091 authored by Marko Mäkelä

MDEV-25440: Indexed CHAR columns are broken with NO_PAD collations

cmp_data(): Compare different-length CHAR fields with
the new strnncollsp_nchars function that will pad spaces if needed.

Any InnoDB ROW_FORMAT except the original one that was named
ROW_FORMAT=REDUNDANT in MySQL 5.0.3 will internally store
CHAR(n) columns as variable-length if the character encoding is
variable length. Spaces may be trimmed from the end.
For NOT NULL values, the minimum length is always n*mbminlen.
In cmp_data() we only know the lengths in bytes and we cannot
easily know the ROW_FORMAT.

is_strnncoll_compatible(): Refactored from innobase_mysql_cmp().

innobase_mysql_cmp(): Merged to cmp_whole_field().

cmp_whole_field(): Invoke strnncollsp_nchars for DATA_MYSQL
(the CHAR type with any collation other than latin1_swedish_ci).

Reviewed by: Alexander Barkov
Tested by: Roel Van de Paar
parent 37144afb
CREATE TABLE t1 (a CHAR(8), id INT, PRIMARY KEY (a,id)) COLLATE utf8_nopad_bin
ENGINE=InnoDB ROW_FORMAT=REDUNDANT;
INSERT INTO t1 VALUES ('',1);
ALTER TABLE t1 ROW_FORMAT=DYNAMIC;
INSERT INTO t1 VALUES ('',2);
ALTER TABLE t1 ROW_FORMAT=REDUNDANT;
DROP TABLE t1;
--source include/have_innodb.inc
CREATE TABLE t1 (a CHAR(8), id INT, PRIMARY KEY (a,id)) COLLATE utf8_nopad_bin
ENGINE=InnoDB ROW_FORMAT=REDUNDANT;
INSERT INTO t1 VALUES ('',1);
ALTER TABLE t1 ROW_FORMAT=DYNAMIC;
INSERT INTO t1 VALUES ('',2);
ALTER TABLE t1 ROW_FORMAT=REDUNDANT;
DROP TABLE t1;
/***************************************************************************** /*****************************************************************************
Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2020, 2021, MariaDB Corporation. Copyright (c) 2020, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software the terms of the GNU General Public License as published by the Free Software
...@@ -49,25 +49,11 @@ At the present, the comparison functions return 0 in the case, ...@@ -49,25 +49,11 @@ At the present, the comparison functions return 0 in the case,
where two records disagree only in the way that one where two records disagree only in the way that one
has more fields than the other. */ has more fields than the other. */
/** Compare two data fields. #ifndef DBUG_OFF
@param[in] prtype precise type /** @return whether a data type is compatible with strnncoll() functions */
@param[in] a data field static bool is_strnncoll_compatible(ulint type)
@param[in] a_length length of a, in bytes (not UNIV_SQL_NULL)
@param[in] b data field
@param[in] b_length length of b, in bytes (not UNIV_SQL_NULL)
@return positive, 0, negative, if a is greater, equal, less than b,
respectively */
UNIV_INLINE
int
innobase_mysql_cmp(
ulint prtype,
const byte* a,
unsigned int a_length,
const byte* b,
unsigned int b_length)
{ {
#ifdef UNIV_DEBUG switch (type) {
switch (prtype & DATA_MYSQL_TYPE_MASK) {
case MYSQL_TYPE_BIT: case MYSQL_TYPE_BIT:
case MYSQL_TYPE_STRING: case MYSQL_TYPE_STRING:
case MYSQL_TYPE_VAR_STRING: case MYSQL_TYPE_VAR_STRING:
...@@ -76,22 +62,12 @@ innobase_mysql_cmp( ...@@ -76,22 +62,12 @@ innobase_mysql_cmp(
case MYSQL_TYPE_BLOB: case MYSQL_TYPE_BLOB:
case MYSQL_TYPE_LONG_BLOB: case MYSQL_TYPE_LONG_BLOB:
case MYSQL_TYPE_VARCHAR: case MYSQL_TYPE_VARCHAR:
break; return true;
default: default:
ut_error; return false;
}
#endif /* UNIV_DEBUG */
uint cs_num = (uint) dtype_get_charset_coll(prtype);
if (CHARSET_INFO* cs = get_charset(cs_num, MYF(MY_WME))) {
return(cs->coll->strnncollsp(
cs, a, a_length, b, b_length));
} }
ib::fatal() << "Unable to find charset-collation " << cs_num;
return(0);
} }
#endif /* DBUG_OFF */
/*************************************************************//** /*************************************************************//**
Returns TRUE if two columns are equal for comparison purposes. Returns TRUE if two columns are equal for comparison purposes.
...@@ -309,68 +285,52 @@ cmp_gis_field( ...@@ -309,68 +285,52 @@ cmp_gis_field(
@param[in] b_length length of b, in bytes (not UNIV_SQL_NULL) @param[in] b_length length of b, in bytes (not UNIV_SQL_NULL)
@return positive, 0, negative, if a is greater, equal, less than b, @return positive, 0, negative, if a is greater, equal, less than b,
respectively */ respectively */
static static int cmp_whole_field(ulint mtype, ulint prtype,
int const byte *a, unsigned a_length,
cmp_whole_field( const byte *b, unsigned b_length)
ulint mtype,
ulint prtype,
const byte* a,
unsigned int a_length,
const byte* b,
unsigned int b_length)
{ {
float f_1;
float f_2;
double d_1;
double d_2;
switch (mtype) { switch (mtype) {
default:
ib::fatal() << "Unknown data type number " << mtype;
return 0;
case DATA_DECIMAL: case DATA_DECIMAL:
return(cmp_decimal(a, a_length, b, b_length)); return cmp_decimal(a, a_length, b, b_length);
case DATA_DOUBLE: case DATA_DOUBLE:
d_1 = mach_double_read(a); {
d_2 = mach_double_read(b); const double af= mach_double_read(a), bf= mach_double_read(b);
return af > bf ? 1 : bf > af ? -1 : 0;
if (d_1 > d_2) {
return(1);
} else if (d_2 > d_1) {
return(-1);
} }
return(0);
case DATA_FLOAT: case DATA_FLOAT:
f_1 = mach_float_read(a); {
f_2 = mach_float_read(b); const float af= mach_float_read(a), bf= mach_float_read(b);
return af > bf ? 1 : bf > af ? -1 : 0;
if (f_1 > f_2) {
return(1);
} else if (f_2 > f_1) {
return(-1);
} }
return(0);
case DATA_VARCHAR: case DATA_VARCHAR:
case DATA_CHAR: case DATA_CHAR:
return(my_charset_latin1.coll->strnncollsp( /* latin1_swedish_ci is treated as a special case in InnoDB.
&my_charset_latin1, Because it is a fixed-length encoding (mbminlen=mbmaxlen=1),
a, a_length, b, b_length)); non-NULL CHAR(n) values will always occupy n bytes and we
can invoke strnncollsp() instead of strnncollsp_nchars(). */
return my_charset_latin1.coll->strnncollsp(&my_charset_latin1,
a, a_length, b, b_length);
case DATA_BLOB: case DATA_BLOB:
if (prtype & DATA_BINARY_TYPE) { ut_ad(!(prtype & DATA_BINARY_TYPE)); /* our only caller tested this */
ib::error() << "Comparing a binary BLOB"
" using a character set collation!";
ut_ad(0);
}
/* fall through */ /* fall through */
case DATA_VARMYSQL: case DATA_VARMYSQL:
DBUG_ASSERT(is_strnncoll_compatible(prtype & DATA_MYSQL_TYPE_MASK));
if (CHARSET_INFO *cs= get_charset(dtype_get_charset_coll(prtype),
MYF(MY_WME)))
return cs->coll->strnncollsp(cs, a, a_length, b, b_length);
break;
case DATA_MYSQL: case DATA_MYSQL:
return(innobase_mysql_cmp(prtype, DBUG_ASSERT(is_strnncoll_compatible(prtype & DATA_MYSQL_TYPE_MASK));
a, a_length, b, b_length)); if (CHARSET_INFO *cs= get_charset(dtype_get_charset_coll(prtype),
default: MYF(MY_WME)))
ib::fatal() << "Unknown data type number " << mtype; return cs->coll->strnncollsp_nchars(cs, a, a_length, b, b_length,
std::max(a_length, b_length));
} }
return(0); ib::fatal() << "Unable to find charset-collation for " << prtype;
} }
/** Compare two data fields. /** Compare two data fields.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment