Commit e9aac091 authored by Marko Mäkelä

MDEV-25440: Indexed CHAR columns are broken with NO_PAD collations

cmp_data(): Compare different-length CHAR fields with
the new strnncollsp_nchars function that will pad spaces if needed.

Any InnoDB ROW_FORMAT except the original one that was named
ROW_FORMAT=REDUNDANT in MySQL 5.0.3 will internally store
CHAR(n) columns as variable-length if the character encoding is
variable length. Spaces may be trimmed from the end.
For NOT NULL values, the minimum length is always n*mbminlen.
In cmp_data() we only know the lengths in bytes and we cannot
easily know the ROW_FORMAT.

is_strnncoll_compatible(): Refactored from innobase_mysql_cmp().

innobase_mysql_cmp(): Merged to cmp_whole_field().

cmp_whole_field(): Invoke strnncollsp_nchars for DATA_MYSQL
(the CHAR type with any collation other than latin1_swedish_ci).

Reviewed by: Alexander Barkov
Tested by: Roel Van de Paar
parent 37144afb
CREATE TABLE t1 (a CHAR(8), id INT, PRIMARY KEY (a,id)) COLLATE utf8_nopad_bin
ENGINE=InnoDB ROW_FORMAT=REDUNDANT;
INSERT INTO t1 VALUES ('',1);
ALTER TABLE t1 ROW_FORMAT=DYNAMIC;
INSERT INTO t1 VALUES ('',2);
ALTER TABLE t1 ROW_FORMAT=REDUNDANT;
DROP TABLE t1;
--source include/have_innodb.inc
CREATE TABLE t1 (a CHAR(8), id INT, PRIMARY KEY (a,id)) COLLATE utf8_nopad_bin
ENGINE=InnoDB ROW_FORMAT=REDUNDANT;
INSERT INTO t1 VALUES ('',1);
ALTER TABLE t1 ROW_FORMAT=DYNAMIC;
INSERT INTO t1 VALUES ('',2);
ALTER TABLE t1 ROW_FORMAT=REDUNDANT;
DROP TABLE t1;
/*****************************************************************************
Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2020, 2021, MariaDB Corporation.
Copyright (c) 2020, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -49,25 +49,11 @@ At the present, the comparison functions return 0 in the case,
where two records disagree only in the way that one
has more fields than the other. */
/** Compare two data fields.
@param[in] prtype precise type
@param[in] a data field
@param[in] a_length length of a, in bytes (not UNIV_SQL_NULL)
@param[in] b data field
@param[in] b_length length of b, in bytes (not UNIV_SQL_NULL)
@return positive, 0, negative, if a is greater, equal, less than b,
respectively */
UNIV_INLINE
int
innobase_mysql_cmp(
ulint prtype,
const byte* a,
unsigned int a_length,
const byte* b,
unsigned int b_length)
#ifndef DBUG_OFF
/** @return whether a data type is compatible with strnncoll() functions */
static bool is_strnncoll_compatible(ulint type)
{
#ifdef UNIV_DEBUG
switch (prtype & DATA_MYSQL_TYPE_MASK) {
switch (type) {
case MYSQL_TYPE_BIT:
case MYSQL_TYPE_STRING:
case MYSQL_TYPE_VAR_STRING:
......@@ -76,22 +62,12 @@ innobase_mysql_cmp(
case MYSQL_TYPE_BLOB:
case MYSQL_TYPE_LONG_BLOB:
case MYSQL_TYPE_VARCHAR:
break;
return true;
default:
ut_error;
}
#endif /* UNIV_DEBUG */
uint cs_num = (uint) dtype_get_charset_coll(prtype);
if (CHARSET_INFO* cs = get_charset(cs_num, MYF(MY_WME))) {
return(cs->coll->strnncollsp(
cs, a, a_length, b, b_length));
return false;
}
ib::fatal() << "Unable to find charset-collation " << cs_num;
return(0);
}
#endif /* DBUG_OFF */
/*************************************************************//**
Returns TRUE if two columns are equal for comparison purposes.
......@@ -309,68 +285,52 @@ cmp_gis_field(
@param[in] b_length length of b, in bytes (not UNIV_SQL_NULL)
@return positive, 0, negative, if a is greater, equal, less than b,
respectively */
static
int
cmp_whole_field(
ulint mtype,
ulint prtype,
const byte* a,
unsigned int a_length,
const byte* b,
unsigned int b_length)
static int cmp_whole_field(ulint mtype, ulint prtype,
const byte *a, unsigned a_length,
const byte *b, unsigned b_length)
{
float f_1;
float f_2;
double d_1;
double d_2;
switch (mtype) {
default:
ib::fatal() << "Unknown data type number " << mtype;
return 0;
case DATA_DECIMAL:
return(cmp_decimal(a, a_length, b, b_length));
return cmp_decimal(a, a_length, b, b_length);
case DATA_DOUBLE:
d_1 = mach_double_read(a);
d_2 = mach_double_read(b);
if (d_1 > d_2) {
return(1);
} else if (d_2 > d_1) {
return(-1);
{
const double af= mach_double_read(a), bf= mach_double_read(b);
return af > bf ? 1 : bf > af ? -1 : 0;
}
return(0);
case DATA_FLOAT:
f_1 = mach_float_read(a);
f_2 = mach_float_read(b);
if (f_1 > f_2) {
return(1);
} else if (f_2 > f_1) {
return(-1);
{
const float af= mach_float_read(a), bf= mach_float_read(b);
return af > bf ? 1 : bf > af ? -1 : 0;
}
return(0);
case DATA_VARCHAR:
case DATA_CHAR:
return(my_charset_latin1.coll->strnncollsp(
&my_charset_latin1,
a, a_length, b, b_length));
/* latin1_swedish_ci is treated as a special case in InnoDB.
Because it is a fixed-length encoding (mbminlen=mbmaxlen=1),
non-NULL CHAR(n) values will always occupy n bytes and we
can invoke strnncollsp() instead of strnncollsp_nchars(). */
return my_charset_latin1.coll->strnncollsp(&my_charset_latin1,
a, a_length, b, b_length);
case DATA_BLOB:
if (prtype & DATA_BINARY_TYPE) {
ib::error() << "Comparing a binary BLOB"
" using a character set collation!";
ut_ad(0);
}
ut_ad(!(prtype & DATA_BINARY_TYPE)); /* our only caller tested this */
/* fall through */
case DATA_VARMYSQL:
DBUG_ASSERT(is_strnncoll_compatible(prtype & DATA_MYSQL_TYPE_MASK));
if (CHARSET_INFO *cs= get_charset(dtype_get_charset_coll(prtype),
MYF(MY_WME)))
return cs->coll->strnncollsp(cs, a, a_length, b, b_length);
break;
case DATA_MYSQL:
return(innobase_mysql_cmp(prtype,
a, a_length, b, b_length));
default:
ib::fatal() << "Unknown data type number " << mtype;
DBUG_ASSERT(is_strnncoll_compatible(prtype & DATA_MYSQL_TYPE_MASK));
if (CHARSET_INFO *cs= get_charset(dtype_get_charset_coll(prtype),
MYF(MY_WME)))
return cs->coll->strnncollsp_nchars(cs, a, a_length, b, b_length,
std::max(a_length, b_length));
}
return(0);
ib::fatal() << "Unable to find charset-collation for " << prtype;
}
/** Compare two data fields.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment