Commit e9aac091 authored by Marko Mäkelä

MDEV-25440: Indexed CHAR columns are broken with NO_PAD collations

cmp_data(): Compare different-length CHAR fields with
the new strnncollsp_nchars function that will pad spaces if needed.

Every InnoDB ROW_FORMAT except the original one (which was named
ROW_FORMAT=REDUNDANT in MySQL 5.0.3) internally stores
CHAR(n) columns as variable-length fields if the character encoding is
variable-length. Trailing spaces may be trimmed.
For NOT NULL values, the minimum length is always n*mbminlen.
In cmp_data() we only know the lengths in bytes, and we cannot
easily determine the ROW_FORMAT.

is_strnncoll_compatible(): Refactored from innobase_mysql_cmp().

innobase_mysql_cmp(): Merged to cmp_whole_field().

cmp_whole_field(): Invoke strnncollsp_nchars for DATA_MYSQL
(the CHAR type with any collation other than latin1_swedish_ci).

Reviewed by: Alexander Barkov
Tested by: Roel Van de Paar
parent 37144afb
CREATE TABLE t1 (a CHAR(8), id INT, PRIMARY KEY (a,id)) COLLATE utf8_nopad_bin
ENGINE=InnoDB ROW_FORMAT=REDUNDANT;
INSERT INTO t1 VALUES ('',1);
ALTER TABLE t1 ROW_FORMAT=DYNAMIC;
INSERT INTO t1 VALUES ('',2);
ALTER TABLE t1 ROW_FORMAT=REDUNDANT;
DROP TABLE t1;
# Regression test for MDEV-25440 (indexed CHAR columns with NO_PAD collations).
--source include/have_innodb.inc
# CHAR(8) is the leading PRIMARY KEY column under the utf8_nopad_bin collation;
# the table starts out in ROW_FORMAT=REDUNDANT.
CREATE TABLE t1 (a CHAR(8), id INT, PRIMARY KEY (a,id)) COLLATE utf8_nopad_bin
ENGINE=InnoDB ROW_FORMAT=REDUNDANT;
INSERT INTO t1 VALUES ('',1);
# Switch to ROW_FORMAT=DYNAMIC and insert a second empty string with another id.
ALTER TABLE t1 ROW_FORMAT=DYNAMIC;
INSERT INTO t1 VALUES ('',2);
# Switch back to ROW_FORMAT=REDUNDANT while the table holds both rows.
ALTER TABLE t1 ROW_FORMAT=REDUNDANT;
DROP TABLE t1;
/***************************************************************************** /*****************************************************************************
Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2020, 2021, MariaDB Corporation. Copyright (c) 2020, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software the terms of the GNU General Public License as published by the Free Software
...@@ -49,49 +49,25 @@ At the present, the comparison functions return 0 in the case, ...@@ -49,49 +49,25 @@ At the present, the comparison functions return 0 in the case,
where two records disagree only in the way that one where two records disagree only in the way that one
has more fields than the other. */ has more fields than the other. */
/** Compare two data fields. #ifndef DBUG_OFF
@param[in] prtype precise type /** @return whether a data type is compatible with strnncoll() functions */
@param[in] a data field static bool is_strnncoll_compatible(ulint type)
@param[in] a_length length of a, in bytes (not UNIV_SQL_NULL)
@param[in] b data field
@param[in] b_length length of b, in bytes (not UNIV_SQL_NULL)
@return positive, 0, negative, if a is greater, equal, less than b,
respectively */
UNIV_INLINE
int
innobase_mysql_cmp(
ulint prtype,
const byte* a,
unsigned int a_length,
const byte* b,
unsigned int b_length)
{ {
#ifdef UNIV_DEBUG switch (type) {
switch (prtype & DATA_MYSQL_TYPE_MASK) { case MYSQL_TYPE_BIT:
case MYSQL_TYPE_BIT: case MYSQL_TYPE_STRING:
case MYSQL_TYPE_STRING: case MYSQL_TYPE_VAR_STRING:
case MYSQL_TYPE_VAR_STRING: case MYSQL_TYPE_TINY_BLOB:
case MYSQL_TYPE_TINY_BLOB: case MYSQL_TYPE_MEDIUM_BLOB:
case MYSQL_TYPE_MEDIUM_BLOB: case MYSQL_TYPE_BLOB:
case MYSQL_TYPE_BLOB: case MYSQL_TYPE_LONG_BLOB:
case MYSQL_TYPE_LONG_BLOB: case MYSQL_TYPE_VARCHAR:
case MYSQL_TYPE_VARCHAR: return true;
break; default:
default: return false;
ut_error; }
}
#endif /* UNIV_DEBUG */
uint cs_num = (uint) dtype_get_charset_coll(prtype);
if (CHARSET_INFO* cs = get_charset(cs_num, MYF(MY_WME))) {
return(cs->coll->strnncollsp(
cs, a, a_length, b, b_length));
}
ib::fatal() << "Unable to find charset-collation " << cs_num;
return(0);
} }
#endif /* DBUG_OFF */
/*************************************************************//** /*************************************************************//**
Returns TRUE if two columns are equal for comparison purposes. Returns TRUE if two columns are equal for comparison purposes.
...@@ -309,68 +285,52 @@ cmp_gis_field( ...@@ -309,68 +285,52 @@ cmp_gis_field(
@param[in] b_length length of b, in bytes (not UNIV_SQL_NULL) @param[in] b_length length of b, in bytes (not UNIV_SQL_NULL)
@return positive, 0, negative, if a is greater, equal, less than b, @return positive, 0, negative, if a is greater, equal, less than b,
respectively */ respectively */
static static int cmp_whole_field(ulint mtype, ulint prtype,
int const byte *a, unsigned a_length,
cmp_whole_field( const byte *b, unsigned b_length)
ulint mtype,
ulint prtype,
const byte* a,
unsigned int a_length,
const byte* b,
unsigned int b_length)
{ {
float f_1; switch (mtype) {
float f_2; default:
double d_1; ib::fatal() << "Unknown data type number " << mtype;
double d_2; return 0;
case DATA_DECIMAL:
switch (mtype) { return cmp_decimal(a, a_length, b, b_length);
case DATA_DECIMAL: case DATA_DOUBLE:
return(cmp_decimal(a, a_length, b, b_length)); {
case DATA_DOUBLE: const double af= mach_double_read(a), bf= mach_double_read(b);
d_1 = mach_double_read(a); return af > bf ? 1 : bf > af ? -1 : 0;
d_2 = mach_double_read(b); }
case DATA_FLOAT:
if (d_1 > d_2) { {
return(1); const float af= mach_float_read(a), bf= mach_float_read(b);
} else if (d_2 > d_1) { return af > bf ? 1 : bf > af ? -1 : 0;
return(-1); }
} case DATA_VARCHAR:
case DATA_CHAR:
return(0); /* latin1_swedish_ci is treated as a special case in InnoDB.
Because it is a fixed-length encoding (mbminlen=mbmaxlen=1),
case DATA_FLOAT: non-NULL CHAR(n) values will always occupy n bytes and we
f_1 = mach_float_read(a); can invoke strnncollsp() instead of strnncollsp_nchars(). */
f_2 = mach_float_read(b); return my_charset_latin1.coll->strnncollsp(&my_charset_latin1,
a, a_length, b, b_length);
if (f_1 > f_2) { case DATA_BLOB:
return(1); ut_ad(!(prtype & DATA_BINARY_TYPE)); /* our only caller tested this */
} else if (f_2 > f_1) { /* fall through */
return(-1); case DATA_VARMYSQL:
} DBUG_ASSERT(is_strnncoll_compatible(prtype & DATA_MYSQL_TYPE_MASK));
if (CHARSET_INFO *cs= get_charset(dtype_get_charset_coll(prtype),
return(0); MYF(MY_WME)))
case DATA_VARCHAR: return cs->coll->strnncollsp(cs, a, a_length, b, b_length);
case DATA_CHAR: break;
return(my_charset_latin1.coll->strnncollsp( case DATA_MYSQL:
&my_charset_latin1, DBUG_ASSERT(is_strnncoll_compatible(prtype & DATA_MYSQL_TYPE_MASK));
a, a_length, b, b_length)); if (CHARSET_INFO *cs= get_charset(dtype_get_charset_coll(prtype),
case DATA_BLOB: MYF(MY_WME)))
if (prtype & DATA_BINARY_TYPE) { return cs->coll->strnncollsp_nchars(cs, a, a_length, b, b_length,
ib::error() << "Comparing a binary BLOB" std::max(a_length, b_length));
" using a character set collation!"; }
ut_ad(0);
} ib::fatal() << "Unable to find charset-collation for " << prtype;
/* fall through */
case DATA_VARMYSQL:
case DATA_MYSQL:
return(innobase_mysql_cmp(prtype,
a, a_length, b, b_length));
default:
ib::fatal() << "Unknown data type number " << mtype;
}
return(0);
} }
/** Compare two data fields. /** Compare two data fields.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment