MDEV-25440: Indexed CHAR columns are broken with NO_PAD collations

cmp_data(): Compare different-length CHAR fields with the new strnncollsp_nchars function that will pad spaces if needed. Any InnoDB ROW_FORMAT except the original one that was named ROW_FORMAT=REDUNDANT in MySQL 5.0.3 will internally store CHAR(n) columns as variable-length if the character encoding is variable-length. Spaces may be trimmed from the end. For NOT NULL values, the minimum length is always n*mbminlen. In cmp_data() we only know the lengths in bytes and we cannot easily know the ROW_FORMAT. is_strnncoll_compatible(): Refactored from innobase_mysql_cmp(). innobase_mysql_cmp(): Merged to cmp_whole_field(). cmp_whole_field(): Invoke strnncollsp_nchars for DATA_MYSQL (the CHAR type with any other collation than latin1_swedish_ci). Reviewed by: Alexander Barkov Tested by: Roel Van de Paar

MDEV-25440: Indexed CHAR columns are broken with NO_PAD collations
cmp_data(): Compare different-length CHAR fields with the new strnncollsp_nchars function that will pad spaces if needed. Any InnoDB ROW_FORMAT except the original one that was named ROW_FORMAT=REDUNDANT in MySQL 5.0.3 will internally store CHAR(n) columns as variable-length if the character encoding is variable-length. Spaces may be trimmed from the end. For NOT NULL values, the minimum length is always n*mbminlen. In cmp_data() we only know the lengths in bytes and we cannot easily know the ROW_FORMAT. is_strnncoll_compatible(): Refactored from innobase_mysql_cmp(). innobase_mysql_cmp(): Merged to cmp_whole_field(). cmp_whole_field(): Invoke strnncollsp_nchars for DATA_MYSQL (the CHAR type with any other collation than latin1_swedish_ci). Reviewed by: Alexander Barkov Tested by: Roel Van de Paar
e9aac091 · Marko Mäkelä · 37144afb · e9aac091 · e9aac091 · e9aac091
Commit e9aac091 authored Jan 26, 2022 by Marko Mäkelä
3 changed files
--- a/mysql-test/suite/innodb/r/no_pad.result
+++ b/mysql-test/suite/innodb/r/no_pad.result
+CREATE TABLE t1 (a CHAR(8), id INT, PRIMARY KEY (a,id)) COLLATE utf8_nopad_bin
+ENGINE=InnoDB ROW_FORMAT=REDUNDANT;
+INSERT INTO t1 VALUES ('',1);
+ALTER TABLE t1 ROW_FORMAT=DYNAMIC;
+INSERT INTO t1 VALUES ('',2);
+ALTER TABLE t1 ROW_FORMAT=REDUNDANT;
+DROP TABLE t1;
--- a/mysql-test/suite/innodb/t/no_pad.test
+++ b/mysql-test/suite/innodb/t/no_pad.test
+--source include/have_innodb.inc
+CREATE TABLE t1 (a CHAR(8), id INT, PRIMARY KEY (a,id)) COLLATE utf8_nopad_bin
+ENGINE=InnoDB ROW_FORMAT=REDUNDANT;
+INSERT INTO t1 VALUES ('',1);
+ALTER TABLE t1 ROW_FORMAT=DYNAMIC;
+INSERT INTO t1 VALUES ('',2);
+ALTER TABLE t1 ROW_FORMAT=REDUNDANT;
+DROP TABLE t1;
--- a/storage/innobase/rem/rem0cmp.cc
+++ b/storage/innobase/rem/rem0cmp.cc
 /*****************************************************************************
 Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2020, 2021, MariaDB Corporation.
+Copyright (c) 2020, 2022, MariaDB Corporation.
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -49,25 +49,11 @@ At the present, the comparison functions return 0 in the case,
 where two records disagree only in the way that one
 has more fields than the other. */
-/** Compare two data fields.
+#ifndef DBUG_OFF
-@param[in] prtype precise type
+/** @return whether a data type is compatible with strnncoll() functions */
-@param[in] a data field
+static bool is_strnncoll_compatible(ulint type)
-@param[in] a_length length of a, in bytes (not UNIV_SQL_NULL)
-@param[in] b data field
-@param[in] b_length length of b, in bytes (not UNIV_SQL_NULL)
-@return positive, 0, negative, if a is greater, equal, less than b,
-respectively */
-UNIV_INLINE
-int
-innobase_mysql_cmp(
-	ulint		prtype,
-	const byte*	a,
-	unsigned int	a_length,
-	const byte*	b,
-	unsigned int	b_length)
 {
-#ifdef UNIV_DEBUG
+  switch (type) {
-	switch (prtype & DATA_MYSQL_TYPE_MASK) {
  case MYSQL_TYPE_BIT:
  case MYSQL_TYPE_STRING:
  case MYSQL_TYPE_VAR_STRING:
@@ -76,22 +62,12 @@ innobase_mysql_cmp(
  case MYSQL_TYPE_BLOB:
  case MYSQL_TYPE_LONG_BLOB:
  case MYSQL_TYPE_VARCHAR:
-		break;
+    return true;
  default:
-		ut_error;
+    return false;
-	}
-#endif /* UNIV_DEBUG */
-	uint cs_num = (uint) dtype_get_charset_coll(prtype);
-	if (CHARSET_INFO* cs = get_charset(cs_num, MYF(MY_WME))) {
-		return(cs->coll->strnncollsp(
-			       cs, a, a_length, b, b_length));
  }
-	ib::fatal() << "Unable to find charset-collation " << cs_num;
-	return(0);
 }
+#endif /* DBUG_OFF */
 /*************************************************************//**
 Returns TRUE if two columns are equal for comparison purposes.
@@ -309,68 +285,52 @@ cmp_gis_field(
 @param[in] b_length length of b, in bytes (not UNIV_SQL_NULL)
 @return positive, 0, negative, if a is greater, equal, less than b,
 respectively */
-static
+static int cmp_whole_field(ulint mtype, ulint prtype,
-int
+                           const byte *a, unsigned a_length,
-cmp_whole_field(
+                           const byte *b, unsigned b_length)
-	ulint		mtype,
-	ulint		prtype,
-	const byte*	a,
-	unsigned int	a_length,
-	const byte*	b,
-	unsigned int	b_length)
 {
-	float		f_1;
-	float		f_2;
-	double		d_1;
-	double		d_2;
  switch (mtype) {
+  default:
+    ib::fatal() << "Unknown data type number " << mtype;
+    return 0;
  case DATA_DECIMAL:
-		return(cmp_decimal(a, a_length, b, b_length));
+    return cmp_decimal(a, a_length, b, b_length);
  case DATA_DOUBLE:
-		d_1 = mach_double_read(a);
+    {
-		d_2 = mach_double_read(b);
+      const double af= mach_double_read(a), bf= mach_double_read(b);
+      return af > bf ? 1 : bf > af ? -1 : 0;
-		if (d_1 > d_2) {
-			return(1);
-		} else if (d_2 > d_1) {
-			return(-1);
    }
-		return(0);
  case DATA_FLOAT:
-		f_1 = mach_float_read(a);
+    {
-		f_2 = mach_float_read(b);
+      const float af= mach_float_read(a), bf= mach_float_read(b);
+      return af > bf ? 1 : bf > af ? -1 : 0;
-		if (f_1 > f_2) {
-			return(1);
-		} else if (f_2 > f_1) {
-			return(-1);
    }
-		return(0);
  case DATA_VARCHAR:
  case DATA_CHAR:
-		return(my_charset_latin1.coll->strnncollsp(
+    /* latin1_swedish_ci is treated as a special case in InnoDB.
-			       &my_charset_latin1,
+    Because it is a fixed-length encoding (mbminlen=mbmaxlen=1),
-			       a, a_length, b, b_length));
+    non-NULL CHAR(n) values will always occupy n bytes and we
+    can invoke strnncollsp() instead of strnncollsp_nchars(). */
+    return my_charset_latin1.coll->strnncollsp(&my_charset_latin1,
+                                               a, a_length, b, b_length);
  case DATA_BLOB:
-		if (prtype & DATA_BINARY_TYPE) {
+    ut_ad(!(prtype & DATA_BINARY_TYPE)); /* our only caller tested this */
-			ib::error() << "Comparing a binary BLOB"
-				" using a character set collation!";
-			ut_ad(0);
-		}
    /* fall through */
  case DATA_VARMYSQL:
+    DBUG_ASSERT(is_strnncoll_compatible(prtype & DATA_MYSQL_TYPE_MASK));
+    if (CHARSET_INFO *cs= get_charset(dtype_get_charset_coll(prtype),
+                                      MYF(MY_WME)))
+      return cs->coll->strnncollsp(cs, a, a_length, b, b_length);
+    break;
  case DATA_MYSQL:
-		return(innobase_mysql_cmp(prtype,
+    DBUG_ASSERT(is_strnncoll_compatible(prtype & DATA_MYSQL_TYPE_MASK));
-					  a, a_length, b, b_length));
+    if (CHARSET_INFO *cs= get_charset(dtype_get_charset_coll(prtype),
-	default:
+                                      MYF(MY_WME)))
-		ib::fatal() << "Unknown data type number " << mtype;
+      return cs->coll->strnncollsp_nchars(cs, a, a_length, b, b_length,
+                                          std::max(a_length, b_length));
  }
-	return(0);
+  ib::fatal() << "Unable to find charset-collation for " << prtype;
 }
 /** Compare two data fields.