MDEV-30605: Wrong result while using index for group-by

A GROUP BY query which uses "MIN(pk)" and has "pk<>const" in the WHERE clause would produce wrong result when handled with "Using index for group-by". Here "pk" column is the table's primary key. The problem was introduced by fix for MDEV-23634. It made the range optimizer to not produce ranges for conditions in form "pk != const". However, LooseScan code requires that the optimizer is able to convert the condition on the MIN/MAX column into an equivalent range. The range is used to locate the row that has the MIN/MAX value. LooseScan checks this in check_group_min_max_predicates(). This fix makes the code in that function to take into account that "pk != const" does not produce a range.

MDEV-30605: Wrong result while using index for group-by
A GROUP BY query which uses "MIN(pk)" and has "pk<>const" in the WHERE clause would produce wrong result when handled with "Using index for group-by". Here "pk" column is the table's primary key. The problem was introduced by fix for MDEV-23634. It made the range optimizer to not produce ranges for conditions in form "pk != const". However, LooseScan code requires that the optimizer is able to convert the condition on the MIN/MAX column into an equivalent range. The range is used to locate the row that has the MIN/MAX value. LooseScan checks this in check_group_min_max_predicates(). This fix makes the code in that function to take into account that "pk != const" does not produce a range.
be7ef656 · Sergei Petrunia · 6c196090 · be7ef656 · be7ef656 · be7ef656
Commit be7ef656 authored Mar 28, 2023 by Sergei Petrunia
Showing with 51 additions and 18 deletions

mysql-test/main/group_min_max.result mysql-test/main/group_min_max.result +12 -0

mysql-test/main/group_min_max.test mysql-test/main/group_min_max.test +11 -0

sql/opt_range.cc sql/opt_range.cc +28 -18

No files found.
--- a/mysql-test/main/group_min_max.result
+++ b/mysql-test/main/group_min_max.result
@@ -4083,5 +4083,17 @@ MIN(pk)
 1
 DROP TABLE t1, t2;
 #
+# MDEV-30605 Wrong result while using index for group-by
+#
+CREATE TABLE t1 (pk INT primary key, a int, key(a)) engine=innodb;
+INSERT INTO t1 VALUES (1,-1),(2,8),(3,5),(4,-1),(5,10), (6,-1);
+SELECT MIN(pk), a FROM t1 WHERE pk <> 1 GROUP BY a;
+MIN(pk)	a
+4	-1
+3	5
+2	8
+5	10
+DROP TABLE t1;
+#
 # End of 10.5 tests
 #
--- a/mysql-test/main/group_min_max.test
+++ b/mysql-test/main/group_min_max.test
@@ -1737,6 +1737,17 @@ SELECT SQL_BUFFER_RESULT MIN(pk) FROM t1, t2;
 SELECT MIN(pk) FROM t1, t2;
 DROP TABLE t1, t2;
+--echo #
+--echo # MDEV-30605 Wrong result while using index for group-by
+--echo #
+CREATE TABLE t1 (pk INT primary key, a int, key(a)) engine=innodb;
+INSERT INTO t1 VALUES (1,-1),(2,8),(3,5),(4,-1),(5,10), (6,-1);
+SELECT MIN(pk), a FROM t1 WHERE pk <> 1 GROUP BY a;
+DROP TABLE t1;
 --echo #
 --echo # End of 10.5 tests
 --echo #
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -461,7 +461,7 @@ void print_range_for_non_indexed_field(String *out, Field *field,
 static void print_min_range_operator(String *out, const ha_rkey_function flag);
 static void print_max_range_operator(String *out, const ha_rkey_function flag);
-static bool is_field_an_unique_index(RANGE_OPT_PARAM *param, Field *field);
+static bool is_field_an_unique_index(Field *field);
 /*
  SEL_IMERGE is a list of possible ways to do index merge, i.e. it is
@@ -7752,8 +7752,13 @@ SEL_TREE *Item_func_ne::get_func_mm_tree(RANGE_OPT_PARAM *param,
    If this condition is a "col1<>...", where there is a UNIQUE KEY(col1),
    do not construct a SEL_TREE from it. A condition that excludes just one
    row in the table is not selective (unless there are only a few rows)
+    Note: this logic must be in sync with code in
+    check_group_min_max_predicates(). That function walks an Item* condition
+    and checks if the range optimizer would produce an equivalent range for
+    it.
  */
-  if (is_field_an_unique_index(param, field))
+  if (param->using_real_indexes && is_field_an_unique_index(field))
    DBUG_RETURN(NULL);
  DBUG_RETURN(get_ne_mm_tree(param, field, value, value));
 }
@@ -7865,7 +7870,7 @@ SEL_TREE *Item_func_in::get_func_mm_tree(RANGE_OPT_PARAM *param,
         - if there are a lot of constants, the overhead of building and
           processing enormous range list is not worth it.
      */
-      if (is_field_an_unique_index(param, field))
+      if (param->using_real_indexes && is_field_an_unique_index(field))
        DBUG_RETURN(0);
      /* Get a SEL_TREE for "(-inf|NULL) < X < c_0" interval.  */
@@ -8574,14 +8579,9 @@ SEL_TREE *Item_equal::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr)
    In the future we could also add "almost unique" indexes where any value is
    present only in a few rows (but necessarily exactly one row)
 */
-static bool is_field_an_unique_index(RANGE_OPT_PARAM *param, Field *field)
+static bool is_field_an_unique_index(Field *field)
 {
  DBUG_ENTER("is_field_an_unique_index");
-  // The check for using_real_indexes is there because of the heuristics
-  // this function is used for.
-  if (param->using_real_indexes)
-  {
  key_map::Iterator it(field->key_start);
  uint key_no;
  while ((key_no= it++) != key_map::Iterator::BITMAP_END)
@@ -8593,7 +8593,6 @@ static bool is_field_an_unique_index(RANGE_OPT_PARAM *param, Field *field)
      DBUG_RETURN(true);
    }
  }
-  }
  DBUG_RETURN(false);
 }
@@ -13475,7 +13474,7 @@ cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
         - (C between const_i and const_j)
         - C IS NULL
         - C IS NOT NULL
-         - C != const
+         - C != const  (unless C is the primary key)
    SA4. If Q has a GROUP BY clause, there are no other aggregate functions
         except MIN and MAX. For queries with DISTINCT, aggregate functions
         are allowed.
@@ -14358,6 +14357,17 @@ check_group_min_max_predicates(Item *cond, Item_field *min_max_arg_item,
        if (!simple_pred(pred, args, &inv))
          DBUG_RETURN(FALSE);
+        /*
+          Follow the logic in Item_func_ne::get_func_mm_tree(): condition
+          in form "tbl.primary_key <> const" is not used to produce intervals.
+          If the condition doesn't have an equivalent interval, this means we
+          fail LooseScan's condition SA3. Return FALSE to indicate this.
+        */
+        if (pred_type == Item_func::NE_FUNC &&
+            is_field_an_unique_index(min_max_arg_item->field))
+          DBUG_RETURN(FALSE);
        if (args[0] && args[1]) // this is a binary function or BETWEEN
        {
          DBUG_ASSERT(pred->fixed_type_handler());