Commit be7ef656 authored by Sergei Petrunia's avatar Sergei Petrunia

MDEV-30605: Wrong result while using index for group-by

A GROUP BY query which uses "MIN(pk)" and has "pk<>const" in the
WHERE clause would produce wrong result when handled with "Using index
for group-by".  Here "pk" column is the table's primary key.

The problem was introduced by fix for MDEV-23634. It made the range
optimizer to not produce ranges for conditions in form "pk != const".

However, LooseScan code requires that the optimizer is able to
convert the condition on the MIN/MAX column into an equivalent range.
The range is used to locate the row that has the MIN/MAX value.

LooseScan checks this in check_group_min_max_predicates(). This fix
makes the code in that function to take into account that "pk != const"
does not produce a range.
parent 6c196090
...@@ -4083,5 +4083,17 @@ MIN(pk) ...@@ -4083,5 +4083,17 @@ MIN(pk)
1 1
DROP TABLE t1, t2; DROP TABLE t1, t2;
# #
# MDEV-30605 Wrong result while using index for group-by
#
CREATE TABLE t1 (pk INT primary key, a int, key(a)) engine=innodb;
INSERT INTO t1 VALUES (1,-1),(2,8),(3,5),(4,-1),(5,10), (6,-1);
SELECT MIN(pk), a FROM t1 WHERE pk <> 1 GROUP BY a;
MIN(pk) a
4 -1
3 5
2 8
5 10
DROP TABLE t1;
#
# End of 10.5 tests # End of 10.5 tests
# #
...@@ -1737,6 +1737,17 @@ SELECT SQL_BUFFER_RESULT MIN(pk) FROM t1, t2; ...@@ -1737,6 +1737,17 @@ SELECT SQL_BUFFER_RESULT MIN(pk) FROM t1, t2;
SELECT MIN(pk) FROM t1, t2; SELECT MIN(pk) FROM t1, t2;
DROP TABLE t1, t2; DROP TABLE t1, t2;
--echo #
--echo # MDEV-30605 Wrong result while using index for group-by
--echo #
CREATE TABLE t1 (pk INT primary key, a int, key(a)) engine=innodb;
INSERT INTO t1 VALUES (1,-1),(2,8),(3,5),(4,-1),(5,10), (6,-1);
SELECT MIN(pk), a FROM t1 WHERE pk <> 1 GROUP BY a;
DROP TABLE t1;
--echo # --echo #
--echo # End of 10.5 tests --echo # End of 10.5 tests
--echo # --echo #
...@@ -461,7 +461,7 @@ void print_range_for_non_indexed_field(String *out, Field *field, ...@@ -461,7 +461,7 @@ void print_range_for_non_indexed_field(String *out, Field *field,
static void print_min_range_operator(String *out, const ha_rkey_function flag); static void print_min_range_operator(String *out, const ha_rkey_function flag);
static void print_max_range_operator(String *out, const ha_rkey_function flag); static void print_max_range_operator(String *out, const ha_rkey_function flag);
static bool is_field_an_unique_index(RANGE_OPT_PARAM *param, Field *field); static bool is_field_an_unique_index(Field *field);
/* /*
SEL_IMERGE is a list of possible ways to do index merge, i.e. it is SEL_IMERGE is a list of possible ways to do index merge, i.e. it is
...@@ -7752,8 +7752,13 @@ SEL_TREE *Item_func_ne::get_func_mm_tree(RANGE_OPT_PARAM *param, ...@@ -7752,8 +7752,13 @@ SEL_TREE *Item_func_ne::get_func_mm_tree(RANGE_OPT_PARAM *param,
If this condition is a "col1<>...", where there is a UNIQUE KEY(col1), If this condition is a "col1<>...", where there is a UNIQUE KEY(col1),
do not construct a SEL_TREE from it. A condition that excludes just one do not construct a SEL_TREE from it. A condition that excludes just one
row in the table is not selective (unless there are only a few rows) row in the table is not selective (unless there are only a few rows)
Note: this logic must be in sync with code in
check_group_min_max_predicates(). That function walks an Item* condition
and checks if the range optimizer would produce an equivalent range for
it.
*/ */
if (is_field_an_unique_index(param, field)) if (param->using_real_indexes && is_field_an_unique_index(field))
DBUG_RETURN(NULL); DBUG_RETURN(NULL);
DBUG_RETURN(get_ne_mm_tree(param, field, value, value)); DBUG_RETURN(get_ne_mm_tree(param, field, value, value));
} }
...@@ -7865,7 +7870,7 @@ SEL_TREE *Item_func_in::get_func_mm_tree(RANGE_OPT_PARAM *param, ...@@ -7865,7 +7870,7 @@ SEL_TREE *Item_func_in::get_func_mm_tree(RANGE_OPT_PARAM *param,
- if there are a lot of constants, the overhead of building and - if there are a lot of constants, the overhead of building and
processing enormous range list is not worth it. processing enormous range list is not worth it.
*/ */
if (is_field_an_unique_index(param, field)) if (param->using_real_indexes && is_field_an_unique_index(field))
DBUG_RETURN(0); DBUG_RETURN(0);
/* Get a SEL_TREE for "(-inf|NULL) < X < c_0" interval. */ /* Get a SEL_TREE for "(-inf|NULL) < X < c_0" interval. */
...@@ -8574,14 +8579,9 @@ SEL_TREE *Item_equal::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr) ...@@ -8574,14 +8579,9 @@ SEL_TREE *Item_equal::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr)
In the future we could also add "almost unique" indexes where any value is In the future we could also add "almost unique" indexes where any value is
present only in a few rows (but necessarily exactly one row) present only in a few rows (but necessarily exactly one row)
*/ */
static bool is_field_an_unique_index(RANGE_OPT_PARAM *param, Field *field) static bool is_field_an_unique_index(Field *field)
{ {
DBUG_ENTER("is_field_an_unique_index"); DBUG_ENTER("is_field_an_unique_index");
// The check for using_real_indexes is there because of the heuristics
// this function is used for.
if (param->using_real_indexes)
{
key_map::Iterator it(field->key_start); key_map::Iterator it(field->key_start);
uint key_no; uint key_no;
while ((key_no= it++) != key_map::Iterator::BITMAP_END) while ((key_no= it++) != key_map::Iterator::BITMAP_END)
...@@ -8593,7 +8593,6 @@ static bool is_field_an_unique_index(RANGE_OPT_PARAM *param, Field *field) ...@@ -8593,7 +8593,6 @@ static bool is_field_an_unique_index(RANGE_OPT_PARAM *param, Field *field)
DBUG_RETURN(true); DBUG_RETURN(true);
} }
} }
}
DBUG_RETURN(false); DBUG_RETURN(false);
} }
...@@ -13475,7 +13474,7 @@ cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts, ...@@ -13475,7 +13474,7 @@ cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
- (C between const_i and const_j) - (C between const_i and const_j)
- C IS NULL - C IS NULL
- C IS NOT NULL - C IS NOT NULL
- C != const - C != const (unless C is the primary key)
SA4. If Q has a GROUP BY clause, there are no other aggregate functions SA4. If Q has a GROUP BY clause, there are no other aggregate functions
except MIN and MAX. For queries with DISTINCT, aggregate functions except MIN and MAX. For queries with DISTINCT, aggregate functions
are allowed. are allowed.
...@@ -14358,6 +14357,17 @@ check_group_min_max_predicates(Item *cond, Item_field *min_max_arg_item, ...@@ -14358,6 +14357,17 @@ check_group_min_max_predicates(Item *cond, Item_field *min_max_arg_item,
if (!simple_pred(pred, args, &inv)) if (!simple_pred(pred, args, &inv))
DBUG_RETURN(FALSE); DBUG_RETURN(FALSE);
/*
Follow the logic in Item_func_ne::get_func_mm_tree(): condition
in form "tbl.primary_key <> const" is not used to produce intervals.
If the condition doesn't have an equivalent interval, this means we
fail LooseScan's condition SA3. Return FALSE to indicate this.
*/
if (pred_type == Item_func::NE_FUNC &&
is_field_an_unique_index(min_max_arg_item->field))
DBUG_RETURN(FALSE);
if (args[0] && args[1]) // this is a binary function or BETWEEN if (args[0] && args[1]) // this is a binary function or BETWEEN
{ {
DBUG_ASSERT(pred->fixed_type_handler()); DBUG_ASSERT(pred->fixed_type_handler());
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment