Bug #25551: inconsistent behaviour in grouping NULL, depending on index type

 The optimizer takes away columns from GROUP BY/DISTINCT if they constitute
 all the parts of an unique index.
 However if some of the columns can contain NULLs this cannot be done 
(because an UNIQUE index can have multiple rows with NULL values).
 Fixed by not using UNIQUE indexes with nullable columns to remove
 grouping columns from GROUP BY/DISTINCT.
parent b8d55cc4
...@@ -530,7 +530,8 @@ id select_type table type possible_keys key key_len ref rows Extra ...@@ -530,7 +530,8 @@ id select_type table type possible_keys key key_len ref rows Extra
EXPLAIN SELECT DISTINCT a,b FROM t1 GROUP BY a,b; EXPLAIN SELECT DISTINCT a,b FROM t1 GROUP BY a,b;
id select_type table type possible_keys key key_len ref rows Extra id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 3 1 SIMPLE t1 ALL NULL NULL NULL NULL 3
CREATE TABLE t2(a INT, b INT, c INT, d INT, PRIMARY KEY (a,b)); CREATE TABLE t2(a INT, b INT NOT NULL, c INT NOT NULL, d INT,
PRIMARY KEY (a,b));
INSERT INTO t2 VALUES (1,1,1,50), (1,2,3,40), (2,1,3,4); INSERT INTO t2 VALUES (1,1,1,50), (1,2,3,40), (2,1,3,4);
EXPLAIN SELECT DISTINCT a FROM t2; EXPLAIN SELECT DISTINCT a FROM t2;
id select_type table type possible_keys key key_len ref rows Extra id select_type table type possible_keys key key_len ref rows Extra
...@@ -644,3 +645,26 @@ SELECT COUNT(*) FROM ...@@ -644,3 +645,26 @@ SELECT COUNT(*) FROM
COUNT(*) COUNT(*)
2 2
DROP TABLE t1, t2; DROP TABLE t1, t2;
CREATE TABLE t1 (a INT, UNIQUE (a));
INSERT INTO t1 VALUES (4),(null),(2),(1),(null),(3);
EXPLAIN SELECT DISTINCT a FROM t1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index NULL a 5 NULL 6 Using index
SELECT DISTINCT a FROM t1;
a
NULL
1
2
3
4
EXPLAIN SELECT a FROM t1 GROUP BY a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index NULL a 5 NULL 6 Using index
SELECT a FROM t1 GROUP BY a;
a
NULL
1
2
3
4
DROP TABLE t1;
...@@ -364,7 +364,8 @@ EXPLAIN SELECT a FROM t1 GROUP BY a; ...@@ -364,7 +364,8 @@ EXPLAIN SELECT a FROM t1 GROUP BY a;
EXPLAIN SELECT a,b FROM t1 GROUP BY a,b; EXPLAIN SELECT a,b FROM t1 GROUP BY a,b;
EXPLAIN SELECT DISTINCT a,b FROM t1 GROUP BY a,b; EXPLAIN SELECT DISTINCT a,b FROM t1 GROUP BY a,b;
CREATE TABLE t2(a INT, b INT, c INT, d INT, PRIMARY KEY (a,b)); CREATE TABLE t2(a INT, b INT NOT NULL, c INT NOT NULL, d INT,
PRIMARY KEY (a,b));
INSERT INTO t2 VALUES (1,1,1,50), (1,2,3,40), (2,1,3,4); INSERT INTO t2 VALUES (1,1,1,50), (1,2,3,40), (2,1,3,4);
EXPLAIN SELECT DISTINCT a FROM t2; EXPLAIN SELECT DISTINCT a FROM t2;
EXPLAIN SELECT DISTINCT a,a FROM t2; EXPLAIN SELECT DISTINCT a,a FROM t2;
...@@ -525,3 +526,17 @@ SELECT COUNT(*) FROM ...@@ -525,3 +526,17 @@ SELECT COUNT(*) FROM
(SELECT DISTINCT a FROM t2 WHERE a='oe' COLLATE latin1_german2_ci) dt; (SELECT DISTINCT a FROM t2 WHERE a='oe' COLLATE latin1_german2_ci) dt;
DROP TABLE t1, t2; DROP TABLE t1, t2;
#
# Bug #25551: inconsistent behaviour in grouping NULL, depending on index type
#
CREATE TABLE t1 (a INT, UNIQUE (a));
INSERT INTO t1 VALUES (4),(null),(2),(1),(null),(3);
EXPLAIN SELECT DISTINCT a FROM t1;
#result must have one row with NULL
SELECT DISTINCT a FROM t1;
EXPLAIN SELECT a FROM t1 GROUP BY a;
#result must have one row with NULL
SELECT a FROM t1 GROUP BY a;
DROP TABLE t1;
...@@ -848,10 +848,11 @@ JOIN::optimize() ...@@ -848,10 +848,11 @@ JOIN::optimize()
} }
/* /*
Check if we can optimize away GROUP BY/DISTINCT. Check if we can optimize away GROUP BY/DISTINCT.
We can do that if there are no aggregate functions and the We can do that if there are no aggregate functions, the
fields in DISTINCT clause (if present) and/or columns in GROUP BY fields in DISTINCT clause (if present) and/or columns in GROUP BY
(if present) contain direct references to all key parts of (if present) contain direct references to all key parts of
an unique index (in whatever order). an unique index (in whatever order) and if the key parts of the
unique index cannot contain NULLs.
Note that the unique keys for DISTINCT and GROUP BY should not Note that the unique keys for DISTINCT and GROUP BY should not
be the same (as long as they are unique). be the same (as long as they are unique).
...@@ -11856,7 +11857,7 @@ test_if_subkey(ORDER *order, TABLE *table, uint ref, uint ref_key_parts, ...@@ -11856,7 +11857,7 @@ test_if_subkey(ORDER *order, TABLE *table, uint ref, uint ref_key_parts,
/* /*
Check if GROUP BY/DISTINCT can be optimized away because the set is Check if GROUP BY/DISTINCT can be optimized away because the set is
already known to be distinct. already known to be distinct.
SYNOPSIS SYNOPSIS
...@@ -11864,7 +11865,7 @@ test_if_subkey(ORDER *order, TABLE *table, uint ref, uint ref_key_parts, ...@@ -11864,7 +11865,7 @@ test_if_subkey(ORDER *order, TABLE *table, uint ref, uint ref_key_parts,
table The table to operate on. table The table to operate on.
find_func function to iterate over the list and search find_func function to iterate over the list and search
for a field for a field
DESCRIPTION DESCRIPTION
Used in removing the GROUP BY/DISTINCT of the following types of Used in removing the GROUP BY/DISTINCT of the following types of
statements: statements:
...@@ -11875,12 +11876,13 @@ test_if_subkey(ORDER *order, TABLE *table, uint ref, uint ref_key_parts, ...@@ -11875,12 +11876,13 @@ test_if_subkey(ORDER *order, TABLE *table, uint ref, uint ref_key_parts,
then <any combination of a,b,c>,{whatever} is also distinct then <any combination of a,b,c>,{whatever} is also distinct
This function checks if all the key parts of any of the unique keys This function checks if all the key parts of any of the unique keys
of the table are referenced by a list : either the select list of the table are referenced by a list : either the select list
through find_field_in_item_list or GROUP BY list through through find_field_in_item_list or GROUP BY list through
find_field_in_order_list. find_field_in_order_list.
If the above holds then we can safely remove the GROUP BY/DISTINCT, If the above holds and the key parts cannot contain NULLs then we
can safely remove the GROUP BY/DISTINCT,
as no result set can be more distinct than an unique key. as no result set can be more distinct than an unique key.
RETURN VALUE RETURN VALUE
1 found 1 found
0 not found. 0 not found.
...@@ -11903,7 +11905,8 @@ list_contains_unique_index(TABLE *table, ...@@ -11903,7 +11905,8 @@ list_contains_unique_index(TABLE *table,
key_part < key_part_end; key_part < key_part_end;
key_part++) key_part++)
{ {
if (!find_func(key_part->field, data)) if (key_part->field->maybe_null() ||
!find_func(key_part->field, data))
break; break;
} }
if (key_part == key_part_end) if (key_part == key_part_end)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment