Bug #25551: inconsistent behaviour in grouping NULL, depending on index type

The optimizer takes away columns from GROUP BY/DISTINCT if they constitute all the parts of an unique index. However if some of the columns can contain NULLs this cannot be done (because an UNIQUE index can have multiple rows with NULL values). Fixed by not using UNIQUE indexes with nullable columns to remove grouping columns from GROUP BY/DISTINCT.

Bug #25551: inconsistent behaviour in grouping NULL, depending on index type
The optimizer takes away columns from GROUP BY/DISTINCT if they constitute all the parts of an unique index. However if some of the columns can contain NULLs this cannot be done (because an UNIQUE index can have multiple rows with NULL values). Fixed by not using UNIQUE indexes with nullable columns to remove grouping columns from GROUP BY/DISTINCT.
2ba683f2 · gkodinov/kgeorge@macbook.gmz · b8d55cc4 · 2ba683f2 · 2ba683f2 · 2ba683f2
Commit 2ba683f2 authored Jan 31, 2007 by gkodinov/kgeorge@macbook.gmz
Hide whitespace changes
Inline Side-by-side

Showing with 52 additions and 10 deletions

mysql-test/r/distinct.result mysql-test/r/distinct.result +25 -1

mysql-test/t/distinct.test mysql-test/t/distinct.test +16 -1

sql/sql_select.cc sql/sql_select.cc +11 -8

No files found.
--- a/mysql-test/r/distinct.result
+++ b/mysql-test/r/distinct.result
@@ -530,7 +530,8 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 EXPLAIN SELECT DISTINCT a,b FROM t1 GROUP BY a,b;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	3	
-CREATE TABLE t2(a INT, b INT, c INT, d INT, PRIMARY KEY (a,b));
+CREATE TABLE t2(a INT, b INT NOT NULL, c INT NOT NULL, d INT, 
+PRIMARY KEY (a,b));
 INSERT INTO t2 VALUES (1,1,1,50), (1,2,3,40), (2,1,3,4);
 EXPLAIN SELECT DISTINCT a FROM t2;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
@@ -644,3 +645,26 @@ SELECT COUNT(*) FROM
 COUNT(*)
 2
 DROP TABLE t1, t2;
+CREATE TABLE t1 (a INT, UNIQUE (a));
+INSERT INTO t1 VALUES (4),(null),(2),(1),(null),(3);
+EXPLAIN SELECT DISTINCT a FROM t1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	a	5	NULL	6	Using index
+SELECT DISTINCT a FROM t1;
+a
+NULL
+1
+2
+3
+4
+EXPLAIN SELECT a FROM t1 GROUP BY a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	a	5	NULL	6	Using index
+SELECT a FROM t1 GROUP BY a;
+a
+NULL
+1
+2
+3
+4
+DROP TABLE t1;
--- a/mysql-test/t/distinct.test
+++ b/mysql-test/t/distinct.test
@@ -364,7 +364,8 @@ EXPLAIN SELECT a FROM t1 GROUP BY a;
 EXPLAIN SELECT a,b FROM t1 GROUP BY a,b;
 EXPLAIN SELECT DISTINCT a,b FROM t1 GROUP BY a,b;

-CREATE TABLE t2(a INT, b INT, c INT, d INT, PRIMARY KEY (a,b));
+CREATE TABLE t2(a INT, b INT NOT NULL, c INT NOT NULL, d INT, 
+                PRIMARY KEY (a,b));
 INSERT INTO t2 VALUES (1,1,1,50), (1,2,3,40), (2,1,3,4);
 EXPLAIN SELECT DISTINCT a FROM t2;
 EXPLAIN SELECT DISTINCT a,a FROM t2;
@@ -525,3 +526,17 @@ SELECT COUNT(*) FROM
  (SELECT DISTINCT a FROM t2 WHERE a='oe' COLLATE latin1_german2_ci) dt;

 DROP TABLE t1, t2;
+
+#
+# Bug #25551: inconsistent behaviour in grouping NULL, depending on index type
+#
+CREATE TABLE t1 (a INT, UNIQUE (a));
+INSERT INTO t1 VALUES (4),(null),(2),(1),(null),(3);
+EXPLAIN SELECT DISTINCT a FROM t1;
+#result must have one row with NULL
+SELECT DISTINCT a FROM t1;
+EXPLAIN SELECT a FROM t1 GROUP BY a;
+#result must have one row with NULL
+SELECT a FROM t1 GROUP BY a;
+
+DROP TABLE t1;
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -848,10 +848,11 @@ JOIN::optimize()
  }
  /*
     Check if we can optimize away GROUP BY/DISTINCT.
-     We can do that if there are no aggregate functions and the
+     We can do that if there are no aggregate functions, the
     fields in DISTINCT clause (if present) and/or columns in GROUP BY
     (if present) contain direct references to all key parts of
-     an unique index (in whatever order).
+     an unique index (in whatever order) and if the key parts of the
+     unique index cannot contain NULLs.
     Note that the unique keys for DISTINCT and GROUP BY should not
     be the same (as long as they are unique).

@@ -11856,7 +11857,7 @@ test_if_subkey(ORDER *order, TABLE *table, uint ref, uint ref_key_parts,


 /*
-  Check if GROUP BY/DISTINCT can be optimized away because the set is 
+  Check if GROUP BY/DISTINCT can be optimized away because the set is
  already known to be distinct.
  
  SYNOPSIS
@@ -11864,7 +11865,7 @@ test_if_subkey(ORDER *order, TABLE *table, uint ref, uint ref_key_parts,
    table                The table to operate on.
    find_func            function to iterate over the list and search
                         for a field
-  
+
  DESCRIPTION
    Used in removing the GROUP BY/DISTINCT of the following types of
    statements:
@@ -11875,12 +11876,13 @@ test_if_subkey(ORDER *order, TABLE *table, uint ref, uint ref_key_parts,
      then <any combination of a,b,c>,{whatever} is also distinct

    This function checks if all the key parts of any of the unique keys
-    of the table are referenced by a list : either the select list 
+    of the table are referenced by a list : either the select list
    through find_field_in_item_list or GROUP BY list through
    find_field_in_order_list.
-    If the above holds then we can safely remove the GROUP BY/DISTINCT,
+    If the above holds and the key parts cannot contain NULLs then we 
+    can safely remove the GROUP BY/DISTINCT,
    as no result set can be more distinct than an unique key.
-  
+ 
  RETURN VALUE
    1                    found
    0                    not found.
@@ -11903,7 +11905,8 @@ list_contains_unique_index(TABLE *table,
           key_part < key_part_end;
           key_part++)
      {
-        if (!find_func(key_part->field, data))
+        if (key_part->field->maybe_null() || 
+            !find_func(key_part->field, data))
          break;
      }
      if (key_part == key_part_end)