Bug #30596 GROUP BY optimization gives wrong result order

The optimization that uses a unique index to remove GROUP BY did not ensure that the index was actually used, thus violating the ORDER BY that is implied by GROUP BY. Fixed by replacing GROUP BY with ORDER BY if the GROUP BY clause contains a unique index over non-nullable field(s). In case GROUP BY ... ORDER BY null is used, GROUP BY is simply removed.

Bug #30596 GROUP BY optimization gives wrong result order
The optimization that uses a unique index to remove GROUP BY did not ensure that the index was actually used, thus violating the ORDER BY that is implied by GROUP BY. Fixed by replacing GROUP BY with ORDER BY if the GROUP BY clause contains a unique index over non-nullable field(s). In case GROUP BY ... ORDER BY null is used, GROUP BY is simply removed.
98d34d62 · mhansson/martin@linux-st28.site · 3608dbdc · 98d34d62 · 98d34d62 · 98d34d62
Commit 98d34d62 authored Aug 28, 2007 by mhansson/martin@linux-st28.site
6 changed files
--- a/mysql-test/include/mix1.inc
+++ b/mysql-test/include/mix1.inc
@@ -969,6 +969,33 @@ ROLLBACK;
 ROLLBACK;
 DROP TABLE t1;
+#
+# Bug#30596: GROUP BY optimization gives wrong result order
+#  
+CREATE TABLE t1(
+  a INT, 
+  b INT NOT NULL, 
+  c INT NOT NULL, 
+  d INT, 
+  UNIQUE KEY (c,b)
+) engine=innodb;
+INSERT INTO t1 VALUES (1,1,1,50), (1,2,3,40), (2,1,3,4);
+EXPLAIN SELECT c,b,d FROM t1 GROUP BY c,b,d;
+SELECT c,b,d FROM t1 GROUP BY c,b,d;
+EXPLAIN SELECT c,b,d FROM t1 GROUP BY c,b,d ORDER BY NULL;
+SELECT c,b,d FROM t1 GROUP BY c,b,d ORDER BY NULL;
+EXPLAIN SELECT c,b,d FROM t1 ORDER BY c,b,d;
+SELECT c,b,d FROM t1 ORDER BY c,b,d;
+EXPLAIN SELECT c,b,d FROM t1 GROUP BY c,b;
+SELECT c,b,d FROM t1 GROUP BY c,b;
+EXPLAIN SELECT c,b   FROM t1 GROUP BY c,b;
+SELECT c,b   FROM t1 GROUP BY c,b;
+DROP TABLE t1;
 --echo End of 5.0 tests
 # Fix for BUG#19243 "wrong LAST_INSERT_ID() after ON DUPLICATE KEY

--- a/mysql-test/r/distinct.result
+++ b/mysql-test/r/distinct.result
@@ -526,10 +526,10 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t1	index	NULL	PRIMARY	4	NULL	3	Using index
 EXPLAIN SELECT a,b FROM t1 GROUP BY a,b;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	3	
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	3	Using filesort
 EXPLAIN SELECT DISTINCT a,b FROM t1 GROUP BY a,b;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	3	
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	3	Using filesort
 CREATE TABLE t2(a INT, b INT NOT NULL, c INT NOT NULL, d INT, 
 PRIMARY KEY (a,b));
 INSERT INTO t2 VALUES (1,1,1,50), (1,2,3,40), (2,1,3,4);
@@ -554,7 +554,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 CREATE UNIQUE INDEX c_b_unq ON t2 (c,b);
 EXPLAIN SELECT DISTINCT a,b,d FROM t2 GROUP BY c,b,d;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	3	
+1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	3	Using filesort
 DROP TABLE t1,t2;
 create table t1 (id int, dsc varchar(50));
 insert into t1 values (1, "line number one"), (2, "line number two"), (3, "line number three");

--- a/mysql-test/r/group_by.result
+++ b/mysql-test/r/group_by.result
@@ -1093,7 +1093,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	144	
 EXPLAIN SELECT a FROM t1 IGNORE INDEX FOR GROUP BY (PRIMARY,i2) GROUP BY a;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	index	NULL	PRIMARY	4	NULL	144	Using index
+1	SIMPLE	t1	index	NULL	PRIMARY	4	NULL	144	Using index; Using filesort
 EXPLAIN SELECT a FROM t1 IGNORE INDEX FOR ORDER BY (PRIMARY,i2) ORDER BY a;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t1	index	NULL	PRIMARY	4	NULL	144	Using index; Using filesort
@@ -1176,3 +1176,94 @@ old	OFF
 SET @@old = off;
 ERROR HY000: Variable 'old' is a read only variable
 DROP TABLE t1, t2;
+CREATE TABLE t1(
+a INT, 
+b INT NOT NULL, 
+c INT NOT NULL, 
+d INT, 
+UNIQUE KEY (c,b)
+);
+INSERT INTO t1 VALUES (1,1,1,50), (1,2,3,40), (2,1,3,4);
+CREATE TABLE t2(
+a INT,
+b INT,
+UNIQUE KEY(a,b)
+);
+INSERT INTO t2 VALUES (NULL, NULL), (NULL, NULL), (NULL, 1), (1, NULL), (1, 1), (1,2);
+EXPLAIN SELECT c,b,d FROM t1 GROUP BY c,b,d;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	3	Using filesort
+SELECT c,b,d FROM t1 GROUP BY c,b,d;
+c	b	d
+1	1	50
+3	1	4
+3	2	40
+EXPLAIN SELECT c,b,d FROM t1 GROUP BY c,b,d ORDER BY NULL;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	3	
+SELECT c,b,d FROM t1 GROUP BY c,b,d ORDER BY NULL;
+c	b	d
+1	1	50
+3	2	40
+3	1	4
+EXPLAIN SELECT c,b,d FROM t1 ORDER BY c,b,d;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	3	Using filesort
+SELECT c,b,d FROM t1 ORDER BY c,b,d;
+c	b	d
+1	1	50
+3	1	4
+3	2	40
+EXPLAIN SELECT c,b,d FROM t1 GROUP BY c,b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	3	Using filesort
+SELECT c,b,d FROM t1 GROUP BY c,b;
+c	b	d
+1	1	50
+3	1	4
+3	2	40
+EXPLAIN SELECT c,b   FROM t1 GROUP BY c,b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	c	8	NULL	3	Using index
+SELECT c,b   FROM t1 GROUP BY c,b;
+c	b
+1	1
+3	1
+3	2
+EXPLAIN SELECT a,b from t2 ORDER BY a,b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t2	index	NULL	a	10	NULL	6	Using index
+SELECT a,b from t2 ORDER BY a,b;
+a	b
+NULL	NULL
+NULL	NULL
+NULL	1
+1	NULL
+1	1
+1	2
+EXPLAIN SELECT a,b from t2 GROUP BY a,b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t2	index	NULL	a	10	NULL	6	Using index
+SELECT a,b from t2 GROUP BY a,b;
+a	b
+NULL	NULL
+NULL	1
+1	NULL
+1	1
+1	2
+EXPLAIN SELECT a from t2 GROUP BY a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t2	index	NULL	a	10	NULL	6	Using index
+SELECT a from t2 GROUP BY a;
+a
+NULL
+1
+EXPLAIN SELECT b from t2 GROUP BY b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t2	index	NULL	a	10	NULL	6	Using index; Using temporary; Using filesort
+SELECT b from t2 GROUP BY b;
+b
+NULL
+1
+2
+DROP TABLE t1;
--- a/mysql-test/r/innodb_mysql.result
+++ b/mysql-test/r/innodb_mysql.result
@@ -1141,6 +1141,55 @@ a	b
 ROLLBACK;
 ROLLBACK;
 DROP TABLE t1;
+CREATE TABLE t1(
+a INT, 
+b INT NOT NULL, 
+c INT NOT NULL, 
+d INT, 
+UNIQUE KEY (c,b)
+) engine=innodb;
+INSERT INTO t1 VALUES (1,1,1,50), (1,2,3,40), (2,1,3,4);
+EXPLAIN SELECT c,b,d FROM t1 GROUP BY c,b,d;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	3	Using filesort
+SELECT c,b,d FROM t1 GROUP BY c,b,d;
+c	b	d
+1	1	50
+3	1	4
+3	2	40
+EXPLAIN SELECT c,b,d FROM t1 GROUP BY c,b,d ORDER BY NULL;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	3	
+SELECT c,b,d FROM t1 GROUP BY c,b,d ORDER BY NULL;
+c	b	d
+1	1	50
+3	1	4
+3	2	40
+EXPLAIN SELECT c,b,d FROM t1 ORDER BY c,b,d;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	3	Using filesort
+SELECT c,b,d FROM t1 ORDER BY c,b,d;
+c	b	d
+1	1	50
+3	1	4
+3	2	40
+EXPLAIN SELECT c,b,d FROM t1 GROUP BY c,b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	c	8	NULL	3	
+SELECT c,b,d FROM t1 GROUP BY c,b;
+c	b	d
+1	1	50
+3	1	4
+3	2	40
+EXPLAIN SELECT c,b   FROM t1 GROUP BY c,b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	c	8	NULL	3	Using index
+SELECT c,b   FROM t1 GROUP BY c,b;
+c	b
+1	1
+3	1
+3	2
+DROP TABLE t1;
 End of 5.0 tests
 CREATE TABLE `t2` (
 `k` int(11) NOT NULL auto_increment,

--- a/mysql-test/t/group_by.test
+++ b/mysql-test/t/group_by.test
@@ -861,3 +861,47 @@ SHOW VARIABLES LIKE 'old';
 SET @@old = off;  
 DROP TABLE t1, t2;
+#
+# Bug#30596: GROUP BY optimization gives wrong result order
+#
+CREATE TABLE t1(
+  a INT, 
+  b INT NOT NULL, 
+  c INT NOT NULL, 
+  d INT, 
+  UNIQUE KEY (c,b)
+);
+INSERT INTO t1 VALUES (1,1,1,50), (1,2,3,40), (2,1,3,4);
+CREATE TABLE t2(
+  a INT,
+  b INT,
+  UNIQUE KEY(a,b)
+);
+INSERT INTO t2 VALUES (NULL, NULL), (NULL, NULL), (NULL, 1), (1, NULL), (1, 1), (1,2);
+EXPLAIN SELECT c,b,d FROM t1 GROUP BY c,b,d;
+SELECT c,b,d FROM t1 GROUP BY c,b,d;
+EXPLAIN SELECT c,b,d FROM t1 GROUP BY c,b,d ORDER BY NULL;
+SELECT c,b,d FROM t1 GROUP BY c,b,d ORDER BY NULL;
+EXPLAIN SELECT c,b,d FROM t1 ORDER BY c,b,d;
+SELECT c,b,d FROM t1 ORDER BY c,b,d;
+EXPLAIN SELECT c,b,d FROM t1 GROUP BY c,b;
+SELECT c,b,d FROM t1 GROUP BY c,b;
+EXPLAIN SELECT c,b   FROM t1 GROUP BY c,b;
+SELECT c,b   FROM t1 GROUP BY c,b;
+EXPLAIN SELECT a,b from t2 ORDER BY a,b;
+SELECT a,b from t2 ORDER BY a,b;
+EXPLAIN SELECT a,b from t2 GROUP BY a,b;
+SELECT a,b from t2 GROUP BY a,b;
+EXPLAIN SELECT a from t2 GROUP BY a;
+SELECT a from t2 GROUP BY a;
+EXPLAIN SELECT b from t2 GROUP BY b;
+SELECT b from t2 GROUP BY b;
+DROP TABLE t1;
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -1042,6 +1042,20 @@ JOIN::optimize()
                                 find_field_in_order_list,
                                 (void *) group_list))
    {
+      /*
+        We have found that grouping can be removed since groups correspond to
+        only one row anyway, but we still have to guarantee correct result
+        order. The line below effectively rewrites the query from GROUP BY
+        <fields> to ORDER BY <fields>. One exception is if skip_sort_order is
+        set (see above), then we can simply skip GROUP BY.
+      */
+      order= skip_sort_order ? 0 : group_list;
+      /*
+        If we have an IGNORE INDEX FOR GROUP BY(fields) clause, this must be 
+        rewritten to IGNORE INDEX FOR ORDER BY(fields).
+      */
+      join_tab->table->keys_in_use_for_order_by=
+        join_tab->table->keys_in_use_for_group_by;
      group_list= 0;
      group= 0;
    }