Adjust costs for doing index scan in cost_group_min_max()

The idea is that when doing a tree dive (once per group), we need to compare key values, which is fast. For each new group, we have to compare the full where clause for the row. Compared to original code, the cost of group_min_max() has slightly increased which affects some test with only a few rows. main.group_min_max and main.distinct have been modified to show the effect of the change. The patch also adjust the number of groups in case of quick selects: - For simple WHERE clauses, ensure that we have at least as many groups as we have conditions on the used group-by key parts. The assumption is that each condition will create at least one group. - Ensure that there are no more groups than rows found by quick_select Test changes: - For some small tables there has been a change of Using index for group-by -> Using index for group-by (scanning) Range -> Index and Using index for group-by -> Using index

Adjust costs for doing index scan in cost_group_min_max()
The idea is that when doing a tree dive (once per group), we need to compare key values, which is fast. For each new group, we have to compare the full where clause for the row. Compared to original code, the cost of group_min_max() has slightly increased which affects some test with only a few rows. main.group_min_max and main.distinct have been modified to show the effect of the change. The patch also adjust the number of groups in case of quick selects: - For simple WHERE clauses, ensure that we have at least as many groups as we have conditions on the used group-by key parts. The assumption is that each condition will create at least one group. - Ensure that there are no more groups than rows found by quick_select Test changes: - For some small tables there has been a change of Using index for group-by -> Using index for group-by (scanning) Range -> Index and Using index for group-by -> Using index
6fa74517 · Monty · Sergei Petrunia · bc9805e9 · 6fa74517 · 6fa74517
Commit 6fa74517 authored Oct 08, 2021 by Monty Committed by Sergei Petrunia Feb 02, 2023
18 changed files
--- a/mysql-test/main/derived_cond_pushdown.result
+++ b/mysql-test/main/derived_cond_pushdown.result
@@ -19749,7 +19749,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	<subquery3>	eq_ref	distinct_key	distinct_key	4	func	1	
 1	PRIMARY	<derived2>	ref	key0	key0	5	test.t3.id	2	
 3	MATERIALIZED	t2	ALL	NULL	NULL	NULL	NULL	3	
-2	DERIVED	cp2	range	NULL	a	5	NULL	7	Using index for group-by
+2	DERIVED	cp2	index	NULL	a	5	NULL	7	Using index
 explain format=json select * from t1, (select a from t1 cp2 group by a) dt, t3
 where dt.a = t1.a and t1.a = t3.id and t1.a in (select a from t2);
 EXPLAIN
@@ -19826,13 +19826,13 @@ EXPLAIN
                {
                  "table": {
                    "table_name": "cp2",
-                    "access_type": "range",
+                    "access_type": "index",
                    "key": "a",
                    "key_length": "5",
                    "used_key_parts": ["a"],
                    "rows": 7,
                    "filtered": 100,
-                    "using_index_for_group_by": true
+                    "using_index": true
                  }
                }
              ]

--- a/mysql-test/main/distinct.result
+++ b/mysql-test/main/distinct.result
@@ -538,10 +538,10 @@ PRIMARY KEY (a,b));
 INSERT INTO t2 VALUES (1,1,1,50), (1,2,3,40), (2,1,3,4);
 EXPLAIN SELECT DISTINCT a FROM t2;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t2	range	NULL	PRIMARY	4	NULL	3	Using index for group-by
+1	SIMPLE	t2	index	NULL	PRIMARY	8	NULL	3	Using index
 EXPLAIN SELECT DISTINCT a,a FROM t2;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t2	range	NULL	PRIMARY	4	NULL	3	Using index for group-by
+1	SIMPLE	t2	index	NULL	PRIMARY	8	NULL	3	Using index
 EXPLAIN SELECT DISTINCT b,a FROM t2;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t2	index	NULL	PRIMARY	8	NULL	3	Using index
@@ -754,9 +754,6 @@ INSERT INTO t1(a, b, c) VALUES (1, 1, 1),
 (1, 2, 1),
 (1, 2, 2),
 (1, 2, 3);
-EXPLAIN SELECT DISTINCT a, b, d, c FROM t1;
-id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	NULL	PRIMARY	16	NULL	6	Using index for group-by; Using temporary
 SELECT DISTINCT a, b, d, c FROM t1;
 a	b	d	c
 1	1	0	1
@@ -765,6 +762,13 @@ a	b	d	c
 1	2	0	1
 1	2	0	2
 1	2	0	3
+EXPLAIN SELECT DISTINCT a, b, d, c FROM t1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	a	16	NULL	6	Using index
+INSERT INTO t1 SELECT seq/10,seq/10,seq/10,seq/10,seq from seq_1_to_100;
+EXPLAIN SELECT DISTINCT a, b, d, c FROM t1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	NULL	PRIMARY	16	NULL	10	Using index for group-by; Using temporary
 DROP TABLE t1;
 #
 # Bug #46159: simple query that never returns

--- a/mysql-test/main/distinct.test
+++ b/mysql-test/main/distinct.test
@@ -4,6 +4,7 @@
 #

 --source include/default_optimizer_switch.inc
+--source include/have_sequence.inc
 --disable_warnings
 drop table if exists t1,t2,t3;
 --enable_warnings
@@ -574,9 +575,10 @@ INSERT INTO t1(a, b, c) VALUES (1, 1, 1),
                               (1, 2, 2),
                               (1, 2, 3);

-EXPLAIN SELECT DISTINCT a, b, d, c FROM t1;
-
 SELECT DISTINCT a, b, d, c FROM t1;
+EXPLAIN SELECT DISTINCT a, b, d, c FROM t1;
+INSERT INTO t1 SELECT seq/10,seq/10,seq/10,seq/10,seq from seq_1_to_100;
+EXPLAIN SELECT DISTINCT a, b, d, c FROM t1;

 DROP TABLE t1;


--- a/mysql-test/main/group_by.result
+++ b/mysql-test/main/group_by.result
@@ -1578,7 +1578,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 EXPLAIN SELECT a FROM t1 FORCE INDEX FOR JOIN (i2) 
 FORCE INDEX FOR GROUP BY (i2) GROUP BY a;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	NULL	i2	4	NULL	144	Using index for group-by
+1	SIMPLE	t1	index	NULL	i2	9	NULL	144	Using index
 EXPLAIN SELECT a FROM t1 USE INDEX () IGNORE INDEX (i2);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	144	
@@ -1701,7 +1701,7 @@ NULL	1
 1	2
 EXPLAIN SELECT a from t2 GROUP BY a;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t2	range	NULL	a	5	NULL	6	Using index for group-by
+1	SIMPLE	t2	index	NULL	a	10	NULL	6	Using index
 SELECT a from t2 GROUP BY a;
 a
 NULL
@@ -1714,6 +1714,11 @@ b
 NULL
 1
 2
+insert into t2 SELECT NULL, NULL from seq_1_to_10;
+# Expect: Using index for group-by
+EXPLAIN SELECT b from t2 GROUP BY a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t2	range	NULL	a	5	NULL	9	Using index for group-by
 DROP TABLE t1;
 DROP TABLE t2;
 CREATE TABLE t1 ( a INT, b INT );
@@ -2452,7 +2457,7 @@ test.t1	analyze	status	OK

 EXPLAIN SELECT SQL_BUFFER_RESULT MIN(a), b FROM t1 WHERE t1.b = 'a' GROUP BY b;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	b	b	9	NULL	2	Using where; Using index for group-by; Using temporary
+1	SIMPLE	t1	ref	b	b	4	const	6	Using where; Using index; Using temporary

 SELECT SQL_BUFFER_RESULT MIN(a), b FROM t1 WHERE t1.b = 'a' GROUP BY b;
 MIN(a)	b
@@ -2460,7 +2465,7 @@ MIN(a)	b

 EXPLAIN SELECT MIN(a), b FROM t1 WHERE t1.b = 'a' GROUP BY b;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	b	b	9	NULL	2	Using where; Using index for group-by
+1	SIMPLE	t1	ref	b	b	4	const	6	Using where; Using index

 SELECT MIN(a), b FROM t1 WHERE t1.b = 'a' GROUP BY b;
 MIN(a)	b

--- a/mysql-test/main/group_by.test
+++ b/mysql-test/main/group_by.test
+--source include/have_sequence.inc
+
 # Initialise
 --disable_warnings
 drop table if exists t1,t2,t3;
@@ -1164,6 +1166,11 @@ SELECT a from t2 GROUP BY a;
 EXPLAIN SELECT b from t2 GROUP BY b;
 SELECT b from t2 GROUP BY b;

+# Show that we are using 'range' when there is more NULL rows in the table
+insert into t2 SELECT NULL, NULL from seq_1_to_10;
+--echo # Expect: Using index for group-by
+EXPLAIN SELECT b from t2 GROUP BY a;
+
 DROP TABLE t1;
 DROP TABLE t2;


--- a/mysql-test/main/group_min_max.result
+++ b/mysql-test/main/group_min_max.result
@@ -2353,6 +2353,9 @@ t1;
 id2	id3	id5	id4	id3	id6	id5	id1
 1	1	1	1	1	1	1	1
 DROP TABLE t1,t2,t3,t4,t5,t6;
+#
+# Bug#22342: No results returned for query using max and group by
+#
 CREATE TABLE t1 (a int, b int, KEY (a,b), KEY b (b));
 INSERT INTO t1 VALUES
 (1,1),(1,2),(1,0),(1,3),
@@ -2361,17 +2364,28 @@ ANALYZE TABLE t1;
 Table	Op	Msg_type	Msg_text
 test.t1	analyze	status	Engine-independent statistics collected
 test.t1	analyze	status	OK
+CREATE TABLE t2 (a int, b int, c int, PRIMARY KEY (a,b,c));
+INSERT INTO t2 SELECT a,b,b FROM t1;
 explain SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	a,b	a	10	NULL	2	Using where; Using index for group-by
+1	SIMPLE	t1	range	a,b	a	10	NULL	6	Using where; Using index
+insert into t1 select 1,seq from seq_1_to_100;
+explain SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	a,b	a	10	NULL	6	Using where; Using index
+analyze table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	Engine-independent statistics collected
+test.t1	analyze	status	OK
+explain SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	a,b	a	10	NULL	1	Using where; Using index for group-by
 SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a;
 MAX(b)	a
 1	1
 SELECT MIN(b), a FROM t1 WHERE b > 1 AND a = 1 GROUP BY a;
 MIN(b)	a
 2	1
-CREATE TABLE t2 (a int, b int, c int, PRIMARY KEY (a,b,c));
-INSERT INTO t2 SELECT a,b,b FROM t1;
 explain SELECT MIN(c) FROM t2 WHERE b = 2 and a = 1 and c > 1 GROUP BY a;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t2	range	PRIMARY	PRIMARY	12	NULL	1	Using where; Using index
@@ -3657,14 +3671,34 @@ KEY (f1,f2)
 ) ;
 insert into t1 values(1,'A'),(1 , 'B'), (1, 'C'), (2, 'A'), 
 (3, 'A'), (3, 'B'), (3, 'C'), (3, 'D');
+explain SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	NULL	f1	5	NULL	8	Using index for group-by (scanning)
 SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
 f1	COUNT(DISTINCT f2)
 1	3
 2	1
 3	4
+insert into t1 select seq/10,char(64+mod(seq,4)) from seq_1_to_100;
+explain SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	NULL	f1	5	NULL	10	Using index for group-by
+SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
+f1	COUNT(DISTINCT f2)
+0	4
+1	4
+2	4
+3	5
+4	4
+5	4
+6	4
+7	4
+8	4
+9	4
+10	4
 explain SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	NULL	f1	5	NULL	8	Using index for group-by
+1	SIMPLE	t1	range	NULL	f1	5	NULL	10	Using index for group-by
 drop table t1;
 # End of test#50539.
 #
@@ -3760,7 +3794,7 @@ CREATE INDEX break_it ON t1 (a, b);
 EXPLAIN
 SELECT distinct a, b FROM t1 where a = '3' ORDER BY b;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	break_it	break_it	10	NULL	2	Using where; Using index for group-by; Using filesort
+1	SIMPLE	t1	ref	break_it	break_it	5	const	6	Using where; Using index; Using filesort
 SELECT distinct a, b FROM t1 where a = '3' ORDER BY b;
 a	b
 3	1

--- a/mysql-test/main/group_min_max.test
+++ b/mysql-test/main/group_min_max.test
@@ -899,23 +899,27 @@ t1;

 DROP TABLE t1,t2,t3,t4,t5,t6;

-#
-# Bug#22342: No results returned for query using max and group by
-#
+--echo #
+--echo # Bug#22342: No results returned for query using max and group by
+--echo #
 CREATE TABLE t1 (a int, b int, KEY (a,b), KEY b (b));
 INSERT INTO t1 VALUES
  (1,1),(1,2),(1,0),(1,3),
  (1,-1),(1,-2),(1,-3),(1,-4);
 ANALYZE TABLE t1;
+CREATE TABLE t2 (a int, b int, c int, PRIMARY KEY (a,b,c));
+INSERT INTO t2 SELECT a,b,b FROM t1;

 explain SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a;
+insert into t1 select 1,seq from seq_1_to_100;
+explain SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a;
+analyze table t1;
+explain SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a;
+
 SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a;
 SELECT MIN(b), a FROM t1 WHERE b > 1 AND a = 1 GROUP BY a;
-CREATE TABLE t2 (a int, b int, c int, PRIMARY KEY (a,b,c));
-INSERT INTO t2 SELECT a,b,b FROM t1;
 explain SELECT MIN(c) FROM t2 WHERE b = 2 and a = 1 and c > 1 GROUP BY a;
 SELECT MIN(c) FROM t2 WHERE b = 2 and a = 1 and c > 1 GROUP BY a;
-
 DROP TABLE t1,t2;

 #
@@ -1439,10 +1443,12 @@ CREATE TABLE t1 (
 ) ;
 insert into t1 values(1,'A'),(1 , 'B'), (1, 'C'), (2, 'A'), 
 (3, 'A'), (3, 'B'), (3, 'C'), (3, 'D');
-
+explain SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
+SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
+insert into t1 select seq/10,char(64+mod(seq,4)) from seq_1_to_100;
+explain SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
 SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
 explain SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
- 
 drop table t1;
 --echo # End of test#50539.


--- a/mysql-test/main/group_min_max_innodb.result
+++ b/mysql-test/main/group_min_max_innodb.result
@@ -180,7 +180,7 @@ F	17
 EXPLAIN SELECT c1, max(i2) FROM t1 WHERE (c1 = 'C' OR c1 = 'F' ) AND ( i2 = 17 )
 GROUP BY c1;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	k1	k1	5	NULL	1	Using where; Using index for group-by
+1	SIMPLE	t1	range	k1	k1	5	NULL	2	Using where; Using index for group-by
 SELECT c1, max(i2) FROM t1 WHERE (c1 = 'C' OR c1 = 'F' ) AND ( i2 = 17 )
 GROUP BY c1;
 c1	max(i2)
@@ -200,7 +200,7 @@ EXPLAIN SELECT c1, i1, max(i2) FROM t2
 WHERE (c1 = 'C' OR ( c1 = 'F' AND i1 < 35)) AND ( i2 = 17 )
 GROUP BY c1,i1;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t2	range	k2	k2	9	NULL	60	Using where; Using index for group-by
+1	SIMPLE	t2	range	k2	k2	5	NULL	60	Using where; Using index
 SELECT c1, i1, max(i2) FROM t2
 WHERE (c1 = 'C' OR ( c1 = 'F' AND i1 < 35)) AND ( i2 = 17 )
 GROUP BY c1,i1;
@@ -211,7 +211,7 @@ EXPLAIN SELECT c1, i1, max(i2) FROM t2
 WHERE (((c1 = 'C' AND i1 < 40) OR ( c1 = 'F' AND i1 < 35)) AND ( i2 = 17 ))
 GROUP BY c1,i1;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t2	range	k2	k2	9	NULL	60	Using where; Using index for group-by
+1	SIMPLE	t2	range	k2	k2	5	NULL	60	Using where; Using index
 SELECT c1, i1, max(i2) FROM t2 
 WHERE (((c1 = 'C' AND i1 < 40) OR ( c1 = 'F' AND i1 < 35)) AND ( i2 = 17 ))
 GROUP BY c1,i1;

--- a/mysql-test/main/opt_trace.result
+++ b/mysql-test/main/opt_trace.result
--- a/mysql-test/main/partition_range.result
+++ b/mysql-test/main/partition_range.result
@@ -4,13 +4,16 @@ drop table if exists t1, t2;
 #
 CREATE TABLE t1 (a INT,b INT,KEY a (a,b));
 INSERT INTO `t1` VALUES (0,580092),(3000,894076),(4000,805483),(4000,913540),(6000,611137),(8000,171602),(9000,599495),(9000,746305),(10000,272829),(10000,847519),(12000,258869),(12000,929028),(13000,288970),(15000,20971),(15000,105839),(16000,788272),(17000,76914),(18000,827274),(19000,802258),(20000,123677),(20000,587729),(22000,701449),(25000,31565),(25000,230782),(25000,442887),(25000,733139),(25000,851020);
+SELECT COUNT(*) from t1 where a IN (10000, 1000000, 3000);
+COUNT(*)
+3
 EXPLAIN SELECT a, MAX(b) FROM t1 WHERE a IN (10000, 1000000, 3000) GROUP BY a;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	a	a	5	NULL	1	Using where; Using index for group-by
+1	SIMPLE	t1	range	a	a	5	NULL	4	Using where; Using index
 alter table t1 partition by hash(a) partitions 1;
 EXPLAIN SELECT a, MAX(b) FROM t1 WHERE a IN (10000, 1000000, 3000) GROUP BY a;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	a	a	5	NULL	1	Using where; Using index for group-by
+1	SIMPLE	t1	range	a	a	5	NULL	4	Using where; Using index
 alter table t1 remove partitioning;
 insert into t1 (a,b) select seq,seq from seq_4001_to_4100;
 insert into t1 (a,b) select seq,seq from seq_10001_to_10100;

--- a/mysql-test/main/partition_range.test
+++ b/mysql-test/main/partition_range.test
@@ -17,6 +17,7 @@ drop table if exists t1, t2;
 CREATE TABLE t1 (a INT,b INT,KEY a (a,b));
 INSERT INTO `t1` VALUES (0,580092),(3000,894076),(4000,805483),(4000,913540),(6000,611137),(8000,171602),(9000,599495),(9000,746305),(10000,272829),(10000,847519),(12000,258869),(12000,929028),(13000,288970),(15000,20971),(15000,105839),(16000,788272),(17000,76914),(18000,827274),(19000,802258),(20000,123677),(20000,587729),(22000,701449),(25000,31565),(25000,230782),(25000,442887),(25000,733139),(25000,851020);

+SELECT COUNT(*) from t1 where a IN (10000, 1000000, 3000);
 EXPLAIN SELECT a, MAX(b) FROM t1 WHERE a IN (10000, 1000000, 3000) GROUP BY a;

 alter table t1 partition by hash(a) partitions 1;

--- a/mysql-test/main/subselect_mat.result
+++ b/mysql-test/main/subselect_mat.result
@@ -1142,7 +1142,7 @@ a
 explain extended
 select a from t1 group by a having a in (select c from t2 where d >= 20);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	PRIMARY	t1	range	NULL	it1a	4	NULL	7	100.00	Using index for group-by
+1	PRIMARY	t1	index	NULL	it1a	4	NULL	7	100.00	Using index
 2	MATERIALIZED	t2	ALL	NULL	NULL	NULL	NULL	7	100.00	Using where
 Warnings:
 Note	1003	/* select#1 */ select `test`.`t1`.`a` AS `a` from `test`.`t1` group by `test`.`t1`.`a` having <expr_cache><`test`.`t1`.`a`>(<in_optimizer>(`test`.`t1`.`a`,`test`.`t1`.`a` in ( <materialize> (/* select#2 */ select `test`.`t2`.`c` from `test`.`t2` where `test`.`t2`.`d` >= 20 ), <primary_index_lookup>(`test`.`t1`.`a` in <temporary table> on distinct_key where `test`.`t1`.`a` = `<subquery2>`.`c`))))
@@ -1154,7 +1154,7 @@ create index iab on t1(a, b);
 explain extended
 select a from t1 group by a having a in (select c from t2 where d >= 20);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	PRIMARY	t1	range	NULL	it1a	4	NULL	7	100.00	Using index for group-by
+1	PRIMARY	t1	index	NULL	it1a	4	NULL	7	100.00	Using index
 2	MATERIALIZED	t2	ALL	NULL	NULL	NULL	NULL	7	100.00	Using where
 Warnings:
 Note	1003	/* select#1 */ select `test`.`t1`.`a` AS `a` from `test`.`t1` group by `test`.`t1`.`a` having <expr_cache><`test`.`t1`.`a`>(<in_optimizer>(`test`.`t1`.`a`,`test`.`t1`.`a` in ( <materialize> (/* select#2 */ select `test`.`t2`.`c` from `test`.`t2` where `test`.`t2`.`d` >= 20 ), <primary_index_lookup>(`test`.`t1`.`a` in <temporary table> on distinct_key where `test`.`t1`.`a` = `<subquery2>`.`c`))))
@@ -1166,7 +1166,7 @@ explain extended
 select a from t1 group by a
 having a in (select c from t2 where d >= some(select e from t3 where max(b)=e));
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	PRIMARY	t1	range	NULL	iab	4	NULL	7	100.00	Using index for group-by
+1	PRIMARY	t1	index	NULL	iab	8	NULL	7	100.00	Using index
 2	DEPENDENT SUBQUERY	t2	ALL	NULL	NULL	NULL	NULL	7	100.00	Using where
 3	DEPENDENT SUBQUERY	t3	ALL	NULL	NULL	NULL	NULL	4	100.00	Using where
 Warnings:

--- a/mysql-test/main/subselect_sj_mat.result
+++ b/mysql-test/main/subselect_sj_mat.result
@@ -1181,7 +1181,7 @@ a
 explain extended
 select a from t1 group by a having a in (select c from t2 where d >= 20);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	PRIMARY	t1	range	NULL	it1a	4	NULL	7	100.00	Using index for group-by
+1	PRIMARY	t1	index	NULL	it1a	4	NULL	7	100.00	Using index
 2	MATERIALIZED	t2	ALL	NULL	NULL	NULL	NULL	7	100.00	Using where
 Warnings:
 Note	1003	/* select#1 */ select `test`.`t1`.`a` AS `a` from `test`.`t1` group by `test`.`t1`.`a` having <expr_cache><`test`.`t1`.`a`>(<in_optimizer>(`test`.`t1`.`a`,`test`.`t1`.`a` in ( <materialize> (/* select#2 */ select `test`.`t2`.`c` from `test`.`t2` where `test`.`t2`.`d` >= 20 ), <primary_index_lookup>(`test`.`t1`.`a` in <temporary table> on distinct_key where `test`.`t1`.`a` = `<subquery2>`.`c`))))
@@ -1193,7 +1193,7 @@ create index iab on t1(a, b);
 explain extended
 select a from t1 group by a having a in (select c from t2 where d >= 20);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	PRIMARY	t1	range	NULL	it1a	4	NULL	7	100.00	Using index for group-by
+1	PRIMARY	t1	index	NULL	it1a	4	NULL	7	100.00	Using index
 2	MATERIALIZED	t2	ALL	NULL	NULL	NULL	NULL	7	100.00	Using where
 Warnings:
 Note	1003	/* select#1 */ select `test`.`t1`.`a` AS `a` from `test`.`t1` group by `test`.`t1`.`a` having <expr_cache><`test`.`t1`.`a`>(<in_optimizer>(`test`.`t1`.`a`,`test`.`t1`.`a` in ( <materialize> (/* select#2 */ select `test`.`t2`.`c` from `test`.`t2` where `test`.`t2`.`d` >= 20 ), <primary_index_lookup>(`test`.`t1`.`a` in <temporary table> on distinct_key where `test`.`t1`.`a` = `<subquery2>`.`c`))))
@@ -1205,7 +1205,7 @@ explain extended
 select a from t1 group by a
 having a in (select c from t2 where d >= some(select e from t3 where max(b)=e));
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	PRIMARY	t1	range	NULL	iab	4	NULL	7	100.00	Using index for group-by
+1	PRIMARY	t1	index	NULL	iab	8	NULL	7	100.00	Using index
 2	DEPENDENT SUBQUERY	t2	ALL	NULL	NULL	NULL	NULL	7	100.00	Using where
 3	DEPENDENT SUBQUERY	t3	ALL	NULL	NULL	NULL	NULL	4	100.00	Using where
 Warnings:

--- a/mysql-test/suite/gcol/r/gcol_select_innodb.result
+++ b/mysql-test/suite/gcol/r/gcol_select_innodb.result
@@ -146,7 +146,7 @@ count(distinct c)
 3
 explain select count(distinct c) from t1;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	NULL	c	5	NULL	5	Using index for group-by
+1	SIMPLE	t1	range	NULL	c	5	NULL	5	Using index for group-by (scanning)
 ###
 ### filesort & range-based utils
 ###

--- a/mysql-test/suite/gcol/r/gcol_select_myisam.result
+++ b/mysql-test/suite/gcol/r/gcol_select_myisam.result
@@ -146,7 +146,7 @@ count(distinct c)
 3
 explain select count(distinct c) from t1;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	NULL	c	5	NULL	5	Using index for group-by
+1	SIMPLE	t1	range	NULL	c	5	NULL	5	Using index for group-by (scanning)
 ###
 ### filesort & range-based utils
 ###

--- a/mysql-test/suite/vcol/r/vcol_select_innodb.result
+++ b/mysql-test/suite/vcol/r/vcol_select_innodb.result
@@ -135,7 +135,7 @@ count(distinct c)
 3
 explain select count(distinct c) from t1;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	NULL	c	5	NULL	5	Using index for group-by
+1	SIMPLE	t1	range	NULL	c	5	NULL	5	Using index for group-by (scanning)
 ###
 ### filesort & range-based utils
 ###

--- a/mysql-test/suite/vcol/r/vcol_select_myisam.result
+++ b/mysql-test/suite/vcol/r/vcol_select_myisam.result
@@ -133,7 +133,7 @@ count(distinct c)
 3
 explain select count(distinct c) from t1;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	NULL	c	5	NULL	5	Using index for group-by
+1	SIMPLE	t1	range	NULL	c	5	NULL	5	Using index for group-by (scanning)
 ###
 ### filesort & range-based utils
 ###

--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -13728,6 +13728,12 @@ cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
    The group-by list is a permutation of the select attributes, according
    to their order in the index.

+  EXAMPLES of handled queries
+    select max(keypart2) from t1 group by keypart1
+    select max(keypart2) from t1 where  keypart2 <= const group by keypart1
+    select distinct keypart1 from table;
+    select count(distinct keypart1) from table;
+
  TODO
  - What happens if the query groups by the MIN/MAX field, and there is no
    other field as in: "select MY_MIN(a) from t1 group by a" ?
@@ -14820,7 +14826,22 @@ get_field_keypart(KEY *index, Field *field)
    This method computes the access cost of a TRP_GROUP_MIN_MAX instance and
    the number of rows returned.

+    The used algorithm used for finding the rows is:
+
+    For each range (if no ranges, the range is the whole table)
+      Do an index search for the start of the range
+      As long as the found key is withing the range
+        If the found row matches the where clause, use the row otherwise skip it
+        Scan index for next group, jumping over all identical keys, done in
+        QUICK_GROUP_MIN_MAX_SELECT::next_prefix
+        If the engine does not support a native next_prefix, we will
+        either scan the index for the next value or do a new index dive
+        with 'find next bigger key'.
+
  NOTES
+    See get_best_group_min_max() for which kind of queries this function
+    will be called.
+
    The cost computation distinguishes several cases:
    1) No equality predicates over non-group attributes (thus no key_infix).
       If groups are bigger than blocks on the average, then we assume that it
@@ -14887,17 +14908,18 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
  if (!group_key_parts)
  {
    /* Summary over the whole table */
-    keys_per_group= table_records;
+    keys_per_group= MY_MAX(table_records,1);
  }
  else
  {
    keys_per_group= (ha_rows) index_info->actual_rec_per_key(group_key_parts -
                                                             1);
+    if (keys_per_group == 0) /* If there is no statistics try to guess */
+    {
+      /* each group contains 10% of all records */
+      keys_per_group= (table_records / 10) + 1;
+    }
  }
-
-  if (keys_per_group == 0) /* If there is no statistics try to guess */
-    /* each group contains 10% of all records */
-    keys_per_group= (table_records / 10) + 1;
  num_groups= (table_records / keys_per_group) + 1;

  /* Apply the selectivity of the quick select for group prefixes. */
@@ -14906,7 +14928,21 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
    quick_prefix_selectivity= (double) quick_prefix_records /
                              (double) table_records;
    num_groups= (ha_rows) rint(num_groups * quick_prefix_selectivity);
-    set_if_bigger(num_groups, 1);
+    /*
+      Expect at least as many groups as there is ranges in the index
+      WHERE a in (1,2,3) GROUP BY a
+
+      This is mostly relevant for queries with few records, which is
+      something we have a lot of in our test suites.
+      In theory it is possible to scan the index_tree and for cases
+      where all ranges are eq ranges, we could calculate the exact number
+      of groups. This is probably an overkill so for now we estimate
+      the lower level of number of groups by the range elements in the
+      tree.
+    */
+    set_if_bigger(num_groups, MY_MAX(index_tree->elements, 1));
+    /* There cannot be more groups than matched records */
+    set_if_smaller(num_groups, quick_prefix_records);
  }
  /* Ensure we don't have more groups than rows in table */
  set_if_smaller(num_groups, table_records);
@@ -14943,21 +14979,22 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
    group. To make the CPU cost reflect this, we estimate the CPU cost
    as the sum of:
    1. Cost for evaluating the condition for each num_group
-       (1/TIME_FOR_COMPARE) (similarly as for index scan).
+       (1/TIME_FOR_COMPARE_IDX) (similarly as for index scan).
    2. Cost for navigating the index structure (assuming a b-tree).
       Note: We only add the cost for one index comparision per block. For a
             b-tree the number of comparisons will be larger. However the cost
             is low as all of the upper level b-tree blocks should be in
             memory.
       TODO: This cost should be provided by the storage engine.
+    3. Cost for comparing the row with the where clause
  */
  const double tree_traversal_cost=
    ceil(log(static_cast<double>(table_records))/
         log(static_cast<double>(keys_per_block))) * 
-    1/(2*TIME_FOR_COMPARE);
+    1/(TIME_FOR_COMPARE_IDX);

  const double cpu_cost= (num_groups *
-                          (tree_traversal_cost + 1/TIME_FOR_COMPARE_IDX));
+                          (tree_traversal_cost + 1/TIME_FOR_COMPARE));

  *read_cost= io_cost + cpu_cost;
  *records= num_groups;