From 3e0f63c18fdfae9280fb406c4677c613d1838a64 Mon Sep 17 00:00:00 2001
From: Igor Babaev <igor@askmonty.org>
Date: Tue, 22 Apr 2014 14:39:57 -0700
Subject: [PATCH] Fixed the problem of mdev-5947. Back-ported from the mysql
 5.6 code line the patch with the following comment:

  Fix for Bug#11757108 CHANGE IN EXECUTION PLAN FOR COUNT_DISTINCT_GROUP_ON_KEY
                       CAUSES PEFORMANCE REGRESSION

  The cause for the performance regression is that the access strategy for the
  GROUP BY query is changed from using "index scan" in mysql-5.1 to use "loose
  index scan" in mysql-5.5. The index used for group by is unique and thus each
  "loose scan" group will only contain one record. Since loose scan needs to
  re-position on each "loose scan" group this query will do a re-position for
  each index entry. Compared to just reading the next index entry as a normal
  index scan does, the use of loose scan for this query becomes more expensive.

  The cause for selecting to use loose scan for this query is that in the current
  code when the size of the "loose scan" group is one, the formula for
  calculating the cost estimates becomes almost identical to the cost of using
  normal index scan. Differences in use of integer versus floating point arithmetic
  can cause one or the other access strategy to be selected.

  The main issue with the formula for estimating the cost of using loose scan is
  that it does not take into account that it is more costly to do a re-position
  for each "loose scan" group compared to just reading the next index entry.
  Both index scan and loose scan estimate the cpu cost as:

    "number of entries needed to read/scan" * ROW_EVALUATE_COST

  The results from testing with the query in this bug indicates that the real
  cost of doing a re-position is four to eight times higher than just reading the
  next index entry. Thus, the cpu cost estimate for loose scan should be increased.
  To account for the extra work to re-position in the index we increase the
  cost for loose index scan to include the cost of navigating the index.
  This is modelled as a function of the height of the b-tree:

    navigation cost= ceil(log(records in table)/log(indexes per block))
                   * ROWID_COMPARE_COST;

  This will avoid loose index scan being used for indexes where the "loose scan"
  group contains very few index entries.
---
 mysql-test/r/distinct.result          |  2 +-
 mysql-test/r/func_group_innodb.result |  6 ++++--
 mysql-test/r/group_by.result          | 12 ++++++------
 mysql-test/r/group_min_max.result     | 12 +++++++-----
 mysql-test/t/func_group_innodb.test   |  7 ++++++-
 mysql-test/t/group_by.test            |  5 ++---
 mysql-test/t/group_min_max.test       |  6 ++++--
 sql/opt_range.cc                      | 25 +++++++++++++++++++------
 8 files changed, 49 insertions(+), 26 deletions(-)

diff --git a/mysql-test/r/distinct.result b/mysql-test/r/distinct.result
index 6f68483f684..bd7deee43e9 100644
--- a/mysql-test/r/distinct.result
+++ b/mysql-test/r/distinct.result
@@ -756,7 +756,7 @@ INSERT INTO t1(a, b, c) VALUES (1, 1, 1),
 (1, 2, 3);
 EXPLAIN SELECT DISTINCT a, b, d, c FROM t1;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	NULL	a	16	NULL	7	Using index for group-by
+1	SIMPLE	t1	index	NULL	a	16	NULL	6	Using index
 SELECT DISTINCT a, b, d, c FROM t1;
 a	b	d	c
 1	1	0	1
diff --git a/mysql-test/r/func_group_innodb.result b/mysql-test/r/func_group_innodb.result
index 9489960d28c..52d5922df95 100644
--- a/mysql-test/r/func_group_innodb.result
+++ b/mysql-test/r/func_group_innodb.result
@@ -236,12 +236,14 @@ SET storage_engine=@old_engine;
 #
 CREATE TABLE t1(a BLOB, b VARCHAR(255) CHARSET LATIN1, c INT,
 KEY(b, c, a(765))) ENGINE=INNODB;
-INSERT INTO t1(a, b, c) VALUES ('', 'a', 0), ('', 'a', null), ('', 'a', 0);
+INSERT INTO t1(a, b, c) VALUES 
+('', 'a', 0), ('', 'a', null), ('', 'a', 0), ('', 'a', null), ('', 'a', 0);
+ANALYZE TABLE t1;
 SELECT MIN(c) FROM t1 GROUP BY b;
 MIN(c)
 0
 EXPLAIN SELECT MIN(c) FROM t1 GROUP BY b;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	NULL	b	263	NULL	4	Using index for group-by
+1	SIMPLE	t1	range	NULL	b	263	NULL	3	Using index for group-by
 DROP TABLE t1;
 End of 5.5 tests
diff --git a/mysql-test/r/group_by.result b/mysql-test/r/group_by.result
index dfe28f0e05a..9b86ccd264e 100644
--- a/mysql-test/r/group_by.result
+++ b/mysql-test/r/group_by.result
@@ -1524,7 +1524,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 EXPLAIN SELECT a FROM t1 FORCE INDEX FOR JOIN (i2) 
 FORCE INDEX FOR GROUP BY (i2) GROUP BY a;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	NULL	i2	4	NULL	145	Using index for group-by
+1	SIMPLE	t1	index	NULL	i2	9	NULL	144	Using index
 EXPLAIN SELECT a FROM t1 USE INDEX () IGNORE INDEX (i2);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	144	
@@ -1957,12 +1957,12 @@ UNIQUE INDEX idx (col1));
 INSERT INTO t1 VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10),
 (11),(12),(13),(14),(15),(16),(17),(18),(19),(20);
 EXPLAIN SELECT col1 AS field1, col1 AS field2
-FROM t1 GROUP BY field1, field2+0;;
+FROM t1 GROUP BY field1, field2;;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	index	NULL	idx	5	NULL	20	Using index; Using temporary; Using filesort
+1	SIMPLE	t1	index	NULL	idx	5	NULL	20	Using index
 FLUSH STATUS;
 SELECT col1 AS field1, col1 AS field2
-FROM t1 GROUP BY field1, field2+0;;
+FROM t1 GROUP BY field1, field2;;
 field1	field2
 1	1
 2	2
@@ -1986,7 +1986,7 @@ field1	field2
 20	20
 SHOW SESSION STATUS LIKE 'Sort_scan%';
 Variable_name	Value
-Sort_scan	1
+Sort_scan	0
 EXPLAIN SELECT SQL_BIG_RESULT col1 AS field1, col1 AS field2
 FROM t1 GROUP BY field1, field2;;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
@@ -2320,7 +2320,7 @@ a int,
 b varchar(1),
 KEY (b,a)
 );
-INSERT INTO t1 VALUES (1,NULL),(0,'a');
+INSERT INTO t1 VALUES (1,NULL),(0,'a'),(1,NULL),(0,'a');
 
 EXPLAIN SELECT SQL_BUFFER_RESULT MIN(a), b FROM t1 WHERE t1.b = 'a' GROUP BY b;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
diff --git a/mysql-test/r/group_min_max.result b/mysql-test/r/group_min_max.result
index 3f8bb6395e9..d43135359b9 100644
--- a/mysql-test/r/group_min_max.result
+++ b/mysql-test/r/group_min_max.result
@@ -1,6 +1,6 @@
 drop table if exists t1;
 create table t1 (
-a1 char(64), a2 char(64), b char(16), c char(16) not null, d char(16), dummy char(64) default ' '
+a1 char(64), a2 char(64), b char(16), c char(16) not null, d char(16), dummy char(248) default ' '
 );
 insert into t1 (a1, a2, b, c, d) values
 ('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'),
@@ -43,7 +43,7 @@ Table	Op	Msg_type	Msg_text
 test.t1	analyze	status	Table is already up to date
 drop table if exists t2;
 create table t2 (
-a1 char(64), a2 char(64) not null, b char(16), c char(16), d char(16), dummy char(64) default ' '
+a1 char(64), a2 char(64) not null, b char(16), c char(16), d char(16), dummy char(248) default ' '
 );
 insert into t2 select * from t1;
 insert into t2 (a1, a2, b, c, d) values
@@ -2649,6 +2649,7 @@ DROP TABLE t1;
 CREATE TABLE t (a INT, b INT, INDEX (a,b));
 INSERT INTO t VALUES (2,0), (2,0), (2,1), (2,1);
 INSERT INTO t SELECT * FROM t;
+INSERT INTO t SELECT * FROM t;
 # test MIN
 #should use range with index for group by
 EXPLAIN
@@ -3281,6 +3282,7 @@ drop table t1;
 #
 CREATE TABLE t1 (a int, b int, KEY (b, a)) ;
 INSERT INTO t1 VALUES (0,99),(9,99),(4,0),(7,0),(99,0),(7,0),(8,0),(99,0),(1,0);
+INSERT INTO t1 VALUES (0,99),(9,99),(4,0),(7,0),(99,0),(7,0),(8,0),(99,0),(1,0);
 CREATE TABLE t2 (c int) ;
 INSERT INTO t2 VALUES (0),(1);
 EXPLAIN
@@ -3303,10 +3305,10 @@ MIN(a)	b
 EXPLAIN
 SELECT MIN(a), b FROM t1 WHERE a > ( SELECT min(c) FROM t2, t1 t1a, t1 t1b WHERE c = 0 ) GROUP BY b;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	t1	index	NULL	b	10	NULL	9	Using where; Using index
+1	PRIMARY	t1	index	NULL	b	10	NULL	18	Using where; Using index
 2	SUBQUERY	t2	ALL	NULL	NULL	NULL	NULL	2	Using where
-2	SUBQUERY	t1a	index	NULL	b	10	NULL	9	Using index; Using join buffer (flat, BNL join)
-2	SUBQUERY	t1b	index	NULL	b	10	NULL	9	Using index; Using join buffer (incremental, BNL join)
+2	SUBQUERY	t1a	index	NULL	b	10	NULL	18	Using index; Using join buffer (flat, BNL join)
+2	SUBQUERY	t1b	index	NULL	b	10	NULL	18	Using index; Using join buffer (incremental, BNL join)
 SELECT MIN(a), b FROM t1 WHERE a > ( SELECT min(c) FROM t2, t1 t1a, t1 t1b WHERE c = 0 ) GROUP BY b;
 MIN(a)	b
 1	0
diff --git a/mysql-test/t/func_group_innodb.test b/mysql-test/t/func_group_innodb.test
index 06405808d79..c62d3d08496 100644
--- a/mysql-test/t/func_group_innodb.test
+++ b/mysql-test/t/func_group_innodb.test
@@ -180,7 +180,12 @@ SET storage_engine=@old_engine;
 
 CREATE TABLE t1(a BLOB, b VARCHAR(255) CHARSET LATIN1, c INT,
                 KEY(b, c, a(765))) ENGINE=INNODB;
-INSERT INTO t1(a, b, c) VALUES ('', 'a', 0), ('', 'a', null), ('', 'a', 0);
+INSERT INTO t1(a, b, c) VALUES 
+('', 'a', 0), ('', 'a', null), ('', 'a', 0), ('', 'a', null), ('', 'a', 0);
+                               
+-- disable_result_log
+ANALYZE TABLE t1;
+-- enable_result_log
 
 SELECT MIN(c) FROM t1 GROUP BY b;
 EXPLAIN SELECT MIN(c) FROM t1 GROUP BY b;
diff --git a/mysql-test/t/group_by.test b/mysql-test/t/group_by.test
index 8ee17d2b2d3..e92780f0523 100644
--- a/mysql-test/t/group_by.test
+++ b/mysql-test/t/group_by.test
@@ -1334,7 +1334,7 @@ INSERT INTO t1 VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10),
    (11),(12),(13),(14),(15),(16),(17),(18),(19),(20);
 
 let $query0=SELECT col1 AS field1, col1 AS field2
-            FROM t1 GROUP BY field1, field2+0;
+            FROM t1 GROUP BY field1, field2;
 
 # Needs to be range to exercise bug
 --eval EXPLAIN $query0;
@@ -1496,8 +1496,7 @@ CREATE TABLE t1 (
   b varchar(1),
   KEY (b,a)
 );
-
-INSERT INTO t1 VALUES (1,NULL),(0,'a');
+INSERT INTO t1 VALUES (1,NULL),(0,'a'),(1,NULL),(0,'a');
 
 let $query=
   SELECT SQL_BUFFER_RESULT MIN(a), b FROM t1 WHERE t1.b = 'a' GROUP BY b;
diff --git a/mysql-test/t/group_min_max.test b/mysql-test/t/group_min_max.test
index ef1ee58c97e..f1a287054ca 100644
--- a/mysql-test/t/group_min_max.test
+++ b/mysql-test/t/group_min_max.test
@@ -15,7 +15,7 @@ drop table if exists t1;
 --enable_warnings
 
 create table t1 (
-  a1 char(64), a2 char(64), b char(16), c char(16) not null, d char(16), dummy char(64) default ' '
+  a1 char(64), a2 char(64), b char(16), c char(16) not null, d char(16), dummy char(248) default ' '
 );
 
 insert into t1 (a1, a2, b, c, d) values
@@ -65,7 +65,7 @@ drop table if exists t2;
 --enable_warnings
 
 create table t2 (
-  a1 char(64), a2 char(64) not null, b char(16), c char(16), d char(16), dummy char(64) default ' '
+  a1 char(64), a2 char(64) not null, b char(16), c char(16), d char(16), dummy char(248) default ' '
 );
 insert into t2 select * from t1;
 # add few rows with NULL's in the MIN/MAX column
@@ -1058,6 +1058,7 @@ DROP TABLE t1;
 CREATE TABLE t (a INT, b INT, INDEX (a,b));
 INSERT INTO t VALUES (2,0), (2,0), (2,1), (2,1);
 INSERT INTO t SELECT * FROM t;
+INSERT INTO t SELECT * FROM t;
 
 --echo # test MIN
 --echo #should use range with index for group by
@@ -1257,6 +1258,7 @@ drop table t1;
 
 CREATE TABLE t1 (a int, b int, KEY (b, a)) ;
 INSERT INTO t1 VALUES (0,99),(9,99),(4,0),(7,0),(99,0),(7,0),(8,0),(99,0),(1,0);
+INSERT INTO t1 VALUES (0,99),(9,99),(4,0),(7,0),(99,0),(7,0),(8,0),(99,0),(1,0);
 CREATE TABLE t2 (c int) ;
 INSERT INTO t2 VALUES (0),(1);
 
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index 6d2e8c7340f..0981361b98d 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -13424,7 +13424,7 @@ SEL_ARG * get_index_range_tree(uint index, SEL_TREE* range_tree, PARAM *param,
 
   DESCRIPTION
     This method computes the access cost of a TRP_GROUP_MIN_MAX instance and
-    the number of rows returned. It updates this->read_cost and this->records.
+    the number of rows returned.
 
   NOTES
     The cost computation distinguishes several cases:
@@ -13480,7 +13480,6 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
   double p_overlap; /* Probability that a sub-group overlaps two blocks. */
   double quick_prefix_selectivity;
   double io_cost;
-  double cpu_cost= 0; /* TODO: CPU cost of index_read calls? */
   DBUG_ENTER("cost_group_min_max");
 
   table_records= table->stat_records();
@@ -13528,11 +13527,25 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
              (double) num_blocks;
 
   /*
-    TODO: If there is no WHERE clause and no other expressions, there should be
-    no CPU cost. We leave it here to make this cost comparable to that of index
-    scan as computed in SQL_SELECT::test_quick_select().
+    CPU cost must be comparable to that of an index scan as computed
+    in SQL_SELECT::test_quick_select(). When the groups are small,
+    e.g. for a unique index, using index scan will be cheaper since it
+    reads the next record without having to re-position to it on every
+    group. To make the CPU cost reflect this, we estimate the CPU cost
+    as the sum of:
+    1. Cost for evaluating the condition (similarly as for index scan).
+    2. Cost for navigating the index structure (assuming a b-tree).
+       Note: We only add the cost for one comparison per block. For a
+             b-tree the number of comparisons will be larger.
+       TODO: This cost should be provided by the storage engine.
   */
-  cpu_cost= (double) num_groups / TIME_FOR_COMPARE;
+  const double tree_traversal_cost= 
+    ceil(log(static_cast<double>(table_records))/
+         log(static_cast<double>(keys_per_block))) * 
+    1/double(2*TIME_FOR_COMPARE); 
+
+  const double cpu_cost= num_groups *
+                         (tree_traversal_cost + 1/double(TIME_FOR_COMPARE));
 
   *read_cost= io_cost + cpu_cost;
   *records= num_groups;
-- 
2.30.9