Limit calculated rows to the number of rows in the table

The result file changes are mainly that the number of rows is one smaller for some queries with DISTINCT or GROUP BY.

Limit calculated rows to the number of rows in the table
The result file changes are mainly that the number of rows is one smaller for some queries with DISTINCT or GROUP BY.
87d4d723 · Monty · Sergei Petrunia · c443dbff · 87d4d723 · 87d4d723
Commit 87d4d723 authored Oct 06, 2021 by Monty Committed by Sergei Petrunia Jan 30, 2023
12 changed files
--- a/mysql-test/main/distinct.result
+++ b/mysql-test/main/distinct.result
@@ -538,10 +538,10 @@ PRIMARY KEY (a,b));
 INSERT INTO t2 VALUES (1,1,1,50), (1,2,3,40), (2,1,3,4);
 EXPLAIN SELECT DISTINCT a FROM t2;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t2	range	NULL	PRIMARY	4	NULL	4	Using index for group-by
+1	SIMPLE	t2	range	NULL	PRIMARY	4	NULL	3	Using index for group-by
 EXPLAIN SELECT DISTINCT a,a FROM t2;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t2	range	NULL	PRIMARY	4	NULL	4	Using index for group-by
+1	SIMPLE	t2	range	NULL	PRIMARY	4	NULL	3	Using index for group-by
 EXPLAIN SELECT DISTINCT b,a FROM t2;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t2	index	NULL	PRIMARY	8	NULL	3	Using index
@@ -756,7 +756,7 @@ INSERT INTO t1(a, b, c) VALUES (1, 1, 1),
 (1, 2, 3);
 EXPLAIN SELECT DISTINCT a, b, d, c FROM t1;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	NULL	PRIMARY	16	NULL	7	Using index for group-by; Using temporary
+1	SIMPLE	t1	range	NULL	PRIMARY	16	NULL	6	Using index for group-by; Using temporary
 SELECT DISTINCT a, b, d, c FROM t1;
 a	b	d	c
 1	1	0	1

--- a/mysql-test/main/explain_innodb.result
+++ b/mysql-test/main/explain_innodb.result
@@ -15,6 +15,6 @@ explain
 SELECT * FROM (SELECT id FROM t1 GROUP BY id) dt WHERE 1=0;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	NULL	NULL	NULL	NULL	NULL	NULL	NULL	Impossible WHERE
-2	DERIVED	t1	range	NULL	id	53	NULL	2	Using index for group-by
+2	DERIVED	t1	range	NULL	id	53	NULL	1	Using index for group-by
 SET GLOBAL slow_query_log = @sql_tmp;
 drop table t1;
--- a/mysql-test/main/group_by.result
+++ b/mysql-test/main/group_by.result
@@ -1578,7 +1578,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 EXPLAIN SELECT a FROM t1 FORCE INDEX FOR JOIN (i2) 
 FORCE INDEX FOR GROUP BY (i2) GROUP BY a;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	NULL	i2	4	NULL	145	Using index for group-by
+1	SIMPLE	t1	range	NULL	i2	4	NULL	144	Using index for group-by
 EXPLAIN SELECT a FROM t1 USE INDEX () IGNORE INDEX (i2);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	144	
@@ -1701,7 +1701,7 @@ NULL	1
 1	2
 EXPLAIN SELECT a from t2 GROUP BY a;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t2	range	NULL	a	5	NULL	7	Using index for group-by
+1	SIMPLE	t2	range	NULL	a	5	NULL	6	Using index for group-by
 SELECT a from t2 GROUP BY a;
 a
 NULL

--- a/mysql-test/main/group_min_max.result
+++ b/mysql-test/main/group_min_max.result
@@ -3664,7 +3664,7 @@ f1	COUNT(DISTINCT f2)
 3	4
 explain SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	NULL	f1	5	NULL	9	Using index for group-by
+1	SIMPLE	t1	range	NULL	f1	5	NULL	8	Using index for group-by
 drop table t1;
 # End of test#50539.
 #

--- a/mysql-test/main/opt_trace.result
+++ b/mysql-test/main/opt_trace.result
@@ -1408,7 +1408,7 @@ test.t1	analyze	status	Engine-independent statistics collected
 test.t1	analyze	status	OK
 EXPLAIN SELECT MIN(d) FROM t1 where b=2 and c=3  group by a;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	NULL	a	20	NULL	8	Using where; Using index for group-by
+1	SIMPLE	t1	range	NULL	a	20	NULL	7	Using where; Using index for group-by
 select * from information_schema.OPTIMIZER_TRACE;
 QUERY	TRACE	MISSING_BYTES_BEYOND_MAX_MEM_SIZE	INSUFFICIENT_PRIVILEGES
 EXPLAIN SELECT MIN(d) FROM t1 where b=2 and c=3  group by a	{
@@ -1495,8 +1495,8 @@ EXPLAIN SELECT MIN(d) FROM t1 where b=2 and c=3  group by a	{
                        "index": "a",
                        "covering": true,
                        "ranges": ["(2,3) <= (b,c) <= (2,3)"],
-                        "rows": 8,
+                        "rows": 7,
-                        "cost": 2.2
+                        "cost": 2.05
                      }
                    ]
                  },
@@ -1507,8 +1507,8 @@ EXPLAIN SELECT MIN(d) FROM t1 where b=2 and c=3  group by a	{
                    "min_aggregate": true,
                    "max_aggregate": false,
                    "distinct_aggregate": false,
-                    "rows": 8,
+                    "rows": 7,
-                    "cost": 2.2,
+                    "cost": 2.05,
                    "key_parts_used_for_access": ["a", "b", "c"],
                    "ranges": ["(2,3) <= (b,c) <= (2,3)"],
                    "chosen": true
@@ -1521,13 +1521,13 @@ EXPLAIN SELECT MIN(d) FROM t1 where b=2 and c=3  group by a	{
                      "min_aggregate": true,
                      "max_aggregate": false,
                      "distinct_aggregate": false,
-                      "rows": 8,
+                      "rows": 7,
-                      "cost": 2.2,
+                      "cost": 2.05,
                      "key_parts_used_for_access": ["a", "b", "c"],
                      "ranges": ["(2,3) <= (b,c) <= (2,3)"]
                    },
-                    "rows_for_plan": 8,
+                    "rows_for_plan": 7,
-                    "cost_for_plan": 2.2,
+                    "cost_for_plan": 2.05,
                    "chosen": true
                  }
                }
@@ -1545,16 +1545,16 @@ EXPLAIN SELECT MIN(d) FROM t1 where b=2 and c=3  group by a	{
                      "considered_access_paths": [
                        {
                          "access_type": "index_merge",
-                          "resulting_rows": 8,
+                          "resulting_rows": 7,
-                          "cost": 2.2,
+                          "cost": 2.05,
                          "chosen": true,
                          "use_tmp_table": true
                        }
                      ],
                      "chosen_access_method": {
                        "type": "index_merge",
-                        "records": 8,
+                        "records": 7,
-                        "cost": 2.2,
+                        "cost": 2.05,
                        "uses_join_buffering": false
                      }
                    }
@@ -1564,9 +1564,9 @@ EXPLAIN SELECT MIN(d) FROM t1 where b=2 and c=3  group by a	{
              {
                "plan_prefix": [],
                "table": "t1",
-                "rows_for_plan": 8,
+                "rows_for_plan": 7,
-                "cost_for_plan": 3.8,
+                "cost_for_plan": 3.45,
-                "cost_for_sorting": 8
+                "cost_for_sorting": 7
              }
            ]
          },

--- a/mysql-test/main/subselect_mat.result
+++ b/mysql-test/main/subselect_mat.result
@@ -1142,7 +1142,7 @@ a
 explain extended
 select a from t1 group by a having a in (select c from t2 where d >= 20);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	PRIMARY	t1	range	NULL	it1a	4	NULL	8	100.00	Using index for group-by
+1	PRIMARY	t1	range	NULL	it1a	4	NULL	7	100.00	Using index for group-by
 2	MATERIALIZED	t2	ALL	NULL	NULL	NULL	NULL	7	100.00	Using where
 Warnings:
 Note	1003	/* select#1 */ select `test`.`t1`.`a` AS `a` from `test`.`t1` group by `test`.`t1`.`a` having <expr_cache><`test`.`t1`.`a`>(<in_optimizer>(`test`.`t1`.`a`,`test`.`t1`.`a` in ( <materialize> (/* select#2 */ select `test`.`t2`.`c` from `test`.`t2` where `test`.`t2`.`d` >= 20 ), <primary_index_lookup>(`test`.`t1`.`a` in <temporary table> on distinct_key where `test`.`t1`.`a` = `<subquery2>`.`c`))))
@@ -1154,7 +1154,7 @@ create index iab on t1(a, b);
 explain extended
 select a from t1 group by a having a in (select c from t2 where d >= 20);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	PRIMARY	t1	range	NULL	it1a	4	NULL	8	100.00	Using index for group-by
+1	PRIMARY	t1	range	NULL	it1a	4	NULL	7	100.00	Using index for group-by
 2	MATERIALIZED	t2	ALL	NULL	NULL	NULL	NULL	7	100.00	Using where
 Warnings:
 Note	1003	/* select#1 */ select `test`.`t1`.`a` AS `a` from `test`.`t1` group by `test`.`t1`.`a` having <expr_cache><`test`.`t1`.`a`>(<in_optimizer>(`test`.`t1`.`a`,`test`.`t1`.`a` in ( <materialize> (/* select#2 */ select `test`.`t2`.`c` from `test`.`t2` where `test`.`t2`.`d` >= 20 ), <primary_index_lookup>(`test`.`t1`.`a` in <temporary table> on distinct_key where `test`.`t1`.`a` = `<subquery2>`.`c`))))
@@ -1166,7 +1166,7 @@ explain extended
 select a from t1 group by a
 having a in (select c from t2 where d >= some(select e from t3 where max(b)=e));
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	PRIMARY	t1	range	NULL	iab	4	NULL	8	100.00	Using index for group-by
+1	PRIMARY	t1	range	NULL	iab	4	NULL	7	100.00	Using index for group-by
 2	DEPENDENT SUBQUERY	t2	ALL	NULL	NULL	NULL	NULL	7	100.00	Using where
 3	DEPENDENT SUBQUERY	t3	ALL	NULL	NULL	NULL	NULL	4	100.00	Using where
 Warnings:

--- a/mysql-test/main/subselect_sj_mat.result
+++ b/mysql-test/main/subselect_sj_mat.result
@@ -1181,7 +1181,7 @@ a
 explain extended
 select a from t1 group by a having a in (select c from t2 where d >= 20);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	PRIMARY	t1	range	NULL	it1a	4	NULL	8	100.00	Using index for group-by
+1	PRIMARY	t1	range	NULL	it1a	4	NULL	7	100.00	Using index for group-by
 2	MATERIALIZED	t2	ALL	NULL	NULL	NULL	NULL	7	100.00	Using where
 Warnings:
 Note	1003	/* select#1 */ select `test`.`t1`.`a` AS `a` from `test`.`t1` group by `test`.`t1`.`a` having <expr_cache><`test`.`t1`.`a`>(<in_optimizer>(`test`.`t1`.`a`,`test`.`t1`.`a` in ( <materialize> (/* select#2 */ select `test`.`t2`.`c` from `test`.`t2` where `test`.`t2`.`d` >= 20 ), <primary_index_lookup>(`test`.`t1`.`a` in <temporary table> on distinct_key where `test`.`t1`.`a` = `<subquery2>`.`c`))))
@@ -1193,7 +1193,7 @@ create index iab on t1(a, b);
 explain extended
 select a from t1 group by a having a in (select c from t2 where d >= 20);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	PRIMARY	t1	range	NULL	it1a	4	NULL	8	100.00	Using index for group-by
+1	PRIMARY	t1	range	NULL	it1a	4	NULL	7	100.00	Using index for group-by
 2	MATERIALIZED	t2	ALL	NULL	NULL	NULL	NULL	7	100.00	Using where
 Warnings:
 Note	1003	/* select#1 */ select `test`.`t1`.`a` AS `a` from `test`.`t1` group by `test`.`t1`.`a` having <expr_cache><`test`.`t1`.`a`>(<in_optimizer>(`test`.`t1`.`a`,`test`.`t1`.`a` in ( <materialize> (/* select#2 */ select `test`.`t2`.`c` from `test`.`t2` where `test`.`t2`.`d` >= 20 ), <primary_index_lookup>(`test`.`t1`.`a` in <temporary table> on distinct_key where `test`.`t1`.`a` = `<subquery2>`.`c`))))
@@ -1205,7 +1205,7 @@ explain extended
 select a from t1 group by a
 having a in (select c from t2 where d >= some(select e from t3 where max(b)=e));
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	PRIMARY	t1	range	NULL	iab	4	NULL	8	100.00	Using index for group-by
+1	PRIMARY	t1	range	NULL	iab	4	NULL	7	100.00	Using index for group-by
 2	DEPENDENT SUBQUERY	t2	ALL	NULL	NULL	NULL	NULL	7	100.00	Using where
 3	DEPENDENT SUBQUERY	t3	ALL	NULL	NULL	NULL	NULL	4	100.00	Using where
 Warnings:

--- a/mysql-test/suite/gcol/r/gcol_select_innodb.result
+++ b/mysql-test/suite/gcol/r/gcol_select_innodb.result
@@ -146,7 +146,7 @@ count(distinct c)
 3
 explain select count(distinct c) from t1;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	NULL	c	5	NULL	6	Using index for group-by
+1	SIMPLE	t1	range	NULL	c	5	NULL	5	Using index for group-by
 ###
 ### filesort & range-based utils
 ###

--- a/mysql-test/suite/gcol/r/gcol_select_myisam.result
+++ b/mysql-test/suite/gcol/r/gcol_select_myisam.result
@@ -146,7 +146,7 @@ count(distinct c)
 3
 explain select count(distinct c) from t1;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	NULL	c	5	NULL	6	Using index for group-by
+1	SIMPLE	t1	range	NULL	c	5	NULL	5	Using index for group-by
 ###
 ### filesort & range-based utils
 ###

--- a/mysql-test/suite/vcol/r/vcol_select_innodb.result
+++ b/mysql-test/suite/vcol/r/vcol_select_innodb.result
@@ -135,7 +135,7 @@ count(distinct c)
 3
 explain select count(distinct c) from t1;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	NULL	c	5	NULL	6	Using index for group-by
+1	SIMPLE	t1	range	NULL	c	5	NULL	5	Using index for group-by
 ###
 ### filesort & range-based utils
 ###

--- a/mysql-test/suite/vcol/r/vcol_select_myisam.result
+++ b/mysql-test/suite/vcol/r/vcol_select_myisam.result
@@ -133,7 +133,7 @@ count(distinct c)
 3
 explain select count(distinct c) from t1;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	NULL	c	5	NULL	6	Using index for group-by
+1	SIMPLE	t1	range	NULL	c	5	NULL	5	Using index for group-by
 ###
 ### filesort & range-based utils
 ###

--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -3235,7 +3235,7 @@ double records_in_column_ranges(PARAM *param, uint idx,
  SEL_ARG_RANGE_SEQ seq;
  KEY_MULTI_RANGE range;
  range_seq_t seq_it;
-  double rows;
+  double rows, table_records;
  Field *field;
  uint flags= 0;
  double total_rows= 0;
@@ -3290,13 +3290,17 @@ double records_in_column_ranges(PARAM *param, uint idx,
      total_rows= DBL_MAX;
      break;
    }
-    total_rows += rows;
+    total_rows+= rows;
  }
  if (total_rows == 0)
    total_rows= MY_MIN(1, rows2double(param->table->stat_records()));
-  return total_rows;
+  table_records= rows2double(param->table->stat_records());
-} 
+  if (total_rows > table_records)
+    DBUG_PRINT("error", ("table_records: %g < total_records: %g",
+                         table_records, total_rows));
+  return MY_MIN(total_rows, table_records);
+}
 /*
@@ -14813,6 +14817,8 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
    num_groups= (ha_rows) rint(num_groups * quick_prefix_selectivity);
    set_if_bigger(num_groups, 1);
  }
+  /* Ensure we don't have more groups than rows in table */
+  set_if_smaller(num_groups, table_records);
  if (used_key_parts > group_key_parts)
  { /*