Commit 6fa74517 authored by Monty's avatar Monty Committed by Sergei Petrunia

Adjust costs for doing index scan in cost_group_min_max()

The idea is that when doing a tree dive (once per group), we need to
compare key values, which is fast.  For each new group, we have to
compare the full where clause for the row.
Compared to original code, the cost of group_min_max() has slightly
increased which affects some test with only a few rows.
main.group_min_max and main.distinct have been modified to show the
effect of the change.

The patch also adjust the number of groups in case of quick selects:
- For simple WHERE clauses, ensure that we have at least as many groups
  as we have conditions on the used group-by key parts.
  The assumption is that each condition will create at least one group.
- Ensure that there are no more groups than rows found by quick_select

Test changes:
- For some small tables there has been a change of
  Using index for group-by -> Using index for group-by (scanning)
  Range -> Index and Using index for group-by -> Using index
parent bc9805e9
......@@ -19749,7 +19749,7 @@ id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <subquery3> eq_ref distinct_key distinct_key 4 func 1
1 PRIMARY <derived2> ref key0 key0 5 test.t3.id 2
3 MATERIALIZED t2 ALL NULL NULL NULL NULL 3
2 DERIVED cp2 range NULL a 5 NULL 7 Using index for group-by
2 DERIVED cp2 index NULL a 5 NULL 7 Using index
explain format=json select * from t1, (select a from t1 cp2 group by a) dt, t3
where dt.a = t1.a and t1.a = t3.id and t1.a in (select a from t2);
EXPLAIN
......@@ -19826,13 +19826,13 @@ EXPLAIN
{
"table": {
"table_name": "cp2",
"access_type": "range",
"access_type": "index",
"key": "a",
"key_length": "5",
"used_key_parts": ["a"],
"rows": 7,
"filtered": 100,
"using_index_for_group_by": true
"using_index": true
}
}
]
......
......@@ -538,10 +538,10 @@ PRIMARY KEY (a,b));
INSERT INTO t2 VALUES (1,1,1,50), (1,2,3,40), (2,1,3,4);
EXPLAIN SELECT DISTINCT a FROM t2;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 range NULL PRIMARY 4 NULL 3 Using index for group-by
1 SIMPLE t2 index NULL PRIMARY 8 NULL 3 Using index
EXPLAIN SELECT DISTINCT a,a FROM t2;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 range NULL PRIMARY 4 NULL 3 Using index for group-by
1 SIMPLE t2 index NULL PRIMARY 8 NULL 3 Using index
EXPLAIN SELECT DISTINCT b,a FROM t2;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 index NULL PRIMARY 8 NULL 3 Using index
......@@ -754,9 +754,6 @@ INSERT INTO t1(a, b, c) VALUES (1, 1, 1),
(1, 2, 1),
(1, 2, 2),
(1, 2, 3);
EXPLAIN SELECT DISTINCT a, b, d, c FROM t1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range NULL PRIMARY 16 NULL 6 Using index for group-by; Using temporary
SELECT DISTINCT a, b, d, c FROM t1;
a b d c
1 1 0 1
......@@ -765,6 +762,13 @@ a b d c
1 2 0 1
1 2 0 2
1 2 0 3
EXPLAIN SELECT DISTINCT a, b, d, c FROM t1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index NULL a 16 NULL 6 Using index
INSERT INTO t1 SELECT seq/10,seq/10,seq/10,seq/10,seq from seq_1_to_100;
EXPLAIN SELECT DISTINCT a, b, d, c FROM t1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range NULL PRIMARY 16 NULL 10 Using index for group-by; Using temporary
DROP TABLE t1;
#
# Bug #46159: simple query that never returns
......
......@@ -4,6 +4,7 @@
#
--source include/default_optimizer_switch.inc
--source include/have_sequence.inc
--disable_warnings
drop table if exists t1,t2,t3;
--enable_warnings
......@@ -574,9 +575,10 @@ INSERT INTO t1(a, b, c) VALUES (1, 1, 1),
(1, 2, 2),
(1, 2, 3);
EXPLAIN SELECT DISTINCT a, b, d, c FROM t1;
SELECT DISTINCT a, b, d, c FROM t1;
EXPLAIN SELECT DISTINCT a, b, d, c FROM t1;
INSERT INTO t1 SELECT seq/10,seq/10,seq/10,seq/10,seq from seq_1_to_100;
EXPLAIN SELECT DISTINCT a, b, d, c FROM t1;
DROP TABLE t1;
......
......@@ -1578,7 +1578,7 @@ id select_type table type possible_keys key key_len ref rows Extra
EXPLAIN SELECT a FROM t1 FORCE INDEX FOR JOIN (i2)
FORCE INDEX FOR GROUP BY (i2) GROUP BY a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range NULL i2 4 NULL 144 Using index for group-by
1 SIMPLE t1 index NULL i2 9 NULL 144 Using index
EXPLAIN SELECT a FROM t1 USE INDEX () IGNORE INDEX (i2);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 144
......@@ -1701,7 +1701,7 @@ NULL 1
1 2
EXPLAIN SELECT a from t2 GROUP BY a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 range NULL a 5 NULL 6 Using index for group-by
1 SIMPLE t2 index NULL a 10 NULL 6 Using index
SELECT a from t2 GROUP BY a;
a
NULL
......@@ -1714,6 +1714,11 @@ b
NULL
1
2
insert into t2 SELECT NULL, NULL from seq_1_to_10;
# Expect: Using index for group-by
EXPLAIN SELECT b from t2 GROUP BY a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 range NULL a 5 NULL 9 Using index for group-by
DROP TABLE t1;
DROP TABLE t2;
CREATE TABLE t1 ( a INT, b INT );
......@@ -2452,7 +2457,7 @@ test.t1 analyze status OK
EXPLAIN SELECT SQL_BUFFER_RESULT MIN(a), b FROM t1 WHERE t1.b = 'a' GROUP BY b;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range b b 9 NULL 2 Using where; Using index for group-by; Using temporary
1 SIMPLE t1 ref b b 4 const 6 Using where; Using index; Using temporary
SELECT SQL_BUFFER_RESULT MIN(a), b FROM t1 WHERE t1.b = 'a' GROUP BY b;
MIN(a) b
......@@ -2460,7 +2465,7 @@ MIN(a) b
EXPLAIN SELECT MIN(a), b FROM t1 WHERE t1.b = 'a' GROUP BY b;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range b b 9 NULL 2 Using where; Using index for group-by
1 SIMPLE t1 ref b b 4 const 6 Using where; Using index
SELECT MIN(a), b FROM t1 WHERE t1.b = 'a' GROUP BY b;
MIN(a) b
......
--source include/have_sequence.inc
# Initialise
--disable_warnings
drop table if exists t1,t2,t3;
......@@ -1164,6 +1166,11 @@ SELECT a from t2 GROUP BY a;
EXPLAIN SELECT b from t2 GROUP BY b;
SELECT b from t2 GROUP BY b;
# Show that we are using 'range' when there is more NULL rows in the table
insert into t2 SELECT NULL, NULL from seq_1_to_10;
--echo # Expect: Using index for group-by
EXPLAIN SELECT b from t2 GROUP BY a;
DROP TABLE t1;
DROP TABLE t2;
......
......@@ -2353,6 +2353,9 @@ t1;
id2 id3 id5 id4 id3 id6 id5 id1
1 1 1 1 1 1 1 1
DROP TABLE t1,t2,t3,t4,t5,t6;
#
# Bug#22342: No results returned for query using max and group by
#
CREATE TABLE t1 (a int, b int, KEY (a,b), KEY b (b));
INSERT INTO t1 VALUES
(1,1),(1,2),(1,0),(1,3),
......@@ -2361,17 +2364,28 @@ ANALYZE TABLE t1;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
CREATE TABLE t2 (a int, b int, c int, PRIMARY KEY (a,b,c));
INSERT INTO t2 SELECT a,b,b FROM t1;
explain SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a,b a 10 NULL 2 Using where; Using index for group-by
1 SIMPLE t1 range a,b a 10 NULL 6 Using where; Using index
insert into t1 select 1,seq from seq_1_to_100;
explain SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a,b a 10 NULL 6 Using where; Using index
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
explain SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a,b a 10 NULL 1 Using where; Using index for group-by
SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a;
MAX(b) a
1 1
SELECT MIN(b), a FROM t1 WHERE b > 1 AND a = 1 GROUP BY a;
MIN(b) a
2 1
CREATE TABLE t2 (a int, b int, c int, PRIMARY KEY (a,b,c));
INSERT INTO t2 SELECT a,b,b FROM t1;
explain SELECT MIN(c) FROM t2 WHERE b = 2 and a = 1 and c > 1 GROUP BY a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 range PRIMARY PRIMARY 12 NULL 1 Using where; Using index
......@@ -3657,14 +3671,34 @@ KEY (f1,f2)
) ;
insert into t1 values(1,'A'),(1 , 'B'), (1, 'C'), (2, 'A'),
(3, 'A'), (3, 'B'), (3, 'C'), (3, 'D');
explain SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range NULL f1 5 NULL 8 Using index for group-by (scanning)
SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
f1 COUNT(DISTINCT f2)
1 3
2 1
3 4
insert into t1 select seq/10,char(64+mod(seq,4)) from seq_1_to_100;
explain SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range NULL f1 5 NULL 10 Using index for group-by
SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
f1 COUNT(DISTINCT f2)
0 4
1 4
2 4
3 5
4 4
5 4
6 4
7 4
8 4
9 4
10 4
explain SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range NULL f1 5 NULL 8 Using index for group-by
1 SIMPLE t1 range NULL f1 5 NULL 10 Using index for group-by
drop table t1;
# End of test#50539.
#
......@@ -3760,7 +3794,7 @@ CREATE INDEX break_it ON t1 (a, b);
EXPLAIN
SELECT distinct a, b FROM t1 where a = '3' ORDER BY b;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range break_it break_it 10 NULL 2 Using where; Using index for group-by; Using filesort
1 SIMPLE t1 ref break_it break_it 5 const 6 Using where; Using index; Using filesort
SELECT distinct a, b FROM t1 where a = '3' ORDER BY b;
a b
3 1
......
......@@ -899,23 +899,27 @@ t1;
DROP TABLE t1,t2,t3,t4,t5,t6;
#
# Bug#22342: No results returned for query using max and group by
#
--echo #
--echo # Bug#22342: No results returned for query using max and group by
--echo #
CREATE TABLE t1 (a int, b int, KEY (a,b), KEY b (b));
INSERT INTO t1 VALUES
(1,1),(1,2),(1,0),(1,3),
(1,-1),(1,-2),(1,-3),(1,-4);
ANALYZE TABLE t1;
CREATE TABLE t2 (a int, b int, c int, PRIMARY KEY (a,b,c));
INSERT INTO t2 SELECT a,b,b FROM t1;
explain SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a;
insert into t1 select 1,seq from seq_1_to_100;
explain SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a;
analyze table t1;
explain SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a;
SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a;
SELECT MIN(b), a FROM t1 WHERE b > 1 AND a = 1 GROUP BY a;
CREATE TABLE t2 (a int, b int, c int, PRIMARY KEY (a,b,c));
INSERT INTO t2 SELECT a,b,b FROM t1;
explain SELECT MIN(c) FROM t2 WHERE b = 2 and a = 1 and c > 1 GROUP BY a;
SELECT MIN(c) FROM t2 WHERE b = 2 and a = 1 and c > 1 GROUP BY a;
DROP TABLE t1,t2;
#
......@@ -1439,10 +1443,12 @@ CREATE TABLE t1 (
) ;
insert into t1 values(1,'A'),(1 , 'B'), (1, 'C'), (2, 'A'),
(3, 'A'), (3, 'B'), (3, 'C'), (3, 'D');
explain SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
insert into t1 select seq/10,char(64+mod(seq,4)) from seq_1_to_100;
explain SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
explain SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
drop table t1;
--echo # End of test#50539.
......
......@@ -180,7 +180,7 @@ F 17
EXPLAIN SELECT c1, max(i2) FROM t1 WHERE (c1 = 'C' OR c1 = 'F' ) AND ( i2 = 17 )
GROUP BY c1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range k1 k1 5 NULL 1 Using where; Using index for group-by
1 SIMPLE t1 range k1 k1 5 NULL 2 Using where; Using index for group-by
SELECT c1, max(i2) FROM t1 WHERE (c1 = 'C' OR c1 = 'F' ) AND ( i2 = 17 )
GROUP BY c1;
c1 max(i2)
......@@ -200,7 +200,7 @@ EXPLAIN SELECT c1, i1, max(i2) FROM t2
WHERE (c1 = 'C' OR ( c1 = 'F' AND i1 < 35)) AND ( i2 = 17 )
GROUP BY c1,i1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 range k2 k2 9 NULL 60 Using where; Using index for group-by
1 SIMPLE t2 range k2 k2 5 NULL 60 Using where; Using index
SELECT c1, i1, max(i2) FROM t2
WHERE (c1 = 'C' OR ( c1 = 'F' AND i1 < 35)) AND ( i2 = 17 )
GROUP BY c1,i1;
......@@ -211,7 +211,7 @@ EXPLAIN SELECT c1, i1, max(i2) FROM t2
WHERE (((c1 = 'C' AND i1 < 40) OR ( c1 = 'F' AND i1 < 35)) AND ( i2 = 17 ))
GROUP BY c1,i1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 range k2 k2 9 NULL 60 Using where; Using index for group-by
1 SIMPLE t2 range k2 k2 5 NULL 60 Using where; Using index
SELECT c1, i1, max(i2) FROM t2
WHERE (((c1 = 'C' AND i1 < 40) OR ( c1 = 'F' AND i1 < 35)) AND ( i2 = 17 ))
GROUP BY c1,i1;
......
This diff is collapsed.
......@@ -4,13 +4,16 @@ drop table if exists t1, t2;
#
CREATE TABLE t1 (a INT,b INT,KEY a (a,b));
INSERT INTO `t1` VALUES (0,580092),(3000,894076),(4000,805483),(4000,913540),(6000,611137),(8000,171602),(9000,599495),(9000,746305),(10000,272829),(10000,847519),(12000,258869),(12000,929028),(13000,288970),(15000,20971),(15000,105839),(16000,788272),(17000,76914),(18000,827274),(19000,802258),(20000,123677),(20000,587729),(22000,701449),(25000,31565),(25000,230782),(25000,442887),(25000,733139),(25000,851020);
SELECT COUNT(*) from t1 where a IN (10000, 1000000, 3000);
COUNT(*)
3
EXPLAIN SELECT a, MAX(b) FROM t1 WHERE a IN (10000, 1000000, 3000) GROUP BY a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 5 NULL 1 Using where; Using index for group-by
1 SIMPLE t1 range a a 5 NULL 4 Using where; Using index
alter table t1 partition by hash(a) partitions 1;
EXPLAIN SELECT a, MAX(b) FROM t1 WHERE a IN (10000, 1000000, 3000) GROUP BY a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 5 NULL 1 Using where; Using index for group-by
1 SIMPLE t1 range a a 5 NULL 4 Using where; Using index
alter table t1 remove partitioning;
insert into t1 (a,b) select seq,seq from seq_4001_to_4100;
insert into t1 (a,b) select seq,seq from seq_10001_to_10100;
......
......@@ -17,6 +17,7 @@ drop table if exists t1, t2;
CREATE TABLE t1 (a INT,b INT,KEY a (a,b));
INSERT INTO `t1` VALUES (0,580092),(3000,894076),(4000,805483),(4000,913540),(6000,611137),(8000,171602),(9000,599495),(9000,746305),(10000,272829),(10000,847519),(12000,258869),(12000,929028),(13000,288970),(15000,20971),(15000,105839),(16000,788272),(17000,76914),(18000,827274),(19000,802258),(20000,123677),(20000,587729),(22000,701449),(25000,31565),(25000,230782),(25000,442887),(25000,733139),(25000,851020);
SELECT COUNT(*) from t1 where a IN (10000, 1000000, 3000);
EXPLAIN SELECT a, MAX(b) FROM t1 WHERE a IN (10000, 1000000, 3000) GROUP BY a;
alter table t1 partition by hash(a) partitions 1;
......
......@@ -1142,7 +1142,7 @@ a
explain extended
select a from t1 group by a having a in (select c from t2 where d >= 20);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t1 range NULL it1a 4 NULL 7 100.00 Using index for group-by
1 PRIMARY t1 index NULL it1a 4 NULL 7 100.00 Using index
2 MATERIALIZED t2 ALL NULL NULL NULL NULL 7 100.00 Using where
Warnings:
Note 1003 /* select#1 */ select `test`.`t1`.`a` AS `a` from `test`.`t1` group by `test`.`t1`.`a` having <expr_cache><`test`.`t1`.`a`>(<in_optimizer>(`test`.`t1`.`a`,`test`.`t1`.`a` in ( <materialize> (/* select#2 */ select `test`.`t2`.`c` from `test`.`t2` where `test`.`t2`.`d` >= 20 ), <primary_index_lookup>(`test`.`t1`.`a` in <temporary table> on distinct_key where `test`.`t1`.`a` = `<subquery2>`.`c`))))
......@@ -1154,7 +1154,7 @@ create index iab on t1(a, b);
explain extended
select a from t1 group by a having a in (select c from t2 where d >= 20);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t1 range NULL it1a 4 NULL 7 100.00 Using index for group-by
1 PRIMARY t1 index NULL it1a 4 NULL 7 100.00 Using index
2 MATERIALIZED t2 ALL NULL NULL NULL NULL 7 100.00 Using where
Warnings:
Note 1003 /* select#1 */ select `test`.`t1`.`a` AS `a` from `test`.`t1` group by `test`.`t1`.`a` having <expr_cache><`test`.`t1`.`a`>(<in_optimizer>(`test`.`t1`.`a`,`test`.`t1`.`a` in ( <materialize> (/* select#2 */ select `test`.`t2`.`c` from `test`.`t2` where `test`.`t2`.`d` >= 20 ), <primary_index_lookup>(`test`.`t1`.`a` in <temporary table> on distinct_key where `test`.`t1`.`a` = `<subquery2>`.`c`))))
......@@ -1166,7 +1166,7 @@ explain extended
select a from t1 group by a
having a in (select c from t2 where d >= some(select e from t3 where max(b)=e));
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t1 range NULL iab 4 NULL 7 100.00 Using index for group-by
1 PRIMARY t1 index NULL iab 8 NULL 7 100.00 Using index
2 DEPENDENT SUBQUERY t2 ALL NULL NULL NULL NULL 7 100.00 Using where
3 DEPENDENT SUBQUERY t3 ALL NULL NULL NULL NULL 4 100.00 Using where
Warnings:
......
......@@ -1181,7 +1181,7 @@ a
explain extended
select a from t1 group by a having a in (select c from t2 where d >= 20);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t1 range NULL it1a 4 NULL 7 100.00 Using index for group-by
1 PRIMARY t1 index NULL it1a 4 NULL 7 100.00 Using index
2 MATERIALIZED t2 ALL NULL NULL NULL NULL 7 100.00 Using where
Warnings:
Note 1003 /* select#1 */ select `test`.`t1`.`a` AS `a` from `test`.`t1` group by `test`.`t1`.`a` having <expr_cache><`test`.`t1`.`a`>(<in_optimizer>(`test`.`t1`.`a`,`test`.`t1`.`a` in ( <materialize> (/* select#2 */ select `test`.`t2`.`c` from `test`.`t2` where `test`.`t2`.`d` >= 20 ), <primary_index_lookup>(`test`.`t1`.`a` in <temporary table> on distinct_key where `test`.`t1`.`a` = `<subquery2>`.`c`))))
......@@ -1193,7 +1193,7 @@ create index iab on t1(a, b);
explain extended
select a from t1 group by a having a in (select c from t2 where d >= 20);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t1 range NULL it1a 4 NULL 7 100.00 Using index for group-by
1 PRIMARY t1 index NULL it1a 4 NULL 7 100.00 Using index
2 MATERIALIZED t2 ALL NULL NULL NULL NULL 7 100.00 Using where
Warnings:
Note 1003 /* select#1 */ select `test`.`t1`.`a` AS `a` from `test`.`t1` group by `test`.`t1`.`a` having <expr_cache><`test`.`t1`.`a`>(<in_optimizer>(`test`.`t1`.`a`,`test`.`t1`.`a` in ( <materialize> (/* select#2 */ select `test`.`t2`.`c` from `test`.`t2` where `test`.`t2`.`d` >= 20 ), <primary_index_lookup>(`test`.`t1`.`a` in <temporary table> on distinct_key where `test`.`t1`.`a` = `<subquery2>`.`c`))))
......@@ -1205,7 +1205,7 @@ explain extended
select a from t1 group by a
having a in (select c from t2 where d >= some(select e from t3 where max(b)=e));
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t1 range NULL iab 4 NULL 7 100.00 Using index for group-by
1 PRIMARY t1 index NULL iab 8 NULL 7 100.00 Using index
2 DEPENDENT SUBQUERY t2 ALL NULL NULL NULL NULL 7 100.00 Using where
3 DEPENDENT SUBQUERY t3 ALL NULL NULL NULL NULL 4 100.00 Using where
Warnings:
......
......@@ -146,7 +146,7 @@ count(distinct c)
3
explain select count(distinct c) from t1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range NULL c 5 NULL 5 Using index for group-by
1 SIMPLE t1 range NULL c 5 NULL 5 Using index for group-by (scanning)
###
### filesort & range-based utils
###
......
......@@ -146,7 +146,7 @@ count(distinct c)
3
explain select count(distinct c) from t1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range NULL c 5 NULL 5 Using index for group-by
1 SIMPLE t1 range NULL c 5 NULL 5 Using index for group-by (scanning)
###
### filesort & range-based utils
###
......
......@@ -135,7 +135,7 @@ count(distinct c)
3
explain select count(distinct c) from t1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range NULL c 5 NULL 5 Using index for group-by
1 SIMPLE t1 range NULL c 5 NULL 5 Using index for group-by (scanning)
###
### filesort & range-based utils
###
......
......@@ -133,7 +133,7 @@ count(distinct c)
3
explain select count(distinct c) from t1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range NULL c 5 NULL 5 Using index for group-by
1 SIMPLE t1 range NULL c 5 NULL 5 Using index for group-by (scanning)
###
### filesort & range-based utils
###
......
......@@ -13728,6 +13728,12 @@ cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
The group-by list is a permutation of the select attributes, according
to their order in the index.
EXAMPLES of handled queries
select max(keypart2) from t1 group by keypart1
select max(keypart2) from t1 where keypart2 <= const group by keypart1
select distinct keypart1 from table;
select count(distinct keypart1) from table;
TODO
- What happens if the query groups by the MIN/MAX field, and there is no
other field as in: "select MY_MIN(a) from t1 group by a" ?
......@@ -14820,7 +14826,22 @@ get_field_keypart(KEY *index, Field *field)
This method computes the access cost of a TRP_GROUP_MIN_MAX instance and
the number of rows returned.
The used algorithm used for finding the rows is:
For each range (if no ranges, the range is the whole table)
Do an index search for the start of the range
As long as the found key is withing the range
If the found row matches the where clause, use the row otherwise skip it
Scan index for next group, jumping over all identical keys, done in
QUICK_GROUP_MIN_MAX_SELECT::next_prefix
If the engine does not support a native next_prefix, we will
either scan the index for the next value or do a new index dive
with 'find next bigger key'.
NOTES
See get_best_group_min_max() for which kind of queries this function
will be called.
The cost computation distinguishes several cases:
1) No equality predicates over non-group attributes (thus no key_infix).
If groups are bigger than blocks on the average, then we assume that it
......@@ -14887,17 +14908,18 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
if (!group_key_parts)
{
/* Summary over the whole table */
keys_per_group= table_records;
keys_per_group= MY_MAX(table_records,1);
}
else
{
keys_per_group= (ha_rows) index_info->actual_rec_per_key(group_key_parts -
1);
if (keys_per_group == 0) /* If there is no statistics try to guess */
{
/* each group contains 10% of all records */
keys_per_group= (table_records / 10) + 1;
}
}
if (keys_per_group == 0) /* If there is no statistics try to guess */
/* each group contains 10% of all records */
keys_per_group= (table_records / 10) + 1;
num_groups= (table_records / keys_per_group) + 1;
/* Apply the selectivity of the quick select for group prefixes. */
......@@ -14906,7 +14928,21 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
quick_prefix_selectivity= (double) quick_prefix_records /
(double) table_records;
num_groups= (ha_rows) rint(num_groups * quick_prefix_selectivity);
set_if_bigger(num_groups, 1);
/*
Expect at least as many groups as there is ranges in the index
WHERE a in (1,2,3) GROUP BY a
This is mostly relevant for queries with few records, which is
something we have a lot of in our test suites.
In theory it is possible to scan the index_tree and for cases
where all ranges are eq ranges, we could calculate the exact number
of groups. This is probably an overkill so for now we estimate
the lower level of number of groups by the range elements in the
tree.
*/
set_if_bigger(num_groups, MY_MAX(index_tree->elements, 1));
/* There cannot be more groups than matched records */
set_if_smaller(num_groups, quick_prefix_records);
}
/* Ensure we don't have more groups than rows in table */
set_if_smaller(num_groups, table_records);
......@@ -14943,21 +14979,22 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
group. To make the CPU cost reflect this, we estimate the CPU cost
as the sum of:
1. Cost for evaluating the condition for each num_group
(1/TIME_FOR_COMPARE) (similarly as for index scan).
(1/TIME_FOR_COMPARE_IDX) (similarly as for index scan).
2. Cost for navigating the index structure (assuming a b-tree).
Note: We only add the cost for one index comparision per block. For a
b-tree the number of comparisons will be larger. However the cost
is low as all of the upper level b-tree blocks should be in
memory.
TODO: This cost should be provided by the storage engine.
3. Cost for comparing the row with the where clause
*/
const double tree_traversal_cost=
ceil(log(static_cast<double>(table_records))/
log(static_cast<double>(keys_per_block))) *
1/(2*TIME_FOR_COMPARE);
1/(TIME_FOR_COMPARE_IDX);
const double cpu_cost= (num_groups *
(tree_traversal_cost + 1/TIME_FOR_COMPARE_IDX));
(tree_traversal_cost + 1/TIME_FOR_COMPARE));
*read_cost= io_cost + cpu_cost;
*records= num_groups;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment