Commit aeb62282 authored by Sergey Petrunya

MDEV-5985: EITS: selectivity estimates look illogical for join and non-key equalities

Part#1. 

table_cond_selectivity() should discount the selectivity of a table's
conditions only when it counts that selectivity to begin with.

For non-ref-based access methods (ALL/range/index_merge/etc),
we start with sel=1.0 and hence do not need to discount any
selectivities.
parent 349e31d5
...@@ -243,6 +243,40 @@ foo 2011-04-12 05:18:08 foo baz qux ...@@ -243,6 +243,40 @@ foo 2011-04-12 05:18:08 foo baz qux
bar 2013-09-19 11:37:03 bar baz qux bar 2013-09-19 11:37:03 bar baz qux
drop table t1,t2; drop table t1,t2;
# #
# MDEV-5985: EITS: selectivity estimates look illogical for join and non-key equalities
#
create table t0(a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1(a int);
insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
create table t2 as select * from t1;
set histogram_size=100;
set use_stat_tables='preferably';
set optimizer_use_condition_selectivity=4;
analyze table t1 persistent for all;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
analyze table t2 persistent for all;
Table Op Msg_type Msg_text
test.t2 analyze status Engine-independent statistics collected
test.t2 analyze status OK
# Filtered will be 4.95, 9.90
explain extended select * from t1 A, t2 B where A.a < 40 and B.a < 100;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE A ALL NULL NULL NULL NULL 1000 4.95 Using where
1 SIMPLE B ALL NULL NULL NULL NULL 1000 9.90 Using where; Using join buffer (flat, BNL join)
Warnings:
Note 1003 select `test`.`A`.`a` AS `a`,`test`.`B`.`a` AS `a` from `test`.`t1` `A` join `test`.`t2` `B` where ((`test`.`A`.`a` < 40) and (`test`.`B`.`a` < 100))
# Here, B.filtered should not become 100%:
explain extended select * from t1 A, t2 B where A.a < 40 and B.a < 100 and B.a=A.a;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE A ALL NULL NULL NULL NULL 1000 4.95 Using where
1 SIMPLE B ALL NULL NULL NULL NULL 1000 4.95 Using where; Using join buffer (flat, BNL join)
Warnings:
Note 1003 select `test`.`A`.`a` AS `a`,`test`.`B`.`a` AS `a` from `test`.`t1` `A` join `test`.`t2` `B` where ((`test`.`B`.`a` = `test`.`A`.`a`) and (`test`.`A`.`a` < 40) and (`test`.`A`.`a` < 100))
drop table t0,t1,t2;
#
# End of the test file # End of the test file
# #
set use_stat_tables= @save_use_stat_tables; set use_stat_tables= @save_use_stat_tables;
......
...@@ -189,6 +189,27 @@ select * from t1,t2 where t1.id = t2.t1_id and t2.f2='qux' and t2.f1='baz'; ...@@ -189,6 +189,27 @@ select * from t1,t2 where t1.id = t2.t1_id and t2.f2='qux' and t2.f1='baz';
drop table t1,t2; drop table t1,t2;
--echo #
--echo # MDEV-5985: EITS: selectivity estimates look illogical for join and non-key equalities
--echo #
create table t0(a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1(a int);
insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
create table t2 as select * from t1;
set histogram_size=100;
set use_stat_tables='preferably';
set optimizer_use_condition_selectivity=4;
analyze table t1 persistent for all;
analyze table t2 persistent for all;
--echo # Filtered will be 4.95, 9.90
explain extended select * from t1 A, t2 B where A.a < 40 and B.a < 100;
--echo # Here, B.filtered should not become 100%:
explain extended select * from t1 A, t2 B where A.a < 40 and B.a < 100 and B.a=A.a;
drop table t0,t1,t2;
--echo # --echo #
--echo # End of the test file --echo # End of the test file
--echo # --echo #
......
...@@ -7436,7 +7436,13 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, ...@@ -7436,7 +7436,13 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
If the field f from the table is equal to a field from one the If the field f from the table is equal to a field from one the
earlier joined tables then the selectivity of the range conditions earlier joined tables then the selectivity of the range conditions
over the field f must be discounted. over the field f must be discounted.
We need to discount selectivity only if we're using ref-based
access method (and have sel!=1).
If we use ALL/range/index_merge, then sel==1, and no need to discount.
*/ */
if (pos->key != NULL)
{
for (Field **f_ptr=table->field ; (field= *f_ptr) ; f_ptr++) for (Field **f_ptr=table->field ; (field= *f_ptr) ; f_ptr++)
{ {
if (!bitmap_is_set(read_set, field->field_index) || if (!bitmap_is_set(read_set, field->field_index) ||
...@@ -7454,6 +7460,7 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, ...@@ -7454,6 +7460,7 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
} }
} }
} }
}
sel*= table_multi_eq_cond_selectivity(join, idx, s, rem_tables, sel*= table_multi_eq_cond_selectivity(join, idx, s, rem_tables,
keyparts, ref_keyuse_steps); keyparts, ref_keyuse_steps);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment