Commit 4329ec5d authored by Sergei Petrunia's avatar Sergei Petrunia

MDEV-30812: Improve output cardinality estimates for hash join

Introduce @@optimizer_switch flag: hash_join_cardinality

When it is on, use EITS statistics to produce tighter bounds for
hash join output cardinality.

Amended by Monty.

Reviewed by: Monty <monty@mariadb.org>
parent 2eb7bf1e
create table t1 (a int, b int, c int);
insert into t1 select seq,seq/2, seq/4 from seq_1_to_100;
create table t2 (a int, b int, c int);
insert into t2 select seq, seq/2, seq/4 from seq_1_to_200;
analyze table t1,t2 persistent for all;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
test.t2 analyze status Engine-independent statistics collected
test.t2 analyze status OK
set optimizer_trace=1;
set join_cache_level=6;
set optimizer_switch='hash_join_cardinality=on';
explain select *
from t1, t2
where t1.a=t2.a and t1.a=t2.b and t1.c=t2.c;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
1 SIMPLE t2 hash_ALL NULL #hash#$hj 15 test.t1.a,test.t1.a,test.t1.c 200 Using where; Using join buffer (flat, BNLH join)
set @json= (select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@json, '$**.hash_join_cardinality')) as JS;
JS
[
{
"hash_join_columns":
[
{
"field": "a",
"avg_frequency": 1
},
{
"field": "b",
"avg_frequency": 2
},
{
"field": "c",
"avg_frequency": 3.9216
}
],
"rows": 1
}
]
select json_detailed(json_extract(@json, '$**.rest_of_plan[*].rows_for_plan'))
as ROWS_FOR_PLAN;
ROWS_FOR_PLAN
[100]
explain select *
from t1, t2 where t1.c=t2.c;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
1 SIMPLE t2 hash_ALL NULL #hash#$hj 5 test.t1.c 200 Using where; Using join buffer (flat, BNLH join)
set @json= (select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@json, '$**.hash_join_cardinality')) as JS;
JS
[
{
"hash_join_columns":
[
{
"field": "c",
"avg_frequency": 3.9216
}
],
"rows": 3.9216
}
]
select json_detailed(json_extract(@json, '$**.rest_of_plan[*].rows_for_plan'))
as ROWS_FOR_PLAN;
ROWS_FOR_PLAN
[392.16]
explain select *
from t1 straight_join t2 where t1.c=t2.c and t2.a<30;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
1 SIMPLE t2 hash_ALL NULL #hash#$hj 5 test.t1.c 200 Using where; Using join buffer (flat, BNLH join)
set @json= (select trace from information_schema.optimizer_trace);
# Note that rows is the same:
select json_detailed(json_extract(@json, '$**.hash_join_cardinality')) as JS;
JS
[
{
"hash_join_columns":
[
{
"field": "c",
"avg_frequency": 3.9216
}
],
"rows": 3.9216
}
]
# Despite available selectivity:
select json_detailed(json_extract(@json, '$**.selectivity_for_columns')) as JS;
JS
[
[
{
"column_name": "a",
"ranges":
["NULL < a < 30"],
"selectivity_from_histogram": 0.1484375
}
]
]
drop table t1,t2;
--source include/have_sequence.inc
# Embedded doesn't have optimizer trace:
--source include/not_embedded.inc
create table t1 (a int, b int, c int);
insert into t1 select seq,seq/2, seq/4 from seq_1_to_100;
create table t2 (a int, b int, c int);
insert into t2 select seq, seq/2, seq/4 from seq_1_to_200;
analyze table t1,t2 persistent for all;
set optimizer_trace=1;
set join_cache_level=6;
set optimizer_switch='hash_join_cardinality=on';
explain select *
from t1, t2
where t1.a=t2.a and t1.a=t2.b and t1.c=t2.c;
set @json= (select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@json, '$**.hash_join_cardinality')) as JS;
select json_detailed(json_extract(@json, '$**.rest_of_plan[*].rows_for_plan'))
as ROWS_FOR_PLAN;
explain select *
from t1, t2 where t1.c=t2.c;
set @json= (select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@json, '$**.hash_join_cardinality')) as JS;
select json_detailed(json_extract(@json, '$**.rest_of_plan[*].rows_for_plan'))
as ROWS_FOR_PLAN;
explain select *
from t1 straight_join t2 where t1.c=t2.c and t2.a<30;
set @json= (select trace from information_schema.optimizer_trace);
--echo # Note that rows is the same:
select json_detailed(json_extract(@json, '$**.hash_join_cardinality')) as JS;
--echo # Despite available selectivity:
select json_detailed(json_extract(@json, '$**.selectivity_for_columns')) as JS;
drop table t1,t2;
......@@ -734,7 +734,8 @@ The following specify which files/extra groups are read (specified before remain
extended_keys, exists_to_in, orderby_uses_equalities,
condition_pushdown_for_derived, split_materialized,
condition_pushdown_for_subquery, rowid_filter,
condition_pushdown_from_having, not_null_range_scan
condition_pushdown_from_having, not_null_range_scan,
hash_join_cardinality
--optimizer-trace=name
Controls tracing of the Optimizer:
optimizer_trace=option=val[,option=val...], where option
......
......@@ -38,7 +38,7 @@ SET @@session.session_track_system_variables='optimizer_switch';
set optimizer_switch='index_merge=off,index_merge_union=off,index_merge_sort_union=off,index_merge_intersection=off,index_merge_sort_intersection=on,engine_condition_pushdown=on,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=on,mrr_cost_based=on,mrr_sort_keys=on,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=on,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off';
-- Tracker : SESSION_TRACK_SYSTEM_VARIABLES
-- optimizer_switch
-- index_merge=off,index_merge_union=off,index_merge_sort_union=off,index_merge_intersection=off,index_merge_sort_intersection=on,engine_condition_pushdown=on,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=on,mrr_cost_based=on,mrr_sort_keys=on,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=on,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off
-- index_merge=off,index_merge_union=off,index_merge_sort_union=off,index_merge_intersection=off,index_merge_sort_intersection=on,engine_condition_pushdown=on,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=on,mrr_cost_based=on,mrr_sort_keys=on,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=on,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,hash_join_cardinality=off
Warnings:
Warning 1681 'engine_condition_pushdown=on' is deprecated and will be removed in a future release
......
......@@ -2299,7 +2299,7 @@ VARIABLE_COMMENT Fine-tune the optimizer behavior
NUMERIC_MIN_VALUE NULL
NUMERIC_MAX_VALUE NULL
NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST index_merge,index_merge_union,index_merge_sort_union,index_merge_intersection,index_merge_sort_intersection,engine_condition_pushdown,index_condition_pushdown,derived_merge,derived_with_keys,firstmatch,loosescan,materialization,in_to_exists,semijoin,partial_match_rowid_merge,partial_match_table_scan,subquery_cache,mrr,mrr_cost_based,mrr_sort_keys,outer_join_with_cache,semijoin_with_cache,join_cache_incremental,join_cache_hashed,join_cache_bka,optimize_join_buffer_size,table_elimination,extended_keys,exists_to_in,orderby_uses_equalities,condition_pushdown_for_derived,split_materialized,condition_pushdown_for_subquery,rowid_filter,condition_pushdown_from_having,not_null_range_scan,default
ENUM_VALUE_LIST index_merge,index_merge_union,index_merge_sort_union,index_merge_intersection,index_merge_sort_intersection,engine_condition_pushdown,index_condition_pushdown,derived_merge,derived_with_keys,firstmatch,loosescan,materialization,in_to_exists,semijoin,partial_match_rowid_merge,partial_match_table_scan,subquery_cache,mrr,mrr_cost_based,mrr_sort_keys,outer_join_with_cache,semijoin_with_cache,join_cache_incremental,join_cache_hashed,join_cache_bka,optimize_join_buffer_size,table_elimination,extended_keys,exists_to_in,orderby_uses_equalities,condition_pushdown_for_derived,split_materialized,condition_pushdown_for_subquery,rowid_filter,condition_pushdown_from_having,not_null_range_scan,hash_join_cardinality,default
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_TRACE
......
......@@ -2459,7 +2459,7 @@ VARIABLE_COMMENT Fine-tune the optimizer behavior
NUMERIC_MIN_VALUE NULL
NUMERIC_MAX_VALUE NULL
NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST index_merge,index_merge_union,index_merge_sort_union,index_merge_intersection,index_merge_sort_intersection,engine_condition_pushdown,index_condition_pushdown,derived_merge,derived_with_keys,firstmatch,loosescan,materialization,in_to_exists,semijoin,partial_match_rowid_merge,partial_match_table_scan,subquery_cache,mrr,mrr_cost_based,mrr_sort_keys,outer_join_with_cache,semijoin_with_cache,join_cache_incremental,join_cache_hashed,join_cache_bka,optimize_join_buffer_size,table_elimination,extended_keys,exists_to_in,orderby_uses_equalities,condition_pushdown_for_derived,split_materialized,condition_pushdown_for_subquery,rowid_filter,condition_pushdown_from_having,not_null_range_scan,default
ENUM_VALUE_LIST index_merge,index_merge_union,index_merge_sort_union,index_merge_intersection,index_merge_sort_intersection,engine_condition_pushdown,index_condition_pushdown,derived_merge,derived_with_keys,firstmatch,loosescan,materialization,in_to_exists,semijoin,partial_match_rowid_merge,partial_match_table_scan,subquery_cache,mrr,mrr_cost_based,mrr_sort_keys,outer_join_with_cache,semijoin_with_cache,join_cache_incremental,join_cache_hashed,join_cache_bka,optimize_join_buffer_size,table_elimination,extended_keys,exists_to_in,orderby_uses_equalities,condition_pushdown_for_derived,split_materialized,condition_pushdown_for_subquery,rowid_filter,condition_pushdown_from_having,not_null_range_scan,hash_join_cardinality,default
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_TRACE
......
......@@ -234,6 +234,7 @@
#define OPTIMIZER_SWITCH_USE_ROWID_FILTER (1ULL << 33)
#define OPTIMIZER_SWITCH_COND_PUSHDOWN_FROM_HAVING (1ULL << 34)
#define OPTIMIZER_SWITCH_NOT_NULL_RANGE_SCAN (1ULL << 35)
#define OPTIMIZER_SWITCH_HASH_JOIN_CARDINALITY (1ULL << 36)
#define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \
OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \
......
This diff is collapsed.
......@@ -2756,6 +2756,7 @@ export const char *optimizer_switch_names[]=
"rowid_filter",
"condition_pushdown_from_having",
"not_null_range_scan",
"hash_join_cardinality",
"default",
NullS
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment