Commit ed3e6f66 authored by Sergei Petrunia's avatar Sergei Petrunia Committed by Oleksandr Byelkin

MDEV-26301: Split optimization refills: Optimizer Trace coverage

Add Optimizer Trace printouts.
parent ce7ffe61
...@@ -449,6 +449,11 @@ select * from v2 { ...@@ -449,6 +449,11 @@ select * from v2 {
} }
] ]
}, },
{
"check_split_materialized": {
"not_applicable": "no candidate field can be accessed through ref"
}
},
{ {
"best_join_order": ["t1"] "best_join_order": ["t1"]
}, },
...@@ -772,6 +777,11 @@ explain select * from v1 { ...@@ -772,6 +777,11 @@ explain select * from v1 {
} }
] ]
}, },
{
"check_split_materialized": {
"not_applicable": "group list has no candidates"
}
},
{ {
"best_join_order": ["t1"] "best_join_order": ["t1"]
}, },
...@@ -8861,5 +8871,109 @@ SET optimizer_trace=DEFAULT; ...@@ -8861,5 +8871,109 @@ SET optimizer_trace=DEFAULT;
DROP VIEW v; DROP VIEW v;
DROP TABLE t; DROP TABLE t;
# #
# MDEV-26301: Split optimization improvements: Optimizer Trace coverage
#
create table t1(a int, b int);
insert into t1 select seq,seq from seq_1_to_5;
create table t2(a int, b int, key(a));
insert into t2
select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
create table t3(a int, b int, key(a));
insert into t3
select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
analyze table t1,t2,t3 persistent for all;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
test.t2 analyze status Engine-independent statistics collected
test.t2 analyze status Table is already up to date
test.t3 analyze status Engine-independent statistics collected
test.t3 analyze status Table is already up to date
create table t10 (
grp_id int,
col1 int,
key(grp_id)
);
insert into t10
select
A.seq,
B.seq
from
seq_1_to_100 A,
seq_1_to_100 B;
create table t11 (
col1 int,
col2 int
);
insert into t11
select A.seq, A.seq from seq_1_to_10 A;
analyze table t10,t11 persistent for all;
Table Op Msg_type Msg_text
test.t10 analyze status Engine-independent statistics collected
test.t10 analyze status Table is already up to date
test.t11 analyze status Engine-independent statistics collected
test.t11 analyze status OK
set optimizer_trace=1;
explain
select * from
(
(t1 left join t2 on t2.a=t1.b)
left join t3 on t3.a=t1.b
) left join (select grp_id, count(*)
from t10 left join t11 on t11.col1=t10.col1
group by grp_id) T on T.grp_id=t1.b;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 5
1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
select json_detailed(json_extract(trace, '$**.check_split_materialized')) as JS
from information_schema.optimizer_trace;
JS
[
{
"split_candidates":
["t10.grp_id"]
}
]
select
json_detailed(
json_remove(
json_extract(trace, '$**.choose_best_splitting')
, '$[0].split_plan_search[0]'
)
) as JS
from information_schema.optimizer_trace;
JS
[
{
"considered_keys":
[
{
"table_name": "t10",
"index": "grp_id",
"rec_per_key": 100,
"param_tables": 1
}
],
"refills": 5,
"spl_pd_boundary": 2,
"split_plan_search":
[],
"lead_table": "t10",
"index": "grp_id",
"parts": 1,
"split_sel": 0.001,
"cost": 2536,
"records": 100,
"refills": 5,
"chosen": true
}
]
drop table t1,t2,t3,t10,t11;
set optimizer_trace=DEFAULT;
#
# End of 10.4 tests # End of 10.4 tests
# #
...@@ -696,6 +696,76 @@ SET optimizer_trace=DEFAULT; ...@@ -696,6 +696,76 @@ SET optimizer_trace=DEFAULT;
DROP VIEW v; DROP VIEW v;
DROP TABLE t; DROP TABLE t;
--echo #
--echo # MDEV-26301: Split optimization improvements: Optimizer Trace coverage
--echo #
# 5 values
create table t1(a int, b int);
insert into t1 select seq,seq from seq_1_to_5;
# 5 value groups of size 2 each
create table t2(a int, b int, key(a));
insert into t2
select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
# 5 value groups of size 3 each
create table t3(a int, b int, key(a));
insert into t3
select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
analyze table t1,t2,t3 persistent for all;
create table t10 (
grp_id int,
col1 int,
key(grp_id)
);
# 100 groups of 100 values each
insert into t10
select
A.seq,
B.seq
from
seq_1_to_100 A,
seq_1_to_100 B;
# and X10 multiplier
create table t11 (
col1 int,
col2 int
);
insert into t11
select A.seq, A.seq from seq_1_to_10 A;
analyze table t10,t11 persistent for all;
set optimizer_trace=1;
explain
select * from
(
(t1 left join t2 on t2.a=t1.b)
left join t3 on t3.a=t1.b
) left join (select grp_id, count(*)
from t10 left join t11 on t11.col1=t10.col1
group by grp_id) T on T.grp_id=t1.b;
select json_detailed(json_extract(trace, '$**.check_split_materialized')) as JS
from information_schema.optimizer_trace;
select
json_detailed(
json_remove(
json_extract(trace, '$**.choose_best_splitting')
, '$[0].split_plan_search[0]'
)
) as JS
from information_schema.optimizer_trace;
drop table t1,t2,t3,t10,t11;
set optimizer_trace=DEFAULT;
--echo # --echo #
--echo # End of 10.4 tests --echo # End of 10.4 tests
--echo # --echo #
...@@ -228,6 +228,7 @@ ...@@ -228,6 +228,7 @@
#include "mariadb.h" #include "mariadb.h"
#include "sql_select.h" #include "sql_select.h"
#include "opt_trace.h"
/* Info on a splitting field */ /* Info on a splitting field */
struct SplM_field_info struct SplM_field_info
...@@ -387,6 +388,9 @@ bool JOIN::check_for_splittable_materialized() ...@@ -387,6 +388,9 @@ bool JOIN::check_for_splittable_materialized()
if (!partition_list) if (!partition_list)
return false; return false;
Json_writer_object trace_wrapper(thd);
Json_writer_object trace_split(thd, "check_split_materialized");
ORDER *ord; ORDER *ord;
Dynamic_array<SplM_field_ext_info> candidates; Dynamic_array<SplM_field_ext_info> candidates;
...@@ -432,7 +436,10 @@ bool JOIN::check_for_splittable_materialized() ...@@ -432,7 +436,10 @@ bool JOIN::check_for_splittable_materialized()
} }
} }
if (candidates.elements() == 0) // no candidates satisfying (8.1) && (8.2) if (candidates.elements() == 0) // no candidates satisfying (8.1) && (8.2)
{
trace_split.add("not_applicable", "group list has no candidates");
return false; return false;
}
/* /*
For each table from this join find the keys that can be used for ref access For each table from this join find the keys that can be used for ref access
...@@ -491,7 +498,11 @@ bool JOIN::check_for_splittable_materialized() ...@@ -491,7 +498,11 @@ bool JOIN::check_for_splittable_materialized()
} }
if (!spl_field_cnt) // No candidate field can be accessed by ref => !(9) if (!spl_field_cnt) // No candidate field can be accessed by ref => !(9)
{
trace_split.add("not_applicable",
"no candidate field can be accessed through ref");
return false; return false;
}
/* /*
Create a structure of the type SplM_opt_info and fill it with Create a structure of the type SplM_opt_info and fill it with
...@@ -509,10 +520,15 @@ bool JOIN::check_for_splittable_materialized() ...@@ -509,10 +520,15 @@ bool JOIN::check_for_splittable_materialized()
spl_opt_info->tables_usable_for_splitting= 0; spl_opt_info->tables_usable_for_splitting= 0;
spl_opt_info->spl_field_cnt= spl_field_cnt; spl_opt_info->spl_field_cnt= spl_field_cnt;
spl_opt_info->spl_fields= spl_field; spl_opt_info->spl_fields= spl_field;
{
Json_writer_array trace_range(thd, "split_candidates");
for (cand= cand_start; cand < cand_end; cand++) for (cand= cand_start; cand < cand_end; cand++)
{ {
if (!cand->is_usable_for_ref_access) if (!cand->is_usable_for_ref_access)
continue; continue;
trace_range.add(cand->producing_item);
spl_field->producing_item= cand->producing_item; spl_field->producing_item= cand->producing_item;
spl_field->underlying_field= cand->underlying_field; spl_field->underlying_field= cand->underlying_field;
spl_field->mat_field= cand->mat_field; spl_field->mat_field= cand->mat_field;
...@@ -520,6 +536,7 @@ bool JOIN::check_for_splittable_materialized() ...@@ -520,6 +536,7 @@ bool JOIN::check_for_splittable_materialized()
cand->underlying_field->table->map; cand->underlying_field->table->map;
spl_field++; spl_field++;
} }
}
/* Attach this info to the table T */ /* Attach this info to the table T */
derived->table->set_spl_opt_info(spl_opt_info); derived->table->set_spl_opt_info(spl_opt_info);
...@@ -773,7 +790,7 @@ void JOIN::add_keyuses_for_splitting() ...@@ -773,7 +790,7 @@ void JOIN::add_keyuses_for_splitting()
bzero((char*) &keyuse_ext_end, sizeof(keyuse_ext_end)); bzero((char*) &keyuse_ext_end, sizeof(keyuse_ext_end));
if (ext_keyuses_for_splitting->push(keyuse_ext_end)) if (ext_keyuses_for_splitting->push(keyuse_ext_end))
goto err; goto err;
// psergey-todo: trace anything here?
spl_opt_info->unsplit_card= join_record_count; spl_opt_info->unsplit_card= join_record_count;
rec_len= table->s->rec_buff_length; rec_len= table->s->rec_buff_length;
...@@ -946,7 +963,8 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx, ...@@ -946,7 +963,8 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx,
uint best_key= 0; uint best_key= 0;
uint best_key_parts= 0; uint best_key_parts= 0;
table_map best_param_tables; table_map best_param_tables;
Json_writer_object trace_obj(thd, "choose_best_splitting");
Json_writer_array trace_arr(thd, "considered_keys");
/* /*
Check whether there are keys that can be used to join T employing splitting Check whether there are keys that can be used to join T employing splitting
and if so, select the best out of such keys and if so, select the best out of such keys
...@@ -999,6 +1017,13 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx, ...@@ -999,6 +1017,13 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx,
best_rec_per_key= rec_per_key; best_rec_per_key= rec_per_key;
best_key_keyuse_ext_start= key_keyuse_ext_start; best_key_keyuse_ext_start= key_keyuse_ext_start;
best_param_tables= needed_in_prefix; best_param_tables= needed_in_prefix;
// trace table, key_name, parts, needed_tables.
Json_writer_object cur_index(thd);
cur_index.
add("table_name", best_table->alias.ptr()).
add("index", best_table->key_info[best_key].name).
add("rec_per_key", best_rec_per_key).
add("param_tables", best_param_tables);
} }
keyuse_ext++; keyuse_ext++;
} }
...@@ -1006,6 +1031,8 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx, ...@@ -1006,6 +1031,8 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx,
} }
while (keyuse_ext->table == table); while (keyuse_ext->table == table);
} }
trace_arr.end();
spl_opt_info->last_plan= 0; spl_opt_info->last_plan= 0;
double refills= DBL_MAX; double refills= DBL_MAX;
table_map excluded_tables= remaining_tables | this->join->sjm_lookup_tables; table_map excluded_tables= remaining_tables | this->join->sjm_lookup_tables;
...@@ -1031,6 +1058,10 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx, ...@@ -1031,6 +1058,10 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx,
break; break;
} }
} }
trace_obj.add("refills", refills).
add("spl_pd_boundary", *spl_pd_boundary);
/* /*
The key for splitting was chosen, look for the plan for this key The key for splitting was chosen, look for the plan for this key
in the cache in the cache
...@@ -1042,11 +1073,13 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx, ...@@ -1042,11 +1073,13 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx,
The plan for the chosen key has not been found in the cache. The plan for the chosen key has not been found in the cache.
Build a new plan and save info on it in the cache Build a new plan and save info on it in the cache
*/ */
Json_writer_array wrapper(thd, "split_plan_search");
table_map all_table_map= (((table_map) 1) << join->table_count) - 1; table_map all_table_map= (((table_map) 1) << join->table_count) - 1;
reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table, reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table,
best_key, excluded_tables, true); best_key, excluded_tables, true);
choose_plan(join, all_table_map & ~join->const_table_map); choose_plan(join, all_table_map & ~join->const_table_map);
wrapper.end();
/* /*
Check that the chosen plan is really a splitting plan. Check that the chosen plan is really a splitting plan.
If not or if there is not enough memory to save the plan in the cache If not or if there is not enough memory to save the plan in the cache
...@@ -1064,6 +1097,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx, ...@@ -1064,6 +1097,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx,
{ {
reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table, reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table,
best_key, excluded_tables, false); best_key, excluded_tables, false);
trace_obj.add("split_plan_discarded", "constructed unapplicable query plan");
return 0; return 0;
} }
...@@ -1089,9 +1123,20 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx, ...@@ -1089,9 +1123,20 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx,
reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table, reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table,
best_key, excluded_tables, false); best_key, excluded_tables, false);
} }
else
trace_obj.add("cached_plan_found", 1);
if (spl_plan) if (spl_plan)
{ {
trace_obj.
add("lead_table", spl_plan->table->alias.ptr()).
add("index", spl_plan->table->key_info[spl_plan->key].name).
add("parts", spl_plan->parts).
add("split_sel", spl_plan->split_sel).
add("cost", spl_plan->cost).
add("records", (ha_rows) (records * spl_plan->split_sel)).
add("refills", refills);
if (refills * spl_plan->cost < spl_opt_info->unsplit_cost) if (refills * spl_plan->cost < spl_opt_info->unsplit_cost)
{ {
/* /*
...@@ -1100,7 +1145,10 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx, ...@@ -1100,7 +1145,10 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx,
*/ */
spl_opt_info->last_plan= spl_plan; spl_opt_info->last_plan= spl_plan;
spl_opt_info->last_refills= refills; spl_opt_info->last_refills= refills;
trace_obj.add("chosen", true);
} }
else
trace_obj.add("chosen", false);
} }
} }
...@@ -1132,7 +1180,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx, ...@@ -1132,7 +1180,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx,
@details @details
This function injects equalities pushed into a derived table T for which This function injects equalities pushed into a derived table T for which
the split optimization has been chosen by the optimizer. The function the split optimization has been chosen by the optimizer. The function
is called by JOIN::inject_splitting_cond_for_all_tables_with_split_op(). is called by JOIN::inject_splitting_cond_for_all_tables_with_split_opt().
All equalities usable for splitting T whose right parts do not depend on All equalities usable for splitting T whose right parts do not depend on
any of the 'excluded_tables' can be pushed into the where clause of the any of the 'excluded_tables' can be pushed into the where clause of the
derived table T. derived table T.
......
...@@ -7444,7 +7444,6 @@ best_access_path(JOIN *join, ...@@ -7444,7 +7444,6 @@ best_access_path(JOIN *join,
DBUG_ENTER("best_access_path"); DBUG_ENTER("best_access_path");
Json_writer_object trace_wrapper(thd, "best_access_path"); Json_writer_object trace_wrapper(thd, "best_access_path");
Json_writer_array trace_paths(thd, "considered_access_paths");
bitmap_clear_all(eq_join_set); bitmap_clear_all(eq_join_set);
...@@ -7455,6 +7454,7 @@ best_access_path(JOIN *join, ...@@ -7455,6 +7454,7 @@ best_access_path(JOIN *join,
remaining_tables, remaining_tables,
&spl_pd_boundary); &spl_pd_boundary);
Json_writer_array trace_paths(thd, "considered_access_paths");
if (s->keyuse) if (s->keyuse)
{ /* Use key if possible */ { /* Use key if possible */
KEYUSE *keyuse; KEYUSE *keyuse;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment