Commit 4a3f135a authored by unknown's avatar unknown

MWL#89: Cost-based choice between Materialization and IN->EXISTS transformation

Added/corrected/improved comments.
parent 77c03bcf
...@@ -1139,7 +1139,7 @@ insert into t1 values (5); ...@@ -1139,7 +1139,7 @@ insert into t1 values (5);
explain select min(a1) from t1 where 7 in (select b1 from t2 group by b1); explain select min(a1) from t1 where 7 in (select b1 from t2 group by b1);
id select_type table type possible_keys key key_len ref rows Extra id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away 1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away
2 SUBQUERY NULL NULL NULL NULL NULL NULL NULL no matching row in const table 2 SUBQUERY t2 system NULL NULL NULL NULL 0 const row not found
select min(a1) from t1 where 7 in (select b1 from t2 group by b1); select min(a1) from t1 where 7 in (select b1 from t2 group by b1);
min(a1) min(a1)
set @@optimizer_switch='default,materialization=off'; set @@optimizer_switch='default,materialization=off';
...@@ -1153,7 +1153,7 @@ set @@optimizer_switch='default,semijoin=off'; ...@@ -1153,7 +1153,7 @@ set @@optimizer_switch='default,semijoin=off';
explain select min(a1) from t1 where 7 in (select b1 from t2); explain select min(a1) from t1 where 7 in (select b1 from t2);
id select_type table type possible_keys key key_len ref rows Extra id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away 1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away
2 SUBQUERY NULL NULL NULL NULL NULL NULL NULL no matching row in const table 2 SUBQUERY t2 system NULL NULL NULL NULL 0 const row not found
select min(a1) from t1 where 7 in (select b1 from t2); select min(a1) from t1 where 7 in (select b1 from t2);
min(a1) min(a1)
set @@optimizer_switch='default,materialization=off'; set @@optimizer_switch='default,materialization=off';
......
This diff is collapsed.
...@@ -319,7 +319,7 @@ public: ...@@ -319,7 +319,7 @@ public:
/* /*
Possible methods to execute an IN predicate. These are set by the optimizer Possible methods to execute an IN predicate. These are set by the optimizer
based on user-set optimizer switches, syntactic analysis and cost comparison. based on user-set optimizer switches, semantic analysis and cost comparison.
*/ */
#define SUBS_NOT_TRANSFORMED 0 /* No execution method was chosen for this IN. */ #define SUBS_NOT_TRANSFORMED 0 /* No execution method was chosen for this IN. */
#define SUBS_SEMI_JOIN 1 /* IN was converted to semi-join. */ #define SUBS_SEMI_JOIN 1 /* IN was converted to semi-join. */
......
...@@ -185,6 +185,7 @@ int check_and_do_in_subquery_rewrites(JOIN *join) ...@@ -185,6 +185,7 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
else else
{ {
DBUG_PRINT("info", ("Subquery can't be converted to semi-join")); DBUG_PRINT("info", ("Subquery can't be converted to semi-join"));
/* Test if the user has set a legal combination of optimizer switches. */
if (!optimizer_flag(thd, OPTIMIZER_SWITCH_IN_TO_EXISTS) && if (!optimizer_flag(thd, OPTIMIZER_SWITCH_IN_TO_EXISTS) &&
!optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION)) !optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION))
my_error(ER_ILLEGAL_SUBQUERY_OPTIMIZER_SWITCHES, MYF(0)); my_error(ER_ILLEGAL_SUBQUERY_OPTIMIZER_SWITCHES, MYF(0));
...@@ -3543,16 +3544,10 @@ static void remove_subq_pushed_predicates(JOIN *join, Item **where) ...@@ -3543,16 +3544,10 @@ static void remove_subq_pushed_predicates(JOIN *join, Item **where)
/** /**
Setup for execution all subqueries of a query, for which the optimizer Optimize all subqueries of a query that were not flattened into a semijoin.
chose hash semi-join.
@details Iterate over all immediate child subqueries of the query, and if @details
they are under an IN predicate, and the optimizer chose to compute it via Optimize all immediate children subqueries of a query.
materialization:
- optimize each subquery,
- choose an optimial execution strategy for the IN predicate - either
materialization, or an IN=>EXISTS transformation with an approriate
engine.
This phase must be called after substitute_for_best_equal_field() because This phase must be called after substitute_for_best_equal_field() because
that function may replace items with other items from a multiple equality, that function may replace items with other items from a multiple equality,
...@@ -3570,6 +3565,42 @@ bool JOIN::optimize_unflattened_subqueries() ...@@ -3570,6 +3565,42 @@ bool JOIN::optimize_unflattened_subqueries()
} }
/**
Choose an optimal strategy to execute an IN/ALL/ANY subquery predicate
based on cost.
@param join_tables the set of tables joined in the subquery
@notes
The method chooses between the materialization and IN=>EXISTS rewrite
strategies for the execution of a non-flattened subquery IN predicate.
The cost-based decision is made as follows:
1. compute materialize_strategy_cost based on the unmodified subquery
2. reoptimize the subquery taking into account the IN-EXISTS predicates
3. compute in_exists_strategy_cost based on the reoptimized plan
4. compare and set the cheaper strategy
if (materialize_strategy_cost < in_exists_strategy_cost)
in_strategy = MATERIALIZATION
else
in_strategy = IN_TO_EXISTS
5. if in_strategy = MATERIALIZATION and it is not possible to initialize it
revert to IN_TO_EXISTS
6. if (in_strategy == MATERIALIZATION)
revert the subquery plan to the original one before reoptimizing
else
inject the IN=>EXISTS predicates into the new EXISTS subquery plan
The implementation itself is a bit more complicated because it takes into
account two more factors:
- whether the user allowed both strategies through an optimizer_switch, and
- if materialization was the cheaper strategy, whether it can be executed
or not.
@retval FALSE success.
@retval TRUE error occurred.
*/
bool JOIN::choose_subquery_plan(table_map join_tables) bool JOIN::choose_subquery_plan(table_map join_tables)
{ {
/* The original QEP of the subquery. */ /* The original QEP of the subquery. */
...@@ -3627,7 +3658,10 @@ bool JOIN::choose_subquery_plan(table_map join_tables) ...@@ -3627,7 +3658,10 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
&outer_read_time, &outer_record_count); &outer_read_time, &outer_record_count);
else else
{ {
/* TODO: outer_join can be NULL for DELETE statements. */ /*
TODO: outer_join can be NULL for DELETE statements.
How to compute its cost?
*/
outer_read_time= 1; /* TODO */ outer_read_time= 1; /* TODO */
outer_record_count= 1; /* TODO */ outer_record_count= 1; /* TODO */
} }
...@@ -3694,13 +3728,14 @@ bool JOIN::choose_subquery_plan(table_map join_tables) ...@@ -3694,13 +3728,14 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
} }
/* /*
If (1) materialization is a possible strategy based on static analysis If (1) materialization is a possible strategy based on semantic analysis
during the prepare phase, then if during the prepare phase, then if
(2) it is more expensive than the IN->EXISTS transformation, and (2) it is more expensive than the IN->EXISTS transformation, and
(3) it is not possible to create usable indexes for the materialization (3) it is not possible to create usable indexes for the materialization
strategy, strategy,
fall back to IN->EXISTS. fall back to IN->EXISTS.
otherwise use materialization. otherwise
use materialization.
*/ */
if (in_subs->in_strategy & SUBS_MATERIALIZATION && if (in_subs->in_strategy & SUBS_MATERIALIZATION &&
in_subs->setup_mat_engine()) in_subs->setup_mat_engine())
...@@ -3752,6 +3787,11 @@ bool JOIN::choose_subquery_plan(table_map join_tables) ...@@ -3752,6 +3787,11 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
if (!in_exists_reoptimized && in_to_exists_where && const_tables != tables) if (!in_exists_reoptimized && in_to_exists_where && const_tables != tables)
{ {
/*
The subquery was not reoptimized either because the user allowed only the
IN-EXISTS strategy, or because materialization was not possible based on
semantic analysis. Clean up the original plan and reoptimize.
*/
for (uint i= 0; i < tables; i++) for (uint i= 0; i < tables; i++)
{ {
join_tab[i].keyuse= NULL; join_tab[i].keyuse= NULL;
......
...@@ -19264,8 +19264,18 @@ bool JOIN::change_result(select_result *res) ...@@ -19264,8 +19264,18 @@ bool JOIN::change_result(select_result *res)
/** /**
Save the original query execution plan so that the caller can revert to it Save a query execution plan so that the caller can revert to it if needed,
if needed. and reset the current query plan so that it can be reoptimized.
@param save_keyuse[out] a KEYUSE array to save JOIN::keyuse
@param save_best_positions[out] array to save JOIN::best_positions
@param save_join_tab_keyuse[out] array of KEYUSE pointers to save each
JOIN_TAB::keyuse pointer
@param save_join_tab_checked_keys[out] an array of bitmaps to save
each JOIN_TAB::checked_keys
@retval 0 OK
@retval 1 memory allocation error
*/ */
int JOIN::save_query_plan(DYNAMIC_ARRAY *save_keyuse, int JOIN::save_query_plan(DYNAMIC_ARRAY *save_keyuse,
POSITION *save_best_positions, POSITION *save_best_positions,
...@@ -19298,8 +19308,14 @@ int JOIN::save_query_plan(DYNAMIC_ARRAY *save_keyuse, ...@@ -19298,8 +19308,14 @@ int JOIN::save_query_plan(DYNAMIC_ARRAY *save_keyuse,
/** /**
Restore the query plan saved before reoptimization with additional Restore a query plan previously saved by the caller.
conditions.
@param save_keyuse a KEYUSE array to restore into JOIN::keyuse
@param save_best_positions array to restore into JOIN::best_positions
@param save_join_tab_keyuse array of KEYUSE pointers to restore each
JOIN_TAB::keyuse pointer
@param save_join_tab_checked_keys an array of bitmaps to restore
each JOIN_TAB::checked_keys
*/ */
void JOIN::restore_query_plan(DYNAMIC_ARRAY *save_keyuse, void JOIN::restore_query_plan(DYNAMIC_ARRAY *save_keyuse,
...@@ -19328,8 +19344,29 @@ void JOIN::restore_query_plan(DYNAMIC_ARRAY *save_keyuse, ...@@ -19328,8 +19344,29 @@ void JOIN::restore_query_plan(DYNAMIC_ARRAY *save_keyuse,
/** /**
Reoptimize a query plan taking into account an additional conjunct to the Reoptimize a query plan taking into account an additional conjunct to the
WHERE clause. WHERE clause.
@param added_where An extra conjunct to the WHERE clause to reoptimize with
@param join_tables The set of tables to reoptimize
@param save_best_positions The join order of the original plan to restore to
if needed.
@notes
Given a query plan that was already optimized taking into account some WHERE clause
'C', reoptimize this plan with a new WHERE clause 'C AND added_where'. The
reoptimization works as follows:
1. Call update_ref_and_keys *only* for the new conditions 'added_where'
that are about to be injected into the query.
2. Expand if necessary the original KEYUSE array JOIN::keyuse to
accommodate the new REF accesses computed for the 'added_where' condition.
3. Add the new KEYUSEs into JOIN::keyuse.
4. Re-sort and re-filter the JOIN::keyuse array with the newly added
KEYUSE elements.
@retval 0 OK
@retval 1 memory allocation error
*/ */
int JOIN::reoptimize(Item *added_where, table_map join_tables, int JOIN::reoptimize(Item *added_where, table_map join_tables,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment