Commit 4a3f135a authored by unknown's avatar unknown

MWL#89: Cost-based choice between Materialization and IN->EXISTS transformation

Added/corrected/improved comments.
parent 77c03bcf
......@@ -1139,7 +1139,7 @@ insert into t1 values (5);
explain select min(a1) from t1 where 7 in (select b1 from t2 group by b1);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away
2 SUBQUERY NULL NULL NULL NULL NULL NULL NULL no matching row in const table
2 SUBQUERY t2 system NULL NULL NULL NULL 0 const row not found
select min(a1) from t1 where 7 in (select b1 from t2 group by b1);
min(a1)
set @@optimizer_switch='default,materialization=off';
......@@ -1153,7 +1153,7 @@ set @@optimizer_switch='default,semijoin=off';
explain select min(a1) from t1 where 7 in (select b1 from t2);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away
2 SUBQUERY NULL NULL NULL NULL NULL NULL NULL no matching row in const table
2 SUBQUERY t2 system NULL NULL NULL NULL 0 const row not found
select min(a1) from t1 where 7 in (select b1 from t2);
min(a1)
set @@optimizer_switch='default,materialization=off';
......
This diff is collapsed.
......@@ -319,7 +319,7 @@ class Item_exists_subselect :public Item_subselect
/*
Possible methods to execute an IN predicate. These are set by the optimizer
based on user-set optimizer switches, syntactic analysis and cost comparison.
based on user-set optimizer switches, semantic analysis and cost comparison.
*/
#define SUBS_NOT_TRANSFORMED 0 /* No execution method was chosen for this IN. */
#define SUBS_SEMI_JOIN 1 /* IN was converted to semi-join. */
......
......@@ -185,6 +185,7 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
else
{
DBUG_PRINT("info", ("Subquery can't be converted to semi-join"));
/* Test if the user has set a legal combination of optimizer switches. */
if (!optimizer_flag(thd, OPTIMIZER_SWITCH_IN_TO_EXISTS) &&
!optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION))
my_error(ER_ILLEGAL_SUBQUERY_OPTIMIZER_SWITCHES, MYF(0));
......@@ -3543,16 +3544,10 @@ static void remove_subq_pushed_predicates(JOIN *join, Item **where)
/**
Setup for execution all subqueries of a query, for which the optimizer
chose hash semi-join.
Optimize all subqueries of a query that were not flattened into a semijoin.
@details Iterate over all immediate child subqueries of the query, and if
they are under an IN predicate, and the optimizer chose to compute it via
materialization:
- optimize each subquery,
- choose an optimial execution strategy for the IN predicate - either
materialization, or an IN=>EXISTS transformation with an approriate
engine.
@details
Optimize all immediate children subqueries of a query.
This phase must be called after substitute_for_best_equal_field() because
that function may replace items with other items from a multiple equality,
......@@ -3570,6 +3565,42 @@ bool JOIN::optimize_unflattened_subqueries()
}
/**
Choose an optimal strategy to execute an IN/ALL/ANY subquery predicate
based on cost.
@param join_tables the set of tables joined in the subquery
@notes
The method chooses between the materialization and IN=>EXISTS rewrite
strategies for the execution of a non-flattened subquery IN predicate.
The cost-based decision is made as follows:
1. compute materialize_strategy_cost based on the unmodified subquery
2. reoptimize the subquery taking into account the IN-EXISTS predicates
3. compute in_exists_strategy_cost based on the reoptimized plan
4. compare and set the cheaper strategy
if (materialize_strategy_cost >= in_exists_strategy_cost)
in_strategy = IN_TO_EXISTS
else
in_strategy = MATERIALIZATION
5. if in_strategy = MATERIALIZATION and it is not possible to initialize it
revert to IN_TO_EXISTS
6. if (in_strategy == MATERIALIZATION)
revert the subquery plan to the original one before reoptimizing
else
inject the IN=>EXISTS predicates into the new EXISTS subquery plan
The implementation itself is a bit more complicated because it takes into
account two more factors:
- whether the user allowed both strategies through an optimizer_switch, and
- if materialization was the cheaper strategy, whether it can be executed
or not.
@retval FALSE success.
@retval TRUE error occurred.
*/
bool JOIN::choose_subquery_plan(table_map join_tables)
{
/* The original QEP of the subquery. */
......@@ -3627,7 +3658,10 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
&outer_read_time, &outer_record_count);
else
{
/* TODO: outer_join can be NULL for DELETE statements. */
/*
TODO: outer_join can be NULL for DELETE statements.
How to compute its cost?
*/
outer_read_time= 1; /* TODO */
outer_record_count= 1; /* TODO */
}
......@@ -3694,13 +3728,14 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
}
/*
If (1) materialization is a possible strategy based on static analysis
If (1) materialization is a possible strategy based on semantic analysis
during the prepare phase, then if
(2) it is more expensive than the IN->EXISTS transformation, and
(3) it is not possible to create usable indexes for the materialization
strategy,
fall back to IN->EXISTS.
otherwise use materialization.
otherwise
use materialization.
*/
if (in_subs->in_strategy & SUBS_MATERIALIZATION &&
in_subs->setup_mat_engine())
......@@ -3752,6 +3787,11 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
if (!in_exists_reoptimized && in_to_exists_where && const_tables != tables)
{
/*
The subquery was not reoptimized either because the user allowed only the
IN-EXISTS strategy, or because materialization was not possible based on
semantic analysis. Clean up the original plan and reoptimize.
*/
for (uint i= 0; i < tables; i++)
{
join_tab[i].keyuse= NULL;
......
......@@ -19264,8 +19264,18 @@ bool JOIN::change_result(select_result *res)
/**
Save the original query execution plan so that the caller can revert to it
if needed.
Save a query execution plan so that the caller can revert to it if needed,
and reset the current query plan so that it can be reoptimized.
@param[out] save_keyuse a KEYUSE array to save JOIN::keyuse
@param[out] save_best_positions array to save JOIN::best_positions
@param[out] save_join_tab_keyuse array of KEYUSE pointers to save each
JOIN_TAB::keyuse pointer
@param[out] save_join_tab_checked_keys an array of bitmaps to save
each JOIN_TAB::checked_keys
@retval 0 OK
@retval 1 memory allocation error
*/
int JOIN::save_query_plan(DYNAMIC_ARRAY *save_keyuse,
POSITION *save_best_positions,
......@@ -19298,8 +19308,14 @@ int JOIN::save_query_plan(DYNAMIC_ARRAY *save_keyuse,
/**
Restore the query plan saved before reoptimization with additional
conditions.
Restore a query plan previously saved by the caller.
@param save_keyuse a KEYUSE array to restore into JOIN::keyuse
@param save_best_positions array to restore into JOIN::best_positions
@param save_join_tab_keyuse array of KEYUSE pointers to restore each
JOIN_TAB::keyuse pointer
@param save_join_tab_checked_keys an array of bitmaps to restore
each JOIN_TAB::checked_keys
*/
void JOIN::restore_query_plan(DYNAMIC_ARRAY *save_keyuse,
......@@ -19330,6 +19346,27 @@ void JOIN::restore_query_plan(DYNAMIC_ARRAY *save_keyuse,
/**
Reoptimize a query plan taking into account an additional conjunct to the
WHERE clause.
@param added_where An extra conjunct to the WHERE clause to reoptimize with
@param join_tables The set of tables to reoptimize
@param save_best_positions The join order of the original plan to restore to
if needed.
@notes
Given a query plan that was already optimized taking into account some WHERE clause
'C', reoptimize this plan with a new WHERE clause 'C AND added_where'. The
reoptimization works as follows:
1. Call update_ref_and_keys *only* for the new conditions 'added_where'
that are about to be injected into the query.
2. Expand if necessary the original KEYUSE array JOIN::keyuse to
accommodate the new REF accesses computed for the 'added_where' condition.
3. Add the new KEYUSEs into JOIN::keyuse.
4. Re-sort and re-filter the JOIN::keyuse array with the newly added
KEYUSE elements.
@retval 0 OK
@retval 1 memory allocation error
*/
int JOIN::reoptimize(Item *added_where, table_map join_tables,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment