Commit 18ad3bdc authored by unknown's avatar unknown

MWL#89: Cost-based choice between Materialization and IN->EXISTS transformation

Fixes for multiple problems/bugs/test failures that resulted from moving
subquery optimization from the execution phase to the optimization phase.
parent aa195b25
......@@ -947,6 +947,8 @@ public:
virtual bool register_field_in_read_map(uchar *arg) { return 0; }
virtual bool enumerate_field_refs_processor(uchar *arg) { return 0; }
virtual bool mark_as_eliminated_processor(uchar *arg) { return 0; }
virtual bool eliminate_subselect_processor(uchar *arg) { return 0; }
virtual bool set_fake_select_as_master_processor(uchar *arg) { return 0; }
/*
The next function differs from the previous one in that a bitmap to be updated
is passed as uchar *arg.
......
......@@ -1964,6 +1964,18 @@ Item *Item_in_optimizer::transform(Item_transformer transformer, uchar *argument
}
/*
  Item processor variant of the "is this expensive?" check.
  The question, together with the processor argument, is forwarded
  unchanged to args[1] and its answer is returned as is.
*/
bool Item_in_optimizer::is_expensive_processor(uchar *arg)
{
  bool expensive= args[1]->is_expensive_processor(arg);
  return expensive;
}
/*
  Report whether evaluating this item is expensive.
  The answer is taken verbatim from args[1].
*/
bool Item_in_optimizer::is_expensive()
{
  bool expensive= args[1]->is_expensive();
  return expensive;
}
longlong Item_func_eq::val_int()
{
DBUG_ASSERT(fixed == 1);
......@@ -5342,7 +5354,7 @@ Item *Item_func_nop_all::neg_transformer(THD *thd)
/* "NOT (e $cmp$ ANY (SELECT ...)) -> e $rev_cmp$" ALL (SELECT ...) */
Item_func_not_all *new_item= new Item_func_not_all(args[0]);
Item_allany_subselect *allany= (Item_allany_subselect*)args[0];
allany->func= allany->func_creator(FALSE);
allany->create_comp_func(FALSE);
allany->all= !allany->all;
allany->upper_item= new_item;
return new_item;
......@@ -5354,7 +5366,7 @@ Item *Item_func_not_all::neg_transformer(THD *thd)
Item_func_nop_all *new_item= new Item_func_nop_all(args[0]);
Item_allany_subselect *allany= (Item_allany_subselect*)args[0];
allany->all= !allany->all;
allany->func= allany->func_creator(TRUE);
allany->create_comp_func(TRUE);
allany->upper_item= new_item;
return new_item;
}
......
......@@ -265,6 +265,8 @@ public:
void keep_top_level_cache();
Item *transform(Item_transformer transformer, uchar *arg);
virtual Item *expr_cache_insert_transformer(uchar *thd_arg);
bool is_expensive_processor(uchar *arg);
bool is_expensive();
};
class Comp_creator
......
This diff is collapsed.
......@@ -108,6 +108,9 @@ public:
/* subquery is transformed */
bool changed;
/* TIMOUR: this is temporary, remove it. */
bool is_min_max_optimized;
/* TRUE <=> The underlying SELECT is correlated w.r.t some ancestor select */
bool is_correlated;
......@@ -180,6 +183,8 @@ public:
enum_parsing_place place() { return parsing_place; }
bool walk(Item_processor processor, bool walk_subquery, uchar *arg);
bool mark_as_eliminated_processor(uchar *arg);
bool eliminate_subselect_processor(uchar *arg);
bool set_fake_select_as_master_processor(uchar *arg);
bool enumerate_field_refs_processor(uchar *arg);
bool check_vcol_func_processor(uchar *int_arg)
{
......@@ -326,8 +331,6 @@ public:
class Item_in_subselect :public Item_exists_subselect
{
public:
Item *left_expr;
protected:
/*
Cache of the left operand of the subquery predicate. Allocated in the
......@@ -350,10 +353,30 @@ protected:
Item_in_optimizer *optimizer;
bool was_null;
bool abort_on_null;
public:
/* Used to trigger on/off conditions that were pushed down to subselect */
bool *pushed_cond_guards;
Comp_creator *func;
protected:
bool init_cond_guards();
trans_res select_in_like_transformer(JOIN *join);
trans_res single_value_transformer(JOIN *join);
trans_res row_value_transformer(JOIN * join);
trans_res create_single_in_to_exists_cond(JOIN * join,
Item **where_item,
Item **having_item);
trans_res inject_single_in_to_exists_cond(JOIN * join,
Item *where_item,
Item *having_item);
trans_res create_row_in_to_exists_cond(JOIN * join,
Item **where_item,
Item **having_item);
trans_res inject_row_in_to_exists_cond(JOIN * join,
Item *where_item,
Item *having_item);
public:
Item *left_expr;
/* Priority of this predicate in the convert-to-semi-join-nest process. */
int sj_convert_priority;
/*
......@@ -410,7 +433,8 @@ public:
Item_in_subselect()
:Item_exists_subselect(), left_expr_cache(0), first_execution(TRUE),
is_constant(FALSE), optimizer(0), abort_on_null(0),
pushed_cond_guards(NULL), exec_method(NOT_TRANSFORMED), upper_item(0)
pushed_cond_guards(NULL), func(NULL), exec_method(NOT_TRANSFORMED),
upper_item(0)
{}
void cleanup();
subs_type substype() { return IN_SUBS; }
......@@ -422,28 +446,8 @@ public:
was_null= 0;
}
trans_res select_transformer(JOIN *join);
trans_res select_in_like_transformer(JOIN *join, Comp_creator *func);
trans_res single_value_transformer(JOIN *join, Comp_creator *func);
trans_res row_value_transformer(JOIN * join);
trans_res single_value_in_to_exists_transformer(JOIN * join,
Comp_creator *func);
trans_res create_single_value_in_to_exists_cond(JOIN * join,
Comp_creator *func,
Item **where_term,
Item **having_term);
trans_res inject_single_value_in_to_exists_cond(JOIN * join,
Comp_creator *func,
Item *where_term,
Item *having_term);
trans_res row_value_in_to_exists_transformer(JOIN * join);
trans_res create_row_value_in_to_exists_cond(JOIN * join,
Item **where_term,
Item **having_term);
trans_res inject_row_value_in_to_exists_cond(JOIN * join,
Item *where_term,
Item *having_term);
bool create_in_to_exists_cond(JOIN * join_arg);
bool inject_in_to_exists_cond(JOIN * join_arg);
virtual bool exec();
longlong val_int();
......@@ -459,11 +463,12 @@ public:
bool fix_fields(THD *thd, Item **ref);
void fix_after_pullout(st_select_lex *new_parent, Item **ref);
void update_used_tables();
bool setup_engine();
bool setup_mat_engine();
bool init_left_expr_cache();
/* Inform 'this' that it was computed, and contains a valid result. */
void set_first_execution()
{
  /* Nothing to do when the flag has already been cleared. */
  if (!first_execution)
    return;
  first_execution= FALSE;
}
bool is_expensive_processor(uchar *arg);
bool is_expensive() { return TRUE; }
bool expr_cache_is_needed(THD *thd);
/*
......@@ -485,7 +490,6 @@ class Item_allany_subselect :public Item_in_subselect
{
public:
chooser_compare_func_creator func_creator;
Comp_creator *func;
bool all;
Item_allany_subselect(Item * left_expr, chooser_compare_func_creator fc,
......@@ -494,6 +498,7 @@ public:
// only ALL subquery has upper not
subs_type substype() { return all?ALL_SUBS:ANY_SUBS; }
trans_res select_transformer(JOIN *join);
/* Store in 'func' whatever func_creator returns for the given 'invert' flag. */
void create_comp_func(bool invert)
{
  func= func_creator(invert);
}
virtual void print(String *str, enum_query_type query_type);
};
......
......@@ -67,12 +67,15 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
{
THD *thd=join->thd;
st_select_lex *select_lex= join->select_lex;
st_select_lex_unit* parent_unit= select_lex->master_unit();
DBUG_ENTER("check_and_do_in_subquery_rewrites");
/*
If
1) this join is inside a subquery (of any type except FROM-clause
subquery) and
2) we aren't just normalizing a VIEW
3) The join and its select_lex object do not represent the 'fake'
select used to compute the result of a UNION.
Then perform early unconditional subquery transformations:
- Convert subquery predicate into semi-join, or
......@@ -85,7 +88,8 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
*/
Item_subselect *subselect;
if (!thd->lex->view_prepare_mode && // (1)
(subselect= select_lex->master_unit()->item)) // (2)
(subselect= parent_unit->item))// && // (2)
// select_lex == parent_unit->fake_select_lex) // (3)
{
Item_in_subselect *in_subs= NULL;
if (subselect->substype() == Item_subselect::IN_SUBS)
......@@ -129,6 +133,9 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
if (failure)
DBUG_RETURN(-1); /* purecov: deadcode */
}
if (select_lex == parent_unit->fake_select_lex)
DBUG_RETURN(0);
DBUG_PRINT("info", ("Checking if subq can be converted to semi-join"));
/*
Check if we're in subquery that is a candidate for flattening into a
......@@ -154,7 +161,7 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
!join->having && !select_lex->with_sum_func && // 4
thd->thd_marker.emb_on_expr_nest && // 5
select_lex->outer_select()->join && // 6
select_lex->master_unit()->first_select()->leaf_tables && // 7
parent_unit->first_select()->leaf_tables && // 7
in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED && // 8
select_lex->outer_select()->leaf_tables && // 9
!((join->select_options | // 10
......@@ -212,7 +219,7 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
if (optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION) &&
in_subs && // 1
!select_lex->is_part_of_union() && // 2
select_lex->master_unit()->first_select()->leaf_tables && // 3
parent_unit->first_select()->leaf_tables && // 3
thd->lex->sql_command == SQLCOM_SELECT && // *
select_lex->outer_select()->leaf_tables && // 3A
subquery_types_allow_materialization(in_subs) &&
......@@ -223,17 +230,29 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
!in_subs->is_correlated && // 5
in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED) // 6
{
/*
Materialization is possible, later the optimize phase of each
subquery will choose either materialization or in-to-exists based
on cost.
*/
in_subs->exec_method= Item_in_subselect::MATERIALIZATION;
}
else if (in_subs)
{
/* Materialization is not possible at all. */
in_subs->exec_method= Item_in_subselect::IN_TO_EXISTS;
}
/*
Transform each subquery predicate according to its overloaded
transformer.
*/
Item_subselect::trans_res trans_res;
if ((trans_res= subselect->select_transformer(join)) !=
Item_subselect::RES_OK)
{
DBUG_RETURN((trans_res == Item_subselect::RES_ERROR));
}
}
}
DBUG_RETURN(0);
}
......@@ -3505,3 +3524,56 @@ static void remove_subq_pushed_predicates(JOIN *join, Item **where)
}
/**
  Choose between materialization and the IN->EXISTS transformation for the
  IN/ALL/ANY subquery predicate attached to this JOIN's unit, and apply the
  choice.

  The decision is expressed as a comparison of two costs. At present both
  costs are hard-coded so that IN->EXISTS is always selected.

  If materialization is selected but Item_in_subselect::setup_mat_engine()
  reports failure, the method falls back to IN->EXISTS. When IN->EXISTS is
  the final choice, Item_in_subselect::inject_in_to_exists_cond() is called
  for this JOIN and its result is returned.

  @retval FALSE  success
  @retval TRUE   inject_in_to_exists_cond() reported an error
*/
bool JOIN::choose_subquery_plan()
{
  double mat_strategy_cost;       /* The cost to compute IN via materialization. */
  double in_exists_strategy_cost; /* The cost of the IN->EXISTS strategy. */
  /*
    Start from "success". The materialization branch below does not assign a
    result, so without this initialization the function would return an
    indeterminate value that the caller interprets as an error flag.
  */
  bool res= FALSE;

  DBUG_ASSERT(in_to_exists_where || in_to_exists_having);
  DBUG_ASSERT(select_lex->master_unit()->item &&
              (select_lex->master_unit()->item->substype() ==
               Item_subselect::IN_SUBS ||
               select_lex->master_unit()->item->substype() ==
               Item_subselect::ALL_SUBS ||
               select_lex->master_unit()->item->substype() ==
               Item_subselect::ANY_SUBS));

  Item_in_subselect *in_subs= (Item_in_subselect*)
                              select_lex->master_unit()->item;

  /* Always revert to IN->EXISTS. */
  mat_strategy_cost= 1;
  in_exists_strategy_cost= 0;

  if (mat_strategy_cost < in_exists_strategy_cost)
  {
    in_subs->exec_method= Item_in_subselect::MATERIALIZATION;
    if (in_subs->setup_mat_engine())
    {
      /*
        In some cases it is not possible to create usable indexes for the
        materialization strategy, so fall back to IN->EXISTS.
      */
      in_subs->exec_method= Item_in_subselect::IN_TO_EXISTS;
    }
  }
  else
    in_subs->exec_method= Item_in_subselect::IN_TO_EXISTS;

  if (in_subs->exec_method == Item_in_subselect::MATERIALIZATION)
  {
    // TODO: should we unset the UNCACHEABLE_DEPENDENT flag from
    // select_lex->uncacheable; ?
    // This affects how we execute JOIN::join_free - full or not.
    // inner_join->restore_plan (keyuse, best_positions, best_read)
    ;
  }
  else if (in_subs->exec_method == Item_in_subselect::IN_TO_EXISTS)
    res= in_subs->inject_in_to_exists_cond(this);
  else
    DBUG_ASSERT(FALSE);

  return res;
}
......@@ -2630,6 +2630,7 @@ void Query_arena::free_items()
for (; free_list; free_list= next)
{
next= free_list->next;
DBUG_ASSERT(free_list != next);
free_list->delete_self();
}
/* Postcondition: free_list is 0 */
......
......@@ -92,6 +92,10 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
}
}
/* Apply the IN=>EXISTS transformation to all subqueries and optimize them. */
if (select_lex->optimize_unflattened_subqueries())
DBUG_RETURN(TRUE);
const_cond= (!conds || conds->const_item());
safe_update=test(thd->options & OPTION_SAFE_UPDATES);
if (safe_update && const_cond)
......
......@@ -1734,17 +1734,29 @@ void st_select_lex_node::fast_exclude()
}
/*
excluding select_lex structure (except first (first select can't be
Exclude a node from the tree lex structure, but leave it in the global
list of nodes.
*/
void st_select_lex_node::exclude_from_tree()
{
  /* Make the slot that pointed at this node point at its successor. */
  *prev= next;
  /* If there is a successor, its back-link now refers to that same slot. */
  if (next)
    next->prev= prev;
}
/*
Exclude select_lex structure (except the first one: the first select can't be
deleted, because it is the uppermost select)
*/
void st_select_lex_node::exclude()
{
//exclude from global list
/* exclude from global list */
fast_exclude();
//exclude from other structures
if ((*prev= next))
next->prev= prev;
/* exclude from other structures */
exclude_from_tree();
/*
We do not need following statements, because prev pointer of first
list element point to master->slave
......@@ -3076,6 +3088,46 @@ bool st_select_lex::add_index_hint (THD *thd, char *str, uint length)
str, length));
}
/**
  Optimize the subqueries directly nested in this select whose unit is
  attached to a subquery predicate (un->item is not NULL).

  For every select of such a unit:
    - if the predicate is an IN/ALL/ANY subquery, the IN->EXISTS conditions
      are created first via Item_in_subselect::create_in_to_exists_cond();
    - the unit limit is set from its global parameters;
    - the select's JOIN is optimized with the select temporarily installed
      as thd->lex->current_select.

  Selects that have no JOIN object are skipped.

  @retval FALSE  success
  @retval TRUE   creating the IN->EXISTS conditions or JOIN::optimize() failed
*/
bool st_select_lex::optimize_unflattened_subqueries()
{
  for (SELECT_LEX_UNIT *un= first_inner_unit(); un; un= un->next_unit())
  {
    Item_subselect *subquery_predicate= un->item;
    if (!subquery_predicate)
      continue;

    Item_in_subselect *item_in= NULL;
    if (subquery_predicate->substype() == Item_subselect::IN_SUBS ||
        subquery_predicate->substype() == Item_subselect::ALL_SUBS ||
        subquery_predicate->substype() == Item_subselect::ANY_SUBS)
      item_in= (Item_in_subselect*) subquery_predicate;

    for (SELECT_LEX *sl= un->first_select(); sl; sl= sl->next_select())
    {
      JOIN *inner_join= sl->join;
      /*
        There is nothing to optimize for a select without a JOIN object,
        and both calls below would dereference the NULL pointer.
      */
      if (!inner_join)
        continue;

      SELECT_LEX *save_select= un->thd->lex->current_select;
      int res;

      /*
        Make sure that we do not create IN->EXISTS conditions for
        subquery predicates that were substituted by Item_maxmin_subselect
        or by Item_singlerow_subselect.
      */
      DBUG_ASSERT(!item_in || !item_in->is_min_max_optimized);
      if (item_in && item_in->create_in_to_exists_cond(inner_join))
        return TRUE;

      un->set_limit(un->global_parameters);
      /* Optimize with 'sl' as the current select, then restore the old one. */
      un->thd->lex->current_select= sl;
      res= inner_join->optimize();
      un->thd->lex->current_select= save_select;
      if (res)
        return TRUE;
    }
  }
  return FALSE;
}
/**
A routine used by the parser to decide whether we are specifying a full
partitioning or if only partitions to add or to split.
......@@ -3093,4 +3145,3 @@ bool st_lex::is_partition_management() const
(alter_info.flags == ALTER_ADD_PARTITION ||
alter_info.flags == ALTER_REORGANIZE_PARTITION));
}
......@@ -439,6 +439,7 @@ public:
st_select_lex_node(): linkage(UNSPECIFIED_TYPE) {}
virtual ~st_select_lex_node() {}
inline st_select_lex_node* get_master() { return master; }
inline void set_master(st_select_lex_node* master_arg) { master= master_arg; }
virtual void init_query();
virtual void init_select();
void include_down(st_select_lex_node *upper);
......@@ -446,6 +447,7 @@ public:
void include_standalone(st_select_lex_node *sel, st_select_lex_node **ref);
void include_global(st_select_lex_node **plink);
void exclude();
void exclude_from_tree();
virtual st_select_lex_unit* master_unit()= 0;
virtual st_select_lex* outer_select()= 0;
......@@ -839,6 +841,12 @@ public:
void clear_index_hints(void) { index_hints= NULL; }
bool is_part_of_union() { return master_unit()->is_union(); }
/*
Optimize all subqueries that have not been flattened into semi-joins.
This functionality is a method of SELECT_LEX instead of JOIN because
some SQL statements, such as DELETE, do not have a corresponding JOIN object.
*/
bool optimize_unflattened_subqueries();
private:
/* current index hint kind. used in filling up index_hints */
enum index_hint_type current_index_hint_type;
......
......@@ -926,9 +926,27 @@ JOIN::optimize()
{
DBUG_PRINT("info",("No tables"));
error= 0;
/* Create all structures needed for materialized subquery execution. */
if (setup_subquery_materialization())
if (optimize_unflattened_subqueries())
DBUG_RETURN(1);
if (in_to_exists_where || in_to_exists_having)
{
/*
TIMOUR: TODO: refactor this block and JOIN::choose_subquery_plan
*/
Item_in_subselect *in_subs= (Item_in_subselect*)
select_lex->master_unit()->item;
if (in_subs->exec_method == Item_in_subselect::MATERIALIZATION)
; // setup materialized execution structures
else if (in_subs->exec_method == Item_in_subselect::IN_TO_EXISTS)
{
if (in_subs->inject_in_to_exists_cond(this))
DBUG_RETURN(1);
tmp_having= having;
}
else
DBUG_ASSERT(FALSE);
}
DBUG_RETURN(0);
}
error= -1; // Error is sent to client
......@@ -1286,7 +1304,7 @@ JOIN::optimize()
init_ftfuncs(thd, select_lex, test(order));
/* Create all structures needed for materialized subquery execution. */
if (setup_subquery_materialization())
if (optimize_unflattened_subqueries())
DBUG_RETURN(1);
int res;
......@@ -1381,6 +1399,34 @@ JOIN::optimize()
if (join_tab->is_using_loose_index_scan())
tmp_table_param.precomputed_group_by= TRUE;
error= 0;
DBUG_RETURN(0);
setup_subq_exit:
/*
Even with zero matching rows, subqueries in the HAVING clause may
need to be evaluated if there are aggregate functions in the query.
If we planned to materialize the subquery, we need to set it up
properly before prematurely leaving optimize().
*/
if (optimize_unflattened_subqueries())
DBUG_RETURN(1);
error= 0;
DBUG_RETURN(0);
}
/**
Create and initialize objects needed for the execution of a query plan.
*/
int JOIN::init_execution()
{
DBUG_ENTER("JOIN::init_execution");
DBUG_ASSERT(optimized);
initialized= true;
/* Create a tmp table if distinct or if the sort is too complicated */
if (need_tmp)
{
......@@ -1499,19 +1545,6 @@ JOIN::optimize()
DBUG_RETURN(-1); /* purecov: inspected */
}
error= 0;
DBUG_RETURN(0);
setup_subq_exit:
/*
Even with zero matching rows, subqueries in the HAVING clause may
need to be evaluated if there are aggregate functions in the
query. If we have planned to materialize the subquery, we need to
set it up properly before prematurely leaving optimize().
*/
if (setup_subquery_materialization())
DBUG_RETURN(1);
error= 0;
DBUG_RETURN(0);
}
......@@ -1775,6 +1808,9 @@ JOIN::exec()
int tmp_error;
DBUG_ENTER("JOIN::exec");
if (!initialized && init_execution())
DBUG_VOID_RETURN;
thd_proc_info(thd, "executing");
error= 0;
if (procedure)
......@@ -2604,25 +2640,9 @@ err:
@retval TRUE error occurred.
*/
bool JOIN::setup_subquery_materialization()
bool JOIN::optimize_unflattened_subqueries()
{
for (SELECT_LEX_UNIT *un= select_lex->first_inner_unit(); un;
un= un->next_unit())
{
for (SELECT_LEX *sl= un->first_select(); sl; sl= sl->next_select())
{
Item_subselect *subquery_predicate= sl->master_unit()->item;
if (subquery_predicate &&
subquery_predicate->substype() == Item_subselect::IN_SUBS)
{
Item_in_subselect *in_subs= (Item_in_subselect*) subquery_predicate;
if (in_subs->exec_method == Item_in_subselect::MATERIALIZATION &&
in_subs->setup_engine())
return TRUE;
}
}
}
return FALSE;
return select_lex->optimize_unflattened_subqueries();
}
......@@ -3143,6 +3163,10 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
sizeof(POSITION)*join->const_tables);
join->best_read=1.0;
}
if ((join->in_to_exists_where || join->in_to_exists_having)
&& join->choose_subquery_plan())
goto error;
/* Generate an execution plan from the found optimal join order. */
DBUG_RETURN(join->thd->killed || get_best_combination(join));
......
......@@ -1573,8 +1573,15 @@ public:
bool union_part; ///< this subselect is part of union
bool optimized; ///< flag to avoid double optimization in EXPLAIN
bool initialized; ///< flag to avoid double init_execution calls
Array<Item_in_subselect> sj_subselects;
/*
Additional WHERE and HAVING predicates to be considered for IN=>EXISTS
subquery transformation of a JOIN object.
*/
Item *in_to_exists_where;
Item *in_to_exists_having;
/* Temporary tables used to weed-out semi-join duplicates */
List<TABLE> sj_tmp_tables;
......@@ -1649,6 +1656,7 @@ public:
ref_pointer_array_size= 0;
zero_result_cause= 0;
optimized= 0;
initialized= 0;
cond_equal= 0;
group_optimized_away= 0;
......@@ -1662,6 +1670,8 @@ public:
no_const_tables= FALSE;
first_select= sub_select;
in_to_exists_where= NULL;
in_to_exists_having= NULL;
}
int prepare(Item ***rref_pointer_array, TABLE_LIST *tables, uint wind_num,
......@@ -1670,12 +1680,13 @@ public:
SELECT_LEX_UNIT *unit);
int optimize();
int reinit();
int init_execution();
void exec();
int destroy();
void restore_tmp();
bool alloc_func_list();
bool flatten_subqueries();
bool setup_subquery_materialization();
bool optimize_unflattened_subqueries();
bool make_sum_func_list(List<Item> &all_fields, List<Item> &send_fields,
bool before_group_by, bool recompute= FALSE);
......@@ -1735,6 +1746,7 @@ public:
NULL : join_tab+const_tables;
}
bool setup_subquery_caches();
bool choose_subquery_plan();
private:
/**
TRUE if the query contains an aggregate function but has no GROUP
......
......@@ -186,6 +186,8 @@ st_select_lex_unit::init_prepare_fake_select_lex(THD *thd_arg)
{
(*order->item)->walk(&Item::change_context_processor, 0,
(uchar*) &fake_select_lex->context);
(*order->item)->walk(&Item::set_fake_select_as_master_processor, 0,
(uchar*) fake_select_lex);
}
}
......@@ -271,6 +273,18 @@ bool st_select_lex_unit::prepare(THD *thd_arg, select_result *sel_result,
can_skip_order_by= is_union_select && !(sl->braces && sl->explicit_limit);
/*
Remove all references from the select_lex_units to the subqueries that
are inside the ORDER BY clause.
*/
if (can_skip_order_by)
{
for (ORDER *ord= (ORDER *)sl->order_list.first; ord; ord= ord->next)
{
(*ord->item)->walk(&Item::eliminate_subselect_processor, FALSE, NULL);
}
}
saved_error= join->prepare(&sl->ref_pointer_array,
(TABLE_LIST*) sl->table_list.first,
sl->with_wild,
......
......@@ -290,6 +290,10 @@ int mysql_update(THD *thd,
DBUG_RETURN(1); /* purecov: inspected */
}
/* Apply the IN=>EXISTS transformation to all subqueries and optimize them. */
if (select_lex->optimize_unflattened_subqueries())
DBUG_RETURN(TRUE);
if (select_lex->inner_refs_list.elements &&
fix_inner_refs(thd, all_fields, select_lex, select_lex->ref_pointer_array))
DBUG_RETURN(1);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment