Commit 8ec5e13f authored by unknown

MWL#89: Cost-based choice between Materialization and IN->EXISTS transformation

  Phase 3: Implementation of re-optimization of subqueries with injected predicates
           and cost comparison between Materialization and IN->EXISTS strategies.

The commit contains the following known problems:
- The implementation of EXPLAIN has not been re-engineered to reflect the
  changes in subquery optimization. EXPLAIN for subqueries is called during
  the execute phase, which results in different code paths during JOIN::optimize
  and thus in differing EXPLAIN messages for constant/system tables.
- There are some valgrind warnings that need investigation.
- Several EXPLAINs with minor differences need to be reconsidered after fixing
  the EXPLAIN problem above.

This patch also adds one extra optimizer_switch flag, 'in_to_exists', for
complete manual control of the subquery execution strategies.
parent 50888477
...@@ -840,7 +840,12 @@ x ROW(11, 12) = (SELECT MAX(x), 22) ROW(11, 12) IN (SELECT MAX(x), 22) ...@@ -840,7 +840,12 @@ x ROW(11, 12) = (SELECT MAX(x), 22) ROW(11, 12) IN (SELECT MAX(x), 22)
1 0 0 1 0 0
2 0 0 2 0 0
11 0 0 11 0 0
# 2nd and 3rd columns should be same for x == 11 only # 2nd and 3rd columns should be same
EXPLAIN SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 3
3 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used
2 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used
SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1; SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
x ROW(11, 12) = (SELECT MAX(x), 12) ROW(11, 12) IN (SELECT MAX(x), 12) x ROW(11, 12) = (SELECT MAX(x), 12) ROW(11, 12) IN (SELECT MAX(x), 12)
1 0 0 1 0 0
......
...@@ -844,11 +844,16 @@ x ROW(11, 12) = (SELECT MAX(x), 22) ROW(11, 12) IN (SELECT MAX(x), 22) ...@@ -844,11 +844,16 @@ x ROW(11, 12) = (SELECT MAX(x), 22) ROW(11, 12) IN (SELECT MAX(x), 22)
1 0 0 1 0 0
2 0 0 2 0 0
11 0 0 11 0 0
# 2nd and 3rd columns should be same for x == 11 only # 2nd and 3rd columns should be same
EXPLAIN SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 3
3 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used
2 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used
SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1; SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
x ROW(11, 12) = (SELECT MAX(x), 12) ROW(11, 12) IN (SELECT MAX(x), 12) x ROW(11, 12) = (SELECT MAX(x), 12) ROW(11, 12) IN (SELECT MAX(x), 12)
1 0 1 1 0 0
2 0 1 2 0 0
11 1 1 11 1 1
DROP TABLE t1; DROP TABLE t1;
# both columns should be same # both columns should be same
......
...@@ -681,7 +681,8 @@ SELECT a, ROW(11, 12) = (SELECT a, 12), ROW(11, 12) IN (SELECT a, 12) FROM t1; ...@@ -681,7 +681,8 @@ SELECT a, ROW(11, 12) = (SELECT a, 12), ROW(11, 12) IN (SELECT a, 12) FROM t1;
# The x alias is used below to workaround bug #40674. # The x alias is used below to workaround bug #40674.
# Regression tests for sum function on outer column in subselect from dual: # Regression tests for sum function on outer column in subselect from dual:
SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 22), ROW(11, 12) IN (SELECT MAX(x), 22) FROM t1; SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 22), ROW(11, 12) IN (SELECT MAX(x), 22) FROM t1;
--echo # 2nd and 3rd columns should be same for x == 11 only --echo # 2nd and 3rd columns should be same
EXPLAIN SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1; SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
DROP TABLE t1; DROP TABLE t1;
......
This diff is collapsed.
...@@ -108,9 +108,6 @@ class Item_subselect :public Item_result_field ...@@ -108,9 +108,6 @@ class Item_subselect :public Item_result_field
/* subquery is transformed */ /* subquery is transformed */
bool changed; bool changed;
/* TIMOUR: this is temporary, remove it. */
bool is_min_max_optimized;
/* TRUE <=> The underlying SELECT is correlated w.r.t some ancestor select */ /* TRUE <=> The underlying SELECT is correlated w.r.t some ancestor select */
bool is_correlated; bool is_correlated;
...@@ -121,6 +118,12 @@ class Item_subselect :public Item_result_field ...@@ -121,6 +118,12 @@ class Item_subselect :public Item_result_field
Item_subselect(); Item_subselect();
virtual subs_type substype() { return UNKNOWN_SUBS; } virtual subs_type substype() { return UNKNOWN_SUBS; }
bool is_in_predicate()
{
return (substype() == Item_subselect::IN_SUBS ||
substype() == Item_subselect::ALL_SUBS ||
substype() == Item_subselect::ANY_SUBS);
}
/* /*
We need this method, because some compilers do not allow 'this' We need this method, because some compilers do not allow 'this'
...@@ -314,6 +317,18 @@ class Item_exists_subselect :public Item_subselect ...@@ -314,6 +317,18 @@ class Item_exists_subselect :public Item_subselect
}; };
/*
Possible methods to execute an IN predicate. These are set by the optimizer
based on user-set optimizer switches, syntactic analysis and cost comparison.
*/
#define SUBS_NOT_TRANSFORMED 0 /* No execution method was chosen for this IN. */
#define SUBS_SEMI_JOIN 1 /* IN was converted to semi-join. */
#define SUBS_IN_TO_EXISTS 2 /* IN was converted to correlated EXISTS. */
#define SUBS_MATERIALIZATION 4 /* Execute IN via subquery materialization. */
/* Partial matching substrategies of MATERIALIZATION. */
#define SUBS_PARTIAL_MATCH_ROWID_MERGE 8
#define SUBS_PARTIAL_MATCH_TABLE_SCAN 16
/** /**
Representation of IN subquery predicates of the form Representation of IN subquery predicates of the form
"left_expr IN (SELECT ...)". "left_expr IN (SELECT ...)".
...@@ -362,19 +377,13 @@ class Item_in_subselect :public Item_exists_subselect ...@@ -362,19 +377,13 @@ class Item_in_subselect :public Item_exists_subselect
trans_res select_in_like_transformer(JOIN *join); trans_res select_in_like_transformer(JOIN *join);
trans_res single_value_transformer(JOIN *join); trans_res single_value_transformer(JOIN *join);
trans_res row_value_transformer(JOIN * join); trans_res row_value_transformer(JOIN * join);
bool fix_having(Item *having, st_select_lex *select_lex);
trans_res create_single_in_to_exists_cond(JOIN * join, trans_res create_single_in_to_exists_cond(JOIN * join,
Item **where_item, Item **where_item,
Item **having_item); Item **having_item);
trans_res inject_single_in_to_exists_cond(JOIN * join,
Item *where_item,
Item *having_item);
trans_res create_row_in_to_exists_cond(JOIN * join, trans_res create_row_in_to_exists_cond(JOIN * join,
Item **where_item, Item **where_item,
Item **having_item); Item **having_item);
trans_res inject_row_in_to_exists_cond(JOIN * join,
Item *where_item,
Item *having_item);
public: public:
Item *left_expr; Item *left_expr;
/* Priority of this predicate in the convert-to-semi-join-nest process. */ /* Priority of this predicate in the convert-to-semi-join-nest process. */
...@@ -407,14 +416,8 @@ class Item_in_subselect :public Item_exists_subselect ...@@ -407,14 +416,8 @@ class Item_in_subselect :public Item_exists_subselect
*/ */
bool sjm_scan_allowed; bool sjm_scan_allowed;
/* The method chosen to execute the IN predicate. */ /* A bitmap of possible execution strategies for an IN predicate. */
enum enum_exec_method { uchar in_strategy;
NOT_TRANSFORMED, /* No execution method was chosen for this IN. */
SEMI_JOIN, /* IN was converted to semi-join nest and should be removed. */
IN_TO_EXISTS, /* IN was converted to correlated EXISTS. */
MATERIALIZATION /* IN will be executed via subquery materialization. */
};
enum_exec_method exec_method;
bool *get_cond_guard(int i) bool *get_cond_guard(int i)
{ {
...@@ -433,7 +436,7 @@ class Item_in_subselect :public Item_exists_subselect ...@@ -433,7 +436,7 @@ class Item_in_subselect :public Item_exists_subselect
Item_in_subselect() Item_in_subselect()
:Item_exists_subselect(), left_expr_cache(0), first_execution(TRUE), :Item_exists_subselect(), left_expr_cache(0), first_execution(TRUE),
is_constant(FALSE), optimizer(0), abort_on_null(0), is_constant(FALSE), optimizer(0), abort_on_null(0),
pushed_cond_guards(NULL), func(NULL), exec_method(NOT_TRANSFORMED), pushed_cond_guards(NULL), func(NULL), in_strategy(0),
upper_item(0) upper_item(0)
{} {}
void cleanup(); void cleanup();
...@@ -446,8 +449,8 @@ class Item_in_subselect :public Item_exists_subselect ...@@ -446,8 +449,8 @@ class Item_in_subselect :public Item_exists_subselect
was_null= 0; was_null= 0;
} }
trans_res select_transformer(JOIN *join); trans_res select_transformer(JOIN *join);
bool create_in_to_exists_cond(JOIN * join_arg); bool create_in_to_exists_cond(JOIN *join_arg);
bool inject_in_to_exists_cond(JOIN * join_arg); bool inject_in_to_exists_cond(JOIN *join_arg);
virtual bool exec(); virtual bool exec();
longlong val_int(); longlong val_int();
......
...@@ -563,20 +563,20 @@ class Default_object_creation_ctx : public Object_creation_ctx ...@@ -563,20 +563,20 @@ class Default_object_creation_ctx : public Object_creation_ctx
#define OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION 4 #define OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION 4
#define OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT 8 #define OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT 8
#define OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN 16 #define OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN 16
#define OPTIMIZER_SWITCH_FIRSTMATCH 32 #define OPTIMIZER_SWITCH_FIRSTMATCH 32
#define OPTIMIZER_SWITCH_LOOSE_SCAN 64 #define OPTIMIZER_SWITCH_LOOSE_SCAN 64
#define OPTIMIZER_SWITCH_MATERIALIZATION 128 #define OPTIMIZER_SWITCH_MATERIALIZATION 128
#define OPTIMIZER_SWITCH_SEMIJOIN 256 #define OPTIMIZER_SWITCH_IN_TO_EXISTS 256
#define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE 512 #define OPTIMIZER_SWITCH_SEMIJOIN 512
#define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN 1024 #define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE 1024
#define OPTIMIZER_SWITCH_SUBQUERY_CACHE (1<<11) #define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN (1<<11)
#define OPTIMIZER_SWITCH_SUBQUERY_CACHE (1<<12)
#ifdef DBUG_OFF #ifdef DBUG_OFF
# define OPTIMIZER_SWITCH_LAST (1<<12)
#else
# define OPTIMIZER_SWITCH_TABLE_ELIMINATION (1<<12)
# define OPTIMIZER_SWITCH_LAST (1<<13) # define OPTIMIZER_SWITCH_LAST (1<<13)
#else
# define OPTIMIZER_SWITCH_TABLE_ELIMINATION (1<<13)
# define OPTIMIZER_SWITCH_LAST (1<<14)
#endif #endif
#ifdef DBUG_OFF #ifdef DBUG_OFF
...@@ -589,6 +589,7 @@ class Default_object_creation_ctx : public Object_creation_ctx ...@@ -589,6 +589,7 @@ class Default_object_creation_ctx : public Object_creation_ctx
OPTIMIZER_SWITCH_FIRSTMATCH | \ OPTIMIZER_SWITCH_FIRSTMATCH | \
OPTIMIZER_SWITCH_LOOSE_SCAN | \ OPTIMIZER_SWITCH_LOOSE_SCAN | \
OPTIMIZER_SWITCH_MATERIALIZATION | \ OPTIMIZER_SWITCH_MATERIALIZATION | \
OPTIMIZER_SWITCH_IN_TO_EXISTS | \
OPTIMIZER_SWITCH_SEMIJOIN | \ OPTIMIZER_SWITCH_SEMIJOIN | \
OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\ OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\ OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\
...@@ -603,6 +604,7 @@ class Default_object_creation_ctx : public Object_creation_ctx ...@@ -603,6 +604,7 @@ class Default_object_creation_ctx : public Object_creation_ctx
OPTIMIZER_SWITCH_FIRSTMATCH | \ OPTIMIZER_SWITCH_FIRSTMATCH | \
OPTIMIZER_SWITCH_LOOSE_SCAN | \ OPTIMIZER_SWITCH_LOOSE_SCAN | \
OPTIMIZER_SWITCH_MATERIALIZATION | \ OPTIMIZER_SWITCH_MATERIALIZATION | \
OPTIMIZER_SWITCH_IN_TO_EXISTS | \
OPTIMIZER_SWITCH_SEMIJOIN | \ OPTIMIZER_SWITCH_SEMIJOIN | \
OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\ OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\ OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\
......
...@@ -304,7 +304,7 @@ static const char *optimizer_switch_names[]= ...@@ -304,7 +304,7 @@ static const char *optimizer_switch_names[]=
"index_merge","index_merge_union","index_merge_sort_union", "index_merge","index_merge_union","index_merge_sort_union",
"index_merge_intersection", "index_merge_intersection",
"index_condition_pushdown", "index_condition_pushdown",
"firstmatch","loosescan","materialization", "semijoin", "firstmatch","loosescan","materialization","in_to_exists","semijoin",
"partial_match_rowid_merge", "partial_match_rowid_merge",
"partial_match_table_scan", "partial_match_table_scan",
"subquery_cache", "subquery_cache",
...@@ -325,6 +325,7 @@ static const unsigned int optimizer_switch_names_len[]= ...@@ -325,6 +325,7 @@ static const unsigned int optimizer_switch_names_len[]=
sizeof("firstmatch") - 1, sizeof("firstmatch") - 1,
sizeof("loosescan") - 1, sizeof("loosescan") - 1,
sizeof("materialization") - 1, sizeof("materialization") - 1,
sizeof("in_to_exists") - 1,
sizeof("semijoin") - 1, sizeof("semijoin") - 1,
sizeof("partial_match_rowid_merge") - 1, sizeof("partial_match_rowid_merge") - 1,
sizeof("partial_match_table_scan") - 1, sizeof("partial_match_table_scan") - 1,
...@@ -412,6 +413,7 @@ static const char *optimizer_switch_str="index_merge=on,index_merge_union=on," ...@@ -412,6 +413,7 @@ static const char *optimizer_switch_str="index_merge=on,index_merge_union=on,"
"firstmatch=on," "firstmatch=on,"
"loosescan=on," "loosescan=on,"
"materialization=on," "materialization=on,"
"in_to_exists=on,"
"semijoin=on," "semijoin=on,"
"partial_match_rowid_merge=on," "partial_match_rowid_merge=on,"
"partial_match_table_scan=on," "partial_match_table_scan=on,"
...@@ -7233,7 +7235,7 @@ The minimum value for this variable is 4096.", ...@@ -7233,7 +7235,7 @@ The minimum value for this variable is 4096.",
{"optimizer_switch", OPT_OPTIMIZER_SWITCH, {"optimizer_switch", OPT_OPTIMIZER_SWITCH,
"optimizer_switch=option=val[,option=val...], where option={index_merge, " "optimizer_switch=option=val[,option=val...], where option={index_merge, "
"index_merge_union, index_merge_sort_union, index_merge_intersection, " "index_merge_union, index_merge_sort_union, index_merge_intersection, "
"index_condition_pushdown, firstmatch, loosescan, materialization, " "index_condition_pushdown, firstmatch, loosescan, materialization, in_to_exists, "
"semijoin, partial_match_rowid_merge, partial_match_table_scan, " "semijoin, partial_match_rowid_merge, partial_match_table_scan, "
"subquery_cache" "subquery_cache"
#ifndef DBUG_OFF #ifndef DBUG_OFF
......
This diff is collapsed.
...@@ -6245,3 +6245,5 @@ ER_UNKNOWN_OPTION ...@@ -6245,3 +6245,5 @@ ER_UNKNOWN_OPTION
eng "Unknown option '%-.64s'" eng "Unknown option '%-.64s'"
ER_BAD_OPTION_VALUE ER_BAD_OPTION_VALUE
eng "Incorrect value '%-.64s' for option '%-.64s'" eng "Incorrect value '%-.64s' for option '%-.64s'"
ER_ILLEGAL_SUBQUERY_OPTIMIZER_SWITCHES
eng "At least one of the 'in_to_exists' or 'materialization' optimizer_switch flags must be 'on'."
...@@ -2157,8 +2157,8 @@ void st_select_lex::print_limit(THD *thd, ...@@ -2157,8 +2157,8 @@ void st_select_lex::print_limit(THD *thd,
select_limit == 1, and there should be no offset_limit. select_limit == 1, and there should be no offset_limit.
*/ */
(((subs_type == Item_subselect::IN_SUBS) && (((subs_type == Item_subselect::IN_SUBS) &&
((Item_in_subselect*)item)->exec_method == ((Item_in_subselect*)item)->in_strategy &
Item_in_subselect::MATERIALIZATION) ? SUBS_MATERIALIZATION) ?
TRUE : TRUE :
(select_limit->val_int() == 1LL) && (select_limit->val_int() == 1LL) &&
offset_limit == 0)); offset_limit == 0));
...@@ -3096,25 +3096,11 @@ bool st_select_lex::optimize_unflattened_subqueries() ...@@ -3096,25 +3096,11 @@ bool st_select_lex::optimize_unflattened_subqueries()
Item_subselect *subquery_predicate= un->item; Item_subselect *subquery_predicate= un->item;
if (subquery_predicate) if (subquery_predicate)
{ {
Item_in_subselect *item_in= NULL;
if (subquery_predicate->substype() == Item_subselect::IN_SUBS ||
subquery_predicate->substype() == Item_subselect::ALL_SUBS ||
subquery_predicate->substype() == Item_subselect::ANY_SUBS)
item_in= (Item_in_subselect*) subquery_predicate;
for (SELECT_LEX *sl= un->first_select(); sl; sl= sl->next_select()) for (SELECT_LEX *sl= un->first_select(); sl; sl= sl->next_select())
{ {
JOIN *inner_join= sl->join; JOIN *inner_join= sl->join;
SELECT_LEX *save_select= un->thd->lex->current_select; SELECT_LEX *save_select= un->thd->lex->current_select;
int res; int res;
/*
Make sure that we do not create IN->EXISTS conditions for
subquery predicates that were substituted by Item_maxmin_subselect
or by Item_singlerow_subselect.
*/
DBUG_ASSERT(!item_in || (item_in && !item_in->is_min_max_optimized));
if (item_in && item_in->create_in_to_exists_cond(inner_join))
return TRUE;
/* We need only 1 row to determine existence */ /* We need only 1 row to determine existence */
un->set_limit(un->global_parameters); un->set_limit(un->global_parameters);
un->thd->lex->current_select= sl; un->thd->lex->current_select= sl;
......
This diff is collapsed.
...@@ -1369,8 +1369,21 @@ inline bool sj_is_materialize_strategy(uint strategy) ...@@ -1369,8 +1369,21 @@ inline bool sj_is_materialize_strategy(uint strategy)
class JOIN :public Sql_alloc class JOIN :public Sql_alloc
{ {
private:
JOIN(const JOIN &rhs); /**< not implemented */ JOIN(const JOIN &rhs); /**< not implemented */
JOIN& operator=(const JOIN &rhs); /**< not implemented */ JOIN& operator=(const JOIN &rhs); /**< not implemented */
protected:
/* Support for plan reoptimization with rewritten conditions. */
int reoptimize(Item *added_where, table_map join_tables,
POSITION *save_best_positions);
int save_query_plan(DYNAMIC_ARRAY *save_keyuse, POSITION *save_positions,
KEYUSE **save_join_tab_keyuse,
key_map *save_join_tab_checked_keys);
void restore_query_plan(DYNAMIC_ARRAY *save_keyuse, POSITION *save_positions,
KEYUSE **save_join_tab_keyuse,
key_map *save_join_tab_checked_keys);
public: public:
JOIN_TAB *join_tab,**best_ref; JOIN_TAB *join_tab,**best_ref;
JOIN_TAB **map2table; ///< mapping between table indexes and JOIN_TABs JOIN_TAB **map2table; ///< mapping between table indexes and JOIN_TABs
...@@ -1746,7 +1759,7 @@ class JOIN :public Sql_alloc ...@@ -1746,7 +1759,7 @@ class JOIN :public Sql_alloc
NULL : join_tab+const_tables; NULL : join_tab+const_tables;
} }
bool setup_subquery_caches(); bool setup_subquery_caches();
bool choose_subquery_plan(); bool choose_subquery_plan(table_map join_tables);
private: private:
/** /**
TRUE if the query contains an aggregate function but has no GROUP TRUE if the query contains an aggregate function but has no GROUP
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment