Commit 8ec5e13f authored by unknown

MWL#89: Cost-based choice between Materialization and IN->EXISTS transformation

  Phase 3: Implementation of re-optimization of subqueries with injected predicates
           and cost comparison between Materialization and IN->EXISTS strategies.

The commit contains the following known problems:
- The implementation of EXPLAIN has not been re-engineered to reflect the
  changes in subquery optimization. EXPLAIN for subqueries is called during
  the execute phase, which results in different code paths during JOIN::optimize
  and thus in differing EXPLAIN messages for constant/system tables.
- There are some Valgrind warnings that need investigation.
- Several EXPLAINs with minor differences need to be reconsidered after fixing
  the EXPLAIN problem above.

This patch also adds one extra optimizer_switch flag, 'in_to_exists', for complete
manual control of the subquery execution strategies.
parent 50888477
......@@ -840,7 +840,12 @@ x ROW(11, 12) = (SELECT MAX(x), 22) ROW(11, 12) IN (SELECT MAX(x), 22)
1 0 0
2 0 0
11 0 0
# 2nd and 3rd columns should be same for x == 11 only
# 2nd and 3rd columns should be same
EXPLAIN SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 3
3 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used
2 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used
SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
x ROW(11, 12) = (SELECT MAX(x), 12) ROW(11, 12) IN (SELECT MAX(x), 12)
1 0 0
......
......@@ -844,11 +844,16 @@ x ROW(11, 12) = (SELECT MAX(x), 22) ROW(11, 12) IN (SELECT MAX(x), 22)
1 0 0
2 0 0
11 0 0
# 2nd and 3rd columns should be same for x == 11 only
# 2nd and 3rd columns should be same
EXPLAIN SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 3
3 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used
2 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used
SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
x ROW(11, 12) = (SELECT MAX(x), 12) ROW(11, 12) IN (SELECT MAX(x), 12)
1 0 1
2 0 1
1 0 0
2 0 0
11 1 1
DROP TABLE t1;
# both columns should be same
......
......@@ -681,7 +681,8 @@ SELECT a, ROW(11, 12) = (SELECT a, 12), ROW(11, 12) IN (SELECT a, 12) FROM t1;
# The x alias is used below to workaround bug #40674.
# Regression tests for sum function on outer column in subselect from dual:
SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 22), ROW(11, 12) IN (SELECT MAX(x), 22) FROM t1;
--echo # 2nd and 3rd columns should be same for x == 11 only
--echo # 2nd and 3rd columns should be same
EXPLAIN SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
SELECT a AS x, ROW(11, 12) = (SELECT MAX(x), 12), ROW(11, 12) IN (SELECT MAX(x), 12) FROM t1;
DROP TABLE t1;
......
This diff is collapsed.
......@@ -108,9 +108,6 @@ class Item_subselect :public Item_result_field
/* subquery is transformed */
bool changed;
/* TIMOUR: this is temporary, remove it. */
bool is_min_max_optimized;
/* TRUE <=> The underlying SELECT is correlated w.r.t some ancestor select */
bool is_correlated;
......@@ -121,6 +118,12 @@ class Item_subselect :public Item_result_field
Item_subselect();
virtual subs_type substype() { return UNKNOWN_SUBS; }
/*
  TRUE <=> substype() reports this subquery item as one of the IN-like
  predicates: IN_SUBS, ALL_SUBS or ANY_SUBS.
*/
bool is_in_predicate()
{
  switch (substype())
  {
  case Item_subselect::IN_SUBS:
  case Item_subselect::ALL_SUBS:
  case Item_subselect::ANY_SUBS:
    return true;
  default:
    return false;
  }
}
/*
We need this method, because some compilers do not allow 'this'
......@@ -314,6 +317,18 @@ class Item_exists_subselect :public Item_subselect
};
/*
Possible methods to execute an IN predicate. These are set by the optimizer
based on user-set optimizer switches, syntactic analysis and cost comparison.

Every non-zero value below is a distinct single bit, so the values can be
stored together in the Item_in_subselect::in_strategy bitmap and tested
individually with '&'. SUBS_NOT_TRANSFORMED (0) is the empty bitmap.
*/
#define SUBS_NOT_TRANSFORMED 0 /* No execution method was chosen for this IN. */
#define SUBS_SEMI_JOIN 1 /* IN was converted to semi-join. */
#define SUBS_IN_TO_EXISTS 2 /* IN was converted to correlated EXISTS. */
#define SUBS_MATERIALIZATION 4 /* Execute IN via subquery materialization. */
/* Partial matching substrategies of MATERIALIZATION. */
#define SUBS_PARTIAL_MATCH_ROWID_MERGE 8
#define SUBS_PARTIAL_MATCH_TABLE_SCAN 16
/**
Representation of IN subquery predicates of the form
"left_expr IN (SELECT ...)".
......@@ -362,19 +377,13 @@ class Item_in_subselect :public Item_exists_subselect
trans_res select_in_like_transformer(JOIN *join);
trans_res single_value_transformer(JOIN *join);
trans_res row_value_transformer(JOIN * join);
bool fix_having(Item *having, st_select_lex *select_lex);
trans_res create_single_in_to_exists_cond(JOIN * join,
Item **where_item,
Item **having_item);
trans_res inject_single_in_to_exists_cond(JOIN * join,
Item *where_item,
Item *having_item);
trans_res create_row_in_to_exists_cond(JOIN * join,
Item **where_item,
Item **having_item);
trans_res inject_row_in_to_exists_cond(JOIN * join,
Item *where_item,
Item *having_item);
public:
Item *left_expr;
/* Priority of this predicate in the convert-to-semi-join-nest process. */
......@@ -407,14 +416,8 @@ class Item_in_subselect :public Item_exists_subselect
*/
bool sjm_scan_allowed;
/* The method chosen to execute the IN predicate. */
enum enum_exec_method {
NOT_TRANSFORMED, /* No execution method was chosen for this IN. */
SEMI_JOIN, /* IN was converted to semi-join nest and should be removed. */
IN_TO_EXISTS, /* IN was converted to correlated EXISTS. */
MATERIALIZATION /* IN will be executed via subquery materialization. */
};
enum_exec_method exec_method;
/* A bitmap of possible execution strategies for an IN predicate. */
uchar in_strategy;
bool *get_cond_guard(int i)
{
......@@ -433,7 +436,7 @@ class Item_in_subselect :public Item_exists_subselect
Item_in_subselect()
:Item_exists_subselect(), left_expr_cache(0), first_execution(TRUE),
is_constant(FALSE), optimizer(0), abort_on_null(0),
pushed_cond_guards(NULL), func(NULL), exec_method(NOT_TRANSFORMED),
pushed_cond_guards(NULL), func(NULL), in_strategy(0),
upper_item(0)
{}
void cleanup();
......@@ -446,8 +449,8 @@ class Item_in_subselect :public Item_exists_subselect
was_null= 0;
}
trans_res select_transformer(JOIN *join);
bool create_in_to_exists_cond(JOIN * join_arg);
bool inject_in_to_exists_cond(JOIN * join_arg);
bool create_in_to_exists_cond(JOIN *join_arg);
bool inject_in_to_exists_cond(JOIN *join_arg);
virtual bool exec();
longlong val_int();
......
......@@ -563,20 +563,20 @@ class Default_object_creation_ctx : public Object_creation_ctx
#define OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION 4
#define OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT 8
#define OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN 16
#define OPTIMIZER_SWITCH_FIRSTMATCH 32
#define OPTIMIZER_SWITCH_LOOSE_SCAN 64
#define OPTIMIZER_SWITCH_MATERIALIZATION 128
#define OPTIMIZER_SWITCH_SEMIJOIN 256
#define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE 512
#define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN 1024
#define OPTIMIZER_SWITCH_SUBQUERY_CACHE (1<<11)
#define OPTIMIZER_SWITCH_IN_TO_EXISTS 256
#define OPTIMIZER_SWITCH_SEMIJOIN 512
#define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE 1024
#define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN (1<<11)
#define OPTIMIZER_SWITCH_SUBQUERY_CACHE (1<<12)
#ifdef DBUG_OFF
# define OPTIMIZER_SWITCH_LAST (1<<12)
#else
# define OPTIMIZER_SWITCH_TABLE_ELIMINATION (1<<12)
# define OPTIMIZER_SWITCH_LAST (1<<13)
#else
# define OPTIMIZER_SWITCH_TABLE_ELIMINATION (1<<13)
# define OPTIMIZER_SWITCH_LAST (1<<14)
#endif
#ifdef DBUG_OFF
......@@ -589,6 +589,7 @@ class Default_object_creation_ctx : public Object_creation_ctx
OPTIMIZER_SWITCH_FIRSTMATCH | \
OPTIMIZER_SWITCH_LOOSE_SCAN | \
OPTIMIZER_SWITCH_MATERIALIZATION | \
OPTIMIZER_SWITCH_IN_TO_EXISTS | \
OPTIMIZER_SWITCH_SEMIJOIN | \
OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\
......@@ -603,6 +604,7 @@ class Default_object_creation_ctx : public Object_creation_ctx
OPTIMIZER_SWITCH_FIRSTMATCH | \
OPTIMIZER_SWITCH_LOOSE_SCAN | \
OPTIMIZER_SWITCH_MATERIALIZATION | \
OPTIMIZER_SWITCH_IN_TO_EXISTS | \
OPTIMIZER_SWITCH_SEMIJOIN | \
OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\
......
......@@ -304,7 +304,7 @@ static const char *optimizer_switch_names[]=
"index_merge","index_merge_union","index_merge_sort_union",
"index_merge_intersection",
"index_condition_pushdown",
"firstmatch","loosescan","materialization", "semijoin",
"firstmatch","loosescan","materialization","in_to_exists","semijoin",
"partial_match_rowid_merge",
"partial_match_table_scan",
"subquery_cache",
......@@ -325,6 +325,7 @@ static const unsigned int optimizer_switch_names_len[]=
sizeof("firstmatch") - 1,
sizeof("loosescan") - 1,
sizeof("materialization") - 1,
sizeof("in_to_exists") - 1,
sizeof("semijoin") - 1,
sizeof("partial_match_rowid_merge") - 1,
sizeof("partial_match_table_scan") - 1,
......@@ -412,6 +413,7 @@ static const char *optimizer_switch_str="index_merge=on,index_merge_union=on,"
"firstmatch=on,"
"loosescan=on,"
"materialization=on,"
"in_to_exists=on,"
"semijoin=on,"
"partial_match_rowid_merge=on,"
"partial_match_table_scan=on,"
......@@ -7233,7 +7235,7 @@ The minimum value for this variable is 4096.",
{"optimizer_switch", OPT_OPTIMIZER_SWITCH,
"optimizer_switch=option=val[,option=val...], where option={index_merge, "
"index_merge_union, index_merge_sort_union, index_merge_intersection, "
"index_condition_pushdown, firstmatch, loosescan, materialization, "
"index_condition_pushdown, firstmatch, loosescan, materialization, in_to_exists, "
"semijoin, partial_match_rowid_merge, partial_match_table_scan, "
"subquery_cache"
#ifndef DBUG_OFF
......
This diff is collapsed.
......@@ -6245,3 +6245,5 @@ ER_UNKNOWN_OPTION
eng "Unknown option '%-.64s'"
ER_BAD_OPTION_VALUE
eng "Incorrect value '%-.64s' for option '%-.64s'"
ER_ILLEGAL_SUBQUERY_OPTIMIZER_SWITCHES
eng "At least one of the 'in_to_exists' or 'materialization' optimizer_switch flags must be 'on'."
......@@ -2157,8 +2157,8 @@ void st_select_lex::print_limit(THD *thd,
select_limit == 1, and there should be no offset_limit.
*/
(((subs_type == Item_subselect::IN_SUBS) &&
((Item_in_subselect*)item)->exec_method ==
Item_in_subselect::MATERIALIZATION) ?
((Item_in_subselect*)item)->in_strategy &
SUBS_MATERIALIZATION) ?
TRUE :
(select_limit->val_int() == 1LL) &&
offset_limit == 0));
......@@ -3096,25 +3096,11 @@ bool st_select_lex::optimize_unflattened_subqueries()
Item_subselect *subquery_predicate= un->item;
if (subquery_predicate)
{
Item_in_subselect *item_in= NULL;
if (subquery_predicate->substype() == Item_subselect::IN_SUBS ||
subquery_predicate->substype() == Item_subselect::ALL_SUBS ||
subquery_predicate->substype() == Item_subselect::ANY_SUBS)
item_in= (Item_in_subselect*) subquery_predicate;
for (SELECT_LEX *sl= un->first_select(); sl; sl= sl->next_select())
{
JOIN *inner_join= sl->join;
SELECT_LEX *save_select= un->thd->lex->current_select;
int res;
/*
Make sure that we do not create IN->EXISTS conditions for
subquery predicates that were substituted by Item_maxmin_subselect
or by Item_singlerow_subselect.
*/
DBUG_ASSERT(!item_in || (item_in && !item_in->is_min_max_optimized));
if (item_in && item_in->create_in_to_exists_cond(inner_join))
return TRUE;
/* We need only 1 row to determine existence */
un->set_limit(un->global_parameters);
un->thd->lex->current_select= sl;
......
This diff is collapsed.
......@@ -1369,8 +1369,21 @@ inline bool sj_is_materialize_strategy(uint strategy)
class JOIN :public Sql_alloc
{
private:
JOIN(const JOIN &rhs); /**< not implemented */
JOIN& operator=(const JOIN &rhs); /**< not implemented */
protected:
/* Support for plan reoptimization with rewritten conditions. */
int reoptimize(Item *added_where, table_map join_tables,
POSITION *save_best_positions);
int save_query_plan(DYNAMIC_ARRAY *save_keyuse, POSITION *save_positions,
KEYUSE **save_join_tab_keyuse,
key_map *save_join_tab_checked_keys);
void restore_query_plan(DYNAMIC_ARRAY *save_keyuse, POSITION *save_positions,
KEYUSE **save_join_tab_keyuse,
key_map *save_join_tab_checked_keys);
public:
JOIN_TAB *join_tab,**best_ref;
JOIN_TAB **map2table; ///< mapping between table indexes and JOIN_TABs
......@@ -1746,7 +1759,7 @@ class JOIN :public Sql_alloc
NULL : join_tab+const_tables;
}
bool setup_subquery_caches();
bool choose_subquery_plan();
bool choose_subquery_plan(table_map join_tables);
private:
/**
TRUE if the query contains an aggregate function but has no GROUP
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment