Commit 26c3dc45 authored by Sergey Petrunya

MWL#90: Subqueries: Inside-out execution for non-semijoin materialized...

MWL#90: Subqueries: Inside-out execution for non-semijoin materialized subqueries that are AND-parts of the WHERE
- Address feedback
- Code cleanup (not finished)
parent 559dafdf
...@@ -5733,26 +5733,10 @@ Item_field* Item_equal::get_first(Item_field *field) ...@@ -5733,26 +5733,10 @@ Item_field* Item_equal::get_first(Item_field *field)
It's a field from a materialized semi-join. We can substitute it only It's a field from a materialized semi-join. We can substitute it only
for a field from the same semi-join. for a field from the same semi-join.
*/ */
#if 0
psergey3:remove:
JOIN_TAB *first;
JOIN *join= field_tab->join;
int tab_idx= field_tab - field_tab->join->join_tab;
/* Find the first table of this semi-join nest */
for (int i= tab_idx; i >= (int)join->const_tables; i--)
{
if (join->join_tab[i].table->map & emb_nest->sj_inner_tables)
first= join->join_tab + i;
else
// Found first tab that doesn't belong to current SJ.
break;
}
#endif
/* Find an item to substitute for. */ /* Find an item to substitute for. */
while ((item= it++)) while ((item= it++))
{ {
//if (item->field->table->reginfo.join_tab >= first)
if (item->field->table->pos_in_table_list->embedding == emb_nest) if (item->field->table->pos_in_table_list->embedding == emb_nest)
{ {
/* /*
......
...@@ -45,8 +45,8 @@ ...@@ -45,8 +45,8 @@
exception that we don't care how many matches a row from outer_tbl has in exception that we don't care how many matches a row from outer_tbl has in
inner_tbl. inner_tbl.
In SQL, that translates into following: a semi-join subquery is an IN In SQL terms: a semi-join subquery is an IN subquery that is an AND-part of
subquery that is an AND-part of the WHERE/ON clause. the WHERE/ON clause.
2. General idea about semi-join execution 2. General idea about semi-join execution
----------------------------------------- -----------------------------------------
...@@ -3957,8 +3957,8 @@ static void remove_subq_pushed_predicates(JOIN *join, Item **where) ...@@ -3957,8 +3957,8 @@ static void remove_subq_pushed_predicates(JOIN *join, Item **where)
bool join_tab_execution_startup(JOIN_TAB *tab) bool join_tab_execution_startup(JOIN_TAB *tab)
{ {
DBUG_ENTER("join_tab_execution_startup");
Item_in_subselect *in_subs; Item_in_subselect *in_subs;
DBUG_ENTER("join_tab_execution_startup");
if (tab->table->pos_in_table_list && if (tab->table->pos_in_table_list &&
(in_subs= tab->table->pos_in_table_list->jtbm_subselect)) (in_subs= tab->table->pos_in_table_list->jtbm_subselect))
{ {
...@@ -3995,13 +3995,9 @@ bool join_tab_execution_startup(JOIN_TAB *tab) ...@@ -3995,13 +3995,9 @@ bool join_tab_execution_startup(JOIN_TAB *tab)
if ((rc= sub_select(join, join_tab, FALSE/* no EOF */)) < 0 || if ((rc= sub_select(join, join_tab, FALSE/* no EOF */)) < 0 ||
(rc= sub_select(join, join_tab, TRUE/* now EOF */)) < 0) (rc= sub_select(join, join_tab, TRUE/* now EOF */)) < 0)
{ {
//psergey3-todo: set sjm->materialized=TRUE here, too??
join->return_tab= save_return_tab; join->return_tab= save_return_tab;
DBUG_RETURN(rc); /* it's NESTED_LOOP_(ERROR|KILLED)*/ DBUG_RETURN(rc); /* it's NESTED_LOOP_(ERROR|KILLED)*/
} }
/*
Ok, materialization finished. Initialize the access to the temptable
*/
join->return_tab= save_return_tab; join->return_tab= save_return_tab;
sjm->materialized= TRUE; sjm->materialized= TRUE;
} }
......
...@@ -158,42 +158,6 @@ JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab, bool include_bush_roots); ...@@ -158,42 +158,6 @@ JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab, bool include_bush_roots);
void JOIN_CACHE::calc_record_fields() void JOIN_CACHE::calc_record_fields()
{ {
//psergey4-todo: prev_cache, or
// - first non-const table if on top level
// - first table inside SJM nest if within sjm nest
// this->join_tab is 'our' join_tab
// No. the right idea: start from ... and walk to the current join_tab
/// with an iterator, skipping
// join nests (can do so for now)
/*
The above sucks, too.
The right idea:
- for SJM-inner tables, walk only within the nest
- for SJM-outer tables, use all preceding tables, including inner ones.
eof
*/
/* JOIN_TAB *tab = prev_cache ? prev_cache->join_tab :
join->join_tab+join->const_tables;
*/
/* JOIN_TAB *tab;
if (prev_cache)
tab= prev_cache->join_tab;
else
{
if (tab->bush_root_tab)
{
;
}
else
{
/ * top-level * /
tab= join->join_tab+join->const_tables;
}
}*/
JOIN_TAB *tab; JOIN_TAB *tab;
if (prev_cache) if (prev_cache)
tab= prev_cache->join_tab; tab= prev_cache->join_tab;
...@@ -201,12 +165,18 @@ void JOIN_CACHE::calc_record_fields() ...@@ -201,12 +165,18 @@ void JOIN_CACHE::calc_record_fields()
{ {
if (join_tab->bush_root_tab) if (join_tab->bush_root_tab)
{ {
// inside SJM-Mat nest: pick first one /*
If the tab we're attached to is inside an SJM-nest, start from the
first tab in that SJM nest
*/
tab= join_tab->bush_root_tab->bush_children->start; tab= join_tab->bush_root_tab->bush_children->start;
} }
else else
{ {
// outside SJM-Mat nest: start from first non-const table /*
The tab we're attached to is not inside an SJM-nest. Start from the
first non-const table.
*/
tab= join->join_tab + join->const_tables; tab= join->join_tab + join->const_tables;
} }
} }
......
...@@ -1011,10 +1011,10 @@ JOIN::optimize() ...@@ -1011,10 +1011,10 @@ JOIN::optimize()
{ {
List_iterator<JOIN_TAB_RANGE> it(join_tab_ranges); List_iterator<JOIN_TAB_RANGE> it(join_tab_ranges);
JOIN_TAB_RANGE *jt_range; JOIN_TAB_RANGE *jt_range;
bool first= TRUE; uint first_tab_offs= const_tables;
while ((jt_range= it++)) while ((jt_range= it++))
{ {
for (JOIN_TAB *tab= jt_range->start + (first ? const_tables : 0); for (JOIN_TAB *tab= jt_range->start + first_tab_offs;
tab < jt_range->end; tab++) tab < jt_range->end; tab++)
{ {
if (*tab->on_expr_ref) if (*tab->on_expr_ref)
...@@ -1025,7 +1025,7 @@ JOIN::optimize() ...@@ -1025,7 +1025,7 @@ JOIN::optimize()
(*tab->on_expr_ref)->update_used_tables(); (*tab->on_expr_ref)->update_used_tables();
} }
} }
first= FALSE; first_tab_offs= 0;
} }
} }
...@@ -1298,9 +1298,11 @@ JOIN::optimize() ...@@ -1298,9 +1298,11 @@ JOIN::optimize()
*/ */
if (need_tmp || select_distinct || group_list || order) if (need_tmp || select_distinct || group_list || order)
{ {
for (uint i = const_tables; i < tables; i++) for (uint i= 0; i < tables; i++)
{
if (!(table[i]->map & const_table_map))
table[i]->prepare_for_position(); table[i]->prepare_for_position();
}
} }
DBUG_EXECUTE("info",TEST_join(this);); DBUG_EXECUTE("info",TEST_join(this););
...@@ -5868,10 +5870,9 @@ JOIN_TAB *first_linear_tab(JOIN *join, bool after_const_tables) ...@@ -5868,10 +5870,9 @@ JOIN_TAB *first_linear_tab(JOIN *join, bool after_const_tables)
{ {
JOIN_TAB *first= join->join_tab; JOIN_TAB *first= join->join_tab;
if (after_const_tables) if (after_const_tables)
first += join->const_tables; first+= join->const_tables;
if (first < join->join_tab + join->top_jtrange_tables) if (first < join->join_tab + join->top_jtrange_tables)
return first; return first;
else
return NULL; return NULL;
} }
...@@ -5888,24 +5889,24 @@ JOIN_TAB *first_linear_tab(JOIN *join, bool after_const_tables) ...@@ -5888,24 +5889,24 @@ JOIN_TAB *first_linear_tab(JOIN *join, bool after_const_tables)
to.) to.)
*/ */
JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab, bool include_bush_roots) //psergey2: added JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab, bool include_bush_roots)
{ {
if (include_bush_roots && tab->bush_children) if (include_bush_roots && tab->bush_children)
return tab->bush_children->start; return tab->bush_children->start;
DBUG_ASSERT(!tab->last_leaf_in_bush || tab->bush_root_tab);
if (tab->last_leaf_in_bush) if (tab->last_leaf_in_bush)
tab= tab->bush_root_tab; tab= tab->bush_root_tab;
if (tab->bush_root_tab) if (tab->bush_root_tab)
return ++tab; return ++tab;
if (++tab == join->join_tab + join->top_jtrange_tables /*join->join_tab_ranges.head()->end*/) if (++tab == join->join_tab + join->top_jtrange_tables)
return NULL; return NULL;
if (!include_bush_roots && tab->bush_children) if (!include_bush_roots && tab->bush_children)
{
tab= tab->bush_children->start; tab= tab->bush_children->start;
}
return tab; return tab;
} }
...@@ -5929,12 +5930,12 @@ JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab, bool include_bush_roots) // ...@@ -5929,12 +5930,12 @@ JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab, bool include_bush_roots) //
*/ */
JOIN_TAB *next_depth_first_tab(JOIN* join, JOIN_TAB* tab) //psergey2: added JOIN_TAB *next_depth_first_tab(JOIN* join, JOIN_TAB* tab)
{ {
bool start= FALSE; bool start= FALSE;
if (tab == NULL) if (tab == NULL)
{ {
/* This means we're starting. */ /* This means we're starting the enumeration */
if (join->const_tables == join->top_jtrange_tables) if (join->const_tables == join->top_jtrange_tables)
return NULL; return NULL;
...@@ -5945,7 +5946,11 @@ JOIN_TAB *next_depth_first_tab(JOIN* join, JOIN_TAB* tab) //psergey2: added ...@@ -5945,7 +5946,11 @@ JOIN_TAB *next_depth_first_tab(JOIN* join, JOIN_TAB* tab) //psergey2: added
if (tab->last_leaf_in_bush) if (tab->last_leaf_in_bush)
return tab->bush_root_tab; return tab->bush_root_tab;
if ((start? tab: ++tab) == join->join_tab_ranges.head()->end) /* Move to next tab in the array we're traversing*/
if (!start)
tab++;
if (tab == join->join_tab_ranges.head()->end)
return NULL; /* End */ return NULL; /* End */
if (tab->bush_children) if (tab->bush_children)
...@@ -5955,7 +5960,7 @@ JOIN_TAB *next_depth_first_tab(JOIN* join, JOIN_TAB* tab) //psergey2: added ...@@ -5955,7 +5960,7 @@ JOIN_TAB *next_depth_first_tab(JOIN* join, JOIN_TAB* tab) //psergey2: added
} }
static Item *null_ptr= NULL; static Item * const null_ptr= NULL;
/* /*
Set up join struct according to the picked join order in Set up join struct according to the picked join order in
...@@ -6004,10 +6009,6 @@ get_best_combination(JOIN *join) ...@@ -6004,10 +6009,6 @@ get_best_combination(JOIN *join)
fix_semijoin_strategies_for_picked_join_order(join); fix_semijoin_strategies_for_picked_join_order(join);
/*
psergey2-todo: Here: switch to nested structure when copying.
*/
JOIN_TAB_RANGE *root_range= new JOIN_TAB_RANGE; JOIN_TAB_RANGE *root_range= new JOIN_TAB_RANGE;
root_range->start= join->join_tab; root_range->start= join->join_tab;
/* root_range->end will be set later */ /* root_range->end will be set later */
...@@ -6041,7 +6042,7 @@ get_best_combination(JOIN *join) ...@@ -6041,7 +6042,7 @@ get_best_combination(JOIN *join)
j->ref.key_parts=0; j->ref.key_parts=0;
j->loosescan_match_tab= NULL; //non-nulls will be set later j->loosescan_match_tab= NULL; //non-nulls will be set later
j->use_join_cache= FALSE; j->use_join_cache= FALSE;
j->on_expr_ref= &null_ptr; j->on_expr_ref= (Item**) &null_ptr;
j->cache= NULL; j->cache= NULL;
/* /*
...@@ -6363,9 +6364,6 @@ JOIN::make_simple_join(JOIN *parent, TABLE *tmp_table) ...@@ -6363,9 +6364,6 @@ JOIN::make_simple_join(JOIN *parent, TABLE *tmp_table)
DBUG_RETURN(TRUE); /* purecov: inspected */ DBUG_RETURN(TRUE); /* purecov: inspected */
join_tab= parent->join_tab_reexec; join_tab= parent->join_tab_reexec;
//psergey2: hopefully this is ok:
// join_tab_ranges.head()->start= join_tab;
// join_tab_ranges.head()->end= join_tab + 1;
top_jtrange_tables= 1; top_jtrange_tables= 1;
table= &parent->table_reexec[0]; parent->table_reexec[0]= tmp_table; table= &parent->table_reexec[0]; parent->table_reexec[0]= tmp_table;
...@@ -7701,21 +7699,8 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) ...@@ -7701,21 +7699,8 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after)
tab->sorted= sorted; tab->sorted= sorted;
sorted= 0; // only first must be sorted sorted= 0; // only first must be sorted
if (tab->bush_children)
//if (sj_is_materialize_strategy(join->best_positions[i].sj_strategy))
if (tab->bush_children) // SJM
{ {
/* This is a start of semi-join nest */
//first_sjm_table= i;
//last_sjm_table= i + join->best_positions[i].n_sj_tables;
/*
psergey2: dont:
if (i == join->const_tables)
join->first_select= sub_select_sjm;
else
tab[-1].next_select= sub_select_sjm;
*/
if (setup_sj_materialization(tab)) if (setup_sj_materialization(tab))
return TRUE; return TRUE;
table= tab->table; table= tab->table;
...@@ -12717,9 +12702,9 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure) ...@@ -12717,9 +12702,9 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
else else
{ {
DBUG_ASSERT(join->tables); DBUG_ASSERT(join->tables);
error= join->first_select(join,join_tab,0); error= sub_select(join,join_tab,0);
if (error == NESTED_LOOP_OK || error == NESTED_LOOP_NO_MORE_ROWS) if (error == NESTED_LOOP_OK || error == NESTED_LOOP_NO_MORE_ROWS)
error= join->first_select(join,join_tab,1); error= sub_select(join,join_tab,1);
if (error == NESTED_LOOP_QUERY_LIMIT) if (error == NESTED_LOOP_QUERY_LIMIT)
error= NESTED_LOOP_OK; /* select_limit used */ error= NESTED_LOOP_OK; /* select_limit used */
} }
......
...@@ -190,8 +190,15 @@ typedef struct st_join_table { ...@@ -190,8 +190,15 @@ typedef struct st_join_table {
psergey2: for join tabs that are inside a bush: root of this bush. psergey2: for join tabs that are inside a bush: root of this bush.
*/ */
st_join_table *bush_root_tab; st_join_table *bush_root_tab;
/* TRUE <=> This join_tab is inside a join bush and is the last leaf tab here */
bool last_leaf_in_bush; bool last_leaf_in_bush;
/*
ptr - this is a bush, and ptr points to description of child join_tab
range
NULL - this join tab has no bush children
*/
JOIN_TAB_RANGE *bush_children; JOIN_TAB_RANGE *bush_children;
/* Special content for EXPLAIN 'Extra' column or NULL if none */ /* Special content for EXPLAIN 'Extra' column or NULL if none */
...@@ -500,13 +507,13 @@ class JOIN_CACHE :public Sql_alloc ...@@ -500,13 +507,13 @@ class JOIN_CACHE :public Sql_alloc
context can be accessed. context can be accessed.
*/ */
JOIN *join; JOIN *join;
#if 0
/* /*
Cardinality of the range of join tables whose fields can be put into the JOIN_TAB of the first table that can have its fields in the join cache.
cache. (A table from the range not necessarily contributes to the cache.) That is, tables in the [start_tab, tab) range can have their fields in the
join cache.
If a join tab in the range represents an SJM-nest, then all tables from the
nest can have their fields in the join cache, too.
*/ */
uint tables;
#endif
JOIN_TAB *start_tab; JOIN_TAB *start_tab;
/* /*
...@@ -1505,7 +1512,6 @@ class JOIN :public Sql_alloc ...@@ -1505,7 +1512,6 @@ class JOIN :public Sql_alloc
/* We also maintain a stack of join optimization states in * join->positions[] */ /* We also maintain a stack of join optimization states in * join->positions[] */
/******* Join optimization state members end *******/ /******* Join optimization state members end *******/
Next_select_func first_select;
/* /*
The cost of best complete join plan found so far during optimization, The cost of best complete join plan found so far during optimization,
after optimization phase - cost of picked join order (not taking into after optimization phase - cost of picked join order (not taking into
...@@ -1691,7 +1697,6 @@ class JOIN :public Sql_alloc ...@@ -1691,7 +1697,6 @@ class JOIN :public Sql_alloc
rollup.state= ROLLUP::STATE_NONE; rollup.state= ROLLUP::STATE_NONE;
no_const_tables= FALSE; no_const_tables= FALSE;
first_select= sub_select;
} }
int prepare(Item ***rref_pointer_array, TABLE_LIST *tables, uint wind_num, int prepare(Item ***rref_pointer_array, TABLE_LIST *tables, uint wind_num,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment