Commit 946aef4a authored by psergey's avatar psergey

MWL#90: code cleanup

- Remove deadcode
- Improve comments 
- Do several small TODOs
parent e9bac8db
......@@ -5731,10 +5731,8 @@ Item_field* Item_equal::get_first(Item_field *field)
{
/*
It's a field from a materialized semi-join. We can substitute it only
for a field from the same semi-join.
for a field from the same semi-join. Find the first of such items.
*/
/* Find an item to substitute for. */
while ((item= it++))
{
if (item->field->table->pos_in_table_list->embedding == emb_nest)
......@@ -5762,7 +5760,7 @@ Item_field* Item_equal::get_first(Item_field *field)
First let's note that either it1.col or it2.col participates in
subquery's IN-equality. It can't be otherwise, because materialization is
only applicable to uncorrelated subqueries, so the only way we could
infer "it1.col=ot1.col" is from IN-equality. Ok, so IN-eqality has
infer "it1.col=ot1.col" is from the IN-equality. Ok, so IN-equality has
it1.col or it2.col on its inner side. it1.col is first such item in the
join order, so it's not possible for SJ-Mat to be
SJ-Materialization-lookup, it is SJ-Materialization-Scan. The scan part
......
......@@ -30,6 +30,8 @@
3.1.1 Merged semi-join TABLE_LIST object
3.1.2 Non-merged semi-join data structure
3.2 Semi-joins and query optimization
3.2.1 Non-merged semi-joins and join optimization
3.2.2 Merged semi-joins and join optimization
3.3 Semi-joins and query execution
1. What is a semi-join subquery
......@@ -99,8 +101,8 @@
3. Code-level view of semi-join processing
------------------------------------------
3.1 Conversion
--------------
3.1 Conversion and pre-optimization data structures
---------------------------------------------------
* When doing JOIN::prepare for the subquery, we detect that it can be
converted into a semi-join and register it in parent_join->sj_subselects
......@@ -117,7 +119,7 @@
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Merged semi-join object is a TABLE_LIST that contains a sub-join of
subquery tables and the semi-join ON expression (in this respect it is
ery similar to nested outer join representation)
very similar to nested outer join representation)
Merged semi-join represents this SQL:
... SEMI JOIN (inner_tbl1 JOIN ... JOIN inner_tbl_n) ON sj_on_expr
......@@ -135,14 +137,38 @@
clause. (They do remain in PS-saved WHERE clause, and they replace themselves
with Item_int(1) on subsequent re-executions).
3.2 Semi-joins and query optimization
-------------------------------------
Query optimizer operates on semi-join nests.
3.2 Semi-joins and join optimization
------------------------------------
3.2.1 Non-merged semi-joins and join optimization
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
For join optimization purposes, non-merged semi-join nests are similar to
base tables - they've got one JOIN_TAB, which can be accessed with one of
two methods:
- full table scan (representing SJ-Materialization-Scan strategy)
- eq_ref-like table lookup (representing SJ-Materialization-Lookup)
Unlike regular base tables, non-merged semi-joins have:
- non-zero JOIN_TAB::startup_cost, and
- join_tab->table->is_filled_at_execution()==TRUE, which means one
cannot do const table detection or range analysis or other table data-
dependent inferences
Instead, get_delayed_table_estimates() runs optimization on the nest so that
we get an idea about the temptable size.
3.2.2 Merged semi-joins and join optimization
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- optimize_semijoin_nests() does pre-optimization
- during join optimization, the join has one JOIN_TAB (or is it POSITION?)
array, and suffix-based detection is used, see advance_sj_state()
- after join optimization is done, get_best_combination() switches
the data-structure to prefix-based, multiple JOIN_TAB ranges format.
3.3 Semi-joins and query execution
----------------------------------
* Join executor has hooks for all semi-join strategies.
TODO elaborate
TODO elaborate.
*/
......@@ -2682,8 +2708,6 @@ bool setup_sj_materialization(JOIN_TAB *sjm_tab)
temptable.
*/
TABLE_REF *tab_ref;
//if (!(tab_ref= (TABLE_REF*) thd->alloc(sizeof(TABLE_REF))))
// DBUG_RETURN(TRUE); /* purecov: inspected */
tab_ref= &sjm_tab->ref;
tab_ref->key= 0; /* The only temp table index. */
tab_ref->key_length= tmp_key->key_length;
......@@ -3966,17 +3990,19 @@ static void remove_subq_pushed_predicates(JOIN *join, Item **where)
pointers.
RETURN
FALSE Ok
TRUE Error, join execution is not possible.
NESTED_LOOP_OK - OK
NESTED_LOOP_ERROR | NESTED_LOOP_KILLED - Error, abort the join execution
*/
bool join_tab_execution_startup(JOIN_TAB *tab)
enum_nested_loop_state join_tab_execution_startup(JOIN_TAB *tab)
{
Item_in_subselect *in_subs;
DBUG_ENTER("join_tab_execution_startup");
if (tab->table->pos_in_table_list &&
(in_subs= tab->table->pos_in_table_list->jtbm_subselect))
{
/* It's a non-merged SJM nest */
DBUG_ASSERT(in_subs->engine->engine_type() ==
subselect_engine::HASH_SJ_ENGINE);
......@@ -3988,13 +4014,13 @@ bool join_tab_execution_startup(JOIN_TAB *tab)
hash_sj_engine->is_materialized= TRUE;
if (hash_sj_engine->materialize_join->error || tab->join->thd->is_fatal_error)
DBUG_RETURN(TRUE);
DBUG_RETURN(NESTED_LOOP_ERROR);
}
}
else if (tab->bush_children)
{
/* It's a merged SJM nest */
int rc; // psergey3: todo: error codes!
enum_nested_loop_state rc;
JOIN *join= tab->join;
SJ_MATERIALIZATION_INFO *sjm= tab->bush_children->start->emb_sj_nest->sj_mat_info;
JOIN_TAB *join_tab= tab->bush_children->start;
......@@ -4018,6 +4044,6 @@ bool join_tab_execution_startup(JOIN_TAB *tab)
}
}
DBUG_RETURN(0);
DBUG_RETURN(NESTED_LOOP_OK);
}
......@@ -372,5 +372,5 @@ void get_delayed_table_estimates(TABLE *table,
double *scan_time,
double *startup_cost);
bool join_tab_execution_startup(JOIN_TAB *tab);
enum_nested_loop_state join_tab_execution_startup(JOIN_TAB *tab);
......@@ -1809,11 +1809,8 @@ enum_nested_loop_state JOIN_CACHE_BNL::join_matching_records(bool skip_last)
/* Start retrieving all records of the joined table */
if (join_tab_execution_startup(join_tab))
{
rc= NESTED_LOOP_ERROR;
if ((rc= join_tab_execution_startup(join_tab)) < 0)
goto finish;
}
if ((error= join_init_read_record(join_tab)))
{
......
......@@ -8100,6 +8100,8 @@ void JOIN::cleanup(bool full)
{
for (tab= top_jtrange_tables?join_tab:NULL; tab; tab= next_linear_tab(this, tab, TRUE))
tab->cleanup();
//psergey4: how is the above supposed to work when
//top_jtrange_tables==FALSE? It will crash right away!
table= 0;
}
else
......@@ -12745,133 +12747,6 @@ int rr_sequential_and_unpack(READ_RECORD *info)
}
/*
Semi-join materialization join function
SYNOPSIS
sub_select_sjm()
join The join
join_tab The first table in the materialization nest
end_of_records FALSE <=> This call is made to pass another record
combination
TRUE <=> EOF
DESCRIPTION
This is a join execution function that does materialization of a join
suborder before joining it to the rest of the join.
The table pointed by join_tab is the first of the materialized tables.
This function first creates the materialized table and then switches to
joining the materialized table with the rest of the join.
The materialized table can be accessed in two ways:
- index lookups
- full table scan
RETURN
One of enum_nested_loop_state values
*/
#if 0
enum_nested_loop_state
sub_select_sjm(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
{
int res;
enum_nested_loop_state rc;
DBUG_ENTER("sub_select_sjm");
if (!join_tab->emb_sj_nest)
{
/*
We're handling GROUP BY/ORDER BY, this is the first table, and we've
actually executed the join already and now we're just reading the
result of the join from the temporary table.
Bypass to regular join handling.
Yes, it would be nicer if sub_select_sjm wasn't called at all in this
case but there's no easy way to arrange this.
*/
rc= sub_select(join, join_tab, end_of_records);
DBUG_RETURN(rc);
}
SJ_MATERIALIZATION_INFO *sjm= join_tab->emb_sj_nest->sj_mat_info;
if (end_of_records)
{
/*
EOF signal: do not touch the nest, just forward end_of_records to the
next_select function of the nest's last table, handing it the JOIN_TAB
located sjm->tables entries after join_tab.
*/
rc= (*join_tab[sjm->tables - 1].next_select)(join,
join_tab + sjm->tables,
end_of_records);
DBUG_RETURN(rc);
}
if (!sjm->materialized)
{
/*
Do the materialization. First, put end_sj_materialize after the last
inner table so we can catch record combinations of sj-inner tables.
*/
/* Remember the original next_select so it can be put back afterwards */
Next_select_func next_func= join_tab[sjm->tables - 1].next_select;
join_tab[sjm->tables - 1].next_select= end_sj_materialize;
/*
Now run the join for the inner tables. The first call is to run the
join, the second one is to signal EOF (this is essential for some
join strategies, e.g. it will make join buffering flush the records)
*/
if ((rc= sub_select(join, join_tab, FALSE)) < 0 ||
(rc= sub_select(join, join_tab, TRUE/*EOF*/)) < 0)
{
/* Failure path: restore the original next_select before returning */
join_tab[sjm->tables - 1].next_select= next_func;
DBUG_RETURN(rc); /* it's NESTED_LOOP_(ERROR|KILLED)*/
}
/* Success path: restore the original next_select as well */
join_tab[sjm->tables - 1].next_select= next_func;
/*
Ok, materialization finished. Initialize the access to the temptable
*/
/* The flag makes later calls skip this whole materialization branch */
sjm->materialized= TRUE;
join_tab->read_record.read_record= join_no_more_records;
if (sjm->is_sj_scan)
{
/* Initialize full scan */
/*
The scan of sjm->table is set up on the last table of the nest, not on
join_tab itself.
*/
JOIN_TAB *last_tab= join_tab + (sjm->tables - 1);
init_read_record(&last_tab->read_record, join->thd,
sjm->table, NULL, TRUE, TRUE, FALSE);
DBUG_ASSERT(last_tab->read_record.read_record == rr_sequential);
last_tab->read_first_record= join_read_record_no_init;
/*
Give the reader the [copy_field, copy_field_end) range and replace the
rr_sequential reader (asserted above) with rr_sequential_and_unpack.
*/
last_tab->read_record.copy_field= sjm->copy_field;
last_tab->read_record.copy_field_end= sjm->copy_field +
sjm->sjm_table_cols.elements;
last_tab->read_record.read_record= rr_sequential_and_unpack;
}
}
if (sjm->is_sj_scan)
{
/* Do full scan of the materialized table */
JOIN_TAB *last_tab= join_tab + (sjm->tables - 1);
/*
Run sub_select() on the last table with sjm->join_cond installed as its
select condition, then put the saved condition back.
*/
Item *save_cond= last_tab->select_cond;
last_tab->set_select_cond(sjm->join_cond, __LINE__);
rc= sub_select(join, last_tab, end_of_records);
last_tab->set_select_cond(save_cond, __LINE__);
DBUG_RETURN(rc);
}
else
{
/* Do index lookup in the materialized table */
/*
A result of 1 from join_read_key2() is reported as an error. Any other
non-zero result, or an IN-equality that evaluates to false, ends
processing of the current record combination with no rows.
*/
if ((res= join_read_key2(join_tab, sjm->table, sjm->tab_ref)) == 1)
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
if (res || !sjm->in_equality->val_int())
DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
}
/*
Lookup succeeded: continue with the part of the join that follows the
nest, via the next_select function of the nest's last table.
*/
rc= (*join_tab[sjm->tables - 1].next_select)(join,
join_tab + sjm->tables,
end_of_records);
DBUG_RETURN(rc);
}
#endif
/*
Fill the join buffer with partial records, retrieve all full matches for them
......@@ -13122,8 +12997,8 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
}
join->thd->row_count= 0;
if (join_tab_execution_startup(join_tab))
DBUG_RETURN(NESTED_LOOP_ERROR);
if ((rc= join_tab_execution_startup(join_tab)) < 0)
DBUG_RETURN(rc);
error= (*join_tab->read_first_record)(join_tab);
......
......@@ -1247,9 +1247,6 @@ enum_nested_loop_state sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool
end_of_records);
enum_nested_loop_state sub_select(JOIN *join,JOIN_TAB *join_tab, bool
end_of_records);
enum_nested_loop_state sub_select_sjm(JOIN *join, JOIN_TAB *join_tab,
bool end_of_records);
enum_nested_loop_state
end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
bool end_of_records);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment