Commit 524e6aad authored by Sergey Petrunya

MWL#90: Non-merged semi-joins

- Take into account that grouping or aggregates decrease join output cardinality.
  (First code, can't make use of index statistics yet)
parent ea43df76
......@@ -475,10 +475,10 @@ bool Item_subselect::exec()
return (res);
}
int Item_subselect::optimize()
int Item_subselect::optimize(double *out_rows, double *cost)
{
int res;
res= engine->optimize();
res= engine->optimize(out_rows, cost);
return res;
}
......@@ -4085,16 +4085,218 @@ void subselect_hash_sj_engine::cleanup()
result->cleanup(); /* Resets the temp table as well. */
}
JOIN_TAB *first_top_level_tab(JOIN *join, enum enum_with_const_tables with_const);
JOIN_TAB *next_top_level_tab(JOIN *join, JOIN_TAB *tab);
int subselect_hash_sj_engine::optimize()
/*
  Get fanout produced by tables specified in the table_map

  SYNOPSIS
    get_fanout_with_deps()
      join  Join whose join order is walked
      tset  Bitmap of the tables to get the fanout for

  DESCRIPTION
    The table set is first extended with every table the given tables depend
    on through ref access (ref.depend_map, followed transitively). Then the
    records_read values of the top-level, non-const join tabs that belong to
    the extended set are multiplied together.

  RETURN
    The fanout; 1 if no join tab contributes.
*/
double get_fanout_with_deps(JOIN *join, table_map tset)
{
  /* First, recursively get all tables we depend on */
  table_map deps_to_check= tset & ~PSEUDO_TABLE_BITS;
  table_map checked_deps= 0;
  while (deps_to_check != 0)
  {
    table_map further_deps= 0;
    /* Mark first, so that tables of the current round are not re-queued */
    checked_deps |= deps_to_check;
    Table_map_iterator tm_it(deps_to_check);
    int tableno;
    while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
    {
      /* get tableno's dependency tables that are not in checked_deps */
      further_deps |= join->map2table[tableno]->ref.depend_map & ~checked_deps;
    }
    /*
      Only real table bits may be used as map2table[] indexes (the caller
      strips pseudo-table bits from the GROUP BY table set the same way).
    */
    deps_to_check= further_deps & ~PSEUDO_TABLE_BITS;
  }

  /* Now, walk the join order and calculate the fanout */
  double fanout= 1;
  for (JOIN_TAB *tab= first_top_level_tab(join, WITHOUT_CONST_TABLES); tab;
       tab= next_top_level_tab(join, tab))
  {
    /*
      Count only the tables we were asked about (plus their dependencies).
      NOTE(review): tabs without a table are presumably materialization nest
      roots; they cannot be in the set, so they are skipped - confirm.
    */
    if (!tab->table || !(tab->table->map & checked_deps))
      continue;
    if (tab->records_read && !tab->emb_sj_nest)
      fanout *= rows2double(tab->records_read);
  }
  return fanout;
}
#if 0
/*
  Unfinished attempt to lower the post-GROUP BY cardinality estimate with
  index statistics (this code is compiled out).

  For every GROUP BY element that is a plain table field, collect the keys
  that start with the field (key_start_use) and the keys that contain it at a
  later position (key_infix_use). Tables that are also referred to by a
  non-field GROUP BY element are excluded. The actual use of the statistics
  is still only sketched in the comment at the end.
*/
void check_out_index_stats(JOIN *join)
{
  ORDER *order;
  /*
    First, collect the keys that we can use in each table.
    We can use a key if
     - all tables refer to it.
  */
  key_map key_start_use[MAX_TABLES];
  key_map key_infix_use[MAX_TABLES];
  table_map key_used= 0;
  table_map non_key_used= 0;

  /* Reset through the key_map interface rather than bzero()ing objects */
  for (uint i= 0; i < MAX_TABLES; i++)
  {
    key_start_use[i].clear_all();
    key_infix_use[i].clear_all();
  }

  for (order= join->group_list; order; order= order->next)
  {
    Item *item= order->item[0];

    if (item->real_type() == Item::FIELD_ITEM)
    {
      if (item->used_tables() & OUTER_REF_TABLE_BIT)
        continue; /* outside references are like constants for us */

      Field *field= ((Item_field*)item->real_item())->field;
      uint table_no= field->table->tablenr;
      table_map table_bit= table_map(1) << table_no;
      if (!(non_key_used & table_bit) &&
          !field->part_of_key.is_clear_all())
      {
        key_map infix_map= field->part_of_key;
        infix_map.subtract(field->key_start);
        key_start_use[table_no].merge(field->key_start);
        key_infix_use[table_no].merge(infix_map);
        key_used |= table_bit;
      }
      continue;
    }
    /*
      Note: the below will cause clauses like GROUP BY YEAR(date) not to be
      handled.
    */
    non_key_used |= item->used_tables();
  }

  Table_map_iterator tm_it(key_used & ~non_key_used);
  int tableno;
  while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
  {
    /* Walk the keys that start with a GROUP BY field of this table */
    key_map::iterator key_it(key_start_use[tableno]);
    int keyno;
    while ((keyno = key_it.next_bit()) != key_map::iterator::BITMAP_END)
    {
      for (order= join->group_list; order; order= order->next)
      {
        Item *item= order->item[0];
        if (item->used_tables() & (table_map(1) << tableno))
        {
          DBUG_ASSERT(item->real_type() == Item::FIELD_ITEM);
        }
      }
      /*
      if (continuation)
      {
        walk through list and find which key parts are occupied;
        // note that the above can't be made any faster.
      }
      else
        use rec_per_key[0];

      find out the cardinality.
      check if cardinality decreases if we use it;
      */
    }
  }
}
#endif
/*
  Estimate the number of rows a join produces after its GROUP BY is applied

  SYNOPSIS
    get_post_group_estimate()
      join  Join with a GROUP BY list

  DESCRIPTION
    The estimate is the fanout of the tables referred to in the GROUP BY list
    (together with the tables they depend on), as computed by
    get_fanout_with_deps(). Index statistics are not used yet.

  RETURN
    The estimate, or HA_POS_ERROR converted to double if a GROUP BY element
    has RAND_TABLE_BIT set in its used_tables().
*/
double get_post_group_estimate(JOIN* join)
{
  table_map tables_in_group_list= table_map(0);

  /* Find out which tables are used in GROUP BY list */
  for (ORDER *order= join->group_list; order; order= order->next)
  {
    Item *item= order->item[0];
    if (item->used_tables() & RAND_TABLE_BIT)
      return HA_POS_ERROR; // TODO: change to join-output-estimate
    tables_in_group_list|= item->used_tables();
  }
  tables_in_group_list &= ~PSEUDO_TABLE_BITS;

  /*
    Use join fanouts to calculate the max. number of records in the group-list
  */
  double fanout_rows[MAX_TABLES];  /* indexed by table number, not key number */
  bzero(&fanout_rows, sizeof(fanout_rows));
  double out_rows;

  out_rows= get_fanout_with_deps(join, tables_in_group_list);

  /*
    Also generate max. number of records for each of the tables mentioned
    in the group-list. We'll use that a baseline number that we'll try to
    reduce by using
     - #table-records
     - index statistics.
    (The per-table numbers are not consumed yet.)
  */
  Table_map_iterator tm_it(tables_in_group_list);
  int tableno;
  while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
  {
    fanout_rows[tableno]= get_fanout_with_deps(join, table_map(1) << tableno);
  }

  /*
    Try to bring down estimates using index statistics.
  */
  //check_out_index_stats(join);
  return out_rows;
}
/**
  Optimize the join that is to be materialized and estimate its output.

  @param[out] out_rows  estimated number of rows the materialized join
                        produces, adjusted for aggregates and GROUP BY
  @param[out] cost      join execution cost as computed by
                        get_partial_join_cost()

  @return  the value returned by JOIN::optimize() for the materialized join
*/
int subselect_hash_sj_engine::optimize(double *out_rows, double *cost)
{
  int res;
  DBUG_ENTER("subselect_hash_sj_engine::optimize");
  SELECT_LEX *save_select= thd->lex->current_select;
  JOIN *join= materialize_join;

  /* Optimize the join exactly once, with its own select as the current one */
  thd->lex->current_select= join->select_lex;
  res= join->optimize();

  /* Calculate #rows and cost of join execution */
  get_partial_join_cost(join, join->table_count - join->const_tables,
                        cost, out_rows);

  /*
    Adjust join output cardinality. There can be these cases:
    - Have no GROUP BY and no aggregate funcs: we won't get into this
      function because such join will be processed as a merged semi-join
      (TODO: does it really mean we don't need to handle such cases here at
       all? put ASSERT)
    - Have no GROUP BY but have aggregate funcs: output is 1 record.
    - Have GROUP BY and have (or not) aggregate funcs:  need to adjust output
      cardinality.
  */
  thd->lex->current_select= save_select;
  if (!join->group_list && !join->group_optimized_away &&
      join->tmp_table_param.sum_func_count)
  {
    DBUG_PRINT("info",("Materialized join will have only 1 row (has "
                       "aggregates but not GROUP BY"));
    *out_rows= 1;
  }

  /* Now with grouping */
  if (join->group_list)
  {
    DBUG_PRINT("info",("Materialized join has grouping, trying to estimate"));
    double output_rows= get_post_group_estimate(materialize_join);
    DBUG_PRINT("info",("Got value of %g", output_rows));
    *out_rows= output_rows;
  }

  /* Single exit through DBUG_RETURN to balance DBUG_ENTER */
  DBUG_RETURN(res);
}
/**
......
......@@ -147,7 +147,7 @@ class Item_subselect :public Item_result_field
bool mark_as_dependent(THD *thd, st_select_lex *select, Item *item);
void fix_after_pullout(st_select_lex *new_parent, Item **ref);
void recalc_used_tables(st_select_lex *new_parent, bool after_pullout);
virtual int optimize();
virtual int optimize(double *out_rows, double *cost);
virtual bool exec();
virtual void fix_length_and_dec();
table_map used_tables() const;
......@@ -534,7 +534,7 @@ class subselect_engine: public Sql_alloc
THD * get_thd() { return thd; }
virtual int prepare()= 0;
virtual void fix_length_and_dec(Item_cache** row)= 0;
virtual int optimize() { DBUG_ASSERT(0); return 0; }
virtual int optimize(double *out_rows, double *cost) { DBUG_ASSERT(0); return 0; }
/*
Execute the engine
......@@ -804,7 +804,7 @@ class subselect_hash_sj_engine : public subselect_engine
bool init_runtime();
void cleanup();
int prepare() { return 0; } /* Override virtual function in base class. */
int optimize();
int optimize(double *out_rows, double *cost);
int exec();
virtual void print(String *str, enum_query_type query_type);
uint cols()
......
......@@ -825,21 +825,16 @@ void get_delayed_table_estimates(TABLE *table,
double *startup_cost)
{
Item_in_subselect *item= table->pos_in_table_list->jtbm_subselect;
item->optimize();
double rows;
double read_time;
item->optimize(&rows, &read_time);
DBUG_ASSERT(item->engine->engine_type() ==
subselect_engine::HASH_SJ_ENGINE);
subselect_hash_sj_engine *hash_sj_engine=
((subselect_hash_sj_engine*)item->engine);
JOIN *join= hash_sj_engine->materialize_join;
double rows;
double read_time;
/* Calculate #rows and cost of join execution */
get_partial_join_cost(join, join->table_count - join->const_tables,
&read_time, &rows);
*out_rows= (ha_rows)rows;
*startup_cost= read_time;
......
......@@ -6489,7 +6489,7 @@ JOIN_TAB *first_top_level_tab(JOIN *join, enum enum_with_const_tables with_const
/*
  Get the next top-level join tab after the given one.

  Advances with next_breadth_first_tab() and returns NULL when that walk is
  over or when it yields a tab that has bush_root_tab set.
  NOTE(review): a set bush_root_tab presumably means the tab is inside a
  nest and thus not top-level - confirm.
*/
JOIN_TAB *next_top_level_tab(JOIN *join, JOIN_TAB *tab)
{
  tab= next_breadth_first_tab(join, tab);
  /* tab is NULL at the end of the walk, so check it before dereferencing */
  if (tab && tab->bush_root_tab)
    tab= NULL;
  return tab;
}
......@@ -9262,6 +9262,8 @@ void JOIN::cleanup(bool full)
SELECT * FROM t1,t2 WHERE t1.a=t2.a AND t1.b=t2.b ORDER BY t1.a,t2.c
SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t2.b,t1.a
@endcode
TODO: this function checks ORDER::used, which can only have a value of 0.
*/
static bool
......
......@@ -66,7 +66,8 @@ typedef struct st_order {
bool counter_used; /* parameter was counter of columns */
Field *field; /* If tmp-table group */
char *buff; /* If tmp-table group */
table_map used, depend_map;
table_map used; /* NOTE: the below is only set to 0 but is still used by eq_ref_table */
table_map depend_map;
} ORDER;
/**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment