Commit e0bb2f90 authored by Sergey Petrunya's avatar Sergey Petrunya

Subquery optimization backport:

- Factor out subquery code into sql/opt_subselect.{h,cc}
- Stop using the term "confluent" (was used due to misreading the dictionary)
parent 4746ddf6
......@@ -1922,3 +1922,7 @@ libmysqld/examples/mysqltest.cc
extra/libevent/event-config.h
libmysqld/opt_table_elimination.cc
libmysqld/ha_federatedx.cc
libmysqld/multi_range_read.cc
libmysqld/opt_index_cond_pushdown.cc
libmysqld/opt_subselect.cc
libmysqld/sql_join_cache.cc
......@@ -58,6 +58,7 @@ sqlsources = derror.cc field.cc field_conv.cc strfunc.cc filesort.cc \
log_event.cc rpl_record.cc \
log_event_old.cc rpl_record_old.cc \
protocol.cc net_serv.cc opt_range.cc \
opt_subselect.cc \
opt_sum.cc procedure.cc records.cc sql_acl.cc \
sql_load.cc discover.cc sql_locale.cc \
sql_profile.cc \
......
......@@ -60,6 +60,7 @@ noinst_HEADERS = item.h item_func.h item_sum.h item_cmpfunc.h \
ha_ndbcluster_binlog.h ha_ndbcluster_tables.h \
ha_partition.h rpl_constants.h \
opt_range.h protocol.h rpl_tblmap.h rpl_utility.h \
opt_subselect.h \
rpl_reporting.h \
log.h log_slow.h sql_show.h rpl_rli.h rpl_mi.h \
sql_select.h structs.h table.h sql_udf.h hash_filo.h \
......@@ -102,7 +103,8 @@ mysqld_SOURCES = sql_lex.cc sql_handler.cc sql_partition.cc \
unireg.cc des_key_file.cc \
log_event.cc rpl_record.cc \
log_event_old.cc rpl_record_old.cc \
discover.cc time.cc opt_range.cc opt_sum.cc \
discover.cc time.cc opt_range.cc opt_subselect.cc \
opt_sum.cc \
records.cc filesort.cc handler.cc \
ha_partition.cc \
sql_db.cc sql_table.cc sql_rename.cc sql_crypt.cc \
......
......@@ -31,10 +31,6 @@
#include "mysql_priv.h"
#include "sql_select.h"
/*
  AND-combine two conditions.

  @param cond  Existing condition, may be NULL
  @param item  Condition to add

  @return item itself when cond is NULL, otherwise a newly created
          Item_cond_and(cond, item)
*/
inline Item * and_items(Item* cond, Item *item)
{
  if (!cond)
    return item;
  return new Item_cond_and(cond, item);
}
Item_subselect::Item_subselect():
Item_result_field(), value_assigned(0), thd(0), substitution(0),
......@@ -1899,7 +1895,7 @@ Item_in_subselect::select_in_like_transformer(JOIN *join, Comp_creator *func)
object, but we can't know it here, but here we need address correct
reference on left expression.
//psergey: he means confluent cases like "... IN (SELECT 1)"
//psergey: he means degenerate cases like "... IN (SELECT 1)"
*/
if (!optimizer)
{
......
This source diff could not be displayed because it is too large. You can view the blob instead.
/* */
#ifdef USE_PRAGMA_INTERFACE
#pragma interface /* gcc class implementation */
#endif
/*
  Subquery optimization entry points. Only the declarations are visible
  here; each function operates on the JOIN that is passed in.
  NOTE(review): return value conventions (int vs. bool error codes) are not
  visible from this header - confirm against the definitions.
*/
int check_and_do_in_subquery_rewrites(JOIN *join);
bool convert_join_subqueries_to_semijoins(JOIN *join);
int pull_out_semijoin_tables(JOIN *join);
bool optimize_semijoin_nests(JOIN *join, table_map all_table_map);
/*
  Used by Loose_scan_opt::init(): called with the semi-join nest of the
  table being considered and the map of remaining tables; the result is
  stored in Loose_scan_opt::bound_sj_equalities.
*/
ulonglong get_bound_sj_equalities(TABLE_LIST *sj_nest,
table_map remaining_tables);
/*
This is a class for considering possible loose index scan optimizations.
It's usage pattern is as follows:
best_access_path()
{
Loose_scan_opt opt;
opt.init()
for each index we can do ref access with
{
opt.next_ref_key();
for each keyuse
opt.add_keyuse();
opt.check_ref_access();
}
if (some criteria for range scans)
opt.check_range_access();
opt.get_best_option();
}
*/
class Loose_scan_opt
{
public:
/* All methods must check this before doing anything else */
bool try_loosescan;
/*
If we consider (oe1, .. oeN) IN (SELECT ie1, .. ieN) then ieK=oeK is
called sj-equality. If oeK depends only on preceding tables then such
equality is called 'bound'.
*/
ulonglong bound_sj_equalities;
/* Accumulated properties of ref access we're now considering: */
ulonglong handled_sj_equalities;
key_part_map loose_scan_keyparts;
uint max_loose_keypart;
bool part1_conds_met;
/*
Use of quick select is a special case. Some of its properties:
*/
uint quick_uses_applicable_index;
uint quick_max_loose_keypart;
/* Best loose scan method so far */
uint best_loose_scan_key;
double best_loose_scan_cost;
double best_loose_scan_records;
KEYUSE *best_loose_scan_start_key;
uint best_max_loose_keypart;
Loose_scan_opt():
try_loosescan(FALSE),
bound_sj_equalities(0),
quick_uses_applicable_index(FALSE)
{
UNINIT_VAR(quick_max_loose_keypart); /* Protected by quick_uses_applicable_index */
/* The following are protected by best_loose_scan_cost!= DBL_MAX */
UNINIT_VAR(best_loose_scan_key);
UNINIT_VAR(best_loose_scan_records);
UNINIT_VAR(best_max_loose_keypart);
UNINIT_VAR(best_loose_scan_start_key);
}
void init(JOIN *join, JOIN_TAB *s, table_map remaining_tables)
{
/*
Discover the bound equalities. We need to do this if
1. The next table is an SJ-inner table, and
2. It is the first table from that semijoin, and
3. We're not within a semi-join range (i.e. all semi-joins either have
all or none of their tables in join_table_map), except
s->emb_sj_nest (which we've just entered, see #2).
4. All non-IN-equality correlation references from this sj-nest are
bound
5. But some of the IN-equalities aren't (so this can't be handled by
FirstMatch strategy)
*/
best_loose_scan_cost= DBL_MAX;
if (!join->emb_sjm_nest && s->emb_sj_nest && // (1)
s->emb_sj_nest->sj_in_exprs < 64 &&
((remaining_tables & s->emb_sj_nest->sj_inner_tables) == // (2)
s->emb_sj_nest->sj_inner_tables) && // (2)
join->cur_sj_inner_tables == 0 && // (3)
!(remaining_tables &
s->emb_sj_nest->nested_join->sj_corr_tables) && // (4)
remaining_tables & s->emb_sj_nest->nested_join->sj_depends_on &&// (5)
optimizer_flag(join->thd, OPTIMIZER_SWITCH_LOOSE_SCAN))
{
/* This table is an LooseScan scan candidate */
bound_sj_equalities= get_bound_sj_equalities(s->emb_sj_nest,
remaining_tables);
try_loosescan= TRUE;
DBUG_PRINT("info", ("Will try LooseScan scan, bound_map=%llx",
(longlong)bound_sj_equalities));
}
}
void next_ref_key()
{
handled_sj_equalities=0;
loose_scan_keyparts= 0;
max_loose_keypart= 0;
part1_conds_met= FALSE;
}
void add_keyuse(table_map remaining_tables, KEYUSE *keyuse)
{
if (try_loosescan && keyuse->sj_pred_no != UINT_MAX)
{
if (!(remaining_tables & keyuse->used_tables))
{
/*
This allows to use equality propagation to infer that some
sj-equalities are bound.
*/
bound_sj_equalities |= 1ULL << keyuse->sj_pred_no;
}
else
{
handled_sj_equalities |= 1ULL << keyuse->sj_pred_no;
loose_scan_keyparts |= ((key_part_map)1) << keyuse->keypart;
set_if_bigger(max_loose_keypart, keyuse->keypart);
}
}
}
bool have_a_case() { return test(handled_sj_equalities); }
void check_ref_access_part1(JOIN_TAB *s, uint key, KEYUSE *start_key,
table_map found_part)
{
/*
Check if we can use LooseScan semi-join strategy. We can if
1. This is the right table at right location
2. All IN-equalities are either
- "bound", ie. the outer_expr part refers to the preceding tables
- "handled", ie. covered by the index we're considering
3. Index order allows to enumerate subquery's duplicate groups in
order. This happens when the index definition matches this
pattern:
(handled_col|bound_col)* (other_col|bound_col)
*/
if (try_loosescan && // (1)
(handled_sj_equalities | bound_sj_equalities) == // (2)
PREV_BITS(ulonglong, s->emb_sj_nest->sj_in_exprs) && // (2)
(PREV_BITS(key_part_map, max_loose_keypart+1) & // (3)
(found_part | loose_scan_keyparts)) == // (3)
(found_part | loose_scan_keyparts) && // (3)
!key_uses_partial_cols(s->table, key))
{
/* Ok, can use the strategy */
part1_conds_met= TRUE;
if (s->quick && s->quick->index == key &&
s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE)
{
quick_uses_applicable_index= TRUE;
quick_max_loose_keypart= max_loose_keypart;
}
DBUG_PRINT("info", ("Can use LooseScan scan"));
/*
Check if this is a special case where there are no usable bound
IN-equalities, i.e. we have
outer_expr IN (SELECT innertbl.key FROM ...)
and outer_expr cannot be evaluated yet, so it's actually full
index scan and not a ref access
*/
if (!(found_part & 1 ) && /* no usable ref access for 1st key part */
s->table->covering_keys.is_set(key))
{
DBUG_PRINT("info", ("Can use full index scan for LooseScan"));
/* Calculate the cost of complete loose index scan. */
double records= rows2double(s->table->file->stats.records);
/* The cost is entire index scan cost (divided by 2) */
double read_time= s->table->file->index_only_read_time(key, records);
/*
Now find out how many different keys we will get (for now we
ignore the fact that we have "keypart_i=const" restriction for
some key components, that may make us think think that loose
scan will produce more distinct records than it actually will)
*/
ulong rpc;
if ((rpc= s->table->key_info[key].rec_per_key[max_loose_keypart]))
records= records / rpc;
// TODO: previous version also did /2
if (read_time < best_loose_scan_cost)
{
best_loose_scan_key= key;
best_loose_scan_cost= read_time;
best_loose_scan_records= records;
best_max_loose_keypart= max_loose_keypart;
best_loose_scan_start_key= start_key;
}
}
}
}
void check_ref_access_part2(uint key, KEYUSE *start_key, double records,
double read_time)
{
if (part1_conds_met && read_time < best_loose_scan_cost)
{
/* TODO use rec-per-key-based fanout calculations */
best_loose_scan_key= key;
best_loose_scan_cost= read_time;
best_loose_scan_records= records;
best_max_loose_keypart= max_loose_keypart;
best_loose_scan_start_key= start_key;
}
}
void check_range_access(JOIN *join, uint idx, QUICK_SELECT_I *quick)
{
/* TODO: this the right part restriction: */
if (quick_uses_applicable_index && idx == join->const_tables &&
quick->read_time < best_loose_scan_cost)
{
best_loose_scan_key= quick->index;
best_loose_scan_cost= quick->read_time;
/* this is ok because idx == join->const_tables */
best_loose_scan_records= rows2double(quick->records);
best_max_loose_keypart= quick_max_loose_keypart;
best_loose_scan_start_key= NULL;
}
}
void save_to_position(JOIN_TAB *tab, POSITION *pos)
{
pos->read_time= best_loose_scan_cost;
if (best_loose_scan_cost != DBL_MAX)
{
pos->records_read= best_loose_scan_records;
pos->key= best_loose_scan_start_key;
pos->loosescan_key= best_loose_scan_key;
pos->loosescan_parts= best_max_loose_keypart + 1;
pos->use_join_buffer= FALSE;
pos->table= tab;
// todo need ref_depend_map ?
DBUG_PRINT("info", ("Produced a LooseScan plan, key %s, %s",
tab->table->key_info[best_loose_scan_key].name,
best_loose_scan_start_key? "(ref access)":
"(range/index access)"));
}
}
};
/*
  Semi-join related functions used during join order search and execution.
  Only the declarations are visible here.
  NOTE(review): advance_sj_state()/restore_prev_sj_state() look like a
  matched pair called when a table is added to / removed from the join
  prefix at position idx - confirm against the definitions.
*/
void advance_sj_state(JOIN *join, const table_map remaining_tables,
const JOIN_TAB *new_join_tab, uint idx,
double *current_record_count, double *current_read_time,
POSITION *loose_scan_pos);
void restore_prev_sj_state(const table_map remaining_tables,
const JOIN_TAB *tab, uint idx);
void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
bool setup_sj_materialization(JOIN_TAB *tab);
/* Functions that operate on SJ_TMP_TABLE objects */
TABLE *create_duplicate_weedout_tmp_table(THD *thd, uint uniq_tuple_length_arg,
SJ_TMP_TABLE *sjtbl);
int do_sj_reset(SJ_TMP_TABLE *sj_tbl);
int do_sj_dups_weedout(THD *thd, SJ_TMP_TABLE *sjtbl);
/*
  Temporary table used by semi-join DuplicateElimination strategy

  This consists of the temptable itself and data needed to put records
  into it. The table's DDL is as follows:

    CREATE TABLE tmptable (col VARCHAR(n) BINARY, PRIMARY KEY(col));

  where the primary key can be replaced with unique constraint if n exceeds
  the limit (as it is always done for query execution-time temptables).

  The record value is a concatenation of rowids of tables from the join we're
  executing. If a join table is on the inner side of the outer join, we
  assume that its rowid can be NULL and provide means to store this rowid in
  the tuple.
*/
class SJ_TMP_TABLE : public Sql_alloc
{
public:
/*
  Description of one table whose rowid is a part of the temporary table
  record.
  NOTE(review): presumably rowid_offset is the position of this table's
  rowid within the record, and null_byte/null_bit locate its NULL flag -
  confirm against the code that fills these in.
*/
class TAB
{
public:
JOIN_TAB *join_tab;
uint rowid_offset;
ushort null_byte;
uchar null_bit;
};
/*
  Array of descriptors of the tables whose rowids compose the temporary
  table record.
  NOTE(review): tabs_end presumably points one past the last element -
  confirm.
*/
TAB *tabs;
TAB *tabs_end;
/*
  is_degenerate==TRUE means this is a special case where the temptable record
  has zero length (and presence of a unique key means that the temptable can
  have either 0 or 1 records).
  In this case we don't create the physical temptable but instead record
  its state in SJ_TMP_TABLE::have_degenerate_row.
*/
bool is_degenerate;
/*
  When is_degenerate==TRUE: the contents of the table (whether it has the
  record or not).
*/
bool have_degenerate_row;
/* table record parameters */
uint null_bits;
uint null_bytes;
uint rowid_len;
/* The temporary table itself (NULL means not created yet) */
TABLE *tmp_table;
/*
  These are the members we got from temptable creation code. We'll need
  them if we'll need to convert table from HEAP to MyISAM/Maria.
*/
ENGINE_COLUMNDEF *start_recinfo;
ENGINE_COLUMNDEF *recinfo;
/*
  Pointer to next table (next->start_idx > this->end_idx)
  NOTE(review): start_idx/end_idx are not members of this class; the
  ordering remark above looks stale - confirm or remove.
*/
SJ_TMP_TABLE *next;
};
/*
  Remaining subquery optimization entry points (declarations only).
  NOTE(review): destroy_sj_tmp_tables()/clear_sj_tmp_tables() presumably
  operate on the SJ_TMP_TABLE objects of the given join - confirm.
*/
int setup_semijoin_dups_elimination(JOIN *join, ulonglong options,
uint no_jbuf_after);
void destroy_sj_tmp_tables(JOIN *join);
int clear_sj_tmp_tables(JOIN *join);
int rewrite_to_index_subquery_engine(JOIN *join);
......@@ -29,6 +29,7 @@
#include "mysql_priv.h"
#include "sql_select.h"
#include "opt_subselect.h"
/*****************************************************************************
......
......@@ -605,7 +605,7 @@ class st_select_lex: public st_select_lex_node
List<TABLE_LIST> top_join_list; /* join list of the top level */
List<TABLE_LIST> *join_list; /* list for the currently parsed join */
TABLE_LIST *embedding; /* table embedding to the above list */
List<TABLE_LIST> sj_nests;
List<TABLE_LIST> sj_nests; /* Semi-join nests within this join */
/*
Beginning of the list of leaves in a FROM clause, where the leaves
include all base tables including view tables. The tables are connected
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -28,6 +28,12 @@
#include "procedure.h"
#include <myisam.h>
#if defined(WITH_MARIA_STORAGE_ENGINE) && defined(USE_MARIA_FOR_TMP_TABLES)
#include "../storage/maria/ha_maria.h"
#define TMP_ENGINE_HTON maria_hton
#else
#define TMP_ENGINE_HTON myisam_hton
#endif
/* Values in optimize */
#define KEY_OPTIMIZE_EXISTS 1
#define KEY_OPTIMIZE_REF_OR_NULL 2
......@@ -1195,7 +1201,6 @@ enum_nested_loop_state sub_select(JOIN *join,JOIN_TAB *join_tab, bool
end_of_records);
enum_nested_loop_state sub_select_sjm(JOIN *join, JOIN_TAB *join_tab,
bool end_of_records);
int do_sj_dups_weedout(THD *thd, SJ_TMP_TABLE *sjtbl);
enum_nested_loop_state
end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
......@@ -1329,74 +1334,6 @@ typedef struct st_rollup
List<Item> *fields;
} ROLLUP;
/*
Temporary table used by semi-join DuplicateElimination strategy
This consists of the temptable itself and data needed to put records
into it. The table's DDL is as follows:
CREATE TABLE tmptable (col VARCHAR(n) BINARY, PRIMARY KEY(col));
where the primary key can be replaced with unique constraint if n exceeds
the limit (as it is always done for query execution-time temptables).
The record value is a concatenation of rowids of tables from the join we're
executing. If a join table is on the inner side of the outer join, we
assume that its rowid can be NULL and provide means to store this rowid in
the tuple.
*/
class SJ_TMP_TABLE : public Sql_alloc
{
public:
/*
Array of pointers to tables whose rowids compose the temporary table
record.
*/
class TAB
{
public:
JOIN_TAB *join_tab;
uint rowid_offset;
ushort null_byte;
uchar null_bit;
};
TAB *tabs;
TAB *tabs_end;
/*
is_confluent==TRUE means this is a special case where the temptable record
has zero length (and presence of a unique key means that the temptable can
have either 0 or 1 records).
In this case we don't create the physical temptable but instead record
its state in SJ_TMP_TABLE::have_confluent_row.
*/
bool is_confluent;
/*
When is_confluent==TRUE: the contents of the table (whether it has the
record or not).
*/
bool have_confluent_row;
/* table record parameters */
uint null_bits;
uint null_bytes;
uint rowid_len;
/* The temporary table itself (NULL means not created yet) */
TABLE *tmp_table;
/*
These are the members we got from temptable creation code. We'll need
them if we'll need to convert table from HEAP to MyISAM/Maria.
*/
ENGINE_COLUMNDEF *start_recinfo;
ENGINE_COLUMNDEF *recinfo;
/* Pointer to next table (next->start_idx > this->end_idx) */
SJ_TMP_TABLE *next;
};
#define SJ_OPT_NONE 0
#define SJ_OPT_DUPS_WEEDOUT 1
......@@ -1711,7 +1648,6 @@ class JOIN :public Sql_alloc
Item_sum ***func);
int rollup_send_data(uint idx);
int rollup_write_data(uint idx, TABLE *table);
void remove_subq_pushed_predicates(Item **where);
/**
Release memory and, if possible, the open tables held by this execution
plan (and nested plans). It's used to release some tables before
......@@ -1763,11 +1699,6 @@ void TEST_join(JOIN *join);
/* Extern functions in sql_select.cc */
bool store_val_in_field(Field *field, Item *val, enum_check_fields check_flag);
TABLE *create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
ORDER *group, bool distinct, bool save_sum_fields,
ulonglong select_options, ha_rows rows_limit,
char* alias);
void free_tmp_table(THD *thd, TABLE *entry);
void count_field_types(SELECT_LEX *select_lex, TMP_TABLE_PARAM *param,
List<Item> &fields, bool reset_with_sum_func);
bool setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
......@@ -1776,10 +1707,6 @@ bool setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
uint elements, List<Item> &fields);
void copy_fields(TMP_TABLE_PARAM *param);
void copy_funcs(Item **func_ptr);
bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
ENGINE_COLUMNDEF *start_recinfo,
ENGINE_COLUMNDEF **recinfo,
int error, bool ignore_last_dupp_key_error);
uint find_shortest_key(TABLE *table, const key_map *usable_keys);
Field* create_tmp_field_from_field(THD *thd, Field* org_field,
const char *name, TABLE *table,
......@@ -1955,13 +1882,59 @@ int test_if_item_cache_changed(List<Cached_item> &list);
void calc_used_field_length(THD *thd, JOIN_TAB *join_tab);
int join_init_read_record(JOIN_TAB *tab);
void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key);
/*
  AND-combine two conditions.

  @param cond  Existing condition, may be NULL
  @param item  Condition to add

  @return item itself when cond is NULL, otherwise a newly created
          Item_cond_and(cond, item)
*/
inline Item * and_items(Item* cond, Item *item)
{
  if (!cond)
    return item;
  return new Item_cond_and(cond, item);
}
bool choose_plan(JOIN *join,table_map join_tables);
void get_partial_join_cost(JOIN *join, uint n_tables, double *read_time_arg,
double *record_count_arg);
void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab,
table_map last_remaining_tables,
bool first_alt, uint no_jbuf_before,
double *reopt_rec_count, double *reopt_cost,
double *sj_inner_fanout);
Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field,
bool *inherited_fl);
bool test_if_ref(COND *root_cond,
Item_field *left_item,Item *right_item);
/*
  Check whether an optimizer switch flag is set for the given thread.

  @param thd   Thread whose optimizer_switch variable is examined
  @param flag  Flag bit(s) to test

  @return TRUE if any of the bits in flag is set in
          thd->variables.optimizer_switch
*/
inline bool optimizer_flag(THD *thd, uint flag)
{
  const bool is_set= (thd->variables.optimizer_switch & flag) != 0;
  return is_set;
}
/* Table elimination entry point function */
void eliminate_tables(JOIN *join);
/* Index Condition Pushdown entry point function */
void push_index_cond(JOIN_TAB *tab, uint keyno, bool other_tbls_ok);
/****************************************************************************
Temporary table support for SQL Runtime
***************************************************************************/
#define STRING_TOTAL_LENGTH_TO_PACK_ROWS 128
#define AVG_STRING_LENGTH_TO_PACK_ROWS 64
#define RATIO_TO_PACK_ROWS 2
#define MIN_STRING_LENGTH_TO_PACK_ROWS 10
TABLE *create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
ORDER *group, bool distinct, bool save_sum_fields,
ulonglong select_options, ha_rows rows_limit,
char* alias);
void free_tmp_table(THD *thd, TABLE *entry);
bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
ENGINE_COLUMNDEF *start_recinfo,
ENGINE_COLUMNDEF **recinfo,
int error, bool ignore_last_dupp_key_error);
bool create_internal_tmp_table(TABLE *table, KEY *keyinfo,
ENGINE_COLUMNDEF *start_recinfo,
ENGINE_COLUMNDEF **recinfo,
ulonglong options);
bool open_tmp_table(TABLE *table);
void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment