Commit 3f65208c authored by Sergei Petrunia's avatar Sergei Petrunia

MDEV-34720: Poor plan choice for large JOIN with ORDER BY and small LIMIT

Modify the join optimizer to specifically try to produce join orders that
can short-cut their execution for ORDER BY..LIMIT clause.

The optimization is controlled by @@optimizer_join_limit_pref_ratio.
Default value 0 means don't construct short-cutting join orders.
Other value means construct short-cutting join order, and prefer it only
if it promises speedup of more than #value times.

In Optimizer Trace, look for these names:
* join_limit_shortcut_is_applicable
* join_limit_shortcut_optimization
* limit_shortcut_choice
parent 1e78e1ef
...@@ -723,6 +723,14 @@ The following specify which files/extra groups are read (specified before remain ...@@ -723,6 +723,14 @@ The following specify which files/extra groups are read (specified before remain
in MariaDB 11.0 as it is not needed with the new 11.0 in MariaDB 11.0 as it is not needed with the new 11.0
optimizer. optimizer.
Use 'ALL' to set all combinations. Use 'ALL' to set all combinations.
--optimizer-join-limit-pref-ratio=#
For queries with JOIN and ORDER BY LIMIT : change the
join plan to one that can short-cut after producing
#LIMIT matches if that promises N times speedup. (That
is, a conservative setting is a high value, like var=100
to change only if this promises 100x) The default is 0
which gives old behavior (don't change no matter what the
speedup)
--optimizer-max-sel-arg-weight=# --optimizer-max-sel-arg-weight=#
The maximum weight of the SEL_ARG graph. Set to 0 for no The maximum weight of the SEL_ARG graph. Set to 0 for no
limit limit
...@@ -1696,6 +1704,7 @@ old-mode UTF8_IS_UTF8MB3 ...@@ -1696,6 +1704,7 @@ old-mode UTF8_IS_UTF8MB3
old-passwords FALSE old-passwords FALSE
old-style-user-limits FALSE old-style-user-limits FALSE
optimizer-adjust-secondary-key-costs optimizer-adjust-secondary-key-costs
optimizer-join-limit-pref-ratio 0
optimizer-max-sel-arg-weight 32000 optimizer-max-sel-arg-weight 32000
optimizer-max-sel-args 16000 optimizer-max-sel-args 16000
optimizer-prune-level 1 optimizer-prune-level 1
......
#
# MDEV-34720: Poor plan choice for large JOIN with ORDER BY and small LIMIT
#
create table t1 (
a int,
b int,
c int,
col1 int,
col2 int,
index(a),
index(b),
index(col1)
);
insert into t1 select
mod(seq, 100),
mod(seq, 95),
seq,
seq,
seq
from
seq_1_to_10000;
create table t10 (
a int,
a_value char(10),
key(a)
);
insert into t10 select seq, seq from seq_1_to_100;
create table t11 (
b int,
b_value char(10),
key(b)
);
insert into t11 select seq, seq from seq_1_to_100;
set @tmp_os=@@optimizer_trace;
set optimizer_trace=1;
#
# Query 1 - basic example.
#
# Table t1 is not the first, have to use temporary+filesort:
explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col1
limit 10;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t10 ALL a NULL NULL NULL 100 Using where; Using temporary; Using filesort
1 SIMPLE t1 ref a,b a 5 test.t10.a 100 Using where
1 SIMPLE t11 ref b b 5 test.t1.b 1
set optimizer_join_limit_pref_ratio=10;
# t1 is first, key=col1 produces ordering, no filesort or temporary:
explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col1
limit 10;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index a,b col1 5 NULL 10 Using where
1 SIMPLE t10 ref a a 5 test.t1.a 1
1 SIMPLE t11 ref b b 5 test.t1.b 1
set @trace=(select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@trace, '$**.limit_shortcut_choice')) as JS;
JS
[
{
"limit_fraction": 0.001,
"test_if_skip_sort_order":
[
{
"reconsidering_access_paths_for_index_ordering":
{
"clause": "ORDER BY",
"fanout": 1,
"read_time": 53.27053125,
"table": "t1",
"rows_estimation": 10000,
"possible_keys":
[
{
"index": "a",
"can_resolve_order": false,
"cause": "not usable index for the query"
},
{
"index": "b",
"can_resolve_order": false,
"cause": "not usable index for the query"
},
{
"index": "col1",
"can_resolve_order": true,
"updated_limit": 10,
"index_scan_time": 10,
"records": 10000,
"chosen": true
}
]
}
}
],
"can_skip_filesort": true,
"full_join_cost": 46064.98442,
"shortcut_join_cost": 99.28224614,
"use_shortcut_plan": true
}
]
#
# Query 2 - same as above but without a suitable index.
#
# Table t1 is not the first, have to use temporary+filesort:
set optimizer_join_limit_pref_ratio=0;
explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col2
limit 10;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t10 ALL a NULL NULL NULL 100 Using where; Using temporary; Using filesort
1 SIMPLE t1 ref a,b a 5 test.t10.a 100 Using where
1 SIMPLE t11 ref b b 5 test.t1.b 1
# t1 is first but there's no suitable index,
# so we use filesort but using temporary:
set optimizer_join_limit_pref_ratio=10;
explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col2
limit 10;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL a,b NULL NULL NULL 10000 Using where; Using filesort
1 SIMPLE t10 ref a a 5 test.t1.a 1
1 SIMPLE t11 ref b b 5 test.t1.b 1
set @trace=(select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@trace, '$**.limit_shortcut_choice')) as JS;
JS
[
{
"limit_fraction": 0.001,
"test_if_skip_sort_order":
[],
"can_skip_filesort": false,
"full_join_cost": 46064.98442,
"shortcut_join_cost": 2097.281246,
"use_shortcut_plan": true
}
]
set optimizer_trace=@tmp_os;
drop table t1, t10, t11;
--echo #
--echo # MDEV-34720: Poor plan choice for large JOIN with ORDER BY and small LIMIT
--echo #
--source include/have_sequence.inc
create table t1 (
a int,
b int,
c int,
col1 int,
col2 int,
index(a),
index(b),
index(col1)
);
insert into t1 select
mod(seq, 100),
mod(seq, 95),
seq,
seq,
seq
from
seq_1_to_10000;
create table t10 (
a int,
a_value char(10),
key(a)
);
insert into t10 select seq, seq from seq_1_to_100;
create table t11 (
b int,
b_value char(10),
key(b)
);
insert into t11 select seq, seq from seq_1_to_100;
set @tmp_os=@@optimizer_trace;
set optimizer_trace=1;
--echo #
--echo # Query 1 - basic example.
--echo #
let $query= explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col1
limit 10;
--echo # Table t1 is not the first, have to use temporary+filesort:
eval $query;
set optimizer_join_limit_pref_ratio=10;
--echo # t1 is first, key=col1 produces ordering, no filesort or temporary:
eval $query;
set @trace=(select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@trace, '$**.limit_shortcut_choice')) as JS;
--echo #
--echo # Query 2 - same as above but without a suitable index.
--echo #
let $query=
explain
select
*
from
t1
join t10 on t1.a=t10.a
join t11 on t1.b=t11.b
order by
t1.col2
limit 10;
--echo # Table t1 is not the first, have to use temporary+filesort:
set optimizer_join_limit_pref_ratio=0;
eval $query;
--echo # t1 is first but there's no suitable index,
--echo # so we use filesort but using temporary:
set optimizer_join_limit_pref_ratio=10;
eval $query;
set @trace=(select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@trace, '$**.limit_shortcut_choice')) as JS;
set optimizer_trace=@tmp_os;
drop table t1, t10, t11;
...@@ -2282,6 +2282,16 @@ NUMERIC_BLOCK_SIZE NULL ...@@ -2282,6 +2282,16 @@ NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by
READ_ONLY NO READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_JOIN_LIMIT_PREF_RATIO
VARIABLE_SCOPE SESSION
VARIABLE_TYPE BIGINT UNSIGNED
VARIABLE_COMMENT For queries with JOIN and ORDER BY LIMIT : change the join plan to one that can short-cut after producing #LIMIT matches if that promises N times speedup. (That is, a conservative setting is a high value, like var=100 to change only if this promises 100x) The default is 0 which gives old behavior (don't change no matter what the speedup)
NUMERIC_MIN_VALUE 0
NUMERIC_MAX_VALUE 4294967295
NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_MAX_SEL_ARGS VARIABLE_NAME OPTIMIZER_MAX_SEL_ARGS
VARIABLE_SCOPE SESSION VARIABLE_SCOPE SESSION
VARIABLE_TYPE BIGINT UNSIGNED VARIABLE_TYPE BIGINT UNSIGNED
......
...@@ -2442,6 +2442,16 @@ NUMERIC_BLOCK_SIZE NULL ...@@ -2442,6 +2442,16 @@ NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by
READ_ONLY NO READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_JOIN_LIMIT_PREF_RATIO
VARIABLE_SCOPE SESSION
VARIABLE_TYPE BIGINT UNSIGNED
VARIABLE_COMMENT For queries with JOIN and ORDER BY LIMIT : change the join plan to one that can short-cut after producing #LIMIT matches if that promises N times speedup. (That is, a conservative setting is a high value, like var=100 to change only if this promises 100x) The default is 0 which gives old behavior (don't change no matter what the speedup)
NUMERIC_MIN_VALUE 0
NUMERIC_MAX_VALUE 4294967295
NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_MAX_SEL_ARGS VARIABLE_NAME OPTIMIZER_MAX_SEL_ARGS
VARIABLE_SCOPE SESSION VARIABLE_SCOPE SESSION
VARIABLE_TYPE BIGINT UNSIGNED VARIABLE_TYPE BIGINT UNSIGNED
......
...@@ -758,6 +758,7 @@ typedef struct system_variables ...@@ -758,6 +758,7 @@ typedef struct system_variables
ulong net_retry_count; ulong net_retry_count;
ulong net_wait_timeout; ulong net_wait_timeout;
ulong net_write_timeout; ulong net_write_timeout;
ulonglong optimizer_join_limit_pref_ratio;
ulong optimizer_prune_level; ulong optimizer_prune_level;
ulong optimizer_search_depth; ulong optimizer_search_depth;
ulong optimizer_selectivity_sampling_limit; ulong optimizer_selectivity_sampling_limit;
......
...@@ -233,12 +233,14 @@ static COND *make_cond_for_table_from_pred(THD *thd, Item *root_cond, ...@@ -233,12 +233,14 @@ static COND *make_cond_for_table_from_pred(THD *thd, Item *root_cond,
static Item* part_of_refkey(TABLE *form,Field *field); static Item* part_of_refkey(TABLE *form,Field *field);
uint find_shortest_key(TABLE *table, const key_map *usable_keys); uint find_shortest_key(TABLE *table, const key_map *usable_keys);
static bool test_if_cheaper_ordering(const JOIN_TAB *tab, static bool test_if_cheaper_ordering(bool in_join_optimizer,
const JOIN_TAB *tab,
ORDER *order, TABLE *table, ORDER *order, TABLE *table,
key_map usable_keys, int key, key_map usable_keys, int key,
ha_rows select_limit, ha_rows select_limit,
int *new_key, int *new_key_direction, int *new_key, int *new_key_direction,
ha_rows *new_select_limit, ha_rows *new_select_limit,
double *new_read_time,
uint *new_used_key_parts= NULL, uint *new_used_key_parts= NULL,
uint *saved_best_key_parts= NULL); uint *saved_best_key_parts= NULL);
static int test_if_order_by_key(JOIN *join, static int test_if_order_by_key(JOIN *join,
...@@ -330,6 +332,7 @@ static void optimize_rownum(THD *thd, SELECT_LEX_UNIT *unit, Item *cond); ...@@ -330,6 +332,7 @@ static void optimize_rownum(THD *thd, SELECT_LEX_UNIT *unit, Item *cond);
static bool process_direct_rownum_comparison(THD *thd, SELECT_LEX_UNIT *unit, static bool process_direct_rownum_comparison(THD *thd, SELECT_LEX_UNIT *unit,
Item *cond); Item *cond);
bool join_shortcut_limit_is_applicable(JOIN *join);
static static
bool find_indexes_matching_order(JOIN *join, TABLE *table, ORDER *order, bool find_indexes_matching_order(JOIN *join, TABLE *table, ORDER *order,
key_map *usable_keys); key_map *usable_keys);
...@@ -5803,6 +5806,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, ...@@ -5803,6 +5806,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
join->sort_by_table= get_sort_by_table(join->order, join->group_list, join->sort_by_table= get_sort_by_table(join->order, join->group_list,
join->select_lex->leaf_tables, join->select_lex->leaf_tables,
join->const_table_map); join->const_table_map);
join->limit_shortcut_applicable= join_shortcut_limit_is_applicable(join);
/* /*
Update info on indexes that can be used for search lookups as Update info on indexes that can be used for search lookups as
reading const tables may has added new sargable predicates. reading const tables may has added new sargable predicates.
...@@ -8075,7 +8079,7 @@ best_access_path(JOIN *join, ...@@ -8075,7 +8079,7 @@ best_access_path(JOIN *join,
loose_scan_opt.init(join, s, remaining_tables); loose_scan_opt.init(join, s, remaining_tables);
if (s->table->is_splittable()) if (s->table->is_splittable() && !join->limit_optimization_mode)
spl_plan= s->choose_best_splitting(idx, spl_plan= s->choose_best_splitting(idx,
remaining_tables, remaining_tables,
join_positions, join_positions,
...@@ -9161,6 +9165,7 @@ choose_plan(JOIN *join, table_map join_tables) ...@@ -9161,6 +9165,7 @@ choose_plan(JOIN *join, table_map join_tables)
THD *thd= join->thd; THD *thd= join->thd;
DBUG_ENTER("choose_plan"); DBUG_ENTER("choose_plan");
join->limit_optimization_mode= false;
join->cur_embedding_map= 0; join->cur_embedding_map= 0;
reset_nj_counters(join, join->join_list); reset_nj_counters(join, join->join_list);
qsort2_cmp jtab_sort_func; qsort2_cmp jtab_sort_func;
...@@ -9221,6 +9226,8 @@ choose_plan(JOIN *join, table_map join_tables) ...@@ -9221,6 +9226,8 @@ choose_plan(JOIN *join, table_map join_tables)
if (greedy_search(join, join_tables, search_depth, prune_level, if (greedy_search(join, join_tables, search_depth, prune_level,
use_cond_selectivity)) use_cond_selectivity))
DBUG_RETURN(TRUE); DBUG_RETURN(TRUE);
DBUG_ASSERT(!join->limit_optimization_mode);
} }
/* /*
...@@ -10330,6 +10337,365 @@ check_if_edge_table(POSITION *pos, ...@@ -10330,6 +10337,365 @@ check_if_edge_table(POSITION *pos,
} }
/*
@brief
Check if it is potentally possible to short-cut the JOIN execution due to
ORDER BY ... LIMIT clause
@detail
It is possible when the join has "ORDER BY ... LIMIT n" clause, and the
sort+limit operation is done right after the join operation (there's no
grouping or DISTINCT in between).
Then we can potentially build a join plan that enumerates rows in the
ORDER BY order and so will be able to terminate as soon as it has produced
#limit rows.
*/
bool join_shortcut_limit_is_applicable(JOIN *join)
{
/*
Any post-join operation like GROUP BY or DISTINCT or window functions
means we cannot short-cut join execution
*/
if (!join->thd->variables.optimizer_join_limit_pref_ratio ||
!join->order ||
join->select_limit == HA_POS_ERROR ||
join->group_list ||
join->select_distinct ||
join->select_options & SELECT_BIG_RESULT ||
join->rollup.state != ROLLUP::STATE_NONE ||
join->select_lex->have_window_funcs()
// || TODO: aggregates and implicit grouping
)
{
return false;
}
/* If sorting is not done by one table can't do that either */
if (!join->sort_by_table)
return false;
Json_writer_object wrapper(join->thd);
Json_writer_object trace(join->thd, "join_limit_shortcut_is_applicable");
trace.add("applicable", 1);
/* It looks like we can short-cut limit due to join */
return true;
}
/*
@brief
Check if we should try building a join order which allows to
short-cut the join operation
@detail
* The optimization must be applicable
* LIMIT should be small enough for short-cutting to make sense
* The join order already built must not be one that allows to shortcut.
*/
JOIN_TAB **join_check_shortcut_limit_now(JOIN *join, uint idx)
{
if (join->limit_shortcut_applicable && idx == join->const_tables &&
!join->emb_sjm_nest &&
join->join_record_count > join->select_limit &&
join->best_positions[join->const_tables].table->table != join->sort_by_table)
{
JOIN_TAB **sort_tbl;
for (sort_tbl= join->best_ref + idx ; *sort_tbl ; sort_tbl++)
{
if ((*sort_tbl)->table == join->sort_by_table)
return sort_tbl;
}
}
return NULL;
}
class Shortcut_opt : public Sql_alloc
{
Json_writer_object wrapper;
Json_writer_object trace;
Json_writer_array trace_array;
public:
Shortcut_opt(THD *thd):
wrapper(thd),
trace(thd, "join_limit_shortcut_optimization"),
trace_array(thd, "opt")
{}
double save_join_record_count;
double save_best_read;
POSITION *save_best_pos;
};
/*
@brief
Start building the join order that allows to short-cut the join execution
@detail
This must be accompanied by join_end_shortcut_limit_run() call.
Basic idea: save the plan we've produced so far and switch to building
the plan that can be cut short.
*/
Shortcut_opt *join_start_shortcut_limit_run(JOIN *join)
{
THD *thd= join->thd;
// We expect that there is some query plan already.
DBUG_ASSERT(join->best_read < DBL_MAX);
Shortcut_opt *opt;
POSITION *pos;
if (!(opt= new (thd->mem_root) Shortcut_opt(thd)) ||
!(pos= (POSITION*)alloc_root(thd->mem_root,
sizeof(POSITION)*
(join->table_count + 1))))
return NULL;
memcpy((uchar*)pos, (uchar*)join->best_positions,
sizeof(POSITION)*join->table_count);
opt->save_join_record_count= join->join_record_count;
opt->save_best_read= join->best_read;
opt->save_best_pos= pos;
join->best_read= DBL_MAX;
join->limit_optimization_mode= true;
return opt;
}
/*
@brief
Check if we could use an index-based access method to produce rows
in the order for ORDER BY ... LIMIT.
@detail
This should do what test_if_skip_sort_order() does. We can't call that
function here for two reasons:
1. We're at the join optimization stage and have not done query plan
fix-ups done in get_best_combination() and co.
2. The code in test_if_skip_sort_order() does modify query plan structures,
for example it may change the table's quick select. This is done even
if it's called with no_changes=true parameter.
@return
true - Can skip.
false - Cannot skip.
*/
bool test_if_skip_sort_order_early(JOIN *join,
bool *have_first_table_cost,
double *first_table_cost)
{
const POSITION *pos= &join->best_positions[join->const_tables];
TABLE *table= pos->table->table;
key_map usable_keys= table->keys_in_use_for_order_by;
*have_first_table_cost= false;
// Step #1: Find indexes that produce the required ordering.
if (find_indexes_matching_order(join, table, join->order, &usable_keys))
{
return false; // Cannot skip sorting
}
// Step #2: Check if the index we're using produces the needed ordering
uint ref_key;
if (pos->key)
{
// We're guaranteed to have no splitting in limit_optimization_mode
DBUG_ASSERT(!pos->spl_plan);
// Mirror the (wrong) logic in test_if_skip_sort_order:
if (pos->type == JT_REF_OR_NULL)
return false; // Use filesort
ref_key= pos->key->key;
}
else
{
if (pos->table->quick)
{
if (pos->table->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE)
ref_key= pos->table->quick->index;
else
ref_key= MAX_KEY;
}
else
ref_key= MAX_KEY;
}
if (ref_key != MAX_KEY && usable_keys.is_set(ref_key))
{
return true; // we're using an index that produces the reqired ordering.
}
/*
Step #3: check if we can switch to using an index that would produce the
ordering.
(But don't actually switch, this will be done by test_if_skip_sort_order)
*/
int best_key= -1;
uint UNINIT_VAR(best_key_parts);
uint saved_best_key_parts= 0;
int best_key_direction= 0;
JOIN_TAB *tab= pos->table;
ha_rows new_limit;
double new_read_time;
if (test_if_cheaper_ordering(/*in_join_optimizer */TRUE,
tab, join->order, table, usable_keys,
ref_key, join->select_limit,
&best_key, &best_key_direction,
&new_limit, &new_read_time,
&best_key_parts,
&saved_best_key_parts))
{
// Ok found a way to skip sorting
*have_first_table_cost= true;
*first_table_cost= new_read_time;
return true;
}
return false;
}
/*
Compute the cost of join assuming we only need fraction_limit
of the output.
*/
double recompute_join_cost_with_limit(const JOIN *join, bool skip_sorting,
double *first_table_cost,
double fraction)
{
POSITION *pos= join->best_positions + join->const_tables;
/*
Generally, we assume that producing X% of output takes X% of the cost.
*/
double partial_join_cost= join->best_read * fraction;
if (skip_sorting)
{
if (first_table_cost)
{
partial_join_cost -= pos->read_time*fraction;
// todo: also records_read?
partial_join_cost= COST_ADD(partial_join_cost, *first_table_cost);
}
}
else //(!skip_sorting)
{
DBUG_ASSERT(!first_table_cost);
/*
However, if we used filesort() for the first table, we still had to
read all rows and check the WHERE for them.
Add the substracted part pack:
*/
double extra_first_table_cost= pos->read_time * (1.0 - fraction);
double extra_first_table_where= pos->records_read * (1.0 - fraction) /
TIME_FOR_COMPARE;
partial_join_cost= COST_ADD(partial_join_cost,
COST_ADD(extra_first_table_cost,
extra_first_table_where));
}
return partial_join_cost;
}
/*
@brief
Finalize building the join order which allows to short-cut the join
execution.
@detail
This is called after we have produced a join order that allows short-
cutting.
Here, we decide if it is cheaper to use this one or the original join
order.
@seealso
join_start_shortcut_limit_run
*/
void join_end_shortcut_limit_run(JOIN *join, Shortcut_opt *opt)
{
join->limit_optimization_mode= false;
bool use_shortcut_plan= false;
{
Json_writer_object wrapper(join->thd);
Json_writer_object trace(join->thd, "limit_shortcut_choice");
if (join->best_read < DBL_MAX)
{
/* We have produced a query plan with a matching join order */
/* Check which fraction of join output we need */
double fraction= 1.0;
if (join->join_record_count > join->select_limit)
{
fraction= join->select_limit / join->join_record_count;
trace.add("limit_fraction", fraction);
}
/*
TODO: here, check if the first table's access method produces the
required ordering.
Possible options:
1. Yes: we can just take a fraction of the execution cost.
2A No: change the access method to one that does produce
the required ordering, update the costs.
2B No: Need to pass the first table to filesort().
*/
bool skip_sorting;
double first_table_cost;
bool have_first_table_cost;
{
Json_writer_array tmp(join->thd, "test_if_skip_sort_order");
skip_sorting= test_if_skip_sort_order_early(join,
&have_first_table_cost,
&first_table_cost);
}
trace.add("can_skip_filesort", skip_sorting);
double limited_cost=
recompute_join_cost_with_limit(join, skip_sorting,
have_first_table_cost? &first_table_cost:(double*)NULL,
fraction);
trace.add("full_join_cost", join->best_read);
trace.add("shortcut_join_cost", limited_cost);
double needed_speedup = (double)join->thd->variables.optimizer_join_limit_pref_ratio;
if (limited_cost * needed_speedup < opt->save_best_read)
{
// LIMIT plan is cheaper.
// It is already in join->best_positions so do nothing
// (TODO: update the cost in join->best_read?)
// LIMIT short-cutting will be done automatically.
// test_if_skip_sort_order() may switch to use index-based access
// method
use_shortcut_plan= true;
}
trace.add("use_shortcut_plan", use_shortcut_plan);
}
else
trace.add("got_shortcut_plan", false);
}
if (!use_shortcut_plan)
{
// Restore back the original plan
memcpy((uchar*)join->best_positions, (uchar*)opt->save_best_pos,
sizeof(POSITION)*join->table_count);
join->join_record_count= opt->save_join_record_count;
join->best_read= opt->save_best_read;
}
delete opt;
}
/** /**
Find a good, possibly optimal, query execution plan (QEP) by a possibly Find a good, possibly optimal, query execution plan (QEP) by a possibly
exhaustive search. exhaustive search.
...@@ -10497,6 +10863,8 @@ best_extension_by_limited_search(JOIN *join, ...@@ -10497,6 +10863,8 @@ best_extension_by_limited_search(JOIN *join,
if (join->emb_sjm_nest) if (join->emb_sjm_nest)
allowed_tables= join->emb_sjm_nest->sj_inner_tables & ~join->const_table_map; allowed_tables= join->emb_sjm_nest->sj_inner_tables & ~join->const_table_map;
Shortcut_opt *optimizing_shortcut= NULL;
for (pos= join->best_ref + idx ; (s= *pos) ; pos++) for (pos= join->best_ref + idx ; (s= *pos) ; pos++)
{ {
table_map real_table_bit= s->table->map; table_map real_table_bit= s->table->map;
...@@ -10682,13 +11050,39 @@ best_extension_by_limited_search(JOIN *join, ...@@ -10682,13 +11050,39 @@ best_extension_by_limited_search(JOIN *join,
if (best_res == SEARCH_FOUND_EDGE) if (best_res == SEARCH_FOUND_EDGE)
{ {
trace_one_table.add("pruned_by_hanging_leaf", true); trace_one_table.add("pruned_by_hanging_leaf", true);
/*
If we're building a join order for order by..limit shortcut, we
must end with building some query plan.
*/
DBUG_ASSERT(!optimizing_shortcut);
goto end; goto end;
} }
} }
if (optimizing_shortcut)
{
join_end_shortcut_limit_run(join, optimizing_shortcut);
break;
}
JOIN_TAB **sort_tbl;
/*
If we're at the start of the join order and have tried all the tables,
try also building a join order that can short-cut the join execution
(todo: describe how this would work with pruning?)
*/
if (!pos[1] && (sort_tbl= join_check_shortcut_limit_now(join, idx)))
{
// Do another pass by putting the table of interest first.
if (!(optimizing_shortcut= join_start_shortcut_limit_run(join)))
DBUG_RETURN(SEARCH_ERROR);
pos= sort_tbl - 1;
}
} }
best_res= SEARCH_OK; best_res= SEARCH_OK;
end: end:
DBUG_ASSERT(!optimizing_shortcut || !join->limit_optimization_mode);
/* Restore original table order */ /* Restore original table order */
if (!*pos) if (!*pos)
pos--; // Revert last pos++ in for loop pos--; // Revert last pos++ in for loop
...@@ -24692,7 +25086,8 @@ void compute_part_of_sort_key_for_equals(JOIN *join, TABLE *table, ...@@ -24692,7 +25086,8 @@ void compute_part_of_sort_key_for_equals(JOIN *join, TABLE *table,
@brief @brief
Given a table and order, find indexes that produce rows in the order Given a table and order, find indexes that produce rows in the order
@param usable_keys OUT Bitmap of indexes that produce rows in order. @param usable_keys IN Bitmap of keys we can use
OUT Bitmap of indexes that produce rows in order.
@return @return
false Some indexes were found false Some indexes were found
...@@ -24990,11 +25385,13 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, ...@@ -24990,11 +25385,13 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
int best_key_direction= 0; int best_key_direction= 0;
JOIN *join= tab->join; JOIN *join= tab->join;
ha_rows table_records= table->stat_records(); ha_rows table_records= table->stat_records();
double new_read_time_dummy;
test_if_cheaper_ordering(tab, order, table, usable_keys, test_if_cheaper_ordering(FALSE, tab, order, table, usable_keys,
ref_key, select_limit, ref_key, select_limit,
&best_key, &best_key_direction, &best_key, &best_key_direction,
&select_limit, &best_key_parts, &select_limit, &new_read_time_dummy,
&best_key_parts,
&saved_best_key_parts); &saved_best_key_parts);
/* /*
...@@ -30250,11 +30647,13 @@ static bool get_range_limit_read_cost(const JOIN_TAB *tab, ...@@ -30250,11 +30647,13 @@ static bool get_range_limit_read_cost(const JOIN_TAB *tab,
*/ */
static bool static bool
test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, test_if_cheaper_ordering(bool in_join_optimizer,
const JOIN_TAB *tab, ORDER *order, TABLE *table,
key_map usable_keys, int ref_key, key_map usable_keys, int ref_key,
ha_rows select_limit_arg, ha_rows select_limit_arg,
int *new_key, int *new_key_direction, int *new_key, int *new_key_direction,
ha_rows *new_select_limit, uint *new_used_key_parts, ha_rows *new_select_limit, double *new_read_time,
uint *new_used_key_parts,
uint *saved_best_key_parts) uint *saved_best_key_parts)
{ {
DBUG_ENTER("test_if_cheaper_ordering"); DBUG_ENTER("test_if_cheaper_ordering");
...@@ -30312,7 +30711,14 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, ...@@ -30312,7 +30711,14 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
if (join) if (join)
{ {
uint tablenr= (uint)(tab - join->join_tab); uint tablenr;
/*
The below two should be always the same. Just being extra cautious:
*/
if (in_join_optimizer)
tablenr= join->const_tables;
else
tablenr= (uint)(tab - join->join_tab);
read_time= join->best_positions[tablenr].read_time; read_time= join->best_positions[tablenr].read_time;
records= join->best_positions[tablenr].records_read; records= join->best_positions[tablenr].records_read;
for (uint i= tablenr+1; i < join->table_count; i++) for (uint i= tablenr+1; i < join->table_count; i++)
...@@ -30350,7 +30756,24 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, ...@@ -30350,7 +30756,24 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
Calculate the selectivity of the ref_key for REF_ACCESS. For Calculate the selectivity of the ref_key for REF_ACCESS. For
RANGE_ACCESS we use table->opt_range_condition_rows. RANGE_ACCESS we use table->opt_range_condition_rows.
*/ */
if (ref_key >= 0 && ref_key != MAX_KEY && tab->type == JT_REF) if (in_join_optimizer)
{
/*
Again, join->best_positions[...].records_read should have the
same value for practical purposes.
It can be different, because best_access_path() uses
actual_rec_per_key() while the code below uses rec_per_key
but here the code below is wrong.
*/
if (ref_key >= 0 && ref_key != MAX_KEY &&
join->positions[join->const_tables].type == JT_REF)
{
refkey_rows_estimate=
(ha_rows)join->best_positions[join->const_tables].records_read;
set_if_bigger(refkey_rows_estimate, 1);
}
}
else if (ref_key >= 0 && ref_key != MAX_KEY && tab->type == JT_REF)
{ {
/* /*
If ref access uses keypart=const for all its key parts, If ref access uses keypart=const for all its key parts,
...@@ -30680,6 +31103,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, ...@@ -30680,6 +31103,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
*new_key= best_key; *new_key= best_key;
*new_key_direction= best_key_direction; *new_key_direction= best_key_direction;
*new_select_limit= has_limit ? best_select_limit : table_records; *new_select_limit= has_limit ? best_select_limit : table_records;
*new_read_time= read_time;
if (new_used_key_parts != NULL) if (new_used_key_parts != NULL)
*new_used_key_parts= best_key_parts; *new_used_key_parts= best_key_parts;
DBUG_RETURN(TRUE); DBUG_RETURN(TRUE);
...@@ -30779,10 +31203,11 @@ uint get_index_for_order(ORDER *order, TABLE *table, SQL_SELECT *select, ...@@ -30779,10 +31203,11 @@ uint get_index_for_order(ORDER *order, TABLE *table, SQL_SELECT *select,
table->opt_range_condition_rows= table->stat_records(); table->opt_range_condition_rows= table->stat_records();
int key, direction; int key, direction;
if (test_if_cheaper_ordering(NULL, order, table, double new_cost;
if (test_if_cheaper_ordering(FALSE, NULL, order, table,
table->keys_in_use_for_order_by, -1, table->keys_in_use_for_order_by, -1,
limit, limit,
&key, &direction, &limit) && &key, &direction, &limit, &new_cost) &&
!is_key_used(table, key, table->write_set)) !is_key_used(table, key, table->write_set))
{ {
*need_sort= FALSE; *need_sort= FALSE;
...@@ -1205,6 +1205,10 @@ class JOIN :public Sql_alloc ...@@ -1205,6 +1205,10 @@ class JOIN :public Sql_alloc
passing 1st non-const table to filesort(). NULL means no such table exists. passing 1st non-const table to filesort(). NULL means no such table exists.
*/ */
TABLE *sort_by_table; TABLE *sort_by_table;
bool limit_shortcut_applicable;
bool limit_optimization_mode;
/* /*
Number of tables in the join. Number of tables in the join.
(In MySQL, it is named 'tables' and is also the number of elements in (In MySQL, it is named 'tables' and is also the number of elements in
......
...@@ -2702,6 +2702,20 @@ static Sys_var_ulong Sys_optimizer_selectivity_sampling_limit( ...@@ -2702,6 +2702,20 @@ static Sys_var_ulong Sys_optimizer_selectivity_sampling_limit(
VALID_RANGE(SELECTIVITY_SAMPLING_THRESHOLD, UINT_MAX), VALID_RANGE(SELECTIVITY_SAMPLING_THRESHOLD, UINT_MAX),
DEFAULT(SELECTIVITY_SAMPLING_LIMIT), BLOCK_SIZE(1)); DEFAULT(SELECTIVITY_SAMPLING_LIMIT), BLOCK_SIZE(1));
static Sys_var_ulonglong Sys_optimizer_join_limit_pref_ratio(
"optimizer_join_limit_pref_ratio",
"For queries with JOIN and ORDER BY LIMIT : change the join plan "
"to one that can short-cut after producing #LIMIT matches if that "
"promises N times speedup. "
"(That is, a conservative setting is a high value, like var=100 to "
"change only if this promises 100x) "
"The default is 0 which gives old behavior (don't change no matter "
"what the speedup)",
SESSION_VAR(optimizer_join_limit_pref_ratio),
CMD_LINE(REQUIRED_ARG),
VALID_RANGE(0, UINT_MAX),
DEFAULT(0), BLOCK_SIZE(1));
static Sys_var_ulong Sys_optimizer_use_condition_selectivity( static Sys_var_ulong Sys_optimizer_use_condition_selectivity(
"optimizer_use_condition_selectivity", "optimizer_use_condition_selectivity",
"Controls selectivity of which conditions the optimizer takes into " "Controls selectivity of which conditions the optimizer takes into "
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment