Commit 87eccd78 authored by Sergei Petrunia's avatar Sergei Petrunia

MDEV-30218: Incorrect optimization for rowid_filtering

(Patch from Monty, slightly amended)

Fix rowid filtering optimization in best_access_path():

== Ref access + rowid filtering ==
The cost computations compare #records and index-only scan cost
(keyread_tmp) to find out the per-record advantage one will get if
they skip reading full table record.

The computations produce wrong result when:

- the #records are "clipped down" with s->worst_seeks or
  thd->variables.max_seeks_for_key. keyread_tmp is not clipped
  this way so the numbers are not comparable.

- access_factor is negative. This means index_only read is
  cheaper than non-index-only read.

This patch makes the optimizer not to consider Rowid Filtering in
such cases.
The decision is logged in the Optimizer Trace using
"rowid_filter_skipped" name.

== Range access + rowid filtering ==
when considering to use Rowid Filter with range access, do multiply
keyread_tmp by record_count. That way, it is comparable with the
range access's estimate, which is multiplied by record_count.
parent fdf43b5c
This diff is collapsed.
......@@ -1016,6 +1016,7 @@ explain select * from t1,t2 where t1.a=t2.b+2 and t2.a= t1.b {
"index": "a",
"used_range_estimates": false,
"cause": "not available",
"rowid_filter_skipped": "cost_factor <= 0",
"rows": 1,
"cost": 200,
"chosen": true
......@@ -1072,6 +1073,7 @@ explain select * from t1,t2 where t1.a=t2.b+2 and t2.a= t1.b {
"index": "a",
"used_range_estimates": false,
"cause": "not available",
"rowid_filter_skipped": "cost_factor <= 0",
"rows": 1,
"cost": 200,
"chosen": true
......@@ -2118,6 +2120,7 @@ explain select * from t1 where a=1 and b=2 order by c limit 1 {
"access_type": "ref",
"index": "a_c",
"used_range_estimates": true,
"rowid_filter_skipped": "worst/max seeks clipping",
"rows": 180,
"cost": 92,
"chosen": true
......@@ -3343,6 +3346,7 @@ explain select * from t1 where pk = 2 and a=5 and b=1 {
"access_type": "ref",
"index": "pk",
"used_range_estimates": true,
"rowid_filter_skipped": "cost_factor <= 0",
"rows": 1,
"cost": 2,
"chosen": true
......@@ -3351,6 +3355,7 @@ explain select * from t1 where pk = 2 and a=5 and b=1 {
"access_type": "ref",
"index": "pk_a",
"used_range_estimates": true,
"rowid_filter_skipped": "cost_factor <= 0",
"rows": 1,
"cost": 2,
"chosen": false,
......@@ -3360,6 +3365,7 @@ explain select * from t1 where pk = 2 and a=5 and b=1 {
"access_type": "ref",
"index": "pk_a_b",
"used_range_estimates": true,
"rowid_filter_skipped": "cost_factor <= 0",
"rows": 1,
"cost": 1.0043,
"chosen": true
......@@ -3994,6 +4000,7 @@ explain delete t0,t1 from t0, t1 where t0.a=t1.a and t1.a<3 {
"index": "a",
"used_range_estimates": false,
"cause": "not better than ref estimates",
"rowid_filter_skipped": "cost_factor <= 0",
"rows": 1,
"cost": 3.007,
"chosen": true
......@@ -4049,6 +4056,7 @@ explain delete t0,t1 from t0, t1 where t0.a=t1.a and t1.a<3 {
"index": "a",
"used_range_estimates": false,
"cause": "not better than ref estimates",
"rowid_filter_skipped": "worst/max seeks clipping",
"rows": 2,
"cost": 3.014,
"chosen": true
......@@ -8145,6 +8153,7 @@ JSON_DETAILED(JSON_EXTRACT(trace, '$**.considered_execution_plans'))
"index": "b",
"used_range_estimates": false,
"cause": "not available",
"rowid_filter_skipped": "cost_factor <= 0",
"rows": 1,
"cost": 20,
"chosen": true
......@@ -8370,6 +8379,7 @@ JSON_DETAILED(JSON_EXTRACT(trace, '$**.considered_execution_plans'))
"index": "a",
"used_range_estimates": false,
"cause": "not available",
"rowid_filter_skipped": "cost_factor <= 0",
"rows": 1,
"cost": 20,
"chosen": true
......@@ -8445,6 +8455,7 @@ JSON_DETAILED(JSON_EXTRACT(trace, '$**.considered_execution_plans'))
"index": "a",
"used_range_estimates": false,
"cause": "not available",
"rowid_filter_skipped": "cost_factor <= 0",
"rows": 1,
"cost": 200,
"chosen": true
......
......@@ -208,6 +208,7 @@ explain select * from t1 where pk1 != 0 and key1 = 1 {
"access_type": "ref",
"index": "key1",
"used_range_estimates": true,
"rowid_filter_skipped": "cost_factor <= 0",
"rows": 1,
"cost": 2,
"chosen": true
......
......@@ -281,7 +281,7 @@ INSERT INTO t1 VALUES
(33,5),(33,5),(33,5),(33,5),(34,5),(35,5);
EXPLAIN SELECT * FROM t1 WHERE a IN(1,2) AND b=5;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref|filter a,b b|a 5|5 const 15 (5%) Using where; Using rowid filter
1 SIMPLE t1 range a,b a 5 NULL 2 Using index condition; Using where
SELECT * FROM t1 WHERE a IN(1,2) AND b=5;
a b
DROP TABLE t1;
......
This diff is collapsed.
This diff is collapsed.
......@@ -48,39 +48,3 @@ ERROR 70100: Query execution was interrupted
set debug_sync='RESET';
drop table t2,t3;
set default_storage_engine=default;
set @save_optimizer_switch= @@optimizer_switch;
set @save_use_stat_tables= @@use_stat_tables;
set @save_optimizer_use_condition_selectivity= @@optimizer_use_condition_selectivity;
set @@use_stat_tables=preferably;
set optimizer_use_condition_selectivity=2;
set optimizer_switch='rowid_filter=on';
#
# MDEV-22761 KILL QUERY during rowid_filter, crashes
# (The smaller testcase)
#
CREATE TABLE t1 (a INT, b INT, INDEX(a), INDEX(b)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (0,0),(1,0),(-1,1), (-2,1), (-2,3), (-3,4), (-2,4);
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
ANALYZE TABLE t1 PERSISTENT FOR ALL;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
set debug_sync='handler_rowid_filter_check SIGNAL killme WAIT_FOR go';
SELECT * FROM t1 WHERE a > 0 AND b=0;
connect con1, localhost, root,,;
set debug_sync='now WAIT_FOR killme';
kill query @id;
set debug_sync='now SIGNAL go';
connection default;
ERROR 70100: Query execution was interrupted
set debug_sync='RESET';
disconnect con1;
drop table t1;
set @@optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
set @@optimizer_switch=@save_optimizer_switch;
set @@use_stat_tables=@save_use_stat_tables;
......@@ -5,55 +5,3 @@ set default_storage_engine=innodb;
--source include/rowid_filter_debug_kill.inc
set default_storage_engine=default;
--source include/default_optimizer_switch.inc
--source include/count_sessions.inc
set @save_optimizer_switch= @@optimizer_switch;
set @save_use_stat_tables= @@use_stat_tables;
set @save_optimizer_use_condition_selectivity= @@optimizer_use_condition_selectivity;
set @@use_stat_tables=preferably;
set optimizer_use_condition_selectivity=2;
set optimizer_switch='rowid_filter=on';
--echo #
--echo # MDEV-22761 KILL QUERY during rowid_filter, crashes
--echo # (The smaller testcase)
--echo #
CREATE TABLE t1 (a INT, b INT, INDEX(a), INDEX(b)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (0,0),(1,0),(-1,1), (-2,1), (-2,3), (-3,4), (-2,4);
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
ANALYZE TABLE t1 PERSISTENT FOR ALL;
let $ID= `SELECT @id := CONNECTION_ID()`;
set debug_sync='handler_rowid_filter_check SIGNAL killme WAIT_FOR go';
send SELECT * FROM t1 WHERE a > 0 AND b=0;
connect (con1, localhost, root,,);
let $ignore= `SELECT @id := $ID`;
set debug_sync='now WAIT_FOR killme';
kill query @id;
set debug_sync='now SIGNAL go';
connection default;
--error ER_QUERY_INTERRUPTED
reap;
set debug_sync='RESET';
disconnect con1;
drop table t1;
set @@optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
set @@optimizer_switch=@save_optimizer_switch;
set @@use_stat_tables=@save_use_stat_tables;
--source include/wait_until_count_sessions.inc
......@@ -1661,7 +1661,7 @@ Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1`
# gives selectivity data
explain extended select * from t1 where a in (17,51,5) and b=2;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ref|filter b,a b|a 5|5 const 58 (3%) 2.90 Using where; Using rowid filter
1 SIMPLE t1 ref b,a b 5 const 58 2.90 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`b` = 2 and `test`.`t1`.`a` in (17,51,5)
drop table t1;
......
......@@ -1671,7 +1671,7 @@ Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1`
# gives selectivity data
explain extended select * from t1 where a in (17,51,5) and b=2;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ref|filter b,a b|a 5|5 const 59 (3%) 2.90 Using where; Using rowid filter
1 SIMPLE t1 ref b,a b 5 const 59 2.90 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`b` = 2 and `test`.`t1`.`a` in (17,51,5)
drop table t1;
......
......@@ -7887,7 +7887,9 @@ best_access_path(JOIN *join,
} /* not ft_key */
if (records < DBL_MAX &&
(found_part & 1)) // start_key->key can be used for index access
(found_part & 1) && // start_key->key can be used for index access
(s->table->file->index_flags(start_key->key,0,1) &
HA_DO_RANGE_FILTER_PUSHDOWN))
{
double rows= record_count * records;
......@@ -7912,23 +7914,50 @@ best_access_path(JOIN *join,
cost_of_fetching_1_row = tmp/rows
cost_of_fetching_1_key_tuple = keyread_tmp/rows
Note that access_cost_factor may be greater than 1.0. In this case
we still can expect a gain of using rowid filter due to smaller number
of checks for conditions pushed to the joined table.
access_cost_factor is the gain we expect for using rowid filter.
An access_cost_factor of 1.0 means that keyread_tmp is 0
(using key read is infinitely fast) and the gain for each row when
using filter is great.
An access_cost_factor if 0.0 means that using keyread has the
same cost as reading rows, so there is no gain to get with
filter.
access_cost_factor should never be bigger than 1.0 (if all
calculations are correct) as the cost of keyread should always be
smaller than the cost of fetching the same number of keys + rows.
access_cost_factor should also never be smaller than 0.0.
The one exception is if number of records is 1 (eq_ref), then
because we are comparing rows to cost of keyread_tmp, keyread_tmp
is higher by 1.0. This is a big that will be fixed in a later
version.
If we have limited the cost (=tmp) of reading rows with 'worst_seek'
we cannot use filters as the cost calculation below would cause
tmp to become negative. The future resultion is to not limit
cost with worst_seek.
We cannot use filter with JT_EQ_REF as in this case 'tmp' is
number of rows from prev_record_read() and keyread_tmp is 0. These
numbers are not usable with rowid filter code.
*/
double rows_access_cost= MY_MIN(rows, s->worst_seeks);
double access_cost_factor= MY_MIN((rows_access_cost - keyread_tmp) /
rows, 1.0);
filter=
table->best_range_rowid_filter_for_partial_join(start_key->key, rows,
access_cost_factor);
if (filter)
{
filter->get_cmp_gain(rows);
tmp-= filter->get_adjusted_gain(rows) - filter->get_cmp_gain(rows);
DBUG_ASSERT(tmp >= 0);
trace_access_idx.add("rowid_filter_key",
s->table->key_info[filter->key_no].name);
double access_cost_factor= MY_MIN((rows - keyread_tmp) / rows, 1.0);
if (!(records < s->worst_seeks &&
records <= thd->variables.max_seeks_for_key))
trace_access_idx.add("rowid_filter_skipped", "worst/max seeks clipping");
else if (access_cost_factor <= 0.0)
trace_access_idx.add("rowid_filter_skipped", "cost_factor <= 0");
else if (type != JT_EQ_REF)
{
filter=
table->best_range_rowid_filter_for_partial_join(start_key->key,
rows,
access_cost_factor);
if (filter)
{
tmp-= filter->get_adjusted_gain(rows) - filter->get_cmp_gain(rows);
DBUG_ASSERT(tmp >= 0);
trace_access_idx.add("rowid_filter_key",
s->table->key_info[filter->key_no].name);
}
}
}
trace_access_idx.add("rows", records).add("cost", tmp);
......@@ -8070,7 +8099,8 @@ best_access_path(JOIN *join,
access (see first else-branch below), but we don't take it into
account here for range/index_merge access. Find out why this is so.
*/
double cmp_time= (s->found_records - rnd_records)/(double) TIME_FOR_COMPARE;
double cmp_time= ((s->found_records - rnd_records)/
(double) TIME_FOR_COMPARE);
tmp= COST_MULT(record_count,
COST_ADD(s->quick->read_time, cmp_time));
......@@ -8080,16 +8110,23 @@ best_access_path(JOIN *join,
uint key_no= s->quick->index;
/* See the comment concerning using rowid filter for with ref access */
keyread_tmp= s->table->quick_index_only_costs[key_no];
keyread_tmp= s->table->quick_index_only_costs[key_no] * record_count;
double access_cost_factor= MY_MIN((rows - keyread_tmp) / rows, 1.0);
filter=
s->table->best_range_rowid_filter_for_partial_join(key_no, rows,
access_cost_factor);
if (filter)
if (access_cost_factor > 0.0)
{
tmp-= filter->get_adjusted_gain(rows);
DBUG_ASSERT(tmp >= 0);
filter=
s->table->
best_range_rowid_filter_for_partial_join(key_no, rows,
access_cost_factor);
if (filter)
{
tmp-= filter->get_adjusted_gain(rows);
DBUG_ASSERT(tmp >= 0);
}
}
else
trace_access_scan.add("rowid_filter_skipped", "cost_factor <= 0");
type= JT_RANGE;
}
else
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment