Commit d1a46c68 authored by Igor Babaev's avatar Igor Babaev Committed by Oleksandr Byelkin

MDEV-30218 Incorrect optimization for rowid_filtering

Correction over the last patch for this MDEV.
parent 03c9a4ef
......@@ -2085,7 +2085,7 @@ ON t6.b >= 2 AND t5.b=t7.b AND
(t8.a > 0 OR t8.c IS NULL) AND t6.a>0 AND t7.a>0;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t5 ALL NULL NULL NULL NULL 3
1 SIMPLE t7 ref PRIMARY,b_i b_i 5 test.t5.b 2 Using where; Using join buffer (flat, BKA join); Key-ordered Rowid-ordered scan
1 SIMPLE t7 ref|filter PRIMARY,b_i b_i|PRIMARY 5|4 test.t5.b 2 (29%) Using where; Using join buffer (flat, BKA join); Key-ordered Rowid-ordered scan; Using rowid filter
1 SIMPLE t6 range PRIMARY,b_i PRIMARY 4 NULL 3 Using where; Rowid-ordered scan; Using join buffer (incremental, BNL join)
1 SIMPLE t8 ref b_i b_i 5 test.t5.b 2 Using where; Using join buffer (incremental, BKA join); Key-ordered Rowid-ordered scan
SELECT t5.a,t5.b,t6.a,t6.b,t7.a,t7.b,t8.a,t8.b
......
......@@ -1016,7 +1016,6 @@ explain select * from t1,t2 where t1.a=t2.b+2 and t2.a= t1.b {
"index": "a",
"used_range_estimates": false,
"cause": "not available",
"rowid_filter_skipped": "cost_factor <= 0",
"rows": 1,
"cost": 200,
"chosen": true
......@@ -1073,7 +1072,6 @@ explain select * from t1,t2 where t1.a=t2.b+2 and t2.a= t1.b {
"index": "a",
"used_range_estimates": false,
"cause": "not available",
"rowid_filter_skipped": "cost_factor <= 0",
"rows": 1,
"cost": 200,
"chosen": true
......@@ -2120,7 +2118,6 @@ explain select * from t1 where a=1 and b=2 order by c limit 1 {
"access_type": "ref",
"index": "a_c",
"used_range_estimates": true,
"rowid_filter_skipped": "worst/max seeks clipping",
"rows": 180,
"cost": 92,
"chosen": true
......@@ -3346,7 +3343,6 @@ explain select * from t1 where pk = 2 and a=5 and b=1 {
"access_type": "ref",
"index": "pk",
"used_range_estimates": true,
"rowid_filter_skipped": "cost_factor <= 0",
"rows": 1,
"cost": 2,
"chosen": true
......@@ -3355,7 +3351,6 @@ explain select * from t1 where pk = 2 and a=5 and b=1 {
"access_type": "ref",
"index": "pk_a",
"used_range_estimates": true,
"rowid_filter_skipped": "cost_factor <= 0",
"rows": 1,
"cost": 2,
"chosen": false,
......@@ -3365,7 +3360,6 @@ explain select * from t1 where pk = 2 and a=5 and b=1 {
"access_type": "ref",
"index": "pk_a_b",
"used_range_estimates": true,
"rowid_filter_skipped": "cost_factor <= 0",
"rows": 1,
"cost": 1.0043,
"chosen": true
......@@ -3974,6 +3968,7 @@ explain delete t0,t1 from t0, t1 where t0.a=t1.a and t1.a<3 {
"best_access_path": {
"considered_access_paths": [
{
"rowid_filter_skipped": "cost_factor <= 0",
"access_type": "range",
"resulting_rows": 3,
"cost": 1.407,
......@@ -4000,7 +3995,6 @@ explain delete t0,t1 from t0, t1 where t0.a=t1.a and t1.a<3 {
"index": "a",
"used_range_estimates": false,
"cause": "not better than ref estimates",
"rowid_filter_skipped": "cost_factor <= 0",
"rows": 1,
"cost": 3.007,
"chosen": true
......@@ -4030,6 +4024,7 @@ explain delete t0,t1 from t0, t1 where t0.a=t1.a and t1.a<3 {
"best_access_path": {
"considered_access_paths": [
{
"rowid_filter_skipped": "cost_factor <= 0",
"access_type": "range",
"resulting_rows": 3,
"cost": 1.407,
......@@ -4056,7 +4051,6 @@ explain delete t0,t1 from t0, t1 where t0.a=t1.a and t1.a<3 {
"index": "a",
"used_range_estimates": false,
"cause": "not better than ref estimates",
"rowid_filter_skipped": "worst/max seeks clipping",
"rows": 2,
"cost": 3.014,
"chosen": true
......@@ -8069,7 +8063,6 @@ JSON_DETAILED(JSON_EXTRACT(trace, '$**.considered_execution_plans'))
"index": "b",
"used_range_estimates": false,
"cause": "not available",
"rowid_filter_skipped": "cost_factor <= 0",
"rows": 1,
"cost": 20,
"chosen": true
......@@ -8273,7 +8266,6 @@ JSON_DETAILED(JSON_EXTRACT(trace, '$**.considered_execution_plans'))
"index": "a",
"used_range_estimates": false,
"cause": "not available",
"rowid_filter_skipped": "cost_factor <= 0",
"rows": 1,
"cost": 20,
"chosen": true
......@@ -8341,7 +8333,6 @@ JSON_DETAILED(JSON_EXTRACT(trace, '$**.considered_execution_plans'))
"index": "a",
"used_range_estimates": false,
"cause": "not available",
"rowid_filter_skipped": "cost_factor <= 0",
"rows": 1,
"cost": 200,
"chosen": true
......
......@@ -208,7 +208,6 @@ explain select * from t1 where pk1 != 0 and key1 = 1 {
"access_type": "ref",
"index": "key1",
"used_range_estimates": true,
"rowid_filter_skipped": "cost_factor <= 0",
"rows": 1,
"cost": 2,
"chosen": true
......
......@@ -281,7 +281,7 @@ INSERT INTO t1 VALUES
(33,5),(33,5),(33,5),(33,5),(34,5),(35,5);
EXPLAIN SELECT * FROM t1 WHERE a IN(1,2) AND b=5;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a,b a 5 NULL 2 Using index condition; Using where
1 SIMPLE t1 ref|filter a,b b|a 5|5 const 15 (5%) Using where; Using rowid filter
SELECT * FROM t1 WHERE a IN(1,2) AND b=5;
a b
DROP TABLE t1;
......
This diff is collapsed.
This diff is collapsed.
......@@ -3744,7 +3744,7 @@ EXPLAIN SELECT * FROM t1
WHERE ID_better=1 AND ID1_with_null IS NULL AND
(ID2_with_null=1 OR ID2_with_null=2);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref idx1,idx2 idx2 4 const 2 Using where
1 SIMPLE t1 ref|filter idx1,idx2 idx1|idx2 5|4 const 2 (1%) Using index condition; Using where; Using rowid filter
DROP TABLE t1;
CREATE TABLE t1 (a INT, ts TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, KEY ts(ts));
INSERT INTO t1 VALUES (30,"2006-01-03 23:00:00"), (31,"2006-01-03 23:00:00");
......
......@@ -3755,7 +3755,7 @@ EXPLAIN SELECT * FROM t1
WHERE ID_better=1 AND ID1_with_null IS NULL AND
(ID2_with_null=1 OR ID2_with_null=2);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref idx1,idx2 idx2 4 const 2 Using where
1 SIMPLE t1 ref|filter idx1,idx2 idx1|idx2 5|4 const 2 (1%) Using index condition; Using where; Using rowid filter
DROP TABLE t1;
CREATE TABLE t1 (a INT, ts TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, KEY ts(ts));
INSERT INTO t1 VALUES (30,"2006-01-03 23:00:00"), (31,"2006-01-03 23:00:00");
......
......@@ -3744,7 +3744,7 @@ EXPLAIN SELECT * FROM t1
WHERE ID_better=1 AND ID1_with_null IS NULL AND
(ID2_with_null=1 OR ID2_with_null=2);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref idx1,idx2 idx2 4 const 2 Using where
1 SIMPLE t1 ref|filter idx1,idx2 idx1|idx2 5|4 const 2 (1%) Using index condition; Using where; Using rowid filter
DROP TABLE t1;
CREATE TABLE t1 (a INT, ts TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, KEY ts(ts));
INSERT INTO t1 VALUES (30,"2006-01-03 23:00:00"), (31,"2006-01-03 23:00:00");
......
......@@ -1661,7 +1661,7 @@ Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1`
# gives selectivity data
explain extended select * from t1 where a in (17,51,5) and b=2;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ref b,a b 5 const 58 2.90 Using where
1 SIMPLE t1 ref|filter b,a b|a 5|5 const 58 (3%) 2.90 Using where; Using rowid filter
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`b` = 2 and `test`.`t1`.`a` in (17,51,5)
drop table t1;
......
......@@ -7557,7 +7557,6 @@ best_access_path(JOIN *join,
rec= MATCHING_ROWS_IN_OTHER_TABLE; // Fix for small tables
Json_writer_object trace_access_idx(thd);
double eq_ref_rows= 0;
/*
full text keys require special treatment
*/
......@@ -7596,8 +7595,7 @@ best_access_path(JOIN *join,
type= JT_EQ_REF;
trace_access_idx.add("access_type", join_type_str[type])
.add("index", keyinfo->name);
eq_ref_rows= tmp = prev_record_reads(join_positions, idx,
found_ref);
tmp = prev_record_reads(join_positions, idx, found_ref);
records=1.0;
}
else
......@@ -7904,28 +7902,7 @@ best_access_path(JOIN *join,
(s->table->file->index_flags(start_key->key,0,1) &
HA_DO_RANGE_FILTER_PUSHDOWN))
{
double rows;
if (type == JT_EQ_REF)
{
/*
Treat EQ_REF access in a special way:
1. We have no cost for index-only read. Assume its cost is 50% of
the cost of the full read.
2. A regular ref access will do #record_count lookups, but eq_ref
has "lookup cache" which reduces the number of lookups made.
The estimation code uses prev_record_reads() call to estimate:
tmp = prev_record_reads(join_positions, idx, found_ref);
Set the effective number of rows from "tmp" here.
*/
keyread_tmp= COST_ADD(eq_ref_rows / 2, s->startup_cost);
rows= eq_ref_rows;
}
else
rows= record_count * records;
double rows= record_count * records;
/*
If we use filter F with selectivity s the the cost of fetching data
by key using this filter will be
......@@ -7947,46 +7924,53 @@ best_access_path(JOIN *join,
cost_of_fetching_1_row = tmp/rows
cost_of_fetching_1_key_tuple = keyread_tmp/rows
access_cost_factor is the gain we expect for using rowid filter.
An access_cost_factor of 1.0 means that keyread_tmp is 0
(using key read is infinitely fast) and the gain for each row when
using filter is great.
An access_cost_factor if 0.0 means that using keyread has the
same cost as reading rows, so there is no gain to get with
filter.
access_cost_factor should never be bigger than 1.0 (if all
calculations are correct) as the cost of keyread should always be
smaller than the cost of fetching the same number of keys + rows.
access_cost_factor should also never be smaller than 0.0.
The one exception is if number of records is 1 (eq_ref), then
because we are comparing rows to cost of keyread_tmp, keyread_tmp
is higher by 1.0. This is a big that will be fixed in a later
version.
If we have limited the cost (=tmp) of reading rows with 'worst_seek'
we cannot use filters as the cost calculation below would cause
tmp to become negative. The future resultion is to not limit
cost with worst_seek.
Here's a more detailed explanation that uses the formulas behind
the function the call filter->get_adjusted_gain(). The function
takes as a parameter the number of probes/look-ups into the filter
that is equal to the number of fetched key entries that is equal to
the number of row fetches when no filter is used (assuming no
index condition pushdown is employed for the used key access).
Let this number be N. Then the total gain from using the filter is
N*a_adj - b where b is the cost of building the filter and
a_adj is calcilated as follows:
a - (1-access_cost_factor)*(1-s) =
(1+1_cond_eval_cost)*(1-s)-1_probe_cost - (1-access_cost_factor)*(1-s)
= (1-s)*(1_cond_eval_cost+access_cost_factor) - 1_probe_cost.
Here ((1-s)*(1_cond_eval_cost) * N is the gain from checking less
conditions pushed into the table, 1_probe_cost*N is the cost of the
probes and (1*s) * access_cost_factor * N must be the gain from
accessing less rows.
It does not matter how we calculate the cost of N full row fetches
cost_of_fetching_N_rows or
how we calculate the cost of fetching N key entries
cost_of_fetching_N_key_entries
the gain from less row fetches will be
(cost_of_fetching_N_rows - cost_of_fetching_N_key_entries) * (1-s)
and this should be equal to (1*s) * access_cost_factor * N.
Thus access_cost_factor must be calculated as
(cost_of_fetching_N_rows - cost_of_fetching_N_key_entries) / N.
For safety we clip cost_of_fetching_N_key_entries by the value
of cost_of_fetching_N_row though formally it's not necessary.
*/
double access_cost_factor= MY_MIN((rows - keyread_tmp) / rows, 1.0);
if (!(records < s->worst_seeks &&
records <= thd->variables.max_seeks_for_key))
trace_access_idx.add("rowid_filter_skipped", "worst/max seeks clipping");
else if (access_cost_factor <= 0.0)
trace_access_idx.add("rowid_filter_skipped", "cost_factor <= 0");
else
/*
For eq_ref access we assume that the cost of fetching N key entries
is equal to the half of fetching N rows
*/
double key_access_cost=
type == JT_EQ_REF ? 0.5 * tmp : MY_MIN(tmp, keyread_tmp);
double access_cost_factor= MY_MIN((tmp - key_access_cost) / rows, 1.0);
filter=
table->best_range_rowid_filter_for_partial_join(start_key->key,
rows,
access_cost_factor);
if (filter)
{
filter=
table->best_range_rowid_filter_for_partial_join(start_key->key,
rows,
access_cost_factor);
if (filter)
{
tmp-= filter->get_adjusted_gain(rows) - filter->get_cmp_gain(rows);
DBUG_ASSERT(tmp >= 0);
trace_access_idx.add("rowid_filter_key",
tmp-= filter->get_adjusted_gain(rows) - filter->get_cmp_gain(rows);
DBUG_ASSERT(tmp >= 0);
trace_access_idx.add("rowid_filter_key",
s->table->key_info[filter->key_no].name);
}
}
}
trace_access_idx.add("rows", records).add("cost", tmp);
......@@ -8139,19 +8123,19 @@ best_access_path(JOIN *join,
uint key_no= s->quick->index;
/* See the comment concerning using rowid filter for with ref access */
keyread_tmp= s->table->quick_index_only_costs[key_no] * record_count;
double access_cost_factor= MY_MIN((rows - keyread_tmp) / rows, 1.0);
if (access_cost_factor > 0.0)
double row_access_cost= s->quick->read_time * record_count;
double key_access_cost=
MY_MIN(row_access_cost,
s->table->quick_index_only_costs[key_no] * record_count);
double access_cost_factor= MY_MIN((row_access_cost - key_access_cost) /
rows, 1.0);
filter=
s->table->best_range_rowid_filter_for_partial_join(key_no, rows,
access_cost_factor);
if (filter)
{
filter=
s->table->
best_range_rowid_filter_for_partial_join(key_no, rows,
access_cost_factor);
if (filter)
{
tmp-= filter->get_adjusted_gain(rows);
DBUG_ASSERT(tmp >= 0);
}
tmp-= filter->get_adjusted_gain(rows);
DBUG_ASSERT(tmp >= 0);
}
else
trace_access_scan.add("rowid_filter_skipped", "cost_factor <= 0");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment