Commit f7579518 authored by Sergei Petrunia

MDEV-19600: The optimizer should be able to produce rows=1 estimate for unique index with NULLable columns

Modify best_access_path() to produce a rows=1 estimate for null-rejecting
lookups on unique keys with NULLable columns.
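
A minimal sketch of the scenario (simplified from the testcase added below; the tables here are illustrative only and would need data plus ANALYZE, as in the full test):

  -- unique key on a NULLable column; the index may contain many NULLs
  create table t1 (pk int not null primary key auto_increment, a int, unique key(a));
  create table t2 (a int);
  -- t1.a = t2.a is null-rejecting: a NULL in t1.a can never match,
  -- so a lookup through the unique key can return at most one row
  explain select * from t2, t1 where t1.a = t2.a;
  -- expected after this patch: type=ref on t1 using key `a` with rows=1,
  -- even if the index statistics are skewed by the NULL group (e.g. with nulls_equal)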
parent 7060b032
......@@ -404,8 +404,8 @@ b int(11) YES NULL
c int(11) YES NULL
explain select * from t1,t2 where t1.b = t2.c and t1.c = t2.b;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 10
1 SIMPLE t1 ALL b,c NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
1 SIMPLE t2 ALL NULL NULL NULL NULL 10 Using where
1 SIMPLE t1 ref b,c b 5 test.t2.c 1 Using where
select * from t1,t2 where t1.b = t2.c and t1.c = t2.b;
a a b c
1 1 1 1
......
......@@ -1599,3 +1599,33 @@ SELECT STRAIGHT_JOIN * FROM t1, t2 AS t2_1, t2 AS t2_2
WHERE t2_2.c = t2_1.c AND t2_2.b = t2_1.b AND ( a IS NULL OR t2_1.c = a );
a b c b c
DROP TABLE t1,t2;
#
# MDEV-19600: The optimizer should be able to produce rows=1 estimate for unique index with NULLable columns
#
create table t0(a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1 (
pk int not null primary key auto_increment,
a int,
b int,
unique key(a)
);
insert into t1 (a,b) select null, 12345 from t0 A, t0 B, t0 C;
insert into t1 (a,b) select a,a from t0;
# Simulate InnoDB's persistent statistics (It always uses nulls_equal)
set @tmp1= @@myisam_stats_method;
set myisam_stats_method=nulls_equal;
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
set myisam_stats_method=@tmp1;
show keys from t1;
Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
t1 0 PRIMARY 1 pk A 1010 NULL NULL BTREE
t1 0 a 1 a A 10 NULL NULL YES BTREE
# t1 must use ref(t1.a=t0.a) and rows must be 1 (and not 45):
explain select * from t0,t1 where t0.a=t1.a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t0 ALL NULL NULL NULL NULL 10 Using where
1 SIMPLE t1 ref a a 5 test.t0.a 1
drop table t0,t1;
......@@ -1254,3 +1254,33 @@ SELECT STRAIGHT_JOIN * FROM t1, t2 AS t2_1, t2 AS t2_2
WHERE t2_2.c = t2_1.c AND t2_2.b = t2_1.b AND ( a IS NULL OR t2_1.c = a );
DROP TABLE t1,t2;
--echo #
--echo # MDEV-19600: The optimizer should be able to produce rows=1 estimate for unique index with NULLable columns
--echo #
create table t0(a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1 (
pk int not null primary key auto_increment,
a int,
b int,
unique key(a)
);
# 1000 rows with NULL values
insert into t1 (a,b) select null, 12345 from t0 A, t0 B, t0 C;
insert into t1 (a,b) select a,a from t0;
--echo # Simulate InnoDB's persistent statistics (It always uses nulls_equal)
set @tmp1= @@myisam_stats_method;
set myisam_stats_method=nulls_equal;
analyze table t1;
set myisam_stats_method=@tmp1;
show keys from t1;
--echo # t1 must use ref(t1.a=t0.a) and rows must be 1 (and not 45):
explain select * from t0,t1 where t0.a=t1.a;
drop table t0,t1;
......@@ -1515,7 +1515,7 @@ WHERE t2.b=14 AND t2.a=t1.a AND 5.1<t2.c AND t1.b='DE'
ORDER BY t2.c LIMIT 1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 range a,b,c c 5 NULL 420 Using where
1 SIMPLE t1 ref a a 39 test.t2.a,const 10 Using where; Using index
1 SIMPLE t1 ref a a 39 test.t2.a,const 1 Using where; Using index
SELECT d FROM t3 AS t1, t2 AS t2
WHERE t2.b=14 AND t2.a=t1.a AND 5.1<t2.c AND t1.b='DE'
ORDER BY t2.c LIMIT 1;
......
......@@ -2555,33 +2555,89 @@ CREATE TABLE t1 ( a INT PRIMARY KEY, b INT, KEY(b) );
INSERT INTO t1 VALUES
(1,2),(2,1),(3,3),(4,2),(5,5),
(6,3),(7,1),(8,4),(9,3),(10,2);
CREATE TABLE t2 ( c INT, d INT, UNIQUE KEY(c) );
CREATE TABLE t2 ( c INT, d INT, KEY(c) );
INSERT INTO t2 VALUES
(1,2),(2,1),(3,3),(4,2),(5,5),(6,3),(7,1);
analyze table t1,t2;
Table Op Msg_type Msg_text
test.t1 analyze status OK
test.t2 analyze status OK
explain
SELECT a, b, d FROM t1, t2
WHERE ( b, d ) IN
( SELECT b, d FROM t1, t2 WHERE b = c );
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t2 ALL NULL NULL NULL NULL 7
1 PRIMARY t1 index b b 5 NULL 10 Using where; Using index; LooseScan
1 PRIMARY t2 ref c c 5 test.t1.b 1 Using where; FirstMatch(t1)
1 PRIMARY t1 ref b b 5 test.t1.b 2
SELECT a, b, d FROM t1, t2
WHERE ( b, d ) IN
( SELECT b, d FROM t1, t2 WHERE b = c );
a b d
2 1 2
7 1 2
2 1 2
7 1 2
1 2 1
4 2 1
10 2 1
8 4 2
1 2 1
4 2 1
10 2 1
3 3 3
6 3 3
9 3 3
2 1 2
7 1 2
8 4 2
5 5 5
3 3 3
6 3 3
9 3 3
8 4 2
8 4 2
5 5 5
1 2 1
4 2 1
10 2 1
DROP TABLE t1, t2;
# Another testcase for the above that still uses LooseScan:
create table t0(a int primary key);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t10(a int primary key);
insert into t10 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
create table t1 (
pk int primary key auto_increment,
kp1 int,
kp2 int,
filler char(100),
key (kp1, kp2)
);
insert into t1 (kp1, kp2, filler)
select
A.a, B.a, 'filler-data'
from t0 A, t0 B;
create table t2 (a int, filler char(100), key(a));
create table t3 (a int);
insert into t3 values (1),(2);
insert into t2
select (A.a+1000*B.a)/20, 'filler_data' from t10 A, t0 B;
analyze table t1,t2,t3;
Table Op Msg_type Msg_text
test.t1 analyze status Table is already up to date
test.t2 analyze status Table is already up to date
test.t3 analyze status OK
delete from t1 where kp2 in (1,3);
# Ref + LooseScan on t1:
explain select sum(t2.a)
from t2,t3
where (t3.a,t2.a) in (select kp1,kp2 from t1,t0 where t0.a=2);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t0 const PRIMARY PRIMARY 4 const 1 Using index
1 PRIMARY t3 ALL NULL NULL NULL NULL 2 Using where
1 PRIMARY t1 ref kp1 kp1 5 test.t3.a 10 Using where; Using index; LooseScan
1 PRIMARY t2 ref a a 5 test.t1.kp2 20 Using index
select sum(t2.a)
from t2,t3
where (t3.a,t2.a) in (select kp1,kp2 from t1,t0 where t0.a=2);
sum(t2.a)
1640
drop table t0,t10;
drop table t1,t2,t3;
#
# BUG#920713: Wrong result (missing rows) with firstmatch+BNL, IN subquery, ...
#
......
......@@ -2285,16 +2285,66 @@ INSERT INTO t1 VALUES
(1,2),(2,1),(3,3),(4,2),(5,5),
(6,3),(7,1),(8,4),(9,3),(10,2);
CREATE TABLE t2 ( c INT, d INT, UNIQUE KEY(c) );
CREATE TABLE t2 ( c INT, d INT, KEY(c) );
INSERT INTO t2 VALUES
(1,2),(2,1),(3,3),(4,2),(5,5),(6,3),(7,1);
analyze table t1,t2;
explain
SELECT a, b, d FROM t1, t2
WHERE ( b, d ) IN
( SELECT b, d FROM t1, t2 WHERE b = c );
SELECT a, b, d FROM t1, t2
WHERE ( b, d ) IN
( SELECT b, d FROM t1, t2 WHERE b = c );
DROP TABLE t1, t2;
--echo # Another testcase for the above that still uses LooseScan:
create table t0(a int primary key);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t10(a int primary key);
insert into t10 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
create table t1 (
pk int primary key auto_increment,
kp1 int,
kp2 int,
filler char(100),
key (kp1, kp2)
);
# 10 groups, each has 10 elements.
insert into t1 (kp1, kp2, filler)
select
A.a, B.a, 'filler-data'
from t0 A, t0 B;
create table t2 (a int, filler char(100), key(a));
create table t3 (a int);
insert into t3 values (1),(2);
insert into t2
select (A.a+1000*B.a)/20, 'filler_data' from t10 A, t0 B;
analyze table t1,t2,t3;
delete from t1 where kp2 in (1,3);
--echo # Ref + LooseScan on t1:
explain select sum(t2.a)
from t2,t3
where (t3.a,t2.a) in (select kp1,kp2 from t1,t0 where t0.a=2);
select sum(t2.a)
from t2,t3
where (t3.a,t2.a) in (select kp1,kp2 from t1,t0 where t0.a=2);
drop table t0,t10;
drop table t1,t2,t3;
--echo #
--echo # BUG#920713: Wrong result (missing rows) with firstmatch+BNL, IN subquery, ...
--echo #
......
......@@ -2569,9 +2569,22 @@ CREATE TABLE t1 ( a INT PRIMARY KEY, b INT, KEY(b) );
INSERT INTO t1 VALUES
(1,2),(2,1),(3,3),(4,2),(5,5),
(6,3),(7,1),(8,4),(9,3),(10,2);
CREATE TABLE t2 ( c INT, d INT, UNIQUE KEY(c) );
CREATE TABLE t2 ( c INT, d INT, KEY(c) );
INSERT INTO t2 VALUES
(1,2),(2,1),(3,3),(4,2),(5,5),(6,3),(7,1);
analyze table t1,t2;
Table Op Msg_type Msg_text
test.t1 analyze status OK
test.t2 analyze status OK
explain
SELECT a, b, d FROM t1, t2
WHERE ( b, d ) IN
( SELECT b, d FROM t1, t2 WHERE b = c );
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t2 ALL NULL NULL NULL NULL 7
1 PRIMARY t1 index b b 5 NULL 10 Using where; Using index; LooseScan
1 PRIMARY t2 ref c c 5 test.t1.b 1 Using where; FirstMatch(t1)
1 PRIMARY t1 ref b b 5 test.t1.b 2 Using join buffer (flat, BKA join); Key-ordered Rowid-ordered scan
SELECT a, b, d FROM t1, t2
WHERE ( b, d ) IN
( SELECT b, d FROM t1, t2 WHERE b = c );
......@@ -2596,6 +2609,49 @@ a b d
10 2 1
10 2 1
DROP TABLE t1, t2;
# Another testcase for the above that still uses LooseScan:
create table t0(a int primary key);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t10(a int primary key);
insert into t10 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
create table t1 (
pk int primary key auto_increment,
kp1 int,
kp2 int,
filler char(100),
key (kp1, kp2)
);
insert into t1 (kp1, kp2, filler)
select
A.a, B.a, 'filler-data'
from t0 A, t0 B;
create table t2 (a int, filler char(100), key(a));
create table t3 (a int);
insert into t3 values (1),(2);
insert into t2
select (A.a+1000*B.a)/20, 'filler_data' from t10 A, t0 B;
analyze table t1,t2,t3;
Table Op Msg_type Msg_text
test.t1 analyze status Table is already up to date
test.t2 analyze status Table is already up to date
test.t3 analyze status OK
delete from t1 where kp2 in (1,3);
# Ref + LooseScan on t1:
explain select sum(t2.a)
from t2,t3
where (t3.a,t2.a) in (select kp1,kp2 from t1,t0 where t0.a=2);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t0 const PRIMARY PRIMARY 4 const 1 Using index
1 PRIMARY t3 ALL NULL NULL NULL NULL 2 Using where
1 PRIMARY t1 ref kp1 kp1 5 test.t3.a 10 Using where; Using index; LooseScan
1 PRIMARY t2 ref a a 5 test.t1.kp2 20 Using index
select sum(t2.a)
from t2,t3
where (t3.a,t2.a) in (select kp1,kp2 from t1,t0 where t0.a=2);
sum(t2.a)
1640
drop table t0,t10;
drop table t1,t2,t3;
#
# BUG#920713: Wrong result (missing rows) with firstmatch+BNL, IN subquery, ...
#
......
......@@ -67,9 +67,9 @@ insert into t4 select A.a + 10*B.a, A.a + 10*B.a, 'filler' from t0 A, t0 B;
explain select * from t0, t4 where
t4.b=t0.a and t4.a in (select max(t2.a) from t1, t2 group by t2.b);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t0 ALL NULL NULL NULL NULL 10
1 PRIMARY t4 ALL a NULL NULL NULL 100 Using where; Using join buffer (flat, BNL join)
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 test.t4.a 1
1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 5
1 PRIMARY t0 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
1 PRIMARY t4 ref a a 10 <subquery2>.max(t2.a),test.t0.a 1
2 MATERIALIZED t2 ALL NULL NULL NULL NULL 5 Using temporary
2 MATERIALIZED t1 ALL NULL NULL NULL NULL 10 Using join buffer (flat, BNL join)
insert into t4 select 100 + (B.a *100 + A.a), 100 + (B.a*100 + A.a), 'filler' from t4 A, t0 B;
......@@ -77,9 +77,9 @@ explain select * from t4 where
t4.a in (select max(t2.a) from t1, t2 group by t2.b) and
t4.b in (select max(t2.a) from t1, t2 group by t2.b);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 5
1 PRIMARY t4 ref a a 5 <subquery2>.max(t2.a) 12 Using index condition
1 PRIMARY <subquery3> eq_ref distinct_key distinct_key 4 test.t4.b 1
1 PRIMARY <subquery3> ALL distinct_key NULL NULL NULL 5
1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 5 Using join buffer (flat, BNL join)
1 PRIMARY t4 ref a a 10 <subquery2>.max(t2.a),<subquery3>.max(t2.a) 1
3 MATERIALIZED t2 ALL NULL NULL NULL NULL 5 Using temporary
3 MATERIALIZED t1 ALL NULL NULL NULL NULL 10 Using join buffer (flat, BNL join)
2 MATERIALIZED t2 ALL NULL NULL NULL NULL 5 Using temporary
......
......@@ -279,7 +279,7 @@ insert into t2 values
explain select t1.* from t1 left join t2 on t2.a=t1.a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index NULL PRIMARY 10 NULL 2 Using index
1 SIMPLE t2 ref a a 3 test.t1.a 2 Using where
1 SIMPLE t2 ref a a 3 test.t1.a 1 Using where
drop table t1, t2;
#
# check UPDATE/DELETE that look like they could be eliminated
......
......@@ -1102,7 +1102,7 @@ id select_type table type possible_keys key key_len ref rows Extra
EXPLAIN SELECT * FROM t1 AS t2 STRAIGHT_JOIN t1 FORCE INDEX(b) WHERE t1.b=t2.b;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ALL b NULL NULL NULL 2 Using where
1 SIMPLE t1 ref b b 5 test.t2.b 2
1 SIMPLE t1 ref b b 5 test.t2.b 1
EXPLAIN SELECT b FROM t1 FORCE INDEX(b);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index NULL b 5 NULL 2 Using index
......
......@@ -5507,18 +5507,16 @@ add_key_field(JOIN *join,
(*key_fields)->level= and_level;
(*key_fields)->optimize= optimize;
/*
If the condition has form "tbl.keypart = othertbl.field" and
othertbl.field can be NULL, there will be no matches if othertbl.field
has NULL value.
We use null_rejecting in add_not_null_conds() to add
'othertbl.field IS NOT NULL' to tab->select_cond.
If the condition we are analyzing is NULL-rejecting and at least
one side of the equalities is NULLable, mark the KEY_FIELD object as
null-rejecting. This property is used by:
- add_not_null_conds() to add "column IS NOT NULL" conditions
- best_access_path() to produce better estimates for NULL-able unique keys.
*/
{
Item *real= (*value)->real_item();
if (((cond->functype() == Item_func::EQ_FUNC) ||
(cond->functype() == Item_func::MULT_EQUAL_FUNC)) &&
(real->type() == Item::FIELD_ITEM) &&
((Item_field*)real)->field->maybe_null())
if ((cond->functype() == Item_func::EQ_FUNC ||
cond->functype() == Item_func::MULT_EQUAL_FUNC) &&
((*value)->maybe_null || field->real_maybe_null()))
(*key_fields)->null_rejecting= true;
else
(*key_fields)->null_rejecting= false;
......@@ -6834,6 +6832,7 @@ best_access_path(JOIN *join,
ulong key_flags;
uint key_parts;
key_part_map found_part= 0;
key_part_map notnull_part=0; // key parts which won't have NULL in lookup tuple.
table_map found_ref= 0;
uint key= keyuse->key;
bool ft_key= (keyuse->keypart == FT_KEYPART);
......@@ -6892,6 +6891,9 @@ best_access_path(JOIN *join,
if (!(keyuse->used_tables & ~join->const_table_map))
const_part|= keyuse->keypart_map;
if (!keyuse->val->maybe_null || keyuse->null_rejecting)
notnull_part|=keyuse->keypart_map;
double tmp2= prev_record_reads(join->positions, idx,
(found_ref | keyuse->used_tables));
if (tmp2 < best_prev_record_reads)
......@@ -6942,12 +6944,19 @@ best_access_path(JOIN *join,
loose_scan_opt.check_ref_access_part1(s, key, start_key, found_part);
/* Check if we found full key */
if (found_part == PREV_BITS(uint, key_parts) &&
!ref_or_null_part)
const key_part_map all_key_parts= PREV_BITS(uint, key_parts);
if (found_part == all_key_parts && !ref_or_null_part)
{ /* use eq key */
max_key_part= (uint) ~0;
if ((key_flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME ||
MY_TEST(key_flags & HA_EXT_NOSAME))
/*
If the index is a unique index (1), and
- all its columns are not null (2), or
- equalities we are using reject NULLs (3)
then the estimate is rows=1.
*/
if ((key_flags & (HA_NOSAME | HA_EXT_NOSAME)) && // (1)
(!(key_flags & HA_NULL_PART_KEY) || // (2)
all_key_parts == notnull_part)) // (3)
{
tmp = prev_record_reads(join->positions, idx, found_ref);
records=1.0;
......@@ -9991,8 +10000,16 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j,
uint maybe_null= MY_TEST(keyinfo->key_part[i].null_bit);
j->ref.items[i]=keyuse->val; // Save for cond removal
j->ref.cond_guards[i]= keyuse->cond_guard;
if (keyuse->null_rejecting)
/*
Set ref.null_rejecting to true only if we are going to inject a
"keyuse->val IS NOT NULL" predicate.
*/
Item *real= (keyuse->val)->real_item();
if (keyuse->null_rejecting && (real->type() == Item::FIELD_ITEM) &&
((Item_field*)real)->field->maybe_null())
j->ref.null_rejecting|= (key_part_map)1 << i;
keyuse_uses_no_tables= keyuse_uses_no_tables && !keyuse->used_tables;
/*
We don't want to compute heavy expressions in EXPLAIN, an example would
......