Commit 0d67aafa authored by Sergey Petrunya's avatar Sergey Petrunya

Merge

parents 1a4c8ae1 ab061a2b
drop table if exists t1,t2,t3; drop table if exists t0,t1,t2,t3;
select @@global.use_stat_tables; select @@global.use_stat_tables;
@@global.use_stat_tables @@global.use_stat_tables
COMPLEMENTARY COMPLEMENTARY
...@@ -826,7 +826,7 @@ flush table t1; ...@@ -826,7 +826,7 @@ flush table t1;
set optimizer_use_condition_selectivity=4; set optimizer_use_condition_selectivity=4;
explain extended select * from t1 where a=0; explain extended select * from t1 where a=0;
id select_type table type possible_keys key key_len ref rows filtered Extra id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1025 49.61 Using where 1 SIMPLE t1 ALL NULL NULL NULL NULL 1025 0.39 Using where
Warnings: Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 0) Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 0)
drop table t1; drop table t1;
...@@ -1308,15 +1308,85 @@ test.t2 analyze status OK ...@@ -1308,15 +1308,85 @@ test.t2 analyze status OK
# The following two must have the same in 'Extra' column: # The following two must have the same in 'Extra' column:
explain extended select * from t2 where col1 IN (20, 180); explain extended select * from t2 where col1 IN (20, 180);
id select_type table type possible_keys key key_len ref rows filtered Extra id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.37 Using where 1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.35 Using where
Warnings: Warnings:
Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (20,180)) Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (20,180))
explain extended select * from t2 where col1 IN (180, 20); explain extended select * from t2 where col1 IN (180, 20);
id select_type table type possible_keys key key_len ref rows filtered Extra id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.37 Using where 1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.35 Using where
Warnings: Warnings:
Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (180,20)) Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (180,20))
drop table t1, t2; drop table t1, t2;
#
# MDEV-5926: EITS: Histogram estimates for column=least_possible_value are wrong
#
create table t0(a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1(a int);
insert into t1 select A.a from t0 A, t0 B, t0 C;
set histogram_size=20;
set histogram_type='single_prec_hb';
analyze table t1 persistent for all;
Table Op Msg_type Msg_text
test.t1 analyze status OK
set use_stat_tables='preferably';
set optimizer_use_condition_selectivity=4;
# Should select about 10%:
explain extended select * from t1 where a=2;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 2)
# Should select about 10%:
explain extended select * from t1 where a=1;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 1)
# Must not have filtered=100%:
explain extended select * from t1 where a=0;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 0)
# Again, must not have filtered=100%:
explain extended select * from t1 where a=-1;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = <cache>(-(1)))
drop table t0, t1;
#
# MDEV-4362: Selectivity estimates for IN (...) do not depend on whether the values are in range
#
create table t1 (col1 int);
set @a=-1;
create table t2 (a int) select (@a:=@a+1) as a from information_schema.session_variables A limit 100;
insert into t1 select A.a from t2 A, t2 B where A.a < 100 and B.a < 100;
select min(col1), max(col1), count(*) from t1;
min(col1) max(col1) count(*)
0 99 10000
set histogram_size=100;
analyze table t1 persistent for all;
Table Op Msg_type Msg_text
test.t1 analyze status OK
explain extended select * from t1 where col1 in (1,2,3);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 3.37 Using where
Warnings:
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (1,2,3))
# Must not cause fp division by zero, or produce nonsense numbers:
explain extended select * from t1 where col1 in (-1,-2,-3);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 5.94 Using where
Warnings:
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (<cache>(-(1)),<cache>(-(2)),<cache>(-(3))))
explain extended select * from t1 where col1<=-1;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 1.00 Using where
Warnings:
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` <= <cache>(-(1)))
drop table t1, t2;
set histogram_type=@save_histogram_type; set histogram_type=@save_histogram_type;
set histogram_size=@save_histogram_size; set histogram_size=@save_histogram_size;
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
......
SET SESSION STORAGE_ENGINE='InnoDB'; SET SESSION STORAGE_ENGINE='InnoDB';
set @save_optimizer_switch_for_selectivity_test=@@optimizer_switch; set @save_optimizer_switch_for_selectivity_test=@@optimizer_switch;
set optimizer_switch='extended_keys=on'; set optimizer_switch='extended_keys=on';
drop table if exists t1,t2,t3; drop table if exists t0,t1,t2,t3;
select @@global.use_stat_tables; select @@global.use_stat_tables;
@@global.use_stat_tables @@global.use_stat_tables
COMPLEMENTARY COMPLEMENTARY
...@@ -835,7 +835,7 @@ flush table t1; ...@@ -835,7 +835,7 @@ flush table t1;
set optimizer_use_condition_selectivity=4; set optimizer_use_condition_selectivity=4;
explain extended select * from t1 where a=0; explain extended select * from t1 where a=0;
id select_type table type possible_keys key key_len ref rows filtered Extra id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1025 49.61 Using where 1 SIMPLE t1 ALL NULL NULL NULL NULL 1025 0.39 Using where
Warnings: Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 0) Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 0)
drop table t1; drop table t1;
...@@ -1318,15 +1318,85 @@ test.t2 analyze status OK ...@@ -1318,15 +1318,85 @@ test.t2 analyze status OK
# The following two must have the same in 'Extra' column: # The following two must have the same in 'Extra' column:
explain extended select * from t2 where col1 IN (20, 180); explain extended select * from t2 where col1 IN (20, 180);
id select_type table type possible_keys key key_len ref rows filtered Extra id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.37 Using where 1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.35 Using where
Warnings: Warnings:
Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (20,180)) Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (20,180))
explain extended select * from t2 where col1 IN (180, 20); explain extended select * from t2 where col1 IN (180, 20);
id select_type table type possible_keys key key_len ref rows filtered Extra id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.37 Using where 1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.35 Using where
Warnings: Warnings:
Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (180,20)) Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (180,20))
drop table t1, t2; drop table t1, t2;
#
# MDEV-5926: EITS: Histogram estimates for column=least_possible_value are wrong
#
create table t0(a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1(a int);
insert into t1 select A.a from t0 A, t0 B, t0 C;
set histogram_size=20;
set histogram_type='single_prec_hb';
analyze table t1 persistent for all;
Table Op Msg_type Msg_text
test.t1 analyze status OK
set use_stat_tables='preferably';
set optimizer_use_condition_selectivity=4;
# Should select about 10%:
explain extended select * from t1 where a=2;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 2)
# Should select about 10%:
explain extended select * from t1 where a=1;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 1)
# Must not have filtered=100%:
explain extended select * from t1 where a=0;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 0)
# Again, must not have filtered=100%:
explain extended select * from t1 where a=-1;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = <cache>(-(1)))
drop table t0, t1;
#
# MDEV-4362: Selectivity estimates for IN (...) do not depend on whether the values are in range
#
create table t1 (col1 int);
set @a=-1;
create table t2 (a int) select (@a:=@a+1) as a from information_schema.session_variables A limit 100;
insert into t1 select A.a from t2 A, t2 B where A.a < 100 and B.a < 100;
select min(col1), max(col1), count(*) from t1;
min(col1) max(col1) count(*)
0 99 10000
set histogram_size=100;
analyze table t1 persistent for all;
Table Op Msg_type Msg_text
test.t1 analyze status OK
explain extended select * from t1 where col1 in (1,2,3);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 3.37 Using where
Warnings:
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (1,2,3))
# Must not cause fp division by zero, or produce nonsense numbers:
explain extended select * from t1 where col1 in (-1,-2,-3);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 5.94 Using where
Warnings:
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (<cache>(-(1)),<cache>(-(2)),<cache>(-(3))))
explain extended select * from t1 where col1<=-1;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 1.00 Using where
Warnings:
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` <= <cache>(-(1)))
drop table t1, t2;
set histogram_type=@save_histogram_type; set histogram_type=@save_histogram_type;
set histogram_size=@save_histogram_size; set histogram_size=@save_histogram_size;
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
......
--source include/have_stat_tables.inc --source include/have_stat_tables.inc
--disable_warnings --disable_warnings
drop table if exists t1,t2,t3; drop table if exists t0,t1,t2,t3;
--enable_warnings --enable_warnings
select @@global.use_stat_tables; select @@global.use_stat_tables;
...@@ -885,6 +885,45 @@ explain extended select * from t2 where col1 IN (180, 20); ...@@ -885,6 +885,45 @@ explain extended select * from t2 where col1 IN (180, 20);
drop table t1, t2; drop table t1, t2;
--echo #
--echo # MDEV-5926: EITS: Histogram estimates for column=least_possible_value are wrong
--echo #
create table t0(a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1(a int);
insert into t1 select A.a from t0 A, t0 B, t0 C;
set histogram_size=20;
set histogram_type='single_prec_hb';
analyze table t1 persistent for all;
set use_stat_tables='preferably';
set optimizer_use_condition_selectivity=4;
--echo # Should select about 10%:
explain extended select * from t1 where a=2;
--echo # Should select about 10%:
explain extended select * from t1 where a=1;
--echo # Must not have filtered=100%:
explain extended select * from t1 where a=0;
--echo # Again, must not have filtered=100%:
explain extended select * from t1 where a=-1;
drop table t0, t1;
--echo #
--echo # MDEV-4362: Selectivity estimates for IN (...) do not depend on whether the values are in range
--echo #
create table t1 (col1 int);
set @a=-1;
create table t2 (a int) select (@a:=@a+1) as a from information_schema.session_variables A limit 100;
insert into t1 select A.a from t2 A, t2 B where A.a < 100 and B.a < 100;
select min(col1), max(col1), count(*) from t1;
set histogram_size=100;
analyze table t1 persistent for all;
explain extended select * from t1 where col1 in (1,2,3);
--echo # Must not cause fp division by zero, or produce nonsense numbers:
explain extended select * from t1 where col1 in (-1,-2,-3);
explain extended select * from t1 where col1<=-1;
drop table t1, t2;
set histogram_type=@save_histogram_type; set histogram_type=@save_histogram_type;
set histogram_size=@save_histogram_size; set histogram_size=@save_histogram_size;
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
......
...@@ -113,7 +113,7 @@ class Histogram ...@@ -113,7 +113,7 @@ class Histogram
private: private:
Histogram_type type; Histogram_type type;
uint8 size; uint8 size; /* Size of values array, in bytes */
uchar *values; uchar *values;
uint prec_factor() uint prec_factor()
...@@ -142,6 +142,7 @@ public: ...@@ -142,6 +142,7 @@ public:
private: private:
uint get_value(uint i) uint get_value(uint i)
{ {
DBUG_ASSERT(i < get_width());
switch (type) { switch (type) {
case SINGLE_PREC_HB: case SINGLE_PREC_HB:
return (uint) (((uint8 *) values)[i]); return (uint) (((uint8 *) values)[i]);
...@@ -151,6 +152,7 @@ private: ...@@ -151,6 +152,7 @@ private:
return 0; return 0;
} }
/* Find the bucket which value 'pos' falls into. */
uint find_bucket(double pos, bool first) uint find_bucket(double pos, bool first)
{ {
uint val= (uint) (pos * prec_factor()); uint val= (uint) (pos * prec_factor());
...@@ -169,6 +171,10 @@ private: ...@@ -169,6 +171,10 @@ private:
else else
break; break;
} }
if (val > get_value(i) && i < (get_width() - 1))
i++;
if (val == get_value(i)) if (val == get_value(i))
{ {
if (first) if (first)
...@@ -235,23 +241,120 @@ public: ...@@ -235,23 +241,120 @@ public:
return sel; return sel;
} }
/*
Estimate selectivity of "col=const" using a histogram
@param pos Position of the "const" between column's min_value and
max_value. This is a number in [0..1] range.
@param avg_sel Average selectivity of condition "col=const" in this table.
It is calcuated as (#non_null_values / #distinct_values).
@return
Expected condition selectivity (a number between 0 and 1)
@notes
[re_zero_length_buckets] If a bucket with zero value-length is in the
middle of the histogram, we will not have min==max. Example: suppose,
pos_value=0x12, and the histogram is:
#n #n+1 #n+2
... 0x10 0x12 0x12 0x14 ...
|
+------------- bucket with zero value-length
Here, we will get min=#n+1, max=#n+2, and use the multi-bucket formula.
The problem happens at the histogram ends. if pos_value=0, and the
histogram is:
0x00 0x10 ...
then min=0, max=0. This means pos_value is contained within bucket #0,
but on the other hand, histogram data says that the bucket has only one
value.
*/
double point_selectivity(double pos, double avg_sel) double point_selectivity(double pos, double avg_sel)
{ {
double sel; double sel;
double bucket_sel= 1.0/(get_width() + 1); /* Find the bucket that contains the value 'pos'. */
uint min= find_bucket(pos, TRUE); uint min= find_bucket(pos, TRUE);
uint pos_value= (uint) (pos * prec_factor());
/* Find how many buckets this value occupies */
uint max= min; uint max= min;
while (max + 1 < get_width() && get_value(max + 1) == get_value(max)) while (max + 1 < get_width() && get_value(max + 1) == pos_value)
max++; max++;
/*
A special case: we're looking at a single bucket, and that bucket has
zero value-length. Use the multi-bucket formula (attempt to use
single-bucket formula will cause divison by zero).
For more details see [re_zero_length_buckets] above.
*/
if (max == min && get_value(max) == ((max==0)? 0 : get_value(max-1)))
max++;
if (max > min)
{
/*
The value occupies multiple buckets. Use start_bucket ... end_bucket as
selectivity.
*/
double bucket_sel= 1.0/(get_width() + 1);
sel= bucket_sel * (max - min + 1);
}
else
{
/*
The value 'pos' fits within one single histogram bucket.
Histogram buckets have the same numbers of rows, but they cover
different ranges of values.
We assume that values are uniformly distributed across the [0..1] value
range.
*/
/*
If all buckets covered value ranges of the same size, the width of
value range would be:
*/
double avg_bucket_width= 1.0 / (get_width() + 1);
/*
Let's see what is the width of value range that our bucket is covering.
(min==max currently. they are kept in the formula just in case we
will want to extend it to handle multi-bucket case)
*/
double inv_prec_factor= (double) 1.0 / prec_factor(); double inv_prec_factor= (double) 1.0 / prec_factor();
double width= (max + 1 == get_width() ? double current_bucket_width=
1.0 : get_value(max) * inv_prec_factor) - (max + 1 == get_width() ? 1.0 : (get_value(max) * inv_prec_factor)) -
(min == 0 ? (min == 0 ? 0.0 : (get_value(min-1) * inv_prec_factor));
0.0 : get_value(min-1) * inv_prec_factor);
sel= avg_sel * (bucket_sel * (max + 1 - min)) / width; DBUG_ASSERT(current_bucket_width); /* We shouldn't get a one zero-width bucket */
/*
So:
- each bucket has the same #rows
- values are unformly distributed across the [min_value,max_value] domain.
If a bucket has value range that's N times bigger then average, than
each value will have to have N times fewer rows than average.
*/
sel= avg_sel * avg_bucket_width / current_bucket_width;
/*
(Q: if we just follow this proportion we may end up in a situation
where number of different values we expect to find in this bucket
exceeds the number of rows that this histogram has in a bucket. Are
we ok with this or we would want to have certain caps?)
*/
}
return sel; return sel;
} }
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment