Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
mariadb
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
mariadb
Commits
0d67aafa
Commit
0d67aafa
authored
Mar 27, 2014
by
Sergey Petrunya
Browse files
Options
Browse Files
Download
Plain Diff
Merge
parents
1a4c8ae1
ab061a2b
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
301 additions
and
19 deletions
+301
-19
mysql-test/r/selectivity.result
mysql-test/r/selectivity.result
+74
-4
mysql-test/r/selectivity_innodb.result
mysql-test/r/selectivity_innodb.result
+74
-4
mysql-test/t/selectivity.test
mysql-test/t/selectivity.test
+40
-1
sql/sql_statistics.h
sql/sql_statistics.h
+113
-10
No files found.
mysql-test/r/selectivity.result
View file @
0d67aafa
drop table if exists t1,t2,t3;
drop table if exists t
0,t
1,t2,t3;
select @@global.use_stat_tables;
@@global.use_stat_tables
COMPLEMENTARY
...
...
@@ -826,7 +826,7 @@ flush table t1;
set optimizer_use_condition_selectivity=4;
explain extended select * from t1 where a=0;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1025
49.61
Using where
1 SIMPLE t1 ALL NULL NULL NULL NULL 1025
0.39
Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 0)
drop table t1;
...
...
@@ -1308,15 +1308,85 @@ test.t2 analyze status OK
# The following two must have the same in 'Extra' column:
explain extended select * from t2 where col1 IN (20, 180);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.3
7
Using where
1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.3
5
Using where
Warnings:
Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (20,180))
explain extended select * from t2 where col1 IN (180, 20);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.3
7
Using where
1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.3
5
Using where
Warnings:
Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (180,20))
drop table t1, t2;
#
# MDEV-5926: EITS: Histogram estimates for column=least_possible_value are wrong
#
create table t0(a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1(a int);
insert into t1 select A.a from t0 A, t0 B, t0 C;
set histogram_size=20;
set histogram_type='single_prec_hb';
analyze table t1 persistent for all;
Table Op Msg_type Msg_text
test.t1 analyze status OK
set use_stat_tables='preferably';
set optimizer_use_condition_selectivity=4;
# Should select about 10%:
explain extended select * from t1 where a=2;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 2)
# Should select about 10%:
explain extended select * from t1 where a=1;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 1)
# Must not have filtered=100%:
explain extended select * from t1 where a=0;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 0)
# Again, must not have filtered=100%:
explain extended select * from t1 where a=-1;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = <cache>(-(1)))
drop table t0, t1;
#
# MDEV-4362: Selectivity estimates for IN (...) do not depend on whether the values are in range
#
create table t1 (col1 int);
set @a=-1;
create table t2 (a int) select (@a:=@a+1) as a from information_schema.session_variables A limit 100;
insert into t1 select A.a from t2 A, t2 B where A.a < 100 and B.a < 100;
select min(col1), max(col1), count(*) from t1;
min(col1) max(col1) count(*)
0 99 10000
set histogram_size=100;
analyze table t1 persistent for all;
Table Op Msg_type Msg_text
test.t1 analyze status OK
explain extended select * from t1 where col1 in (1,2,3);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 3.37 Using where
Warnings:
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (1,2,3))
# Must not cause fp division by zero, or produce nonsense numbers:
explain extended select * from t1 where col1 in (-1,-2,-3);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 5.94 Using where
Warnings:
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (<cache>(-(1)),<cache>(-(2)),<cache>(-(3))))
explain extended select * from t1 where col1<=-1;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 1.00 Using where
Warnings:
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` <= <cache>(-(1)))
drop table t1, t2;
set histogram_type=@save_histogram_type;
set histogram_size=@save_histogram_size;
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
...
...
mysql-test/r/selectivity_innodb.result
View file @
0d67aafa
SET SESSION STORAGE_ENGINE='InnoDB';
set @save_optimizer_switch_for_selectivity_test=@@optimizer_switch;
set optimizer_switch='extended_keys=on';
drop table if exists t1,t2,t3;
drop table if exists t
0,t
1,t2,t3;
select @@global.use_stat_tables;
@@global.use_stat_tables
COMPLEMENTARY
...
...
@@ -835,7 +835,7 @@ flush table t1;
set optimizer_use_condition_selectivity=4;
explain extended select * from t1 where a=0;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1025
49.61
Using where
1 SIMPLE t1 ALL NULL NULL NULL NULL 1025
0.39
Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 0)
drop table t1;
...
...
@@ -1318,15 +1318,85 @@ test.t2 analyze status OK
# The following two must have the same in 'Extra' column:
explain extended select * from t2 where col1 IN (20, 180);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.3
7
Using where
1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.3
5
Using where
Warnings:
Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (20,180))
explain extended select * from t2 where col1 IN (180, 20);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.3
7
Using where
1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.3
5
Using where
Warnings:
Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (180,20))
drop table t1, t2;
#
# MDEV-5926: EITS: Histogram estimates for column=least_possible_value are wrong
#
create table t0(a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1(a int);
insert into t1 select A.a from t0 A, t0 B, t0 C;
set histogram_size=20;
set histogram_type='single_prec_hb';
analyze table t1 persistent for all;
Table Op Msg_type Msg_text
test.t1 analyze status OK
set use_stat_tables='preferably';
set optimizer_use_condition_selectivity=4;
# Should select about 10%:
explain extended select * from t1 where a=2;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 2)
# Should select about 10%:
explain extended select * from t1 where a=1;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 1)
# Must not have filtered=100%:
explain extended select * from t1 where a=0;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 0)
# Again, must not have filtered=100%:
explain extended select * from t1 where a=-1;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = <cache>(-(1)))
drop table t0, t1;
#
# MDEV-4362: Selectivity estimates for IN (...) do not depend on whether the values are in range
#
create table t1 (col1 int);
set @a=-1;
create table t2 (a int) select (@a:=@a+1) as a from information_schema.session_variables A limit 100;
insert into t1 select A.a from t2 A, t2 B where A.a < 100 and B.a < 100;
select min(col1), max(col1), count(*) from t1;
min(col1) max(col1) count(*)
0 99 10000
set histogram_size=100;
analyze table t1 persistent for all;
Table Op Msg_type Msg_text
test.t1 analyze status OK
explain extended select * from t1 where col1 in (1,2,3);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 3.37 Using where
Warnings:
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (1,2,3))
# Must not cause fp division by zero, or produce nonsense numbers:
explain extended select * from t1 where col1 in (-1,-2,-3);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 5.94 Using where
Warnings:
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (<cache>(-(1)),<cache>(-(2)),<cache>(-(3))))
explain extended select * from t1 where col1<=-1;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 1.00 Using where
Warnings:
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` <= <cache>(-(1)))
drop table t1, t2;
set histogram_type=@save_histogram_type;
set histogram_size=@save_histogram_size;
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
...
...
mysql-test/t/selectivity.test
View file @
0d67aafa
--
source
include
/
have_stat_tables
.
inc
--
disable_warnings
drop
table
if
exists
t1
,
t2
,
t3
;
drop
table
if
exists
t
0
,
t
1
,
t2
,
t3
;
--
enable_warnings
select
@@
global
.
use_stat_tables
;
...
...
@@ -885,6 +885,45 @@ explain extended select * from t2 where col1 IN (180, 20);
drop
table
t1
,
t2
;
--
echo
#
--
echo
# MDEV-5926: EITS: Histogram estimates for column=least_possible_value are wrong
--
echo
#
create
table
t0
(
a
int
);
insert
into
t0
values
(
0
),(
1
),(
2
),(
3
),(
4
),(
5
),(
6
),(
7
),(
8
),(
9
);
create
table
t1
(
a
int
);
insert
into
t1
select
A
.
a
from
t0
A
,
t0
B
,
t0
C
;
set
histogram_size
=
20
;
set
histogram_type
=
'single_prec_hb'
;
analyze
table
t1
persistent
for
all
;
set
use_stat_tables
=
'preferably'
;
set
optimizer_use_condition_selectivity
=
4
;
--
echo
# Should select about 10%:
explain
extended
select
*
from
t1
where
a
=
2
;
--
echo
# Should select about 10%:
explain
extended
select
*
from
t1
where
a
=
1
;
--
echo
# Must not have filtered=100%:
explain
extended
select
*
from
t1
where
a
=
0
;
--
echo
# Again, must not have filtered=100%:
explain
extended
select
*
from
t1
where
a
=-
1
;
drop
table
t0
,
t1
;
--
echo
#
--
echo
# MDEV-4362: Selectivity estimates for IN (...) do not depend on whether the values are in range
--
echo
#
create
table
t1
(
col1
int
);
set
@
a
=-
1
;
create
table
t2
(
a
int
)
select
(
@
a
:=@
a
+
1
)
as
a
from
information_schema
.
session_variables
A
limit
100
;
insert
into
t1
select
A
.
a
from
t2
A
,
t2
B
where
A
.
a
<
100
and
B
.
a
<
100
;
select
min
(
col1
),
max
(
col1
),
count
(
*
)
from
t1
;
set
histogram_size
=
100
;
analyze
table
t1
persistent
for
all
;
explain
extended
select
*
from
t1
where
col1
in
(
1
,
2
,
3
);
--
echo
# Must not cause fp division by zero, or produce nonsense numbers:
explain
extended
select
*
from
t1
where
col1
in
(
-
1
,
-
2
,
-
3
);
explain
extended
select
*
from
t1
where
col1
<=-
1
;
drop
table
t1
,
t2
;
set
histogram_type
=@
save_histogram_type
;
set
histogram_size
=@
save_histogram_size
;
set
optimizer_use_condition_selectivity
=@
save_optimizer_use_condition_selectivity
;
...
...
sql/sql_statistics.h
View file @
0d67aafa
...
...
@@ -113,7 +113,7 @@ class Histogram
private:
Histogram_type
type
;
uint8
size
;
uint8
size
;
/* Size of values array, in bytes */
uchar
*
values
;
uint
prec_factor
()
...
...
@@ -142,6 +142,7 @@ public:
private:
uint
get_value
(
uint
i
)
{
DBUG_ASSERT
(
i
<
get_width
());
switch
(
type
)
{
case
SINGLE_PREC_HB
:
return
(
uint
)
(((
uint8
*
)
values
)[
i
]);
...
...
@@ -151,6 +152,7 @@ private:
return
0
;
}
/* Find the bucket which value 'pos' falls into. */
uint
find_bucket
(
double
pos
,
bool
first
)
{
uint
val
=
(
uint
)
(
pos
*
prec_factor
());
...
...
@@ -169,6 +171,10 @@ private:
else
break
;
}
if
(
val
>
get_value
(
i
)
&&
i
<
(
get_width
()
-
1
))
i
++
;
if
(
val
==
get_value
(
i
))
{
if
(
first
)
...
...
@@ -235,23 +241,120 @@ public:
return
sel
;
}
/*
Estimate selectivity of "col=const" using a histogram
@param pos Position of the "const" between column's min_value and
max_value. This is a number in [0..1] range.
@param avg_sel Average selectivity of condition "col=const" in this table.
It is calcuated as (#non_null_values / #distinct_values).
@return
Expected condition selectivity (a number between 0 and 1)
@notes
[re_zero_length_buckets] If a bucket with zero value-length is in the
middle of the histogram, we will not have min==max. Example: suppose,
pos_value=0x12, and the histogram is:
#n #n+1 #n+2
... 0x10 0x12 0x12 0x14 ...
|
+------------- bucket with zero value-length
Here, we will get min=#n+1, max=#n+2, and use the multi-bucket formula.
The problem happens at the histogram ends. if pos_value=0, and the
histogram is:
0x00 0x10 ...
then min=0, max=0. This means pos_value is contained within bucket #0,
but on the other hand, histogram data says that the bucket has only one
value.
*/
double
point_selectivity
(
double
pos
,
double
avg_sel
)
{
double
sel
;
double
bucket_sel
=
1.0
/
(
get_width
()
+
1
);
/* Find the bucket that contains the value 'pos'. */
uint
min
=
find_bucket
(
pos
,
TRUE
);
uint
pos_value
=
(
uint
)
(
pos
*
prec_factor
());
/* Find how many buckets this value occupies */
uint
max
=
min
;
while
(
max
+
1
<
get_width
()
&&
get_value
(
max
+
1
)
==
get_value
(
max
)
)
while
(
max
+
1
<
get_width
()
&&
get_value
(
max
+
1
)
==
pos_value
)
max
++
;
/*
A special case: we're looking at a single bucket, and that bucket has
zero value-length. Use the multi-bucket formula (attempt to use
single-bucket formula will cause divison by zero).
For more details see [re_zero_length_buckets] above.
*/
if
(
max
==
min
&&
get_value
(
max
)
==
((
max
==
0
)
?
0
:
get_value
(
max
-
1
)))
max
++
;
if
(
max
>
min
)
{
/*
The value occupies multiple buckets. Use start_bucket ... end_bucket as
selectivity.
*/
double
bucket_sel
=
1.0
/
(
get_width
()
+
1
);
sel
=
bucket_sel
*
(
max
-
min
+
1
);
}
else
{
/*
The value 'pos' fits within one single histogram bucket.
Histogram buckets have the same numbers of rows, but they cover
different ranges of values.
We assume that values are uniformly distributed across the [0..1] value
range.
*/
/*
If all buckets covered value ranges of the same size, the width of
value range would be:
*/
double
avg_bucket_width
=
1.0
/
(
get_width
()
+
1
);
/*
Let's see what is the width of value range that our bucket is covering.
(min==max currently. they are kept in the formula just in case we
will want to extend it to handle multi-bucket case)
*/
double
inv_prec_factor
=
(
double
)
1.0
/
prec_factor
();
double
width
=
(
max
+
1
==
get_width
()
?
1.0
:
get_value
(
max
)
*
inv_prec_factor
)
-
(
min
==
0
?
0.0
:
get_value
(
min
-
1
)
*
inv_prec_factor
);
sel
=
avg_sel
*
(
bucket_sel
*
(
max
+
1
-
min
))
/
width
;
double
current_bucket_width
=
(
max
+
1
==
get_width
()
?
1.0
:
(
get_value
(
max
)
*
inv_prec_factor
))
-
(
min
==
0
?
0.0
:
(
get_value
(
min
-
1
)
*
inv_prec_factor
));
DBUG_ASSERT
(
current_bucket_width
);
/* We shouldn't get a one zero-width bucket */
/*
So:
- each bucket has the same #rows
- values are unformly distributed across the [min_value,max_value] domain.
If a bucket has value range that's N times bigger then average, than
each value will have to have N times fewer rows than average.
*/
sel
=
avg_sel
*
avg_bucket_width
/
current_bucket_width
;
/*
(Q: if we just follow this proportion we may end up in a situation
where number of different values we expect to find in this bucket
exceeds the number of rows that this histogram has in a bucket. Are
we ok with this or we would want to have certain caps?)
*/
}
return
sel
;
}
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment