Commit 4886d148 authored by Varun Gupta's avatar Varun Gupta

MDEV-17032: Estimates are higher for partitions of a table with @@use_stat_tables= PREFERABLY

The problem here is EITS statistics does not calculate statistics for the partitions of the table.
So a temporary solution would be to not read EITS statistics for partitioned tables.

Also disabling reading of EITS for columns that participate in the partition list of a table.
parent 12b1ba19
......@@ -2645,3 +2645,103 @@ Warnings:
Note 1517 Duplicate partition name p2
DEALLOCATE PREPARE stmt;
DROP TABLE t1;
#
# MDEV-17032: Estimates are higher for partitions of a table with @@use_stat_tables= PREFERABLY
#
create table t0(a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1(a int);
insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
create table t2 (
part_key int,
a int,
b int
) partition by list(part_key) (
partition p0 values in (0),
partition p1 values in (1),
partition p2 values in (2),
partition p3 values in (3),
partition p4 values in (4)
);
insert into t2
select mod(a,5), a/100, mod(a,5) from t1;
set @save_use_stat_tables= @@use_stat_tables;
set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
#
# Tests using stats provided by the storage engine
#
explain extended select * from t2 where part_key=1;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 200 100.00 Using where
Warnings:
Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` = 1)
explain partitions select * from t2 where part_key=1;
id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t2 p1 ALL NULL NULL NULL NULL 200 Using where
explain extended select * from t2 where part_key in (1,2);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 400 100.00 Using where
Warnings:
Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` in (1,2))
explain partitions select * from t2 where part_key in (1,2);
id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t2 p1,p2 ALL NULL NULL NULL NULL 400 Using where
explain extended select * from t2 where b=5;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 1000 100.00 Using where
Warnings:
Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`b` = 5)
explain partitions select * from t2 where b=5;
id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t2 p0,p1,p2,p3,p4 ALL NULL NULL NULL NULL 1000 Using where
explain extended select * from t2 partition(p0) where b=1;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 200 100.00 Using where
Warnings:
Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` PARTITION (`p0`) where (`test`.`t2`.`b` = 1)
set @save_histogram_size=@@histogram_size;
set @@histogram_size=100;
set @@use_stat_tables= PREFERABLY;
set @@optimizer_use_condition_selectivity=4;
analyze table t2;
Table Op Msg_type Msg_text
test.t2 analyze status Engine-independent statistics collected
test.t2 analyze status OK
#
# Tests using EITS
#
# filtered should be 100
explain extended select * from t2 where part_key=1;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 200 100.00 Using where
Warnings:
Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` = 1)
explain partitions select * from t2 where part_key=1;
id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t2 p1 ALL NULL NULL NULL NULL 200 Using where
# filtered should be 100
explain extended select * from t2 where part_key in (1,2);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 400 100.00 Using where
Warnings:
Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` in (1,2))
explain partitions select * from t2 where part_key in (1,2);
id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t2 p1,p2 ALL NULL NULL NULL NULL 400 Using where
explain extended select * from t2 where b=5;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 1000 19.80 Using where
Warnings:
Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`b` = 5)
explain partitions select * from t2 where b=5;
id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t2 p0,p1,p2,p3,p4 ALL NULL NULL NULL NULL 1000 Using where
explain extended select * from t2 partition(p0) where b=1;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 200 19.80 Using where
Warnings:
Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` PARTITION (`p0`) where (`test`.`t2`.`b` = 1)
set @@use_stat_tables= @save_use_stat_tables;
set @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
set @@histogram_size= @save_histogram_size;
drop table t0,t1,t2;
......@@ -2897,3 +2897,64 @@ EXECUTE stmt;
DEALLOCATE PREPARE stmt;
DROP TABLE t1;
--echo #
--echo # MDEV-17032: Estimates are higher for partitions of a table with @@use_stat_tables= PREFERABLY
--echo #
create table t0(a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1(a int);
insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
create table t2 (
part_key int,
a int,
b int
) partition by list(part_key) (
partition p0 values in (0),
partition p1 values in (1),
partition p2 values in (2),
partition p3 values in (3),
partition p4 values in (4)
);
insert into t2
select mod(a,5), a/100, mod(a,5) from t1;
set @save_use_stat_tables= @@use_stat_tables;
set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
--echo #
--echo # Tests using stats provided by the storage engine
--echo #
explain extended select * from t2 where part_key=1;
explain partitions select * from t2 where part_key=1;
explain extended select * from t2 where part_key in (1,2);
explain partitions select * from t2 where part_key in (1,2);
explain extended select * from t2 where b=5;
explain partitions select * from t2 where b=5;
explain extended select * from t2 partition(p0) where b=1;
set @save_histogram_size=@@histogram_size;
set @@histogram_size=100;
set @@use_stat_tables= PREFERABLY;
set @@optimizer_use_condition_selectivity=4;
analyze table t2;
--echo #
--echo # Tests using EITS
--echo #
--echo # filtered should be 100
explain extended select * from t2 where part_key=1;
explain partitions select * from t2 where part_key=1;
--echo # filtered should be 100
explain extended select * from t2 where part_key in (1,2);
explain partitions select * from t2 where part_key in (1,2);
explain extended select * from t2 where b=5;
explain partitions select * from t2 where b=5;
explain extended select * from t2 partition(p0) where b=1;
set @@use_stat_tables= @save_use_stat_tables;
set @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
set @@histogram_size= @save_histogram_size;
drop table t0,t1,t2;
......@@ -3322,14 +3322,17 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
{
Field **field_ptr;
TABLE *table= param->table;
partition_info *part_info= NULL;
#ifdef WITH_PARTITION_STORAGE_ENGINE
part_info= table->part_info;
#endif
uint parts= 0;
for (field_ptr= table->field; *field_ptr; field_ptr++)
{
Column_statistics* col_stats= (*field_ptr)->read_stats;
if (bitmap_is_set(used_fields, (*field_ptr)->field_index)
&& col_stats && !col_stats->no_stat_values_provided()
&& !((*field_ptr)->type() == MYSQL_TYPE_GEOMETRY))
Field *field= *field_ptr;
if (bitmap_is_set(used_fields, field->field_index) &&
is_eits_usable(field))
parts++;
}
......@@ -3346,13 +3349,11 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
param->key_parts= key_part;
uint max_key_len= 0;
for (field_ptr= table->field; *field_ptr; field_ptr++)
{
if (bitmap_is_set(used_fields, (*field_ptr)->field_index))
{
Field *field= *field_ptr;
Column_statistics* col_stats= field->read_stats;
if (field->type() == MYSQL_TYPE_GEOMETRY ||
!col_stats || col_stats->no_stat_values_provided())
if (bitmap_is_set(used_fields, field->field_index))
{
if (!is_eits_usable(field))
continue;
uint16 store_length;
......
......@@ -3164,6 +3164,23 @@ void partition_info::print_debug(const char *str, uint *value)
DBUG_PRINT("info", ("parser: %s", str));
DBUG_VOID_RETURN;
}
bool partition_info::field_in_partition_expr(Field *field) const
{
uint i;
for (i= 0; i < num_part_fields; i++)
{
if (field->eq(part_field_array[i]))
return TRUE;
}
for (i= 0; i < num_subpart_fields; i++)
{
if (field->eq(subpart_field_array[i]))
return TRUE;
}
return FALSE;
}
#else /* WITH_PARTITION_STORAGE_ENGINE */
/*
For builds without partitioning we need to define these functions
......
......@@ -384,6 +384,7 @@ class partition_info : public Sql_alloc
bool is_full_part_expr_in_fields(List<Item> &fields);
public:
bool has_unique_name(partition_element *element);
bool field_in_partition_expr(Field *field) const;
};
uint32 get_next_partition_id_range(struct st_partition_iter* part_iter);
......
......@@ -30,6 +30,7 @@
#include "opt_range.h"
#include "my_atomic.h"
#include "sql_show.h"
#include "sql_partition.h"
/*
The system variable 'use_stat_tables' can take one of the
......@@ -3589,6 +3590,22 @@ void set_statistics_for_table(THD *thd, TABLE *table)
(use_stat_table_mode <= COMPLEMENTARY ||
!table->stats_is_read || read_stats->cardinality_is_null) ?
table->file->stats.records : read_stats->cardinality;
/*
For partitioned table, EITS statistics is based on data from all partitions.
On the other hand, Partition Pruning figures which partitions will be
accessed and then computes the estimate of rows in used_partitions.
Use the estimate from Partition Pruning as it is typically more precise.
Ideally, EITS should provide per-partition statistics but this is not
implemented currently.
*/
#ifdef WITH_PARTITION_STORAGE_ENGINE
if (table->part_info)
table->used_stat_records= table->file->stats.records;
#endif
KEY *key_info, *key_info_end;
for (key_info= table->key_info, key_info_end= key_info+table->s->keys;
key_info < key_info_end; key_info++)
......@@ -3904,3 +3921,31 @@ bool is_stat_table(const char *db, const char *table)
}
return false;
}
/*
Check wheter we can use EITS statistics for a field or not
TRUE : Use EITS for the columns
FALSE: Otherwise
*/
bool is_eits_usable(Field *field)
{
partition_info *part_info= NULL;
#ifdef WITH_PARTITION_STORAGE_ENGINE
part_info= field->table->part_info;
#endif
/*
(1): checks if we have EITS statistics for a particular column
(2): Don't use EITS for GEOMETRY columns
(3): Disabling reading EITS statistics for columns involved in the
partition list of a table. We assume the selecticivity for
such columns would be handled during partition pruning.
*/
Column_statistics* col_stats= field->read_stats;
if (col_stats && !col_stats->no_stat_values_provided() && //(1)
field->type() != MYSQL_TYPE_GEOMETRY && //(2)
(!part_info || !part_info->field_in_partition_expr(field))) //(3)
return TRUE;
return FALSE;
}
......@@ -109,6 +109,7 @@ double get_column_range_cardinality(Field *field,
key_range *max_endp,
uint range_flag);
bool is_stat_table(const char *db, const char *table);
bool is_eits_usable(Field* field);
class Histogram
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment