Commit 5972f5c2 authored by Sergei Petrunia's avatar Sergei Petrunia

MDEV-33314: Crash in calculate_cond_selectivity_for_table() with many columns

Variant#3: moved the logic out of create_key_parts_for_pseudo_indexes

Range Analyzer (get_mm_tree functions) can only process up to MAX_KEY=64
indexes. The problem was that calculate_cond_selectivity_for_table() used
it to estimate selectivities for columns and, since a table can have more
than MAX_KEY columns, it could invoke Range Analyzer with more than MAX_KEY
"pseudo-indexes".

Fixed by making calculate_cond_selectivity_for_table() run Range
Analyzer with at most MAX_KEY pseudo-indexes at a time. If there are more
columns to process, Range Analyzer will be invoked multiple times.

Also made this change:
-    param.real_keynr[0]= 0;
+    MEM_UNDEFINED(&param.real_keynr, sizeof(param.real_keynr));

Range Analyzer should have no use for real_keynr when it is run with
pseudo-indexes.
parent 78662dda
...@@ -105,17 +105,113 @@ from information_schema.optimizer_trace; ...@@ -105,17 +105,113 @@ from information_schema.optimizer_trace;
set optimizer_trace=@tmp; set optimizer_trace=@tmp;
drop table t0,t1,t10; drop table t0,t1,t10;
set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
set histogram_size=@save_histogram_size; set histogram_size=@save_histogram_size;
set use_stat_tables= @save_use_stat_tables;
--echo # --echo #
--echo # End of 10.4 tests --echo # End of 10.4 tests
--echo # --echo #
--echo #
--echo # MDEV-33314: Crash inside calculate_cond_selectivity_for_table() with many columns
--echo #
# Regression test: build a table with $N_CONDS columns and run queries whose
# WHERE clause references all of them, then inspect "selectivity_for_columns"
# in the optimizer trace.
set optimizer_use_condition_selectivity= 4;
set use_stat_tables= preferably;
# $N_CONDS is the number of columns (col0 .. col159) and of conditions;
# $N_LAST_COND is the number of the last column. 160 is well above the
# MAX_KEY=64 limit mentioned in the commit message.
let $N_CONDS=160;
let $N_LAST_COND=159;
--echo #
--echo # create table t1 (col0 int, col1 int, col2 int, ...);
--echo #
# Assemble the CREATE TABLE text in $create_tbl: start with col0, then
# append ", col<i> int" for i = 1 .. $N_CONDS-1, and finally close the
# parenthesis.
let $create_tbl= create table t1 ( col0 int;
let $i=1;
while ($i < $N_CONDS) {
let $create_tbl= $create_tbl, col$i int;
let $i=`select $i + 1`;
}
let $create_tbl= $create_tbl );
#echo $create_tbl;
evalp $create_tbl;
--echo #
--echo # insert into t1 select seq, ... seq from seq_1_to_10;
--echo #
# Assemble an INSERT ... SELECT that supplies "seq" for every column.
# NOTE(review): the echoed description above says seq_1_to_10, but the
# statement built below reads from seq_1_to_100.
let $insert_cmd= insert into t1 select seq;
let $i=1;
while ($i < $N_CONDS) {
let $insert_cmd = $insert_cmd ,seq;
let $i=`select $i + 1`;
}
let $insert_cmd= $insert_cmd from seq_1_to_100;
# echo $insert_cmd;
evalp $insert_cmd;
analyze table t1 persistent for all;
# Save the current optimizer_trace setting (restored at the end of this
# testcase) and switch tracing on.
set @trace_tmp=@@optimizer_trace;
set optimizer_trace=1;
--echo #
--echo # Basic testcase: don't crash for many-column selectivity
--echo # explain extended select * from t1 where col0>1 and col1>1 and col2>1 and ...
--echo #
# Assemble "col0>1 and col1>1 and ... and col159>1".
# NOTE(review): the echoed description says "explain extended", while the
# statement actually built uses "explain format=json".
let $query_tbl= explain format=json select * from t1 where col0>1;
let $i=1;
while ($i < $N_CONDS) {
let $query_tbl= $query_tbl and col$i>1;
let $i=`select $i + 1`;
}
#echo $query_tbl;
evalp $query_tbl;
# Show the trace entry for the first column ...
select
json_detailed(json_extract(trace,'$**.selectivity_for_columns[0]')) as JS
from
information_schema.optimizer_trace;
# ... then run the query again and show the entry for the last column.
# Presumably the query is repeated because the SELECT above replaced the
# stored trace - TODO confirm. "eval" is used so that $N_LAST_COND gets
# expanded; the "\$" presumably keeps the JSON path's "$" literal.
evalp $query_tbl;
eval select
json_detailed(json_extract(trace,'\$**.selectivity_for_columns[$N_LAST_COND]')) as JS
from
information_schema.optimizer_trace;
--echo #
--echo # Check if not being able to infer anything for the first MAX_KEY
--echo # columns doesn't prevent further inferences.
--echo #
--echo # explain extended select * from t1
--echo # where (1>2 or col0>1 or col1>1 or ...) and col99>1
--echo #
# Assemble "(1>2 or col1>1 or ... or col158>1) and col159>1".
# NOTE(review): the echoed description differs from the generated statement
# in three places: it says "explain extended" (actual: explain format=json),
# it lists col0 in the OR list (the loop starts at $i=1, so col0 is not
# included), and it names col99 as the final condition (actual:
# col$N_LAST_COND, i.e. col159).
let $query_tbl= explain format=json select * from t1 where (1>2 ;
let $i=1;
while ($i < $N_LAST_COND) {
let $query_tbl= $query_tbl or col$i>1;
let $i=`select $i + 1`;
}
let $query_tbl= $query_tbl) and col$N_LAST_COND>1;
#echo $query_tbl;
evalp $query_tbl;
# Show the whole selectivity_for_columns array from the trace.
select
json_detailed(json_extract(trace,'$**.selectivity_for_columns')) as JS
from
information_schema.optimizer_trace;
# Restore the saved optimizer_trace setting and drop the test table.
set optimizer_trace=@trace_tmp;
drop table t1;
--echo # --echo #
--echo # Clean up --echo # Clean up
--echo # --echo #
--source include/restore_charset.inc --source include/restore_charset.inc
set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
set use_stat_tables= @save_use_stat_tables;
set @@global.histogram_size=@save_histogram_size; set @@global.histogram_size=@save_histogram_size;
...@@ -3111,6 +3111,53 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, ...@@ -3111,6 +3111,53 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
****************************************************************************/ ****************************************************************************/
/*
  @brief
    Pick the next batch of columns on which to run Range Analysis for EITS
    condition selectivity estimates.

  @detail
    Scan table->field[] starting at position col_no and continue until a
    NULL entry is reached. A column is added to the output bitmap when
     - it is marked in used_fields, and
     - is_eits_usable() accepts it.
    The scan stops early as soon as MAX_KEY columns have been picked:
    Range Analyzer cannot handle more than that in one run. If there are
    more than MAX_KEY eligible columns, call this function repeatedly,
    feeding each return value into the next call, to obtain the remaining
    batches.

  @param table        Table whose columns are examined
  @param used_fields  Columns used by the query
  @param col_no       Position in table->field[] to start scanning from
  @param out     OUT  Cleared first, then filled with the picked columns

  @return
    (uint)-1  No eligible column was found at or after col_no, that is,
              there are no more columns for range analysis.
    Other     Position of the first column that was not examined (one past
              the last examined one). Pass this to the next call to this
              function.
*/

uint get_columns_for_pseudo_indexes(const TABLE *table,
                                    const MY_BITMAP *used_fields, int col_no,
                                    MY_BITMAP *out)
{
  int picked= 0;

  bitmap_clear_all(out);

  /* Stop at the NULL terminator or once a full batch has been collected */
  while (picked < MAX_KEY && table->field[col_no])
  {
    if (bitmap_is_set(used_fields, col_no) &&
        is_eits_usable(table->field[col_no]))
    {
      bitmap_set_bit(out, col_no);
      picked++;
    }
    col_no++;
  }

  if (!picked)
    return (uint)-1;          /* Nothing picked: tell the caller to stop */

  return (uint)col_no;        /* The next call resumes from here */
}
/* /*
Build descriptors of pseudo-indexes over columns to perform range analysis Build descriptors of pseudo-indexes over columns to perform range analysis
...@@ -3136,22 +3183,11 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param, ...@@ -3136,22 +3183,11 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
{ {
Field **field_ptr; Field **field_ptr;
TABLE *table= param->table; TABLE *table= param->table;
uint parts= 0; uint parts= bitmap_bits_set(used_fields);
for (field_ptr= table->field; *field_ptr; field_ptr++)
{
Field *field= *field_ptr;
if (bitmap_is_set(used_fields, field->field_index) &&
is_eits_usable(field))
parts++;
}
KEY_PART *key_part; KEY_PART *key_part;
uint keys= 0; uint keys= 0;
if (!parts)
return TRUE;
if (!(key_part= (KEY_PART *) alloc_root(param->mem_root, if (!(key_part= (KEY_PART *) alloc_root(param->mem_root,
sizeof(KEY_PART) * parts))) sizeof(KEY_PART) * parts)))
return TRUE; return TRUE;
...@@ -3163,9 +3199,6 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param, ...@@ -3163,9 +3199,6 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
Field *field= *field_ptr; Field *field= *field_ptr;
if (bitmap_is_set(used_fields, field->field_index)) if (bitmap_is_set(used_fields, field->field_index))
{ {
if (!is_eits_usable(field))
continue;
uint16 store_length; uint16 store_length;
uint16 max_key_part_length= (uint16) table->file->max_key_part_length(); uint16 max_key_part_length= (uint16) table->file->max_key_part_length();
key_part->key= keys; key_part->key= keys;
...@@ -3506,8 +3539,6 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond) ...@@ -3506,8 +3539,6 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond)
PARAM param; PARAM param;
MEM_ROOT alloc; MEM_ROOT alloc;
SEL_TREE *tree; SEL_TREE *tree;
double rows;
init_sql_alloc(key_memory_quick_range_select_root, &alloc, init_sql_alloc(key_memory_quick_range_select_root, &alloc,
thd->variables.range_alloc_block_size, 0, MYF(MY_THREAD_SPECIFIC)); thd->variables.range_alloc_block_size, 0, MYF(MY_THREAD_SPECIFIC));
param.thd= thd; param.thd= thd;
...@@ -3516,67 +3547,90 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond) ...@@ -3516,67 +3547,90 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond)
param.table= table; param.table= table;
param.remove_false_where_parts= true; param.remove_false_where_parts= true;
if (create_key_parts_for_pseudo_indexes(&param, used_fields))
goto free_alloc;
param.prev_tables= param.read_tables= 0; param.prev_tables= param.read_tables= 0;
param.current_table= table->map; param.current_table= table->map;
param.using_real_indexes= FALSE; param.using_real_indexes= FALSE;
param.real_keynr[0]= 0; MEM_UNDEFINED(&param.real_keynr, sizeof(param.real_keynr));
param.alloced_sel_args= 0; param.alloced_sel_args= 0;
param.max_key_parts= 0; param.max_key_parts= 0;
thd->no_errors=1; thd->no_errors=1;
table->reginfo.impossible_range= 0;
tree= cond[0]->get_mm_tree(&param, cond); uint used_fields_buff_size= bitmap_buffer_size(table->s->fields);
uint32 *used_fields_buff= (uint32*)thd->alloc(used_fields_buff_size);
MY_BITMAP cols_for_indexes;
(void) my_bitmap_init(&cols_for_indexes, used_fields_buff, table->s->fields, 0);
bitmap_clear_all(&cols_for_indexes);
if (!tree) uint column_no= 0; // Start looping from the first column.
goto free_alloc; /*
Try getting selectivity estimates for every field that is used in the
table->reginfo.impossible_range= 0; query and has EITS statistics. We do this:
if (tree->type == SEL_TREE::IMPOSSIBLE)
{
rows= 0;
table->reginfo.impossible_range= 1;
goto free_alloc;
}
else if (tree->type == SEL_TREE::ALWAYS)
{
rows= table_records;
goto free_alloc;
}
else if (tree->type == SEL_TREE::MAYBE)
{
rows= table_records;
goto free_alloc;
}
for (uint idx= 0; idx < param.keys; idx++) for every usable field col
create a pseudo INDEX(col);
Run the range analyzer (get_mm_tree) for these pseudo-indexes;
Look at produced ranges and get their selectivity estimates;
Note that the range analyzer can process at most MAX_KEY indexes. If
the table has >MAX_KEY eligible columns, we will do several range
analyzer runs.
*/
while (1)
{ {
SEL_ARG *key= tree->keys[idx]; column_no= get_columns_for_pseudo_indexes(table, used_fields, column_no,
if (key) &cols_for_indexes);
if (column_no == (uint)-1)
break; /* Couldn't create any pseudo-indexes. This means we're done */
if (create_key_parts_for_pseudo_indexes(&param, &cols_for_indexes))
goto free_alloc;
tree= cond[0]->get_mm_tree(&param, cond);
if (!tree ||
tree->type == SEL_TREE::ALWAYS ||
tree->type == SEL_TREE::MAYBE)
{ {
Json_writer_object selectivity_for_column(thd); /* Couldn't infer anything. But there could be more fields, so continue */
selectivity_for_column.add("column_name", key->field->field_name); continue;
if (key->type == SEL_ARG::IMPOSSIBLE) }
{
rows= 0; if (tree->type == SEL_TREE::IMPOSSIBLE)
table->reginfo.impossible_range= 1; {
selectivity_for_column.add("selectivity_from_histogram", rows); table->reginfo.impossible_range= 1;
selectivity_for_column.add("cause", "impossible range"); goto free_alloc;
goto free_alloc; }
}
else for (uint idx= 0; idx < param.keys; idx++)
{
SEL_ARG *key= tree->keys[idx];
if (key)
{ {
enum_check_fields save_count_cuted_fields= thd->count_cuted_fields; Json_writer_object selectivity_for_column(thd);
thd->count_cuted_fields= CHECK_FIELD_IGNORE; selectivity_for_column.add("column_name", key->field->field_name);
rows= records_in_column_ranges(&param, idx, key); if (key->type == SEL_ARG::IMPOSSIBLE)
thd->count_cuted_fields= save_count_cuted_fields; {
if (rows != DBL_MAX) table->reginfo.impossible_range= 1;
selectivity_for_column.add("selectivity_from_histogram", 0);
selectivity_for_column.add("cause", "impossible range");
goto free_alloc;
}
else
{ {
key->field->cond_selectivity= rows/table_records; enum_check_fields save_count_cuted_fields= thd->count_cuted_fields;
selectivity_for_column.add("selectivity_from_histogram", thd->count_cuted_fields= CHECK_FIELD_IGNORE;
key->field->cond_selectivity); double rows= records_in_column_ranges(&param, idx, key);
thd->count_cuted_fields= save_count_cuted_fields;
if (rows != DBL_MAX)
{
key->field->cond_selectivity= rows/table_records;
selectivity_for_column.add("selectivity_from_histogram",
key->field->cond_selectivity);
}
} }
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment