Commit b12b3cae authored by Igor Babaev's avatar Igor Babaev

Added comments.

Renamed the virtual method middle_point_pos for the class Field to
pos_in_interval.
parent f4cd2b37
......@@ -1274,7 +1274,24 @@ bool Field_num::get_int(CHARSET_INFO *cs, const char *from, uint len,
}
double Field_num::middle_point_pos(Field *min, Field *max)
/**
@brief
Determine the relative position of the field value in a numeric interval
@details
The function returns a double number between 0.0 and 1.0 as the relative
position of the value of the this field in the numeric interval of [min,max].
If the value is not in the interval the the function returns 0.0 when
the value is less than min, and, 1.0 when the value is greater than max.
@param min value of the left end of the interval
@param max value of the right end of the interval
@return
relative position of the field value in the numeric interval [min,max]
*/
double Field_num::pos_in_interval(Field *min, Field *max)
{
double n, d;
n= val_real() - min->val_real();
......@@ -6196,7 +6213,39 @@ inline ulonglong char_prefix_to_ulonglong(uchar *src)
return uint8korr(src);
}
double Field_str::middle_point_pos(Field *min, Field *max)
/**
@brief
Determine the relative position of the field value in a string interval
@details
The function returns a double number between 0.0 and 1.0 as the relative
position of the value of the this field in the string interval of [min,max].
If the value is not in the interval the the function returns 0.0 when
the value is less than min, and, 1.0 when the value is greater than max.
@note
To calculate the relative position of the string value v in the interval
[min, max] the function first converts the beginning of these three
strings v, min, max into the strings that are used for byte comparison.
For each string not more sizeof(ulonglong) first bytes are taken
from the result of conversion. Then these bytes are interpreted as the
big-endian representation of an ulonglong integer. The values of these
integer numbers obtained for the strings v, min, max are used to calculate
the position of v in [min,max] in the same way is it's done for numeric
fields (see Field_num::pos_in_interval).
@todo
Improve the procedure for the case when min and max have the same
beginning
@param min value of the left end of the interval
@param max value of the right end of the interval
@return
relative position of the field value in the string interval [min,max]
*/
double Field_str::pos_in_interval(Field *min, Field *max)
{
uchar mp_prefix[sizeof(ulonglong)];
uchar minp_prefix[sizeof(ulonglong)];
......@@ -8435,7 +8484,24 @@ my_decimal *Field_bit::val_decimal(my_decimal *deciaml_value)
}
double Field_bit::middle_point_pos(Field *min, Field *max)
/**
@brief
Determine the relative position of the field value in a bit interval
@details
The function returns a double number between 0.0 and 1.0 as the relative
position of the value of the this field in the bit interval of [min,max].
If the value is not in the interval the the function returns 0.0 when
the value is less than min, and, 1.0 when the value is greater than max.
@param min value of the left end of the interval
@param max value of the right end of the interval
@return
relative position of the field value in the bit interval [min,max]
*/
double Field_bit::pos_in_interval(Field *min, Field *max)
{
double n, d;
n= val_real() - min->val_real();
......
......@@ -723,9 +723,10 @@ class Field
virtual bool hash_join_is_possible() { return TRUE; }
virtual bool eq_cmp_as_binary() { return TRUE; }
virtual double middle_point_pos(Field *min, Field *max)
/* Position of the field value within the interval of [min, max] */
virtual double pos_in_interval(Field *min, Field *max)
{
return (double) 1.0;
return (double) 0.5;
}
friend int cre_myisam(char * name, register TABLE *form, uint options,
......@@ -846,7 +847,7 @@ class Field_num :public Field {
bool get_int(CHARSET_INFO *cs, const char *from, uint len,
longlong *rnd, ulonglong unsigned_max,
longlong signed_min, longlong signed_max);
double middle_point_pos(Field *min, Field *max);
double pos_in_interval(Field *min, Field *max);
};
......@@ -893,7 +894,7 @@ class Field_str :public Field {
uint is_equal(Create_field *new_field);
bool eq_cmp_as_binary() { return test(flags & BINARY_FLAG); }
virtual uint length_size() { return 0; }
double middle_point_pos(Field *min, Field *max);
double pos_in_interval(Field *min, Field *max);
};
/* base class for Field_string, Field_varstring and Field_blob */
......@@ -2308,7 +2309,7 @@ class Field_bit :public Field {
{
store(*((longlong *)val), TRUE);
}
double middle_point_pos(Field *min, Field *max);
double pos_in_interval(Field *min, Field *max);
void get_image(uchar *buff, uint length, CHARSET_INFO *cs)
{ get_key_image(buff, length, itRAW); }
void set_image(const uchar *buff,uint length, CHARSET_INFO *cs)
......
......@@ -3222,6 +3222,26 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
* Condition selectivity module
****************************************************************************/
/*
Build descriptors of pseudo-indexes over columns to perform range analysis
SYNOPSIS
create_key_parts_for_pseudo_indexes()
param IN/OUT data structure for the descriptors to be built
used_fields bitmap of columns for which the descriptors are to be built
DESCRIPTION
For each column marked in the bitmap used_fields the function builds
a descriptor of a single-component pseudo-index over this column that
can be used for the range analysis of the predicates over this columns.
The descriptors are created in the memory of param->mem_root.
RETURN
FALSE in the case of success
TRUE otherwise
*/
static
bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
MY_BITMAP *used_fields)
......@@ -3275,6 +3295,31 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
}
/*
Estimate the number of rows in all ranges built for a column
by the range optimizer
SYNOPSIS
records_in_column_ranges()
param the data structure to access descriptors of pseudo indexes
built over columns used in the condition of the processed query
idx the index of the descriptor of interest in param
tree the tree representing ranges built for the interesting column
DESCRIPTION
This function retrieves the ranges represented by the SEL_ARG 'tree' and
for each of them r it calls the function get_column_range_cardinality()
that estimates the number of expected rows in r. It is assumed that param
is the data structure containing the descriptors of pseudo-indexes that
has been built to perform range analysis of the range conditions imposed
on the columns used in the processed query, while idx is the index of the
descriptor created in 'param' exactly for the column for which 'tree'
has been built by the range optimizer.
RETURN
the number of rows in the retrieved ranges
*/
static
double records_in_column_ranges(PARAM *param, uint idx,
SEL_ARG *tree)
......@@ -3322,6 +3367,29 @@ double records_in_column_ranges(PARAM *param, uint idx,
}
/*
Calculate the selectivity of the condition imposed on the rows of a table
SYNOPSIS
calculate_cond_selectivity_for_table()
thd the context handle
table the table of interest
cond conditions imposed on the rows of the table
DESCRIPTION
This function calculates the selectivity of range conditions cond imposed
on the rows of 'table' in the processed query.
The calculated selectivity is assigned to the field table->cond_selectivity.
NOTE
Currently the selectivities of range conditions over different columns are
considered independent.
RETURN
FALSE on success
TRUE otherwise
*/
bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item *cond)
{
uint keynr;
......@@ -3338,6 +3406,11 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item *cond)
if (thd->variables.optimizer_use_condition_selectivity > 2 &&
!bitmap_is_clear_all(used_fields))
{
/*
Calculate the selectivity of the range conditions not supported
by any index
*/
PARAM param;
MEM_ROOT alloc;
SEL_TREE *tree;
......
......@@ -292,7 +292,8 @@ typedef struct st_join_table {
/* psergey-todo: make the below have type double, like POSITION::records_read? */
ha_rows records_read;
double cond_selectivity;
/* The selectivity of the conditions that can be pushed to the table */
double cond_selectivity;
/* Startup cost for execution */
double startup_cost;
......@@ -774,7 +775,8 @@ typedef struct st_position :public Sql_alloc
*/
double records_read;
double cond_selectivity;
/* The selectivity of the pushed down conditions */
double cond_selectivity;
/*
Cost accessing the table in course of the entire complete join execution,
......
......@@ -250,12 +250,13 @@ class Column_statistics_collected :public Column_statistics
and column_name with column_name taken out of the only parameter f of the
Field* type passed to this method. After this get_stat_values looks
for a row by the set key value. If the row is found the values of statistical
data columns min_value, max_value, nulls_ratio, avg_length, avg_frequency
are read into internal structures. Values of nulls_ratio, avg_length,
avg_frequency are read into the corresponding fields of the read_stat
structure from the Field object f, while values from min_value and max_value
are copied into the min_value and max_value record buffers attached to the
TABLE structure for table t.
data columns min_value, max_value, nulls_ratio, avg_length, avg_frequency,
hist_size, hist_type, histogram are read into internal structures. Values
of nulls_ratio, avg_length, avg_frequency, hist_size, hist_type, histogram
are read into the corresponding fields of the read_stat structure from
the Field object f, while values from min_value and max_value are copied
into the min_value and max_value record buffers attached to the TABLE
structure for table t.
If the value of a statistical column in the found row is null, then the
corresponding flag in the f->read_stat.column_stat_nulls bitmap is set off.
Otherwise the flag is set on. If no row is found for the column the all flags
......@@ -868,11 +869,12 @@ class Column_stat: public Stat_table
@details
This implementation of a purely virtual method sets the value of the
columns 'min_value', 'max_value', 'nulls_ratio', 'avg_length' and
'avg_frequency' of the stistical table columns_stat according to the
contents of the bitmap write_stat.column_stat_nulls and the values
of the fields min_value, max_value, nulls_ratio, avg_length and
avg_frequency of the structure write_stat from the Field structure
columns 'min_value', 'max_value', 'nulls_ratio', 'avg_length',
'avg_frequency', 'hist_size', 'hist_type' and 'histogram' of the
stistical table columns_stat according to the contents of the bitmap
write_stat.column_stat_nulls and the values of the fields min_value,
max_value, nulls_ratio, avg_length, avg_frequency, hist_size, hist_type
and histogram of the structure write_stat from the Field structure
for the field 'table_field'.
The value of the k-th column in the table columns_stat is set to NULL
if the k-th bit in the bitmap 'column_stat_nulls' is set to 1.
......@@ -951,14 +953,15 @@ class Column_stat: public Stat_table
@details
This implementation of a purely virtual method first looks for a record
the statistical table column_stats by its primary key set the record
in the statistical table column_stats by its primary key set in the record
buffer with the help of Column_stat::set_key_fields. Then, if the row is
found, the function reads the values of the columns 'min_value',
'max_value', 'nulls_ratio', 'avg_length' and 'avg_frequency' of the
table column_stat and sets accordingly the value of the bitmap
read_stat.column_stat_nulls' and the values of the fields min_value,
max_value, nulls_ratio, avg_length and avg_frequency of the structure
read_stat from the Field structure for the field 'table_field'.
'max_value', 'nulls_ratio', 'avg_length', 'avg_frequency', 'hist_size' and
'hist_type" of the table column_stat and sets accordingly the value of
the bitmap read_stat.column_stat_nulls' and the values of the fields
min_value, max_value, nulls_ratio, avg_length, avg_frequency, hist_size and
hist_type of the structure read_stat from the Field structure for the field
'table_field'.
*/
void get_stat_values()
......@@ -1022,6 +1025,22 @@ class Column_stat: public Stat_table
}
}
/**
@brief
Read histogram from of column_stats
@details
This method first looks for a record in the statistical table column_stats
by its primary key set the record buffer with the help of
Column_stat::set_key_fields. Then, if the row is found, the function reads
the value of the column 'histogram' of the table column_stat and sets
accordingly the corresponding bit in the bitmap read_stat.column_stat_nulls.
The method assumes that the value of histogram size and the pointer to
the histogram location has been already set in the fields size and values
of read_stats->histogram.
*/
void get_histogram_value()
{
if (find_stat())
......@@ -1238,20 +1257,24 @@ class Index_stat: public Stat_table
};
/*
Histogram_builder is a helper class that is used to build histograms
for columns
*/
class Histogram_builder
{
Field *column;
uint col_length;
ha_rows records;
Field *min_value;
Field *max_value;
Histogram *histogram;
uint hist_width;
double bucket_capacity;
uint curr_bucket;
ulonglong count;
ulonglong count_distinct;
Field *column; /* table field for which the histogram is built */
uint col_length; /* size of this field */
ha_rows records; /* number of records the histogram is built for */
Field *min_value; /* pointer to the minimal value for the field */
Field *max_value; /* pointer to the maximal value for the field */
Histogram *histogram; /* the histogram location */
uint hist_width; /* the number of points in the histogram */
double bucket_capacity; /* number of rows in a bucket of the histogram */
uint curr_bucket; /* number of the current bucket to be built */
ulonglong count; /* number of values retrieved */
ulonglong count_distinct; /* number of distinct values retrieved */
public:
Histogram_builder(Field *col, uint col_len, ha_rows rows)
......@@ -1280,7 +1303,7 @@ class Histogram_builder
{
column->store_field_value((uchar *) elem, col_length);
histogram->set_value(curr_bucket,
column->middle_point_pos(min_value, max_value));
column->pos_in_interval(min_value, max_value));
curr_bucket++;
while (curr_bucket != hist_width &&
count > bucket_capacity * (curr_bucket + 1))
......@@ -1389,6 +1412,10 @@ class Count_distinct_field: public Sql_alloc
return count;
}
/*
@brief
Build the histogram for the elements accumulated in the container of 'tree'
*/
ulonglong get_value_with_histogram(ha_rows rows)
{
Histogram_builder hist_builder(table_field, tree_key_length, rows);
......@@ -1396,11 +1423,19 @@ class Count_distinct_field: public Sql_alloc
return hist_builder.get_count_distinct();
}
/*
@brief
Get the size of the histogram in bytes built for table_field
*/
uint get_hist_size()
{
return table_field->collected_stats->histogram.get_size();
}
/*
@brief
Get the pointer to the histogram built for table_field
*/
uchar *get_histogram()
{
return table_field->collected_stats->histogram.get_values();
......@@ -1936,7 +1971,6 @@ inline bool statistics_for_command_is_needed(THD *thd)
@note
Currently the function always is called with the parameter is_safe set
to FALSE.
*/
int alloc_statistics_for_table_share(THD* thd, TABLE_SHARE *table_share,
......@@ -2051,6 +2085,36 @@ int alloc_statistics_for_table_share(THD* thd, TABLE_SHARE *table_share,
DBUG_RETURN(0);
}
/**
@brief
Allocate memory for the histogram used by a table share
@param
thd Thread handler
@param
table_share Table share for which the memory for histogram data is allocated
@param
is_safe TRUE <-> at any time only one thread can perform the function
@note
The function allocates the memory for the histogram built for a table in the
table's share memory with the intention to read the data there from the
system persistent statistical table mysql.column_stats,
The memory is allocated in the table_share's mem_root.
If the parameter is_safe is TRUE then it is guaranteed that at any given time
only one thread is executed the code of the function.
@retval
0 If the memory for all statistical data has been successfully allocated
@retval
1 Otherwise
@note
Currently the function always is called with the parameter is_safe set
to FALSE.
*/
static
int alloc_histograms_for_table_share(THD* thd, TABLE_SHARE *table_share,
bool is_safe)
......@@ -2781,6 +2845,38 @@ bool statistics_for_tables_is_needed(THD *thd, TABLE_LIST *tables)
}
/**
@brief
Read histogram for a table from the persistent statistical tables
@param
thd The thread handle
@param
table The table to read histograms for
@param
stat_tables The array of TABLE_LIST objects for statistical tables
@details
For the statistical table columns_stats the function looks for the rows
from this table that contain statistical data on 'table'. If such rows
are found the histograms from them are read into the memory allocated
for histograms of 'table'. Later at the query processing these histogram
are supposed to be used by the optimizer.
The parameter stat_tables should point to an array of TABLE_LIST
objects for all statistical tables linked into a list. All statistical
tables are supposed to be opened.
The function is called by read_statistics_for_tables_if_needed().
@retval
0 If data has been successfully read for the table
@retval
1 Otherwise
@note
Objects of the helper Column_stat are employed read histogram
from the statistical table column_stats now.
*/
static
int read_histograms_for_table(THD *thd, TABLE *table, TABLE_LIST *stat_tables)
{
......@@ -3311,6 +3407,17 @@ void set_statistics_for_table(THD *thd, TABLE *table)
}
/**
@brief
Get the average frequency for a column
@param
field The column whose average frequency is required
@retval
The required average frequency
*/
double get_column_avg_frequency(Field * field)
{
double res;
......@@ -3337,6 +3444,27 @@ double get_column_avg_frequency(Field * field)
}
/**
@brief
Estimate the number of rows in a column range using data from stat tables
@param
field The column whose range cardinality is to be estimated
@param
min_endp The left end of the range whose cardinality is required
@param
max_endp The right end of the range whose cardinality is required
@param
range_flag The range flags
@details
The function gets an estimate of the number of rows in a column range
using the statistical data from the table column_stats.
@retval
The required estimate of the rows in the column range
*/
double get_column_range_cardinality(Field *field,
key_range *min_endp,
key_range *max_endp,
......@@ -3377,8 +3505,8 @@ double get_column_range_cardinality(Field *field,
Histogram *hist= &col_stats->histogram;
if (hist->is_available())
{
double pos= field->middle_point_pos(col_stats->min_value,
col_stats->max_value);
double pos= field->pos_in_interval(col_stats->min_value,
col_stats->max_value);
res= col_non_nulls *
hist->point_selectivity(pos,
avg_frequency / col_non_nulls);
......@@ -3396,8 +3524,8 @@ double get_column_range_cardinality(Field *field,
{
store_key_image_to_rec(field, (uchar *) min_endp->key,
min_endp->length);
min_mp_pos= field->middle_point_pos(col_stats->min_value,
col_stats->max_value);
min_mp_pos= field->pos_in_interval(col_stats->min_value,
col_stats->max_value);
}
else
min_mp_pos= 0.0;
......@@ -3405,8 +3533,8 @@ double get_column_range_cardinality(Field *field,
{
store_key_image_to_rec(field, (uchar *) max_endp->key,
max_endp->length);
max_mp_pos= field->middle_point_pos(col_stats->min_value,
col_stats->max_value);
max_mp_pos= field->pos_in_interval(col_stats->min_value,
col_stats->max_value);
}
else
max_mp_pos= 1.0;
......
......@@ -271,8 +271,8 @@ class Table_statistics
Column_statistics *column_stats; /* Array of statistical data for columns */
Index_statistics *index_stats; /* Array of statistical data for indexes */
ulong *idx_avg_frequency; /* Array of records per key for index prefixes */
ulong total_hist_size;
uchar *histograms; /* Sequence of histograms */
ulong total_hist_size; /* Total size of all histograms */
uchar *histograms; /* Sequence of histograms */
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment