Added comments.

Renamed the virtual method middle_point_pos for the class Field to pos_in_interval.

Added comments.
Renamed the virtual method middle_point_pos for the class Field to pos_in_interval.
b12b3cae · Igor Babaev · f4cd2b37 · b12b3cae · b12b3cae · b12b3cae
Commit b12b3cae authored Apr 15, 2013 by Igor Babaev
6 changed files
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -1274,7 +1274,24 @@ bool Field_num::get_int(CHARSET_INFO *cs, const char *from, uint len,
 }
-double Field_num::middle_point_pos(Field *min, Field *max)
+/**
+  @brief
+  Determine the relative position of the field value in a numeric interval
+  @details
+  The function returns a double number between 0.0 and 1.0 as the relative
+  position of the value of this field in the numeric interval [min,max].
+  If the value is not in the interval, the function returns 0.0 when
+  the value is less than min, and 1.0 when the value is greater than max.
+  @param  min  value of the left end of the interval
+  @param  max  value of the right end of the interval
+  @return
+  relative position of the field value in the numeric interval [min,max] 
+*/
+double Field_num::pos_in_interval(Field *min, Field *max)
 {
  double n, d;
  n= val_real() - min->val_real();
@@ -6196,7 +6213,39 @@ inline ulonglong char_prefix_to_ulonglong(uchar *src)
  return uint8korr(src); 
 }
-double Field_str::middle_point_pos(Field *min, Field *max)
+/**
+  @brief
+  Determine the relative position of the field value in a string interval
+  @details
+  The function returns a double number between 0.0 and 1.0 as the relative
+  position of the value of this field in the string interval [min,max].
+  If the value is not in the interval, the function returns 0.0 when
+  the value is less than min, and 1.0 when the value is greater than max.
+  @note
+  To calculate the relative position of the string value v in the interval
+  [min, max] the function first converts the beginning of these three
+  strings v, min, max into the strings that are used for byte comparison.
+  For each string, at most the first sizeof(ulonglong) bytes are taken
+  from the result of conversion. Then these bytes are interpreted as the
+  big-endian representation of an ulonglong integer. The values of these
+  integer numbers obtained for the strings v, min, max are used to calculate
+  the position of v in [min,max] in the same way as it's done for numeric
+  fields (see Field_num::pos_in_interval).
+  @todo
+  Improve the procedure for the case when min and max have the same
+  beginning
+  @param  min  value of the left end of the interval
+  @param  max  value of the right end of the interval
+  @return
+  relative position of the field value in the string interval [min,max] 
+*/
+double Field_str::pos_in_interval(Field *min, Field *max)
 {
  uchar mp_prefix[sizeof(ulonglong)];
  uchar minp_prefix[sizeof(ulonglong)];
@@ -8435,7 +8484,24 @@ my_decimal *Field_bit::val_decimal(my_decimal *deciaml_value)
 }
-double Field_bit::middle_point_pos(Field *min, Field *max)
+/**
+  @brief
+  Determine the relative position of the field value in a bit interval
+  @details
+  The function returns a double number between 0.0 and 1.0 as the relative
+  position of the value of this field in the bit interval [min,max].
+  If the value is not in the interval, the function returns 0.0 when
+  the value is less than min, and 1.0 when the value is greater than max.
+  @param  min  value of the left end of the interval
+  @param  max  value of the right end of the interval
+  @return
+  relative position of the field value in the bit interval [min,max] 
+*/
+double Field_bit::pos_in_interval(Field *min, Field *max)
 {
  double n, d;
  n= val_real() - min->val_real();

--- a/sql/field.h
+++ b/sql/field.h
@@ -723,9 +723,10 @@ class Field
  virtual bool hash_join_is_possible() { return TRUE; }
  virtual bool eq_cmp_as_binary() { return TRUE; }
-  virtual double middle_point_pos(Field *min, Field *max)
+  /* Position of the field value within the interval of [min, max] */
+  virtual double pos_in_interval(Field *min, Field *max)
  {
-    return (double) 1.0; 
+    return (double) 0.5; 
  }
  friend int cre_myisam(char * name, register TABLE *form, uint options,
@@ -846,7 +847,7 @@ class Field_num :public Field {
  bool get_int(CHARSET_INFO *cs, const char *from, uint len, 
               longlong *rnd, ulonglong unsigned_max, 
               longlong signed_min, longlong signed_max);
-  double middle_point_pos(Field *min, Field *max);
+  double pos_in_interval(Field *min, Field *max);
 };
@@ -893,7 +894,7 @@ class Field_str :public Field {
  uint is_equal(Create_field *new_field);
  bool eq_cmp_as_binary() { return test(flags & BINARY_FLAG); }
  virtual uint length_size() { return 0; }
-  double middle_point_pos(Field *min, Field *max);
+  double pos_in_interval(Field *min, Field *max);
 };
 /* base class for Field_string, Field_varstring and Field_blob */
@@ -2308,7 +2309,7 @@ class Field_bit :public Field {
  {
    store(*((longlong *)val), TRUE);
  }
-  double middle_point_pos(Field *min, Field *max);
+  double pos_in_interval(Field *min, Field *max);
  void get_image(uchar *buff, uint length, CHARSET_INFO *cs)
  { get_key_image(buff, length, itRAW); }   
  void set_image(const uchar *buff,uint length, CHARSET_INFO *cs)

--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -3222,6 +3222,26 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
 * Condition selectivity module
 ****************************************************************************/
+/*
+  Build descriptors of pseudo-indexes over columns to perform range analysis
+  SYNOPSIS
+    create_key_parts_for_pseudo_indexes()
+      param       IN/OUT data structure for the descriptors to be built 
+      used_fields bitmap of columns for which the descriptors are to be built          
+  DESCRIPTION
+    For each column marked in the bitmap used_fields the function builds
+    a descriptor of a single-component pseudo-index over this column that
+    can be used for the range analysis of the predicates over this column.
+    The descriptors are created in the memory of param->mem_root. 
+  RETURN
+    FALSE  in the case of success
+    TRUE   otherwise
+*/
 static
 bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
                                         MY_BITMAP *used_fields)
@@ -3275,6 +3295,31 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
 }
+/*
+  Estimate the number of rows in all ranges built for a column
+  by the range optimizer  
+  SYNOPSIS
+    records_in_column_ranges()
+      param      the data structure to access descriptors of pseudo indexes
+                 built over columns used in the condition of the processed query
+      idx        the index of the descriptor of interest in param
+      tree       the tree representing ranges built for the interesting column         
+  DESCRIPTION
+    This function retrieves the ranges represented by the SEL_ARG 'tree' and,
+    for each such range r, calls the function get_column_range_cardinality()
+    that estimates the number of expected rows in r. It is assumed that param
+    is the data structure containing the descriptors of pseudo-indexes that
+    have been built to perform range analysis of the range conditions imposed
+    on the columns used in the processed query, while idx is the index of the
+    descriptor created in 'param' exactly for the column for which 'tree'
+    has been built by the range optimizer.
+  RETURN
+    the number of rows in the retrieved ranges  
+*/
 static
 double records_in_column_ranges(PARAM *param, uint idx, 
                                SEL_ARG *tree)
@@ -3322,6 +3367,29 @@ double records_in_column_ranges(PARAM *param, uint idx,
 } 
+/*
+  Calculate the selectivity of the condition imposed on the rows of a table
+  SYNOPSIS
+    calculate_cond_selectivity_for_table()
+      thd        the context handle 
+      table      the table of interest
+      cond       conditions imposed on the rows of the table        
+  DESCRIPTION
+    This function calculates the selectivity of range conditions cond imposed
+    on the rows of 'table' in the processed query.
+    The calculated selectivity is assigned to the field table->cond_selectivity.
+  NOTE
+    Currently the selectivities of range conditions over different columns are
+    considered independent. 
+  RETURN
+    FALSE  on success
+    TRUE   otherwise 
+*/
 bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item *cond)
 {
  uint keynr;
@@ -3338,6 +3406,11 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item *cond)
  if (thd->variables.optimizer_use_condition_selectivity > 2 &&
      !bitmap_is_clear_all(used_fields))
  {
+    /* 
+      Calculate the selectivity of the range conditions not supported
+      by any index
+    */
    PARAM param;
    MEM_ROOT alloc;
    SEL_TREE *tree;

--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -292,7 +292,8 @@ typedef struct st_join_table {
  /* psergey-todo: make the below have type double, like POSITION::records_read? */
  ha_rows       records_read;
-  double        cond_selectivity;
+  /* The selectivity of the conditions that can be pushed to the table */ 
+  double        cond_selectivity;  
  /* Startup cost for execution */
  double        startup_cost;
@@ -774,7 +775,8 @@ typedef struct st_position :public Sql_alloc
  */
  double records_read;
-  double cond_selectivity;
+  /* The selectivity of the pushed down conditions */
+  double cond_selectivity; 
  /* 
    Cost accessing the table in course of the entire complete join execution,

--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
--- a/sql/sql_statistics.h
+++ b/sql/sql_statistics.h
@@ -271,8 +271,8 @@ class Table_statistics
  Column_statistics *column_stats;  /* Array of statistical data for columns */
  Index_statistics *index_stats;    /* Array of statistical data for indexes */
  ulong *idx_avg_frequency;   /* Array of records per key for index prefixes */
-  ulong total_hist_size; 
+  ulong total_hist_size;            /* Total size of all histograms */
-  uchar *histograms;                /* Sequence of histograms      */                    
+  uchar *histograms;                /* Sequence of histograms       */                    
 };