MDEV-22535 TABLE::initialize_quick_structures() takes 0.5% in oltp_read_only

Fixed by:
- Make all quick_* variables allocated according to the real number of keys
  instead of MAX_KEY
- Store all the quick* items in a separately allocated structure (OPT_RANGE)
- Ensure we don't access any quick* variable without first checking
  opt_range_keys.is_set(). Thanks to this, we don't need any
  pre-initialization of quick* variables anymore.

Some renames were done to use the new structure:
table->quick_keys                -> table->opt_range_keys
table->quick_rows[X]             -> table->opt_range[X].rows
table->quick_key_parts[X]        -> table->opt_range[X].key_parts
table->quick_costs[X]            -> table->opt_range[X].cost
table->quick_index_only_costs[X] -> table->opt_range[X].index_only_cost
table->quick_n_ranges[X]         -> table->opt_range[X].ranges
table->quick_condition_rows      -> table->opt_range_condition_rows

This patch should both decrease the memory needed for TABLE objects (3528 -> 984 + keyinfo) and increase performance, thanks to fewer initializations per query and, thanks to the opt_range structure, more localized memory.

MDEV-22535 TABLE::initialize_quick_structures() takes 0.5% in oltp_read_only
Fixed by:
- Make all quick_* variables allocated according to the real number of keys
  instead of MAX_KEY
- Store all the quick* items in a separately allocated structure (OPT_RANGE)
- Ensure we don't access any quick* variable without first checking
  opt_range_keys.is_set(). Thanks to this, we don't need any
  pre-initialization of quick* variables anymore.

Some renames were done to use the new structure:
table->quick_keys                -> table->opt_range_keys
table->quick_rows[X]             -> table->opt_range[X].rows
table->quick_key_parts[X]        -> table->opt_range[X].key_parts
table->quick_costs[X]            -> table->opt_range[X].cost
table->quick_index_only_costs[X] -> table->opt_range[X].index_only_cost
table->quick_n_ranges[X]         -> table->opt_range[X].ranges
table->quick_condition_rows      -> table->opt_range_condition_rows

This patch should both decrease the memory needed for TABLE objects (3528 -> 984 + keyinfo) and increase performance, thanks to fewer initializations per query and, thanks to the opt_range structure, more localized memory.
6cee9b19 · Monty · 5cbb18cb · 6cee9b19 · 6cee9b19 · 6cee9b19
Commit 6cee9b19 authored Jun 26, 2020 by Monty
8 changed files
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -2599,10 +2599,10 @@ static int fill_used_fields_bitmap(PARAM *param)
    In the table struct the following information is updated:
      quick_keys           - Which keys can be used
      quick_rows           - How many rows the key matches
-      quick_condition_rows - E(# rows that will satisfy the table condition)
+      opt_range_condition_rows - E(# rows that will satisfy the table condition)

  IMPLEMENTATION
-    quick_condition_rows value is obtained as follows:
+    opt_range_condition_rows value is obtained as follows:
      
      It is a minimum of E(#output rows) for all considered table access
      methods (range and index_merge accesses over various indexes).
@@ -2626,7 +2626,7 @@ static int fill_used_fields_bitmap(PARAM *param)
    which is currently produced.

  TODO
-   * Change the value returned in quick_condition_rows from a pessimistic
+   * Change the value returned in opt_range_condition_rows from a pessimistic
     estimate to true E(#rows that satisfy table condition). 
     (we can re-use some of E(#rows) calcuation code from
     index_merge/intersection for this)
@@ -2957,7 +2957,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
        {
          best_trp= intersect_trp;
          best_read_time= best_trp->read_cost; 
-          set_if_smaller(param.table->quick_condition_rows, 
+          set_if_smaller(param.table->opt_range_condition_rows,
                         intersect_trp->records);
        }
      }
@@ -2977,7 +2977,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
        {
          new_conj_trp= get_best_disjunct_quick(&param, imerge, best_read_time);
          if (new_conj_trp)
-            set_if_smaller(param.table->quick_condition_rows, 
+            set_if_smaller(param.table->opt_range_condition_rows,
                           new_conj_trp->records);
          if (new_conj_trp &&
              (!best_conj_trp || 
@@ -3004,7 +3004,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
        restore_nonrange_trees(&param, tree, backup_keys);
      if ((group_trp= get_best_group_min_max(&param, tree, read_time)))
      {
-        param.table->quick_condition_rows= MY_MIN(group_trp->records,
+        param.table->opt_range_condition_rows= MY_MIN(group_trp->records,
                                                  head->stat_records());
        Json_writer_object grp_summary(thd, "best_group_range_summary");

@@ -3340,8 +3340,8 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond)

  for (keynr= 0;  keynr < table->s->keys; keynr++)
  {
-    if (table->quick_keys.is_set(keynr))
-      set_if_bigger(max_quick_key_parts, table->quick_key_parts[keynr]);
+    if (table->opt_range_keys.is_set(keynr))
+      set_if_bigger(max_quick_key_parts, table->opt_range[keynr].key_parts);
  }

  /* 
@@ -3353,13 +3353,13 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond)
  {
    for (keynr= 0;  keynr < table->s->keys; keynr++)
    {
-      if (table->quick_keys.is_set(keynr) &&
-          table->quick_key_parts[keynr] == quick_key_parts)
+      if (table->opt_range_keys.is_set(keynr) &&
+          table->opt_range[keynr].key_parts == quick_key_parts)
      {
        uint i;
-        uint used_key_parts= table->quick_key_parts[keynr];
-        double quick_cond_selectivity= table->quick_rows[keynr] / 
-	                               table_records;
+        uint used_key_parts= table->opt_range[keynr].key_parts;
+        double quick_cond_selectivity= (table->opt_range[keynr].rows /
+                                        table_records);
        KEY *key_info= table->key_info + keynr;
        KEY_PART_INFO* key_part= key_info->key_part;
        /*
@@ -5777,7 +5777,7 @@ bool prepare_search_best_index_intersect(PARAM *param,
      continue;
    }

-    cost= table->quick_index_only_costs[(*index_scan)->keynr];
+    cost= table->opt_range[(*index_scan)->keynr].index_only_cost;

    idx_scan.add("cost", cost);

@@ -7188,7 +7188,7 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
    ha_rows best_rows = double2rows(intersect_best->out_rows);
    if (!best_rows)
      best_rows= 1;
-    set_if_smaller(param->table->quick_condition_rows, best_rows);
+    set_if_smaller(param->table->opt_range_condition_rows, best_rows);
    trp->records= best_rows;
    trp->index_scan_costs= intersect_best->index_scan_costs;
    trp->cpk_scan= cpk_scan;
@@ -7357,7 +7357,7 @@ TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param,
  trp->read_cost= total_cost;
  trp->records= records;
  trp->cpk_scan= NULL;
-  set_if_smaller(param->table->quick_condition_rows, records); 
+  set_if_smaller(param->table->opt_range_condition_rows, records);

  DBUG_PRINT("info",
             ("Returning covering ROR-intersect plan: cost %g, records %lu",
@@ -11083,11 +11083,11 @@ void SEL_ARG::test_use_count(SEL_ARG *root)
                        about range scan we've evaluated.
      mrr_flags   INOUT MRR access flags
      cost        OUT   Scan cost
+      is_ror_scan       is set to reflect if the key scan is a ROR (see
+                        is_key_scan_ror function for more info)

  NOTES
-    param->is_ror_scan is set to reflect if the key scan is a ROR (see
-    is_key_scan_ror function for more info)
-    param->table->quick_*, param->range_count (and maybe others) are
+    param->table->opt_range*, param->range_count (and maybe others) are
    updated with data of given key scan, see quick_range_seq_next for details.

  RETURN
@@ -11157,6 +11157,7 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
  if (param->table->pos_in_table_list->is_non_derived())
    rows= file->multi_range_read_info_const(keynr, &seq_if, (void*)&seq, 0,
                                            bufsize, mrr_flags, cost);
+  param->quick_rows[keynr]= rows;
  if (rows != HA_POS_ERROR)
  {
    ha_rows table_records= param->table->stat_records();
@@ -11164,30 +11165,30 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
    {
      /*
        For any index the total number of records within all ranges
-        cannot be be bigger than the number of records in the table
+        cannot be bigger than the number of records in the table.
+        This check is needed as sometimes the table statistics or range
+        estimates may be slightly out of sync.
      */
      rows= table_records;
      set_if_bigger(rows, 1);
+      param->quick_rows[keynr]= rows;
    }
-    param->quick_rows[keynr]= rows;
    param->possible_keys.set_bit(keynr);
    if (update_tbl_stats)
    {
-      param->table->quick_keys.set_bit(keynr);
-      param->table->quick_key_parts[keynr]= param->max_key_parts;
-      param->table->quick_n_ranges[keynr]= param->range_count;
-      param->table->quick_condition_rows=
-        MY_MIN(param->table->quick_condition_rows, rows);
-      param->table->quick_rows[keynr]= rows;
-      param->table->quick_costs[keynr]= cost->total_cost();
+      param->table->opt_range_keys.set_bit(keynr);
+      param->table->opt_range[keynr].key_parts= param->max_key_parts;
+      param->table->opt_range[keynr].ranges= param->range_count;
+      param->table->opt_range_condition_rows=
+        MY_MIN(param->table->opt_range_condition_rows, rows);
+      param->table->opt_range[keynr].rows= rows;
+      param->table->opt_range[keynr].cost= cost->total_cost();
      if (param->table->file->is_clustering_key(keynr))
-	param->table->quick_index_only_costs[keynr]= 0;
+	param->table->opt_range[keynr].index_only_cost= 0;
      else
-        param->table->quick_index_only_costs[keynr]= cost->index_only_cost();
+        param->table->opt_range[keynr].index_only_cost= cost->index_only_cost();
    }
  }
-  else
-    param->quick_rows[keynr]= HA_POS_ERROR;

  /* Figure out if the key scan is ROR (returns rows in ROWID order) or not */
  enum ha_key_alg key_alg= param->table->key_info[seq.real_keyno].algorithm;

--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@@ -2500,7 +2500,7 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
          double rows= 1.0;
          while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
            rows= COST_MULT(rows,
-			    join->map2table[tableno]->table->quick_condition_rows);
+			    join->map2table[tableno]->table->opt_range_condition_rows);
          sjm->rows= MY_MIN(sjm->rows, rows);
        }
        memcpy((uchar*) sjm->positions,

--- a/sql/rowid_filter.cc
+++ b/sql/rowid_filter.cc
@@ -110,10 +110,12 @@ Range_rowid_filter_cost_info::set_adjusted_gain_param(double access_cost_factor)
 void Range_rowid_filter_cost_info::init(Rowid_filter_container_type cont_type,
                                        TABLE *tab, uint idx)
 {
+  DBUG_ASSERT(tab->opt_range_keys.is_set(idx));
+
  container_type= cont_type;
  table= tab;
  key_no= idx;
-  est_elements= (ulonglong) (table->quick_rows[key_no]);
+  est_elements= (ulonglong) table->opt_range[key_no].rows;
  b= build_cost(container_type);
  selectivity= est_elements/((double) table->stat_records());
  a= avg_access_and_eval_gain_per_row(container_type);
@@ -134,8 +136,9 @@ double
 Range_rowid_filter_cost_info::build_cost(Rowid_filter_container_type cont_type)
 {
  double cost= 0;
+  DBUG_ASSERT(table->opt_range_keys.is_set(key_no));

-  cost+= table->quick_index_only_costs[key_no];
+  cost+= table->opt_range[key_no].index_only_cost;

  switch (cont_type) {

@@ -345,7 +348,7 @@ void TABLE::init_cost_info_for_usable_range_rowid_filters(THD *thd)
  uint key_no;
  key_map usable_range_filter_keys;
  usable_range_filter_keys.clear_all();
-  key_map::Iterator it(quick_keys);
+  key_map::Iterator it(opt_range_keys);

  /*
    From all indexes that can be used for range accesses select only such that
@@ -359,7 +362,7 @@ void TABLE::init_cost_info_for_usable_range_rowid_filters(THD *thd)
      continue;
    if (file->is_clustering_key(key_no))                              // !2
      continue;
-   if (quick_rows[key_no] >
+   if (opt_range[key_no].rows >
       get_max_range_rowid_filter_elems_for_table(thd, this,
                                                  SORTED_ARRAY_CONTAINER)) // !3
      continue;

--- a/sql/sql_delete.cc
+++ b/sql/sql_delete.cc
@@ -492,7 +492,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
  set_statistics_for_table(thd, table);

  table->covering_keys.clear_all();
-  table->quick_keys.clear_all();		// Can't use 'only index'
+  table->opt_range_keys.clear_all();

  select=make_select(table, 0, 0, conds, (SORT_INFO*) 0, 0, &error);
  if (unlikely(error))
@@ -518,7 +518,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
  }

  /* If running in safe sql mode, don't allow updates without keys */
-  if (table->quick_keys.is_clear_all())
+  if (table->opt_range_keys.is_clear_all())
  {
    thd->set_status_no_index_used();
    if (safe_update && !using_limit)

--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
--- a/sql/sql_update.cc
+++ b/sql/sql_update.cc
@@ -443,7 +443,7 @@ int mysql_update(THD *thd,
  
  /* Calculate "table->covering_keys" based on the WHERE */
  table->covering_keys= table->s->keys_in_use;
-  table->quick_keys.clear_all();
+  table->opt_range_keys.clear_all();

  query_plan.select_lex= thd->lex->first_select_lex();
  query_plan.table= table;
@@ -577,7 +577,7 @@ int mysql_update(THD *thd,
  }

  /* If running in safe sql mode, don't allow updates without keys */
-  if (table->quick_keys.is_clear_all())
+  if (table->opt_range_keys.is_clear_all())
  {
    thd->set_status_no_index_used();
    if (safe_update && !using_limit)

--- a/sql/table.cc
+++ b/sql/table.cc
@@ -3976,6 +3976,15 @@ enum open_frm_error open_table_from_share(THD *thd, TABLE_SHARE *share,
                                                  sizeof(Field*)))))
    goto err;                                   /* purecov: inspected */

+  /* Allocate storage for range optimizer */
+  if (!multi_alloc_root(&outparam->mem_root,
+                        &outparam->opt_range,
+                        share->keys * sizeof(TABLE::OPT_RANGE),
+                        &outparam->const_key_parts,
+                        share->keys * sizeof(key_part_map),
+                        NullS))
+    goto err;
+
  outparam->field= field_ptr;

  record= (uchar*) outparam->record[0]-1;	/* Fieldstart = 1 */
@@ -5383,9 +5392,9 @@ void TABLE::init(THD *thd, TABLE_LIST *tl)
  range_rowid_filter_cost_info_ptr= NULL;
  range_rowid_filter_cost_info= NULL;
  vers_write= s->versioned;
-  quick_condition_rows=0;
+  opt_range_condition_rows=0;
  no_cache= false;
-  initialize_quick_structures();
+  initialize_opt_range_structures();
 #ifdef HAVE_REPLICATION
  /* used in RBR Triggers */
  master_had_triggers= 0;
@@ -7773,12 +7782,42 @@ void TABLE::restore_blob_values(String *blob_storage)

+/**
+  Make room for key_count additional keys in an internal temporary table
+
+  Allocates new key_info and const_key_parts arrays on mem_root, each with
+  room for s->keys + key_count elements, and copies the s->keys existing
+  elements into them. The const_key_parts entries of the added keys are
+  zero filled. s->key_info, key_info, const_key_parts and max_keys are
+  updated to describe the new arrays.
+
+  @param key_count  Number of keys to make room for in addition to s->keys
+
+  @retval FALSE  ok
+  @retval TRUE   Allocation failed
+*/
 bool TABLE::alloc_keys(uint key_count)
 {
-  key_info= (KEY*) alloc_root(&mem_root, sizeof(KEY)*(s->keys+key_count));
+  KEY *new_key_info;
+  key_part_map *new_const_key_parts;
+  DBUG_ASSERT(s->tmp_table == INTERNAL_TMP_TABLE);
+
+  if (!multi_alloc_root(&mem_root,
+                        &new_key_info, sizeof(*key_info)*(s->keys+key_count),
+                        &new_const_key_parts,
+                        sizeof(*new_const_key_parts)*(s->keys+key_count),
+                        NullS))
+    return TRUE;
   if (s->keys)
-    memmove(key_info, s->key_info, sizeof(KEY)*s->keys);
-  s->key_info= key_info;
+  {
+    memmove(new_key_info, s->key_info, sizeof(*key_info) * s->keys);
+    /* Copy whole elements: sizeof(*const_key_parts), not the pointer size */
+    memmove(new_const_key_parts, const_key_parts,
+            s->keys * sizeof(*const_key_parts));
+  }
+  s->key_info= key_info= new_key_info;
+  const_key_parts= new_const_key_parts;
+  bzero((char*) (const_key_parts + s->keys),
+        sizeof(*const_key_parts) * key_count);
   max_keys= s->keys+key_count;
-  return !(key_info);
+  return FALSE;
 }


@@ -9898,20 +9923,18 @@ bool TABLE::export_structure(THD *thd, Row_definition_list *defs)

 /*
  @brief
-    Initialize all the quick structures that are used to stored the
+    Initialize all the opt_range structures that are used to store the
    estimates when the range optimizer is run.
-  @details
-    This is specifically needed when we read the TABLE structure from the
-    table cache. There can be some garbage data from previous queries
-    that need to be reset here.
+    As these are initialized by the range optimizer for all indexes
+    marked in opt_range_keys, we only mark the memory as undefined
+    to be able to find wrong usage of data with valgrind or MSAN.
 */

-void TABLE::initialize_quick_structures()
+void TABLE::initialize_opt_range_structures()
 {
-  bzero(quick_rows, sizeof(quick_rows));
-  bzero(quick_key_parts, sizeof(quick_key_parts));
-  bzero(quick_costs, sizeof(quick_costs));
-  bzero(quick_n_ranges, sizeof(quick_n_ranges));
+  TRASH_ALLOC(&opt_range_keys, sizeof(opt_range_keys));
+  TRASH_ALLOC(opt_range, s->keys * sizeof(*opt_range));
+  TRASH_ALLOC(const_key_parts, s->keys * sizeof(*const_key_parts));
 }

 /*

--- a/sql/table.h
+++ b/sql/table.h
@@ -1257,8 +1257,7 @@ struct TABLE
    Map of keys that can be used to retrieve all data from this table 
    needed by the query without reading the row.
  */
-  key_map covering_keys;
-  key_map quick_keys, intersect_keys;
+  key_map covering_keys, intersect_keys;
  /*
    A set of keys that can be used in the query that references this
    table.
@@ -1340,28 +1339,29 @@ struct TABLE
  /* The estimate of the number of records in the table used by optimizer */ 
  ha_rows used_stat_records;

+  key_map opt_range_keys;
  /* 
-    For each key that has quick_keys.is_set(key) == TRUE: estimate of #records
-    and max #key parts that range access would use.
+    The following structure is filled for each key that has
+    opt_range_keys.is_set(key) == TRUE
  */
-  ha_rows	quick_rows[MAX_KEY];
-  uint          quick_key_parts[MAX_KEY];
-
-  double 	quick_costs[MAX_KEY];
-  /*
-    If there is a range access by i-th index then the cost of
-    index only access for it is stored in quick_index_only_costs[i]
-  */
-  double 	quick_index_only_costs[MAX_KEY];
-
+  struct OPT_RANGE
+  {
+    uint        key_parts;   /* Max #key parts that range access would use */
+    uint        ranges;      /* Number of ranges (param->range_count) */
+    ha_rows     rows;        /* Estimate of #records the key matches */
+    double      cost;        /* cost->total_cost() of the range access */
+    /*
+      If there is a range access by i-th index then the cost of
+      index only access for it is stored in opt_range[i].index_only_cost
+    */
+    double      index_only_cost;
+  } *opt_range;
  /* 
-    Bitmaps of key parts that =const for the duration of join execution. If
-    we're in a subquery, then the constant may be different across subquery
-    re-executions.
+     Bitmaps of key parts that =const for the duration of join execution. If
+     we're in a subquery, then the constant may be different across subquery
+     re-executions.
  */
-  key_part_map  const_key_parts[MAX_KEY];
-
-  uint    quick_n_ranges[MAX_KEY];
+  key_part_map *const_key_parts;

  /* 
    Estimate of number of records that satisfy SARGable part of the table
@@ -1371,7 +1371,7 @@ struct TABLE
    that will pass the table condition (condition that depends on fields of 
    this table and constants)
  */
-  ha_rows       quick_condition_rows;
+  ha_rows       opt_range_condition_rows;

  double cond_selectivity;
  List<st_cond_statistic> *cond_selectivity_sampling_explain;
@@ -1637,7 +1637,7 @@ struct TABLE
  bool is_filled_at_execution();

  bool update_const_key_parts(COND *conds);
-  void initialize_quick_structures();
+  void initialize_opt_range_structures();

  my_ptrdiff_t default_values_offset() const
  { return (my_ptrdiff_t) (s->default_values - record[0]); }