better fix for MySQL bugs

BUG#26447 prefer a clustered key for an index scan, as secondary index is always slower ... which was fixed to cause BUG#35850 queries take 50% longer ... and was reverted and BUG#39653 prefer a secondary index for an index scan, as clustered key is always slower ... which was fixed to cause BUG#55656 mysqldump takes six days instead of half an hour ... and was amended with a special case workaround sql/opt_range.cc: move get_index_only_read_time() into the handler class sql/sql_select.cc: use cost not an index length when choosing a cheaper index

better fix for MySQL bugs
BUG#26447 prefer a clustered key for an index scan, as secondary index is always slower ... which was fixed to cause BUG#35850 queries take 50% longer ... and was reverted and BUG#39653 prefer a secondary index for an index scan, as clustered key is always slower ... which was fixed to cause BUG#55656 mysqldump takes six days instead of half an hour ... and was amended with a special case workaround sql/opt_range.cc: move get_index_only_read_time() into the handler class sql/sql_select.cc: use cost not an index length when choosing a cheaper index
60c15066 · Sergei Golubchik · b56086cb · 60c15066 · 60c15066 · 60c15066
Commit 60c15066 authored Oct 20, 2010 by Sergei Golubchik
Hide whitespace changes
Inline Side-by-side

Showing with 34 additions and 49 deletions

sql/handler.cc sql/handler.cc +15 -1

sql/handler.h sql/handler.h +10 -0

sql/opt_range.cc sql/opt_range.cc +5 -45

sql/sql_select.cc sql/sql_select.cc +4 -3

No files found.
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -2047,7 +2047,21 @@ handler *handler::clone(MEM_ROOT *mem_root)
  return new_handler;
 }

-
+double handler::keyread_time(uint index, uint ranges, ha_rows rows)
+{
+  /*
+    It is assumed that we will read trough the whole key range and that all
+    key blocks are half full (normally things are much better). It is also
+    assumed that each time we read the next key from the index, the handler
+    performs a random seek, thus the cost is proportional to the number of
+    blocks read. This model does not take into account clustered indexes -
+    engines that support that (e.g. InnoDB) may want to overwrite this method.
+  */
+  double keys_per_block= (stats.block_size/2.0/
+                          (table->key_info[index].key_length +
+                           ref_length) + 1);
+  return (rows + keys_per_block - 1)/ keys_per_block;
+}

 void handler::ha_statistic_increment(ulong SSV::*offset) const
 {

--- a/sql/handler.h
+++ b/sql/handler.h
@@ -1304,6 +1304,16 @@ class handler :public Sql_alloc
  { return ulonglong2double(stats.data_file_length) / IO_SIZE + 2; }
  virtual double read_time(uint index, uint ranges, ha_rows rows)
  { return rows2double(ranges+rows); }
+
+  /**
+    Calculate cost of 'keyread' scan for given index and number of records.
+
+     @param index    index to read
+     @param ranges   #of ranges to read
+     @param rows     #of records to read
+  */
+  virtual double keyread_time(uint index, uint ranges, ha_rows rows);
+
  virtual const key_map *keys_to_use_for_scanning() { return &key_map_empty; }
  bool has_transactions()
  { return (ha_table_flags() & HA_NO_TRANSACTIONS) == 0; }

--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -709,8 +709,6 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
                                         double read_time);
 static
 TRP_GROUP_MIN_MAX *get_best_group_min_max(PARAM *param, SEL_TREE *tree);
-static double get_index_only_read_time(const PARAM* param, ha_rows records,
-                                       int keynr);

 #ifndef DBUG_OFF
 static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
@@ -2315,9 +2313,8 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
    if (!head->covering_keys.is_clear_all())
    {
      int key_for_use= find_shortest_key(head, &head->covering_keys);
-      double key_read_time= (get_index_only_read_time(&param, records,
-                                                     key_for_use) +
-                             (double) records / TIME_FOR_COMPARE);
+      double key_read_time= head->file->keyread_time(key_for_use, 1, records) +
+                            (double) records / TIME_FOR_COMPARE;
      DBUG_PRINT("info",  ("'all'+'using index' scan will be using key %d, "
                           "read time %g", key_for_use, key_read_time));
      if (key_read_time < read_time)
@@ -3938,43 +3935,6 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
  DBUG_RETURN(imerge_trp);
 }

-
-/*
-  Calculate cost of 'index only' scan for given index and number of records.
-
-  SYNOPSIS
-    get_index_only_read_time()
-      param    parameters structure
-      records  #of records to read
-      keynr    key to read
-
-  NOTES
-    It is assumed that we will read trough the whole key range and that all
-    key blocks are half full (normally things are much better). It is also
-    assumed that each time we read the next key from the index, the handler
-    performs a random seek, thus the cost is proportional to the number of
-    blocks read.
-
-  TODO:
-    Move this to handler->read_time() by adding a flag 'index-only-read' to
-    this call. The reason for doing this is that the current function doesn't
-    handle the case when the row is stored in the b-tree (like in innodb
-    clustered index)
-*/
-
-static double get_index_only_read_time(const PARAM* param, ha_rows records,
-                                       int keynr)
-{
-  double read_time;
-  uint keys_per_block= (param->table->file->stats.block_size/2/
-			(param->table->key_info[keynr].key_length+
-			 param->table->file->ref_length) + 1);
-  read_time=((double) (records+keys_per_block-1)/
-             (double) keys_per_block);
-  return read_time;
-}
-
-
 typedef struct st_ror_scan_info
 {
  uint      idx;      /* # of used key in param->keys */
@@ -4051,8 +4011,8 @@ ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
      bitmap_set_bit(&ror_scan->covered_fields, key_part->fieldnr-1);
  }
  ror_scan->index_read_cost=
-    get_index_only_read_time(param, param->table->quick_rows[ror_scan->keynr],
-                             ror_scan->keynr);
+    param->table->file->keyread_time(ror_scan->keynr, 1,
+                                     param->table->quick_rows[ror_scan->keynr]);
  DBUG_RETURN(ror_scan);
 }

@@ -4887,7 +4847,7 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
          We can resolve this by only reading through this key. 
          0.01 is added to avoid races between range and 'index' scan.
        */
-        found_read_time= get_index_only_read_time(param,found_records,keynr) +
+        found_read_time= param->table->file->keyread_time(keynr, 1, found_records) +
                         cpu_cost + 0.01;
      }
      else

--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -13367,7 +13367,7 @@ static int test_if_order_by_key(ORDER *order, TABLE *table, uint idx,

 uint find_shortest_key(TABLE *table, const key_map *usable_keys)
 {
-  uint min_length= (uint) ~0;
+  double min_cost= DBL_MAX;
  uint best= MAX_KEY;
  if (!usable_keys->is_clear_all())
  {
@@ -13375,9 +13375,10 @@ uint find_shortest_key(TABLE *table, const key_map *usable_keys)
    {
      if (usable_keys->is_set(nr))
      {
-        if (table->key_info[nr].key_length < min_length)
+        double cost= table->file->keyread_time(nr, 1, table->file->records());
+        if (cost < min_cost)
        {
-          min_length=table->key_info[nr].key_length;
+          min_cost= cost;
          best=nr;
        }
      }