MDEV-6384: It seems like OPTIMIZER take into account the order of indexes in the table

When ORDER BY ... LIMIT check whether it should switch from index IDX1 to index IDX2, it should not ignore the fact that IDX2 may have a potential range or ref(const) access. Istead, it should calculate their costs: there is now a saved range optimizer cost and code to re-calculate what best_access_path() calculated for ref(const). /* in current cost model these two can be very different numbers unfortunately */

MDEV-6384: It seems like OPTIMIZER take into account the order of indexes in the table
When ORDER BY ... LIMIT check whether it should switch from index IDX1 to index IDX2, it should not ignore the fact that IDX2 may have a potential range or ref(const) access. Istead, it should calculate their costs: there is now a saved range optimizer cost and code to re-calculate what best_access_path() calculated for ref(const). /* in current cost model these two can be very different numbers unfortunately */
f1a16833 · Sergei Petrunia · be00e279 · f1a16833 · f1a16833 · f1a16833
Commit f1a16833 authored Aug 27, 2014 by Sergei Petrunia
Hide whitespace changes
Inline Side-by-side

Showing with 119 additions and 2 deletions

mysql-test/r/innodb_ext_key.result mysql-test/r/innodb_ext_key.result +1 -1

sql/opt_range.cc sql/opt_range.cc +1 -0

sql/sql_select.cc sql/sql_select.cc +117 -1

No files found.
--- a/mysql-test/r/innodb_ext_key.result
+++ b/mysql-test/r/innodb_ext_key.result
@@ -1002,7 +1002,7 @@ insert into t2 (b) values (null), (null), (null);
 set optimizer_switch='extended_keys=on';
 explain select a from t1 where b is null order by a desc limit 2;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	ref	b	b	9	const	2	Using where; Using filesort
+1	SIMPLE	t1	index	b	PRIMARY	8	NULL	3	Using where
 select a from t1 where b is null order by a desc limit 2;
 a
 3

--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -10690,6 +10690,7 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
      param->table->quick_condition_rows=
        MY_MIN(param->table->quick_condition_rows, rows);
      param->table->quick_rows[keynr]= rows;
+      param->table->quick_costs[keynr]= cost->total_cost();
    }
  }
  /* Figure out if the key scan is ROR (returns rows in ROWID order) or not */

--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -20224,7 +20224,7 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
         !(table->file->index_flags(best_key, 0, 1) & HA_CLUSTERED_INDEX)))
      goto use_filesort;

-    if (select &&
+    if (select && // psergey:  why doesn't this use a quick?
        table->quick_keys.is_set(best_key) && best_key != ref_key)
    {
      key_map map;
@@ -24693,6 +24693,109 @@ void JOIN::cache_const_exprs()
  }
 }

+ 
+/*
+  Get a cost of reading rows_limit rows through index keynr.
+
+  @detail
+   - If there is a quick select, we try to use it.
+   - if there is a ref(const) access, we try to use it, too.
+   - quick and ref(const) use different cost formulas, so if both are possible
+      we should make a cost-based choice.
+
+  @return 
+    true   There was a possible quick or ref access, its cost is in the OUT
+           parameters.
+    false  No quick or ref(const) possible (and so, the caller will attempt 
+           to use a full index scan on this index).
+*/
+
+static bool get_range_limit_read_cost(const JOIN_TAB *tab, 
+                                      const TABLE *table, 
+                                      uint keynr, 
+                                      ha_rows rows_limit,
+                                      double *read_time)
+{
+  bool res= false;
+  /* 
+    We need to adjust the estimates if we had a quick select (or ref(const)) on
+    index keynr.
+  */
+  if (table->quick_keys.is_set(keynr))
+  {
+    /*
+      Start from quick select's rows and cost. These are always cheaper than
+      full index scan/cost.
+    */
+    double best_rows= table->quick_rows[keynr];
+    double best_cost= table->quick_costs[keynr];
+    
+    /*
+      Check if ref(const) access was possible on this index. 
+    */
+    if (tab)
+    {
+      key_part_map const_parts= 0;
+      key_part_map map= 1;
+      uint kp;
+      /* Find how many key parts would be used by ref(const) */
+      for (kp=0; kp < MAX_REF_PARTS; map=map << 1, kp++)
+      {
+        if (!(table->const_key_parts[keynr] & map))
+          break;
+        const_parts |= map;
+      }
+      
+      if (kp > 0)
+      {
+        ha_rows ref_rows;
+        /*
+          Two possible cases:
+          1. ref(const) uses the same #key parts as range access. 
+          2. ref(const) uses fewer key parts, becasue there is a
+            range_cond(key_part+1).
+        */
+        if (kp == table->quick_key_parts[keynr])
+          ref_rows= table->quick_rows[keynr];
+        else
+          ref_rows= table->key_info[keynr].actual_rec_per_key(kp-1);
+
+        if (ref_rows > 0)
+        {
+          double tmp= ref_rows;
+          /* Reuse the cost formula from best_access_path: */
+          set_if_smaller(tmp, (double) tab->join->thd->variables.max_seeks_for_key);
+          if (table->covering_keys.is_set(keynr))
+            tmp= table->file->keyread_time(keynr, 1, (ha_rows) tmp);
+          else
+            tmp= table->file->read_time(keynr, 1,
+                                        (ha_rows) MY_MIN(tmp,tab->worst_seeks));
+          if (tmp < best_cost)
+          {
+            best_cost= tmp;
+            best_rows= ref_rows;
+          }
+        }
+      }
+    }
+ 
+    if (best_rows > rows_limit)
+    {
+      /*
+        LIMIT clause specifies that we will need to read fewer records than
+        quick select will return. Assume that quick select's cost is
+        proportional to the number of records we need to return (e.g. if we 
+        only need 1/3rd of records, it will cost us 1/3rd of quick select's
+        read time)
+      */
+      best_cost *= rows_limit / best_rows;
+    }
+    *read_time= best_cost;
+    res= true;
+  }
+  return res;
+}
+

 /**
  Find a cheaper access key than a given @a key
@@ -24786,6 +24889,11 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
  }
  else
    read_time= table->file->scan_time();
+  
+  /*
+    TODO: add cost of sorting here.
+  */
+  read_time += COST_EPS;

  /*
    Calculate the selectivity of the ref_key for REF_ACCESS. For
@@ -24945,6 +25053,14 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
        */
        index_scan_time= select_limit/rec_per_key *
                         MY_MIN(rec_per_key, table->file->scan_time());
+        double range_scan_time;
+        if (get_range_limit_read_cost(tab, table, nr, select_limit, 
+                                       &range_scan_time))
+        {
+          if (range_scan_time < index_scan_time)
+            index_scan_time= range_scan_time;
+        }
+
        if ((ref_key < 0 && (group || table->force_index || is_covering)) ||
            index_scan_time < read_time)
        {