Added 'records_out' and join_type to POSITION

records_out is the number of rows expected to be accepted from a table. records_read is, in contrast, the number of rows that the optimizer expects to read from the engine. This patch causes no plan changes. The differences in test results come from renaming "records" to "records_read" and from printing records_out in the optimizer trace. Other things: - Renamed table_cond_selectivity() to table_after_join_selectivity() to make the purpose of the function clearer.

Added 'records_out' and join_type to POSITION
records_out is the number of rows expected to be accepted from a table. records_read is, in contrast, the number of rows that the optimizer expects to read from the engine. This patch causes no plan changes. The differences in test results come from renaming "records" to "records_read" and from printing records_out in the optimizer trace. Other things: - Renamed table_cond_selectivity() to table_after_join_selectivity() to make the purpose of the function clearer.
2387ee9b · Monty · Sergei Petrunia · 9db877c9 · 2387ee9b · 2387ee9b
Commit 2387ee9b authored Apr 11, 2022 by Monty Committed by Sergei Petrunia Feb 02, 2023
13 changed files
--- a/mysql-test/main/opt_trace.result
+++ b/mysql-test/main/opt_trace.result
--- a/mysql-test/main/opt_trace.test
+++ b/mysql-test/main/opt_trace.test
@@ -928,3 +928,37 @@ set optimizer_trace='enabled=off';

 --echo # End of 10.6 tests

+
+--echo #
+--echo # Testing of records_out
+--echo #
+
+set @save_optimizer_switch= @@optimizer_switch;
+set @save_use_stat_tables= @@use_stat_tables;
+set @save_histogram_size= @@histogram_size;
+set @save_optimizer_use_condition_selectivity= @@optimizer_use_condition_selectivity;
+set optimizer_switch='rowid_filter=on';
+set use_stat_tables='preferably';
+set optimizer_use_condition_selectivity=4;
+set histogram_size=127;
+create table t1 (a int, b int, c int, key(a),key(b));
+insert into t1 select seq, seq*2, seq/10 from seq_1_to_1000;
+analyze table t1;
+--optimizer_trace
+explain select * from t1 where a<10 and b between 10 and 50 and c < 10;
+drop table t1;
+
+create table three (a int);
+insert into three values (1),(2),(3);
+create table t1 (a int, b int, c int, key(a),key(b));
+insert into t1 select mod(seq,10), seq, seq from seq_1_to_10000;
+analyze table t1;
+
+--optimizer_trace
+explain format=json select * from three, t1 where t1.a=three.a and t1.b<5000 and t1.c<1000;
+drop table three, t1;
+
+set  @@optimizer_switch= @save_optimizer_switch;
+set  @@use_stat_tables= @save_use_stat_tables;
+set  @@histogram_size= @save_histogram_size;
+set  @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
--- a/mysql-test/main/opt_trace_index_merge.result
+++ b/mysql-test/main/opt_trace_index_merge.result
@@ -226,7 +226,8 @@ explain select * from t1 where a=1 or b=1	{
                      ],
                      "chosen_access_method": {
                        "type": "index_merge",
-                        "records": 2,
+                        "records_read": 2,
+                        "records_out": 2,
                        "cost": 2.601171589,
                        "uses_join_buffering": false
                      }

--- a/mysql-test/main/opt_trace_index_merge_innodb.result
+++ b/mysql-test/main/opt_trace_index_merge_innodb.result
@@ -226,7 +226,8 @@ explain select * from t1 where pk1 != 0  and key1 = 1	{
                      ],
                      "chosen_access_method": {
                        "type": "ref",
-                        "records": 1,
+                        "records_read": 1,
+                        "records_out": 1,
                        "cost": 1.250146475,
                        "uses_join_buffering": false
                      }

--- a/mysql-test/main/opt_trace_security.result
+++ b/mysql-test/main/opt_trace_security.result
@@ -110,7 +110,8 @@ select * from db1.t1	{
                      ],
                      "chosen_access_method": {
                        "type": "scan",
-                        "records": 3,
+                        "records_read": 3,
+                        "records_out": 3,
                        "cost": 1.752563477,
                        "uses_join_buffering": false
                      }
@@ -248,7 +249,8 @@ select * from db1.v1	{
                      ],
                      "chosen_access_method": {
                        "type": "scan",
-                        "records": 3,
+                        "records_read": 3,
+                        "records_out": 3,
                        "cost": 1.752563477,
                        "uses_join_buffering": false
                      }

--- a/mysql-test/main/opt_trace_selectivity.result
+++ b/mysql-test/main/opt_trace_selectivity.result
@@ -86,7 +86,8 @@ JSON_DETAILED(JSON_EXTRACT(trace, '$**.considered_execution_plans'))
                        "chosen_access_method": 
                        {
                            "type": "index_merge",
-                            "records": 7,
+                            "records_read": 7,
+                            "records_out": 7,
                            "cost": 13.79559815,
                            "uses_join_buffering": false
                        }
@@ -177,7 +178,8 @@ JSON_DETAILED(JSON_EXTRACT(trace, '$**.considered_execution_plans'))
                        "chosen_access_method": 
                        {
                            "type": "ref",
-                            "records": 6,
+                            "records_read": 6,
+                            "records_out": 0.6,
                            "cost": 5.002343464,
                            "uses_join_buffering": false
                        }

--- a/sql/opt_trace.cc
+++ b/sql/opt_trace.cc
@@ -689,15 +689,15 @@ void print_final_join_order(JOIN *join)
 }


-void print_best_access_for_table(THD *thd, POSITION *pos,
-                                 enum join_type type)
+void print_best_access_for_table(THD *thd, POSITION *pos)
 {
  DBUG_ASSERT(thd->trace_started());

  Json_writer_object obj(thd, "chosen_access_method");
  obj.
-    add("type", type == JT_ALL ? "scan" : join_type_str[type]).
-    add("records", pos->records_read).
+    add("type", pos->type == JT_ALL ? "scan" : join_type_str[pos->type]).
+    add("records_read", pos->records_read).
+    add("records_out", pos->records_out).
    add("cost", pos->read_time).
    add("uses_join_buffering", pos->use_join_buffer);
  if (pos->range_rowid_filter_info)

--- a/sql/opt_trace.h
+++ b/sql/opt_trace.h
@@ -109,8 +109,7 @@ void opt_trace_print_expanded_query(THD *thd, SELECT_LEX *select_lex,
 void add_table_scan_values_to_trace(THD *thd, JOIN_TAB *tab);
 void trace_plan_prefix(JOIN *join, uint idx, table_map join_tables);
 void print_final_join_order(JOIN *join);
-void print_best_access_for_table(THD *thd, POSITION *pos,
-                                 enum join_type type);
+void print_best_access_for_table(THD *thd, POSITION *pos);

 void trace_condition(THD * thd, const char *name, const char *transform_type,
                    Item *item, const char *table_name= nullptr);

--- a/sql/rowid_filter.cc
+++ b/sql/rowid_filter.cc
@@ -460,10 +460,16 @@ void Range_rowid_filter_cost_info::trace_info(THD *thd)
  @brief
    Choose the best range filter for the given access of the table

-  @param access_key_no    The index by which the table is accessed
-  @param records   The estimated total number of key tuples with this access
-  @param access_cost_factor the cost of a random seek to access the table
-
+  @param access_key_no      The index by which the table is accessed
+  @param records            The estimated total number of key tuples with
+                            this access
+  @param fetch_cost         The cost of fetching 'records' rows
+  @param index_only_cost    The cost of fetching 'records' rows with
+                            index only reads
+  @param prev_records       How many row combinations we have in
+                            preceding tables
+  @param records_out        Will be updated to the minimum result rows for any
+                            usable filter.
  @details
    The function looks through the array of cost info for range filters
    and chooses the element for the range filter that promise the greatest
@@ -478,7 +484,8 @@ TABLE::best_range_rowid_filter_for_partial_join(uint access_key_no,
                                                double records,
                                                double fetch_cost,
                                                double index_only_cost,
-                                                double prev_records)
+                                                double prev_records,
+                                                double *records_out)
 {
  if (range_rowid_filter_cost_info_elems == 0 ||
      covering_keys.is_set(access_key_no))
@@ -521,13 +528,14 @@ TABLE::best_range_rowid_filter_for_partial_join(uint access_key_no,
      continue;

    new_records= records * filter->selectivity;
+    set_if_smaller(*records_out, new_records);
    cost_of_accepted_rows= fetch_cost * filter->selectivity;
    cost_of_rejected_rows= index_only_cost * (1 - filter->selectivity);
    new_cost= (cost_of_accepted_rows + cost_of_rejected_rows +
               records * filter->lookup_cost());
    new_total_cost= ((new_cost + new_records *
-                      in_use->variables.optimizer_where_cost) * prev_records +
-                     filter->get_setup_cost());
+                      in_use->variables.optimizer_where_cost) *
+                     prev_records + filter->get_setup_cost());

    if (best_filter_gain > new_total_cost)
    {

--- a/sql/rowid_filter.h
+++ b/sql/rowid_filter.h
@@ -491,7 +491,8 @@ class Range_rowid_filter_cost_info final: public Sql_alloc
                                                  double records,
                                                  double fetch_cost,
                                                  double index_only_cost,
-                                                  double prev_records);
+                                                  double prev_records,
+                                                  double *records_out);
  Range_rowid_filter_cost_info *
    apply_filter(THD *thd, TABLE *table, double *cost, double *records_arg,
                 double *startup_cost, double fetch_cost,

--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -343,7 +343,10 @@ typedef struct st_join_table {
  
  /* Copy of POSITION::records_read, set by get_best_combination() */
  double        records_read;
-  
+
+  /* Copy of POSITION::records_out, set by get_best_combination() */
+  double        records_out;
+
  /* The selectivity of the conditions that can be pushed to the table */ 
  double        cond_selectivity;  
  
@@ -938,12 +941,23 @@ class POSITION
  /* The table that's put into join order */
  JOIN_TAB *table;

+  /*
+    The number of rows that will be read from the table
+  */
+  double records_read;
+
  /*
    The "fanout": number of output rows that will be produced (after
    pushed down selection condition is applied) per each row combination of
    previous tables.
+
+    This takes into account table->cond_selectivity, the WHERE clause
+    related to this table calculated in
+    calculate_cond_selectivity_for_table(), and the used rowid filter but
+    does not take into account the WHERE clause involving preceding tables
+    calculated in table_after_join_selectivity().
  */
-  double records_read;
+  double records_out;

  /* The selectivity of the pushed down conditions */
  double cond_selectivity;
@@ -1007,6 +1021,7 @@ class POSITION

  /* Type of join (EQ_REF, REF etc) */
  enum join_type type;
+
  /*
    Valid only after fix_semijoin_strategies_for_picked_join_order() call:
    if sj_strategy!=SJ_OPT_NONE, this is the number of subsequent tables that

--- a/sql/table.h
+++ b/sql/table.h
@@ -1810,7 +1810,8 @@ struct TABLE
                                           double records,
                                           double fetch_cost,
                                           double index_only_cost,
-                                           double prev_records);
+                                           double prev_records,
+                                           double *records_out);
  /**
    System Versioning support
   */