MDEV-26301: Split optimization refills: Optimizer Trace coverage

Add Optimizer Trace printouts.

MDEV-26301: Split optimization refills: Optimizer Trace coverage
Add Optimizer Trace printouts.
ed3e6f66 · Sergei Petrunia · Oleksandr Byelkin · ce7ffe61 · ed3e6f66 · ed3e6f66
Commit ed3e6f66 authored May 03, 2023 by Sergei Petrunia Committed by Oleksandr Byelkin May 03, 2023
4 changed files
--- a/mysql-test/main/opt_trace.result
+++ b/mysql-test/main/opt_trace.result
@@ -449,6 +449,11 @@ select * from v2	{
                    }
                  ]
                },
+                {
+                  "check_split_materialized": {
+                    "not_applicable": "no candidate field can be accessed through ref"
+                  }
+                },
                {
                  "best_join_order": ["t1"]
                },
@@ -772,6 +777,11 @@ explain select * from v1	{
                    }
                  ]
                },
+                {
+                  "check_split_materialized": {
+                    "not_applicable": "group list has no candidates"
+                  }
+                },
                {
                  "best_join_order": ["t1"]
                },
@@ -8861,5 +8871,109 @@ SET optimizer_trace=DEFAULT;
 DROP VIEW v;
 DROP TABLE t;
 #
+# MDEV-26301: Split optimization improvements: Optimizer Trace coverage
+#
+create table t1(a int, b int);
+insert into t1 select seq,seq from seq_1_to_5;
+create table t2(a int, b int, key(a));
+insert into t2
+select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
+create table t3(a int, b int, key(a));
+insert into t3
+select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
+analyze table t1,t2,t3 persistent for all;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	Engine-independent statistics collected
+test.t1	analyze	status	OK
+test.t2	analyze	status	Engine-independent statistics collected
+test.t2	analyze	status	Table is already up to date
+test.t3	analyze	status	Engine-independent statistics collected
+test.t3	analyze	status	Table is already up to date
+create table t10 (
+grp_id int,
+col1 int,
+key(grp_id)
+);
+insert into t10
+select
+A.seq,
+B.seq
+from
+seq_1_to_100 A,
+seq_1_to_100 B;
+create table t11 (
+col1 int,
+col2 int
+);
+insert into t11
+select A.seq, A.seq from seq_1_to_10 A;
+analyze table t10,t11 persistent for all;
+Table	Op	Msg_type	Msg_text
+test.t10	analyze	status	Engine-independent statistics collected
+test.t10	analyze	status	Table is already up to date
+test.t11	analyze	status	Engine-independent statistics collected
+test.t11	analyze	status	OK
+set optimizer_trace=1;
+explain
+select * from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	5	
+1	PRIMARY	t2	ref	a	a	5	test.t1.b	2	Using where
+1	PRIMARY	t3	ref	a	a	5	test.t1.b	3	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	10	Using where
+2	LATERAL DERIVED	t10	ref	grp_id	grp_id	5	test.t1.b	100	
+2	LATERAL DERIVED	t11	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
+select json_detailed(json_extract(trace, '$**.check_split_materialized')) as JS
+from information_schema.optimizer_trace;
+JS
+[
+    {
+        "split_candidates": 
+        ["t10.grp_id"]
+    }
+]
+select 
+json_detailed(
+json_remove(
+json_extract(trace, '$**.choose_best_splitting')
+, '$[0].split_plan_search[0]'
+    )
+) as JS
+from information_schema.optimizer_trace;
+JS
+[
+    {
+        "considered_keys": 
+        [
+            {
+                "table_name": "t10",
+                "index": "grp_id",
+                "rec_per_key": 100,
+                "param_tables": 1
+            }
+        ],
+        "refills": 5,
+        "spl_pd_boundary": 2,
+        "split_plan_search": 
+        [],
+        "lead_table": "t10",
+        "index": "grp_id",
+        "parts": 1,
+        "split_sel": 0.001,
+        "cost": 2536,
+        "records": 100,
+        "refills": 5,
+        "chosen": true
+    }
+]
+drop table t1,t2,t3,t10,t11;
+set optimizer_trace=DEFAULT;
+#
 # End of 10.4 tests
 #
--- a/mysql-test/main/opt_trace.test
+++ b/mysql-test/main/opt_trace.test
@@ -696,6 +696,76 @@ SET optimizer_trace=DEFAULT;
 DROP VIEW v;
 DROP TABLE t;
+--echo #
+--echo # MDEV-26301: Split optimization improvements: Optimizer Trace coverage
+--echo #
+# 5 values
+create table t1(a int, b int);
+insert into t1 select seq,seq from seq_1_to_5;
+# 5 value groups of size 2 each
+create table t2(a int, b int, key(a));
+insert into t2
+select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
+# 5 value groups of size 3 each
+create table t3(a int, b int, key(a));
+insert into t3
+select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
+analyze table t1,t2,t3 persistent for all;
+create table t10 (
+  grp_id int,
+  col1 int,
+  key(grp_id)
+);
+# 100 groups of 100 values each
+insert into t10
+select
+  A.seq,
+  B.seq
+from
+  seq_1_to_100 A,
+  seq_1_to_100 B;
+# and X10 multiplier
+create table t11 (
+  col1 int,
+  col2 int
+);
+insert into t11
+select A.seq, A.seq from seq_1_to_10 A;
+analyze table t10,t11 persistent for all;
+set optimizer_trace=1;
+explain
+select * from
+  (
+    (t1 left join t2 on t2.a=t1.b)
+    left join t3 on t3.a=t1.b
+  ) left join (select grp_id, count(*)
+               from t10 left join t11 on t11.col1=t10.col1
+               group by grp_id) T on T.grp_id=t1.b;
+select json_detailed(json_extract(trace, '$**.check_split_materialized')) as JS
+from information_schema.optimizer_trace;
+select 
+  json_detailed(
+    json_remove(
+      json_extract(trace, '$**.choose_best_splitting')
+      , '$[0].split_plan_search[0]'
+    )
+  ) as JS
+from information_schema.optimizer_trace;
+drop table t1,t2,t3,t10,t11;
+set optimizer_trace=DEFAULT;
 --echo #
 --echo # End of 10.4 tests
 --echo #
--- a/sql/opt_split.cc
+++ b/sql/opt_split.cc
@@ -228,6 +228,7 @@
 #include "mariadb.h"
 #include "sql_select.h"
+#include "opt_trace.h"
 /* Info on a splitting field */
 struct SplM_field_info
@@ -387,6 +388,9 @@ bool JOIN::check_for_splittable_materialized()
  if (!partition_list)
    return false;
+  Json_writer_object trace_wrapper(thd);
+  Json_writer_object trace_split(thd, "check_split_materialized");
  ORDER *ord;
  Dynamic_array<SplM_field_ext_info> candidates;
@@ -432,7 +436,10 @@ bool JOIN::check_for_splittable_materialized()
    }
  }
  if (candidates.elements() == 0)  // no candidates satisfying (8.1) && (8.2)
+  {
+    trace_split.add("not_applicable", "group list has no candidates");
    return false;
+  }
  /*
    For each table from this join find the keys that can be used for ref access
@@ -491,7 +498,11 @@ bool JOIN::check_for_splittable_materialized()
  }
  if (!spl_field_cnt)  // No candidate field can be accessed by ref => !(9)
+  {
+    trace_split.add("not_applicable",
+                    "no candidate field can be accessed through ref");
    return false;
+  }
  /*
    Create a structure of the type SplM_opt_info and fill it with
@@ -509,10 +520,15 @@ bool JOIN::check_for_splittable_materialized()
  spl_opt_info->tables_usable_for_splitting= 0;
  spl_opt_info->spl_field_cnt= spl_field_cnt;
  spl_opt_info->spl_fields= spl_field;
+  {
+    Json_writer_array trace_range(thd, "split_candidates");
    for (cand= cand_start; cand < cand_end; cand++)
    {
      if (!cand->is_usable_for_ref_access)
        continue;
+      trace_range.add(cand->producing_item);
      spl_field->producing_item= cand->producing_item;
      spl_field->underlying_field= cand->underlying_field;
      spl_field->mat_field= cand->mat_field;
@@ -520,6 +536,7 @@ bool JOIN::check_for_splittable_materialized()
      cand->underlying_field->table->map;
      spl_field++;
    }
+  }
  /* Attach this info to the table T */
  derived->table->set_spl_opt_info(spl_opt_info);
@@ -773,7 +790,7 @@ void JOIN::add_keyuses_for_splitting()
  bzero((char*) &keyuse_ext_end, sizeof(keyuse_ext_end));
  if (ext_keyuses_for_splitting->push(keyuse_ext_end))
    goto err;
+  // psergey-todo: trace anything here?
  spl_opt_info->unsplit_card= join_record_count;
  rec_len= table->s->rec_buff_length;
@@ -946,7 +963,8 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx,
  uint best_key= 0;
  uint best_key_parts= 0;
  table_map best_param_tables;
+  Json_writer_object trace_obj(thd, "choose_best_splitting");
+  Json_writer_array  trace_arr(thd, "considered_keys");
  /*
    Check whether there are keys that can be used to join T employing splitting
    and if so, select the best out of such keys
@@ -999,6 +1017,13 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx,
          best_rec_per_key= rec_per_key;
          best_key_keyuse_ext_start= key_keyuse_ext_start;
          best_param_tables= needed_in_prefix;
+           // trace table, key_name, parts, needed_tables.
+          Json_writer_object cur_index(thd);
+          cur_index.
+            add("table_name", best_table->alias.ptr()).
+            add("index", best_table->key_info[best_key].name).
+            add("rec_per_key", best_rec_per_key).
+            add("param_tables", best_param_tables);
        }
        keyuse_ext++;
      }
@@ -1006,6 +1031,8 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx,
    }
    while (keyuse_ext->table == table);
  }
+  trace_arr.end();
  spl_opt_info->last_plan= 0;
  double refills= DBL_MAX;
  table_map excluded_tables= remaining_tables | this->join->sjm_lookup_tables;
@@ -1031,6 +1058,10 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx,
          break;
      }
    }
+    trace_obj.add("refills", refills).
+      add("spl_pd_boundary", *spl_pd_boundary);
    /*
      The key for splitting was chosen, look for the plan for this key
      in the cache
@@ -1042,11 +1073,13 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx,
        The plan for the chosen key has not been found in the cache.
        Build a new plan and save info on it in the cache
      */
+      Json_writer_array wrapper(thd, "split_plan_search");
      table_map all_table_map= (((table_map) 1) << join->table_count) - 1;
      reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table,
                                      best_key, excluded_tables, true);
      choose_plan(join, all_table_map & ~join->const_table_map);
+      wrapper.end();
      /*
        Check that the chosen plan is really a splitting plan.
        If not or if there is not enough memory to save the plan in the cache
@@ -1064,6 +1097,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx,
      {
        reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table,
                                        best_key, excluded_tables, false);
+        trace_obj.add("split_plan_discarded", "constructed unapplicable query plan");
        return 0;
      }
@@ -1089,9 +1123,20 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx,
      reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table,
                                      best_key, excluded_tables, false);
    }
+    else
+      trace_obj.add("cached_plan_found", 1);
    if (spl_plan)
    {
+      trace_obj.
+        add("lead_table", spl_plan->table->alias.ptr()).
+        add("index",      spl_plan->table->key_info[spl_plan->key].name).
+        add("parts",      spl_plan->parts).
+        add("split_sel",  spl_plan->split_sel).
+        add("cost",       spl_plan->cost).
+        add("records",    (ha_rows) (records * spl_plan->split_sel)).
+        add("refills",    refills);
      if (refills * spl_plan->cost < spl_opt_info->unsplit_cost)
      {
        /*
@@ -1100,7 +1145,10 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx,
 	*/
        spl_opt_info->last_plan= spl_plan;
        spl_opt_info->last_refills= refills;
+        trace_obj.add("chosen", true);
      }
+      else
+        trace_obj.add("chosen", false);
    }
  }
@@ -1132,7 +1180,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx,
  @details
    This function injects equalities pushed into a derived table T for which
    the split optimization has been chosen by the optimizer. The function
-    is called by JOIN::inject_splitting_cond_for_all_tables_with_split_op().
+    is called by JOIN::inject_splitting_cond_for_all_tables_with_split_opt().
    All equalities usable for splitting T whose right parts do not depend on
    any of the 'excluded_tables' can be pushed into the where clause of the
    derived table T.

--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -7444,7 +7444,6 @@ best_access_path(JOIN      *join,
  DBUG_ENTER("best_access_path");
  Json_writer_object trace_wrapper(thd, "best_access_path");
-  Json_writer_array trace_paths(thd, "considered_access_paths");
  bitmap_clear_all(eq_join_set);
@@ -7455,6 +7454,7 @@ best_access_path(JOIN      *join,
                                       remaining_tables,
                                       &spl_pd_boundary);
+  Json_writer_array trace_paths(thd, "considered_access_paths");
  if (s->keyuse)
  {                                            /* Use key if possible */
    KEYUSE *keyuse;