Commit 480a0671 authored by Igor Babaev

MDEV-25128 Wrong result from join with materialized semi-join and

           splittable derived

If one of joined tables of the processed query is a materialized derived
table (or view or CTE) with GROUP BY clause then under some conditions it
can be subject to split optimization. With this optimization new equalities
are injected into the WHERE condition of the SELECT that specifies this
derived table. The injected equalities are generated for all join orders
with which the split optimization can be employed. After the best join order
has been chosen only certain of these equalities are really needed. The
others can be safely removed. If it's not done and some of injected
equalities involve expressions over semi-joins with look-up access then
the query may return a wrong result set.
This patch effectively removes equalities injected for split optimization
that are needed only at the optimization stage and not needed for execution.

Approved by serg@mariadb.com
parent 7d5ec9f1
...@@ -17134,4 +17134,215 @@ a ...@@ -17134,4 +17134,215 @@ a
3 3
DROP VIEW v1; DROP VIEW v1;
DROP TABLE t1; DROP TABLE t1;
#
# MDEV-25128: Split optimization for join with materialized semi-join
#
create table t1 (id int, a int, index (a), index (id, a)) engine=myisam;
insert into t1 values
(17,1),(17,3010),(17,3013),(17,3053),(21,2446),(21,2467),(21,2);
create table t2 (a int) engine=myisam;
insert into t2 values (1),(2),(3);
create table t3 (id int) engine=myisam;
insert into t3 values (1),(2);
analyze table t1,t2,t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
test.t2 analyze status OK
test.t3 analyze status OK
set optimizer_switch="split_materialized=off";
select * from t1, (select a from t1 cp2 group by a) dt, t3
where dt.a = t1.a and t1.a = t3.id and t1.a in (select a from t2);
id a a id
17 1 1 1
21 2 2 2
explain select * from t1, (select a from t1 cp2 group by a) dt, t3
where dt.a = t1.a and t1.a = t3.id and t1.a in (select a from t2);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t3 ALL NULL NULL NULL NULL 2 Using where
1 PRIMARY t1 ref a a 5 test.t3.id 1
1 PRIMARY <subquery3> eq_ref distinct_key distinct_key 4 func 1
1 PRIMARY <derived2> ref key0 key0 5 test.t3.id 2
3 MATERIALIZED t2 ALL NULL NULL NULL NULL 3
2 DERIVED cp2 index NULL a 5 NULL 7 Using index
explain format=json select * from t1, (select a from t1 cp2 group by a) dt, t3
where dt.a = t1.a and t1.a = t3.id and t1.a in (select a from t2);
EXPLAIN
{
"query_block": {
"select_id": 1,
"table": {
"table_name": "t3",
"access_type": "ALL",
"rows": 2,
"filtered": 100,
"attached_condition": "t3.`id` is not null and t3.`id` is not null"
},
"table": {
"table_name": "t1",
"access_type": "ref",
"possible_keys": ["a"],
"key": "a",
"key_length": "5",
"used_key_parts": ["a"],
"ref": ["test.t3.id"],
"rows": 1,
"filtered": 100
},
"table": {
"table_name": "<subquery3>",
"access_type": "eq_ref",
"possible_keys": ["distinct_key"],
"key": "distinct_key",
"key_length": "4",
"used_key_parts": ["a"],
"ref": ["func"],
"rows": 1,
"filtered": 100,
"materialized": {
"unique": 1,
"query_block": {
"select_id": 3,
"table": {
"table_name": "t2",
"access_type": "ALL",
"rows": 3,
"filtered": 100
}
}
}
},
"table": {
"table_name": "<derived2>",
"access_type": "ref",
"possible_keys": ["key0"],
"key": "key0",
"key_length": "5",
"used_key_parts": ["a"],
"ref": ["test.t3.id"],
"rows": 2,
"filtered": 100,
"materialized": {
"query_block": {
"select_id": 2,
"table": {
"table_name": "cp2",
"access_type": "index",
"key": "a",
"key_length": "5",
"used_key_parts": ["a"],
"rows": 7,
"filtered": 100,
"using_index": true
}
}
}
}
}
}
set optimizer_switch="split_materialized=default";
select * from t1, (select a from t1 cp2 group by a) dt, t3
where dt.a = t1.a and t1.a = t3.id and t1.a in (select a from t2);
id a a id
17 1 1 1
21 2 2 2
explain select * from t1, (select a from t1 cp2 group by a) dt, t3
where dt.a = t1.a and t1.a = t3.id and t1.a in (select a from t2);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t3 ALL NULL NULL NULL NULL 2 Using where
1 PRIMARY t1 ref a a 5 test.t3.id 1
1 PRIMARY <subquery3> eq_ref distinct_key distinct_key 4 func 1
1 PRIMARY <derived2> ref key0 key0 5 test.t3.id 2
3 MATERIALIZED t2 ALL NULL NULL NULL NULL 3
2 LATERAL DERIVED cp2 ref a a 5 test.t1.a 1 Using index
explain format=json select * from t1, (select a from t1 cp2 group by a) dt, t3
where dt.a = t1.a and t1.a = t3.id and t1.a in (select a from t2);
EXPLAIN
{
"query_block": {
"select_id": 1,
"table": {
"table_name": "t3",
"access_type": "ALL",
"rows": 2,
"filtered": 100,
"attached_condition": "t3.`id` is not null and t3.`id` is not null"
},
"table": {
"table_name": "t1",
"access_type": "ref",
"possible_keys": ["a"],
"key": "a",
"key_length": "5",
"used_key_parts": ["a"],
"ref": ["test.t3.id"],
"rows": 1,
"filtered": 100
},
"table": {
"table_name": "<subquery3>",
"access_type": "eq_ref",
"possible_keys": ["distinct_key"],
"key": "distinct_key",
"key_length": "4",
"used_key_parts": ["a"],
"ref": ["func"],
"rows": 1,
"filtered": 100,
"materialized": {
"unique": 1,
"query_block": {
"select_id": 3,
"table": {
"table_name": "t2",
"access_type": "ALL",
"rows": 3,
"filtered": 100
}
}
}
},
"table": {
"table_name": "<derived2>",
"access_type": "ref",
"possible_keys": ["key0"],
"key": "key0",
"key_length": "5",
"used_key_parts": ["a"],
"ref": ["test.t3.id"],
"rows": 2,
"filtered": 100,
"materialized": {
"lateral": 1,
"query_block": {
"select_id": 2,
"outer_ref_condition": "t1.a is not null",
"table": {
"table_name": "cp2",
"access_type": "ref",
"possible_keys": ["a"],
"key": "a",
"key_length": "5",
"used_key_parts": ["a"],
"ref": ["test.t1.a"],
"rows": 1,
"filtered": 100,
"using_index": true
}
}
}
}
}
}
prepare stmt from "select * from t1, (select a from t1 cp2 group by a) dt, t3
where dt.a = t1.a and t1.a = t3.id and t1.a in (select a from t2)";
execute stmt;
id a a id
17 1 1 1
21 2 2 2
execute stmt;
id a a id
17 1 1 1
21 2 2 2
deallocate prepare stmt;
drop table t1,t2,t3;
# End of 10.3 tests # End of 10.3 tests
...@@ -3479,4 +3479,41 @@ SELECT * from v1 WHERE a=3; ...@@ -3479,4 +3479,41 @@ SELECT * from v1 WHERE a=3;
DROP VIEW v1; DROP VIEW v1;
DROP TABLE t1; DROP TABLE t1;
--echo #
--echo # MDEV-25128: Split optimization for join with materialized semi-join
--echo #
# t1 is read both directly and, under the alias cp2, inside the grouping
# derived table dt; it has an index on (a) and one on (id, a).
create table t1 (id int, a int, index (a), index (id, a)) engine=myisam;
insert into t1 values
(17,1),(17,3010),(17,3013),(17,3053),(21,2446),(21,2467),(21,2);
# t2 supplies the values for the IN subquery.
create table t2 (a int) engine=myisam;
insert into t2 values (1),(2),(3);
# t3 is joined to t1 through t1.a = t3.id.
create table t3 (id int) engine=myisam;
insert into t3 values (1),(2);
analyze table t1,t2,t3;
# The query under test: a join of t1, the GROUP BY derived table dt and t3,
# restricted by an IN subquery over t2.
let $q=
select * from t1, (select a from t1 cp2 group by a) dt, t3
where dt.a = t1.a and t1.a = t3.id and t1.a in (select a from t2);
# Run the query and show its plan (tabular and JSON) with split_materialized
# switched off.
set optimizer_switch="split_materialized=off";
eval $q;
eval explain $q;
eval explain format=json $q;
# Repeat the same three statements with split_materialized at its default
# setting, so the two result sets and plans can be compared.
set optimizer_switch="split_materialized=default";
eval $q;
eval explain $q;
eval explain format=json $q;
# Run the same query as a prepared statement and execute it twice.
eval prepare stmt from "$q";
execute stmt;
execute stmt;
deallocate prepare stmt;
drop table t1,t2,t3;
--echo # End of 10.3 tests --echo # End of 10.3 tests
...@@ -236,6 +236,8 @@ class SplM_opt_info : public Sql_alloc ...@@ -236,6 +236,8 @@ class SplM_opt_info : public Sql_alloc
SplM_field_info *spl_fields; SplM_field_info *spl_fields;
/* The number of elements in the above list */ /* The number of elements in the above list */
uint spl_field_cnt; uint spl_field_cnt;
/* The list of equalities injected into WHERE for split optimization */
List<Item> inj_cond_list;
/* Contains the structures to generate all KEYUSEs for pushable equalities */ /* Contains the structures to generate all KEYUSEs for pushable equalities */
List<KEY_FIELD> added_key_fields; List<KEY_FIELD> added_key_fields;
/* The cache of evaluated execution plans for 'join' with pushed equalities */ /* The cache of evaluated execution plans for 'join' with pushed equalities */
...@@ -1047,22 +1049,22 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count, ...@@ -1047,22 +1049,22 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
bool JOIN::inject_best_splitting_cond(table_map remaining_tables) bool JOIN::inject_best_splitting_cond(table_map remaining_tables)
{ {
Item *inj_cond= 0; Item *inj_cond= 0;
List<Item> inj_cond_list; List<Item> *inj_cond_list= &spl_opt_info->inj_cond_list;
List_iterator<KEY_FIELD> li(spl_opt_info->added_key_fields); List_iterator<KEY_FIELD> li(spl_opt_info->added_key_fields);
KEY_FIELD *added_key_field; KEY_FIELD *added_key_field;
while ((added_key_field= li++)) while ((added_key_field= li++))
{ {
if (remaining_tables & added_key_field->val->used_tables()) if (remaining_tables & added_key_field->val->used_tables())
continue; continue;
if (inj_cond_list.push_back(added_key_field->cond, thd->mem_root)) if (inj_cond_list->push_back(added_key_field->cond, thd->mem_root))
return true; return true;
} }
DBUG_ASSERT(inj_cond_list.elements); DBUG_ASSERT(inj_cond_list->elements);
switch (inj_cond_list.elements) { switch (inj_cond_list->elements) {
case 1: case 1:
inj_cond= inj_cond_list.head(); break; inj_cond= inj_cond_list->head(); break;
default: default:
inj_cond= new (thd->mem_root) Item_cond_and(thd, inj_cond_list); inj_cond= new (thd->mem_root) Item_cond_and(thd, *inj_cond_list);
if (!inj_cond) if (!inj_cond)
return true; return true;
} }
...@@ -1080,6 +1082,40 @@ bool JOIN::inject_best_splitting_cond(table_map remaining_tables) ...@@ -1080,6 +1082,40 @@ bool JOIN::inject_best_splitting_cond(table_map remaining_tables)
} }
/**
  @brief
    Check whether an equality is one of those injected for split optimization

  @param
    eq_item   the equality to test

  @details
    The left argument of eq_item is resolved with real_item(). When it is
    a field item whose table has a join_tab, and the join of that join_tab
    has spl_opt_info, the list spl_opt_info->inj_cond_list is scanned for
    eq_item itself. Elements are compared by pointer, not by contents.

  @retval
    true    eq_item is found in inj_cond_list
    false   otherwise, including when the left argument is not a field item,
            the table has no join_tab, or the join has no spl_opt_info
*/

bool is_eq_cond_injected_for_split_opt(Item_func_eq *eq_item)
{
  Item *lhs= eq_item->arguments()[0]->real_item();
  if (lhs->type() != Item::FIELD_ITEM)
    return false;

  Field *fld= ((Item_field *) lhs)->field;
  if (!fld->table->reginfo.join_tab)
    return false;

  JOIN *owner= fld->table->reginfo.join_tab->join;
  if (!owner->spl_opt_info)
    return false;

  /* Look for eq_item itself among the equalities recorded as injected */
  List_iterator_fast<Item> it(owner->spl_opt_info->inj_cond_list);
  for (Item *inj_eq= it++; inj_eq; inj_eq= it++)
  {
    if (inj_eq == eq_item)
      return true;
  }
  return false;
}
/** /**
@brief @brief
Fix the splitting chosen for a splittable table in the final query plan Fix the splitting chosen for a splittable table in the final query plan
......
...@@ -292,6 +292,8 @@ void set_postjoin_aggr_write_func(JOIN_TAB *tab); ...@@ -292,6 +292,8 @@ void set_postjoin_aggr_write_func(JOIN_TAB *tab);
static Item **get_sargable_cond(JOIN *join, TABLE *table); static Item **get_sargable_cond(JOIN *join, TABLE *table);
bool is_eq_cond_injected_for_split_opt(Item_func_eq *eq_item);
#ifndef DBUG_OFF #ifndef DBUG_OFF
/* /*
...@@ -21787,6 +21789,21 @@ make_cond_for_table_from_pred(THD *thd, Item *root_cond, Item *cond, ...@@ -21787,6 +21789,21 @@ make_cond_for_table_from_pred(THD *thd, Item *root_cond, Item *cond,
cond->marker=3; // Checked when read cond->marker=3; // Checked when read
return (COND*) 0; return (COND*) 0;
} }
/*
If cond is an equality injected for split optimization then
a. when retain_ref_cond == false : cond is removed unconditionally
(cond that supports ref access is removed by the preceding code)
b. when retain_ref_cond == true : cond is removed if it does not
support ref access
*/
if (left_item->type() == Item::FIELD_ITEM &&
is_eq_cond_injected_for_split_opt((Item_func_eq *) cond) &&
(!retain_ref_cond ||
!test_if_ref(root_cond, (Item_field*) left_item,right_item)))
{
cond->marker=3;
return (COND*) 0;
}
} }
cond->marker=2; cond->marker=2;
cond->set_join_tab_idx(join_tab_idx_arg); cond->set_join_tab_idx(join_tab_idx_arg);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment