Commit 37dee22d authored by Varun Gupta's avatar Varun Gupta

MDEV-15454: Nested SELECT IN returns wrong results

In this case we are setting the field Item_func_eq::in_eqaulity_no for the semi-join equalities.
This helps us to remove these equalites as the inner tables are not available during parent select execution
while the outer tables are not available during materialization phase.
We only have it set for the equalites for the fields involved with the IN subquery
and reset it for the equalities which do not belong to the IN subquery.

For example in case of nested IN subqueries:

    SELECT t1.a FROM t1 WHERE t1.a IN
      (SELECT t2.a FROM t2 where t2.b IN
          (select t3.b from t3 where t3.c=27 ))

there are two equalites involving the fields of the IN subquery

1) t2.b = t3.b :  the field Item_func_eq::in_eqaulity_no is set when we merge the grandchild select into the child select
2) t1.a = t2.a :  the field Item_func_eq::in_eqaulity_no is set when we merge the child select into the parent select

But when we perform case 2) we should ensure that we reset the equalities in the child's WHERE clause.
parent f9b43c25
......@@ -2395,6 +2395,47 @@ SELECT 1 FROM t1 where t1.id IN (SELECT t2.i1 FROM t2 WHERE t2.i1 = t2.i2);
1
1
drop table t1,t2;
#
# MDEV-15454: Nested SELECT IN returns wrong results
#
CREATE TABLE t1 ( a int NOT NULL PRIMARY KEY);
CREATE TABLE t2 ( a int, b int );
INSERT INTO t2 VALUES (7878, 96),(3465, 96),(1403, 96),(4189, 96),(8732, 96), (5,96);
CREATE TABLE t3 (c int unsigned NOT NULL, b int unsigned NOT NULL, PRIMARY KEY (c,b));
INSERT INTO t3 (c, b) VALUES (27, 96);
CREATE PROCEDURE prepare_data()
BEGIN
DECLARE i INT DEFAULT 1;
WHILE i < 1000 DO
INSERT INTO t1 (a) VALUES (i);
INSERT INTO t2 (a,b) VALUES (i,56);
INSERT INTO t3 (c,b) VALUES (i,i);
SET i = i + 1;
END WHILE;
END$$
CALL prepare_data();
SELECT t2.a FROM t2 WHERE t2.b IN (SELECT t3.b FROM t3 WHERE t3.c= 27);
a
7878
3465
1403
4189
8732
5
set @save_optimizer_switch= @@optimizer_switch;
SET optimizer_switch='materialization=off';
SELECT t1.a FROM t1
WHERE t1.a IN (SELECT t2.a FROM t2 WHERE t2.b IN (SELECT t3.b FROM t3 WHERE t3.c= 27)) LIMIT 5;
a
5
SET optimizer_switch='materialization=on';
SELECT t1.a FROM t1
WHERE t1.a IN (SELECT t2.a FROM t2 WHERE t2.b IN (SELECT t3.b FROM t3 WHERE t3.c= 27)) LIMIT 5;
a
5
drop procedure prepare_data;
set @@optimizer_switch= @save_optimizer_switch;
drop table t1,t2,t3;
# End of 5.5 tests
set @subselect_mat_test_optimizer_switch_value=null;
set @@optimizer_switch='materialization=on,in_to_exists=off,semijoin=off';
......
......@@ -2435,4 +2435,45 @@ SELECT 1 FROM t1 where t1.id IN (SELECT t2.i1 FROM t2 WHERE t2.i1 = t2.i2);
1
1
drop table t1,t2;
#
# MDEV-15454: Nested SELECT IN returns wrong results
#
CREATE TABLE t1 ( a int NOT NULL PRIMARY KEY);
CREATE TABLE t2 ( a int, b int );
INSERT INTO t2 VALUES (7878, 96),(3465, 96),(1403, 96),(4189, 96),(8732, 96), (5,96);
CREATE TABLE t3 (c int unsigned NOT NULL, b int unsigned NOT NULL, PRIMARY KEY (c,b));
INSERT INTO t3 (c, b) VALUES (27, 96);
CREATE PROCEDURE prepare_data()
BEGIN
DECLARE i INT DEFAULT 1;
WHILE i < 1000 DO
INSERT INTO t1 (a) VALUES (i);
INSERT INTO t2 (a,b) VALUES (i,56);
INSERT INTO t3 (c,b) VALUES (i,i);
SET i = i + 1;
END WHILE;
END$$
CALL prepare_data();
SELECT t2.a FROM t2 WHERE t2.b IN (SELECT t3.b FROM t3 WHERE t3.c= 27);
a
7878
3465
1403
4189
8732
5
set @save_optimizer_switch= @@optimizer_switch;
SET optimizer_switch='materialization=off';
SELECT t1.a FROM t1
WHERE t1.a IN (SELECT t2.a FROM t2 WHERE t2.b IN (SELECT t3.b FROM t3 WHERE t3.c= 27)) LIMIT 5;
a
5
SET optimizer_switch='materialization=on';
SELECT t1.a FROM t1
WHERE t1.a IN (SELECT t2.a FROM t2 WHERE t2.b IN (SELECT t3.b FROM t3 WHERE t3.c= 27)) LIMIT 5;
a
5
drop procedure prepare_data;
set @@optimizer_switch= @save_optimizer_switch;
drop table t1,t2,t3;
# End of 5.5 tests
......@@ -2176,4 +2176,49 @@ SELECT 1 FROM t1 where t1.id IN (SELECT t2.i1 FROM t2 WHERE t2.i1 = t2.i2);
SELECT 1 FROM t1 where t1.id IN (SELECT t2.i1 FROM t2 WHERE t2.i1 = t2.i2);
drop table t1,t2;
--echo #
--echo # MDEV-15454: Nested SELECT IN returns wrong results
--echo #
CREATE TABLE t1 ( a int NOT NULL PRIMARY KEY);
CREATE TABLE t2 ( a int, b int );
INSERT INTO t2 VALUES (7878, 96),(3465, 96),(1403, 96),(4189, 96),(8732, 96), (5,96);
CREATE TABLE t3 (c int unsigned NOT NULL, b int unsigned NOT NULL, PRIMARY KEY (c,b));
INSERT INTO t3 (c, b) VALUES (27, 96);
DELIMITER $$;
CREATE PROCEDURE prepare_data()
BEGIN
DECLARE i INT DEFAULT 1;
WHILE i < 1000 DO
INSERT INTO t1 (a) VALUES (i);
INSERT INTO t2 (a,b) VALUES (i,56);
INSERT INTO t3 (c,b) VALUES (i,i);
SET i = i + 1;
END WHILE;
END$$
DELIMITER ;$$
CALL prepare_data();
SELECT t2.a FROM t2 WHERE t2.b IN (SELECT t3.b FROM t3 WHERE t3.c= 27);
set @save_optimizer_switch= @@optimizer_switch;
SET optimizer_switch='materialization=off';
SELECT t1.a FROM t1
WHERE t1.a IN (SELECT t2.a FROM t2 WHERE t2.b IN (SELECT t3.b FROM t3 WHERE t3.c= 27)) LIMIT 5;
SET optimizer_switch='materialization=on';
SELECT t1.a FROM t1
WHERE t1.a IN (SELECT t2.a FROM t2 WHERE t2.b IN (SELECT t3.b FROM t3 WHERE t3.c= 27)) LIMIT 5;
drop procedure prepare_data;
set @@optimizer_switch= @save_optimizer_switch;
drop table t1,t2,t3;
--echo # End of 5.5 tests
......@@ -440,6 +440,7 @@ bool subquery_types_allow_materialization(Item_in_subselect *in_subs);
static bool replace_where_subcondition(JOIN *, Item **, Item *, Item *, bool);
static int subq_sj_candidate_cmp(Item_in_subselect* el1, Item_in_subselect* el2,
void *arg);
static void reset_equality_number_for_subq_conds(Item * cond);
static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred);
static bool convert_subq_to_jtbm(JOIN *parent_join,
Item_in_subselect *subq_pred, bool *remove);
......@@ -1456,6 +1457,67 @@ static int subq_sj_candidate_cmp(Item_in_subselect* el1, Item_in_subselect* el2,
}
/**
@brief
reset the value of the field in_eqaulity_no for all Item_func_eq
items in the where clause of the subquery.
Look for in_equality_no description in Item_func_eq class
DESCRIPTION
Lets have an example:
SELECT t1.a FROM t1 WHERE t1.a IN
(SELECT t2.a FROM t2 where t2.b IN
(select t3.b from t3 where t3.c=27 ))
So for such a query we have the parent, child and
grandchild select.
So for the equality t2.b = t3.b we set the value for in_equality_no to
0 according to its description. Wewe do the same for t1.a = t2.a.
But when we look at the child select (with the grandchild select merged),
the query would be
SELECT t1.a FROM t1 WHERE t1.a IN
(SELECT t2.a FROM t2 where t2.b = t3.b and t3.c=27)
and then when the child select is merged into the parent select the query
would look like
SELECT t1.a FROM t1, semi-join-nest(t2,t3)
WHERE t1.a =t2.a and t2.b = t3.b and t3.c=27
Still we would have in_equality_no set for t2.b = t3.b
though it does not take part in the semi-join equality for the parent select,
so we should reset its value to UINT_MAX.
@param cond WHERE clause of the subquery
*/
static void reset_equality_number_for_subq_conds(Item * cond)
{
if (!cond)
return;
if (cond->type() == Item::COND_ITEM)
{
List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
Item *item;
while ((item=li++))
{
if (item->type() == Item::FUNC_ITEM &&
((Item_func*)item)->functype()== Item_func::EQ_FUNC)
((Item_func_eq*)item)->in_equality_no= UINT_MAX;
}
}
else
{
if (cond->type() == Item::FUNC_ITEM &&
((Item_func*)cond)->functype()== Item_func::EQ_FUNC)
((Item_func_eq*)cond)->in_equality_no= UINT_MAX;
}
return;
}
/*
Convert a subquery predicate into a TABLE_LIST semi-join nest
......@@ -1713,6 +1775,7 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred)
*/
sj_nest->sj_in_exprs= subq_pred->left_expr->cols();
sj_nest->nested_join->sj_outer_expr_list.empty();
reset_equality_number_for_subq_conds(sj_nest->sj_on_expr);
if (subq_pred->left_expr->cols() == 1)
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment