Commit 7f9a5047 authored by Jorgen Loland's avatar Jorgen Loland

Bug#48177 - SELECTs with NOT IN subqueries containing NULL

            values return too many records

WHERE clauses with "outer_value_list NOT IN subselect" were
handled incorrectly if the outer value list contained multiple 
items where at least one of these could be NULL. The first 
outer record with NULL value was handled correctly, but if a 
second record with NULL value existed, the optimizer would 
choose to reuse the result it got on the last execution of the 
subselect. This is incorrect if the outer value list has 
multiple items.
     
The fix is to make Item_in_optimizer::val_int (in 
item_cmpfunc.cc) reuse the result of the latest execution
for NULL values only if all values in the outer_value_list 
are NULL.
parent c3345f3e
...@@ -895,3 +895,72 @@ t1.a < (select t4.a+10 ...@@ -895,3 +895,72 @@ t1.a < (select t4.a+10
from t4, t5 limit 2)); from t4, t5 limit 2));
ERROR 21000: Subquery returns more than 1 row ERROR 21000: Subquery returns more than 1 row
drop table t0, t1, t2, t3, t4, t5; drop table t0, t1, t2, t3, t4, t5;
#
# BUG#48177 - SELECTs with NOT IN subqueries containing NULL
# values return too many records
#
CREATE TABLE t1 (
i1 int DEFAULT NULL,
i2 int DEFAULT NULL
) ;
INSERT INTO t1 VALUES (1, NULL);
INSERT INTO t1 VALUES (2, 3);
INSERT INTO t1 VALUES (4, NULL);
INSERT INTO t1 VALUES (4, 0);
INSERT INTO t1 VALUES (NULL, NULL);
CREATE TABLE t2 (
i1 int DEFAULT NULL,
i2 int DEFAULT NULL
) ;
INSERT INTO t2 VALUES (4, NULL);
INSERT INTO t2 VALUES (5, 0);
Data in t1
SELECT i1, i2 FROM t1;
i1 i2
1 NULL
2 3
4 NULL
4 0
NULL NULL
Data in subquery (should be filtered out)
SELECT i1, i2 FROM t2 ORDER BY i1;
i1 i2
4 NULL
5 0
FLUSH STATUS;
SELECT i1, i2
FROM t1
WHERE (i1, i2)
NOT IN (SELECT i1, i2 FROM t2);
i1 i2
1 NULL
2 3
# Check that the subquery only has to be evaluated once
# for all-NULL values even though there are two (NULL,NULL) records
# Baseline:
SHOW STATUS LIKE '%Handler_read_rnd_next';
Variable_name Value
Handler_read_rnd_next 17
INSERT INTO t1 VALUES (NULL, NULL);
FLUSH STATUS;
SELECT i1, i2
FROM t1
WHERE (i1, i2)
NOT IN (SELECT i1, i2 FROM t2);
i1 i2
1 NULL
2 3
# Handler_read_rnd_next should be one more than baseline
# (read record from t1, but do not read from t2)
SHOW STATUS LIKE '%Handler_read_rnd_next';
Variable_name Value
Handler_read_rnd_next 18
DROP TABLE t1,t2;
End of 5.1 tests
...@@ -728,3 +728,69 @@ where ...@@ -728,3 +728,69 @@ where
from t4, t5 limit 2)); from t4, t5 limit 2));
drop table t0, t1, t2, t3, t4, t5; drop table t0, t1, t2, t3, t4, t5;
--echo #
--echo # BUG#48177 - SELECTs with NOT IN subqueries containing NULL
--echo # values return too many records
--echo #
CREATE TABLE t1 (
i1 int DEFAULT NULL,
i2 int DEFAULT NULL
) ;
INSERT INTO t1 VALUES (1, NULL);
INSERT INTO t1 VALUES (2, 3);
INSERT INTO t1 VALUES (4, NULL);
INSERT INTO t1 VALUES (4, 0);
INSERT INTO t1 VALUES (NULL, NULL);
CREATE TABLE t2 (
i1 int DEFAULT NULL,
i2 int DEFAULT NULL
) ;
INSERT INTO t2 VALUES (4, NULL);
INSERT INTO t2 VALUES (5, 0);
--echo
--echo Data in t1
SELECT i1, i2 FROM t1;
--echo
--echo Data in subquery (should be filtered out)
SELECT i1, i2 FROM t2 ORDER BY i1;
FLUSH STATUS;
--echo
SELECT i1, i2
FROM t1
WHERE (i1, i2)
NOT IN (SELECT i1, i2 FROM t2);
--echo
--echo # Check that the subquery only has to be evaluated once
--echo # for all-NULL values even though there are two (NULL,NULL) records
--echo # Baseline:
SHOW STATUS LIKE '%Handler_read_rnd_next';
--echo
INSERT INTO t1 VALUES (NULL, NULL);
FLUSH STATUS;
--echo
SELECT i1, i2
FROM t1
WHERE (i1, i2)
NOT IN (SELECT i1, i2 FROM t2);
--echo
--echo # Handler_read_rnd_next should be one more than baseline
--echo # (read record from t1, but do not read from t2)
SHOW STATUS LIKE '%Handler_read_rnd_next';
DROP TABLE t1,t2;
--echo End of 5.1 tests
...@@ -1585,61 +1585,73 @@ longlong Item_in_optimizer::val_int() ...@@ -1585,61 +1585,73 @@ longlong Item_in_optimizer::val_int()
if (cache->null_value) if (cache->null_value)
{ {
/*
We're evaluating
"<outer_value_list> [NOT] IN (SELECT <inner_value_list>...)"
where one or more of the outer values is NULL.
*/
if (((Item_in_subselect*)args[1])->is_top_level_item()) if (((Item_in_subselect*)args[1])->is_top_level_item())
{ {
/* /*
We're evaluating "NULL IN (SELECT ...)". The result can be NULL or We're evaluating a top level item, e.g.
FALSE, and we can return one instead of another. Just return NULL. "<outer_value_list> IN (SELECT <inner_value_list>...)",
and in this case a NULL value in the outer_value_list means
that the result shall be NULL/FALSE (makes no difference for
top level items). The cached value is NULL, so just return
NULL.
*/ */
null_value= 1; null_value= 1;
} }
else else
{ {
if (!((Item_in_subselect*)args[1])->is_correlated && /*
result_for_null_param != UNKNOWN) We're evaluating an item where a NULL value in either the
outer or inner value list does not automatically mean that we
can return NULL/FALSE. An example of such a query is
"<outer_value_list> NOT IN (SELECT <inner_value_list>...)"
The result when there is at least one NULL value is: NULL if the
SELECT evaluated over the non-NULL values produces at least
one row, FALSE otherwise
*/
Item_in_subselect *item_subs=(Item_in_subselect*)args[1];
bool all_left_cols_null= true;
const uint ncols= cache->cols();
/*
Turn off the predicates that are based on column compares for
which the left part is currently NULL
*/
for (uint i= 0; i < ncols; i++)
{ {
/* Use cached value from previous execution */ if (cache->element_index(i)->null_value)
null_value= result_for_null_param; item_subs->set_cond_guard_var(i, FALSE);
else
all_left_cols_null= false;
} }
else
if (!((Item_in_subselect*)args[1])->is_correlated &&
all_left_cols_null && result_for_null_param != UNKNOWN)
{ {
/* /*
We're evaluating "NULL IN (SELECT ...)". The result is: This is a non-correlated subquery, all values in the outer
FALSE if SELECT produces an empty set, or value list are NULL, and we have already evaluated the
NULL otherwise. subquery for all NULL values: Return the same result we
We disable the predicates we've pushed down into subselect, run the did last time without evaluating the subquery.
subselect and see if it has produced any rows.
*/ */
Item_in_subselect *item_subs=(Item_in_subselect*)args[1]; null_value= result_for_null_param;
if (cache->cols() == 1) }
{ else
item_subs->set_cond_guard_var(0, FALSE); {
(void) args[1]->val_bool_result(); /* The subquery has to be evaluated */
result_for_null_param= null_value= !item_subs->engine->no_rows(); (void) args[1]->val_bool_result();
item_subs->set_cond_guard_var(0, TRUE); null_value= !item_subs->engine->no_rows();
} if (all_left_cols_null)
else result_for_null_param= null_value;
{
uint i;
uint ncols= cache->cols();
/*
Turn off the predicates that are based on column compares for
which the left part is currently NULL
*/
for (i= 0; i < ncols; i++)
{
if (cache->element_index(i)->null_value)
item_subs->set_cond_guard_var(i, FALSE);
}
(void) args[1]->val_bool_result();
result_for_null_param= null_value= !item_subs->engine->no_rows();
/* Turn all predicates back on */
for (i= 0; i < ncols; i++)
item_subs->set_cond_guard_var(i, TRUE);
}
} }
/* Turn all predicates back on */
for (uint i= 0; i < ncols; i++)
item_subs->set_cond_guard_var(i, TRUE);
} }
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment