Commit b917fb63 authored by unknown's avatar unknown

Fix bug lp:1009187, mdev-373, mysql bug#58628

Analysis:
The queries in question use the [unique | index]_subquery execution methods.
These methods reuse the ref keys constructed by create_ref_for_key(). The
way create_ref_for_key() works is that it doesn't store in ref.key_copy[]
store_key elements that represent constants. In particular it doesn't store
the store_key for NULL constants.

The execution of [unique | index]_subquery calls
subselect_uniquesubquery_engine::copy_ref_key, which in addition to copy
the left IN argument into a index lookup key, is supposed to detect if
the left IN argument contains NULLs. Since the store_key for the NULL
constant is not copied into the key array, the null is not detected, and
execution erroneously proceeds as if it should look for a complete match.

Solution:
The solution (unlike MySQL) is to reuse already computed information about
NULL presence. Item_in_optimizer::val_int already finds out if the left IN
operand contains NULLs. The fix propagates this to the execution methods
subselect_[unique | index]subquery_engine::exec so it knows if there were
NULL values independent of the presence of keys.

In addition the patch siplifies copy_ref_key() and the logic that hanldes
the case of NULLs in the left IN operand.
parent caea98a4
......@@ -197,5 +197,86 @@ default(a)
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
DROP TABLE t;
#
# LP BUG#1009187, MDEV-373, MYSQL bug#58628
# Wrong result for a query with [NOT] IN subquery predicate if
# the left part of the predicate is explicit NULL
#
CREATE TABLE t1 (pk INT NOT NULL, i INT NOT NULL);
INSERT INTO t1 VALUES (0,10), (1,20), (2,30), (3,40);
CREATE TABLE t2a (pk INT NOT NULL, i INT NOT NULL, PRIMARY KEY(i,pk));
INSERT INTO t2a VALUES (0,0), (1,1), (2,2), (3,3);
CREATE TABLE t2b (pk INT, i INT);
INSERT INTO t2b VALUES (0,0), (1,1), (2,2), (3,3);
CREATE TABLE t2c (pk INT NOT NULL, i INT NOT NULL);
INSERT INTO t2c VALUES (0,0), (1,1), (2,2), (3,3);
create index it2c on t2c (i,pk);
EXPLAIN
SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 4 Using where
2 DEPENDENT SUBQUERY t2a unique_subquery PRIMARY PRIMARY 8 const,test.t1.pk 1 Using index; Using where; Full scan on NULL key
SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk);
pk i
SELECT * FROM t1 WHERE 1+NULL NOT IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk);
pk i
SELECT * FROM t1 WHERE NULL IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk) IS UNKNOWN;
pk i
0 10
1 20
2 30
3 40
SELECT t1.pk, NULL NOT IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk) FROM t1;
pk NULL NOT IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk)
0 NULL
1 NULL
2 NULL
3 NULL
EXPLAIN
SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2b.i FROM t2b WHERE t2b.pk = t1.pk);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 4 Using where
2 DEPENDENT SUBQUERY t2b ALL NULL NULL NULL NULL 4 Using where
SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2b.i FROM t2b WHERE t2b.pk = t1.pk);
pk i
SELECT * FROM t1 WHERE NULL IN (SELECT t2b.i FROM t2b WHERE t2b.pk = t1.pk) IS UNKNOWN;
pk i
0 10
1 20
2 30
3 40
SELECT t1.pk, NULL NOT IN (SELECT t2b.i FROM t2b WHERE t2b.pk = t1.pk) FROM t1;
pk NULL NOT IN (SELECT t2b.i FROM t2b WHERE t2b.pk = t1.pk)
0 NULL
1 NULL
2 NULL
3 NULL
EXPLAIN
SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2c.i FROM t2c WHERE t2c.pk = t1.pk);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 4 Using where
2 DEPENDENT SUBQUERY t2c index_subquery it2c it2c 8 const,test.t1.pk 2 Using index; Using where; Full scan on NULL key
SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2c.i FROM t2c WHERE t2c.pk = t1.pk);
pk i
SELECT * FROM t1 WHERE NULL IN (SELECT t2c.i FROM t2c WHERE t2c.pk = t1.pk) IS UNKNOWN;
pk i
0 10
1 20
2 30
3 40
SELECT t1.pk, NULL NOT IN (SELECT t2c.i FROM t2c WHERE t2c.pk = t1.pk) FROM t1;
pk NULL NOT IN (SELECT t2c.i FROM t2c WHERE t2c.pk = t1.pk)
0 NULL
1 NULL
2 NULL
3 NULL
EXPLAIN
SELECT * FROM t1 WHERE (NULL, 1) NOT IN (SELECT t2a.i, t2a.pk FROM t2a WHERE t2a.pk = t1.pk);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 4 Using where
2 DEPENDENT SUBQUERY t2a eq_ref PRIMARY PRIMARY 8 const,test.t1.pk 2 Using where; Using index; Full scan on NULL key
SELECT * FROM t1 WHERE (NULL, 1) NOT IN (SELECT t2a.i, t2a.pk FROM t2a WHERE t2a.pk = t1.pk);
pk i
drop table t1, t2a, t2b, t2c;
#
# End of 5.1 tests.
#
......@@ -170,6 +170,50 @@ INSERT INTO t VALUES (''),(''),(''),(''),(''),(''),(''),(''),(''),(''),('');
SELECT * FROM (SELECT default(a) FROM t GROUP BY a) d;
DROP TABLE t;
--echo #
--echo # LP BUG#1009187, MDEV-373, MYSQL bug#58628
--echo # Wrong result for a query with [NOT] IN subquery predicate if
--echo # the left part of the predicate is explicit NULL
--echo #
CREATE TABLE t1 (pk INT NOT NULL, i INT NOT NULL);
INSERT INTO t1 VALUES (0,10), (1,20), (2,30), (3,40);
CREATE TABLE t2a (pk INT NOT NULL, i INT NOT NULL, PRIMARY KEY(i,pk));
INSERT INTO t2a VALUES (0,0), (1,1), (2,2), (3,3);
CREATE TABLE t2b (pk INT, i INT);
INSERT INTO t2b VALUES (0,0), (1,1), (2,2), (3,3);
CREATE TABLE t2c (pk INT NOT NULL, i INT NOT NULL);
INSERT INTO t2c VALUES (0,0), (1,1), (2,2), (3,3);
create index it2c on t2c (i,pk);
EXPLAIN
SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk);
SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk);
SELECT * FROM t1 WHERE 1+NULL NOT IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk);
SELECT * FROM t1 WHERE NULL IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk) IS UNKNOWN;
SELECT t1.pk, NULL NOT IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk) FROM t1;
EXPLAIN
SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2b.i FROM t2b WHERE t2b.pk = t1.pk);
SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2b.i FROM t2b WHERE t2b.pk = t1.pk);
SELECT * FROM t1 WHERE NULL IN (SELECT t2b.i FROM t2b WHERE t2b.pk = t1.pk) IS UNKNOWN;
SELECT t1.pk, NULL NOT IN (SELECT t2b.i FROM t2b WHERE t2b.pk = t1.pk) FROM t1;
EXPLAIN
SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2c.i FROM t2c WHERE t2c.pk = t1.pk);
SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2c.i FROM t2c WHERE t2c.pk = t1.pk);
SELECT * FROM t1 WHERE NULL IN (SELECT t2c.i FROM t2c WHERE t2c.pk = t1.pk) IS UNKNOWN;
SELECT t1.pk, NULL NOT IN (SELECT t2c.i FROM t2c WHERE t2c.pk = t1.pk) FROM t1;
EXPLAIN
SELECT * FROM t1 WHERE (NULL, 1) NOT IN (SELECT t2a.i, t2a.pk FROM t2a WHERE t2a.pk = t1.pk);
SELECT * FROM t1 WHERE (NULL, 1) NOT IN (SELECT t2a.i, t2a.pk FROM t2a WHERE t2a.pk = t1.pk);
drop table t1, t2a, t2b, t2c;
--echo #
--echo # End of 5.1 tests.
--echo #
......@@ -1686,6 +1686,12 @@ bool Item_in_subselect::fix_fields(THD *thd_arg, Item **ref)
}
inline bool Item_in_subselect::left_expr_has_null()
{
return (*(optimizer->get_cache()))->null_value;
}
Item_subselect::trans_res
Item_allany_subselect::select_transformer(JOIN *join)
{
......@@ -2131,39 +2137,19 @@ int subselect_uniquesubquery_engine::scan_table()
subselect_uniquesubquery_engine::copy_ref_key()
DESCRIPTION
Copy ref key and check for null parts in it.
Depending on the nullability and conversion problems this function
recognizes and processes the following states :
1. Partial match on top level. This means IN has a value of FALSE
regardless of the data in the subquery table.
Detected by finding a NULL in the left IN operand of a top level
expression.
We may actually skip reading the subquery, so return TRUE to skip
the table scan in subselect_uniquesubquery_engine::exec and make
the value of the IN predicate a NULL (that is equal to FALSE on
top level).
2. No exact match when IN is nested inside another predicate.
Detected by finding a NULL in the left IN operand when IN is not
a top level predicate.
We cannot have an exact match. But we must proceed further with a
table scan to find out if it's a partial match (and IN has a value
of NULL) or no match (and IN has a value of FALSE).
So we return FALSE to continue with the scan and see if there are
any record that would constitute a partial match (as we cannot
determine that from the index).
3. Error converting the left IN operand to the column type of the
right IN operand. This counts as no match (and IN has the value of
FALSE). We mark the subquery table cursor as having no more rows
(to ensure that the processing that follows will not find a match)
and return FALSE, so IN is not treated as returning NULL.
Copy ref key and check for conversion problems.
If there is an error converting the left IN operand to the column type of
the right IN operand count it as no match. In this case IN has the value of
FALSE. We mark the subquery table cursor as having no more rows (to ensure
that the processing that follows will not find a match) and return FALSE,
so IN is not treated as returning NULL.
RETURN
FALSE - The value of the IN predicate is not known. Proceed to find the
value of the IN predicate using the determined values of
null_keypart and table->status.
TRUE - IN predicate has a value of NULL. Stop the processing right there
and return NULL to the outer predicates.
value of the IN predicate using the copied key.
TRUE - IN predicate has a value of FALSE. Stop the processing right there
and tell the outer predicate it is FALSE.
*/
bool subselect_uniquesubquery_engine::copy_ref_key()
......@@ -2176,30 +2162,6 @@ bool subselect_uniquesubquery_engine::copy_ref_key()
continue;
tab->ref.key_err= (*copy)->copy();
/*
When there is a NULL part in the key we don't need to make index
lookup for such key thus we don't need to copy whole key.
If we later should do a sequential scan return OK. Fail otherwise.
See also the comment for the subselect_uniquesubquery_engine::exec()
function.
*/
null_keypart= (*copy)->null_key;
if (null_keypart)
{
bool top_level= ((Item_in_subselect *) item)->is_top_level_item();
if (top_level)
{
/* Partial match on top level */
DBUG_RETURN(1);
}
else
{
/* No exact match when IN is nested inside another predicate */
break;
}
}
/*
Check if the error is equal to STORE_KEY_FATAL. This is not expressed
using the store_key::store_key_result enum because ref.key_err is a
......@@ -2215,10 +2177,10 @@ bool subselect_uniquesubquery_engine::copy_ref_key()
IN operand.
*/
tab->table->status= STATUS_NOT_FOUND;
break;
DBUG_RETURN(true);
}
}
DBUG_RETURN(0);
DBUG_RETURN(false);
}
......@@ -2248,8 +2210,9 @@ bool subselect_uniquesubquery_engine::copy_ref_key()
NOTE
RETURN
FALSE - ok
TRUE - an error occured while scanning
0 - ok
1 - notify caller to call Item_subselect::reset(),
in most cases reset() sets the result to NULL
*/
int subselect_uniquesubquery_engine::exec()
......@@ -2259,22 +2222,26 @@ int subselect_uniquesubquery_engine::exec()
TABLE *table= tab->table;
empty_result_set= TRUE;
table->status= 0;
Item_in_subselect *in_subs= (Item_in_subselect *) item;
/* TODO: change to use of 'full_scan' here? */
if (copy_ref_key())
DBUG_RETURN(1);
if (table->status)
if (in_subs->left_expr_has_null())
{
/*
We know that there will be no rows even if we scan.
Can be set in copy_ref_key.
The case when all values in left_expr are NULL is handled by
Item_in_optimizer::val_int().
*/
((Item_in_subselect *) item)->value= 0;
DBUG_RETURN(0);
if (in_subs->is_top_level_item())
DBUG_RETURN(1); /* notify caller to call reset() and set NULL value. */
else
DBUG_RETURN(scan_table());
}
if (null_keypart)
DBUG_RETURN(scan_table());
if (copy_ref_key())
{
/* We know that there will be no rows even if we scan. */
in_subs->value= 0;
DBUG_RETURN(0);
}
if (!table->file->inited)
table->file->ha_index_init(tab->ref.key, 0);
......@@ -2357,9 +2324,9 @@ subselect_uniquesubquery_engine::~subselect_uniquesubquery_engine()
cheaper. We can use index statistics to quickly check whether "ref" scan
will be cheaper than full table scan.
RETURN
0
1
0 - ok
1 - notify caller to call Item_subselect::reset(),
in most cases reset() sets the result to NULL
*/
int subselect_indexsubquery_engine::exec()
......@@ -2368,10 +2335,10 @@ int subselect_indexsubquery_engine::exec()
int error;
bool null_finding= 0;
TABLE *table= tab->table;
Item_in_subselect *in_subs= (Item_in_subselect *) item;
((Item_in_subselect *) item)->value= 0;
in_subs->value= 0;
empty_result_set= TRUE;
null_keypart= 0;
table->status= 0;
if (check_null)
......@@ -2381,22 +2348,24 @@ int subselect_indexsubquery_engine::exec()
((Item_in_subselect *) item)->was_null= 0;
}
/* Copy the ref key and check for nulls... */
if (copy_ref_key())
DBUG_RETURN(1);
if (table->status)
if (in_subs->left_expr_has_null())
{
/*
We know that there will be no rows even if we scan.
Can be set in copy_ref_key.
The case when all values in left_expr are NULL is handled by
Item_in_optimizer::val_int().
*/
((Item_in_subselect *) item)->value= 0;
DBUG_RETURN(0);
if (in_subs->is_top_level_item())
DBUG_RETURN(1); /* notify caller to call reset() and set NULL value. */
else
DBUG_RETURN(scan_table());
}
if (null_keypart)
DBUG_RETURN(scan_table());
if (copy_ref_key())
{
/* We know that there will be no rows even if we scan. */
in_subs->value= 0;
DBUG_RETURN(0);
}
if (!table->file->inited)
table->file->ha_index_init(tab->ref.key, 1);
......
......@@ -334,6 +334,7 @@ class Item_in_subselect :public Item_exists_subselect
bool test_limit(st_select_lex_unit *unit);
virtual void print(String *str, enum_query_type query_type);
bool fix_fields(THD *thd, Item **ref);
inline bool left_expr_has_null();
friend class Item_ref_null_helper;
friend class Item_is_not_null_test;
......@@ -513,7 +514,6 @@ class subselect_uniquesubquery_engine: public subselect_engine
expression is NULL.
*/
bool empty_result_set;
bool null_keypart; /* TRUE <=> constructed search tuple has a NULL */
public:
// constructor can assign THD because it will be called after JOIN::prepare
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment