Commit 94768542 authored by Alexander Barkov's avatar Alexander Barkov

MDEV-9369 IN operator with ( num, NULL ) gives inconsistent result

Based on this commit into MySQL-5.7:
> commit 8e51b845aafc8b4cdebd763c8aebda262ac2d4cd
> Author: Guilhem Bichot <guilhem.bichot@oracle.com>
> Date:   Mon Nov 4 15:44:55 2013 +0100
>
>    Bug#13944462 'NULL IN (XX)' RETURNS WRONG RESULTS
parent 7cb16dc2
......@@ -50,6 +50,12 @@ id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL NULL No tables used
Warnings:
Note 1003 select ((1,2,(3,4)) in ((3,2,(3,4)),(1,2,(3,NULL)))) AS `row(1,2,row(3,4)) IN (row(3,2,row(3,4)), row(1,2,row(3,NULL)))`
select row(1,2,row(3,null)) IN (row(3,2,row(3,4)), row(1,2,row(4,5)));
row(1,2,row(3,null)) IN (row(3,2,row(3,4)), row(1,2,row(4,5)))
0
select row(1,2,row(3,null)) IN (row(3,2,row(3,4)), row(1,2,row(3,5)));
row(1,2,row(3,null)) IN (row(3,2,row(3,4)), row(1,2,row(3,5)))
NULL
SELECT (1,2,3)=(0,NULL,3);
(1,2,3)=(0,NULL,3)
0
......@@ -130,7 +136,7 @@ ROW(c,2,3) IN(row(1,b,a), row(2,3,1))
0
0
1
NULL
0
select ROW(a,b,c) IN(row(1,2,3), row(3,2,1)) from t1;
ROW(a,b,c) IN(row(1,2,3), row(3,2,1))
1
......@@ -509,5 +515,11 @@ Warnings:
Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where ((`test`.`t1`.`a` = 10) and (`test`.`t1`.`b` = 10))
DROP TABLE t1;
#
# MDEV-9369 IN operator with ( num, NULL ) gives inconsistent result
#
SELECT (1,null) NOT IN ((2,2),(3,3)), (1,null) NOT IN ((2,2)), (1,null) NOT IN ((3,3));
(1,null) NOT IN ((2,2),(3,3)) (1,null) NOT IN ((2,2)) (1,null) NOT IN ((3,3))
1 1 1
#
# End of 10.1 tests
#
......@@ -19,10 +19,12 @@ select (1,2,(3,4)) IN ((3,2,(3,4)), (1,2,(3,4)));
select row(1,2,row(3,4)) IN (row(3,2,row(3,4)), row(1,2,4));
select row(1,2,row(3,4)) IN (row(3,2,row(3,4)), row(1,2,row(3,NULL)));
explain extended select row(1,2,row(3,4)) IN (row(3,2,row(3,4)), row(1,2,row(3,NULL)));
select row(1,2,row(3,null)) IN (row(3,2,row(3,4)), row(1,2,row(4,5)));
select row(1,2,row(3,null)) IN (row(3,2,row(3,4)), row(1,2,row(3,5)));
SELECT (1,2,3)=(0,NULL,3);
SELECT (1,2,3)=(1,NULL,3);
# here's something for Sanja to fix :)
SELECT (1,2,3)=(1,NULL,0);
SELECT ROW(1,2,3)=ROW(1,2,3);
......@@ -300,6 +302,13 @@ EXPLAIN EXTENDED SELECT * FROM t1 WHERE a=10 AND b=10 AND a>=10;
EXPLAIN EXTENDED SELECT * FROM t1 WHERE (a,b)=(10,10) AND a>=10;
DROP TABLE t1;
--echo #
--echo # MDEV-9369 IN operator with ( num, NULL ) gives inconsistent result
--echo #
SELECT (1,null) NOT IN ((2,2),(3,3)), (1,null) NOT IN ((2,2)), (1,null) NOT IN ((3,3));
--echo #
--echo # End of 10.1 tests
--echo #
......@@ -2914,7 +2914,7 @@ Item *Item_func_case::find_item(String *str)
return else_expr_num != -1 ? args[else_expr_num] : 0;
value_added_map|= 1U << (uint)cmp_type;
}
if (!cmp_items[(uint)cmp_type]->cmp(args[i]) && !args[i]->null_value)
if (cmp_items[(uint)cmp_type]->cmp(args[i]) == FALSE)
return args[i + 1];
}
}
......@@ -3545,11 +3545,11 @@ static int cmp_decimal(void *cmp_arg, my_decimal *a, my_decimal *b)
}
int in_vector::find(Item *item)
bool in_vector::find(Item *item)
{
uchar *result=get_value(item);
if (!result || !used_count)
return 0; // Null value
return false; // Null value
uint start,end;
start=0; end=used_count-1;
......@@ -3558,13 +3558,13 @@ int in_vector::find(Item *item)
uint mid=(start+end+1)/2;
int res;
if ((res=(*compare)(collation, base+mid*size, result)) == 0)
return 1;
return true;
if (res < 0)
start=mid;
else
end=mid-1;
}
return (int) ((*compare)(collation, base+start*size, result) == 0);
return ((*compare)(collation, base+start*size, result) == 0);
}
in_string::in_string(uint elements,qsort2_cmp cmp_func, CHARSET_INFO *cs)
......@@ -3894,14 +3894,20 @@ int cmp_item_row::cmp(Item *arg)
arg->bring_value();
for (uint i=0; i < n; i++)
{
if (comparators[i]->cmp(arg->element_index(i)))
const int rc= comparators[i]->cmp(arg->element_index(i));
switch (rc)
{
if (!arg->element_index(i)->null_value)
return 1;
was_null= 1;
case UNKNOWN:
was_null= true;
break;
case TRUE:
return TRUE;
case FALSE:
break; // elements #i are equal
}
arg->null_value|= arg->element_index(i)->null_value;
}
return (arg->null_value= was_null);
return was_null ? UNKNOWN : FALSE;
}
......@@ -3924,15 +3930,15 @@ void cmp_item_decimal::store_value(Item *item)
/* val may be zero if item is nnull */
if (val && val != &value)
my_decimal2decimal(val, &value);
m_null_value= item->null_value;
}
int cmp_item_decimal::cmp(Item *arg)
{
my_decimal tmp_buf, *tmp= arg->val_decimal(&tmp_buf);
if (arg->null_value)
return 1;
return my_decimal_cmp(&value, tmp);
return (m_null_value || arg->null_value) ?
UNKNOWN : (my_decimal_cmp(&value, tmp) != 0);
}
......@@ -3956,12 +3962,14 @@ void cmp_item_datetime::store_value(Item *item)
enum_field_types f_type=
tmp_item[0]->field_type_for_temporal_comparison(warn_item);
value= get_datetime_value(thd, &tmp_item, &lval_cache, f_type, &is_null);
m_null_value= item->null_value;
}
int cmp_item_datetime::cmp(Item *arg)
{
return value != arg->val_temporal_packed(warn_item);
const bool rc= value != arg->val_temporal_packed(warn_item);
return (m_null_value || arg->null_value) ? UNKNOWN : rc;
}
......@@ -3985,10 +3993,10 @@ bool Item_func_in::count_sargable_conds(uchar *arg)
}
bool Item_func_in::nulls_in_row()
bool Item_func_in::list_contains_null()
{
Item **arg,**arg_end;
for (arg= args+1, arg_end= args+arg_count; arg != arg_end ; arg++)
for (arg= args + 1, arg_end= args+arg_count; arg != arg_end ; arg++)
{
if ((*arg)->null_inside())
return 1;
......@@ -4103,6 +4111,32 @@ void Item_func_in::fix_length_and_dec()
}
}
/*
First conditions for bisection to be possible:
1. All types are similar, and
2. All expressions in <in value list> are const
*/
bool bisection_possible=
type_cnt == 1 && // 1
const_itm; // 2
if (bisection_possible)
{
/*
In the presence of NULLs, the correct result of evaluating this item
must be UNKNOWN or FALSE. To achieve that:
- If type is scalar, we can use bisection and the "have_null" boolean.
- If type is ROW, we will need to scan all of <in value list> when
searching, so bisection is impossible. Unless:
3. UNKNOWN and FALSE are equivalent results
4. Neither left expression nor <in value list> contain any NULL value
*/
if (m_compare_type == ROW_RESULT &&
((!is_top_level_item() || negated) && // 3
(list_contains_null() || args[0]->maybe_null))) // 4
bisection_possible= false;
}
if (type_cnt == 1)
{
if (m_compare_type == STRING_RESULT &&
......@@ -4115,7 +4149,7 @@ void Item_func_in::fix_length_and_dec()
uint cols= args[0]->cols();
cmp_item_row *cmp= 0;
if (const_itm && !nulls_in_row())
if (bisection_possible)
{
array= new (thd->mem_root) in_row(thd, arg_count-1, 0);
cmp= &((in_row*)array)->tmp;
......@@ -4144,11 +4178,8 @@ void Item_func_in::fix_length_and_dec()
}
}
}
/*
Row item with NULLs inside can return NULL or FALSE =>
they can't be processed as static
*/
if (type_cnt == 1 && const_itm && !nulls_in_row())
if (bisection_possible)
{
/*
IN must compare INT columns and constants as int values (the same
......@@ -4203,20 +4234,25 @@ void Item_func_in::fix_length_and_dec()
array= new (thd->mem_root) in_datetime(date_arg, arg_count - 1);
break;
}
if (array && !(thd->is_fatal_error)) // If not EOM
if (!array || thd->is_fatal_error) // OOM
return;
uint j=0;
for (uint i=1 ; i < arg_count ; i++)
{
uint j=0;
for (uint i=1 ; i < arg_count ; i++)
array->set(j,args[i]);
if (!args[i]->null_value)
j++; // include this cell in the array.
else
{
array->set(j,args[i]);
if (!args[i]->null_value) // Skip NULL values
j++;
else
have_null= 1;
/*
We don't put NULL values in array, to avoid erronous matches in
bisection.
*/
have_null= 1;
}
if ((array->used_count= j))
array->sort();
}
if ((array->used_count= j))
array->sort();
}
else
{
......@@ -4284,7 +4320,14 @@ longlong Item_func_in::val_int()
uint value_added_map= 0;
if (array)
{
int tmp=array->find(args[0]);
bool tmp=array->find(args[0]);
/*
NULL on left -> UNKNOWN.
Found no match, and NULL on right -> UNKNOWN.
NULL on right can never give a match, as it is not stored in
array.
See also the 'bisection_possible' variable in fix_length_and_dec().
*/
null_value=args[0]->null_value || (!tmp && have_null);
return (longlong) (!null_value && tmp != negated);
}
......@@ -4306,13 +4349,12 @@ longlong Item_func_in::val_int()
if (!(value_added_map & (1U << (uint)cmp_type)))
{
in_item->store_value(args[0]);
if ((null_value= args[0]->null_value))
return 0;
value_added_map|= 1U << (uint)cmp_type;
}
if (!in_item->cmp(args[i]) && !args[i]->null_value)
const int rc= in_item->cmp(args[i]);
if (rc == FALSE)
return (longlong) (!negated);
have_null|= args[i]->null_value;
have_null|= (rc == UNKNOWN);
}
null_value= have_null;
......@@ -6434,7 +6476,8 @@ longlong Item_equal::val_int()
/* Skip fields of tables that has not been read yet */
if (!field->table->status || (field->table->status & STATUS_NULL_ROW))
{
if (eval_item->cmp(item) || (null_value= item->null_value))
const int rc= eval_item->cmp(item);
if ((rc == TRUE) || (null_value= (rc == UNKNOWN)))
return 0;
}
}
......
......@@ -796,6 +796,7 @@ class Item_func_opt_neg :public Item_bool_func
public:
inline void negate() { negated= !negated; }
inline void top_level_item() { pred_level= 1; }
bool is_top_level_item() const { return pred_level; }
Item *neg_transformer(THD *thd)
{
negated= !negated;
......@@ -1075,7 +1076,7 @@ class in_vector :public Sql_alloc
{
my_qsort2(base,used_count,size,compare,(void*)collation);
}
int find(Item *item);
bool find(Item *item);
/*
Create an instance of Item_{type} (e.g. Item_decimal) constant object
......@@ -1243,6 +1244,10 @@ class cmp_item :public Sql_alloc
cmp_item() { cmp_charset= &my_charset_bin; }
virtual ~cmp_item() {}
virtual void store_value(Item *item)= 0;
/**
@returns result (TRUE, FALSE or UNKNOWN) of
"stored argument's value <> item's value"
*/
virtual int cmp(Item *item)= 0;
// for optimized IN with row
virtual int compare(cmp_item *item)= 0;
......@@ -1255,7 +1260,14 @@ class cmp_item :public Sql_alloc
}
};
class cmp_item_string :public cmp_item
/// cmp_item which stores a scalar (i.e. non-ROW).
class cmp_item_scalar : public cmp_item
{
protected:
bool m_null_value; ///< If stored value is NULL
};
class cmp_item_string : public cmp_item_scalar
{
protected:
String *value_res;
......@@ -1281,14 +1293,20 @@ class cmp_item_sort_string :public cmp_item_string
void store_value(Item *item)
{
value_res= item->val_str(&value);
m_null_value= item->null_value;
}
int cmp(Item *arg)
{
char buff[STRING_BUFFER_USUAL_SIZE];
String tmp(buff, sizeof(buff), cmp_charset), *res;
res= arg->val_str(&tmp);
return (value_res ? (res ? sortcmp(value_res, res, cmp_charset) : 1) :
(res ? -1 : 0));
String tmp(buff, sizeof(buff), cmp_charset), *res= arg->val_str(&tmp);
if (m_null_value || arg->null_value)
return UNKNOWN;
if (value_res && res)
return sortcmp(value_res, res, cmp_charset) != 0;
else if (!value_res && !res)
return FALSE;
else
return TRUE;
}
int compare(cmp_item *ci)
{
......@@ -1303,7 +1321,7 @@ class cmp_item_sort_string :public cmp_item_string
}
};
class cmp_item_int :public cmp_item
class cmp_item_int : public cmp_item_scalar
{
longlong value;
public:
......@@ -1311,10 +1329,12 @@ class cmp_item_int :public cmp_item
void store_value(Item *item)
{
value= item->val_int();
m_null_value= item->null_value;
}
int cmp(Item *arg)
{
return value != arg->val_int();
const bool rc= value != arg->val_int();
return (m_null_value || arg->null_value) ? UNKNOWN : rc;
}
int compare(cmp_item *ci)
{
......@@ -1330,7 +1350,7 @@ class cmp_item_int :public cmp_item
If the left item is a constant one then its value is cached in the
lval_cache variable.
*/
class cmp_item_datetime :public cmp_item
class cmp_item_datetime : public cmp_item_scalar
{
longlong value;
public:
......@@ -1348,7 +1368,7 @@ class cmp_item_datetime :public cmp_item
cmp_item *make_same();
};
class cmp_item_real :public cmp_item
class cmp_item_real : public cmp_item_scalar
{
double value;
public:
......@@ -1356,10 +1376,12 @@ class cmp_item_real :public cmp_item
void store_value(Item *item)
{
value= item->val_real();
m_null_value= item->null_value;
}
int cmp(Item *arg)
{
return value != arg->val_real();
const bool rc= value != arg->val_real();
return (m_null_value || arg->null_value) ? UNKNOWN : rc;
}
int compare(cmp_item *ci)
{
......@@ -1370,7 +1392,7 @@ class cmp_item_real :public cmp_item
};
class cmp_item_decimal :public cmp_item
class cmp_item_decimal : public cmp_item_scalar
{
my_decimal value;
public:
......@@ -1397,12 +1419,13 @@ class cmp_item_sort_string_in_static :public cmp_item_string
void store_value(Item *item)
{
value_res= item->val_str(&value);
m_null_value= item->null_value;
}
int cmp(Item *item)
{
// Should never be called
DBUG_ASSERT(0);
return 1;
DBUG_ASSERT(false);
return TRUE;
}
int compare(cmp_item *ci)
{
......@@ -1467,33 +1490,43 @@ class Item_func_case :public Item_func_hybrid_field_type
};
/*
The Item_func_in class implements the in_expr IN(values_list) function.
The Item_func_in class implements
in_expr IN (<in value list>)
and
in_expr NOT IN (<in value list>)
The current implementation distinguishes 2 cases:
1) all items in the value_list are constants and have the same
1) all items in <in value list> are constants and have the same
result type. This case is handled by in_vector class.
2) items in the value_list have different result types or there is some
non-constant items.
In this case Item_func_in employs several cmp_item objects to performs
comparisons of in_expr and an item from the values_list. One cmp_item
2) otherwise Item_func_in employs several cmp_item objects to perform
comparisons of in_expr and an item from <in value list>. One cmp_item
object for each result type. Different result types are collected in the
fix_length_and_dec() member function by means of collect_cmp_types()
function.
*/
class Item_func_in :public Item_func_opt_neg
{
/**
Usable if <in value list> is made only of constants. Returns true if one
of these constants contains a NULL. Example:
IN ( (-5, (12,NULL)), ... ).
*/
bool list_contains_null();
protected:
SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param,
Field *field, Item *value);
public:
/*
an array of values when the right hand arguments of IN
are all SQL constant and there are no nulls
*/
/// An array of values, created when the bisection lookup method is used
in_vector *array;
/**
If there is some NULL among <in value list>, during a val_int() call; for
example
IN ( (1,(3,'col')), ... ), where 'col' is a column which evaluates to
NULL.
*/
bool have_null;
/*
true when all arguments of the IN clause are of compatible types
/**
true when all arguments of the IN list are of compatible types
and can be used safely as comparisons for key conditions
*/
bool arg_types_compatible;
......@@ -1547,7 +1580,6 @@ class Item_func_in :public Item_func_opt_neg
virtual void print(String *str, enum_query_type query_type);
enum Functype functype() const { return IN_FUNC; }
const char *func_name() const { return " IN "; }
bool nulls_in_row();
bool eval_not_null_tables(uchar *opt_arg);
void fix_after_pullout(st_select_lex *new_parent, Item **ref);
bool count_sargable_conds(uchar *arg);
......
......@@ -2,7 +2,7 @@
#define ITEM_ROW_INCLUDED
/*
Copyright (c) 2002, 2010, Oracle and/or its affiliates.
Copyright (c) 2002, 2013, Oracle and/or its affiliates.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
......@@ -28,11 +28,20 @@
@endverbatim
*/
/**
Item which stores (x,y,...) and ROW(x,y,...).
Note that this can be recursive: ((x,y),(z,t)) is a ROW of ROWs.
*/
class Item_row: public Item,
private Item_args,
private Used_tables_and_const_cache
{
table_map not_null_tables_cache;
/**
If elements are made only of constants, of which one or more are
NULL. For example, this item is (1,2,NULL), or ( (1,NULL), (2,3) ).
*/
bool with_null;
public:
Item_row(THD *thd, List<Item> &list):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment