Commit 75d65b5f authored by Alexander Barkov's avatar Alexander Barkov

MDEV-6989 BINARY and COLLATE xxx_bin comparisions are not used for optimization in some cases

parent bd210585
......@@ -618,10 +618,10 @@ id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref a a 11 const 2 Using index condition
explain select * from t1 where a=binary 'aaa';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 11 NULL 2 Using index condition
1 SIMPLE t1 ref a a 11 const 2 Using index condition
explain select * from t1 where a='aaa' collate latin1_bin;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 11 NULL 2 Using index condition
1 SIMPLE t1 ref a a 11 const 2 Using index condition
explain select * from t1 where a='aaa' collate latin1_german1_ci;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL a NULL NULL NULL 9 Using where
......
......@@ -620,10 +620,10 @@ id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref a a 11 const 2 Using index condition
explain select * from t1 where a=binary 'aaa';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 11 NULL 2 Using index condition; Rowid-ordered scan
1 SIMPLE t1 ref a a 11 const 2 Using index condition
explain select * from t1 where a='aaa' collate latin1_bin;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 11 NULL 2 Using index condition; Rowid-ordered scan
1 SIMPLE t1 ref a a 11 const 2 Using index condition
explain select * from t1 where a='aaa' collate latin1_german1_ci;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL a NULL NULL NULL 9 Using where
......
......@@ -549,5 +549,65 @@ id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 index PRIMARY PRIMARY 22 NULL 2 Using where; Using index
DROP TABLE IF EXISTS t1,t2;
#
# MDEV-6989 BINARY and COLLATE xxx_bin comparisions are not used for optimization in some cases
#
CREATE TABLE t1 (c1 VARCHAR(20) CHARACTER SET latin1, PRIMARY KEY(c1));
INSERT INTO t1 VALUES ('a'),('b'),('c'),('d');
SELECT * FROM t1 WHERE c1=BINARY 'a';
c1
a
EXPLAIN SELECT * FROM t1 WHERE c1=BINARY 'a';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 const PRIMARY PRIMARY 22 const 1 Using index
SELECT * FROM t1 WHERE c1=_latin1'a' COLLATE latin1_bin;
c1
a
EXPLAIN SELECT * FROM t1 WHERE c1=_latin1'a' COLLATE latin1_bin;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 const PRIMARY PRIMARY 22 const 1 Using index
DROP TABLE t1;
CREATE TABLE t1 (c1 VARCHAR(10) CHARACTER SET latin1 COLLATE latin1_bin);
INSERT INTO t1 VALUES ('a');
CREATE TABLE t2 (c1 VARCHAR(10) CHARACTER SET latin1, PRIMARY KEY(c1));
INSERT INTO t2 VALUES ('a'),('b');
SELECT * FROM t1, t2 WHERE t1.c1=t2.c1;
c1 c1
a a
EXPLAIN SELECT * FROM t1, t2 WHERE t1.c1=t2.c1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 system NULL NULL NULL NULL 1
1 SIMPLE t2 const PRIMARY PRIMARY 12 const 1 Using index
ALTER TABLE t1 MODIFY c1 VARBINARY(10);
SELECT * FROM t1, t2 WHERE t1.c1=t2.c1;
c1 c1
a a
EXPLAIN SELECT * FROM t1, t2 WHERE t1.c1=t2.c1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 system NULL NULL NULL NULL 1
1 SIMPLE t2 const PRIMARY PRIMARY 12 const 1 Using index
DROP TABLE t1, t2;
CREATE TABLE t1 (c1 VARCHAR(10) CHARACTER SET latin1 COLLATE latin1_bin);
INSERT INTO t1 VALUES ('a'),('c');
CREATE TABLE t2 (c1 VARCHAR(10) CHARACTER SET latin1, PRIMARY KEY(c1));
INSERT INTO t2 VALUES ('a'),('b');
SELECT t1.* FROM t1 LEFT JOIN t2 USING (c1);
c1
a
c
# t2 should be eliminated
EXPLAIN SELECT t1.* FROM t1 LEFT JOIN t2 USING (c1);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 2
ALTER TABLE t1 MODIFY c1 VARBINARY(10);
SELECT t1.* FROM t1 LEFT JOIN t2 USING (c1);
c1
a
c
# t2 should be eliminated
EXPLAIN SELECT t1.* FROM t1 LEFT JOIN t2 USING (c1);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 2
DROP TABLE t1,t2;
#
# End of 10.0 tests
#
......@@ -240,6 +240,42 @@ SELECT t1.* FROM t1 LEFT JOIN t2 ON t1.c1=t2.c1;
EXPLAIN SELECT t1.* FROM t1 LEFT JOIN t2 ON t1.c1=t2.c1;
DROP TABLE IF EXISTS t1,t2;
--echo #
--echo # MDEV-6989 BINARY and COLLATE xxx_bin comparisions are not used for optimization in some cases
--echo #
CREATE TABLE t1 (c1 VARCHAR(20) CHARACTER SET latin1, PRIMARY KEY(c1));
INSERT INTO t1 VALUES ('a'),('b'),('c'),('d');
SELECT * FROM t1 WHERE c1=BINARY 'a';
EXPLAIN SELECT * FROM t1 WHERE c1=BINARY 'a';
SELECT * FROM t1 WHERE c1=_latin1'a' COLLATE latin1_bin;
EXPLAIN SELECT * FROM t1 WHERE c1=_latin1'a' COLLATE latin1_bin;
DROP TABLE t1;
CREATE TABLE t1 (c1 VARCHAR(10) CHARACTER SET latin1 COLLATE latin1_bin);
INSERT INTO t1 VALUES ('a');
CREATE TABLE t2 (c1 VARCHAR(10) CHARACTER SET latin1, PRIMARY KEY(c1));
INSERT INTO t2 VALUES ('a'),('b');
SELECT * FROM t1, t2 WHERE t1.c1=t2.c1;
EXPLAIN SELECT * FROM t1, t2 WHERE t1.c1=t2.c1;
ALTER TABLE t1 MODIFY c1 VARBINARY(10);
SELECT * FROM t1, t2 WHERE t1.c1=t2.c1;
EXPLAIN SELECT * FROM t1, t2 WHERE t1.c1=t2.c1;
DROP TABLE t1, t2;
CREATE TABLE t1 (c1 VARCHAR(10) CHARACTER SET latin1 COLLATE latin1_bin);
INSERT INTO t1 VALUES ('a'),('c');
CREATE TABLE t2 (c1 VARCHAR(10) CHARACTER SET latin1, PRIMARY KEY(c1));
INSERT INTO t2 VALUES ('a'),('b');
SELECT t1.* FROM t1 LEFT JOIN t2 USING (c1);
--echo # t2 should be eliminated
EXPLAIN SELECT t1.* FROM t1 LEFT JOIN t2 USING (c1);
ALTER TABLE t1 MODIFY c1 VARBINARY(10);
SELECT t1.* FROM t1 LEFT JOIN t2 USING (c1);
--echo # t2 should be eliminated
EXPLAIN SELECT t1.* FROM t1 LEFT JOIN t2 USING (c1);
DROP TABLE t1,t2;
--echo #
--echo # End of 10.0 tests
--echo #
......@@ -1246,11 +1246,23 @@ double Field::pos_in_interval_val_str(Field *min, Field *max, uint data_offset)
}
/*
This handles all numeric and BIT data types.
*/
bool Field::can_optimize_keypart_ref(const Item_func *cond,
const Item *item) const
{
DBUG_ASSERT(cmp_type() != STRING_RESULT);
DBUG_ASSERT(cmp_type() != TIME_RESULT);
return item->cmp_type() != TIME_RESULT;
}
/*
This handles all numeric and BIT data types.
*/
bool Field::can_optimize_group_min_max(const Item_bool_func2 *cond,
const Item *const_item)
const Item *const_item) const
{
DBUG_ASSERT(cmp_type() != STRING_RESULT);
DBUG_ASSERT(cmp_type() != TIME_RESULT);
......@@ -5270,8 +5282,15 @@ my_decimal *Field_temporal::val_decimal(my_decimal *d)
}
bool Field_temporal::can_optimize_keypart_ref(const Item_func *cond,
const Item *value) const
{
return true; // Field is of TIME_RESULT, which supersedes everything else.
}
bool Field_temporal::can_optimize_group_min_max(const Item_bool_func2 *cond,
const Item *const_item)
const Item *const_item) const
{
return true; // Field is of TIME_RESULT, which supersedes everything else.
}
......@@ -6468,15 +6487,50 @@ uint32 Field_longstr::max_data_length() const
}
bool Field_longstr::can_optimize_group_min_max(const Item_bool_func2 *cond,
const Item *const_item)
bool
Field_longstr::cmp_to_string_with_same_collation(const Item_func *cond,
const Item *item) const
{
// Can't use indexes when comparing a string to a number or a date
if (const_item->cmp_type() != STRING_RESULT)
return false;
return item->cmp_type() == STRING_RESULT &&
charset() == cond->compare_collation();
}
// Don't use an index when comparing strings of different collations.
return charset() == cond->compare_collation();
bool
Field_longstr::cmp_to_string_with_stricter_collation(const Item_func *cond,
const Item *item) const
{
return item->cmp_type() == STRING_RESULT &&
(charset() == cond->compare_collation() ||
cond->compare_collation()->state & MY_CS_BINSORT);
}
bool Field_longstr::can_optimize_keypart_ref(const Item_func *cond,
const Item *item) const
{
DBUG_ASSERT(cmp_type() == STRING_RESULT);
return cmp_to_string_with_stricter_collation(cond, item);
}
bool Field_longstr::can_optimize_hash_join(const Item_func *cond,
const Item *item) const
{
DBUG_ASSERT(cmp_type() == STRING_RESULT);
return cmp_to_string_with_same_collation(cond, item);
}
bool Field_longstr::can_optimize_group_min_max(const Item_bool_func2 *cond,
const Item *const_item) const
{
/*
Can't use indexes when comparing a string to a number or a date
Don't use an index when comparing strings of different collations.
*/
DBUG_ASSERT(cmp_type() == STRING_RESULT);
return cmp_to_string_with_same_collation(cond, const_item);
}
......@@ -8489,6 +8543,31 @@ uint Field_num::is_equal(Create_field *new_field)
}
bool Field_enum::can_optimize_keypart_ref(const Item_func *cond,
const Item *item) const
{
DBUG_ASSERT(cmp_type() == INT_RESULT);
DBUG_ASSERT(result_type() == STRING_RESULT);
switch (item->cmp_type())
{
case TIME_RESULT:
return false;
case INT_RESULT:
case DECIMAL_RESULT:
case REAL_RESULT:
return true;
case STRING_RESULT:
return charset() == ((Item_func*)cond)->compare_collation();
case IMPOSSIBLE_RESULT:
case ROW_RESULT:
DBUG_ASSERT(0);
break;
}
return false;
}
/*
Bit field.
......
......@@ -39,6 +39,7 @@ class Relay_log_info;
class Field;
class Column_statistics;
class Column_statistics_collected;
class Item_func;
class Item_bool_func2;
enum enum_check_fields
......@@ -964,9 +965,21 @@ class Field
return (double) 0.5;
}
virtual bool can_optimize_keypart_ref(const Item_func *cond,
const Item *item) const;
virtual bool can_optimize_hash_join(const Item_func *cond,
const Item *item) const
{
return can_optimize_keypart_ref(cond, item);
}
virtual bool can_optimize_group_min_max(const Item_bool_func2 *cond,
const Item *const_item);
const Item *const_item) const;
bool can_optimize_outer_join_table_elimination(const Item_func *cond,
const Item *item) const
{
// Exactly the same rules with REF access
return can_optimize_keypart_ref(cond, item);
}
friend int cre_myisam(char * name, register TABLE *form, uint options,
ulonglong auto_increment_value);
friend class Copy_field;
......@@ -1147,6 +1160,10 @@ class Field_longstr :public Field_str
return report_if_important_data(copier->source_end_pos(),
end, count_spaces);
}
bool cmp_to_string_with_same_collation(const Item_func *cond,
const Item *item) const;
bool cmp_to_string_with_stricter_collation(const Item_func *cond,
const Item *item) const;
public:
Field_longstr(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg,
uchar null_bit_arg, utype unireg_check_arg,
......@@ -1158,8 +1175,10 @@ class Field_longstr :public Field_str
int store_decimal(const my_decimal *d);
uint32 max_data_length() const;
bool match_collation_to_optimize_range() const { return true; }
bool can_optimize_keypart_ref(const Item_func *cond, const Item *item) const;
bool can_optimize_hash_join(const Item_func *cond, const Item *item) const;
bool can_optimize_group_min_max(const Item_bool_func2 *cond,
const Item *const_item);
const Item *const_item) const;
};
/* base class for float and double and decimal (old one) */
......@@ -1587,8 +1606,13 @@ class Field_null :public Field_str {
uint size_of() const { return sizeof(*this); }
uint32 max_display_length() { return 4; }
void move_field_offset(my_ptrdiff_t ptr_diff) {}
bool can_optimize_keypart_ref(const Item_func *cond, const Item *item) const
{
DBUG_ASSERT(0);
return false;
}
bool can_optimize_group_min_max(const Item_bool_func2 *cond,
const Item *const_item)
const Item *const_item) const
{
DBUG_ASSERT(0);
return false;
......@@ -1625,8 +1649,9 @@ class Field_temporal: public Field {
{
return pos_in_interval_val_real(min, max);
}
bool can_optimize_keypart_ref(const Item_func *cond, const Item *item) const;
bool can_optimize_group_min_max(const Item_bool_func2 *cond,
const Item *const_item);
const Item *const_item) const;
};
......@@ -2664,8 +2689,9 @@ class Field_enum :public Field_str {
virtual const uchar *unpack(uchar *to, const uchar *from,
const uchar *from_end, uint param_data);
bool can_optimize_keypart_ref(const Item_func *cond, const Item *item) const;
bool can_optimize_group_min_max(const Item_bool_func2 *cond,
const Item *const_item)
const Item *const_item) const
{
/*
Can't use GROUP_MIN_MAX optimization for ENUM and SET,
......
......@@ -1486,28 +1486,8 @@ void check_equality(Dep_analysis_context *ctx, Dep_module_expr **eq_mod,
left->real_item()->type() == Item::FIELD_ITEM)
{
Field *field= ((Item_field*)left->real_item())->field;
if (right->cmp_type() == TIME_RESULT && field->cmp_type() != TIME_RESULT)
if (!field->can_optimize_outer_join_table_elimination(cond, right))
return;
if (field->result_type() == STRING_RESULT)
{
if (right->result_type() != STRING_RESULT)
{
if (field->cmp_type() != right->result_type())
return;
}
else
{
/*
We can't assume there's a functional dependency if the effective
collation of the operation differ from the field collation.
*/
if ((field->cmp_type() == STRING_RESULT ||
field->real_type() == MYSQL_TYPE_ENUM ||
field->real_type() == MYSQL_TYPE_SET) &&
field->charset() != cond->compare_collation())
return;
}
}
Dep_value_field *field_val;
if ((field_val= ctx->get_field_value(field)))
add_module_expr(ctx, eq_mod, and_level, field_val, right, NULL);
......
......@@ -4124,6 +4124,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
/// Used when finding key fields
typedef struct key_field_t {
Field *field;
Item_func *cond;
Item *val; ///< May be empty if diff constant
uint level;
uint optimize;
......@@ -4466,46 +4467,6 @@ add_key_field(JOIN *join,
}
if (!eq_func) // eq_func is NEVER true when num_values > 1
return;
if ((*value)->cmp_type() == TIME_RESULT &&
field->cmp_type() != TIME_RESULT)
return;
/*
Note, for ITEM/ENUM columns:
- field->cmp_type() returns INT_RESULT
- field->result_type() returns STRING_RESULT
- field->type() returns MYSQL_TYPE_STRING
Using field->real_type() to detect ENUM/SET,
as they need a special handling:
- Conditions between a ENUM/SET filter and a TIME expression
cannot be optimized. They were filtered out in the previous if block.
- It's Ok to use ref access for an ENUM/SET field compared to an
INT/REAL/DECIMAL expression.
- It's Ok to use ref for an ENUM/SET field compared to a STRING
expression if the collation of the field and the collation of
the condition match.
*/
if ((field->real_type() == MYSQL_TYPE_ENUM ||
field->real_type() == MYSQL_TYPE_SET) &&
(*value)->cmp_type () == STRING_RESULT &&
field->charset() != cond->compare_collation())
return;
/*
We can't use indexes when comparing a string index to a
number or two strings if the effective collation
of the operation differ from the field collation.
*/
if (field->cmp_type() == STRING_RESULT)
{
if ((*value)->cmp_type() != STRING_RESULT)
return;
if (field->charset() != cond->compare_collation())
return;
}
}
}
/*
......@@ -4517,6 +4478,7 @@ add_key_field(JOIN *join,
(*key_fields)->field= field;
(*key_fields)->eq_func= eq_func;
(*key_fields)->val= *value;
(*key_fields)->cond= cond;
(*key_fields)->level= and_level;
(*key_fields)->optimize= optimize;
/*
......@@ -4958,7 +4920,8 @@ add_key_part(DYNAMIC_ARRAY *keyuse_array, KEY_FIELD *key_field)
uint key_parts= form->actual_n_key_parts(keyinfo);
for (uint part=0 ; part < key_parts ; part++)
{
if (field->eq(form->key_info[key].key_part[part].field))
if (field->eq(form->key_info[key].key_part[part].field) &&
field->can_optimize_keypart_ref(key_field->cond, key_field->val))
{
if (add_keyuse(keyuse_array, key_field, key, part))
return TRUE;
......@@ -4969,6 +4932,8 @@ add_key_part(DYNAMIC_ARRAY *keyuse_array, KEY_FIELD *key_field)
(key_field->optimize & KEY_OPTIMIZE_EQ) &&
key_field->val->used_tables())
{
if (!field->can_optimize_hash_join(key_field->cond, key_field->val))
return false;
/*
If a key use is extracted from an equi-join predicate then it is
added not only as a key use for every index whose component can
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment