Commit e987b935 authored by Sergei Petrunia's avatar Sergei Petrunia

MDEV-31496: Make optimizer handle UCASE(varchar_col)=...

(Review input addressed)
(Added handling of UPDATE/DELETE and partitioning w/o index)

If the properties of the used collation allow, do the following
equivalent rewrites:

1. UPPER(key_col)=expr  ->  key_col=expr
   expr=UPPER(key_col)  ->  expr=key_col
   (also rewrite both sides of the equality at the same time)

2. UPPER(key_col) IN (constant-list)  -> key_col IN (constant-list)

- Mark utf8mb{3,4}_general_ci as collations that allow this.
- Add optimizer_switch='sargable_casefold=ON' to control this.
  (ON by default in this patch)
- Cover the rewrite in Optimizer Trace, rewrite name is
  "sargable_casefold_removal".
parent 8ad1e26b
...@@ -287,6 +287,7 @@ extern MY_UNI_CTYPE my_uni_ctype[256]; ...@@ -287,6 +287,7 @@ extern MY_UNI_CTYPE my_uni_ctype[256];
#define MY_CS_NON1TO1 0x40000 /* Has a complex mapping from characters #define MY_CS_NON1TO1 0x40000 /* Has a complex mapping from characters
to weights, e.g. contractions, expansions, to weights, e.g. contractions, expansions,
ignorable characters */ ignorable characters */
#define MY_CS_UPPER_EQUAL_AS_EQUAL 0x80000 /* (UPPER(x)=UPPER(y)) <=> (x=y)*/
#define MY_CHARSET_UNDEFINED 0 #define MY_CHARSET_UNDEFINED 0
/* Character repertoire flags */ /* Character repertoire flags */
......
...@@ -71,6 +71,7 @@ SET(SQL_EMBEDDED_SOURCES emb_qcache.cc libmysqld.c lib_sql.cc ...@@ -71,6 +71,7 @@ SET(SQL_EMBEDDED_SOURCES emb_qcache.cc libmysqld.c lib_sql.cc
../sql/mf_iocache.cc ../sql/my_decimal.cc ../sql/mf_iocache.cc ../sql/my_decimal.cc
../sql/net_serv.cc ../sql/opt_range.cc ../sql/net_serv.cc ../sql/opt_range.cc
../sql/opt_rewrite_date_cmp.cc ../sql/opt_rewrite_date_cmp.cc
../sql/opt_rewrite_remove_casefold.cc
../sql/opt_sum.cc ../sql/opt_sum.cc
../sql/parse_file.cc ../sql/procedure.cc ../sql/protocol.cc ../sql/parse_file.cc ../sql/procedure.cc ../sql/protocol.cc
../sql/records.cc ../sql/repl_failsafe.cc ../sql/rpl_filter.cc ../sql/records.cc ../sql/repl_failsafe.cc ../sql/rpl_filter.cc
......
# Check sargable_casefold rewrite for $collation
eval create table t1 (
col1 varchar(32),
col2 varchar(32),
col3 char(32),
col4 text,
key(col1),
key(col2),
key(col3),
key(col4(32))
) collate $collation;
insert into t1
select
concat('A-', seq),
concat('A-', seq),
concat('A-', seq),
concat('A-', seq)
from seq_1_to_100;
analyze table t1 persistent for all;
--echo # Basic examples. All should use ref(col1):
explain
select * from t1 where upper(col1)='A-3';
select * from t1 where upper(col1)='A-3';
explain
select * from t1 where ucase(col1)='a-3';
select * from t1 where ucase(col1)='a-3';
explain select * from t1 where 'abc'=upper(col1);
explain select * from t1 where 'xyz'=ucase(col1);
create view v1 as select * from t1;
explain select * from v1 where 'abc'=upper(col1);
drop view v1;
explain select * from t1 where upper(col3)='a-3';
explain select * from t1 where upper(col4)='a-3';
# DROP TABLE t1 is missing intentionally here.
...@@ -804,7 +804,7 @@ The following specify which files/extra groups are read (specified before remain ...@@ -804,7 +804,7 @@ The following specify which files/extra groups are read (specified before remain
condition_pushdown_for_derived, split_materialized, condition_pushdown_for_derived, split_materialized,
condition_pushdown_for_subquery, rowid_filter, condition_pushdown_for_subquery, rowid_filter,
condition_pushdown_from_having, not_null_range_scan, condition_pushdown_from_having, not_null_range_scan,
hash_join_cardinality hash_join_cardinality, sargable_casefold
--optimizer-trace=name --optimizer-trace=name
Controls tracing of the Optimizer: Controls tracing of the Optimizer:
optimizer_trace=option=val[,option=val...], where option optimizer_trace=option=val[,option=val...], where option
...@@ -1764,7 +1764,7 @@ optimizer-rowid-copy-cost 0.002653 ...@@ -1764,7 +1764,7 @@ optimizer-rowid-copy-cost 0.002653
optimizer-scan-setup-cost 10 optimizer-scan-setup-cost 10
optimizer-search-depth 62 optimizer-search-depth 62
optimizer-selectivity-sampling-limit 100 optimizer-selectivity-sampling-limit 100
optimizer-switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,hash_join_cardinality=on optimizer-switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,hash_join_cardinality=on,sargable_casefold=on
optimizer-trace optimizer-trace
optimizer-trace-max-mem-size 1048576 optimizer-trace-max-mem-size 1048576
optimizer-use-condition-selectivity 4 optimizer-use-condition-selectivity 4
......
...@@ -38,7 +38,7 @@ SET @@session.session_track_system_variables='optimizer_switch'; ...@@ -38,7 +38,7 @@ SET @@session.session_track_system_variables='optimizer_switch';
set optimizer_switch='index_merge=off,index_merge_union=off,index_merge_sort_union=off,index_merge_intersection=off,index_merge_sort_intersection=on,engine_condition_pushdown=on,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=on,mrr_cost_based=on,mrr_sort_keys=on,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=on,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off'; set optimizer_switch='index_merge=off,index_merge_union=off,index_merge_sort_union=off,index_merge_intersection=off,index_merge_sort_intersection=on,engine_condition_pushdown=on,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=on,mrr_cost_based=on,mrr_sort_keys=on,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=on,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off';
-- Tracker : SESSION_TRACK_SYSTEM_VARIABLES -- Tracker : SESSION_TRACK_SYSTEM_VARIABLES
-- optimizer_switch -- optimizer_switch
-- index_merge=off,index_merge_union=off,index_merge_sort_union=off,index_merge_intersection=off,index_merge_sort_intersection=on,engine_condition_pushdown=on,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=on,mrr_cost_based=on,mrr_sort_keys=on,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=on,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,hash_join_cardinality=on -- index_merge=off,index_merge_union=off,index_merge_sort_union=off,index_merge_intersection=off,index_merge_sort_intersection=on,engine_condition_pushdown=on,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=on,mrr_cost_based=on,mrr_sort_keys=on,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=on,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,hash_join_cardinality=on,sargable_casefold=on
Warnings: Warnings:
Warning 1681 'engine_condition_pushdown=on' is deprecated and will be removed in a future release Warning 1681 'engine_condition_pushdown=on' is deprecated and will be removed in a future release
......
This diff is collapsed.
#
# MDEV-31496 Make optimizer handle UCASE(varchar_col)=...
#
--source include/have_sequence.inc
set
@tmp_switch_sarg_casefold=@@optimizer_switch,
optimizer_switch='sargable_casefold=on';
let $collation=utf8mb3_general_ci;
source include/sargable_casefold.inc;
--echo # must not be rewritten:
explain select * from t1 where ucase(col1 collate utf8mb3_bin)='a-3';
--echo # Will not do the rewrite due to collation mismatch:
explain select * from t1 where ucase(col1)=_utf8mb3'abc' COLLATE utf8mb3_bin;
drop table t1;
let $collation=utf8mb4_general_ci;
source include/sargable_casefold.inc;
--echo # must not be rewritten:
explain select * from t1 where ucase(col1 collate utf8mb4_bin)='a-3';
--echo # Will not do the rewrite due to collation mismatch:
explain select * from t1 where ucase(col1)=_utf8mb4'abc' COLLATE utf8mb4_bin;
--echo #
--echo # Check if optimizer_switch turns the rewrite off:
--echo #
set
@save_os=@@optimizer_switch,
optimizer_switch='sargable_casefold=off';
explain select * from t1 where upper(col1)='A-3';
explain select * from t1 where ucase(col1)='a-3';
set optimizer_switch=@save_os;
--echo # The following will not do the rewrite because the comparison
--echo # is done as DOUBLEs. Come to think of it, it won't harm to do
--echo # the rewrite but it is outside of the scope of this patch:
explain select * from t1 where ucase(col1)=123.456;
select
coercibility(upper(col1))
from t1 limit 1;
select coercibility(_utf8mb3'abc' COLLATE utf8mb3_bin);
--echo # This is transformed too even if it doesn't create any new
--echo # [potential] access paths:
explain format=json select * from t1 where upper(col1)=upper(col2);
--echo #
--echo # Check if ref access works
--echo #
create table t2 (
a varchar(32),
non_key varchar(32),
key(a)
) collate utf8mb4_general_ci;
insert into t2
select
concat('A-', seq),
concat('A-', seq)
from seq_1_to_10;
--echo # Must use ref access for t1:
explain select * from t1, t2 where upper(t1.col1)= t2.non_key;
# Check the interplay with equality propagation
create table t3 (
a varchar(32),
b varchar(32),
key(a),
key(b)
) collate utf8mb3_general_ci;
insert into t3 values ('abc','ABC'), ('xyz','XYZ');
explain extended
select a from t3 ignore index(a) where a=b and upper(b)='ABC';
--echo #
--echo # Check that rewrite isn't applied for non-applicable collations
--echo #
create table t4 (
col1 varchar(32) collate utf8mb3_bin,
col2 varchar(32) collate utf8mb3_czech_ci,
col3 varchar(32) collate latin1_bin,
key(col1),
key(col2),
key(col3)
);
insert into t4
select
concat('A-', seq),
concat('A-', seq),
concat('A-', seq)
from seq_1_to_100;
analyze table t4 persistent for all;
--echo # None should use ref access:
explain select * from t4 where upper(col1)='A-3';
explain select * from t4 where upper(col2)='a-3';
explain select * from t4 where upper(col3)='a-3';
--echo #
--echo # Check that rewrite works for UPPER(col) IN (const-list)
--echo #
set
@tmp_ot= @@optimizer_trace,
optimizer_trace=1;
--echo # must use range:
explain
select * from t1 where upper(col1) IN ('A-3','A-4','a-5');
select * from t1 where upper(col1) IN ('A-3','A-4','a-5');
--echo # Will not use the rewrite:
explain
select * from t1 where upper(col1) IN ('A-3','A-4',col2);
--echo #
--echo # MDEV-31946: Optimizer handle UCASE(varchar_col)=... does not work for UPDATE/DELETE
--echo #
explain delete from t1 where upper(col1)='A';
explain delete from t1 where upper(col1) IN ('A','B');
explain update t1 set col2='ABC' where upper(col1)='A';
explain update t1 set col2='ABC' where upper(col1) IN ('A','B');
drop table t1,t2,t3,t4;
set optimizer_switch=@tmp_switch_sarg_casefold;
create table t1 (
col1 varchar(32),
col2 varchar(32),
key(col1),
key(col2)
) collate utf8mb3_general_ci;
insert into t1
select
concat('A-', seq),
concat('A-', seq)
from seq_1_to_100;
set
@tmp_ot= @@optimizer_trace,
@tmp_os=@@optimizer_switch,
optimizer_switch='sargable_casefold=on',
optimizer_trace=1;
explain select * from t1 where 'abc'=upper(col1);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref col1 col1 99 const 1 Using index condition
select
json_detailed(json_extract(trace, '$**.sargable_casefold_removal')) as JS
from information_schema.optimizer_trace;
JS
[
{
"before": "'abc' = ucase(t1.col1)",
"after": "'abc' = t1.col1"
}
]
explain select * from t1 where ucase(col2)=upper(col1);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
select
json_detailed(json_extract(trace, '$**.sargable_casefold_removal')) as JS
from information_schema.optimizer_trace;
JS
[
{
"before": "ucase(t1.col2) = ucase(t1.col1)",
"after": "t1.col2 = t1.col1"
}
]
explain
select * from t1 where upper(col1) IN ('A-3','A-4','a-5');
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range col1 col1 99 NULL 3 Using index condition
# Will show the rewrite:
select
json_detailed(json_extract(trace, '$**.sargable_casefold_removal')) as JS
from information_schema.optimizer_trace;
JS
[
{
"before": "ucase(t1.col1) in ('A-3','A-4','a-5')",
"after": "t1.col1 in ('A-3','A-4','a-5')"
}
]
set
optimizer_trace=@tmp_ot,
optimizer_switch=@tmp_os;
drop table t1;
#
# MDEV-31496 Make optimizer handle UCASE(varchar_col)=...
# Check the coverage in optimizer trace.
#
--source include/not_embedded.inc
--source include/have_sequence.inc
create table t1 (
col1 varchar(32),
col2 varchar(32),
key(col1),
key(col2)
) collate utf8mb3_general_ci;
insert into t1
select
concat('A-', seq),
concat('A-', seq)
from seq_1_to_100;
set
@tmp_ot= @@optimizer_trace,
@tmp_os=@@optimizer_switch,
optimizer_switch='sargable_casefold=on',
optimizer_trace=1;
explain select * from t1 where 'abc'=upper(col1);
select
json_detailed(json_extract(trace, '$**.sargable_casefold_removal')) as JS
from information_schema.optimizer_trace;
explain select * from t1 where ucase(col2)=upper(col1);
select
json_detailed(json_extract(trace, '$**.sargable_casefold_removal')) as JS
from information_schema.optimizer_trace;
explain
select * from t1 where upper(col1) IN ('A-3','A-4','a-5');
--echo # Will show the rewrite:
select
json_detailed(json_extract(trace, '$**.sargable_casefold_removal')) as JS
from information_schema.optimizer_trace;
set
optimizer_trace=@tmp_ot,
optimizer_switch=@tmp_os;
drop table t1;
set
@tmp_switch_sarg_casefold=@@optimizer_switch,
optimizer_switch='sargable_casefold=on';
create table t1 (
s1 varchar(15) collate utf8mb3_bin,
s2 varchar(15) collate utf8mb3_general_ci
) partition by key (s2) partitions 4;
insert into t1 values ('aa','aa'),('bb','bb');
explain format=json select * from t1 where upper(s2)='AA';
EXPLAIN
{
"query_block": {
"select_id": 1,
"nested_loop": [
{
"table": {
"table_name": "t1",
"partitions": ["p2"],
"access_type": "system",
"rows": 1,
"filtered": 100
}
}
]
}
}
explain format=json delete from t1 where upper(s2)='AA';
EXPLAIN
{
"query_block": {
"select_id": 1,
"table": {
"delete": 1,
"table_name": "t1",
"partitions": ["p2"],
"access_type": "ALL",
"rows": 1,
"attached_condition": "t1.s2 = 'AA'"
}
}
}
explain format=json update t1 set s1='aaa' where upper(s2)='AA';
EXPLAIN
{
"query_block": {
"select_id": 1,
"table": {
"update": 1,
"table_name": "t1",
"partitions": ["p2"],
"access_type": "ALL",
"rows": 1,
"attached_condition": "t1.s2 = 'AA'"
}
}
}
drop table t1;
set optimizer_switch=@tmp_switch_sarg_casefold;
--source include/have_partition.inc
#
# MDEV-31975: UCASE(varchar_col)=... not handled for partition tables
#
set
@tmp_switch_sarg_casefold=@@optimizer_switch,
optimizer_switch='sargable_casefold=on';
create table t1 (
s1 varchar(15) collate utf8mb3_bin,
s2 varchar(15) collate utf8mb3_general_ci
) partition by key (s2) partitions 4;
insert into t1 values ('aa','aa'),('bb','bb');
explain format=json select * from t1 where upper(s2)='AA';
explain format=json delete from t1 where upper(s2)='AA';
explain format=json update t1 set s1='aaa' where upper(s2)='AA';
drop table t1;
set optimizer_switch=@tmp_switch_sarg_casefold;
...@@ -2499,7 +2499,7 @@ VARIABLE_COMMENT Fine-tune the optimizer behavior ...@@ -2499,7 +2499,7 @@ VARIABLE_COMMENT Fine-tune the optimizer behavior
NUMERIC_MIN_VALUE NULL NUMERIC_MIN_VALUE NULL
NUMERIC_MAX_VALUE NULL NUMERIC_MAX_VALUE NULL
NUMERIC_BLOCK_SIZE NULL NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST index_merge,index_merge_union,index_merge_sort_union,index_merge_intersection,index_merge_sort_intersection,engine_condition_pushdown,index_condition_pushdown,derived_merge,derived_with_keys,firstmatch,loosescan,materialization,in_to_exists,semijoin,partial_match_rowid_merge,partial_match_table_scan,subquery_cache,mrr,mrr_cost_based,mrr_sort_keys,outer_join_with_cache,semijoin_with_cache,join_cache_incremental,join_cache_hashed,join_cache_bka,optimize_join_buffer_size,table_elimination,extended_keys,exists_to_in,orderby_uses_equalities,condition_pushdown_for_derived,split_materialized,condition_pushdown_for_subquery,rowid_filter,condition_pushdown_from_having,not_null_range_scan,hash_join_cardinality,default ENUM_VALUE_LIST index_merge,index_merge_union,index_merge_sort_union,index_merge_intersection,index_merge_sort_intersection,engine_condition_pushdown,index_condition_pushdown,derived_merge,derived_with_keys,firstmatch,loosescan,materialization,in_to_exists,semijoin,partial_match_rowid_merge,partial_match_table_scan,subquery_cache,mrr,mrr_cost_based,mrr_sort_keys,outer_join_with_cache,semijoin_with_cache,join_cache_incremental,join_cache_hashed,join_cache_bka,optimize_join_buffer_size,table_elimination,extended_keys,exists_to_in,orderby_uses_equalities,condition_pushdown_for_derived,split_materialized,condition_pushdown_for_subquery,rowid_filter,condition_pushdown_from_having,not_null_range_scan,hash_join_cardinality,sargable_casefold,default
READ_ONLY NO READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_TRACE VARIABLE_NAME OPTIMIZER_TRACE
......
...@@ -2699,7 +2699,7 @@ VARIABLE_COMMENT Fine-tune the optimizer behavior ...@@ -2699,7 +2699,7 @@ VARIABLE_COMMENT Fine-tune the optimizer behavior
NUMERIC_MIN_VALUE NULL NUMERIC_MIN_VALUE NULL
NUMERIC_MAX_VALUE NULL NUMERIC_MAX_VALUE NULL
NUMERIC_BLOCK_SIZE NULL NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST index_merge,index_merge_union,index_merge_sort_union,index_merge_intersection,index_merge_sort_intersection,engine_condition_pushdown,index_condition_pushdown,derived_merge,derived_with_keys,firstmatch,loosescan,materialization,in_to_exists,semijoin,partial_match_rowid_merge,partial_match_table_scan,subquery_cache,mrr,mrr_cost_based,mrr_sort_keys,outer_join_with_cache,semijoin_with_cache,join_cache_incremental,join_cache_hashed,join_cache_bka,optimize_join_buffer_size,table_elimination,extended_keys,exists_to_in,orderby_uses_equalities,condition_pushdown_for_derived,split_materialized,condition_pushdown_for_subquery,rowid_filter,condition_pushdown_from_having,not_null_range_scan,hash_join_cardinality,default ENUM_VALUE_LIST index_merge,index_merge_union,index_merge_sort_union,index_merge_intersection,index_merge_sort_intersection,engine_condition_pushdown,index_condition_pushdown,derived_merge,derived_with_keys,firstmatch,loosescan,materialization,in_to_exists,semijoin,partial_match_rowid_merge,partial_match_table_scan,subquery_cache,mrr,mrr_cost_based,mrr_sort_keys,outer_join_with_cache,semijoin_with_cache,join_cache_incremental,join_cache_hashed,join_cache_bka,optimize_join_buffer_size,table_elimination,extended_keys,exists_to_in,orderby_uses_equalities,condition_pushdown_for_derived,split_materialized,condition_pushdown_for_subquery,rowid_filter,condition_pushdown_from_having,not_null_range_scan,hash_join_cardinality,sargable_casefold,default
READ_ONLY NO READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_TRACE VARIABLE_NAME OPTIMIZER_TRACE
......
...@@ -26,6 +26,7 @@ outer_join_with_cache on ...@@ -26,6 +26,7 @@ outer_join_with_cache on
partial_match_rowid_merge on partial_match_rowid_merge on
partial_match_table_scan on partial_match_table_scan on
rowid_filter on rowid_filter on
sargable_casefold on
semijoin on semijoin on
semijoin_with_cache on semijoin_with_cache on
split_materialized on split_materialized on
......
...@@ -113,6 +113,7 @@ SET (SQL_SOURCE ...@@ -113,6 +113,7 @@ SET (SQL_SOURCE
../sql-common/client_plugin.c ../sql-common/client_plugin.c
opt_range.cc opt_range.cc
opt_rewrite_date_cmp.cc opt_rewrite_date_cmp.cc
opt_rewrite_remove_casefold.cc
opt_sum.cc opt_sum.cc
../sql-common/pack.c parse_file.cc password.c procedure.cc ../sql-common/pack.c parse_file.cc password.c procedure.cc
protocol.cc records.cc repl_failsafe.cc rpl_filter.cc protocol.cc records.cc repl_failsafe.cc rpl_filter.cc
......
...@@ -2514,6 +2514,8 @@ class Item :public Value_source, ...@@ -2514,6 +2514,8 @@ class Item :public Value_source,
{ return this; } { return this; }
virtual Item *multiple_equality_transformer(THD *thd, uchar *arg) virtual Item *multiple_equality_transformer(THD *thd, uchar *arg)
{ return this; } { return this; }
virtual Item* varchar_upper_cmp_transformer(THD *thd, uchar *arg)
{ return this; }
virtual Item* date_conds_transformer(THD *thd, uchar *arg) virtual Item* date_conds_transformer(THD *thd, uchar *arg)
{ return this; } { return this; }
virtual bool expr_cache_is_needed(THD *) { return FALSE; } virtual bool expr_cache_is_needed(THD *) { return FALSE; }
......
...@@ -790,6 +790,7 @@ class Item_func_eq :public Item_bool_rowready_func2 ...@@ -790,6 +790,7 @@ class Item_func_eq :public Item_bool_rowready_func2
{ return get_item_copy<Item_func_eq>(thd, this); } { return get_item_copy<Item_func_eq>(thd, this); }
Item* date_conds_transformer(THD *thd, uchar *arg) override Item* date_conds_transformer(THD *thd, uchar *arg) override
{ return do_date_conds_transformation(thd, this); } { return do_date_conds_transformation(thd, this); }
Item* varchar_upper_cmp_transformer(THD *thd, uchar *arg) override;
}; };
class Item_func_equal final :public Item_bool_rowready_func2 class Item_func_equal final :public Item_bool_rowready_func2
...@@ -2632,6 +2633,7 @@ class Item_func_in :public Item_func_opt_neg, ...@@ -2632,6 +2633,7 @@ class Item_func_in :public Item_func_opt_neg,
Item *in_predicate_to_in_subs_transformer(THD *thd, uchar *arg) override; Item *in_predicate_to_in_subs_transformer(THD *thd, uchar *arg) override;
Item *in_predicate_to_equality_transformer(THD *thd, uchar *arg) override; Item *in_predicate_to_equality_transformer(THD *thd, uchar *arg) override;
uint32 max_length_of_left_expr(); uint32 max_length_of_left_expr();
Item* varchar_upper_cmp_transformer(THD *thd, uchar *arg) override;
}; };
class cmp_item_row :public cmp_item class cmp_item_row :public cmp_item
......
#ifdef USE_PRAGMA_IMPLEMENTATION
#pragma implementation // gcc: Class implementation
#endif
#include "mariadb.h"
#include "sql_priv.h"
#include <m_ctype.h>
#include "sql_partition.h"
#include "sql_select.h"
#include "opt_trace.h"
/*
@brief
Check if passed item is "UCASE(table.colX)" where colX is either covered
by some index or is a part of partition expression.
@return
Argument of the UCASE if passed item matches
NULL otherwise.
*/
static Item* is_upper_key_col(Item *item)
{
Item_func_ucase *item_func;
if ((item_func= dynamic_cast<Item_func_ucase*>(item)))
{
Item *arg= item_func->arguments()[0];
Item *arg_real= arg->real_item();
if (arg_real->type() == Item::FIELD_ITEM)
{
if (dynamic_cast<const Type_handler_longstr*>(arg_real->type_handler()))
{
Field *field= ((Item_field*)arg_real)->field;
bool appl= (field->flags & PART_KEY_FLAG);
#ifdef WITH_PARTITION_STORAGE_ENGINE
partition_info *part_info;
if (!appl && ((part_info= field->table->part_info)))
{
appl= bitmap_is_set(&part_info->full_part_field_set,
field->field_index);
}
#endif
if (appl)
{
/*
Make sure COERCIBILITY(UPPER(col))=COERCIBILITY(col)
*/
DBUG_ASSERT(arg->collation.derivation ==
item_func->collation.derivation);
/* Return arg, not arg_real. Do not walk into Item_ref objects */
return arg;
}
}
}
}
return nullptr;
}
static void trace_upper_removal_rewrite(THD *thd, Item *old_item, Item *new_item)
{
Json_writer_object trace_wrapper(thd);
Json_writer_object obj(thd, "sargable_casefold_removal");
obj.add("before", old_item)
.add("after", new_item);
}
/*
@brief
Rewrite UPPER(key_varchar_col) = expr into key_varchar_col=expr
@detail
UPPER() may occur on both sides of the equality.
UCASE() is a synonym of UPPER() so we handle it, too.
*/
Item* Item_func_eq::varchar_upper_cmp_transformer(THD *thd, uchar *arg)
{
if (cmp.compare_type() == STRING_RESULT &&
cmp.compare_collation()->state & MY_CS_UPPER_EQUAL_AS_EQUAL)
{
Item *arg0= arguments()[0];
Item *arg1= arguments()[1];
bool do_rewrite= false;
Item *tmp;
// Try rewriting the left argument
if ((tmp= is_upper_key_col(arguments()[0])))
{
arg0= tmp;
do_rewrite= true;
}
// Try rewriting the right argument
if ((tmp= is_upper_key_col(arguments()[1])))
{
arg1=tmp;
do_rewrite= true;
}
if (do_rewrite)
{
Item *res= new (thd->mem_root) Item_func_eq(thd, arg0, arg1);
if (res && !res->fix_fields(thd, &res))
{
trace_upper_removal_rewrite(thd, this, res);
return res;
}
}
}
return this;
}
/*
@brief
Rewrite "UPPER(key_col) IN (const-list)" into "key_col IN (const-list)"
*/
Item* Item_func_in::varchar_upper_cmp_transformer(THD *thd, uchar *arg)
{
if (arg_types_compatible &&
m_comparator.cmp_type() == STRING_RESULT &&
cmp_collation.collation->state & MY_CS_UPPER_EQUAL_AS_EQUAL &&
all_items_are_consts(args + 1, arg_count - 1))
{
Item *arg0= arguments()[0];
Item *tmp;
if ((tmp= is_upper_key_col(arg0)))
{
Item_func_in *cl= (Item_func_in*)build_clone(thd);
Item *res;
cl->arguments()[0]= tmp;
cl->walk(&Item::cleanup_excluding_const_fields_processor, 0, 0);
res= cl;
if (res->fix_fields(thd, &res))
return this;
trace_upper_removal_rewrite(thd, this, res);
return res;
}
}
return this;
}
...@@ -476,6 +476,12 @@ bool Sql_cmd_delete::delete_from_single_table(THD *thd) ...@@ -476,6 +476,12 @@ bool Sql_cmd_delete::delete_from_single_table(THD *thd)
(uchar *) 0); (uchar *) 0);
} }
if (conds && optimizer_flag(thd, OPTIMIZER_SWITCH_SARGABLE_CASEFOLD))
{
conds= conds->top_level_transform(thd, &Item::varchar_upper_cmp_transformer,
(uchar *) 0);
}
#ifdef WITH_PARTITION_STORAGE_ENGINE #ifdef WITH_PARTITION_STORAGE_ENGINE
if (prune_partitions(thd, table, conds)) if (prune_partitions(thd, table, conds))
{ {
......
...@@ -239,6 +239,7 @@ ...@@ -239,6 +239,7 @@
#define OPTIMIZER_SWITCH_COND_PUSHDOWN_FROM_HAVING (1ULL << 34) #define OPTIMIZER_SWITCH_COND_PUSHDOWN_FROM_HAVING (1ULL << 34)
#define OPTIMIZER_SWITCH_NOT_NULL_RANGE_SCAN (1ULL << 35) #define OPTIMIZER_SWITCH_NOT_NULL_RANGE_SCAN (1ULL << 35)
#define OPTIMIZER_SWITCH_HASH_JOIN_CARDINALITY (1ULL << 36) #define OPTIMIZER_SWITCH_HASH_JOIN_CARDINALITY (1ULL << 36)
#define OPTIMIZER_SWITCH_SARGABLE_CASEFOLD (1ULL << 37)
#define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \ #define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \
OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \ OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \
...@@ -270,7 +271,8 @@ ...@@ -270,7 +271,8 @@
OPTIMIZER_SWITCH_USE_ROWID_FILTER | \ OPTIMIZER_SWITCH_USE_ROWID_FILTER | \
OPTIMIZER_SWITCH_COND_PUSHDOWN_FROM_HAVING | \ OPTIMIZER_SWITCH_COND_PUSHDOWN_FROM_HAVING | \
OPTIMIZER_SWITCH_OPTIMIZE_JOIN_BUFFER_SIZE |\ OPTIMIZER_SWITCH_OPTIMIZE_JOIN_BUFFER_SIZE |\
OPTIMIZER_SWITCH_HASH_JOIN_CARDINALITY) OPTIMIZER_SWITCH_HASH_JOIN_CARDINALITY |\
OPTIMIZER_SWITCH_SARGABLE_CASEFOLD)
/* /*
Replication uses 8 bytes to store SQL_MODE in the binary log. The day you Replication uses 8 bytes to store SQL_MODE in the binary log. The day you
use strictly more than 64 bits by adding one more define above, you should use strictly more than 64 bits by adding one more define above, you should
......
...@@ -2342,6 +2342,12 @@ JOIN::optimize_inner() ...@@ -2342,6 +2342,12 @@ JOIN::optimize_inner()
if (thd->lex->are_date_funcs_used()) if (thd->lex->are_date_funcs_used())
transform_date_conds_into_sargable(); transform_date_conds_into_sargable();
if (optimizer_flag(thd, OPTIMIZER_SWITCH_SARGABLE_CASEFOLD))
{
transform_all_conds_and_on_exprs(
thd, &Item::varchar_upper_cmp_transformer);
}
conds= optimize_cond(this, conds, join_list, ignore_on_expr, conds= optimize_cond(this, conds, join_list, ignore_on_expr,
&cond_value, &cond_equal, OPT_LINK_EQUAL_FIELDS); &cond_value, &cond_equal, OPT_LINK_EQUAL_FIELDS);
...@@ -440,6 +440,12 @@ bool Sql_cmd_update::update_single_table(THD *thd) ...@@ -440,6 +440,12 @@ bool Sql_cmd_update::update_single_table(THD *thd)
(uchar *) 0); (uchar *) 0);
} }
if (conds && optimizer_flag(thd, OPTIMIZER_SWITCH_SARGABLE_CASEFOLD))
{
conds= conds->top_level_transform(thd, &Item::varchar_upper_cmp_transformer,
(uchar *) 0);
}
// Don't count on usage of 'only index' when calculating which key to use // Don't count on usage of 'only index' when calculating which key to use
table->covering_keys.clear_all(); table->covering_keys.clear_all();
transactional_table= table->file->has_transactions_and_rollback(); transactional_table= table->file->has_transactions_and_rollback();
......
...@@ -2893,6 +2893,7 @@ export const char *optimizer_switch_names[]= ...@@ -2893,6 +2893,7 @@ export const char *optimizer_switch_names[]=
"condition_pushdown_from_having", "condition_pushdown_from_having",
"not_null_range_scan", "not_null_range_scan",
"hash_join_cardinality", "hash_join_cardinality",
"sargable_casefold",
"default", "default",
NullS NullS
}; };
......
...@@ -1243,7 +1243,8 @@ MY_CHARSET_HANDLER my_charset_utf8mb3_handler= ...@@ -1243,7 +1243,8 @@ MY_CHARSET_HANDLER my_charset_utf8mb3_handler=
struct charset_info_st my_charset_utf8mb3_general_ci= struct charset_info_st my_charset_utf8mb3_general_ci=
{ {
33,0,0, /* number */ 33,0,0, /* number */
MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE, /* state */ MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|
MY_CS_UPPER_EQUAL_AS_EQUAL, /* state */
{ charset_name_utf8mb3, charset_name_utf8mb3_length }, /* cs name */ { charset_name_utf8mb3, charset_name_utf8mb3_length }, /* cs name */
{ STRING_WITH_LEN(MY_UTF8MB3 "_general_ci") }, /* name */ { STRING_WITH_LEN(MY_UTF8MB3 "_general_ci") }, /* name */
"", /* comment */ "", /* comment */
...@@ -3573,7 +3574,8 @@ MY_CHARSET_HANDLER my_charset_utf8mb4_handler= ...@@ -3573,7 +3574,8 @@ MY_CHARSET_HANDLER my_charset_utf8mb4_handler=
struct charset_info_st my_charset_utf8mb4_general_ci= struct charset_info_st my_charset_utf8mb4_general_ci=
{ {
45,0,0, /* number */ 45,0,0, /* number */
MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_UNICODE_SUPPLEMENT, /* state */ MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|
MY_CS_UNICODE_SUPPLEMENT|MY_CS_UPPER_EQUAL_AS_EQUAL, /* state */
{ charset_name_utf8mb4, charset_name_utf8mb4_length}, /* cs name */ { charset_name_utf8mb4, charset_name_utf8mb4_length}, /* cs name */
{ STRING_WITH_LEN(MY_UTF8MB4_GENERAL_CI) }, /* name */ { STRING_WITH_LEN(MY_UTF8MB4_GENERAL_CI) }, /* name */
"UTF-8 Unicode", /* comment */ "UTF-8 Unicode", /* comment */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment