Commit c4defdc8 authored by Sergey Petrunya's avatar Sergey Petrunya

MDEV-5161: Wrong result (missing rows) with semijoin, LEFT JOIN, ORDER BY, constant table

- Don't pull out a table out of a semi-join if it is on the inner side of an outer join.
- Make join->sort_by_table= get_sort_by_table(...) call after const table detection 
  is done. That way, the value of join->sort_by_table will match the actual execution.
  Which will allow the code in setup_semijoin_dups_elimination() (search for 
  "Make sure that possible sorting of rows from the head table is not to be employed." 
  to see that "Using filesort" is going to be used together with Duplicate Elimination (
  and change it to Using temporary + Using filesort)
parent 8af289d2
...@@ -2920,4 +2920,37 @@ COUNT(*) ...@@ -2920,4 +2920,37 @@ COUNT(*)
DROP TABLE t1, t2, t3; DROP TABLE t1, t2, t3;
set join_buffer_size = @tmp_join_buffer_size; set join_buffer_size = @tmp_join_buffer_size;
set max_heap_table_size = @tmp_max_heap_table_size; set max_heap_table_size = @tmp_max_heap_table_size;
#
# MDEV-5161: Wrong result (missing rows) with semijoin, LEFT JOIN, ORDER BY, constant table
#
select @@optimizer_switch;
@@optimizer_switch
index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=off,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=on,mrr_cost_based=off,mrr_sort_keys=on,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=off,table_elimination=on,extended_keys=off
select @@join_cache_level;
@@join_cache_level
1
CREATE TABLE t1 (pk INT PRIMARY KEY, c1 VARCHAR(1)) ENGINE=MyISAM;
INSERT INTO t1 VALUES (1,'v'),(2,'v'),(3,'c'),(4,NULL),(5,'x');
CREATE TABLE t2 (c2 VARCHAR(1)) ENGINE=MyISAM;
INSERT INTO t2 VALUES ('x');
CREATE TABLE t3 (c3 VARCHAR(1)) ENGINE=MyISAM;
INSERT INTO t3 VALUES ('x'),('d');
SELECT * FROM t1, t2 WHERE pk IN ( SELECT pk FROM t1 LEFT JOIN t3 ON (c1 = c3 ) ) ORDER BY c2, c1;
pk c1 c2
4 NULL x
3 c x
1 v x
2 v x
5 x x
# This should show that "t1 left join t3" is still in the semi-join nest:
EXPLAIN EXTENDED
SELECT * FROM t1, t2 WHERE pk IN ( SELECT pk FROM t1 LEFT JOIN t3 ON (c1 = c3 ) ) ORDER BY c2, c1;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t2 system NULL NULL NULL NULL 1 100.00 Using temporary; Using filesort
1 PRIMARY t1 ALL PRIMARY NULL NULL NULL 5 100.00 Start temporary
1 PRIMARY t1 eq_ref PRIMARY PRIMARY 4 test.t1.pk 1 100.00
1 PRIMARY t3 ALL NULL NULL NULL NULL 2 100.00 Using where; End temporary
Warnings:
Note 1003 select `test`.`t1`.`pk` AS `pk`,`test`.`t1`.`c1` AS `c1`,'x' AS `c2` from `test`.`t1` semi join (`test`.`t1` left join `test`.`t3` on((`test`.`t1`.`c1` = `test`.`t3`.`c3`))) where (`test`.`t1`.`pk` = `test`.`t1`.`pk`) order by 'x',`test`.`t1`.`c1`
DROP TABLE t1,t2,t3;
set optimizer_switch=@subselect_sj_tmp; set optimizer_switch=@subselect_sj_tmp;
...@@ -2934,6 +2934,39 @@ COUNT(*) ...@@ -2934,6 +2934,39 @@ COUNT(*)
DROP TABLE t1, t2, t3; DROP TABLE t1, t2, t3;
set join_buffer_size = @tmp_join_buffer_size; set join_buffer_size = @tmp_join_buffer_size;
set max_heap_table_size = @tmp_max_heap_table_size; set max_heap_table_size = @tmp_max_heap_table_size;
#
# MDEV-5161: Wrong result (missing rows) with semijoin, LEFT JOIN, ORDER BY, constant table
#
select @@optimizer_switch;
@@optimizer_switch
index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=off,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=on,mrr_cost_based=off,mrr_sort_keys=on,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=off,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=off
select @@join_cache_level;
@@join_cache_level
6
CREATE TABLE t1 (pk INT PRIMARY KEY, c1 VARCHAR(1)) ENGINE=MyISAM;
INSERT INTO t1 VALUES (1,'v'),(2,'v'),(3,'c'),(4,NULL),(5,'x');
CREATE TABLE t2 (c2 VARCHAR(1)) ENGINE=MyISAM;
INSERT INTO t2 VALUES ('x');
CREATE TABLE t3 (c3 VARCHAR(1)) ENGINE=MyISAM;
INSERT INTO t3 VALUES ('x'),('d');
SELECT * FROM t1, t2 WHERE pk IN ( SELECT pk FROM t1 LEFT JOIN t3 ON (c1 = c3 ) ) ORDER BY c2, c1;
pk c1 c2
4 NULL x
3 c x
1 v x
2 v x
5 x x
# This should show that "t1 left join t3" is still in the semi-join nest:
EXPLAIN EXTENDED
SELECT * FROM t1, t2 WHERE pk IN ( SELECT pk FROM t1 LEFT JOIN t3 ON (c1 = c3 ) ) ORDER BY c2, c1;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t2 system NULL NULL NULL NULL 1 100.00 Using temporary; Using filesort
1 PRIMARY t1 ALL PRIMARY NULL NULL NULL 5 100.00 Start temporary
1 PRIMARY t1 eq_ref PRIMARY PRIMARY 4 test.t1.pk 1 100.00 Using join buffer (flat, BKA join); Key-ordered Rowid-ordered scan
1 PRIMARY t3 ALL NULL NULL NULL NULL 2 100.00 Using where; End temporary; Using join buffer (incremental, BNL join)
Warnings:
Note 1003 select `test`.`t1`.`pk` AS `pk`,`test`.`t1`.`c1` AS `c1`,'x' AS `c2` from `test`.`t1` semi join (`test`.`t1` left join `test`.`t3` on((`test`.`t1`.`c1` = `test`.`t3`.`c3`))) where (`test`.`t1`.`pk` = `test`.`t1`.`pk`) order by 'x',`test`.`t1`.`c1`
DROP TABLE t1,t2,t3;
set optimizer_switch=@subselect_sj_tmp; set optimizer_switch=@subselect_sj_tmp;
# #
# BUG#49129: Wrong result with IN-subquery with join_cache_level=6 and firstmatch=off # BUG#49129: Wrong result with IN-subquery with join_cache_level=6 and firstmatch=off
......
...@@ -2622,5 +2622,26 @@ DROP TABLE t1, t2, t3; ...@@ -2622,5 +2622,26 @@ DROP TABLE t1, t2, t3;
set join_buffer_size = @tmp_join_buffer_size; set join_buffer_size = @tmp_join_buffer_size;
set max_heap_table_size = @tmp_max_heap_table_size; set max_heap_table_size = @tmp_max_heap_table_size;
--echo #
--echo # MDEV-5161: Wrong result (missing rows) with semijoin, LEFT JOIN, ORDER BY, constant table
--echo #
select @@optimizer_switch;
select @@join_cache_level;
CREATE TABLE t1 (pk INT PRIMARY KEY, c1 VARCHAR(1)) ENGINE=MyISAM;
INSERT INTO t1 VALUES (1,'v'),(2,'v'),(3,'c'),(4,NULL),(5,'x');
CREATE TABLE t2 (c2 VARCHAR(1)) ENGINE=MyISAM;
INSERT INTO t2 VALUES ('x');
CREATE TABLE t3 (c3 VARCHAR(1)) ENGINE=MyISAM;
INSERT INTO t3 VALUES ('x'),('d');
SELECT * FROM t1, t2 WHERE pk IN ( SELECT pk FROM t1 LEFT JOIN t3 ON (c1 = c3 ) ) ORDER BY c2, c1;
--echo # This should show that "t1 left join t3" is still in the semi-join nest:
EXPLAIN EXTENDED
SELECT * FROM t1, t2 WHERE pk IN ( SELECT pk FROM t1 LEFT JOIN t3 ON (c1 = c3 ) ) ORDER BY c2, c1;
DROP TABLE t1,t2,t3;
# The following command must be the last one the file # The following command must be the last one the file
set optimizer_switch=@subselect_sj_tmp; set optimizer_switch=@subselect_sj_tmp;
...@@ -1924,11 +1924,25 @@ int pull_out_semijoin_tables(JOIN *join) ...@@ -1924,11 +1924,25 @@ int pull_out_semijoin_tables(JOIN *join)
} }
} }
table_map pulled_tables= 0; table_map pulled_tables= 0;
table_map dep_tables= 0;
if (have_join_nest_children) if (have_join_nest_children)
goto skip; goto skip;
/*
Calculate set of tables within this semi-join nest that have
other dependent tables
*/
child_li.rewind();
while ((tbl= child_li++))
{
TABLE *const table= tbl->table;
if (table &&
(table->reginfo.join_tab->dependent &
sj_nest->nested_join->used_tables))
dep_tables|= table->reginfo.join_tab->dependent;
}
/* Action #1: Mark the constant tables to be pulled out */ /* Action #1: Mark the constant tables to be pulled out */
child_li.rewind(); child_li.rewind();
while ((tbl= child_li++)) while ((tbl= child_li++))
...@@ -1979,7 +1993,8 @@ int pull_out_semijoin_tables(JOIN *join) ...@@ -1979,7 +1993,8 @@ int pull_out_semijoin_tables(JOIN *join)
child_li.rewind(); child_li.rewind();
while ((tbl= child_li++)) while ((tbl= child_li++))
{ {
if (tbl->table && !(pulled_tables & tbl->table->map)) if (tbl->table && !(pulled_tables & tbl->table->map) &&
!(dep_tables & tbl->table->map))
{ {
if (find_eq_ref_candidate(tbl->table, if (find_eq_ref_candidate(tbl->table,
sj_nest->nested_join->used_tables & sj_nest->nested_join->used_tables &
......
...@@ -241,7 +241,8 @@ static ORDER *create_distinct_group(THD *thd, Item **ref_pointer_array, ...@@ -241,7 +241,8 @@ static ORDER *create_distinct_group(THD *thd, Item **ref_pointer_array,
List<Item> &all_fields, List<Item> &all_fields,
bool *all_order_by_fields_used); bool *all_order_by_fields_used);
static bool test_if_subpart(ORDER *a,ORDER *b); static bool test_if_subpart(ORDER *a,ORDER *b);
static TABLE *get_sort_by_table(ORDER *a,ORDER *b,List<TABLE_LIST> &tables); static TABLE *get_sort_by_table(ORDER *a,ORDER *b,List<TABLE_LIST> &tables,
table_map const_tables);
static void calc_group_buffer(JOIN *join,ORDER *group); static void calc_group_buffer(JOIN *join,ORDER *group);
static bool make_group_fields(JOIN *main_join, JOIN *curr_join); static bool make_group_fields(JOIN *main_join, JOIN *curr_join);
static bool alloc_group_fields(JOIN *join,ORDER *group); static bool alloc_group_fields(JOIN *join,ORDER *group);
...@@ -1211,7 +1212,8 @@ JOIN::optimize() ...@@ -1211,7 +1212,8 @@ JOIN::optimize()
goto setup_subq_exit; goto setup_subq_exit;
} }
error= -1; // Error is sent to client error= -1; // Error is sent to client
sort_by_table= get_sort_by_table(order, group_list, select_lex->leaf_tables); /* get_sort_by_table() call used to be here: */
MEM_UNDEFINED(&sort_by_table, sizeof(sort_by_table));
/* Calculate how to do the join */ /* Calculate how to do the join */
thd_proc_info(thd, "statistics"); thd_proc_info(thd, "statistics");
...@@ -3583,6 +3585,9 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, ...@@ -3583,6 +3585,9 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
} }
} while (join->const_table_map & found_ref && ref_changed); } while (join->const_table_map & found_ref && ref_changed);
join->sort_by_table= get_sort_by_table(join->order, join->group_list,
join->select_lex->leaf_tables,
join->const_table_map);
/* /*
Update info on indexes that can be used for search lookups as Update info on indexes that can be used for search lookups as
reading const tables may has added new sargable predicates. reading const tables may has added new sargable predicates.
...@@ -20730,7 +20735,8 @@ test_if_subpart(ORDER *a,ORDER *b) ...@@ -20730,7 +20735,8 @@ test_if_subpart(ORDER *a,ORDER *b)
*/ */
static TABLE * static TABLE *
get_sort_by_table(ORDER *a,ORDER *b, List<TABLE_LIST> &tables) get_sort_by_table(ORDER *a,ORDER *b, List<TABLE_LIST> &tables,
table_map const_tables)
{ {
TABLE_LIST *table; TABLE_LIST *table;
List_iterator<TABLE_LIST> ti(tables); List_iterator<TABLE_LIST> ti(tables);
...@@ -20744,6 +20750,23 @@ get_sort_by_table(ORDER *a,ORDER *b, List<TABLE_LIST> &tables) ...@@ -20744,6 +20750,23 @@ get_sort_by_table(ORDER *a,ORDER *b, List<TABLE_LIST> &tables)
for (; a && b; a=a->next,b=b->next) for (; a && b; a=a->next,b=b->next)
{ {
/* Skip elements of a that are constant */
while (!((*a->item)->used_tables() & ~const_tables))
{
if (!(a= a->next))
break;
}
/* Skip elements of b that are constant */
while (!((*b->item)->used_tables() & ~const_tables))
{
if (!(b= b->next))
break;
}
if (!a || !b)
break;
if (!(*a->item)->eq(*b->item,1)) if (!(*a->item)->eq(*b->item,1))
DBUG_RETURN(0); DBUG_RETURN(0);
map|=a->item[0]->used_tables(); map|=a->item[0]->used_tables();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment