Commit e2535dcc authored by Igor Babaev's avatar Igor Babaev

MDEV-17382 Hash join algorithm should not be used to join materialized

           derived table / view by equality

Now rows of a materialized derived table are always put into a
temporary table before join operation. If BNLH is used to join this
table with the result of a partial join then both operands of the
join are actually put into main memory. In most cases this is not
efficient.
We could avoid this by sending the rows of the derived table directly
to the join operation. However this kind of data flow is not supported
yet.
Fixed by not allowing usage of hash join algorithm to join a materialized
derived table if it's joined by an equality predicate of the form
f=e where f is a field of the derived table.

Change for the test case in 10.3: splitting must be turned off to preserve
the explain.
parent fbee3141
......@@ -499,9 +499,45 @@ where
D1.a= t1.a;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 10 Using where
1 PRIMARY <derived2> hash_ALL key0 #hash#key0 5 test.t1.a 100 Using join buffer (flat, BNLH join)
1 PRIMARY <derived2> ref key0 key0 5 test.t1.a 10
2 DERIVED t2 ALL NULL NULL NULL NULL 100 Using filesort
set join_cache_level=@tmp_jcl;
set optimizer_switch=@tmp_os;
drop table t1, t2;
#
# Bug mdev-17382: equi-join of derived table with join_cache_level=4
#
CREATE TABLE t1 (
id int NOT NULL,
amount decimal DEFAULT NULL,
PRIMARY KEY (id)
);
CREATE TABLE t2 (
id int NOT NULL,
name varchar(50) DEFAULT NULL,
PRIMARY KEY (id)
);
INSERT INTO t1 VALUES
(1, 10.0000), (2, 20.0000), (3, 30.0000), (4, 40.0000),
(5, NULL), (6, NULL), (7, 70.0000), (8, 80.0000);
INSERT INTO t2 VALUES
(1,'A'), (2,'B'), (3,'C'), (4,'D'), (5, NULL), (6, NULL),
(7,'E'), (8,'F'), (9,'G'), (10,'H'), (11, NULL), (12, NULL);
set @save_optimizer_switch= @@optimizer_switch;
set optimizer_switch='split_materialized=off';
set join_cache_level=4;
EXPLAIN
SELECT t2.id,t2.name,t.total_amt
FROM t2
LEFT JOIN
(SELECT id, sum(amount) total_amt FROM t1 GROUP BY id) AS t
ON t2.id=t.id
WHERE t2.id < 3;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t2 range PRIMARY PRIMARY 4 NULL 3 Using index condition
1 PRIMARY <derived2> ref key0 key0 5 test.t2.id 2
2 DERIVED t1 ALL NULL NULL NULL NULL 8 Using temporary; Using filesort
set join_cache_level=default;
set optimizer_switch= @save_optimizer_switch;
DROP TABLE t1,t2;
set optimizer_switch=@exit_optimizer_switch;
......@@ -363,5 +363,48 @@ set join_cache_level=@tmp_jcl;
set optimizer_switch=@tmp_os;
drop table t1, t2;
--echo #
--echo # Bug mdev-17382: equi-join of derived table with join_cache_level=4
--echo #
CREATE TABLE t1 (
id int NOT NULL,
amount decimal DEFAULT NULL,
PRIMARY KEY (id)
);
CREATE TABLE t2 (
id int NOT NULL,
name varchar(50) DEFAULT NULL,
PRIMARY KEY (id)
);
INSERT INTO t1 VALUES
(1, 10.0000), (2, 20.0000), (3, 30.0000), (4, 40.0000),
(5, NULL), (6, NULL), (7, 70.0000), (8, 80.0000);
INSERT INTO t2 VALUES
(1,'A'), (2,'B'), (3,'C'), (4,'D'), (5, NULL), (6, NULL),
(7,'E'), (8,'F'), (9,'G'), (10,'H'), (11, NULL), (12, NULL);
set @save_optimizer_switch= @@optimizer_switch;
set optimizer_switch='split_materialized=off';
set join_cache_level=4;
EXPLAIN
SELECT t2.id,t2.name,t.total_amt
FROM t2
LEFT JOIN
(SELECT id, sum(amount) total_amt FROM t1 GROUP BY id) AS t
ON t2.id=t.id
WHERE t2.id < 3;
set join_cache_level=default;
set optimizer_switch= @save_optimizer_switch;
DROP TABLE t1,t2;
# The following command must be the last one the file
set optimizer_switch=@exit_optimizer_switch;
......@@ -226,7 +226,7 @@ set join_cache_level=3;
explain SELECT 1 FROM (SELECT url, id FROM t2 LIMIT 1 OFFSET 20) derived RIGHT JOIN t1 ON t1.id = derived.id;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL #
1 PRIMARY <derived2> hash_ALL key0 #hash#key0 25 test.t1.id # Using join buffer (flat, BNLH join)
1 PRIMARY <derived2> ref key0 key0 25 test.t1.id #
2 DERIVED t2 ALL NULL NULL NULL NULL #
set join_cache_level= @tmp_mdev5037;
drop table t0,t1,t2;
......
......@@ -11618,7 +11618,15 @@ uint check_join_cache_usage(JOIN_TAB *tab,
effort now.
*/
if (tab->table->pos_in_table_list->is_materialized_derived())
{
no_bka_cache= true;
/*
Don't use hash join algorithm if the temporary table for the rows
of the derived table will be created with an equi-join key.
*/
if (tab->table->s->keys)
no_hashed_cache= true;
}
/*
Don't use join buffering if we're dictated not to by no_jbuf_after
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment