Commit 00412656 authored by Igor Babaev's avatar Igor Babaev

MDEV-27510 Query returns wrong result when using split optimization

This bug may affect queries that use a grouping derived table whose
grouping list contains references to columns from different tables if
the optimizer decides to employ the split optimization for the derived
table. In some very specific cases it may also affect queries with a
grouping derived table that refers to only one base table.
This bug was caused by an improper fix for the bug MDEV-25128. The fix
tried to get rid of the equality conditions pushed into the where clause
of the grouping derived table T to which the split optimization had been
applied. The fix erroneously assumed that only those pushed equalities
that were used for ref access of the tables referenced by T were needed.
In fact, the function remove_const(), which figures out which columns of
the group list can be removed when the split optimization is applied, can
use other pushed equalities as well.
This patch actually provides a proper fix for MDEV-25128. Rather than
trying to remove invalid pushed equalities referencing the fields of SJM
tables with a look-up access, the patch avoids pushing such equalities
in the first place.

Approved by Oleksandr Byelkin <sanja@mariadb.com>
parent da37bfd8
...@@ -3711,4 +3711,146 @@ set optimizer_switch='split_materialized=default'; ...@@ -3711,4 +3711,146 @@ set optimizer_switch='split_materialized=default';
DROP TABLE t1,t2,t3; DROP TABLE t1,t2,t3;
--echo #
--echo # MDEV-27510: Splittable derived with grouping over two tables
--echo #
# Schema for the test case. All four tables use MyISAM and a BIGINT UNSIGNED
# AUTO_INCREMENT primary key named id.
# ledgers: one row per ledger (id, name).
CREATE TABLE ledgers (
id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(32)
) ENGINE=MyISAM;
# charges: each row carries two ledger ids (from_ledger_id, to_ledger_id),
# each covered by its own secondary index. The keys are plain KEYs; no
# FOREIGN KEY constraints are declared despite the fk_ prefix in their names.
CREATE TABLE charges (
id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
from_ledger_id BIGINT UNSIGNED NOT NULL,
to_ledger_id BIGINT UNSIGNED NOT NULL,
amount INT NOT NULL,
KEY fk_charge_from_ledger (from_ledger_id),
KEY fk_charge_to_ledger (to_ledger_id)
) ENGINE=MyISAM;
# transactions: each row carries one ledger id, indexed.
CREATE TABLE transactions (
id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
ledger_id BIGINT UNSIGNED NOT NULL,
KEY fk_transactions_ledger (ledger_id)
) ENGINE=MyISAM;
# transaction_items: links a transaction id to a (nullable) charge id; both
# columns are indexed. This table and transactions are the two tables joined
# inside the grouping derived table of the queries below.
CREATE TABLE transaction_items (
id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
transaction_id BIGINT UNSIGNED NOT NULL,
charge_id BIGINT UNSIGNED,
amount INT NOT NULL,
KEY fk_items_transaction (transaction_id),
KEY fk_items_charge (charge_id)
) ENGINE=MyISAM;
# Fixture data.
# Three ledgers with ids 1..3.
INSERT INTO ledgers (id, name) VALUES
(1, 'Anna'), (2, 'John'), (3, 'Fred');
# Twenty charges (ids 1..20); every from_ledger_id / to_ledger_id is in 1..3.
INSERT INTO charges (id, from_ledger_id, to_ledger_id, amount) VALUES
(1, 2, 1, 200), (2, 1, 2, 330), (3, 1, 2, 640), (4, 3, 1, 640), (5, 3, 2, 1000),
(6, 3, 1, 660), (7, 2, 3, 650), (8, 3, 2, 160), (9, 2, 1, 740), (10, 3, 2, 310),
(11, 2, 1, 640), (12, 3, 2, 240), (13, 3, 2, 340), (14, 2, 1, 720),
(15, 2, 3, 100),
(16, 2, 3, 980), (17, 2, 1, 80), (18, 1, 2, 760), (19, 2, 3, 740),
(20, 2, 1, 990);
# Forty transactions (ids 1..40, listed grouped by ledger_id 1, 2, 3).
INSERT INTO transactions (id, ledger_id) VALUES
(2, 1), (3, 1), (5, 1), (8, 1), (12, 1), (18, 1), (22, 1), (28, 1),
(34, 1), (35, 1),
(40, 1), (1, 2), (4, 2), (6, 2), (10, 2), (13, 2), (16, 2), (17, 2),
(20, 2), (21, 2),
(24, 2), (26, 2), (27, 2), (29, 2), (31, 2), (33, 2), (36, 2), (37, 2),
(39, 2), (7, 3),
(9, 3), (11, 3), (14, 3), (15, 3), (19, 3), (23, 3), (25, 3), (30, 3),
(32, 3), (38, 3);
# Forty items: item N belongs to transaction N, and each charge 1..20 has
# exactly two items, one with the negated and one with the positive amount.
INSERT INTO transaction_items (id, transaction_id, charge_id, amount) VALUES
(1, 1, 1, -200), (2, 2, 1, 200), (3, 3, 2, -330), (4, 4, 2, 330),
(5, 5, 3, -640),
(6, 6, 3, 640), (7, 7, 4, -640), (8, 8, 4, 640), (9, 9, 5, -1000),
(10, 10, 5, 1000),
(11, 11, 6, -660), (12, 12, 6, 660), (13, 13, 7, -650), (14, 14, 7, 650),
(15, 15, 8, -160),
(16, 16, 8, 160), (17, 17, 9, -740), (18, 18, 9, 740), (19, 19, 10, -310),
(20, 20, 10, 310),
(21, 21, 11, -640), (22, 22, 11, 640), (23, 23, 12, -240), (24, 24, 12, 240),
(25, 25, 13, -340),
(26, 26, 13, 340), (27, 27, 14, -720), (28, 28, 14, 720), (29, 29, 15, -100),
(30, 30, 15, 100),
(31, 31, 16, -980), (32, 32, 16, 980), (33, 33, 17, -80), (34, 34, 17, 80),
(35, 35, 18, -760),
(36, 36, 18, 760), (37, 37, 19, -740), (38, 38, 19, 740), (39, 39, 20, -990),
(40, 40, 20, 990);
# Refresh statistics for all four tables before any EXPLAIN is run
# (presumably so that the recorded plans are stable - confirm).
ANALYZE TABLE ledgers, charges, transactions, transaction_items;
# Query under test, INNER JOIN variant: charges is joined with a grouping
# derived table whose GROUP BY list takes one column from each of its two
# base tables (transactions.ledger_id, transaction_items.charge_id). Both
# grouping columns are equated to columns of charges in the ON clause.
let $q=
SELECT
charges.id,
charges.from_ledger_id,
charges.to_ledger_id,
from_agg_items.num_rows AS from_num_rows
FROM charges
INNER JOIN (
SELECT
transactions.ledger_id,
transaction_items.charge_id,
count(*) as num_rows
FROM transaction_items
INNER JOIN transactions ON transaction_items.transaction_id = transactions.id
GROUP BY transactions.ledger_id, transaction_items.charge_id
) AS from_agg_items
ON from_agg_items.charge_id = charges.id AND
from_agg_items.ledger_id = charges.from_ledger_id
WHERE charges.to_ledger_id = 2;
# With the split optimization switched on: result set, then the plan in
# tabular and JSON form.
set optimizer_switch='split_materialized=on';
eval $q;
eval EXPLAIN $q;
eval EXPLAIN FORMAT=JSON $q;
# With the split optimization switched off: the same query again, giving a
# reference result and plan to compare against the run above.
set optimizer_switch='split_materialized=off';
eval $q;
eval EXPLAIN $q;
# Add two charges with to_ledger_id = 2 (so they pass the WHERE filter below)
# whose id and from_ledger_id match no transaction_items.charge_id and no
# transactions.ledger_id in the fixture data; the derived table therefore has
# no group for them.
INSERT INTO charges (id, from_ledger_id, to_ledger_id, amount) VALUES
(101, 4, 2, 100), (102, 7, 2, 200);
# Query under test, LEFT JOIN variant: identical to the INNER JOIN query except
# for the join type, so charges without a matching group are kept with a NULL
# from_num_rows.
let $q1=
SELECT
charges.id,
charges.from_ledger_id,
charges.to_ledger_id,
from_agg_items.num_rows AS from_num_rows
FROM charges
LEFT JOIN (
SELECT
transactions.ledger_id,
transaction_items.charge_id,
count(*) as num_rows
FROM transaction_items
INNER JOIN transactions ON transaction_items.transaction_id = transactions.id
GROUP BY transactions.ledger_id, transaction_items.charge_id
) AS from_agg_items
ON from_agg_items.charge_id = charges.id AND
from_agg_items.ledger_id = charges.from_ledger_id
WHERE charges.to_ledger_id = 2;
# With the split optimization switched on: result set, then the plan in
# tabular and JSON form.
set optimizer_switch='split_materialized=on';
eval $q1;
eval EXPLAIN $q1;
eval EXPLAIN FORMAT=JSON $q1;
# With the split optimization switched off: reference result and plan.
set optimizer_switch='split_materialized=off';
eval $q1;
eval EXPLAIN $q1;
# Cleanup: put the split_materialized flag back to its default and drop the
# four tables created for this test case.
set optimizer_switch='split_materialized=default';
DROP TABLE transaction_items;
DROP TABLE transactions;
DROP TABLE charges;
DROP TABLE ledgers;
--echo # End of 10.3 tests --echo # End of 10.3 tests
...@@ -1048,16 +1048,16 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count, ...@@ -1048,16 +1048,16 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
Inject equalities for splitting used by the materialization join Inject equalities for splitting used by the materialization join
@param @param
remaining_tables used to filter out the equalities that cannot excluded_tables used to filter out the equalities that cannot
be pushed. be pushed.
@details @details
This function is called by JOIN_TAB::fix_splitting that is used This function injects equalities pushed into a derived table T for which
to fix the chosen splitting of a splittable materialized table T the split optimization has been chosen by the optimizer. The function
in the final query execution plan. In this plan the table T is called by JOIN::inject_splitting_cond_for_all_tables_with_split_opt().
is joined just before the 'remaining_tables'. So all equalities All equalities usable for splitting T whose right parts do not depend on
usable for splitting whose right parts do not depend on any of any of the 'excluded_tables' can be pushed into the where clause of the
remaining tables can be pushed into join for T. derived table T.
The function also marks the select that specifies T as The function also marks the select that specifies T as
UNCACHEABLE_DEPENDENT_INJECTED. UNCACHEABLE_DEPENDENT_INJECTED.
...@@ -1066,7 +1066,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count, ...@@ -1066,7 +1066,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
true on failure true on failure
*/ */
bool JOIN::inject_best_splitting_cond(table_map remaining_tables) bool JOIN::inject_best_splitting_cond(table_map excluded_tables)
{ {
Item *inj_cond= 0; Item *inj_cond= 0;
List<Item> *inj_cond_list= &spl_opt_info->inj_cond_list; List<Item> *inj_cond_list= &spl_opt_info->inj_cond_list;
...@@ -1074,7 +1074,7 @@ bool JOIN::inject_best_splitting_cond(table_map remaining_tables) ...@@ -1074,7 +1074,7 @@ bool JOIN::inject_best_splitting_cond(table_map remaining_tables)
KEY_FIELD *added_key_field; KEY_FIELD *added_key_field;
while ((added_key_field= li++)) while ((added_key_field= li++))
{ {
if (remaining_tables & added_key_field->val->used_tables()) if (excluded_tables & added_key_field->val->used_tables())
continue; continue;
if (inj_cond_list->push_back(added_key_field->cond, thd->mem_root)) if (inj_cond_list->push_back(added_key_field->cond, thd->mem_root))
return true; return true;
...@@ -1168,8 +1168,6 @@ bool JOIN_TAB::fix_splitting(SplM_plan_info *spl_plan, ...@@ -1168,8 +1168,6 @@ bool JOIN_TAB::fix_splitting(SplM_plan_info *spl_plan,
memcpy((char *) md_join->best_positions, memcpy((char *) md_join->best_positions,
(char *) spl_plan->best_positions, (char *) spl_plan->best_positions,
sizeof(POSITION) * md_join->table_count); sizeof(POSITION) * md_join->table_count);
if (md_join->inject_best_splitting_cond(remaining_tables))
return true;
/* /*
This is called for a proper work of JOIN::get_best_combination() This is called for a proper work of JOIN::get_best_combination()
called for the join that materializes T called for the join that materializes T
...@@ -1213,7 +1211,8 @@ bool JOIN::fix_all_splittings_in_plan() ...@@ -1213,7 +1211,8 @@ bool JOIN::fix_all_splittings_in_plan()
if (tab->table->is_splittable()) if (tab->table->is_splittable())
{ {
SplM_plan_info *spl_plan= cur_pos->spl_plan; SplM_plan_info *spl_plan= cur_pos->spl_plan;
if (tab->fix_splitting(spl_plan, all_tables & ~prev_tables, if (tab->fix_splitting(spl_plan,
all_tables & ~prev_tables,
tablenr < const_tables )) tablenr < const_tables ))
return true; return true;
} }
...@@ -1221,3 +1220,44 @@ bool JOIN::fix_all_splittings_in_plan() ...@@ -1221,3 +1220,44 @@ bool JOIN::fix_all_splittings_in_plan()
} }
return false; return false;
} }
/**
@brief
Inject splitting conditions into WHERE of split derived
@details
The function calls JOIN_TAB::inject_best_splitting_cond() for each
materialized derived table T used in this join for which the split
optimization has been chosen by the optimizer. It is done in order to
inject equalities pushed into the where clause of the specification
of T that would be helpful to employ the splitting technique.
@retval
false on success
true on failure
*/
bool JOIN::inject_splitting_cond_for_all_tables_with_split_opt()
{
table_map prev_tables= 0;
table_map all_tables= (table_map(1) << table_count) - 1;
for (uint tablenr= 0; tablenr < table_count; tablenr++)
{
POSITION *cur_pos= &best_positions[tablenr];
JOIN_TAB *tab= cur_pos->table;
prev_tables|= tab->table->map;
if (!(tab->table->is_splittable() && cur_pos->spl_plan))
continue;
SplM_opt_info *spl_opt_info= tab->table->spl_opt_info;
JOIN *join= spl_opt_info->join;
/*
Currently the equalities referencing columns of SJM tables with
look-up access cannot be pushed into materialized derived.
*/
if (join->inject_best_splitting_cond((all_tables & ~prev_tables) |
sjm_lookup_tables))
return true;
}
return false;
}
...@@ -9755,6 +9755,9 @@ bool JOIN::get_best_combination() ...@@ -9755,6 +9755,9 @@ bool JOIN::get_best_combination()
hash_join= FALSE; hash_join= FALSE;
fix_semijoin_strategies_for_picked_join_order(this); fix_semijoin_strategies_for_picked_join_order(this);
if (inject_splitting_cond_for_all_tables_with_split_opt())
DBUG_RETURN(TRUE);
JOIN_TAB_RANGE *root_range; JOIN_TAB_RANGE *root_range;
if (!(root_range= new (thd->mem_root) JOIN_TAB_RANGE)) if (!(root_range= new (thd->mem_root) JOIN_TAB_RANGE))
...@@ -21863,21 +21866,6 @@ make_cond_for_table_from_pred(THD *thd, Item *root_cond, Item *cond, ...@@ -21863,21 +21866,6 @@ make_cond_for_table_from_pred(THD *thd, Item *root_cond, Item *cond,
cond->marker=3; // Checked when read cond->marker=3; // Checked when read
return (COND*) 0; return (COND*) 0;
} }
/*
If cond is an equality injected for split optimization then
a. when retain_ref_cond == false : cond is removed unconditionally
(cond that supports ref access is removed by the preceding code)
b. when retain_ref_cond == true : cond is removed if it does not
support ref access
*/
if (left_item->type() == Item::FIELD_ITEM &&
is_eq_cond_injected_for_split_opt((Item_func_eq *) cond) &&
(!retain_ref_cond ||
!test_if_ref(root_cond, (Item_field*) left_item,right_item)))
{
cond->marker=3;
return (COND*) 0;
}
} }
cond->marker=2; cond->marker=2;
cond->set_join_tab_idx(join_tab_idx_arg); cond->set_join_tab_idx(join_tab_idx_arg);
......
...@@ -1764,6 +1764,7 @@ class JOIN :public Sql_alloc ...@@ -1764,6 +1764,7 @@ class JOIN :public Sql_alloc
void add_keyuses_for_splitting(); void add_keyuses_for_splitting();
bool inject_best_splitting_cond(table_map remaining_tables); bool inject_best_splitting_cond(table_map remaining_tables);
bool fix_all_splittings_in_plan(); bool fix_all_splittings_in_plan();
bool inject_splitting_cond_for_all_tables_with_split_opt();
bool transform_in_predicates_into_in_subq(THD *thd); bool transform_in_predicates_into_in_subq(THD *thd);
private: private:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment