Commit 9fe9fb68 authored by Vicențiu Ciorbaru's avatar Vicențiu Ciorbaru

MDEV-10859: Wrong result of aggregate window function in query with HAVING and no ORDER BY

Window functions need to be computed after applying the HAVING clause.
An optimization that we have for regular, non-window function, cases is
to apply having only during sending of the rows to the client. This
allows rows that should be filtered from the temporary table used to
store aggregation results to be stored there.

This behaviour is undesireable for window functions, as we have to
compute window functions on the result-set after HAVING is applied.
Storing extra rows in the table leads to wrong values as the frame
bounds might capture those -to be filtered afterwards- rows.
parent a90066b1
......@@ -2925,3 +2925,36 @@ WHERE EXISTS ( SELECT * FROM t2 WHERE c IN ( SELECT MAX(d) FROM t3 ) );
a MAX(a) AVG(a) OVER (PARTITION BY b)
NULL NULL NULL
DROP TABLE t1,t2,t3;
#
# MDEV-10859: Wrong result of aggregate window function in query
# with HAVING and no ORDER BY
#
create table empsalary (depname varchar(32), empno smallint primary key, salary int);
insert into empsalary values
('develop', 1, 5000), ('develop', 2, 4000),('sales', 3, '6000'),('sales', 4, 5000);
SELECT depname, empno, salary, avg(salary) OVER (PARTITION BY depname) FROM empsalary;
depname empno salary avg(salary) OVER (PARTITION BY depname)
develop 1 5000 4500.0000
develop 2 4000 4500.0000
sales 3 6000 5500.0000
sales 4 5000 5500.0000
SELECT depname, empno, salary, avg(salary) OVER (PARTITION BY depname) FROM empsalary ORDER BY depname;
depname empno salary avg(salary) OVER (PARTITION BY depname)
develop 1 5000 4500.0000
develop 2 4000 4500.0000
sales 3 6000 5500.0000
sales 4 5000 5500.0000
#
# These last 2 should have the same row results, ignoring order.
#
SELECT depname, empno, salary, avg(salary) OVER (PARTITION BY depname) FROM empsalary HAVING empno > 1;
depname empno salary avg(salary) OVER (PARTITION BY depname)
develop 2 4000 4000.0000
sales 3 6000 5500.0000
sales 4 5000 5500.0000
SELECT depname, empno, salary, avg(salary) OVER (PARTITION BY depname) FROM empsalary HAVING empno > 1 ORDER BY depname;
depname empno salary avg(salary) OVER (PARTITION BY depname)
develop 2 4000 4000.0000
sales 3 6000 5500.0000
sales 4 5000 5500.0000
drop table empsalary;
......@@ -1711,3 +1711,26 @@ SELECT a, MAX(a), AVG(a) OVER (PARTITION BY b) FROM t1
WHERE EXISTS ( SELECT * FROM t2 WHERE c IN ( SELECT MAX(d) FROM t3 ) );
DROP TABLE t1,t2,t3;
--echo #
--echo # MDEV-10859: Wrong result of aggregate window function in query
--echo # with HAVING and no ORDER BY
--echo #
create table empsalary (depname varchar(32), empno smallint primary key, salary int);
insert into empsalary values
('develop', 1, 5000), ('develop', 2, 4000),('sales', 3, '6000'),('sales', 4, 5000);
--sorted_result
SELECT depname, empno, salary, avg(salary) OVER (PARTITION BY depname) FROM empsalary;
--sorted_result
SELECT depname, empno, salary, avg(salary) OVER (PARTITION BY depname) FROM empsalary ORDER BY depname;
--echo #
--echo # These last 2 should have the same row results, ignoring order.
--echo #
--sorted_result
SELECT depname, empno, salary, avg(salary) OVER (PARTITION BY depname) FROM empsalary HAVING empno > 1;
--sorted_result
SELECT depname, empno, salary, avg(salary) OVER (PARTITION BY depname) FROM empsalary HAVING empno > 1 ORDER BY depname;
drop table empsalary;
......@@ -2411,13 +2411,18 @@ bool JOIN::make_aggr_tables_info()
If having is not handled here, it will be checked before the row
is sent to the client.
In the case of window functions however, we *must* make sure to not
store any rows which don't match HAVING within the temp table,
as rows will end up being used during their computation.
*/
if (having &&
(sort_and_group || (exec_tmp_table->distinct && !group_list)))
(sort_and_group || (exec_tmp_table->distinct && !group_list) ||
select_lex->have_window_funcs()))
{
// Attach HAVING to tmp table's condition
/* Attach HAVING to tmp table's condition */
curr_tab->having= having;
having= NULL; // Already done
having= NULL; /* Already done */
}
/* Change sum_fields reference to calculated fields in tmp_table */
......
......@@ -2840,6 +2840,12 @@ bool Window_funcs_computation::setup(THD *thd,
order_window_funcs_by_window_specs(window_funcs);
SQL_SELECT *sel= NULL;
/*
If the tmp table is filtered during sorting
(ex: SELECT with HAVING && ORDER BY), we must make sure to keep the
filtering conditions when we perform sorting for window function
computation.
*/
if (tab->filesort && tab->filesort->select)
{
sel= tab->filesort->select;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment