Commit 64ab10ff authored by Sergei Petrunia's avatar Sergei Petrunia

Initial implementation of RANK() window function

parent 30c9450a
drop table if exists t1,t2; drop table if exists t1,t2;
#
# Check what happens when one attempts to use window function without OVER clause
#
create table t1 (a int, b int);
insert into t1 values (1,1),(2,2);
select row_number() from t1;
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'from t1' at line 1
select rank() from t1;
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'from t1' at line 1
drop table t1;
#
# Check if basic window functions work
#
create table t1(a int, b int, x char(32)); create table t1(a int, b int, x char(32));
insert into t1 values (2, 10, 'xx'); insert into t1 values (2, 10, 'xx');
insert into t1 values (2, 10, 'zz'); insert into t1 values (2, 10, 'zz');
...@@ -39,3 +52,34 @@ pk a b row_number() over (order by a) row_number() over (order by b) ...@@ -39,3 +52,34 @@ pk a b row_number() over (order by a) row_number() over (order by b)
4 13 19 4 2 4 13 19 4 2
5 14 18 5 1 5 14 18 5 1
drop table t1; drop table t1;
#
# Try RANK() function
#
create table t2 (
pk int primary key,
a int
);
insert into t2 values
( 1 , 0),
( 2 , 0),
( 3 , 1),
( 4 , 1),
( 8 , 2),
( 5 , 2),
( 6 , 2),
( 7 , 2),
( 9 , 4),
(10 , 4);
select pk, a, rank() over (order by a) from t2;
pk a rank() over (order by a)
1 0 1
2 0 1
3 1 3
4 1 3
8 2 5
5 2 5
6 2 5
7 2 5
9 4 9
10 4 9
drop table t2;
...@@ -6,6 +6,22 @@ ...@@ -6,6 +6,22 @@
drop table if exists t1,t2; drop table if exists t1,t2;
--enable_warnings --enable_warnings
--echo #
--echo # Check what happens when one attempts to use window function without OVER clause
--echo #
create table t1 (a int, b int);
insert into t1 values (1,1),(2,2);
--error ER_PARSE_ERROR
select row_number() from t1;
--error ER_PARSE_ERROR
select rank() from t1;
drop table t1;
--echo #
--echo # Check if basic window functions work
--echo #
create table t1(a int, b int, x char(32)); create table t1(a int, b int, x char(32));
insert into t1 values (2, 10, 'xx'); insert into t1 values (2, 10, 'xx');
insert into t1 values (2, 10, 'zz'); insert into t1 values (2, 10, 'zz');
...@@ -35,3 +51,27 @@ from t1; ...@@ -35,3 +51,27 @@ from t1;
drop table t1; drop table t1;
--echo #
--echo # Try RANK() function
--echo #
create table t2 (
pk int primary key,
a int
);
insert into t2 values
( 1 , 0),
( 2 , 0),
( 3 , 1),
( 4 , 1),
( 8 , 2),
( 5 , 2),
( 6 , 2),
( 7 , 2),
( 9 , 4),
(10 , 4);
select pk, a, rank() over (order by a) from t2;
drop table t2;
...@@ -109,6 +109,7 @@ class Aggregator : public Sql_alloc ...@@ -109,6 +109,7 @@ class Aggregator : public Sql_alloc
class st_select_lex; class st_select_lex;
class Window_spec;
/** /**
Class Item_sum is the base class used for special expressions that SQL calls Class Item_sum is the base class used for special expressions that SQL calls
...@@ -550,6 +551,9 @@ class Item_sum :public Item_func_or_sum ...@@ -550,6 +551,9 @@ class Item_sum :public Item_func_or_sum
{ {
return trace_unsupported_by_check_vcol_func_processor(func_name()); return trace_unsupported_by_check_vcol_func_processor(func_name());
} }
//psergey-todo: this is ugly:
virtual void setup_window_func(THD *thd, Window_spec *window_spec) {}
}; };
......
...@@ -40,6 +40,30 @@ void Item_window_func::setup_partition_border_check(THD *thd) ...@@ -40,6 +40,30 @@ void Item_window_func::setup_partition_border_check(THD *thd)
Cached_item *tmp= new_Cached_item(thd, curr->item[0], TRUE); Cached_item *tmp= new_Cached_item(thd, curr->item[0], TRUE);
partition_fields.push_back(tmp); partition_fields.push_back(tmp);
} }
window_func->setup_window_func(thd, window_spec);
}
void Item_sum_rank::setup_window_func(THD *thd, Window_spec *window_spec)
{
/* TODO: move this into Item_window_func? */
for (ORDER *curr = window_spec->order_list.first; curr; curr=curr->next) {
Cached_item *tmp= new_Cached_item(thd, curr->item[0], TRUE);
orderby_fields.push_back(tmp);
}
clear();
}
bool Item_sum_rank::add()
{
row_number++;
if (test_if_group_changed(orderby_fields) > -1)
{
/* Row value changed */
cur_rank= row_number;
}
return false;
} }
...@@ -48,6 +72,7 @@ void Item_window_func::advance_window() { ...@@ -48,6 +72,7 @@ void Item_window_func::advance_window() {
int changed = test_if_group_changed(partition_fields); int changed = test_if_group_changed(partition_fields);
if (changed > -1) { if (changed > -1) {
/* Next partition */
window_func->clear(); window_func->clear();
} }
window_func->add(); window_func->add();
......
...@@ -9,6 +9,7 @@ class Window_spec; ...@@ -9,6 +9,7 @@ class Window_spec;
/* /*
ROW_NUMBER() OVER (...) ROW_NUMBER() OVER (...)
@detail
- This is a Window function (not just an aggregate) - This is a Window function (not just an aggregate)
- It can be computed by doing one pass over select output, provided - It can be computed by doing one pass over select output, provided
the output is sorted according to the window definition. the output is sorted according to the window definition.
...@@ -26,7 +27,7 @@ class Item_sum_row_number: public Item_sum_int ...@@ -26,7 +27,7 @@ class Item_sum_row_number: public Item_sum_int
Item_sum_row_number(THD *thd) Item_sum_row_number(THD *thd)
: Item_sum_int(thd), count(0) {} : Item_sum_int(thd), count(0) {}
enum Sumfunctype sum_func () const enum Sumfunctype sum_func() const
{ {
return ROW_NUMBER_FUNC; return ROW_NUMBER_FUNC;
} }
...@@ -42,29 +43,71 @@ class Item_sum_row_number: public Item_sum_int ...@@ -42,29 +43,71 @@ class Item_sum_row_number: public Item_sum_int
/* /*
RANK() OVER (...) Windowing function RANK() OVER (...) Windowing function
@detail
- This is a Window function (not just an aggregate) - This is a Window function (not just an aggregate)
- It can be computed by doing one pass over select output, provided - It can be computed by doing one pass over select output, provided
the output is sorted according to the window definition. the output is sorted according to the window definition.
The function is defined as:
"The rank of row R is defined as 1 (one) plus the number of rows that
precede R and are not peers of R"
"This implies that if two or more rows are not distinct with respect to
the window ordering, then there will be one or more"
@todo: failure to overload val_int() causes infinite mutual recursion like
this:
#7505 0x0000555555cd3a1c in Item::val_int_from_real (this=0x7fff50006460) at sql/item.cc:364
#7506 0x0000555555c768d4 in Item_sum_num::val_int (this=0x7fff50006460) at sql/item_sum.h:707
#7507 0x0000555555c76a54 in Item_sum_int::val_real (this=0x7fff50006460) at sql/item_sum.h:721
#7508 0x0000555555cd3a1c in Item::val_int_from_real (this=0x7fff50006460) at sql/item.cc:364
#7509 0x0000555555c768d4 in Item_sum_num::val_int (this=0x7fff50006460) at sql/item_sum.h:707
#7510 0x0000555555c76a54 in Item_sum_int::val_real (this=0x7fff50006460) at sql/item_sum.h:721
#7511 0x0000555555e6b411 in Item_window_func::val_real (this=0x7fff5000d870) at sql/item_windowfunc.h:291
#7512 0x0000555555ce1f40 in Item::save_in_field (this=0x7fff5000d870, field=0x7fff50012be0, no_conversions=true) at sql/item.cc:5843
#7513 0x00005555559c2c54 in Item_result_field::save_in_result_field (this=0x7fff5000d870, no_conversions=true) at sql/item.h:2280
#7514 0x0000555555aeb6bf in copy_funcs (func_ptr=0x7fff500126c8, thd=0x55555ab77458) at sql/sql_select.cc:23077
#7515 0x0000555555ae2d01 in end_write (join=0x7fff5000f230, join_tab=0x7fff50010728, end_of_records=false) at sql/sql_select.cc:19520
#7516 0x0000555555adffc1 in evaluate_join_record (join=0x7fff5000f230, join_tab=0x7fff500103e0, error=0) at sql/sql_select.cc:18388
#7517 0x0000555555adf8b6 in sub_select (join=0x7fff5000f230, join_tab=0x7fff500103e0, end_of_records=false) at sql/sql_select.cc:18163
is this normal? Can it happen with other val_XXX functions?
Should we use another way to prevent this by forcing
Item_window_func::val_real() to return NULL at phase #1?
*/ */
class Item_sum_rank: public Item_sum_int class Item_sum_rank: public Item_sum_int
{ {
longlong rank; longlong row_number; // just ROW_NUMBER()
longlong cur_rank; // current value
/*TODO: implementation is currently missing */ List<Cached_item> orderby_fields;
public:
void clear() void clear()
{ {
// This is called on next partition /* This is called on partition start */
cur_rank= 1;
row_number= 0;
} }
bool add()
{ bool add();
return false;
longlong val_int()
{
return cur_rank;
} }
void update_field() {} void update_field() {}
/*
void reset_field();
TODO: ^^ what does this do ? It is not called ever?
*/
public: public:
Item_sum_rank(THD *thd) Item_sum_rank(THD *thd)
: Item_sum_int(thd), rank(0) {} : Item_sum_int(thd) {}
enum Sumfunctype sum_func () const enum Sumfunctype sum_func () const
{ {
...@@ -76,17 +119,29 @@ class Item_sum_rank: public Item_sum_int ...@@ -76,17 +119,29 @@ class Item_sum_rank: public Item_sum_int
return "rank"; return "rank";
} }
void setup_window_func(THD *thd, Window_spec *window_spec);
}; };
/* /*
RANK() OVER (...) Windowing function RANK() OVER (...) Windowing function
@detail
- This is a Window function (not just an aggregate) - This is a Window function (not just an aggregate)
- It can be computed by doing one pass over select output, provided - It can be computed by doing one pass over select output, provided
the output is sorted according to the window definition. the output is sorted according to the window definition.
The function is defined as:
"If DENSE_RANK is specified, then the rank of row R is defined as the
number of rows preceding and including R that are distinct with respect
to the window ordering"
"This implies that there are no gaps in the sequential rank numbering of
rows in each window partition."
*/ */
class Item_sum_dense_rank: public Item_sum_int class Item_sum_dense_rank: public Item_sum_int
{ {
longlong dense_rank; longlong dense_rank;
...@@ -111,6 +166,18 @@ class Item_sum_dense_rank: public Item_sum_int ...@@ -111,6 +166,18 @@ class Item_sum_dense_rank: public Item_sum_int
}; };
/*
@detail
"The relative rank of a row R is defined as (RK-1)/(NR-1), where RK is
defined to be the RANK of R and NR is defined to be the number of rows in
the window partition of R."
Computation of this function requires two passes:
- First pass to find #rows in the partition
- Second pass to compute rank of current row and the value of the function
*/
class Item_sum_percent_rank: public Item_sum_num class Item_sum_percent_rank: public Item_sum_num
{ {
longlong rank; longlong rank;
...@@ -140,6 +207,18 @@ class Item_sum_percent_rank: public Item_sum_num ...@@ -140,6 +207,18 @@ class Item_sum_percent_rank: public Item_sum_num
}; };
/*
@detail
"The relative rank of a row R is defined as NP/NR, where
- NP is defined to be the number of rows preceding or peer with R in the
window ordering of the window partition of R
- NR is defined to be the number of rows in the window partition of R.
Just like with Item_sum_percent_rank, compuation of this function requires
two passes.
*/
class Item_sum_cume_dist: public Item_sum_num class Item_sum_cume_dist: public Item_sum_num
{ {
longlong count; longlong count;
...@@ -172,7 +251,6 @@ class Item_sum_cume_dist: public Item_sum_num ...@@ -172,7 +251,6 @@ class Item_sum_cume_dist: public Item_sum_num
class Item_window_func : public Item_result_field class Item_window_func : public Item_result_field
{ {
private:
Item_sum *window_func; Item_sum *window_func;
LEX_STRING *window_name; LEX_STRING *window_name;
Window_spec *window_spec; Window_spec *window_spec;
......
...@@ -22341,6 +22341,11 @@ int test_if_item_cache_changed(List<Cached_item> &list) ...@@ -22341,6 +22341,11 @@ int test_if_item_cache_changed(List<Cached_item> &list)
} }
/*
@return
-1 - Group not changed
value>=0 - Number of the component where the group changed
*/
static int static int
test_if_group_changed(List<Cached_item> &list) test_if_group_changed(List<Cached_item> &list)
......
...@@ -286,6 +286,8 @@ bool JOIN::process_window_functions(List<Item> *curr_fields_list) ...@@ -286,6 +286,8 @@ bool JOIN::process_window_functions(List<Item> *curr_fields_list)
return true; return true;
item_win->setup_partition_border_check(thd); item_win->setup_partition_border_check(thd);
// TODO: somehow, setup_sortkey_check here (either directly here
// or in the item.
int err; int err;
TABLE *tbl= *table; TABLE *tbl= *table;
...@@ -295,12 +297,12 @@ bool JOIN::process_window_functions(List<Item> *curr_fields_list) ...@@ -295,12 +297,12 @@ bool JOIN::process_window_functions(List<Item> *curr_fields_list)
/* /*
This will cause window function to compute its value for the This will cause window function to compute its value for the
current row : current row :
*/ */
item_win->advance_window(); item_win->advance_window();
/* /*
Put the new value into temptable's field Put the new value into temptable's field
TODO: Should this use item_win->update_field() call? TODO: Should this use item_win->update_field() call?
Regular aggegate function implementations seem to implement it. Regular aggegate function implementations seem to implement it.
*/ */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment