Commit 64ab10ff authored by Sergei Petrunia's avatar Sergei Petrunia

Initial implementation of RANK() window function

parent 30c9450a
drop table if exists t1,t2;
#
# Check what happens when one attempts to use window function without OVER clause
#
create table t1 (a int, b int);
insert into t1 values (1,1),(2,2);
select row_number() from t1;
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'from t1' at line 1
select rank() from t1;
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'from t1' at line 1
drop table t1;
#
# Check if basic window functions work
#
create table t1(a int, b int, x char(32));
insert into t1 values (2, 10, 'xx');
insert into t1 values (2, 10, 'zz');
......@@ -39,3 +52,34 @@ pk a b row_number() over (order by a) row_number() over (order by b)
4 13 19 4 2
5 14 18 5 1
drop table t1;
#
# Try RANK() function
#
create table t2 (
pk int primary key,
a int
);
insert into t2 values
( 1 , 0),
( 2 , 0),
( 3 , 1),
( 4 , 1),
( 8 , 2),
( 5 , 2),
( 6 , 2),
( 7 , 2),
( 9 , 4),
(10 , 4);
select pk, a, rank() over (order by a) from t2;
pk a rank() over (order by a)
1 0 1
2 0 1
3 1 3
4 1 3
8 2 5
5 2 5
6 2 5
7 2 5
9 4 9
10 4 9
drop table t2;
......@@ -6,6 +6,22 @@
drop table if exists t1,t2;
--enable_warnings
--echo #
--echo # Check what happens when one attempts to use window function without OVER clause
--echo #
create table t1 (a int, b int);
insert into t1 values (1,1),(2,2);
--error ER_PARSE_ERROR
select row_number() from t1;
--error ER_PARSE_ERROR
select rank() from t1;
drop table t1;
--echo #
--echo # Check if basic window functions work
--echo #
create table t1(a int, b int, x char(32));
insert into t1 values (2, 10, 'xx');
insert into t1 values (2, 10, 'zz');
......@@ -35,3 +51,27 @@ from t1;
drop table t1;
--echo #
--echo # Try RANK() function
--echo #
create table t2 (
pk int primary key,
a int
);
insert into t2 values
( 1 , 0),
( 2 , 0),
( 3 , 1),
( 4 , 1),
( 8 , 2),
( 5 , 2),
( 6 , 2),
( 7 , 2),
( 9 , 4),
(10 , 4);
select pk, a, rank() over (order by a) from t2;
drop table t2;
......@@ -109,6 +109,7 @@ class Aggregator : public Sql_alloc
class st_select_lex;
class Window_spec;
/**
Class Item_sum is the base class used for special expressions that SQL calls
......@@ -550,6 +551,9 @@ class Item_sum :public Item_func_or_sum
{
return trace_unsupported_by_check_vcol_func_processor(func_name());
}
//psergey-todo: this is ugly:
virtual void setup_window_func(THD *thd, Window_spec *window_spec) {}
};
......
......@@ -40,6 +40,30 @@ void Item_window_func::setup_partition_border_check(THD *thd)
Cached_item *tmp= new_Cached_item(thd, curr->item[0], TRUE);
partition_fields.push_back(tmp);
}
window_func->setup_window_func(thd, window_spec);
}
void Item_sum_rank::setup_window_func(THD *thd, Window_spec *window_spec)
{
/* TODO: move this into Item_window_func? */
for (ORDER *curr = window_spec->order_list.first; curr; curr=curr->next) {
Cached_item *tmp= new_Cached_item(thd, curr->item[0], TRUE);
orderby_fields.push_back(tmp);
}
clear();
}
bool Item_sum_rank::add()
{
row_number++;
if (test_if_group_changed(orderby_fields) > -1)
{
/* Row value changed */
cur_rank= row_number;
}
return false;
}
......@@ -48,6 +72,7 @@ void Item_window_func::advance_window() {
int changed = test_if_group_changed(partition_fields);
if (changed > -1) {
/* Next partition */
window_func->clear();
}
window_func->add();
......
......@@ -9,6 +9,7 @@ class Window_spec;
/*
ROW_NUMBER() OVER (...)
@detail
- This is a Window function (not just an aggregate)
- It can be computed by doing one pass over select output, provided
the output is sorted according to the window definition.
......@@ -26,7 +27,7 @@ class Item_sum_row_number: public Item_sum_int
Item_sum_row_number(THD *thd)
: Item_sum_int(thd), count(0) {}
enum Sumfunctype sum_func () const
enum Sumfunctype sum_func() const
{
return ROW_NUMBER_FUNC;
}
......@@ -42,29 +43,71 @@ class Item_sum_row_number: public Item_sum_int
/*
RANK() OVER (...) Windowing function
@detail
- This is a Window function (not just an aggregate)
- It can be computed by doing one pass over select output, provided
the output is sorted according to the window definition.
The function is defined as:
"The rank of row R is defined as 1 (one) plus the number of rows that
precede R and are not peers of R"
"This implies that if two or more rows are not distinct with respect to
the window ordering, then there will be one or more"
@todo: failure to overload val_int() causes infinite mutual recursion like
this:
#7505 0x0000555555cd3a1c in Item::val_int_from_real (this=0x7fff50006460) at sql/item.cc:364
#7506 0x0000555555c768d4 in Item_sum_num::val_int (this=0x7fff50006460) at sql/item_sum.h:707
#7507 0x0000555555c76a54 in Item_sum_int::val_real (this=0x7fff50006460) at sql/item_sum.h:721
#7508 0x0000555555cd3a1c in Item::val_int_from_real (this=0x7fff50006460) at sql/item.cc:364
#7509 0x0000555555c768d4 in Item_sum_num::val_int (this=0x7fff50006460) at sql/item_sum.h:707
#7510 0x0000555555c76a54 in Item_sum_int::val_real (this=0x7fff50006460) at sql/item_sum.h:721
#7511 0x0000555555e6b411 in Item_window_func::val_real (this=0x7fff5000d870) at sql/item_windowfunc.h:291
#7512 0x0000555555ce1f40 in Item::save_in_field (this=0x7fff5000d870, field=0x7fff50012be0, no_conversions=true) at sql/item.cc:5843
#7513 0x00005555559c2c54 in Item_result_field::save_in_result_field (this=0x7fff5000d870, no_conversions=true) at sql/item.h:2280
#7514 0x0000555555aeb6bf in copy_funcs (func_ptr=0x7fff500126c8, thd=0x55555ab77458) at sql/sql_select.cc:23077
#7515 0x0000555555ae2d01 in end_write (join=0x7fff5000f230, join_tab=0x7fff50010728, end_of_records=false) at sql/sql_select.cc:19520
#7516 0x0000555555adffc1 in evaluate_join_record (join=0x7fff5000f230, join_tab=0x7fff500103e0, error=0) at sql/sql_select.cc:18388
#7517 0x0000555555adf8b6 in sub_select (join=0x7fff5000f230, join_tab=0x7fff500103e0, end_of_records=false) at sql/sql_select.cc:18163
is this normal? Can it happen with other val_XXX functions?
Should we use another way to prevent this by forcing
Item_window_func::val_real() to return NULL at phase #1?
*/
class Item_sum_rank: public Item_sum_int
{
longlong rank;
longlong row_number; // just ROW_NUMBER()
longlong cur_rank; // current value
/*TODO: implementation is currently missing */
List<Cached_item> orderby_fields;
public:
void clear()
{
// This is called on next partition
/* This is called on partition start */
cur_rank= 1;
row_number= 0;
}
bool add()
{
return false;
bool add();
longlong val_int()
{
return cur_rank;
}
void update_field() {}
/*
void reset_field();
TODO: ^^ what does this do ? It is not called ever?
*/
public:
public:
Item_sum_rank(THD *thd)
: Item_sum_int(thd), rank(0) {}
: Item_sum_int(thd) {}
enum Sumfunctype sum_func () const
{
......@@ -76,17 +119,29 @@ class Item_sum_rank: public Item_sum_int
return "rank";
}
void setup_window_func(THD *thd, Window_spec *window_spec);
};
/*
RANK() OVER (...) Windowing function
@detail
- This is a Window function (not just an aggregate)
- It can be computed by doing one pass over select output, provided
the output is sorted according to the window definition.
The function is defined as:
"If DENSE_RANK is specified, then the rank of row R is defined as the
number of rows preceding and including R that are distinct with respect
to the window ordering"
"This implies that there are no gaps in the sequential rank numbering of
rows in each window partition."
*/
class Item_sum_dense_rank: public Item_sum_int
{
longlong dense_rank;
......@@ -111,6 +166,18 @@ class Item_sum_dense_rank: public Item_sum_int
};
/*
@detail
"The relative rank of a row R is defined as (RK-1)/(NR-1), where RK is
defined to be the RANK of R and NR is defined to be the number of rows in
the window partition of R."
Computation of this function requires two passes:
- First pass to find #rows in the partition
- Second pass to compute rank of current row and the value of the function
*/
class Item_sum_percent_rank: public Item_sum_num
{
longlong rank;
......@@ -140,6 +207,18 @@ class Item_sum_percent_rank: public Item_sum_num
};
/*
@detail
"The relative rank of a row R is defined as NP/NR, where
- NP is defined to be the number of rows preceding or peer with R in the
window ordering of the window partition of R
- NR is defined to be the number of rows in the window partition of R.
Just like with Item_sum_percent_rank, compuation of this function requires
two passes.
*/
class Item_sum_cume_dist: public Item_sum_num
{
longlong count;
......@@ -172,7 +251,6 @@ class Item_sum_cume_dist: public Item_sum_num
class Item_window_func : public Item_result_field
{
private:
Item_sum *window_func;
LEX_STRING *window_name;
Window_spec *window_spec;
......
......@@ -22341,6 +22341,11 @@ int test_if_item_cache_changed(List<Cached_item> &list)
}
/*
@return
-1 - Group not changed
value>=0 - Number of the component where the group changed
*/
static int
test_if_group_changed(List<Cached_item> &list)
......
......@@ -286,6 +286,8 @@ bool JOIN::process_window_functions(List<Item> *curr_fields_list)
return true;
item_win->setup_partition_border_check(thd);
// TODO: somehow, setup_sortkey_check here (either directly here
// or in the item.
int err;
TABLE *tbl= *table;
......@@ -295,12 +297,12 @@ bool JOIN::process_window_functions(List<Item> *curr_fields_list)
/*
This will cause window function to compute its value for the
current row :
current row :
*/
item_win->advance_window();
/*
Put the new value into temptable's field
/*
Put the new value into temptable's field
TODO: Should this use item_win->update_field() call?
Regular aggegate function implementations seem to implement it.
*/
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment