Commit b579a626 authored by Vicențiu Ciorbaru's avatar Vicențiu Ciorbaru

Implement percent_rank window function

parent f638ffef
...@@ -56,6 +56,8 @@ Item_window_func::fix_fields(THD *thd, Item **ref) ...@@ -56,6 +56,8 @@ Item_window_func::fix_fields(THD *thd, Item **ref)
if (window_func->fix_fields(thd, ref)) if (window_func->fix_fields(thd, ref))
return true; return true;
fix_length_and_dec();
max_length= window_func->max_length; max_length= window_func->max_length;
fixed= 1; fixed= 1;
...@@ -180,3 +182,27 @@ void Item_window_func::advance_window() ...@@ -180,3 +182,27 @@ void Item_window_func::advance_window()
} }
window_func->add(); window_func->add();
} }
/*
  Account for one more row while scanning a partition.

  The row counter is advanced unconditionally. If test_if_group_changed()
  reports a change in the cached ORDER BY values (any result above -1),
  the rank of the current row becomes the current row number; otherwise
  the previously computed rank is kept.

  @return false in all cases.
*/
bool Item_sum_percent_rank::add()
{
  ++row_number;

  const bool order_value_changed=
    test_if_group_changed(orderby_fields) > -1;
  if (order_value_changed)
    cur_rank= row_number;              /* Row value changed. */

  return false;
}
/*
  Prepare the function for evaluation over the given window specification.

  One Cached_item is created for the first item of every element in the
  window's ORDER BY list and appended to orderby_fields; add() later
  hands that list to test_if_group_changed(). The function state is
  then reset through clear().

  @param thd          passed on to new_Cached_item()
  @param window_spec  window specification whose order_list is walked
*/
void Item_sum_percent_rank::setup_window_func(THD *thd, Window_spec *window_spec)
{
  /* TODO: move this into Item_window_func? */
  ORDER *order_elem= window_spec->order_list.first;
  while (order_elem)
  {
    orderby_fields.push_back(new_Cached_item(thd, order_elem->item[0], TRUE));
    order_elem= order_elem->next;
  }
  clear();
}
...@@ -70,6 +70,7 @@ class Item_sum_row_number: public Item_sum_int ...@@ -70,6 +70,7 @@ class Item_sum_row_number: public Item_sum_int
class Item_sum_rank: public Item_sum_int class Item_sum_rank: public Item_sum_int
{ {
protected:
longlong row_number; // just ROW_NUMBER() longlong row_number; // just ROW_NUMBER()
longlong cur_rank; // current value longlong cur_rank; // current value
...@@ -168,6 +169,103 @@ class Item_sum_dense_rank: public Item_sum_int ...@@ -168,6 +169,103 @@ class Item_sum_dense_rank: public Item_sum_int
}; };
/* TODO-cvicentiu
* Perhaps this is overengineering, but I would like to decouple the 2-pass
* algorithm from the specific action that must be performed during the
* first pass. The second pass can make use of the "add" function from the
* Item_sum_<window_function>.
*/
/*
This class represents a generic interface for window functions that need
to store additional information. Such window functions include percent_rank
and cume_dist.
*/
class Window_context
{
public:
  /* Feed one more field (row) into the accumulated context. */
  virtual void add_field_to_context(Field *field)= 0;

  /* Discard everything accumulated so far. */
  virtual void reset()= 0;

  /* Virtual so that a context can be destroyed through a base pointer. */
  virtual ~Window_context() {}
};
/*
A generic interface that specifies the datatype that the context represents.
*/
template <typename T>
class Window_context_getter
{
protected:
  /* Return the context value, of type T, associated with the given field. */
  virtual T get_field_context(const Field *field)= 0;

  /* Protected: objects are not meant to be destroyed via this interface. */
  virtual ~Window_context_getter() {}
};
/*
A window function context representing the number of rows that are present
within a partition. Because the number of rows does not depend on the
specific value within the current field, we ignore the parameter
in this case.
*/
class Window_context_row_count :
  public Window_context, Window_context_getter<ulonglong>
{
public:
  /* Start with an empty context: no rows counted yet. */
  Window_context_row_count() : num_rows_(0) {}

  /* Count one more row; the field itself is not inspected. */
  void add_field_to_context(Field *field __attribute__((unused)))
  {
    ++num_rows_;
  }

  /* Forget the rows counted so far. */
  void reset()
  {
    num_rows_= 0;
  }

  /* Return the number of rows counted since construction or reset(). */
  ulonglong get_field_context(const Field *field __attribute__((unused)))
  {
    return num_rows_;
  }

private:
  ulonglong num_rows_;  /* Rows added since construction or last reset(). */
};
/*
  Context whose getter type is a pair of two counters.

  NOTE: this is only a skeleton. It defines nothing but a constructor that
  ignores its argument, and it overrides none of the pure virtual functions
  of its bases, so it cannot be instantiated as it stands.
*/
class Window_context_row_and_group_count :
  public Window_context, Window_context_getter<std::pair<ulonglong, ulonglong> >
{
public:
  /* group_list is currently unused. */
  Window_context_row_and_group_count(void *group_list) {}
};
/*
An abstract class representing an item that holds a context.
*/
class Item_context
{
public:
  /* A freshly constructed item holds no context. */
  Item_context() : context_(NULL) {}

  /* Return the current context; NULL if none has been created. */
  Window_context *get_window_context() { return context_; }

  /*
    Create the context for this item.
    The implementation visible in this file returns true on failure and
    false on success.
  */
  virtual bool create_window_context()= 0;

  /* Release the context created by create_window_context(). */
  virtual void delete_window_context()= 0;

protected:
  Window_context *context_;  /* Set and released by the derived class. */
};
/*
A base window function (aggregate) that also holds a context.
NOTE: All two pass window functions need to implement
this interface.
*/
class Item_sum_window_with_context : public Item_sum_num,
                                     public Item_context
{
public:
  /*
    Item_context is default-constructed implicitly, which leaves the item
    without a context.
  */
  Item_sum_window_with_context(THD *thd)
    : Item_sum_num(thd)
  {}
};
/* /*
@detail @detail
...@@ -177,23 +275,43 @@ class Item_sum_dense_rank: public Item_sum_int ...@@ -177,23 +275,43 @@ class Item_sum_dense_rank: public Item_sum_int
Computation of this function requires two passes: Computation of this function requires two passes:
- First pass to find #rows in the partition - First pass to find #rows in the partition
This is held within the row_count context.
- Second pass to compute rank of current row and the value of the function - Second pass to compute rank of current row and the value of the function
*/ */
class Item_sum_percent_rank: public Item_sum_window_with_context,
class Item_sum_percent_rank: public Item_sum_num public Window_context_row_count
{ {
longlong rank;
longlong partition_rows;
void clear() {}
bool add() { return false; }
void update_field() {}
public: public:
Item_sum_percent_rank(THD *thd) Item_sum_percent_rank(THD *thd)
: Item_sum_num(thd), rank(0), partition_rows(0) {} : Item_sum_window_with_context(thd), cur_rank(1) {}
double val_real() { return 0; } longlong val_int()
{
/*
Percent rank is a real value so calling the integer value should never
happen. It makes no sense as it gets truncated to either 0 or 1.
*/
DBUG_ASSERT(0);
return 0;
}
double val_real()
{
/*
We can not get the real value without knowing the number of rows
in the partition. Don't divide by 0.
*/
if (!get_context_())
{
// Calling this kind of function without a context makes no sense.
DBUG_ASSERT(0);
return 0;
}
longlong partition_rows = get_context_()->get_field_context(result_field);
return partition_rows > 1 ?
static_cast<double>(cur_rank - 1) / (partition_rows - 1) : 0;
}
enum Sumfunctype sum_func () const enum Sumfunctype sum_func () const
{ {
...@@ -205,11 +323,60 @@ class Item_sum_percent_rank: public Item_sum_num ...@@ -205,11 +323,60 @@ class Item_sum_percent_rank: public Item_sum_num
return "percent_rank"; return "percent_rank";
} }
bool create_window_context()
{
// TODO-cvicentiu: Currently this means we must make sure to delete
// the window context. We can potentially allocate this on the THD memroot.
// At the same time, this is only necessary for a small portion of the
// query execution and it does not make sense to keep it for all of it.
context_ = new Window_context_row_count();
if (context_ == NULL)
return true;
return false;
}
void delete_window_context()
{
if (context_)
delete get_context_();
context_ = NULL;
}
void update_field() {}
void clear()
{
cur_rank= 1;
row_number= 0;
}
bool add();
enum Item_result result_type () const { return REAL_RESULT; }
enum_field_types field_type() const { return MYSQL_TYPE_DOUBLE; } enum_field_types field_type() const { return MYSQL_TYPE_DOUBLE; }
void fix_length_and_dec()
{
decimals = 10; // TODO-cvicentiu find out how many decimals the standard
// requires.
}
void setup_window_func(THD *thd, Window_spec *window_spec);
private:
longlong cur_rank; // Current rank of the current row.
longlong row_number; // Value if this were ROW_NUMBER() function.
List<Cached_item> orderby_fields;
/* Helper function so that we don't cast the context every time. */
Window_context_row_count* get_context_()
{
return static_cast<Window_context_row_count *>(context_);
}
}; };
/* /*
@detail @detail
"The relative rank of a row R is defined as NP/NR, where "The relative rank of a row R is defined as NP/NR, where
...@@ -221,18 +388,11 @@ class Item_sum_percent_rank: public Item_sum_num ...@@ -221,18 +388,11 @@ class Item_sum_percent_rank: public Item_sum_num
two passes. two passes.
*/ */
class Item_sum_cume_dist: public Item_sum_num class Item_sum_cume_dist: public Item_sum_percent_rank
{ {
longlong count;
longlong partition_rows;
void clear() {}
bool add() { return false; }
void update_field() {}
public: public:
Item_sum_cume_dist(THD *thd) Item_sum_cume_dist(THD *thd)
: Item_sum_num(thd), count(0), partition_rows(0) {} : Item_sum_percent_rank(thd) {}
double val_real() { return 0; } double val_real() { return 0; }
...@@ -245,9 +405,6 @@ class Item_sum_cume_dist: public Item_sum_num ...@@ -245,9 +405,6 @@ class Item_sum_cume_dist: public Item_sum_num
{ {
return "cume_dist"; return "cume_dist";
} }
enum_field_types field_type() const { return MYSQL_TYPE_DOUBLE; }
}; };
...@@ -361,7 +518,7 @@ class Item_window_func : public Item_result_field ...@@ -361,7 +518,7 @@ class Item_window_func : public Item_result_field
List<Item> &fields, uint flags); List<Item> &fields, uint flags);
void fix_length_and_dec() void fix_length_and_dec()
{ {
window_func->fix_length_and_dec(); decimals = window_func->decimals;
} }
const char* func_name() const { return "WF"; } const char* func_name() const { return "WF"; }
...@@ -369,7 +526,6 @@ class Item_window_func : public Item_result_field ...@@ -369,7 +526,6 @@ class Item_window_func : public Item_result_field
bool fix_fields(THD *thd, Item **ref); bool fix_fields(THD *thd, Item **ref);
bool resolve_window_name(THD *thd); bool resolve_window_name(THD *thd);
}; };
#endif /* ITEM_WINDOWFUNC_INCLUDED */ #endif /* ITEM_WINDOWFUNC_INCLUDED */
...@@ -705,6 +705,110 @@ bool compute_window_func_with_frames(Item_window_func *item_win, ...@@ -705,6 +705,110 @@ bool compute_window_func_with_frames(Item_window_func *item_win,
} }
/*
  @brief
    Compute a window function that needs two passes over every partition.

  @detail
    The table is expected to be sorted according to the PARTITION BY and
    ORDER BY clauses. The algorithm is:
      1. Scan through the table with 'info' until a partition boundary
         (or the end of the table) is reached.
      2. For each scanned row, add it to the window function's context.
      3. Once the boundary is met, scan the same partition a second time
         with a clone of 'info', call add() on the window function for each
         row, and store the computed value back into the table.
      4. Reset the window function and its context.
      5. Repeat from 1 until the end of the table.

    Every resource acquired here (the cloned READ_RECORD, the row id buffer
    and the window context) is released on all exit paths, including
    failure to allocate the buffer or the context.

  @param item_win  window function item; its window_func must be an
                   Item_sum_window_with_context
  @param table     table holding the rows and the result field
  @param info      reader used for the first pass

  @return false on success, true on error
*/
bool compute_two_pass_window_functions(Item_window_func *item_win,
                                       TABLE *table, READ_RECORD *info)
{
  Item_sum_window_with_context *window_func=
    static_cast<Item_sum_window_with_context *>(item_win->window_func);

  // TODO-cvicentiu why not initialize the record for when we need, _in_
  // this function.
  /* Second reader, used to re-scan each partition during the second pass. */
  READ_RECORD *info2= new READ_RECORD();
  clone_read_record(info, info2);

  /* Buffer that remembers where the next partition starts. */
  uchar *rowid_buf= static_cast<uchar *>(my_malloc(table->file->ref_length,
                                                   MYF(0)));
  if (!rowid_buf)
  {
    delete info2;
    return true;
  }

  /* Unable to allocate a new context: release what we already hold. */
  if (window_func->create_window_context())
  {
    my_free(rowid_buf);
    delete info2;
    return true;
  }
  Window_context *context= window_func->get_window_context();

  int err;
  bool is_error= false;
  bool first_row= true;
  bool done= false;
  longlong rows_in_current_partition= 0;

  // TODO handle end of table updating.
  while (!done)
  {
    /*
      Any non-zero result ends the first pass.
      NOTE(review): a read error is not told apart from end of table here;
      confirm whether that is intended.
    */
    if ((err= info->read_record(info)))
      done= true;

    /*
      check_partition_bound() is evaluated for every successfully read row,
      including the very first one, whose result is ignored below.
    */
    bool partition_changed= done || item_win->check_partition_bound() > -1;

    // The first time we always have a partition changed. Ignore it.
    if (first_row)
    {
      partition_changed= false;
      first_row= false;
    }

    if (partition_changed)
    {
      /*
        We are now looking at the first row for the next partition, or at the
        end of the table. Either way, we must remember this position for when
        we finish doing the second pass.
      */
      table->file->position(table->record[0]);
      memcpy(rowid_buf, table->file->ref, table->file->ref_length);

      /* Second pass over the partition that has just ended. */
      for (longlong row_number= 0; row_number < rows_in_current_partition;
           row_number++)
      {
        if ((err= info2->read_record(info2)))
        {
          is_error= true;
          break;
        }
        window_func->add();
        // Save the window function into the table.
        item_win->save_in_field(item_win->result_field, true);
        err= table->file->ha_update_row(table->record[1], table->record[0]);
        /* An update that leaves the row unchanged is not a failure. */
        if (err && err != HA_ERR_RECORD_IS_THE_SAME)
        {
          is_error= true;
          break;
        }
      }
      if (is_error)
        break;

      /* Start from a clean state for the next partition. */
      rows_in_current_partition= 0;
      window_func->clear();
      context->reset();

      // Return to the beginning of the new partition.
      table->file->ha_rnd_pos(table->record[0], rowid_buf);
    }

    /* The current row belongs to the partition now being accumulated. */
    rows_in_current_partition++;
    context->add_field_to_context(item_win->result_field);
  }

  window_func->delete_window_context();
  delete info2;
  my_free(rowid_buf);
  return is_error;
}
/* /*
@brief @brief
This function is called by JOIN::exec to compute window function values This function is called by JOIN::exec to compute window function values
...@@ -899,6 +1003,13 @@ bool JOIN::process_window_functions(List<Item> *curr_fields_list) ...@@ -899,6 +1003,13 @@ bool JOIN::process_window_functions(List<Item> *curr_fields_list)
is_error= true; is_error= true;
break; break;
} }
case Item_sum::PERCENT_RANK_FUNC:
case Item_sum::CUME_DIST_FUNC:
{
if (compute_two_pass_window_functions(item_win, tbl, &info))
is_error= true;
break;
}
case Item_sum::COUNT_FUNC: case Item_sum::COUNT_FUNC:
{ {
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment