Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
b579a626
Commit
b579a626
authored
Mar 03, 2016
by
Vicențiu Ciorbaru
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Implement percent_rank window function
parent
f638ffef
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
322 additions
and
29 deletions
+322
-29
sql/item_windowfunc.cc
sql/item_windowfunc.cc
+26
-0
sql/item_windowfunc.h
sql/item_windowfunc.h
+185
-29
sql/sql_window.cc
sql/sql_window.cc
+111
-0
No files found.
sql/item_windowfunc.cc
View file @
b579a626
...
@@ -56,6 +56,8 @@ Item_window_func::fix_fields(THD *thd, Item **ref)
...
@@ -56,6 +56,8 @@ Item_window_func::fix_fields(THD *thd, Item **ref)
if
(
window_func
->
fix_fields
(
thd
,
ref
))
if
(
window_func
->
fix_fields
(
thd
,
ref
))
return
true
;
return
true
;
fix_length_and_dec
();
max_length
=
window_func
->
max_length
;
max_length
=
window_func
->
max_length
;
fixed
=
1
;
fixed
=
1
;
...
@@ -180,3 +182,27 @@ void Item_window_func::advance_window()
...
@@ -180,3 +182,27 @@ void Item_window_func::advance_window()
}
}
window_func
->
add
();
window_func
->
add
();
}
}
/*
  Account for one more row of the current partition.

  The running row counter is always advanced. When the ORDER BY values
  differ from those of the previous row (test_if_group_changed() reports a
  change by returning something greater than -1), the current rank becomes
  the current row number; otherwise the rank is left as it was.

  @return always false
*/
bool Item_sum_percent_rank::add()
{
  ++row_number;

  const bool order_value_changed= test_if_group_changed(orderby_fields) > -1;
  if (order_value_changed)
    cur_rank= row_number;          /* Row value changed. */

  return false;
}
/*
  Prepare the function for evaluation over the given window specification.

  For every element of the window's ORDER BY list a Cached_item is built
  from the element's first item and appended to orderby_fields, which add()
  later hands to test_if_group_changed(). The function state is then reset
  through clear().

  @param thd          thread handle, passed through to new_Cached_item()
  @param window_spec  window specification whose order_list is walked
*/
void Item_sum_percent_rank::setup_window_func(THD *thd,
                                              Window_spec *window_spec)
{
  /* TODO: move this into Item_window_func? */
  ORDER *order_elem= window_spec->order_list.first;
  while (order_elem)
  {
    orderby_fields.push_back(new_Cached_item(thd, order_elem->item[0], TRUE));
    order_elem= order_elem->next;
  }
  clear();
}
sql/item_windowfunc.h
View file @
b579a626
...
@@ -70,6 +70,7 @@ class Item_sum_row_number: public Item_sum_int
...
@@ -70,6 +70,7 @@ class Item_sum_row_number: public Item_sum_int
class
Item_sum_rank
:
public
Item_sum_int
class
Item_sum_rank
:
public
Item_sum_int
{
{
protected:
longlong
row_number
;
// just ROW_NUMBER()
longlong
row_number
;
// just ROW_NUMBER()
longlong
cur_rank
;
// current value
longlong
cur_rank
;
// current value
...
@@ -108,7 +109,7 @@ class Item_sum_rank: public Item_sum_int
...
@@ -108,7 +109,7 @@ class Item_sum_rank: public Item_sum_int
{
{
return
"rank"
;
return
"rank"
;
}
}
void
setup_window_func
(
THD
*
thd
,
Window_spec
*
window_spec
);
void
setup_window_func
(
THD
*
thd
,
Window_spec
*
window_spec
);
};
};
...
@@ -168,6 +169,103 @@ class Item_sum_dense_rank: public Item_sum_int
...
@@ -168,6 +169,103 @@ class Item_sum_dense_rank: public Item_sum_int
};
};
/* TODO-cvicentiu
* Perhaps this is overengineering, but I would like to decouple the 2-pass
* algorithm from the specific action that must be performed during the
* first pass. The second pass can make use of the "add" function from the
* Item_sum_<window_function>.
*/
/*
  Generic interface for window functions that need to store additional
  information while a partition is being scanned. Such window functions
  include percent_rank and cume_dist.

  Implementations are fed one field at a time through
  add_field_to_context() and are returned to their initial state with
  reset().
*/
class Window_context
{
public:
  /* Fold the given field into the accumulated context. */
  virtual void add_field_to_context(Field *field) = 0;

  /* Discard everything accumulated so far. */
  virtual void reset() = 0;

  virtual ~Window_context() {}
};
/*
  Generic interface that fixes the datatype T a context hands back.

  Both the accessor and the destructor are protected here; a derived class
  decides whether to expose get_field_context() publicly.
*/
template <typename T>
class Window_context_getter
{
protected:
  /* Return the context value associated with the given field. */
  virtual T get_field_context(const Field *field) = 0;

  virtual ~Window_context_getter() {}
};
/*
  A window function context representing the number of rows that are
  present within a partition. Because the number of rows does not depend on
  the specific value of the current field, the field parameter is ignored
  by every method of this class.

  Window_context is inherited publicly; Window_context_getter<ulonglong> is
  inherited with the default (private) access of a class.
*/
class Window_context_row_count :
  public Window_context, Window_context_getter<ulonglong>
{
public:
  Window_context_row_count() : num_rows_(0) {}

  /* Count one more row; the field itself is not inspected. */
  void add_field_to_context(Field *field __attribute__((unused)))
  {
    ++num_rows_;
  }

  /* Start counting from zero again. */
  void reset()
  {
    num_rows_= 0;
  }

  /* Return the number of rows counted since construction or last reset(). */
  ulonglong get_field_context(const Field *field __attribute__((unused)))
  {
    return num_rows_;
  }

private:
  ulonglong num_rows_;   /* rows counted so far */
};
/*
  Context whose value type is a pair of two ulonglong counters.

  NOTE(review): this looks like a placeholder. None of the pure virtual
  methods of the two base classes are overridden here, and the constructor
  ignores its argument.
*/
class Window_context_row_and_group_count :
  public Window_context,
  Window_context_getter<std::pair<ulonglong, ulonglong> >
{
public:
  Window_context_row_and_group_count(void *group_list) {}
};
/*
  An abstract class representing an item that holds a context.

  The context pointer starts out as NULL. Derived classes provide the
  create/delete pair that manages it.
*/
class Item_context
{
public:
  Item_context() : context_(NULL) {}

  /* Create the context; the visible implementation returns true on failure. */
  virtual bool create_window_context() = 0;

  /* Dispose of the context created by create_window_context(). */
  virtual void delete_window_context() = 0;

  /* Return the current context, or NULL if none has been set. */
  Window_context *get_window_context()
  {
    return context_;
  }

protected:
  Window_context *context_;
};
/*
  A base window function (aggregate) that also holds a context.

  NOTE: All two pass window functions need to implement this interface.
*/
class Item_sum_window_with_context :
  public Item_sum_num, public Item_context
{
public:
  Item_sum_window_with_context(THD *thd)
    : Item_sum_num(thd), Item_context()
  {}
};
/*
/*
@detail
@detail
...
@@ -177,23 +275,43 @@ class Item_sum_dense_rank: public Item_sum_int
...
@@ -177,23 +275,43 @@ class Item_sum_dense_rank: public Item_sum_int
Computation of this function requires two passes:
Computation of this function requires two passes:
- First pass to find #rows in the partition
- First pass to find #rows in the partition
This is held within the row_count context.
- Second pass to compute rank of current row and the value of the function
- Second pass to compute rank of current row and the value of the function
*/
*/
class
Item_sum_percent_rank
:
public
Item_sum_window_with_context
,
class
Item_sum_percent_rank
:
public
Item_sum_num
public
Window_context_row_count
{
{
longlong
rank
;
longlong
partition_rows
;
void
clear
()
{}
bool
add
()
{
return
false
;
}
void
update_field
()
{}
public:
public:
Item_sum_percent_rank
(
THD
*
thd
)
Item_sum_percent_rank
(
THD
*
thd
)
:
Item_sum_
num
(
thd
),
rank
(
0
),
partition_rows
(
0
)
{}
:
Item_sum_
window_with_context
(
thd
),
cur_rank
(
1
)
{}
double
val_real
()
{
return
0
;
}
longlong
val_int
()
{
/*
Percent rank is a real value so calling the integer value should never
happen. It makes no sense as it gets truncated to either 0 or 1.
*/
DBUG_ASSERT
(
0
);
return
0
;
}
double
val_real
()
{
/*
We can not get the real value without knowing the number of rows
in the partition. Don't divide by 0.
*/
if
(
!
get_context_
())
{
// Calling this kind of function without a context makes no sense.
DBUG_ASSERT
(
0
);
return
0
;
}
longlong
partition_rows
=
get_context_
()
->
get_field_context
(
result_field
);
return
partition_rows
>
1
?
static_cast
<
double
>
(
cur_rank
-
1
)
/
(
partition_rows
-
1
)
:
0
;
}
enum
Sumfunctype
sum_func
()
const
enum
Sumfunctype
sum_func
()
const
{
{
...
@@ -204,12 +322,61 @@ class Item_sum_percent_rank: public Item_sum_num
...
@@ -204,12 +322,61 @@ class Item_sum_percent_rank: public Item_sum_num
{
{
return
"percent_rank"
;
return
"percent_rank"
;
}
}
bool
create_window_context
()
{
// TODO-cvicentiu: Currently this means we must make sure to delete
// the window context. We can potentially allocate this on the THD memroot.
// At the same time, this is only necessary for a small portion of the
// query execution and it does not make sense to keep it for all of it.
context_
=
new
Window_context_row_count
();
if
(
context_
==
NULL
)
return
true
;
return
false
;
}
void
delete_window_context
()
{
if
(
context_
)
delete
get_context_
();
context_
=
NULL
;
}
void
update_field
()
{}
void
clear
()
{
cur_rank
=
1
;
row_number
=
0
;
}
bool
add
();
enum
Item_result
result_type
()
const
{
return
REAL_RESULT
;
}
enum_field_types
field_type
()
const
{
return
MYSQL_TYPE_DOUBLE
;
}
enum_field_types
field_type
()
const
{
return
MYSQL_TYPE_DOUBLE
;
}
void
fix_length_and_dec
()
{
decimals
=
10
;
// TODO-cvicentiu find out how many decimals the standard
// requires.
}
void
setup_window_func
(
THD
*
thd
,
Window_spec
*
window_spec
);
private:
longlong
cur_rank
;
// Current rank of the current row.
longlong
row_number
;
// Value if this were ROW_NUMBER() function.
List
<
Cached_item
>
orderby_fields
;
/* Helper function so that we don't cast the context every time. */
Window_context_row_count
*
get_context_
()
{
return
static_cast
<
Window_context_row_count
*>
(
context_
);
}
};
};
/*
/*
@detail
@detail
"The relative rank of a row R is defined as NP/NR, where
"The relative rank of a row R is defined as NP/NR, where
...
@@ -221,18 +388,11 @@ class Item_sum_percent_rank: public Item_sum_num
...
@@ -221,18 +388,11 @@ class Item_sum_percent_rank: public Item_sum_num
two passes.
two passes.
*/
*/
class
Item_sum_cume_dist
:
public
Item_sum_
num
class
Item_sum_cume_dist
:
public
Item_sum_
percent_rank
{
{
longlong
count
;
longlong
partition_rows
;
void
clear
()
{}
bool
add
()
{
return
false
;
}
void
update_field
()
{}
public:
public:
Item_sum_cume_dist
(
THD
*
thd
)
Item_sum_cume_dist
(
THD
*
thd
)
:
Item_sum_
num
(
thd
),
count
(
0
),
partition_rows
(
0
)
{}
:
Item_sum_
percent_rank
(
thd
)
{}
double
val_real
()
{
return
0
;
}
double
val_real
()
{
return
0
;
}
...
@@ -245,9 +405,6 @@ class Item_sum_cume_dist: public Item_sum_num
...
@@ -245,9 +405,6 @@ class Item_sum_cume_dist: public Item_sum_num
{
{
return
"cume_dist"
;
return
"cume_dist"
;
}
}
enum_field_types
field_type
()
const
{
return
MYSQL_TYPE_DOUBLE
;
}
};
};
...
@@ -331,7 +488,7 @@ class Item_window_func : public Item_result_field
...
@@ -331,7 +488,7 @@ class Item_window_func : public Item_result_field
}
}
longlong
val_int
()
longlong
val_int
()
{
{
if
(
force_return_blank
)
if
(
force_return_blank
)
return
0
;
return
0
;
return
read_value_from_result_field
?
result_field
->
val_int
()
:
return
read_value_from_result_field
?
result_field
->
val_int
()
:
...
@@ -361,15 +518,14 @@ class Item_window_func : public Item_result_field
...
@@ -361,15 +518,14 @@ class Item_window_func : public Item_result_field
List
<
Item
>
&
fields
,
uint
flags
);
List
<
Item
>
&
fields
,
uint
flags
);
void
fix_length_and_dec
()
void
fix_length_and_dec
()
{
{
window_func
->
fix_length_and_dec
()
;
decimals
=
window_func
->
decimals
;
}
}
const
char
*
func_name
()
const
{
return
"WF"
;
}
const
char
*
func_name
()
const
{
return
"WF"
;
}
bool
fix_fields
(
THD
*
thd
,
Item
**
ref
);
bool
fix_fields
(
THD
*
thd
,
Item
**
ref
);
bool
resolve_window_name
(
THD
*
thd
);
bool
resolve_window_name
(
THD
*
thd
);
};
};
#endif
/* ITEM_WINDOWFUNC_INCLUDED */
#endif
/* ITEM_WINDOWFUNC_INCLUDED */
sql/sql_window.cc
View file @
b579a626
...
@@ -705,6 +705,110 @@ bool compute_window_func_with_frames(Item_window_func *item_win,
...
@@ -705,6 +705,110 @@ bool compute_window_func_with_frames(Item_window_func *item_win,
}
}
/*
  Compute the value of a two pass window function for every row that the
  given reader delivers and store it in the window function's result field.

  The two pass algorithm is as follows:
  We have a sorted table according to the partition and order by clauses.
  1. Scan through the table till we reach a partition boundary.
  2. For each row that we scan, add it to the context.
  3. Once the partition boundary is met, do a second scan through the
     current partition and use the context information to compute the value
     for the window function for that partition.
  4. Reset the context.
  5. Repeat from 1 till end of table.

  @param item_win  window function item; its window_func member is cast to
                   Item_sum_window_with_context
  @param table     table whose rows are read and updated
  @param info      reader driving the first pass; a clone of it drives the
                   second pass

  @return false on success, true on error (allocation failure, a failed
          second-pass read, a failed row update, or a failed reposition)
*/
bool compute_two_pass_window_functions(Item_window_func *item_win,
                                       TABLE *table, READ_RECORD *info)
{
  Item_sum_window_with_context *window_func=
    static_cast<Item_sum_window_with_context *>(item_win->window_func);

  /*
    Create the context before anything else is allocated, so that a failure
    here leaves nothing behind that would need to be released.
  */
  if (window_func->create_window_context())
    return true;
  Window_context *context= window_func->get_window_context();

  // TODO-cvicentiu why not initialize the record for when we need, _in_
  // this function.
  READ_RECORD *info2= new READ_RECORD();
  uchar *rowid_buf=
    static_cast<uchar *>(my_malloc(table->file->ref_length, MYF(0)));
  if (info2 == NULL || rowid_buf == NULL)
  {
    /* Out of memory: release whatever was obtained and report the error. */
    window_func->delete_window_context();
    delete info2;
    if (rowid_buf)
      my_free(rowid_buf);
    return true;
  }
  clone_read_record(info, info2);

  int err;
  bool is_error= false;
  bool first_row= true;
  bool done= false;
  longlong rows_in_current_partition= 0;

  // TODO handle end of table updating.
  while (!done)
  {
    /* Any non-zero result from the first-pass reader ends the scan. */
    if ((err= info->read_record(info)))
      done= true;

    /* The partition bound is only checked while a row was actually read. */
    bool partition_changed= done || item_win->check_partition_bound() > -1;

    // The first time we always have a partition changed. Ignore it.
    if (first_row)
    {
      partition_changed= false;
      first_row= false;
    }

    if (partition_changed)
    {
      /*
        We are now looking at the first row for the next partition, or at the
        end of the table. Either way, we must remember this position for when
        we finish doing the second pass.
      */
      table->file->position(table->record[0]);
      memcpy(rowid_buf, table->file->ref, table->file->ref_length);

      /* Second pass: revisit exactly the rows counted for this partition. */
      for (longlong row_number= 0;
           row_number < rows_in_current_partition;
           row_number++)
      {
        if ((err= info2->read_record(info2)))
        {
          is_error= true;
          break;
        }
        window_func->add();

        // Save the window function into the table.
        item_win->save_in_field(item_win->result_field, true);
        err= table->file->ha_update_row(table->record[1], table->record[0]);
        if (err && err != HA_ERR_RECORD_IS_THE_SAME)
        {
          is_error= true;
          break;
        }
      }

      if (is_error)
        break;

      rows_in_current_partition= 0;
      window_func->clear();
      context->reset();

      // Return to the beginning of the new partition.
      err= table->file->ha_rnd_pos(table->record[0], rowid_buf);
      /*
        If the row cannot be restored while more rows are still to come, the
        next partition would be counted from the wrong record. At the end of
        the table the result no longer matters, so it is not treated as an
        error there.
      */
      if (err && !done)
      {
        is_error= true;
        break;
      }
    }

    rows_in_current_partition++;
    context->add_field_to_context(item_win->result_field);
  }

  /* Single cleanup path for both the success and the error case. */
  window_func->delete_window_context();
  delete info2;
  my_free(rowid_buf);

  return is_error;
}
/*
/*
@brief
@brief
This function is called by JOIN::exec to compute window function values
This function is called by JOIN::exec to compute window function values
...
@@ -899,6 +1003,13 @@ bool JOIN::process_window_functions(List<Item> *curr_fields_list)
...
@@ -899,6 +1003,13 @@ bool JOIN::process_window_functions(List<Item> *curr_fields_list)
is_error
=
true
;
is_error
=
true
;
break
;
break
;
}
}
case
Item_sum
:
:
PERCENT_RANK_FUNC
:
case
Item_sum
:
:
CUME_DIST_FUNC
:
{
if
(
compute_two_pass_window_functions
(
item_win
,
tbl
,
&
info
))
is_error
=
true
;
break
;
}
case
Item_sum
:
:
COUNT_FUNC
:
case
Item_sum
:
:
COUNT_FUNC
:
{
{
/*
/*
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment