Commit e76e617e authored by timour@mysql.com's avatar timour@mysql.com

Implementation of the last review comments for WL#1724

"Min/Max Optimization for Queries with Group By Clause"
parent e2cd3dd1
......@@ -1869,6 +1869,12 @@ id select_type table type possible_keys key key_len ref rows Extra
explain select a1,a2,b from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (c > 'a111') group by a1,a2,b;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 76 Using where; Using index
explain select a1,a2,min(b),c from t2 where (a2 = 'a') and (c = 'a111') group by a1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 index NULL idx_t2_1 163 NULL 164 Using where; Using index
select a1,a2,min(b),c from t2 where (a2 = 'a') and (c = 'a111') group by a1;
a1 a2 min(b) c
a a a a111
explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') or (b = 'a') group by a1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 index NULL idx_t2_1 163 NULL 164 Using where; Using index
......
......@@ -3,6 +3,12 @@
# The queries in this file test query execution via QUICK_GROUP_MIN_MAX_SELECT.
#
#
# TODO:
# Add queries with:
# - C != const
# - C IS NOT NULL
# - HAVING clause
--disable_warnings
drop table if exists t1;
......@@ -175,8 +181,6 @@ explain select a1, b, min(c), a1, max(c), b, a2, max(c), max(c) from t1 group by
explain select min(a2) from t1 group by a1;
explain select a2, min(c), max(c) from t1 group by a1,a2,b;
-- TODO: Queries with HAVING
-- queries
select a1, min(a2) from t1 group by a1;
select a1, max(a2) from t1 group by a1;
......@@ -190,8 +194,6 @@ select a1, b, min(c), a1, max(c), b, a2, max(c), max(c) from t1 group by a1, a2,
select min(a2) from t1 group by a1;
select a2, min(c), max(c) from t1 group by a1,a2,b;
-- TODO: Queries with HAVING
--
-- Queries with a where clause
--
......@@ -300,7 +302,6 @@ select a1,a2,b,min(c) from t2 where b is NULL group by a1,a2;
select a1,a2,b,max(c) from t2 where b is NULL group by a1,a2;
select a1,a2,b,min(c),max(c) from t2 where b is NULL group by a1,a2;
select a1,a2,b,min(c),max(c) from t2 where b is NULL group by a1,a2;
-- TODO: IS NOT NULL ?
-- C) Range predicates for the MIN/MAX attribute
-- plans
......@@ -553,6 +554,11 @@ where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (d > 'xy
explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') or (b < 'b') group by a1;
explain select a1,a2,b from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (c > 'a111') group by a1,a2,b;
-- non-group field with an equality predicate that references a keypart after the
-- MIN/MAX argument
explain select a1,a2,min(b),c from t2 where (a2 = 'a') and (c = 'a111') group by a1;
select a1,a2,min(b),c from t2 where (a2 = 'a') and (c = 'a111') group by a1;
-- disjunction for a non-group select attribute
explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') or (b = 'a') group by a1;
......
......@@ -260,22 +260,7 @@ class Item {
virtual bool remove_dependence_processor(byte * arg) { return 0; }
virtual bool remove_fixed(byte * arg) { fixed= 0; return 0; }
/*
All collect_* methods are used as arguments to walk() to collect
specific types items.
TODO:
A more generic implementation would add a special class
Collect_processor_param that can store arbitrary sets of item kinds
(currently specified as enums), along with a list to store items of the
specified kinds. This would allow to collect combinations of items of
arbitrary kinds without having to add a new collect method each time.
There can be one generic collect_processor method that checks the item type
and compares it with the item types in Collect_processor_param.
*/
virtual bool collect_item_field_processor(byte * arg) { return 0; }
virtual bool collect_item_sum_min_processor(byte * arg) { return 0; }
virtual bool collect_item_sum_max_processor(byte * arg) { return 0; }
virtual bool has_non_min_max_sum_processor(byte * arg) { return 0; }
virtual Item *this_item() { return this; } /* For SPs mostly. */
virtual Item *this_const_item() const { return const_cast<Item*>(this); } /* For SPs mostly. */
......
......@@ -183,44 +183,6 @@ bool Item_sum::walk (Item_processor processor, byte *argument)
}
/*
Store the pointer to this item into a list if not already there.
SYNOPSIS
Item_sum::collect()
item_list pointer to a List<Item_sum> where item_sum objects are collected
DESCRIPTION
The method is used by collect_item_sum_*_processor, called by
Item_sum::walk, to collect all unique Item_sum_min and Item_sum_max objects
from a tree of Items into a set of items represented as a list.
IMPLEMENTATION
Item_cond::walk() and Item_func::walk() stop the evaluation of the
processor function for its arguments once the processor returns
true.Therefore in order to force this method being called for all item
arguments in a condition the method must return false.
RETURN
FALSE on success (force the evaluation of collect_item_sum_*_processor
for the subsequent items.)
TRUE o/w (stop evaluation of subsequent items.)
*/
bool Item_sum::collect(List<Item_sum> *item_list)
{
List_iterator<Item_sum> item_list_it(*item_list);
Item_sum *curr_item;
while ((curr_item= item_list_it++))
{
if (curr_item == this)
return FALSE; /* Already in the set. */
}
item_list->push_back(this);
return FALSE;
}
String *
Item_sum_num::val_str(String *str)
{
......
......@@ -27,8 +27,6 @@ class Item_arena;
class Item_sum :public Item_result_field
{
private:
bool collect(List<Item_sum> *item_list);
public:
enum Sumfunctype
{ COUNT_FUNC, COUNT_DISTINCT_FUNC, SUM_FUNC, SUM_DISTINCT_FUNC, AVG_FUNC,
......@@ -100,33 +98,6 @@ class Item_sum :public Item_result_field
bool save_args(Item_arena* stmt);
bool walk (Item_processor processor, byte *argument);
/* Collect Item_sum_min objects into a list supplied by the caller. */
bool collect_item_sum_min_processor(byte *arg)
{
if (Item_sum::MIN_FUNC == this->sum_func())
return collect((List<Item_sum>*) arg);
else
return FALSE;
}
/* Collect Item_sum_max objects into a list supplied by the caller. */
bool collect_item_sum_max_processor(byte *arg)
{
if (Item_sum::MAX_FUNC == this->sum_func())
return collect((List<Item_sum>*) arg);
else
return FALSE;
}
/* Check if there are any aggregate functions other than MIN and MAX. */
bool has_non_min_max_sum_processor(byte * arg)
{
Sumfunctype sum_type= this->sum_func();
if ((sum_type != Item_sum::MIN_FUNC) && (sum_type != Item_sum::MAX_FUNC))
return TRUE;
return FALSE;
}
};
......
......@@ -1477,11 +1477,11 @@ class TRP_GROUP_MIN_MAX : public TABLE_READ_PLAN
KEY_PART_INFO *min_max_arg_part, uint group_prefix_len,
uint used_key_parts, uint group_key_parts, KEY *index_info,
uint index, uint key_infix_len, byte *key_infix,
SEL_TREE *tree, PARAM *param);
SEL_TREE *tree, SEL_ARG *index_tree, uint param_idx,
ha_rows quick_prefix_records, PARAM *param);
QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
MEM_ROOT *parent_alloc);
void update_cost();
};
......@@ -6362,17 +6362,21 @@ static inline uint get_field_keypart(KEY *index, Field *field);
static inline SEL_ARG * get_index_range_tree(uint index, SEL_TREE* range_tree,
PARAM *param, uint *param_idx);
static bool
get_constant_key_infix(List<Item_field> &non_group_fields,
SEL_ARG *index_range_tree,
get_constant_key_infix(KEY *index_info, SEL_ARG *index_range_tree,
KEY_PART_INFO *first_non_group_part,
KEY_PART_INFO *end_part,
byte *key_infix, uint *key_infix_len);
KEY_PART_INFO *min_max_arg_part,
KEY_PART_INFO *last_part, THD *thd,
byte *key_infix, uint *key_infix_len,
KEY_PART_INFO **first_non_infix_part);
static bool
check_group_min_max_predicates(COND *cond, ORDER *group_list,
List<Item_field> &select_fields,
List<Item_field> &non_group_fields,
Item_field *min_max_arg_item);
check_group_min_max_predicates(COND *cond, Item_field *min_max_arg_item);
static void
cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
uint group_key_parts, SEL_TREE *range_tree,
SEL_ARG *index_tree, ha_rows quick_prefix_records,
bool have_min, bool have_max,
double *read_cost, ha_rows *records);
/*
Test if this access method is applicable to a GROUP query with MIN/MAX
......@@ -6400,7 +6404,7 @@ check_group_min_max_predicates(COND *cond, ORDER *group_list,
= SA - if Q is a DISTINCT query (based on the
equivalence of DISTINCT and GROUP queries.
- NGA = QA - (GA union C) = {NG_1, ..., NG_m} - the ones not in GROUP BY
and not referenced by a MIN/MAX functions.
and not referenced by MIN/MAX functions.
with the following properties specified below.
SA1. There is at most one attribute in SA referenced by any number of
......@@ -6412,9 +6416,12 @@ check_group_min_max_predicates(COND *cond, ORDER *group_list,
- (const {< | <= | > | >= | =} C)
- (C between const_i and const_j)
- C IS NULL
- C IS NOT NULL
- C != const
SA4. If Q has a GROUP BY clause, there are no other aggregate functions
except MIN and MAX. For queries with DISTINCT, aggregate functions
are allowed.
SA5. The select list in DISTINCT queries should not contain expressions.
GA1. If Q has a GROUP BY clause, then GA is a prefix of I. That is, if
G_i = A_j => i = j.
GA2. If Q has a DISTINCT clause, then there is a permutation of SA that
......@@ -6444,14 +6451,16 @@ check_group_min_max_predicates(COND *cond, ORDER *group_list,
in the index I (if this was already tested for GA, NGA and C).
C) Overall query form:
SELECT [A_1,...,A_k], [B_1,...,B_m], [MIN(C)], [MAX(C)] -- at least one
FROM T -- of {A_i} or
WHERE [RNG(A_1,...,A_p ; where p <= k)] -- MIN/MAX must
[AND EQ(B_1,...,B_m)] -- be present.
SELECT EXPR([A_1,...,A_k], [B_1,...,B_m], [MIN(C)], [MAX(C)])
FROM T
WHERE [RNG(A_1,...,A_p ; where p <= k)]
[AND EQ(B_1,...,B_m)]
[AND PC(C)]
[AND PA(A_i1,...,A_iq)]
GROUP BY A_1,...,A_k;
or
GROUP BY A_1,...,A_k
[HAVING PH(A_1, ..., B_1,..., C)]
where EXPR(...) is an arbitrary expression over some or all SELECT fields,
or:
SELECT DISTINCT A_i1,...,A_ik
FROM T
WHERE [RNG(A_1,...,A_p ; where p <= k)]
......@@ -6506,11 +6515,6 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree)
uint index= 0; /* The id of the chosen index. */
uint group_key_parts= 0; /* Number of index key parts in the group prefix. */
uint used_key_parts= 0; /* Number of index key parts used for access. */
List<Item_field> query_fields;/* Set of all fields referenced in the query. */
List<Item_field> select_fields; /* Set of the fields referenced in SELECT. */
List<Item_field> non_group_fields;/* Set of the query fields not in GROUP BY*/
/* and not referenced in a MIN/MAX. */
List<Item_sum> min_max_functions; /* All MIN/MAX functions in SELECT. */
byte key_infix[MAX_KEY_LENGTH]; /* Constants from equality predicates.*/
uint key_infix_len= 0; /* Length of key_infix. */
TRP_GROUP_MIN_MAX *read_plan= NULL; /* The eventually constructed TRP. */
......@@ -6532,109 +6536,44 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree)
DBUG_RETURN(NULL);
/* Analyze the query in more detail. */
List_iterator<Item> select_items_it(join->fields_list);
List<Item> &select_items= join->fields_list;
List_iterator<Item> select_items_it(select_items);
List_iterator<Item_field> query_fields_it(query_fields);
List_iterator<Item_field> select_fields_it(select_fields);
List_iterator<Item_sum> min_max_functions_it(min_max_functions);
/*
Collect all fields referenced in the query. Notice that since a HAVING
clause must reference only SELECT attributes, there is no need to extract
the attributes of HAVING.
Also:
- check (SA4) if there are any aggregate functions other than MIN and MAX,
- collect all references to MIN/MAX functions.
*/
while ((item= select_items_it++))
/* Check (SA1,SA4) and store the only MIN/MAX argument - the C attribute.*/
if(join->make_sum_func_list(join->all_fields, join->fields_list, 1))
DBUG_RETURN(NULL);
if (join->sum_funcs[0])
{
if (item->walk(&Item::has_non_min_max_sum_processor, NULL))
DBUG_RETURN(NULL);
item->walk(&Item::collect_item_field_processor, (byte*) &query_fields);
item->walk(&Item::collect_item_field_processor, (byte*) &select_fields);
item->walk(&Item::collect_item_sum_min_processor,
(byte*) &min_max_functions);
item->walk(&Item::collect_item_sum_max_processor,
(byte*) &min_max_functions);
}
if (join->conds)
join->conds->walk(&Item::collect_item_field_processor,
(byte*) &query_fields);
/* Check (SA1) and store the only MIN/MAX argument - the C attribute.*/
Item_sum *min_max_item;
while ((min_max_item= min_max_functions_it++))
{
if (min_max_item->sum_func() == Item_sum::MIN_FUNC)
have_min= TRUE;
else if (min_max_item->sum_func() == Item_sum::MAX_FUNC)
have_max= TRUE;
else
DBUG_RETURN(NULL);
Item *expr= min_max_item->args[0]; /* This is the argument of MIN/MAX. */
if (expr->type() == Item::FIELD_ITEM) /* Is it an attribute? */
Item_sum *min_max_item;
Item_sum **func_ptr= join->sum_funcs;
while ((min_max_item= *(func_ptr++)))
{
if (! min_max_arg_item)
min_max_arg_item= (Item_field*) expr;
else if (! min_max_arg_item->eq(expr, 1))
if (min_max_item->sum_func() == Item_sum::MIN_FUNC)
have_min= TRUE;
else if (min_max_item->sum_func() == Item_sum::MAX_FUNC)
have_max= TRUE;
else
DBUG_RETURN(NULL);
}
else
DBUG_RETURN(NULL);
}
/* Collect the NGA fields. */
query_fields_it.rewind();
List_iterator<Item_field> non_group_fields_it(non_group_fields);
Item_field *curr_field;
while ((item_field= query_fields_it++))
{
bool found_field= FALSE;
if (min_max_arg_item && item_field->eq(min_max_arg_item, 1))
found_field= TRUE;
else if (join->group_list)
{
for (tmp_group= join->group_list; tmp_group; tmp_group= tmp_group->next)
Item *expr= min_max_item->args[0]; /* The argument of MIN/MAX. */
if (expr->type() == Item::FIELD_ITEM) /* Is it an attribute? */
{
if (item_field->eq(*tmp_group->item, 1))
{
found_field= TRUE;
break;
}
}
}
else if (join->select_distinct)
{ /* For DISTINCT queries select fields are treated as group fields. */
Item_field *sel_field;
select_fields_it.rewind();
while ((sel_field= select_fields_it++))
{
if (item_field->eq(sel_field, 1))
{
found_field= TRUE;
break;
}
if (! min_max_arg_item)
min_max_arg_item= (Item_field*) expr;
else if (! min_max_arg_item->eq(expr, 1))
DBUG_RETURN(NULL);
}
else
DBUG_RETURN(NULL);
}
else
DBUG_ASSERT(FALSE);
}
if (!found_field)
/* Check (SA5). */
if (join->select_distinct)
{
while ((item= select_items_it++))
{
/* Make sure non_group_fields is a set. */
non_group_fields_it.rewind();
while ((curr_field= non_group_fields_it++))
{
if (curr_field->eq(item_field, 1))
{
found_field= TRUE;
break;
}
}
if (!found_field)
non_group_fields.push_back(item_field);
if (item->type() != Item::FIELD_ITEM)
DBUG_RETURN(NULL);
}
}
......@@ -6653,17 +6592,32 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree)
KEY *cur_index_info= table->key_info;
KEY *cur_index_info_end= cur_index_info + table->keys;
KEY_PART_INFO *cur_part= NULL;
KEY_PART_INFO *end_part;
for (uint tmp_index= 0 ; cur_index_info != cur_index_info_end ;
cur_index_info++, tmp_index++)
{
/* Check (B1) - each query field participates in the current index. */
query_fields_it.rewind();
while ((item_field= query_fields_it++))
{
if ( ! item_field->field->part_of_key.is_set(tmp_index) )
goto next_index;
}
KEY_PART_INFO *end_part; /* Last part for loops. */
/* Last index part. */
KEY_PART_INFO *last_part= NULL;
KEY_PART_INFO *first_non_group_part= NULL;
KEY_PART_INFO *first_non_infix_part= NULL;
uint key_infix_parts= 0;
uint cur_group_key_parts= 0;
uint cur_group_prefix_len= 0;
/* Cost-related variables for the best index so far. */
double best_read_cost= DBL_MAX;
ha_rows best_records= 0;
SEL_ARG *best_index_tree= NULL;
ha_rows best_quick_prefix_records= 0;
uint best_param_idx= 0;
double cur_read_cost= DBL_MAX;
ha_rows cur_records;
SEL_ARG *cur_index_tree= NULL;
ha_rows cur_quick_prefix_records= 0;
uint cur_param_idx;
for (uint cur_index= 0 ; cur_index_info != cur_index_info_end ;
cur_index_info++, cur_index++)
{
/* Check (B1) - if current index is covering. */
if (!table->used_keys.is_set(cur_index))
goto next_index;
/*
Check (GA1) for GROUP BY queries.
......@@ -6686,8 +6640,8 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree)
Item_field *group_field= (Item_field *) (*tmp_group->item);
if (group_field->field->eq(cur_part->field))
{
group_prefix_len+= cur_part->store_length;
++group_key_parts;
cur_group_prefix_len+= cur_part->store_length;
++cur_group_key_parts;
}
else
goto next_index;
......@@ -6703,18 +6657,18 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree)
*/
else if (join->select_distinct)
{
DBUG_ASSERT(!join->group_list); /* Only DISTINCT, no GROUP BY. */
select_fields_it.rewind();
while ((item_field= select_fields_it++))
select_items_it.rewind();
while ((item= select_items_it++))
{
item_field= (Item_field*) item; /* (SA5) already checked above. */
/* Find the order of the key part in the index. */
key_part_nr= get_field_keypart(cur_index_info, item_field->field);
if (key_part_nr < 1 || key_part_nr > select_fields.elements)
if (key_part_nr < 1 || key_part_nr > join->fields_list.elements)
goto next_index;
cur_part= cur_index_info->key_part + key_part_nr - 1;
group_prefix_len+= cur_part->store_length;
cur_group_prefix_len+= cur_part->store_length;
}
group_key_parts= select_fields.elements;
cur_group_key_parts= join->fields_list.elements;
}
else
DBUG_ASSERT(FALSE);
......@@ -6723,7 +6677,7 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree)
if (min_max_arg_item)
{
key_part_nr= get_field_keypart(cur_index_info, min_max_arg_item->field);
if (key_part_nr <= group_key_parts)
if (key_part_nr <= cur_group_key_parts)
goto next_index;
min_max_arg_part= cur_index_info->key_part + key_part_nr - 1;
}
......@@ -6732,36 +6686,98 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree)
Check (NGA1, NGA2) and extract a sequence of constants to be used as part
of all search keys.
*/
if (non_group_fields.elements > 0)
/*
If there is MIN/MAX, each keypart between the last group part and the
MIN/MAX part must participate in one equality with constants, and all
keyparts after the MIN/MAX part must not be referenced in the query.
If there is no MIN/MAX, the keyparts after the last group part can be
referenced only in equalities with constants, and the referenced keyparts
must form a sequence without any gaps that starts immediately after the
last group keypart.
*/
last_part= cur_index_info->key_part + cur_index_info->key_parts;
first_non_group_part= (cur_group_key_parts < cur_index_info->key_parts) ?
cur_index_info->key_part + cur_group_key_parts :
NULL;
first_non_infix_part= min_max_arg_part ?
(min_max_arg_part < last_part) ?
min_max_arg_part + 1 :
NULL :
NULL;
if (first_non_group_part &&
(!min_max_arg_part || (min_max_arg_part - first_non_group_part > 0)))
{
if (!tree)
goto next_index; /* No range tree - no predicates at all. */
uint dummy;
SEL_ARG *index_range_tree= get_index_range_tree(tmp_index, tree, param,
&dummy);
KEY_PART_INFO *first_non_group_part= cur_index_info->key_part +
group_key_parts;
KEY_PART_INFO *end_part= min_max_arg_part ?
min_max_arg_part :
cur_index_info->key_part +
cur_index_info->key_parts;
if (!get_constant_key_infix(non_group_fields, index_range_tree,
first_non_group_part, end_part,
key_infix, &key_infix_len))
if (tree)
{
uint dummy;
SEL_ARG *index_range_tree= get_index_range_tree(cur_index, tree, param,
&dummy);
if (!get_constant_key_infix(cur_index_info, index_range_tree,
first_non_group_part, min_max_arg_part,
last_part, thd, key_infix, &key_infix_len,
&first_non_infix_part))
goto next_index;
}
else if (min_max_arg_part &&
(min_max_arg_part - first_non_group_part > 0))
/*
There is a gap but no range tree, thus no predicates at all for the
non-group keyparts.
*/
goto next_index;
}
/*
Test (WA1) partially - that no other keypart after the last infix part is
referenced in the query.
*/
if (first_non_infix_part)
{
for (cur_part= first_non_infix_part; cur_part != last_part; cur_part++)
{
if (cur_part->field->query_id == thd->query_id)
goto next_index;
}
}
/* If we got to this point, cur_index_info passes the test. */
index_info= cur_index_info;
index= tmp_index;
used_key_parts= group_key_parts + non_group_fields.elements;
key_infix_parts= key_infix_len ?
(first_non_infix_part - first_non_group_part) : 0;
used_key_parts= cur_group_key_parts + key_infix_parts;
/* Exit the loop because we found a usable index. */
break;
/* Compute the cost of using this index. */
if (tree)
{
/* Find the SEL_ARG sub-tree that corresponds to the chosen index. */
cur_index_tree= get_index_range_tree(cur_index, tree, param,
&cur_param_idx);
/* Check if this range tree can be used for prefix retrieval. */
cur_quick_prefix_records= check_quick_select(param, cur_param_idx,
cur_index_tree);
}
cost_group_min_max(table, cur_index_info, used_key_parts,
cur_group_key_parts, tree, cur_index_tree,
cur_quick_prefix_records, have_min, have_max,
&cur_read_cost, &cur_records);
if (cur_read_cost < best_read_cost)
{
index_info= cur_index_info;
index= cur_index;
best_read_cost= cur_read_cost;
best_records= cur_records;
best_index_tree= cur_index_tree;
best_quick_prefix_records= cur_quick_prefix_records;
best_param_idx= cur_param_idx;
group_key_parts= cur_group_key_parts;
group_prefix_len= cur_group_prefix_len;
}
next_index:
group_key_parts= 0;
group_prefix_len= 0;
cur_group_key_parts= 0;
cur_group_prefix_len= 0;
continue;
}
if (!index_info) /* No usable index found. */
......@@ -6769,9 +6785,7 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree)
/* Check (SA3,WA1) for the where clause. */
if (!check_group_min_max_predicates(join->conds, join->group_list,
select_fields, non_group_fields,
min_max_arg_item))
if (!check_group_min_max_predicates(join->conds, min_max_arg_item))
DBUG_RETURN(NULL);
/* The query passes all tests, so construct a new TRP object. */
......@@ -6781,13 +6795,16 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree)
used_key_parts, group_key_parts, index_info,
index, key_infix_len,
(key_infix_len > 0) ? key_infix : NULL,
tree, param);
tree, best_index_tree, best_param_idx,
best_quick_prefix_records, param);
if (read_plan)
{
if (tree && read_plan->quick_prefix_records == 0)
DBUG_RETURN(NULL);
read_plan->update_cost();
read_plan->read_cost= best_read_cost;
read_plan->records= best_records;
DBUG_PRINT("info",
("Returning group min/max plan: cost: %g, records: %lu",
read_plan->read_cost, (ulong) read_plan->records));
......@@ -6798,33 +6815,20 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree)
/*
Check that all where predicates reference only select fields and the MIN/MAX
attribute participates only in range predicates with constants.
Check that the MIN/MAX attribute participates only in range predicates
with constants.
SYNOPSIS
check_group_min_max_predicates()
cond tree (or subtree) describing all or part of the WHERE
clause being analyzed
select_fields list of fields in the SELECT clause
min_max_arg_item the field referenced by the MIN/MAX function(s)
DESCRIPTION
The function walks recursively over the cond tree representing a WHERE
clause, and checks that:
(WA1) every predicate references only GA and NGA fields as specified
in get_best_group_min_max().
(SA3) if a field is referenced by a MIN/MAX aggregate function, it is
referenced only by the following predicates:
{=, <, <=, >, >=, between, is null}.
The function assumes that the select list has already been checked that
each selected attribute is part of an index. As a result this test
essentially checks condition B6 from get_best_group_min_max(), namely
that every attribute referenced in the WHERE clause is part of the index I.
IMPLEMENTATION
The function assumes that the two sets of attributes GA (group_list) and NGA
(non_group_fields) are already checked to satisfy all conditions given in
get_best_group_min_max's specification.
clause, and checks condition (SA3) - if a field is referenced by a MIN/MAX
aggregate function, it is referenced only by the following predicates:
{=, !=, <, <=, >, >=, between, is null, is not null}.
RETURN
TRUE if cond passes the test
......@@ -6832,10 +6836,7 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree)
*/
static bool
check_group_min_max_predicates(COND *cond, ORDER *group_list,
List<Item_field> &select_fields,
List<Item_field> &non_group_fields,
Item_field *min_max_arg_item)
check_group_min_max_predicates(COND *cond, Item_field *min_max_arg_item)
{
DBUG_ENTER("check_group_min_max_predicates");
if (!cond) /* If no WHERE clause, then all is OK. */
......@@ -6849,8 +6850,7 @@ check_group_min_max_predicates(COND *cond, ORDER *group_list,
Item *and_or_arg;
while ((and_or_arg= li++))
{
if(!check_group_min_max_predicates(and_or_arg, group_list, select_fields,
non_group_fields, min_max_arg_item))
if(!check_group_min_max_predicates(and_or_arg, min_max_arg_item))
DBUG_RETURN(FALSE);
}
DBUG_RETURN(TRUE);
......@@ -6876,14 +6876,16 @@ check_group_min_max_predicates(COND *cond, ORDER *group_list,
with a constant.
*/
Item_func::Functype pred_type= pred->functype();
if (pred_type != Item_func::EQUAL_FUNC &&
pred_type != Item_func::LT_FUNC &&
pred_type != Item_func::LE_FUNC &&
pred_type != Item_func::GT_FUNC &&
pred_type != Item_func::GE_FUNC &&
pred_type != Item_func::BETWEEN &&
pred_type != Item_func::ISNULL_FUNC &&
pred_type != Item_func::EQ_FUNC)
if (pred_type != Item_func::EQUAL_FUNC &&
pred_type != Item_func::LT_FUNC &&
pred_type != Item_func::LE_FUNC &&
pred_type != Item_func::GT_FUNC &&
pred_type != Item_func::GE_FUNC &&
pred_type != Item_func::BETWEEN &&
pred_type != Item_func::ISNULL_FUNC &&
pred_type != Item_func::ISNOTNULL_FUNC &&
pred_type != Item_func::EQ_FUNC &&
pred_type != Item_func::NE_FUNC)
DBUG_RETURN(FALSE);
/* Check that pred compares min_max_arg_item with a constant. */
......@@ -6893,54 +6895,10 @@ check_group_min_max_predicates(COND *cond, ORDER *group_list,
if (!simple_pred(pred, args, &inv))
DBUG_RETURN(FALSE);
}
else
{/* pred can be arbitrary predicate, but cur_arg must be in GA or NGA. */
bool is_in_ga_or_nga= FALSE;
Item_field *item_field;
if (non_group_fields.elements > 0)
{
List_iterator<Item_field> non_group_fields_it(non_group_fields);
while ((item_field= non_group_fields_it++))
{
if (cur_arg->eq(item_field, 1))
{
is_in_ga_or_nga= TRUE;
break;
}
}
}
if (group_list && !is_in_ga_or_nga) /* This is a GROUP-BY query. */
{
ORDER *tmp_group;
for (tmp_group= group_list; tmp_group; tmp_group= tmp_group->next)
{
if (cur_arg->eq(*tmp_group->item, 1))
{
is_in_ga_or_nga= TRUE;
break;
}
}
}
else /* This is a DISTINCT query. */
{
List_iterator<Item_field> select_fields_it(select_fields);
while ((item_field= select_fields_it++))
{
if (cur_arg->eq(item_field, 1))
{
is_in_ga_or_nga= TRUE;
break;
}
}
}
if (!is_in_ga_or_nga)
DBUG_RETURN(FALSE);
}
}
else if (cur_arg->type() == Item::FUNC_ITEM)
{
if(!check_group_min_max_predicates(cur_arg, group_list, select_fields,
non_group_fields, min_max_arg_item))
if(!check_group_min_max_predicates(cur_arg, min_max_arg_item))
DBUG_RETURN(FALSE);
}
else if (cur_arg->const_item())
......@@ -6954,128 +6912,50 @@ check_group_min_max_predicates(COND *cond, ORDER *group_list,
DBUG_RETURN(TRUE);
}
/*
Find the key part referenced by a field.
SYNOPSIS
get_field_keypart()
index descriptor of an index
field field that possibly references some key part in index
NOTES
The return value can be used to get a KEY_PART_INFO pointer by
part= index->key_part + get_field_keypart(...) - 1;
RETURN
Positive number which is the consecutive number of the key part, or
0 if field does not reference any index field.
*/
static inline uint
get_field_keypart(KEY *index, Field *field)
{
KEY_PART_INFO *part= index->key_part;
uint key_part_num= 0;
while (part != part + index->key_parts)
{
key_part_num++;
if (field->eq(part->field))
return key_part_num;
part++;
}
return key_part_num;
}
/*
Find the SEL_ARG sub-tree that corresponds to the chosen index.
SYNOPSIS
get_index_range_tree()
index
range_tree
param
param_idx [out]
DESCRIPTION
*/
SEL_ARG * get_index_range_tree(uint index, SEL_TREE* range_tree, PARAM *param,
uint *param_idx)
{
uint idx= 0; /* Index nr in param->key_parts */
while (idx < param->keys)
{
if (index == param->real_keynr[idx])
break;
idx++;
}
*param_idx= idx;
return(range_tree->keys[idx]);
}
/*
Extract a sequence of constants from a conjunction of equality predicates.
SYNOPSIS
get_constant_key_infix()
cond [in] The WHERE clause of the current query
non_group_fields[in] Select fields that are not in GROUP-BY list
index_info [in] The index which key parts are being considered
group_prefix_len [in] Length of the key parts referenced by group
attributes
first_non_group_part [in] First index part after group attribute parts
min_max_part [in] Index keypart referenced by the MIN/MAX argument
key_infix [out]
key_infix_len [out]
index_info [in] Descriptor of the chosen index.
index_range_tree [in] Range tree for the chosen index
first_non_group_part [in] First index part after group attribute parts
min_max_arg_part [in] The keypart of the MIN/MAX argument if any
last_part [in] Last keypart of the index
thd [in] Current thread
key_infix [out] Infix of constants to be used for index lookup
key_infix_len [out] Lenghth of the infix
first_non_infix_part [out] The first keypart after the infix (if any)
DESCRIPTION
Test conditions (NGA1, NGA2) from get_best_group_min_max(). Namely,
for each distinct attribute NG_i from non_group_fields test if:
- there is a constant equality predicate among conds with the form
(NG_i = const_ci) or (const_ci = NG_i), and
- the NG_i attributes are a sub-sequence of the index which starts
immediately after the last group attribute, and ends just before the
MIN/MAX attribute.
Thus all the NG_i attributes must fill the 'gap' between the last group-by
for each keypart field NGF_i not in GROUP-BY, check that there is a constant
equality predicate among conds with the form (NGF_i = const_ci) or
(const_ci = NGF_i).
Thus all the NGF_i attributes must fill the 'gap' between the last group-by
attribute and the MIN/MAX attribute in the index (if present). If these
conditions hold, copy each constant from its corresponding predicate into
key_infix, in the order its NG_i attribute appears in the index, and update
key_infix_len with the total length of the key parts in key_infix.
IMPLEMENTATION
TODO
RETURN
TRUE if non_group_fields pass the test
TRUE if the index passes the test
FALSE o/w
*/
static bool
get_constant_key_infix(List<Item_field> &non_group_fields,
SEL_ARG *index_range_tree,
get_constant_key_infix(KEY *index_info, SEL_ARG *index_range_tree,
KEY_PART_INFO *first_non_group_part,
KEY_PART_INFO *end_part,
byte *key_infix, uint *key_infix_len)
KEY_PART_INFO *min_max_arg_part,
KEY_PART_INFO *last_part, THD *thd,
byte *key_infix, uint *key_infix_len,
KEY_PART_INFO **first_non_infix_part)
{
Item_field *non_group_field;
List_iterator<Item_field> non_group_fields_it(non_group_fields);
SEL_ARG *cur_range;
KEY_PART_INFO *cur_part;
DBUG_ASSERT(non_group_fields.elements > 0);
/*
For each keypart in the 'gap' between the last group part, and the min/max
argument (if present) or the last keypart, there must be exactly one
non-group field, therefore their count must match. Notice that
non_group_fields is a set.
*/
if (non_group_fields.elements != (uint)(end_part - first_non_group_part))
return FALSE;
/* End part for the first loop below. */
KEY_PART_INFO *end_part= min_max_arg_part ? min_max_arg_part : last_part;
*key_infix_len= 0;
byte *key_ptr= key_infix;
......@@ -7092,17 +6972,15 @@ get_constant_key_infix(List<Item_field> &non_group_fields,
break;
}
if (!cur_range)
return FALSE; /* The current keypart has no range predicates at all. */
/* Check if there is a matching non-group field. */
non_group_fields_it.rewind();
while ((non_group_field= non_group_fields_it++))
{
if (cur_range->field->eq(non_group_field->field))
break;
if (min_max_arg_part)
return FALSE; /* The current keypart has no range predicates at all. */
else
{
*first_non_infix_part= cur_part;
return TRUE;
}
}
if (!non_group_field)
return FALSE;
/* Check that the current range tree is a single point interval. */
if (cur_range->prev || cur_range->next)
......@@ -7126,10 +7004,85 @@ get_constant_key_infix(List<Item_field> &non_group_fields,
return FALSE;
}
if (!min_max_arg_part && (cur_part == last_part))
*first_non_infix_part= last_part;
return TRUE;
}
/*
Find the key part referenced by a field.
SYNOPSIS
get_field_keypart()
index descriptor of an index
field field that possibly references some key part in index
NOTES
The return value can be used to get a KEY_PART_INFO pointer by
part= index->key_part + get_field_keypart(...) - 1;
RETURN
Positive number which is the consecutive number of the key part, or
0 if field does not reference any index field.
*/
static inline uint
get_field_keypart(KEY *index, Field *field)
{
KEY_PART_INFO *part= index->key_part;
uint key_part_num= 0;
while (part != part + index->key_parts)
{
key_part_num++;
if (field->eq(part->field))
return key_part_num;
part++;
}
return key_part_num;
}
/*
Find the SEL_ARG sub-tree that corresponds to the chosen index.
SYNOPSIS
get_index_range_tree()
index [in] The ID of the index being looked for
range_tree[in] Tree of ranges being searched
param [in] PARAM from SQL_SELECT::test_quick_select
param_idx [out] Index in the array PARAM::key that corresponds to 'index'
DESCRIPTION
A SEL_TREE contains range trees for all usable indexes. This procedure
finds the SEL_ARG sub-tree for 'index'. The members of a SEL_TREE are
ordered in the same way as the members of PARAM::key, thus we first find
the corresponding index in the array PARAM::key. This index is returned
through the variable param_idx, to be used later as argument of
check_quick_select().
RETURN
Pointer to the SEL_ARG subtree that corresponds to index.
*/
SEL_ARG * get_index_range_tree(uint index, SEL_TREE* range_tree, PARAM *param,
uint *param_idx)
{
uint idx= 0; /* Index nr in param->key_parts */
while (idx < param->keys)
{
if (index == param->real_keynr[idx])
break;
idx++;
}
*param_idx= idx;
return(range_tree->keys[idx]);
}
TRP_GROUP_MIN_MAX::TRP_GROUP_MIN_MAX(JOIN *join, TABLE* table,
bool have_min, bool have_max,
KEY_PART_INFO *min_max_arg_part,
......@@ -7137,31 +7090,37 @@ TRP_GROUP_MIN_MAX::TRP_GROUP_MIN_MAX(JOIN *join, TABLE* table,
uint group_key_parts, KEY *index_info,
uint index, uint key_infix_len,
byte *key_infix, SEL_TREE *tree,
PARAM *param)
SEL_ARG *index_tree, uint param_idx,
ha_rows quick_prefix_records, PARAM *param)
: join(join), table(table), have_min(have_min), have_max(have_max),
min_max_arg_part(min_max_arg_part), group_prefix_len(group_prefix_len),
used_key_parts(used_key_parts), group_key_parts(group_key_parts),
index_info(index_info), index(index), key_infix_len(key_infix_len),
range_tree(tree)
range_tree(tree), index_tree(index_tree), param_idx(param_idx),
quick_prefix_records(quick_prefix_records)
{
if (key_infix_len)
memcpy(this->key_infix, key_infix, MAX_KEY_LENGTH);
if (range_tree)
{
/* Find the SEL_ARG sub-tree that corresponds to the chosen index. */
index_tree= get_index_range_tree(index, range_tree, param,
&param_idx);
/* Check if this range tree can be used for prefix retrieval. */
quick_prefix_records= check_quick_select(param, param_idx, index_tree);
}
}
/*
Compute the cost of this trp.
Compute the cost of a quick_group_min_max_select for a particular index.
SYNOPSIS
TRP_GROUP_MIN_MAX::update_cost()
cost_group_min_max()
table [in] The table being accessed
index_info [in] The index used to access the table
used_key_parts [in] Number of key parts used to access the index
group_key_parts [in] Number of index key parts in the group prefix
range_tree [in] Tree of ranges for all indexes
index_tree [in] The range tree for the current index
quick_prefix_records [in] Number of records retrieved by the internally used
quick range select if any
have_min [in] True if there is a MIN function
have_max [in] True if there is a MAX function
read_cost [out] The cost to retrieve rows via this quick select
records [out] The number of rows retrieved
DESCRIPTION
This method computes the access cost of a TRP_GROUP_MIN_MAX instance and the
......@@ -7205,7 +7164,11 @@ TRP_GROUP_MIN_MAX::TRP_GROUP_MIN_MAX(JOIN *join, TABLE* table,
None
*/
void TRP_GROUP_MIN_MAX::update_cost()
void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
uint group_key_parts, SEL_TREE *range_tree,
SEL_ARG *index_tree, ha_rows quick_prefix_records,
bool have_min, bool have_max,
double *read_cost, ha_rows *records)
{
uint table_records;
uint num_groups;
......@@ -7219,7 +7182,7 @@ void TRP_GROUP_MIN_MAX::update_cost()
double io_cost;
double cpu_cost= 0; /* TODO: CPU cost of index_read calls? */
DBUG_ENTER("TRP_GROUP_MIN_MAX::update_cost");
DBUG_ENTER("TRP_GROUP_MIN_MAX::cost");
table_records= table->file->records;
keys_per_block= (table->file->block_size / 2 /
(index_info->key_length + table->file->ref_length)
......@@ -7270,8 +7233,8 @@ void TRP_GROUP_MIN_MAX::update_cost()
*/
cpu_cost= (double) num_groups / TIME_FOR_COMPARE;
read_cost= io_cost /*+ cpu_cost*/;
records= num_groups;
*read_cost= io_cost /*+ cpu_cost*/;
*records= num_groups;
DBUG_PRINT("info",
("records=%u, keys/block=%u, keys/group=%u, records=%u, blocks=%u",
......@@ -7434,6 +7397,12 @@ QUICK_GROUP_MIN_MAX_SELECT::QUICK_GROUP_MIN_MAX_SELECT(
SYNOPSIS
QUICK_GROUP_MIN_MAX_SELECT::init()
DESCRIPTION
The method performs initialization that cannot be done in the constructor
such as memory allocations that may fail. It allocates memory for the
group prefix and inifix buffers, and for the lists of MIN/MAX item to be
updated during execution.
RETURN
0 OK
other Error code
......@@ -7472,23 +7441,46 @@ int QUICK_GROUP_MIN_MAX_SELECT::init()
if(my_init_dynamic_array(&min_max_ranges, sizeof(QUICK_RANGE*), 16, 16))
return 1;
if (!(min_functions= new List<Item_sum>))
return 1;
if (!(max_functions= new List<Item_sum>))
return 1;
if (have_min)
{
if(!(min_functions= new List<Item_sum>))
return 1;
}
else
min_functions= NULL;
if (have_max)
{
if(!(max_functions= new List<Item_sum>))
return 1;
}
else
max_functions= NULL;
List_iterator<Item> select_items_it(join->fields_list);
Item *item;
while ((item= select_items_it++))
Item_sum *min_max_item;
Item_sum **func_ptr= join->sum_funcs;
while ((min_max_item= *(func_ptr++)))
{
item->walk(&Item::collect_item_sum_min_processor, (byte*) min_functions);
item->walk(&Item::collect_item_sum_max_processor, (byte*) max_functions);
if (have_min && (min_max_item->sum_func() == Item_sum::MIN_FUNC))
min_functions->push_back(min_max_item);
else if (have_max && (min_max_item->sum_func() == Item_sum::MAX_FUNC))
max_functions->push_back(min_max_item);
}
if (!(min_functions_it= new List_iterator<Item_sum>(*min_functions)))
return 1;
if (!(max_functions_it= new List_iterator<Item_sum>(*max_functions)))
return 1;
if (have_min)
{
if (!(min_functions_it= new List_iterator<Item_sum>(*min_functions)))
return 1;
}
else
min_functions_it= NULL;
if (have_max)
{
if (!(max_functions_it= new List_iterator<Item_sum>(*max_functions)))
return 1;
}
else
max_functions_it= NULL;
}
return 0;
......@@ -7619,6 +7611,10 @@ void QUICK_GROUP_MIN_MAX_SELECT::update_key_stat()
SYNOPSIS
QUICK_GROUP_MIN_MAX_SELECT::reset()
DESCRIPTION
Initialize the index chosen for access and find and store the prefix
of the last group. The method is expensive since it performs disk access.
RETURN
0 OK
other Error code
......@@ -8211,6 +8207,21 @@ void QUICK_GROUP_MIN_MAX_SELECT::update_max_result()
}
/*
Append comma-separated list of keys this quick select uses to key_names;
append comma-separated list of corresponding used lengths to used_lengths.
SYNOPSIS
QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths()
key_names [out] Names of used indexes
used_lengths [out] Corresponding lengths of the index names
DESCRIPTION
This method is used by select_describe to extract the names of the
indexes used by a quick select.
*/
void QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths(String *key_names,
String *used_lengths)
{
......@@ -8222,6 +8233,26 @@ void QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths(String *key_names,
}
/*
Print quick select information to DBUG_FILE.
SYNOPSIS
QUICK_GROUP_MIN_MAX_SELECT::dbug_dump()
indent Indentation offset
verbose If TRUE show more detailed output.
DESCRIPTION
Print the contents of this quick select to DBUG_FILE. The method also
calls dbug_dump() for the used quick select if any.
IMPLEMENTATION
Caller is responsible for locking DBUG_FILE before this call and unlocking
it afterwards.
RETURN
None
*/
void QUICK_GROUP_MIN_MAX_SELECT::dbug_dump(int indent, bool verbose)
{
fprintf(DBUG_FILE,
......
......@@ -1327,7 +1327,7 @@ JOIN::exec()
}
}
if (curr_join->make_sum_func_list(*curr_all_fields, *curr_fields_list,
1) ||
1, TRUE) ||
(tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table,
0)))
{
......@@ -1415,7 +1415,7 @@ JOIN::exec()
set_items_ref_array(items3);
if (curr_join->make_sum_func_list(*curr_all_fields, *curr_fields_list,
1) || thd->is_fatal_error)
1, TRUE) || thd->is_fatal_error)
DBUG_VOID_RETURN;
}
if (curr_join->group_list || curr_join->order)
......@@ -10765,6 +10765,7 @@ bool JOIN::alloc_func_list()
field_list All items
send_fields Items in select list
before_group_by Set to 1 if this is called before GROUP BY handling
recompute Set to TRUE if sum_funcs must be recomputed
NOTES
Calls ::setup() for all item_sum objects in field_list
......@@ -10775,13 +10776,16 @@ bool JOIN::alloc_func_list()
*/
bool JOIN::make_sum_func_list(List<Item> &field_list, List<Item> &send_fields,
bool before_group_by)
bool before_group_by, bool recompute)
{
List_iterator_fast<Item> it(field_list);
Item_sum **func;
Item *item;
DBUG_ENTER("make_sum_func_list");
if (*sum_funcs && !recompute)
DBUG_RETURN(FALSE); /* We have already initialized sum_funcs. */
func= sum_funcs;
while ((item=it++))
{
......
......@@ -303,7 +303,7 @@ class JOIN :public Sql_alloc
void restore_tmp();
bool alloc_func_list();
bool make_sum_func_list(List<Item> &all_fields, List<Item> &send_fields,
bool before_group_by);
bool before_group_by, bool recompute= FALSE);
inline void set_items_ref_array(Item **ptr)
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment