Commit 35a3f9d7 authored by Neeraj Bisht's avatar Neeraj Bisht

Bug#12328597 - MULTIPLE COUNT(DISTINCT) IN SAME SELECT FALSE

               WITH COMPOSITE KEY COLUMNS

Problem:-
While running a SELECT query with several AGGR(DISTINCT) function 
and these are referring to different field of same composite key, 
Returned incorrect value.

Analysis:-

In a table, where we have composite key like (a,b,c)
and when we give a query like

select COUNT(DISTINCT b), SUM(DISTINCT a) from ....

here, we first make a list of items in Aggr(distinct) function
(which is a, b), where order of item doesn't matter. 
and then we see, whether we have a composite key where the prefix 
of index columns matches the items of the aggregation function.
(in this case we have a,b,c).

if yes, so we can use loose index scan and we need not perform 
duplicate removal to distinct in our aggregate function.

In our table, we traverse column marked with <-- and get the result as
(a,b,c)      count(distinct b)           sum(distinct a)
             treated as count b          treated as sum(a)
(1,1,2)<--              1                      1		
(1,2,2)<--              1++=2                  1+1=2
(1,2,3)		
(2,1,2)<--              2++=3                  1+1+2=4
(2,2,2)<--              3++=4                  1+1+2+2=6
(2,2,3)

result will be 4,6, but it should be (2,3)

As in this case, our assumption is incorrect. If we have
query like 
select count(distinct a,b), sum(distinct a,b)from ..
then we can use loose index scan

Solution:-
In our query, when we have more then one aggr(distinct) function 
then they should refer to same  fields like

select count(distinct a,b), sum(distinct a,b) from .. 

-->we can use loose scan index as both aggr(distinct) refer to same fields a,b.

If they are referring to different field like

select count(distinct a), sum(distinct b) from .. 

-->will not use loose scan index as both aggr(distinct) refer to different fields.
parent 05111d32
...@@ -2858,9 +2858,10 @@ COUNT(DISTINCT a, b + 0) ...@@ -2858,9 +2858,10 @@ COUNT(DISTINCT a, b + 0)
16 16
EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT b) < 10; EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT b) < 10;
id select_type table type possible_keys key key_len ref rows Extra id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range NULL a 10 NULL 9 Using index for group-by 1 SIMPLE t1 index NULL a 10 NULL 16 Using index
SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT b) < 10; SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT b) < 10;
COUNT(DISTINCT a) COUNT(DISTINCT a)
2
EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT c) < 10; EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT c) < 10;
id select_type table type possible_keys key key_len ref rows Extra id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 16 1 SIMPLE t1 ALL NULL NULL NULL NULL 16
...@@ -2994,7 +2995,7 @@ WHERE b = 13 AND c = 42 GROUP BY a; ...@@ -2994,7 +2995,7 @@ WHERE b = 13 AND c = 42 GROUP BY a;
a COUNT(DISTINCT a) SUM(DISTINCT a) a COUNT(DISTINCT a) SUM(DISTINCT a)
EXPLAIN SELECT COUNT(DISTINCT a, b), SUM(DISTINCT a) FROM t2 WHERE b = 42; EXPLAIN SELECT COUNT(DISTINCT a, b), SUM(DISTINCT a) FROM t2 WHERE b = 42;
id select_type table type possible_keys key key_len ref rows Extra id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 range NULL a 10 NULL 9 Using where; Using index for group-by 1 SIMPLE t2 index NULL a 15 NULL 16 Using where; Using index
SELECT COUNT(DISTINCT a, b), SUM(DISTINCT a) FROM t2 WHERE b = 42; SELECT COUNT(DISTINCT a, b), SUM(DISTINCT a) FROM t2 WHERE b = 42;
COUNT(DISTINCT a, b) SUM(DISTINCT a) COUNT(DISTINCT a, b) SUM(DISTINCT a)
0 NULL 0 NULL
......
...@@ -1207,6 +1207,8 @@ EXPLAIN SELECT a, COUNT(DISTINCT a), SUM(DISTINCT a) FROM t2 ...@@ -1207,6 +1207,8 @@ EXPLAIN SELECT a, COUNT(DISTINCT a), SUM(DISTINCT a) FROM t2
SELECT a, COUNT(DISTINCT a), SUM(DISTINCT a) FROM t2 SELECT a, COUNT(DISTINCT a), SUM(DISTINCT a) FROM t2
WHERE b = 13 AND c = 42 GROUP BY a; WHERE b = 13 AND c = 42 GROUP BY a;
# This query could have been resolved using loose index scan since the second
# part of count(..) is defined by a constant predicate
EXPLAIN SELECT COUNT(DISTINCT a, b), SUM(DISTINCT a) FROM t2 WHERE b = 42; EXPLAIN SELECT COUNT(DISTINCT a, b), SUM(DISTINCT a) FROM t2 WHERE b = 42;
SELECT COUNT(DISTINCT a, b), SUM(DISTINCT a) FROM t2 WHERE b = 42; SELECT COUNT(DISTINCT a, b), SUM(DISTINCT a) FROM t2 WHERE b = 42;
......
...@@ -71,6 +71,7 @@ template <uint default_width> class Bitmap ...@@ -71,6 +71,7 @@ template <uint default_width> class Bitmap
my_bool is_subset(const Bitmap& map2) const { return bitmap_is_subset(&map, &map2.map); } my_bool is_subset(const Bitmap& map2) const { return bitmap_is_subset(&map, &map2.map); }
my_bool is_overlapping(const Bitmap& map2) const { return bitmap_is_overlapping(&map, &map2.map); } my_bool is_overlapping(const Bitmap& map2) const { return bitmap_is_overlapping(&map, &map2.map); }
my_bool operator==(const Bitmap& map2) const { return bitmap_cmp(&map, &map2.map); } my_bool operator==(const Bitmap& map2) const { return bitmap_cmp(&map, &map2.map); }
my_bool operator!=(const Bitmap& map2) const { return !(*this == map2); }
char *print(char *buf) const char *print(char *buf) const
{ {
char *s=buf; char *s=buf;
......
...@@ -4204,8 +4204,23 @@ static void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array) ...@@ -4204,8 +4204,23 @@ static void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array)
Optionally (if out_args is supplied) will push the arguments of Optionally (if out_args is supplied) will push the arguments of
AGGFN(DISTINCT) to the list AGGFN(DISTINCT) to the list
Check for every COUNT(DISTINCT), AVG(DISTINCT) or
SUM(DISTINCT). These can be resolved by Loose Index Scan as long
as all the aggregate distinct functions refer to the same
fields. Thus:
SELECT AGGFN(DISTINCT a, b), AGGFN(DISTINCT b, a)... => can use LIS
SELECT AGGFN(DISTINCT a), AGGFN(DISTINCT a) ... => can use LIS
SELECT AGGFN(DISTINCT a, b), AGGFN(DISTINCT a) ... => cannot use LIS
SELECT AGGFN(DISTINCT a), AGGFN(DISTINCT b) ... => cannot use LIS
etc.
@param join the join to check @param join the join to check
@param[out] out_args list of aggregate function arguments @param[out] out_args Collect the arguments of the aggregate functions
to a list. We don't worry about duplicates as
these will be sorted out later in
get_best_group_min_max.
@return does the query qualify for indexed AGGFN(DISTINCT) @return does the query qualify for indexed AGGFN(DISTINCT)
@retval true it does @retval true it does
@retval false AGGFN(DISTINCT) must apply distinct in it. @retval false AGGFN(DISTINCT) must apply distinct in it.
...@@ -4216,6 +4231,7 @@ is_indexed_agg_distinct(JOIN *join, List<Item_field> *out_args) ...@@ -4216,6 +4231,7 @@ is_indexed_agg_distinct(JOIN *join, List<Item_field> *out_args)
{ {
Item_sum **sum_item_ptr; Item_sum **sum_item_ptr;
bool result= false; bool result= false;
Field_map first_aggdistinct_fields;
if (join->tables != 1 || /* reference more than 1 table */ if (join->tables != 1 || /* reference more than 1 table */
join->select_distinct || /* or a DISTINCT */ join->select_distinct || /* or a DISTINCT */
...@@ -4228,6 +4244,7 @@ is_indexed_agg_distinct(JOIN *join, List<Item_field> *out_args) ...@@ -4228,6 +4244,7 @@ is_indexed_agg_distinct(JOIN *join, List<Item_field> *out_args)
for (sum_item_ptr= join->sum_funcs; *sum_item_ptr; sum_item_ptr++) for (sum_item_ptr= join->sum_funcs; *sum_item_ptr; sum_item_ptr++)
{ {
Item_sum *sum_item= *sum_item_ptr; Item_sum *sum_item= *sum_item_ptr;
Field_map cur_aggdistinct_fields;
Item *expr; Item *expr;
/* aggregate is not AGGFN(DISTINCT) or more than 1 argument to it */ /* aggregate is not AGGFN(DISTINCT) or more than 1 argument to it */
switch (sum_item->sum_func()) switch (sum_item->sum_func())
...@@ -4257,15 +4274,23 @@ is_indexed_agg_distinct(JOIN *join, List<Item_field> *out_args) ...@@ -4257,15 +4274,23 @@ is_indexed_agg_distinct(JOIN *join, List<Item_field> *out_args)
if (expr->real_item()->type() != Item::FIELD_ITEM) if (expr->real_item()->type() != Item::FIELD_ITEM)
return false; return false;
/* Item_field* item= static_cast<Item_field*>(expr->real_item());
If we came to this point the AGGFN(DISTINCT) loose index scan
optimization is applicable
*/
if (out_args) if (out_args)
out_args->push_back((Item_field *) expr->real_item()); out_args->push_back(item);
cur_aggdistinct_fields.set_bit(item->field->field_index);
result= true; result= true;
} }
/*
If there are multiple aggregate functions, make sure that they all
refer to exactly the same set of columns.
*/
if (first_aggdistinct_fields.is_clear_all())
first_aggdistinct_fields.merge(cur_aggdistinct_fields);
else if (first_aggdistinct_fields != cur_aggdistinct_fields)
return false;
} }
return result; return result;
} }
......
...@@ -894,6 +894,9 @@ enum index_hint_type ...@@ -894,6 +894,9 @@ enum index_hint_type
INDEX_HINT_FORCE INDEX_HINT_FORCE
}; };
/* Bitmap of table's fields */
typedef Bitmap<MAX_FIELDS> Field_map;
struct TABLE struct TABLE
{ {
TABLE() {} /* Remove gcc warning */ TABLE() {} /* Remove gcc warning */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment