Commit a006e88c authored by Varun Gupta's avatar Varun Gupta

MDEV-11563: GROUP_CONCAT(DISTINCT ...) may produce a non-distinct list

Backported from MYSQL
Bug #25331425: DISTINCT CLAUSE DOES NOT WORK IN GROUP_CONCAT
Issue:
------
The problem occurs when:
1) GROUP_CONCAT (DISTINCT ....) is used in the query.
2) Data size greater than value of system variable:
tmp_table_size.

The result would contain values that are non-unique.

Root cause:
-----------
An in-memory structure is used to filter out non-unique
values. When the data size exceeds tmp_table_size, the
overflow is written to disk as a separate file. The
expectation here is that when all such files are merged,
the full set of unique values can be obtained.

But the Item_func_group_concat::add function is in a bit of
hurry. Even as it is adding values to the tree, it wants to
decide if a value is unique and write it to the result
buffer. This works fine if the configured maximum size is
greater than the size of the data. But since tmp_table_size
is set to a low value, the size of the tree is smaller and
hence requires the creation of multiple copies on disk.

Item_func_group_concat currently has no mechanism to merge
all the copies on disk and then generate the result. This
results in duplicate values.

Solution:
---------
In case of the DISTINCT clause, don't write to the result
buffer immediately. Do the merge and only then put the
unique values in the result buffer. This has be done in
Item_func_group_concat::val_str.

Note regarding result file changes:
-----------------------------------
Earlier when a unique value was seen in
Item_func_group_concat::add, it was dumped to the output.
So result is in the order stored in SE. But with this fix,
we wait until all the data is read and the final set of
unique values are written to output buffer. So the data
appears in the sorted order.

This only fixes the cases when we have DISTINCT without ORDER BY clause
in GROUP_CONCAT.
parent fd1755e4
...@@ -363,8 +363,8 @@ bb,ccc,a,bb,ccc ...@@ -363,8 +363,8 @@ bb,ccc,a,bb,ccc
BB,CCC,A,BB,CCC BB,CCC,A,BB,CCC
select group_concat(distinct b) from t1 group by a; select group_concat(distinct b) from t1 group by a;
group_concat(distinct b) group_concat(distinct b)
bb,ccc,a a,bb,ccc
BB,CCC,A A,BB,CCC
select group_concat(b order by b) from t1 group by a; select group_concat(b order by b) from t1 group by a;
group_concat(b order by b) group_concat(b order by b)
a,bb,bb,ccc,ccc a,bb,bb,ccc,ccc
...@@ -383,11 +383,11 @@ Warning 1260 Row 2 was cut by GROUP_CONCAT() ...@@ -383,11 +383,11 @@ Warning 1260 Row 2 was cut by GROUP_CONCAT()
Warning 1260 Row 4 was cut by GROUP_CONCAT() Warning 1260 Row 4 was cut by GROUP_CONCAT()
select group_concat(distinct b) from t1 group by a; select group_concat(distinct b) from t1 group by a;
group_concat(distinct b) group_concat(distinct b)
bb,c a,bb
BB,C A,BB
Warnings: Warnings:
Warning 1260 Row 2 was cut by GROUP_CONCAT() Warning 1260 Row 3 was cut by GROUP_CONCAT()
Warning 1260 Row 4 was cut by GROUP_CONCAT() Warning 1260 Row 6 was cut by GROUP_CONCAT()
select group_concat(b order by b) from t1 group by a; select group_concat(b order by b) from t1 group by a;
group_concat(b order by b) group_concat(b order by b)
a,bb a,bb
...@@ -413,8 +413,8 @@ bb,ccc,a,bb,ccc,1111111111111111111111111111111111111111111111111111111111111111 ...@@ -413,8 +413,8 @@ bb,ccc,a,bb,ccc,1111111111111111111111111111111111111111111111111111111111111111
BB,CCC,A,BB,CCC,1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111112,1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111112,0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001 BB,CCC,A,BB,CCC,1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111112,1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111112,0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001
select group_concat(distinct b) from t1 group by a; select group_concat(distinct b) from t1 group by a;
group_concat(distinct b) group_concat(distinct b)
bb,ccc,a,1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111112,0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001 0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001,1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111112,a,bb,ccc
BB,CCC,A,1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111112,0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001 0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001,1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111112,A,BB,CCC
select group_concat(b order by b) from t1 group by a; select group_concat(b order by b) from t1 group by a;
group_concat(b order by b) group_concat(b order by b)
0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001,1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111112,1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111112,a,bb,bb,ccc,ccc 0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001,1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111112,1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111112,a,bb,bb,ccc,ccc
...@@ -433,11 +433,11 @@ Warning 1260 Row 7 was cut by GROUP_CONCAT() ...@@ -433,11 +433,11 @@ Warning 1260 Row 7 was cut by GROUP_CONCAT()
Warning 1260 Row 14 was cut by GROUP_CONCAT() Warning 1260 Row 14 was cut by GROUP_CONCAT()
select group_concat(distinct b) from t1 group by a; select group_concat(distinct b) from t1 group by a;
group_concat(distinct b) group_concat(distinct b)
bb,ccc,a,1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111112,00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001,11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
BB,CCC,A,1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111112,00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001,11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
Warnings: Warnings:
Warning 1260 Row 5 was cut by GROUP_CONCAT() Warning 1260 Row 2 was cut by GROUP_CONCAT()
Warning 1260 Row 10 was cut by GROUP_CONCAT() Warning 1260 Row 4 was cut by GROUP_CONCAT()
select group_concat(b order by b) from t1 group by a; select group_concat(b order by b) from t1 group by a;
group_concat(b order by b) group_concat(b order by b)
0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001,11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111 0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001,11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
...@@ -520,9 +520,9 @@ a group_concat(b) ...@@ -520,9 +520,9 @@ a group_concat(b)
NULL 3,4,2,1,2,7,3,3 NULL 3,4,2,1,2,7,3,3
select a, group_concat(distinct b) from t1 group by a with rollup; select a, group_concat(distinct b) from t1 group by a with rollup;
a group_concat(distinct b) a group_concat(distinct b)
1 3,4,2,1 1 1,2,3,4
2 7,3 2 3,7
NULL 3,4,2,1,7 NULL 1,2,3,4,7
select a, group_concat(b order by b) from t1 group by a with rollup; select a, group_concat(b order by b) from t1 group by a with rollup;
a group_concat(b order by b) a group_concat(b order by b)
1 1,2,2,3,4 1 1,2,2,3,4
...@@ -745,10 +745,10 @@ CREATE TABLE t1(a TEXT, b CHAR(20)); ...@@ -745,10 +745,10 @@ CREATE TABLE t1(a TEXT, b CHAR(20));
INSERT INTO t1 VALUES ("one.1","one.1"),("two.2","two.2"),("one.3","one.3"); INSERT INTO t1 VALUES ("one.1","one.1"),("two.2","two.2"),("one.3","one.3");
SELECT GROUP_CONCAT(DISTINCT UCASE(a)) FROM t1; SELECT GROUP_CONCAT(DISTINCT UCASE(a)) FROM t1;
GROUP_CONCAT(DISTINCT UCASE(a)) GROUP_CONCAT(DISTINCT UCASE(a))
ONE.1,TWO.2,ONE.3 ONE.1,ONE.3,TWO.2
SELECT GROUP_CONCAT(DISTINCT UCASE(b)) FROM t1; SELECT GROUP_CONCAT(DISTINCT UCASE(b)) FROM t1;
GROUP_CONCAT(DISTINCT UCASE(b)) GROUP_CONCAT(DISTINCT UCASE(b))
ONE.1,TWO.2,ONE.3 ONE.1,ONE.3,TWO.2
DROP TABLE t1; DROP TABLE t1;
CREATE TABLE t1( a VARCHAR( 10 ), b INT ); CREATE TABLE t1( a VARCHAR( 10 ), b INT );
INSERT INTO t1 VALUES ( repeat( 'a', 10 ), 1), INSERT INTO t1 VALUES ( repeat( 'a', 10 ), 1),
...@@ -847,7 +847,7 @@ create table t1(a bit(2) not null); ...@@ -847,7 +847,7 @@ create table t1(a bit(2) not null);
insert into t1 values (1), (0), (0), (3), (1); insert into t1 values (1), (0), (0), (3), (1);
select group_concat(distinct a) from t1; select group_concat(distinct a) from t1;
group_concat(distinct a) group_concat(distinct a)
1,0,3 0,1,3
select group_concat(distinct a order by a) from t1; select group_concat(distinct a order by a) from t1;
group_concat(distinct a order by a) group_concat(distinct a order by a)
0,1,3 0,1,3
...@@ -860,13 +860,13 @@ insert into t1 values (1, 'a', 0), (0, 'b', 1), (0, 'c', 0), (3, 'd', 1), ...@@ -860,13 +860,13 @@ insert into t1 values (1, 'a', 0), (0, 'b', 1), (0, 'c', 0), (3, 'd', 1),
(1, 'e', 1), (3, 'f', 1), (0, 'g', 1); (1, 'e', 1), (3, 'f', 1), (0, 'g', 1);
select group_concat(distinct a, c) from t1; select group_concat(distinct a, c) from t1;
group_concat(distinct a, c) group_concat(distinct a, c)
10,01,00,31,11 00,01,10,11,31
select group_concat(distinct a, c order by a) from t1; select group_concat(distinct a, c order by a) from t1;
group_concat(distinct a, c order by a) group_concat(distinct a, c order by a)
00,01,11,10,31 00,01,11,10,31
select group_concat(distinct a, c) from t1; select group_concat(distinct a, c) from t1;
group_concat(distinct a, c) group_concat(distinct a, c)
10,01,00,31,11 00,01,10,11,31
select group_concat(distinct a, c order by a, c) from t1; select group_concat(distinct a, c order by a, c) from t1;
group_concat(distinct a, c order by a, c) group_concat(distinct a, c order by a, c)
00,01,10,11,31 00,01,10,11,31
...@@ -1333,8 +1333,8 @@ select grp,group_concat(c limit 5.5...' at line 1 ...@@ -1333,8 +1333,8 @@ select grp,group_concat(c limit 5.5...' at line 1
select grp,group_concat(distinct c limit 1,10 ) from t1 group by grp; select grp,group_concat(distinct c limit 1,10 ) from t1 group by grp;
grp group_concat(distinct c limit 1,10 ) grp group_concat(distinct c limit 1,10 )
1 c 1 c
2 b 2 c
3 C,D 3 D,E
select grp,group_concat(c order by a) from t1 group by grp; select grp,group_concat(c order by a) from t1 group by grp;
grp group_concat(c order by a) grp group_concat(c order by a)
1 b,c 1 b,c
...@@ -1370,6 +1370,15 @@ grp group_concat(c order by c desc limit 2) ...@@ -1370,6 +1370,15 @@ grp group_concat(c order by c desc limit 2)
1 c,b 1 c,b
2 c,b 2 c,b
3 E,E 3 E,E
#
# Empty results for group concat as offset is greater than the rows
# for a group
#
select grp,group_concat(distinct c limit 10,1 ) from t1 group by grp;
grp group_concat(distinct c limit 10,1 )
1
2
3
drop table t1; drop table t1;
create table t2 (a int, b varchar(10)); create table t2 (a int, b varchar(10));
insert into t2 values(1,'a'),(1,'b'),(NULL,'c'),(2,'x'),(2,'y'); insert into t2 values(1,'a'),(1,'b'),(NULL,'c'),(2,'x'),(2,'y');
......
...@@ -986,6 +986,13 @@ select grp,group_concat(c order by c limit 2) from t1 group by grp; ...@@ -986,6 +986,13 @@ select grp,group_concat(c order by c limit 2) from t1 group by grp;
select grp,group_concat(c order by c desc) from t1 group by grp; select grp,group_concat(c order by c desc) from t1 group by grp;
select grp,group_concat(c order by c desc limit 2) from t1 group by grp; select grp,group_concat(c order by c desc limit 2) from t1 group by grp;
--echo #
--echo # Empty results for group concat as offset is greater than the rows
--echo # for a group
--echo #
select grp,group_concat(distinct c limit 10,1 ) from t1 group by grp;
drop table t1; drop table t1;
create table t2 (a int, b varchar(10)); create table t2 (a int, b varchar(10));
......
...@@ -3632,23 +3632,26 @@ int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)), ...@@ -3632,23 +3632,26 @@ int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)),
ulonglong *offset_limit= &item->copy_offset_limit; ulonglong *offset_limit= &item->copy_offset_limit;
ulonglong *row_limit = &item->copy_row_limit; ulonglong *row_limit = &item->copy_row_limit;
if (item->limit_clause && !(*row_limit)) if (item->limit_clause && !(*row_limit))
{
item->result_finalized= true;
return 1; return 1;
}
if (item->no_appended)
item->no_appended= FALSE;
else
result->append(*item->separator);
tmp.length(0); tmp.length(0);
if (item->limit_clause && (*offset_limit)) if (item->limit_clause && (*offset_limit))
{ {
item->row_count++; item->row_count++;
item->no_appended= TRUE;
(*offset_limit)--; (*offset_limit)--;
return 0; return 0;
} }
if (!item->result_finalized)
item->result_finalized= true;
else
result->append(*item->separator);
for (; arg < arg_end; arg++) for (; arg < arg_end; arg++)
{ {
String *res; String *res;
...@@ -3904,7 +3907,7 @@ void Item_func_group_concat::clear() ...@@ -3904,7 +3907,7 @@ void Item_func_group_concat::clear()
result.copy(); result.copy();
null_value= TRUE; null_value= TRUE;
warning_for_row= FALSE; warning_for_row= FALSE;
no_appended= TRUE; result_finalized= false;
if (offset_limit) if (offset_limit)
copy_offset_limit= offset_limit->val_int(); copy_offset_limit= offset_limit->val_int();
if (row_limit) if (row_limit)
...@@ -4040,12 +4043,10 @@ bool Item_func_group_concat::add(bool exclude_nulls) ...@@ -4040,12 +4043,10 @@ bool Item_func_group_concat::add(bool exclude_nulls)
tree_len+= row_str_len; tree_len+= row_str_len;
} }
/* /*
If the row is not a duplicate (el->count == 1) In case of GROUP_CONCAT with DISTINCT or ORDER BY (or both) don't dump the
we can dump the row here in case of GROUP_CONCAT(DISTINCT...) row to the output buffer here. That will be done in val_str.
instead of doing tree traverse later.
*/ */
if (row_eligible && !warning_for_row && if (row_eligible && !warning_for_row && (!tree && !distinct))
(!tree || (el->count == 1 && distinct && !arg_count_order)))
dump_leaf_key(table->record[0] + table->s->null_bytes, 1, this); dump_leaf_key(table->record[0] + table->s->null_bytes, 1, this);
return 0; return 0;
...@@ -4278,9 +4279,18 @@ String* Item_func_group_concat::val_str(String* str) ...@@ -4278,9 +4279,18 @@ String* Item_func_group_concat::val_str(String* str)
DBUG_ASSERT(fixed == 1); DBUG_ASSERT(fixed == 1);
if (null_value) if (null_value)
return 0; return 0;
if (no_appended && tree)
/* Tree is used for sorting as in ORDER BY */ if (!result_finalized) // Result yet to be written.
{
if (tree != NULL) // order by
tree_walk(tree, &dump_leaf_key, this, left_root_right); tree_walk(tree, &dump_leaf_key, this, left_root_right);
else if (distinct) // distinct (and no order by).
unique_filter->walk(table, &dump_leaf_key, this);
else if (row_limit && copy_row_limit == (ulonglong)row_limit->val_int())
return &result;
else
DBUG_ASSERT(false); // Can't happen
}
if (table && table->blob_storage && if (table && table->blob_storage &&
table->blob_storage->is_truncated_value()) table->blob_storage->is_truncated_value())
......
...@@ -1887,7 +1887,8 @@ class Item_func_group_concat : public Item_sum ...@@ -1887,7 +1887,8 @@ class Item_func_group_concat : public Item_sum
bool warning_for_row; bool warning_for_row;
bool always_null; bool always_null;
bool force_copy_fields; bool force_copy_fields;
bool no_appended; /** True if entire result of GROUP_CONCAT has been written to output buffer. */
bool result_finalized;
/** Limits the rows in the result */ /** Limits the rows in the result */
Item *row_limit; Item *row_limit;
/** Skips a particular number of rows in from the result*/ /** Skips a particular number of rows in from the result*/
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment