Commit 0f6f0daa authored by Varun Gupta's avatar Varun Gupta

MDEV-22011: DISTINCT with JSON_ARRAYAGG gives wrong results

For DISTINCT to be handled with JSON_ARRAYAGG, we need to make sure
that the Unique tree also holds the NULL bytes of a table record
inside the node of the tree. This behaviour for JSON_ARRAYAGG is
different from GROUP_CONCAT because in GROUP_CONCAT we just reject
NULL values for columns.

Also introduced a comparator function for the unique tree to handle null
values for distinct inside JSON_ARRAYAGG.
parent a006e88c
...@@ -1160,13 +1160,13 @@ JSON_ARRAYAGG(DISTINCT a) ...@@ -1160,13 +1160,13 @@ JSON_ARRAYAGG(DISTINCT a)
[1,2,3] [1,2,3]
SELECT JSON_ARRAYAGG(DISTINCT b) FROM t1; SELECT JSON_ARRAYAGG(DISTINCT b) FROM t1;
JSON_ARRAYAGG(DISTINCT b) JSON_ARRAYAGG(DISTINCT b)
["Hello","World","This","Will","Work","!",null] [null,"!","Hello","This","Will","Work","World"]
SELECT JSON_ARRAYAGG(DISTINCT a LIMIT 2) FROM t1; SELECT JSON_ARRAYAGG(DISTINCT a LIMIT 2) FROM t1;
JSON_ARRAYAGG(DISTINCT a LIMIT 2) JSON_ARRAYAGG(DISTINCT a LIMIT 2)
[1,2] [1,2]
SELECT JSON_ARRAYAGG(DISTINCT b LIMIT 2) FROM t1; SELECT JSON_ARRAYAGG(DISTINCT b LIMIT 2) FROM t1;
JSON_ARRAYAGG(DISTINCT b LIMIT 2) JSON_ARRAYAGG(DISTINCT b LIMIT 2)
["Hello","World"] [null,"!"]
# #
# JSON aggregation # JSON aggregation
# #
...@@ -1247,5 +1247,42 @@ select json_object('x', json_arrayagg(json_object('a', 1))); ...@@ -1247,5 +1247,42 @@ select json_object('x', json_arrayagg(json_object('a', 1)));
json_object('x', json_arrayagg(json_object('a', 1))) json_object('x', json_arrayagg(json_object('a', 1)))
{"x": [{"a": 1}]} {"x": [{"a": 1}]}
# #
# MDEV-22011: DISTINCT with JSON_ARRAYAGG gives wrong results
#
CREATE TABLE t1(a INT, b INT);
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
SELECT JSON_ARRAYAGG(a) FROM t1;
JSON_ARRAYAGG(a)
[1,2,3,1,2,3]
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
JSON_ARRAYAGG(DISTINCT a)
[1,2,3]
INSERT INTO t1 VALUES (NULL,NULL), (NULL,NULL);
SELECT JSON_ARRAYAGG(a) FROM t1;
JSON_ARRAYAGG(a)
[1,2,3,1,2,3,null,null]
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
JSON_ARRAYAGG(DISTINCT a)
[null,1,2,3]
DROP TABLE t1;
CREATE TABLE t1(a VARCHAR(10), b INT);
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
SELECT JSON_ARRAYAGG(a) FROM t1;
JSON_ARRAYAGG(a)
["1","2","3","1","2","3"]
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
JSON_ARRAYAGG(DISTINCT a)
["1","2","3"]
INSERT INTO t1 VALUES (NULL,NULL), (NULL,NULL);
SELECT JSON_ARRAYAGG(a) FROM t1;
JSON_ARRAYAGG(a)
["1","2","3","1","2","3",null,null]
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
JSON_ARRAYAGG(DISTINCT a)
[null,"1","2","3"]
DROP TABLE t1;
#
# End of 10.5 tests # End of 10.5 tests
# #
...@@ -763,6 +763,37 @@ select json_arrayagg(a order by a asc) from (select 1 a union select 2 a) t; ...@@ -763,6 +763,37 @@ select json_arrayagg(a order by a asc) from (select 1 a union select 2 a) t;
select json_object('x', json_arrayagg(json_object('a', 1))); select json_object('x', json_arrayagg(json_object('a', 1)));
--echo #
--echo # MDEV-22011: DISTINCT with JSON_ARRAYAGG gives wrong results
--echo #
CREATE TABLE t1(a INT, b INT);
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
SELECT JSON_ARRAYAGG(a) FROM t1;
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
INSERT INTO t1 VALUES (NULL,NULL), (NULL,NULL);
SELECT JSON_ARRAYAGG(a) FROM t1;
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
DROP TABLE t1;
CREATE TABLE t1(a VARCHAR(10), b INT);
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
SELECT JSON_ARRAYAGG(a) FROM t1;
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
INSERT INTO t1 VALUES (NULL,NULL), (NULL,NULL);
SELECT JSON_ARRAYAGG(a) FROM t1;
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
DROP TABLE t1;
--echo # --echo #
--echo # End of 10.5 tests --echo # End of 10.5 tests
--echo # --echo #
......
...@@ -1461,7 +1461,7 @@ static int append_json_value_from_field(String *str, ...@@ -1461,7 +1461,7 @@ static int append_json_value_from_field(String *str,
const char *t_f; const char *t_f;
int t_f_len; int t_f_len;
if (f->is_null(offset)) if (f->is_null_in_record(key))
goto append_null; goto append_null;
if (v_int) if (v_int)
...@@ -1479,7 +1479,7 @@ static int append_json_value_from_field(String *str, ...@@ -1479,7 +1479,7 @@ static int append_json_value_from_field(String *str,
} }
{ {
String *sv= f->val_str(tmp_val, key + offset); String *sv= f->val_str(tmp_val, key + offset);
if (f->is_null(offset)) if (f->is_null_in_record(key))
goto append_null; goto append_null;
if (i->is_json_type()) if (i->is_json_type())
return str->append(sv->ptr(), sv->length()); return str->append(sv->ptr(), sv->length());
......
...@@ -3554,6 +3554,63 @@ int group_concat_key_cmp_with_distinct(void* arg, const void* key1, ...@@ -3554,6 +3554,63 @@ int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
} }
/*
@brief
Comparator function for DISTINCT clause taking into account NULL values.
@note
Used for JSON_ARRAYAGG function
*/
int group_concat_key_cmp_with_distinct_with_nulls(void* arg,
const void* key1_arg,
const void* key2_arg)
{
Item_func_group_concat *item_func= (Item_func_group_concat*)arg;
uchar *key1= (uchar*)key1_arg + item_func->table->s->null_bytes;
uchar *key2= (uchar*)key2_arg + item_func->table->s->null_bytes;
/*
JSON_ARRAYAGG function only accepts one argument.
*/
Item *item= item_func->args[0];
/*
If item is a const item then either get_tmp_table_field returns 0
or it is an item over a const table.
*/
if (item->const_item())
return 0;
/*
We have to use get_tmp_table_field() instead of
real_item()->get_tmp_table_field() because we want the field in
the temporary table, not the original field
*/
Field *field= item->get_tmp_table_field();
if (!field)
return 0;
if (field->is_null_in_record((uchar*)key1_arg) &&
field->is_null_in_record((uchar*)key2_arg))
return 0;
if (field->is_null_in_record((uchar*)key1_arg))
return -1;
if (field->is_null_in_record((uchar*)key2_arg))
return 1;
uint offset= (field->offset(field->table->record[0]) -
field->table->s->null_bytes);
int res= field->cmp(key1 + offset, key2 + offset);
if (res)
return res;
return 0;
}
/** /**
function of sort for syntax: GROUP_CONCAT(expr,... ORDER BY col,... ) function of sort for syntax: GROUP_CONCAT(expr,... ORDER BY col,... )
*/ */
...@@ -3672,7 +3729,8 @@ int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)), ...@@ -3672,7 +3729,8 @@ int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)),
uint offset= (field->offset(field->table->record[0]) - uint offset= (field->offset(field->table->record[0]) -
table->s->null_bytes); table->s->null_bytes);
DBUG_ASSERT(offset < table->s->reclength); DBUG_ASSERT(offset < table->s->reclength);
res= item->get_str_from_field(*arg, field, &tmp, key, offset); res= item->get_str_from_field(*arg, field, &tmp, key,
offset + item->get_null_bytes());
} }
else else
res= item->get_str_from_item(*arg, &tmp); res= item->get_str_from_item(*arg, &tmp);
...@@ -4012,6 +4070,14 @@ bool Item_func_group_concat::add(bool exclude_nulls) ...@@ -4012,6 +4070,14 @@ bool Item_func_group_concat::add(bool exclude_nulls)
if (tree && (res= field->val_str(&buf))) if (tree && (res= field->val_str(&buf)))
row_str_len+= res->length(); row_str_len+= res->length();
} }
else
{
/*
should not reach here, we create temp table for all the arguments of
the group_concat function
*/
DBUG_ASSERT(0);
}
} }
null_value= FALSE; null_value= FALSE;
...@@ -4021,7 +4087,7 @@ bool Item_func_group_concat::add(bool exclude_nulls) ...@@ -4021,7 +4087,7 @@ bool Item_func_group_concat::add(bool exclude_nulls)
{ {
/* Filter out duplicate rows. */ /* Filter out duplicate rows. */
uint count= unique_filter->elements_in_tree(); uint count= unique_filter->elements_in_tree();
unique_filter->unique_add(table->record[0] + table->s->null_bytes); unique_filter->unique_add(get_record_pointer());
if (count == unique_filter->elements_in_tree()) if (count == unique_filter->elements_in_tree())
row_eligible= FALSE; row_eligible= FALSE;
} }
...@@ -4047,7 +4113,7 @@ bool Item_func_group_concat::add(bool exclude_nulls) ...@@ -4047,7 +4113,7 @@ bool Item_func_group_concat::add(bool exclude_nulls)
row to the output buffer here. That will be done in val_str. row to the output buffer here. That will be done in val_str.
*/ */
if (row_eligible && !warning_for_row && (!tree && !distinct)) if (row_eligible && !warning_for_row && (!tree && !distinct))
dump_leaf_key(table->record[0] + table->s->null_bytes, 1, this); dump_leaf_key(get_record_pointer(), 1, this);
return 0; return 0;
} }
...@@ -4247,9 +4313,9 @@ bool Item_func_group_concat::setup(THD *thd) ...@@ -4247,9 +4313,9 @@ bool Item_func_group_concat::setup(THD *thd)
} }
if (distinct) if (distinct)
unique_filter= new Unique(group_concat_key_cmp_with_distinct, unique_filter= new Unique(get_comparator_function_for_distinct(),
(void*)this, (void*)this,
tree_key_length, tree_key_length + get_null_bytes(),
ram_limitation(thd)); ram_limitation(thd));
if ((row_limit && row_limit->cmp_type() != INT_RESULT) || if ((row_limit && row_limit->cmp_type() != INT_RESULT) ||
(offset_limit && offset_limit->cmp_type() != INT_RESULT)) (offset_limit && offset_limit->cmp_type() != INT_RESULT))
...@@ -4305,6 +4371,53 @@ String* Item_func_group_concat::val_str(String* str) ...@@ -4305,6 +4371,53 @@ String* Item_func_group_concat::val_str(String* str)
} }
/*
@brief
Get the comparator function for DISTINT clause
*/
qsort_cmp2 Item_func_group_concat::get_comparator_function_for_distinct()
{
return skip_nulls() ?
group_concat_key_cmp_with_distinct :
group_concat_key_cmp_with_distinct_with_nulls;
}
/*
@brief
Get the record pointer of the current row of the table
@details
look at the comments for Item_func_group_concat::get_null_bytes
*/
uchar* Item_func_group_concat::get_record_pointer()
{
return skip_nulls() ?
table->record[0] + table->s->null_bytes :
table->record[0];
}
/*
@brief
Get the null bytes for the table if required.
@details
This function is used for GROUP_CONCAT (or JSON_ARRAYAGG) implementation
where the Unique tree or the ORDER BY tree may store the null values,
in such case we also store the null bytes inside each node of the tree.
*/
uint Item_func_group_concat::get_null_bytes()
{
return skip_nulls() ? 0 : table->s->null_bytes;
}
void Item_func_group_concat::print(String *str, enum_query_type query_type) void Item_func_group_concat::print(String *str, enum_query_type query_type)
{ {
str->append(func_name()); str->append(func_name());
......
...@@ -1849,6 +1849,8 @@ class Item_sum_udf_str :public Item_sum_double ...@@ -1849,6 +1849,8 @@ class Item_sum_udf_str :public Item_sum_double
C_MODE_START C_MODE_START
int group_concat_key_cmp_with_distinct(void* arg, const void* key1, int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
const void* key2); const void* key2);
int group_concat_key_cmp_with_distinct_with_nulls(void* arg, const void* key1,
const void* key2);
int group_concat_key_cmp_with_order(void* arg, const void* key1, int group_concat_key_cmp_with_order(void* arg, const void* key1,
const void* key2); const void* key2);
int dump_leaf_key(void* key_arg, int dump_leaf_key(void* key_arg,
...@@ -1913,6 +1915,9 @@ class Item_func_group_concat : public Item_sum ...@@ -1913,6 +1915,9 @@ class Item_func_group_concat : public Item_sum
friend int group_concat_key_cmp_with_distinct(void* arg, const void* key1, friend int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
const void* key2); const void* key2);
friend int group_concat_key_cmp_with_distinct_with_nulls(void* arg,
const void* key1,
const void* key2);
friend int group_concat_key_cmp_with_order(void* arg, const void* key1, friend int group_concat_key_cmp_with_order(void* arg, const void* key1,
const void* key2); const void* key2);
friend int dump_leaf_key(void* key_arg, friend int dump_leaf_key(void* key_arg,
...@@ -2004,6 +2009,10 @@ class Item_func_group_concat : public Item_sum ...@@ -2004,6 +2009,10 @@ class Item_func_group_concat : public Item_sum
{ context= (Name_resolution_context *)cntx; return FALSE; } { context= (Name_resolution_context *)cntx; return FALSE; }
Item *get_copy(THD *thd) Item *get_copy(THD *thd)
{ return get_item_copy<Item_func_group_concat>(thd, this); } { return get_item_copy<Item_func_group_concat>(thd, this); }
qsort_cmp2 get_comparator_function_for_distinct();
uchar* get_record_pointer();
uint get_null_bytes();
}; };
#endif /* ITEM_SUM_INCLUDED */ #endif /* ITEM_SUM_INCLUDED */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment