Commit d0fc07c8 authored by Markus Mäkelä's avatar Markus Mäkelä

MDEV-16620: Add JSON_ARRAYAGG function

The JSON_ARRAYAGG function extends the GROUP_CONCAT function and provides
a method of aggregating JSON results. The current implementation supports
DISTINCT and LIMIT but not ORDER BY (Oracle supports GROUP BY).

Adding GROUP BY support is possible but it requires some extra work as the
grouping appears to be done inside a temporary table that complicates
matters.

Added test cases that covert aggregation of all JSON types and JSON
validation for the generated results.
parent 4d6a9094
......@@ -1028,5 +1028,155 @@ SELECT * FROM t1 WHERE CASE WHEN JSON_OBJECT('x', (SELECT MAX(a=2) FROM t1))='{"
a
DROP TABLE t1;
#
# MDEV-16620 JSON_ARRAYAGG
#
#
# Integer aggregation
#
CREATE TABLE t1 (a INT, b INT);
INSERT INTO t1 VALUES (1, 1),(2, 1), (1, 1),(2, 1), (3, 2),(2, 2),(2, 2),(2, 2);
SELECT JSON_VALID(JSON_ARRAYAGG(a)) FROM t1;
JSON_VALID(JSON_ARRAYAGG(a))
1
SELECT JSON_ARRAYAGG(a), JSON_ARRAYAGG(b) FROM t1;
JSON_ARRAYAGG(a) JSON_ARRAYAGG(b)
[1,2,1,2,3,2,2,2] [1,1,1,1,2,2,2,2]
SELECT JSON_ARRAYAGG(a), JSON_ARRAYAGG(b) FROM t1 GROUP BY b;
JSON_ARRAYAGG(a) JSON_ARRAYAGG(b)
[1,2,1,2] [1,1,1,1]
[3,2,2,2] [2,2,2,2]
DROP TABLE t1;
#
# Real aggregation
#
CREATE TABLE t1 (a FLOAT, b DOUBLE, c DECIMAL(10, 2));
INSERT INTO t1 VALUES (1.0, 2.0, 3.0),(1.0, 3.0, 9.0),(1.0, 4.0, 16.0),(1.0, 5.0, 25.0);
SELECT JSON_VALID(JSON_ARRAYAGG(a)) FROM t1;
JSON_VALID(JSON_ARRAYAGG(a))
1
SELECT JSON_ARRAYAGG(a), JSON_ARRAYAGG(b), JSON_ARRAYAGG(c) FROM t1;
JSON_ARRAYAGG(a) JSON_ARRAYAGG(b) JSON_ARRAYAGG(c)
[1,1,1,1] [2,3,4,5] [3.00,9.00,16.00,25.00]
DROP TABLE t1;
#
# Boolean aggregation
#
CREATE TABLE t1 (a BOOLEAN, b BOOLEAN);
INSERT INTO t1 VALUES (TRUE, TRUE), (TRUE, FALSE), (FALSE, TRUE), (FALSE, FALSE);
SELECT JSON_VALID(JSON_ARRAYAGG(a)) FROM t1;
JSON_VALID(JSON_ARRAYAGG(a))
1
SELECT JSON_ARRAYAGG(a), JSON_ARRAYAGG(b) FROM t1;
JSON_ARRAYAGG(a) JSON_ARRAYAGG(b)
[1,1,0,0] [1,0,1,0]
SELECT JSON_ARRAYAGG(a), JSON_ARRAYAGG(b) FROM t1 GROUP BY b;
JSON_ARRAYAGG(a) JSON_ARRAYAGG(b)
[1,0] [0,0]
[1,0] [1,1]
SELECT JSON_ARRAYAGG(TRUE), JSON_ARRAYAGG(FALSE) FROM t1;
JSON_ARRAYAGG(TRUE) JSON_ARRAYAGG(FALSE)
[true,true,true,true] [false,false,false,false]
DROP TABLE t1;
#
# Aggregation of strings with quoted
#
CREATE TABLE t1 (a VARCHAR(80));
INSERT INTO t1 VALUES
('"double_quoted_value"'), ("'single_quoted_value'"),
('"double_quoted_value"'), ("'single_quoted_value'");
SELECT JSON_VALID(JSON_ARRAYAGG(a)) FROM t1;
JSON_VALID(JSON_ARRAYAGG(a))
1
SELECT JSON_ARRAYAGG(a) FROM t1;
JSON_ARRAYAGG(a)
["\"double_quoted_value\"","'single_quoted_value'","\"double_quoted_value\"","'single_quoted_value'"]
DROP TABLE t1;
#
# Strings and NULLs
#
CREATE TABLE t1 (a INT, b VARCHAR(80));
INSERT INTO t1 VALUES
(1, "Hello"),(1, "World"), (2, "This"),(2, "Will"), (2, "Work"),(2, "!"), (3, NULL),
(1, "Hello"),(1, "World"), (2, "This"),(2, "Will"), (2, "Work"),(2, "!"), (3, NULL);
SELECT JSON_VALID(JSON_ARRAYAGG(b)) FROM t1;
JSON_VALID(JSON_ARRAYAGG(b))
1
SELECT JSON_ARRAYAGG(a), JSON_ARRAYAGG(b) FROM t1;
JSON_ARRAYAGG(a) JSON_ARRAYAGG(b)
[1,1,2,2,2,2,3,1,1,2,2,2,2,3] ["Hello","World","This","Will","Work","!",null,"Hello","World","This","Will","Work","!",null]
SELECT JSON_ARRAYAGG(a), JSON_ARRAYAGG(b) FROM t1 GROUP BY a;
JSON_ARRAYAGG(a) JSON_ARRAYAGG(b)
[1,1,1,1] ["Hello","World","Hello","World"]
[2,2,2,2,2,2,2,2] ["!","Work","Will","This","Will","This","!","Work"]
[3,3] [null,null]
#
# DISTINCT and LIMIT
#
SELECT JSON_ARRAYAGG(b LIMIT 1) FROM t1;
JSON_ARRAYAGG(b LIMIT 1)
["Hello"]
SELECT JSON_ARRAYAGG(b LIMIT 2) FROM t1;
JSON_ARRAYAGG(b LIMIT 2)
["Hello","World"]
SELECT JSON_ARRAYAGG(b LIMIT 1) FROM t1 GROUP BY b;
JSON_ARRAYAGG(b LIMIT 1)
[null]
["!"]
["Hello"]
["This"]
["Will"]
["Work"]
["World"]
SELECT JSON_ARRAYAGG(b LIMIT 2) FROM t1 GROUP BY a;
JSON_ARRAYAGG(b LIMIT 2)
["Hello","World"]
["!","Work"]
[null,null]
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
JSON_ARRAYAGG(DISTINCT a)
[1,2,3]
SELECT JSON_ARRAYAGG(DISTINCT b) FROM t1;
JSON_ARRAYAGG(DISTINCT b)
["Hello","World","This","Will","Work","!",null]
SELECT JSON_ARRAYAGG(DISTINCT a LIMIT 2) FROM t1;
JSON_ARRAYAGG(DISTINCT a LIMIT 2)
[1,2]
SELECT JSON_ARRAYAGG(DISTINCT b LIMIT 2) FROM t1;
JSON_ARRAYAGG(DISTINCT b LIMIT 2)
["Hello","World"]
#
# JSON aggregation
#
SELECT JSON_VALID(JSON_ARRAYAGG(JSON_ARRAY(a, b))) FROM t1;
JSON_VALID(JSON_ARRAYAGG(JSON_ARRAY(a, b)))
1
SELECT JSON_ARRAYAGG(JSON_ARRAY(a, b)) FROM t1;
JSON_ARRAYAGG(JSON_ARRAY(a, b))
[[1, "Hello"],[1, "World"],[2, "This"],[2, "Will"],[2, "Work"],[2, "!"],[3, null],[1, "Hello"],[1, "World"],[2, "This"],[2, "Will"],[2, "Work"],[2, "!"],[3, null]]
SELECT JSON_ARRAYAGG(JSON_ARRAY(a, b)) FROM t1 GROUP BY a;
JSON_ARRAYAGG(JSON_ARRAY(a, b))
[[1, "Hello"],[1, "World"],[1, "Hello"],[1, "World"]]
[[2, "!"],[2, "Work"],[2, "Will"],[2, "This"],[2, "Will"],[2, "This"],[2, "!"],[2, "Work"]]
[[3, null],[3, null]]
SELECT JSON_VALID(JSON_ARRAYAGG(JSON_OBJECT('a', a, 'b', b))) FROM t1;
JSON_VALID(JSON_ARRAYAGG(JSON_OBJECT('a', a, 'b', b)))
1
SELECT JSON_ARRAYAGG(JSON_OBJECT('a', a, 'b', b)) FROM t1;
JSON_ARRAYAGG(JSON_OBJECT('a', a, 'b', b))
[{"a": 1, "b": "Hello"},{"a": 1, "b": "World"},{"a": 2, "b": "This"},{"a": 2, "b": "Will"},{"a": 2, "b": "Work"},{"a": 2, "b": "!"},{"a": 3, "b": null},{"a": 1, "b": "Hello"},{"a": 1, "b": "World"},{"a": 2, "b": "This"},{"a": 2, "b": "Will"},{"a": 2, "b": "Work"},{"a": 2, "b": "!"},{"a": 3, "b": null}]
SELECT JSON_ARRAYAGG(JSON_OBJECT('a', a, 'b', b)) FROM t1 GROUP BY a;
JSON_ARRAYAGG(JSON_OBJECT('a', a, 'b', b))
[{"a": 1, "b": "Hello"},{"a": 1, "b": "World"},{"a": 1, "b": "Hello"},{"a": 1, "b": "World"}]
[{"a": 2, "b": "!"},{"a": 2, "b": "Work"},{"a": 2, "b": "Will"},{"a": 2, "b": "This"},{"a": 2, "b": "Will"},{"a": 2, "b": "This"},{"a": 2, "b": "!"},{"a": 2, "b": "Work"}]
[{"a": 3, "b": null},{"a": 3, "b": null}]
#
# Error checks
#
SELECT JSON_ARRAYAGG(a, b) FROM t1;
ERROR 42000: Incorrect parameter count in the call to native function 'JSON_ARRAYAGG'
SELECT JSON_ARRAYAGG(JSON_ARRAYAGG(a, b)) FROM t1;
ERROR HY000: Invalid use of group function
DROP TABLE t1;
#
# End of 10.4 tests
#
......@@ -606,6 +606,96 @@ SELECT * FROM t1 WHERE CASE WHEN JSON_OBJECT('x', (SELECT MAX(a=2) FROM t1))='{"
SELECT * FROM t1 WHERE CASE WHEN JSON_OBJECT('x', (SELECT MAX(a=2) FROM t1))='{"x": false}' THEN a END;
DROP TABLE t1;
-- echo #
-- echo # MDEV-16620 JSON_ARRAYAGG
-- echo #
-- echo #
-- echo # Integer aggregation
-- echo #
CREATE TABLE t1 (a INT, b INT);
INSERT INTO t1 VALUES (1, 1),(2, 1), (1, 1),(2, 1), (3, 2),(2, 2),(2, 2),(2, 2);
SELECT JSON_VALID(JSON_ARRAYAGG(a)) FROM t1;
SELECT JSON_ARRAYAGG(a), JSON_ARRAYAGG(b) FROM t1;
SELECT JSON_ARRAYAGG(a), JSON_ARRAYAGG(b) FROM t1 GROUP BY b;
DROP TABLE t1;
-- echo #
-- echo # Real aggregation
-- echo #
CREATE TABLE t1 (a FLOAT, b DOUBLE, c DECIMAL(10, 2));
INSERT INTO t1 VALUES (1.0, 2.0, 3.0),(1.0, 3.0, 9.0),(1.0, 4.0, 16.0),(1.0, 5.0, 25.0);
SELECT JSON_VALID(JSON_ARRAYAGG(a)) FROM t1;
SELECT JSON_ARRAYAGG(a), JSON_ARRAYAGG(b), JSON_ARRAYAGG(c) FROM t1;
DROP TABLE t1;
-- echo #
-- echo # Boolean aggregation
-- echo #
CREATE TABLE t1 (a BOOLEAN, b BOOLEAN);
INSERT INTO t1 VALUES (TRUE, TRUE), (TRUE, FALSE), (FALSE, TRUE), (FALSE, FALSE);
SELECT JSON_VALID(JSON_ARRAYAGG(a)) FROM t1;
SELECT JSON_ARRAYAGG(a), JSON_ARRAYAGG(b) FROM t1;
SELECT JSON_ARRAYAGG(a), JSON_ARRAYAGG(b) FROM t1 GROUP BY b;
SELECT JSON_ARRAYAGG(TRUE), JSON_ARRAYAGG(FALSE) FROM t1;
DROP TABLE t1;
-- echo #
-- echo # Aggregation of strings with quoted
-- echo #
CREATE TABLE t1 (a VARCHAR(80));
INSERT INTO t1 VALUES
('"double_quoted_value"'), ("'single_quoted_value'"),
('"double_quoted_value"'), ("'single_quoted_value'");
SELECT JSON_VALID(JSON_ARRAYAGG(a)) FROM t1;
SELECT JSON_ARRAYAGG(a) FROM t1;
DROP TABLE t1;
-- echo #
-- echo # Strings and NULLs
-- echo #
CREATE TABLE t1 (a INT, b VARCHAR(80));
INSERT INTO t1 VALUES
(1, "Hello"),(1, "World"), (2, "This"),(2, "Will"), (2, "Work"),(2, "!"), (3, NULL),
(1, "Hello"),(1, "World"), (2, "This"),(2, "Will"), (2, "Work"),(2, "!"), (3, NULL);
SELECT JSON_VALID(JSON_ARRAYAGG(b)) FROM t1;
SELECT JSON_ARRAYAGG(a), JSON_ARRAYAGG(b) FROM t1;
SELECT JSON_ARRAYAGG(a), JSON_ARRAYAGG(b) FROM t1 GROUP BY a;
-- echo #
-- echo # DISTINCT and LIMIT
-- echo #
SELECT JSON_ARRAYAGG(b LIMIT 1) FROM t1;
SELECT JSON_ARRAYAGG(b LIMIT 2) FROM t1;
SELECT JSON_ARRAYAGG(b LIMIT 1) FROM t1 GROUP BY b;
SELECT JSON_ARRAYAGG(b LIMIT 2) FROM t1 GROUP BY a;
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
SELECT JSON_ARRAYAGG(DISTINCT b) FROM t1;
SELECT JSON_ARRAYAGG(DISTINCT a LIMIT 2) FROM t1;
SELECT JSON_ARRAYAGG(DISTINCT b LIMIT 2) FROM t1;
-- echo #
-- echo # JSON aggregation
-- echo #
SELECT JSON_VALID(JSON_ARRAYAGG(JSON_ARRAY(a, b))) FROM t1;
SELECT JSON_ARRAYAGG(JSON_ARRAY(a, b)) FROM t1;
SELECT JSON_ARRAYAGG(JSON_ARRAY(a, b)) FROM t1 GROUP BY a;
SELECT JSON_VALID(JSON_ARRAYAGG(JSON_OBJECT('a', a, 'b', b))) FROM t1;
SELECT JSON_ARRAYAGG(JSON_OBJECT('a', a, 'b', b)) FROM t1;
SELECT JSON_ARRAYAGG(JSON_OBJECT('a', a, 'b', b)) FROM t1 GROUP BY a;
-- echo #
-- echo # Error checks
-- echo #
--error ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT
SELECT JSON_ARRAYAGG(a, b) FROM t1;
--error ER_INVALID_GROUP_FUNC_USE
SELECT JSON_ARRAYAGG(JSON_ARRAYAGG(a, b)) FROM t1;
DROP TABLE t1;
--echo #
......
......@@ -3621,3 +3621,25 @@ int Arg_comparator::compare_e_json_str_basic(Item *j, Item *s)
return MY_TEST(sortcmp(res1, res2, compare_collation()) == 0);
}
String* Item_func_json_arrayagg::convert_to_json(Item *item, String *res)
{
String tmp;
res->length(0);
append_json_value(res, item, &tmp);
return res;
}
String* Item_func_json_arrayagg::val_str(String *str)
{
str= Item_func_group_concat::val_str(str);
String s;
s.append('[');
s.swap(*str);
str->append(s);
str->append(']');
return str;
}
......@@ -23,6 +23,7 @@
#include <json_lib.h>
#include "item_cmpfunc.h" // Item_bool_func
#include "item_strfunc.h" // Item_str_func
#include "item_sum.h"
class json_path_with_flags
......@@ -523,4 +524,31 @@ class Item_func_json_format: public Item_json_func
};
class Item_func_json_arrayagg : public Item_func_group_concat
{
public:
Item_func_json_arrayagg(THD *thd, Name_resolution_context *context_arg,
bool is_distinct, List<Item> *is_select,
const SQL_I_List<ORDER> &is_order, String *is_separator,
bool limit_clause, Item *row_limit, Item *offset_limit):
Item_func_group_concat(thd, context_arg, is_distinct, is_select, is_order,
is_separator, limit_clause, row_limit, offset_limit)
{
}
const char *func_name() const { return "json_arrayagg("; }
enum Sumfunctype sum_func() const {return JSON_ARRAYAGG_FUNC;}
String* convert_to_json(Item *item, String *str);
String* val_str(String *str);
/* Overrides Item_func_group_concat::add() */
bool add()
{
return Item_func_group_concat::add(false);
}
};
#endif /* ITEM_JSONFUNC_INCLUDED */
......@@ -3535,6 +3535,14 @@ int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)),
if (item->limit_clause && !(*row_limit))
return 1;
if (item->sum_func() == Item_sum::JSON_ARRAYAGG_FUNC &&
item->arg_count_field > 1)
{
/* JSON_ARRAYAGG supports only one parameter */
my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), "JSON_ARRAYAGG");
return 1;
}
if (item->no_appended)
item->no_appended= FALSE;
else
......@@ -3576,7 +3584,19 @@ int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)),
res= (*arg)->val_str(&tmp);
}
if (res)
{
if (item->sum_func() == Item_sum::JSON_ARRAYAGG_FUNC)
{
/*
JSON_ARRAYAGG needs to convert the type into valid JSON before
appending it to the result
*/
Item_func_json_arrayagg *arrayagg= (Item_func_json_arrayagg *) item_arg;
res= arrayagg->convert_to_json(*arg, res);
}
result->append(*res);
}
}
if (item->limit_clause)
......@@ -3882,9 +3902,9 @@ bool Item_func_group_concat::repack_tree(THD *thd)
*/
#define GCONCAT_REPACK_FACTOR (1 << 10)
bool Item_func_group_concat::add()
bool Item_func_group_concat::add(bool exclude_nulls)
{
if (always_null)
if (always_null && exclude_nulls)
return 0;
copy_fields(tmp_table_param);
if (copy_funcs(tmp_table_param->items_to_copy, table->in_use))
......@@ -3902,7 +3922,8 @@ bool Item_func_group_concat::add()
Field *field= show_item->get_tmp_table_field();
if (field)
{
if (field->is_null_in_record((const uchar*) table->record[0]))
if (field->is_null_in_record((const uchar*) table->record[0]) &&
exclude_nulls)
return 0; // Skip row if it contains null
if (tree && (res= field->val_str(&buf)))
row_str_len+= res->length();
......
......@@ -355,7 +355,7 @@ class Item_sum :public Item_func_or_sum
ROW_NUMBER_FUNC, RANK_FUNC, DENSE_RANK_FUNC, PERCENT_RANK_FUNC,
CUME_DIST_FUNC, NTILE_FUNC, FIRST_VALUE_FUNC, LAST_VALUE_FUNC,
NTH_VALUE_FUNC, LEAD_FUNC, LAG_FUNC, PERCENTILE_CONT_FUNC,
PERCENTILE_DISC_FUNC, SP_AGGREGATE_FUNC
PERCENTILE_DISC_FUNC, SP_AGGREGATE_FUNC, JSON_ARRAYAGG_FUNC
};
Item **ref_by; /* pointer to a ref to the object used to register it */
......@@ -428,6 +428,7 @@ class Item_sum :public Item_func_or_sum
case SUM_BIT_FUNC:
case UDF_SUM_FUNC:
case GROUP_CONCAT_FUNC:
case JSON_ARRAYAGG_FUNC:
return true;
default:
return false;
......@@ -1794,6 +1795,7 @@ C_MODE_END
class Item_func_group_concat : public Item_sum
{
protected:
TMP_TABLE_PARAM *tmp_table_param;
String result;
String *separator;
......@@ -1839,6 +1841,12 @@ class Item_func_group_concat : public Item_sum
*/
Item_func_group_concat *original;
/*
Used by Item_func_group_concat and Item_func_json_arrayagg. The latter
needs null values but the former doesn't.
*/
bool add(bool exclude_nulls);
friend int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
const void* key2);
friend int group_concat_key_cmp_with_order(void* arg, const void* key1,
......@@ -1876,7 +1884,10 @@ class Item_func_group_concat : public Item_sum
return &type_handler_varchar;
}
void clear();
bool add();
bool add()
{
return add(true);
}
void reset_field() { DBUG_ASSERT(0); } // not used
void update_field() { DBUG_ASSERT(0); } // not used
bool fix_fields(THD *,Item **);
......
......@@ -741,6 +741,7 @@ static SYMBOL sql_functions[] = {
{ "EXTRACT", SYM(EXTRACT_SYM)},
{ "FIRST_VALUE", SYM(FIRST_VALUE_SYM)},
{ "GROUP_CONCAT", SYM(GROUP_CONCAT_SYM)},
{ "JSON_ARRAYAGG", SYM(JSON_ARRAYAGG_SYM)},
{ "LAG", SYM(LAG_SYM)},
{ "LEAD", SYM(LEAD_SYM)},
{ "MAX", SYM(MAX_SYM)},
......
......@@ -957,6 +957,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize);
%token GRANT /* SQL-2003-R */
%token GROUP_SYM /* SQL-2003-R */
%token GROUP_CONCAT_SYM
%token JSON_ARRAYAGG_SYM
%token LAG_SYM /* SQL-2011 */
%token LEAD_SYM /* SQL-2011 */
%token HAVING /* SQL-2003-R */
......@@ -11326,6 +11327,31 @@ sum_expr:
$5->empty();
sel->gorder_list.empty();
}
| JSON_ARRAYAGG_SYM '(' opt_distinct
{ Select->in_sum_expr++; }
expr_list opt_glimit_clause
')'
{
SELECT_LEX *sel= Select;
sel->in_sum_expr--;
String* s= new (thd->mem_root) String(",", 1, &my_charset_latin1);
if (unlikely(s == NULL))
MYSQL_YYABORT;
$$= new (thd->mem_root)
Item_func_json_arrayagg(thd, Lex->current_context(),
$3, $5,
sel->gorder_list, s, $6,
sel->select_limit,
sel->offset_limit);
if (unlikely($$ == NULL))
MYSQL_YYABORT;
sel->select_limit= NULL;
sel->offset_limit= NULL;
sel->explicit_limit= 0;
$5->empty();
sel->gorder_list.empty();
}
;
window_func_expr:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment