Commit 979cad22 authored by Sergei Golubchik's avatar Sergei Golubchik

MDEV-9531 GROUP_CONCAT with ORDER BY inside takes a lot of memory while it's executed

group concat tree is allocated in a memroot, so the only way to free
memory is to copy a part of the tree into a new memroot.

track the accumulated length of the result, and when it crosses
the threshold, copy the result into a new tree and free the old one.
parent e91fd878
...@@ -1199,3 +1199,11 @@ Warning 1260 Row 3 was cut by GROUP_CONCAT() ...@@ -1199,3 +1199,11 @@ Warning 1260 Row 3 was cut by GROUP_CONCAT()
Warning 1260 Row 5 was cut by GROUP_CONCAT() Warning 1260 Row 5 was cut by GROUP_CONCAT()
DROP TABLE t1; DROP TABLE t1;
SET group_concat_max_len= DEFAULT; SET group_concat_max_len= DEFAULT;
set max_session_mem_used=16*1024*1024;
SELECT GROUP_CONCAT(concat(seq/1.1), concat(seq/1.1), concat(seq/1.1), concat(seq/1.1), concat(seq/1.1), concat(seq/1.1), concat(seq/1.1), concat(seq/1.1) ORDER BY 2,1,3,4,6,5,8,7) AS c
FROM seq_1_to_200000;
c
0.90910.90910.90910.90910.90910.90910.90910.9091,1.81821.81821.81821.81821.81821.81821.81821.8182,10.000010.000010.000010.000010.000010.000010.000010.0000,10.909110.909110.909110.909110.909110.909110.909110.9091,100.0000100.0000100.0000100.0000100.0000100.0000100.0000100.0000,100.9091100.9091100.9091100.9091100.9091100.9091100.9091100.9091,1000.00001000.00001000.00001000.00001000.00001000.00001000.00001000.0000,1000.90911000.90911000.90911000.90911000.90911000.90911000.90911000.9091,10000.000010000.000010000.000010000.000010000.000010000.000010000.000010000.0000,10000.909110000.909110000.909110000.909110000.909110000.909110000.909110000.9091,100000.0000100000.0000100000.0000100000.0000100000.0000100000.0000100000.0000100000.0000,100000.9091100000.9091100000.9091100000.9091100000.9091100000.9091100000.9091100000.9091,100001.8182100001.8182100001.8182100001.8182100001.8182100001.8182100001.8182100001.8182,100002.7273100002.7273100002.7273100002.7273100002.7273100002.7273100002.7273100002.7273,100003.6364100003.
Warnings:
Warning 1260 Row 15 was cut by GROUP_CONCAT()
set max_session_mem_used=default;
# #
# simple test of group_concat function # simple test of group_concat function
# #
source include/have_sequence.inc;
create table t1 (grp int, a bigint unsigned, c char(10) not null, d char(10) not null); create table t1 (grp int, a bigint unsigned, c char(10) not null, d char(10) not null);
insert into t1 values (1,1,"a","a"); insert into t1 values (1,1,"a","a");
...@@ -870,3 +871,11 @@ SELECT LENGTH(GROUP_CONCAT(f1 ORDER BY f2)) FROM t1 GROUP BY f2; ...@@ -870,3 +871,11 @@ SELECT LENGTH(GROUP_CONCAT(f1 ORDER BY f2)) FROM t1 GROUP BY f2;
DROP TABLE t1; DROP TABLE t1;
SET group_concat_max_len= DEFAULT; SET group_concat_max_len= DEFAULT;
#
# MDEV-9531 GROUP_CONCAT with ORDER BY inside takes a lot of memory while it's executed
#
set max_session_mem_used=16*1024*1024; # 8M..32M
SELECT GROUP_CONCAT(concat(seq/1.1), concat(seq/1.1), concat(seq/1.1), concat(seq/1.1), concat(seq/1.1), concat(seq/1.1), concat(seq/1.1), concat(seq/1.1) ORDER BY 2,1,3,4,6,5,8,7) AS c
FROM seq_1_to_200000;
set max_session_mem_used=default;
...@@ -3168,6 +3168,7 @@ Item_func_group_concat::Item_func_group_concat(THD *thd, ...@@ -3168,6 +3168,7 @@ Item_func_group_concat::Item_func_group_concat(THD *thd,
tmp_table_param(item->tmp_table_param), tmp_table_param(item->tmp_table_param),
separator(item->separator), separator(item->separator),
tree(item->tree), tree(item->tree),
tree_len(item->tree_len),
unique_filter(item->unique_filter), unique_filter(item->unique_filter),
table(item->table), table(item->table),
context(item->context), context(item->context),
...@@ -3292,7 +3293,10 @@ void Item_func_group_concat::clear() ...@@ -3292,7 +3293,10 @@ void Item_func_group_concat::clear()
warning_for_row= FALSE; warning_for_row= FALSE;
no_appended= TRUE; no_appended= TRUE;
if (tree) if (tree)
{
reset_tree(tree); reset_tree(tree);
tree_len= 0;
}
if (unique_filter) if (unique_filter)
unique_filter->reset(); unique_filter->reset();
if (table && table->blob_storage) if (table && table->blob_storage)
...@@ -3300,6 +3304,62 @@ void Item_func_group_concat::clear() ...@@ -3300,6 +3304,62 @@ void Item_func_group_concat::clear()
/* No need to reset the table as we never call write_row */ /* No need to reset the table as we never call write_row */
} }
/*
  State shared between Item_func_group_concat::repack_tree() and its
  tree_walk() callback copy_to_tree().
*/
struct st_repack_tree {
  TREE tree;        /* destination tree that receives the copied keys */
  TABLE *table;     /* table whose field[0] is read for each row's length */
  size_t len;       /* sum of the lengths of the rows copied so far */
  size_t maxlen;    /* copying stops as soon as len exceeds this value */
};
extern "C"
int copy_to_tree(void* key, element_count count __attribute__((unused)),
void* arg)
{
struct st_repack_tree *st= (struct st_repack_tree*)arg;
TABLE *table= st->table;
Field* field= table->field[0];
const uchar *ptr= field->ptr_in_record((uchar*)key - table->s->null_bytes);
size_t len= field->val_int(ptr);
DBUG_ASSERT(count == 1);
if (!tree_insert(&st->tree, key, 0, st->tree.custom_arg))
return 1;
st->len += len;
return st->len > st->maxlen;
}
/**
  Replace the ORDER BY tree with a smaller copy of itself.

  Walks the current tree in key order and copies its elements into a fresh
  tree until the accumulated row length exceeds group_concat_max_len. The
  old tree is then freed and the copy takes its place.

  @param thd  current thread; provides the memory limits for the new tree
              and the group_concat_max_len threshold

  @retval 0  the tree was replaced by the smaller copy, tree_len is updated
  @retval 1  copying stopped before the threshold was crossed (treated as
             out of memory); the original tree is left untouched
*/
bool Item_func_group_concat::repack_tree(THD *thd)
{
  struct st_repack_tree st;

  /*
    init_tree() adds sizeof(void*) to size_of_element when the key is
    accessed through a pointer (offset_to_key == 0). Undo that, otherwise
    the elements of the new tree would be larger than those of the old one.
  */
  int size= tree->size_of_element;
  if (!tree->offset_to_key)
    size-= sizeof(void*);

  init_tree(&st.tree, MY_MIN(thd->variables.max_heap_table_size,
                             thd->variables.sortbuff_size/16), 0,
            size, group_concat_key_cmp_with_order, NULL,
            (void*) this, MYF(MY_THREAD_SPECIFIC));
  DBUG_ASSERT(tree->size_of_element == st.tree.size_of_element);

  st.table= table;
  st.len= 0;
  st.maxlen= thd->variables.group_concat_max_len;
  tree_walk(tree, &copy_to_tree, &st, left_root_right);
  if (st.len <= st.maxlen) // Copying aborted. Must be OOM
  {
    delete_tree(&st.tree);
    return 1;
  }

  /*
    Free and replace the tree that was actually walked. 'tree' is not
    necessarily &tree_base: the copy constructor copies the pointer from
    the source item.
  */
  delete_tree(tree);
  *tree= st.tree;
  tree_len= st.len;
  return 0;
}
/*
  Repacking the tree is expensive, but it keeps the tree small, and
  inserting into an unnecessarily large tree is also a waste of time.
  add() repacks when tree_len exceeds
  group_concat_max_len * GCONCAT_REPACK_FACTOR and the tree holds more
  than one element.

  The following number is best-by-test. Test execution time slowly
  decreases up to N=10 (that is, factor=1024) and then starts to increase
  again, very slowly.
*/
#define GCONCAT_REPACK_FACTOR (1 << 10)
bool Item_func_group_concat::add() bool Item_func_group_concat::add()
{ {
...@@ -3309,6 +3369,9 @@ bool Item_func_group_concat::add() ...@@ -3309,6 +3369,9 @@ bool Item_func_group_concat::add()
if (copy_funcs(tmp_table_param->items_to_copy, table->in_use)) if (copy_funcs(tmp_table_param->items_to_copy, table->in_use))
return TRUE; return TRUE;
size_t row_str_len= 0;
StringBuffer<MAX_FIELD_WIDTH> buf;
String *res;
for (uint i= 0; i < arg_count_field; i++) for (uint i= 0; i < arg_count_field; i++)
{ {
Item *show_item= args[i]; Item *show_item= args[i];
...@@ -3316,8 +3379,13 @@ bool Item_func_group_concat::add() ...@@ -3316,8 +3379,13 @@ bool Item_func_group_concat::add()
continue; continue;
Field *field= show_item->get_tmp_table_field(); Field *field= show_item->get_tmp_table_field();
if (field && field->is_null_in_record((const uchar*) table->record[0])) if (field)
return 0; // Skip row if it contains null {
if (field->is_null_in_record((const uchar*) table->record[0]))
return 0; // Skip row if it contains null
if (tree && (res= field->val_str(&buf)))
row_str_len+= res->length();
}
} }
null_value= FALSE; null_value= FALSE;
...@@ -3335,11 +3403,18 @@ bool Item_func_group_concat::add() ...@@ -3335,11 +3403,18 @@ bool Item_func_group_concat::add()
TREE_ELEMENT *el= 0; // Only for safety TREE_ELEMENT *el= 0; // Only for safety
if (row_eligible && tree) if (row_eligible && tree)
{ {
THD *thd= table->in_use;
table->field[0]->store(row_str_len);
if (tree_len > thd->variables.group_concat_max_len * GCONCAT_REPACK_FACTOR
&& tree->elements_in_tree > 1)
if (repack_tree(thd))
return 1;
el= tree_insert(tree, table->record[0] + table->s->null_bytes, 0, el= tree_insert(tree, table->record[0] + table->s->null_bytes, 0,
tree->custom_arg); tree->custom_arg);
/* check if there was enough memory to insert the row */ /* check if there was enough memory to insert the row */
if (!el) if (!el)
return 1; return 1;
tree_len+= row_str_len;
} }
/* /*
If the row is not a duplicate (el->count == 1) If the row is not a duplicate (el->count == 1)
...@@ -3471,10 +3546,19 @@ bool Item_func_group_concat::setup(THD *thd) ...@@ -3471,10 +3546,19 @@ bool Item_func_group_concat::setup(THD *thd)
if (setup_order(thd, ref_pointer_array, context->table_list, list, if (setup_order(thd, ref_pointer_array, context->table_list, list,
all_fields, *order)) all_fields, *order))
DBUG_RETURN(TRUE); DBUG_RETURN(TRUE);
/*
Prepend the field to store the length of the string representation
of this row. Used to detect when the tree goes over group_concat_max_len
*/
Item *item= new (thd->mem_root)
Item_int(thd, thd->variables.group_concat_max_len);
if (!item || all_fields.push_front(item, thd->mem_root))
DBUG_RETURN(TRUE);
} }
count_field_types(select_lex, tmp_table_param, all_fields, 0); count_field_types(select_lex, tmp_table_param, all_fields, 0);
tmp_table_param->force_copy_fields= force_copy_fields; tmp_table_param->force_copy_fields= force_copy_fields;
tmp_table_param->hidden_field_count= (arg_count_order > 0);
DBUG_ASSERT(table == 0); DBUG_ASSERT(table == 0);
if (order_or_distinct) if (order_or_distinct)
{ {
...@@ -3533,11 +3617,12 @@ bool Item_func_group_concat::setup(THD *thd) ...@@ -3533,11 +3617,12 @@ bool Item_func_group_concat::setup(THD *thd)
syntax of this function). If there is no ORDER BY clause, we don't syntax of this function). If there is no ORDER BY clause, we don't
create this tree. create this tree.
*/ */
init_tree(tree, (uint) MY_MIN(thd->variables.max_heap_table_size, init_tree(tree, MY_MIN(thd->variables.max_heap_table_size,
thd->variables.sortbuff_size/16), 0, thd->variables.sortbuff_size/16), 0,
tree_key_length, tree_key_length,
group_concat_key_cmp_with_order, NULL, (void*) this, group_concat_key_cmp_with_order, NULL, (void*) this,
MYF(MY_THREAD_SPECIFIC)); MYF(MY_THREAD_SPECIFIC));
tree_len= 0;
} }
if (distinct) if (distinct)
......
...@@ -1431,6 +1431,7 @@ class Item_func_group_concat : public Item_sum ...@@ -1431,6 +1431,7 @@ class Item_func_group_concat : public Item_sum
String *separator; String *separator;
TREE tree_base; TREE tree_base;
TREE *tree; TREE *tree;
size_t tree_len;
Item **ref_pointer_array; Item **ref_pointer_array;
/** /**
...@@ -1468,6 +1469,8 @@ class Item_func_group_concat : public Item_sum ...@@ -1468,6 +1469,8 @@ class Item_func_group_concat : public Item_sum
element_count count __attribute__((unused)), element_count count __attribute__((unused)),
void* item_arg); void* item_arg);
bool repack_tree(THD *thd);
public: public:
Item_func_group_concat(THD *thd, Name_resolution_context *context_arg, Item_func_group_concat(THD *thd, Name_resolution_context *context_arg,
bool is_distinct, List<Item> *is_select, bool is_distinct, List<Item> *is_select,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment