handle tree overflow in count(distinct)

test heap table/tree overflow in count(distinct)
parent ac379c40
...@@ -74,3 +74,11 @@ count(distinct n2) n1 ...@@ -74,3 +74,11 @@ count(distinct n2) n1
1 NULL 1 NULL
1 1 1 1
3 2 3 2
count(distinct n)
5000
Variable_name Value
Created_tmp_disk_tables 1
count(distinct s)
5000
Variable_name Value
Created_tmp_disk_tables 1
-O max_heap_table_size=16384
...@@ -43,3 +43,32 @@ select count(distinct n1), count(distinct n2) from t1; ...@@ -43,3 +43,32 @@ select count(distinct n1), count(distinct n2) from t1;
select count(distinct n2), n1 from t1 group by n1; select count(distinct n2), n1 from t1 group by n1;
drop table t1; drop table t1;
# test the converstion from tree to MyISAM
create table t1 (n int);
let $1=5000;
while ($1)
{
eval insert into t1 values($1);
dec $1;
}
flush status;
select count(distinct n) from t1;
show status like 'Created_tmp_disk_tables';
drop table t1;
#test conversion from heap to MyISAM
create table t1 (s text);
let $1=5000;
while ($1)
{
eval insert into t1 values('$1');
dec $1;
}
flush status;
select count(distinct s) from t1;
show status like 'Created_tmp_disk_tables';
drop table t1;
...@@ -830,7 +830,26 @@ int composite_key_cmp(void* arg, byte* key1, byte* key2) ...@@ -830,7 +830,26 @@ int composite_key_cmp(void* arg, byte* key1, byte* key2)
return 0; return 0;
} }
// helper function for walking the tree when we dump it to MyISAM -
// tree_walk will call it for each
// leaf
int dump_leaf(byte* key, uint32 count __attribute__((unused)),
Item_sum_count_distinct* item)
{
char* buf = item->table->record[0];
int error;
memset(buf, 0xff, item->rec_offset); // make up for cheating in the tree
memcpy(buf + item->rec_offset, key, item->tree.size_of_element);
if ((error = item->table->file->write_row(buf)))
{
if (error != HA_ERR_FOUND_DUPP_KEY &&
error != HA_ERR_FOUND_DUPP_UNIQUE)
return 1;
}
return 0;
}
Item_sum_count_distinct::~Item_sum_count_distinct() Item_sum_count_distinct::~Item_sum_count_distinct()
{ {
...@@ -916,11 +935,29 @@ bool Item_sum_count_distinct::setup(THD *thd) ...@@ -916,11 +935,29 @@ bool Item_sum_count_distinct::setup(THD *thd)
key_len, compare_key, 0, 0); key_len, compare_key, 0, 0);
tree.cmp_arg = cmp_arg; tree.cmp_arg = cmp_arg;
use_tree = 1; use_tree = 1;
// the only time key_len could be 0 is if someone does
// count(distinct) on a char(0) field - stupid thing to do,
// but this has to be handled - otherwise someone can crash
// the server with a DoS attack
max_elements_in_tree = (key_len) ? max_heap_table_size/key_len :
max_heap_table_size;
} }
return 0; return 0;
} }
int Item_sum_count_distinct::tree_to_myisam()
{
if(create_myisam_from_heap(table, tmp_table_param,
HA_ERR_RECORD_FILE_FULL, 1) ||
tree_walk(&tree, (tree_walk_action)&dump_leaf, (void*)this,
left_root_right))
return 1;
delete_tree(&tree);
use_tree = 0;
return 0;
}
void Item_sum_count_distinct::reset() void Item_sum_count_distinct::reset()
{ {
...@@ -947,7 +984,11 @@ bool Item_sum_count_distinct::add() ...@@ -947,7 +984,11 @@ bool Item_sum_count_distinct::add()
if(use_tree) if(use_tree)
{ {
if(!tree_insert(&tree, table->record[0] + rec_offset, 0)) // if the tree got too big, convert to MyISAM, otherwise
// insert into the tree
if((tree.elements_in_tree > max_elements_in_tree && tree_to_myisam())
||
!tree_insert(&tree, table->record[0] + rec_offset, 0))
return 1; return 1;
} }
else if ((error=table->file->write_row(table->record[0]))) else if ((error=table->file->write_row(table->record[0])))
......
...@@ -148,14 +148,25 @@ class Item_sum_count_distinct :public Item_sum_int ...@@ -148,14 +148,25 @@ class Item_sum_count_distinct :public Item_sum_int
bool fix_fields(THD *thd,TABLE_LIST *tables); bool fix_fields(THD *thd,TABLE_LIST *tables);
TMP_TABLE_PARAM *tmp_table_param; TMP_TABLE_PARAM *tmp_table_param;
TREE tree; TREE tree;
bool use_tree; // If there are no blobs, we can use a tree, which uint max_elements_in_tree;
// calculated based on max_heap_table_size. If reached,
// walk the tree and dump it into MyISAM table
bool use_tree;
// If there are no blobs, we can use a tree, which
// is faster than heap table. In that case, we still use the table // is faster than heap table. In that case, we still use the table
// to help get things set up, but we insert nothing in it // to help get things set up, but we insert nothing in it
int rec_offset; // the first few bytes of record ( at least one)
int rec_offset;
// the first few bytes of record ( at least one)
// are just markers for deleted and NULLs. We want to skip them since // are just markers for deleted and NULLs. We want to skip them since
// they will just bloat the tree without providing any valuable info // they will just bloat the tree without providing any valuable info
int tree_to_myisam();
friend int composite_key_cmp(void* arg, byte* key1, byte* key2); friend int composite_key_cmp(void* arg, byte* key1, byte* key2);
friend int dump_leaf(byte* key, uint32 count __attribute__((unused)),
Item_sum_count_distinct* item);
public: public:
Item_sum_count_distinct(List<Item> &list) Item_sum_count_distinct(List<Item> &list)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment