use tree for count(distinct) when possible

parent a12117f0
...@@ -57,7 +57,7 @@ typedef struct st_tree { ...@@ -57,7 +57,7 @@ typedef struct st_tree {
void (*free)(void *); void (*free)(void *);
} TREE; } TREE;
/* Functions on hole tree */ /* Functions on whole tree */
void init_tree(TREE *tree,uint default_alloc_size, int element_size, void init_tree(TREE *tree,uint default_alloc_size, int element_size,
qsort_cmp2 compare, my_bool with_delete, qsort_cmp2 compare, my_bool with_delete,
void (*free_element)(void*)); void (*free_element)(void*));
......
...@@ -84,7 +84,7 @@ void init_tree(TREE *tree, uint default_alloc_size, int size, ...@@ -84,7 +84,7 @@ void init_tree(TREE *tree, uint default_alloc_size, int size,
((uint) size <= sizeof(void*) || ((uint) size & (sizeof(void*)-1)))) ((uint) size <= sizeof(void*) || ((uint) size & (sizeof(void*)-1))))
{ {
tree->offset_to_key=sizeof(TREE_ELEMENT); /* Put key after element */ tree->offset_to_key=sizeof(TREE_ELEMENT); /* Put key after element */
/* Fix allocation size so that we don't loose any memory */ /* Fix allocation size so that we don't lose any memory */
default_alloc_size/=(sizeof(TREE_ELEMENT)+size); default_alloc_size/=(sizeof(TREE_ELEMENT)+size);
if (!default_alloc_size) if (!default_alloc_size)
default_alloc_size=1; default_alloc_size=1;
......
...@@ -788,11 +788,56 @@ String *Item_std_field::val_str(String *str) ...@@ -788,11 +788,56 @@ String *Item_std_field::val_str(String *str)
#include "sql_select.h" #include "sql_select.h"
static int simple_raw_key_cmp(void* arg, byte* key1, byte* key2)
{
return memcmp(key1, key2, (int)arg);
}
static int simple_str_key_cmp(void* arg, byte* key1, byte* key2)
{
return my_sortcmp(key1, key2, (int)arg);
}
// did not make this one static - at least gcc gets confused when
// I try to declare a static function as a friend. If you can figure
// out the syntax to make a static function a friend, make this one
// static
int composite_key_cmp(void* arg, byte* key1, byte* key2)
{
Item_sum_count_distinct* item = (Item_sum_count_distinct*)arg;
Field** field = item->table->field, **field_end;
field_end = field + item->table->fields;
for(; field < field_end; ++field)
{
int res;
int len = (*field)->field_length;
switch((*field)->type())
{
case FIELD_TYPE_STRING:
case FIELD_TYPE_VAR_STRING:
res = my_sortcmp(key1, key2, len);
break;
default:
res = memcmp(key1, key2, len);
break;
}
if(res)
return res;
key1 += len;
key2 += len;
}
return 0;
}
Item_sum_count_distinct::~Item_sum_count_distinct() Item_sum_count_distinct::~Item_sum_count_distinct()
{ {
if (table) if (table)
free_tmp_table(current_thd, table); free_tmp_table(current_thd, table);
delete tmp_table_param; delete tmp_table_param;
if(use_tree)
delete_tree(&tree);
} }
...@@ -821,6 +866,53 @@ bool Item_sum_count_distinct::setup(THD *thd) ...@@ -821,6 +866,53 @@ bool Item_sum_count_distinct::setup(THD *thd)
0, 0, current_lex->options | thd->options))) 0, 0, current_lex->options | thd->options)))
return 1; return 1;
table->file->extra(HA_EXTRA_NO_ROWS); // Don't update rows table->file->extra(HA_EXTRA_NO_ROWS); // Don't update rows
if(table->db_type == DB_TYPE_HEAP) // no blobs, otherwise it would be
// MyISAM
{
qsort_cmp2 compare_key;
void* cmp_arg;
int key_len;
if(table->fields == 1) // if we have only one field, which is
// the most common use of count(distinct), it is much faster
// to use a simpler key compare method that can take advantage
// of not having to worry about other fields
{
switch(table->field[0]->type())
{
// if we have a string, we must take care of charsets
// and case sensitivity
case FIELD_TYPE_STRING:
case FIELD_TYPE_VAR_STRING:
compare_key = (qsort_cmp2)simple_str_key_cmp;
break;
default: // since at this point we cannot have blobs
// anything else can be compared with memcmp
compare_key = (qsort_cmp2)simple_raw_key_cmp;
break;
}
cmp_arg = (void*)(key_len = table->field[0]->field_length);
rec_offset = 1;
}
else // too bad, cannot cheat - there is more than one field
{
cmp_arg = (void*)this;
compare_key = (qsort_cmp2)composite_key_cmp;
Field** field, **field_end;
field_end = (field = table->field) + table->fields;
for(key_len = 0; field < field_end; ++field)
{
key_len += (*field)->field_length;
}
rec_offset = table->reclength - key_len;
}
init_tree(&tree, 0, key_len, compare_key, 0, 0);
tree.cmp_arg = cmp_arg;
use_tree = 1;
}
return 0; return 0;
} }
...@@ -830,6 +922,8 @@ void Item_sum_count_distinct::reset() ...@@ -830,6 +922,8 @@ void Item_sum_count_distinct::reset()
table->file->extra(HA_EXTRA_NO_CACHE); table->file->extra(HA_EXTRA_NO_CACHE);
table->file->delete_all_rows(); table->file->delete_all_rows();
table->file->extra(HA_EXTRA_WRITE_CACHE); table->file->extra(HA_EXTRA_WRITE_CACHE);
if(use_tree)
delete_tree(&tree);
(void) add(); (void) add();
} }
...@@ -843,7 +937,12 @@ bool Item_sum_count_distinct::add() ...@@ -843,7 +937,12 @@ bool Item_sum_count_distinct::add()
if ((*field)->is_real_null(0)) if ((*field)->is_real_null(0))
return 0; // Don't count NULL return 0; // Don't count NULL
if ((error=table->file->write_row(table->record[0]))) if(use_tree)
{
if(!tree_insert(&tree, table->record[0] + rec_offset, 0))
return 1;
}
else if ((error=table->file->write_row(table->record[0])))
{ {
if (error != HA_ERR_FOUND_DUPP_KEY && if (error != HA_ERR_FOUND_DUPP_KEY &&
error != HA_ERR_FOUND_DUPP_UNIQUE) error != HA_ERR_FOUND_DUPP_UNIQUE)
...@@ -859,6 +958,8 @@ longlong Item_sum_count_distinct::val_int() ...@@ -859,6 +958,8 @@ longlong Item_sum_count_distinct::val_int()
{ {
if (!table) // Empty query if (!table) // Empty query
return LL(0); return LL(0);
if(use_tree)
return tree.elements_in_tree;
table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK); table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
return table->file->records; return table->file->records;
} }
......
...@@ -21,6 +21,8 @@ ...@@ -21,6 +21,8 @@
#pragma interface /* gcc class implementation */ #pragma interface /* gcc class implementation */
#endif #endif
#include <my_tree.h>
class Item_sum :public Item_result_field class Item_sum :public Item_result_field
{ {
public: public:
...@@ -145,11 +147,20 @@ class Item_sum_count_distinct :public Item_sum_int ...@@ -145,11 +147,20 @@ class Item_sum_count_distinct :public Item_sum_int
table_map used_table_cache; table_map used_table_cache;
bool fix_fields(THD *thd,TABLE_LIST *tables); bool fix_fields(THD *thd,TABLE_LIST *tables);
TMP_TABLE_PARAM *tmp_table_param; TMP_TABLE_PARAM *tmp_table_param;
TREE tree;
bool use_tree; // If there are no blobs, we can use a tree, which
// is faster than heap table. In that case, we still use the table
// to help get things set up, but we insert nothing in it
int rec_offset; // the first few bytes of record ( at least one)
// are just markers for deleted and NULLs. We want to skip them since
// they will just bloat the tree without providing any valuable info
friend int composite_key_cmp(void* arg, byte* key1, byte* key2);
public: public:
Item_sum_count_distinct(List<Item> &list) Item_sum_count_distinct(List<Item> &list)
:Item_sum_int(list),table(0),used_table_cache(~(table_map) 0), :Item_sum_int(list),table(0),used_table_cache(~(table_map) 0),
tmp_table_param(0) tmp_table_param(0),use_tree(0)
{ quick_group=0; } { quick_group=0; }
~Item_sum_count_distinct(); ~Item_sum_count_distinct();
table_map used_tables() const { return used_table_cache; } table_map used_tables() const { return used_table_cache; }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment