add VARIANCE function

parent 710881e4
......@@ -90,6 +90,7 @@ tonu@x153.internalnet
tonu@x3.internalnet
venu@myvenu.com
venu@work.mysql.com
vva@eagle.mysql.r18.ru
vva@genie.(none)
walrus@mysql.com
wax@mysql.com
......
......@@ -42,21 +42,21 @@ insert into t1 values (null,null,'');
select count(distinct a),count(distinct grp) from t1;
count(distinct a) count(distinct grp)
6 3
select sum(a),count(a),avg(a),std(a),bit_or(a),bit_and(a),min(a),max(a),min(c),max(c) from t1;
sum(a) count(a) avg(a) std(a) bit_or(a) bit_and(a) min(a) max(a) min(c) max(c)
21 6 3.5000 1.7078 7 0 1 6 E
select grp, sum(a),count(a),avg(a),std(a),bit_or(a),bit_and(a),min(a),max(a),min(c),max(c) from t1 group by grp;
grp sum(a) count(a) avg(a) std(a) bit_or(a) bit_and(a) min(a) max(a) min(c) max(c)
NULL 0 0 NULL NULL 0 0 NULL NULL
1 1 1 1.0000 0.0000 1 1 1 1 a a
2 5 2 2.5000 0.5000 3 2 2 3 b c
3 15 3 5.0000 0.8165 7 4 4 6 C E
select grp, sum(a)+count(a)+avg(a)+std(a)+bit_or(a)+bit_and(a)+min(a)+max(a)+min(c)+max(c) as sum from t1 group by grp;
select sum(a),count(a),avg(a),std(a),variance(a),bit_or(a),bit_and(a),min(a),max(a),min(c),max(c) from t1;
sum(a) count(a) avg(a) std(a) variance(a) bit_or(a) bit_and(a) min(a) max(a) min(c) max(c)
21 6 3.5000 1.7078 2.9167 7 0 1 6 E
select grp, sum(a),count(a),avg(a),std(a),variance(a),bit_or(a),bit_and(a),min(a),max(a),min(c),max(c) from t1 group by grp;
grp sum(a) count(a) avg(a) std(a) variance(a) bit_or(a) bit_and(a) min(a) max(a) min(c) max(c)
NULL 0 0 NULL NULL NULL 0 0 NULL NULL
1 1 1 1.0000 0.0000 0.0000 1 1 1 1 a a
2 5 2 2.5000 0.5000 0.2500 3 2 2 3 b c
3 15 3 5.0000 0.8165 0.6667 7 4 4 6 C E
select grp, sum(a)+count(a)+avg(a)+std(a)+variance(a)+bit_or(a)+bit_and(a)+min(a)+max(a)+min(c)+max(c) as sum from t1 group by grp;
grp sum
NULL NULL
1 7
2 20
3 44.816496580928
2 20.25
3 45.483163247594
create table t2 (grp int, a bigint unsigned, c char(10));
insert into t2 select grp,max(a)+max(grp),max(c) from t1 group by grp;
replace into t2 select grp, a, c from t1 limit 2,1;
......@@ -72,14 +72,14 @@ CREATE TABLE t1 (id int(11),value1 float(10,2));
INSERT INTO t1 VALUES (1,0.00),(1,1.00), (1,2.00), (2,10.00), (2,11.00), (2,12.00);
CREATE TABLE t2 (id int(11),name char(20));
INSERT INTO t2 VALUES (1,'Set One'),(2,'Set Two');
select id, avg(value1), std(value1) from t1 group by id;
id avg(value1) std(value1)
1 1.000000 0.816497
2 11.000000 0.816497
select name, avg(value1), std(value1) from t1, t2 where t1.id = t2.id group by t1.id;
name avg(value1) std(value1)
Set One 1.000000 0.816497
Set Two 11.000000 0.816497
select id, avg(value1), std(value1), variance(value1) from t1 group by id;
id avg(value1) std(value1) variance(value1)
1 1.000000 0.816497 0.666667
2 11.000000 0.816497 0.666667
select name, avg(value1), std(value1), variance(value1) from t1, t2 where t1.id = t2.id group by t1.id;
name avg(value1) std(value1) variance(value1)
Set One 1.000000 0.816497 0.666667
Set Two 11.000000 0.816497 0.666667
drop table t1,t2;
create table t1 (id int not null);
create table t2 (id int not null,rating int null);
......
......@@ -62,4 +62,8 @@ select Fld1, max(Fld2) from t1 group by Fld1 having std(Fld2) is not null;
Fld1 max(Fld2)
1 20
3 50
select Fld1, max(Fld2) from t1 group by Fld1 having variance(Fld2) is not null;
Fld1 max(Fld2)
1 20
3 50
drop table t1;
......@@ -2656,14 +2656,14 @@ companynr count(*)
58 23
53 4
50 11
select count(*),min(fld4),max(fld4),sum(fld1),avg(fld1),std(fld1) from t2 where companynr = 34 and fld4<>"";
count(*) min(fld4) max(fld4) sum(fld1) avg(fld1) std(fld1)
70 absentee vest 17788966 254128.0857 3272.5940
select companynr,count(*),min(fld4),max(fld4),sum(fld1),avg(fld1),std(fld1) from t2 group by companynr limit 3;
companynr count(*) min(fld4) max(fld4) sum(fld1) avg(fld1) std(fld1)
00 82 Anthony windmills 10355753 126289.6707 115550.9757
29 95 abut wetness 14473298 152350.5053 8368.5480
34 70 absentee vest 17788966 254128.0857 3272.5940
select count(*),min(fld4),max(fld4),sum(fld1),avg(fld1),std(fld1),variance(fld1) from t2 where companynr = 34 and fld4<>"";
count(*) min(fld4) max(fld4) sum(fld1) avg(fld1) std(fld1) variance(fld1)
70 absentee vest 17788966 254128.0857 3272.5940 10709871.3069
select companynr,count(*),min(fld4),max(fld4),sum(fld1),avg(fld1),std(fld1),variance(fld1) from t2 group by companynr limit 3;
companynr count(*) min(fld4) max(fld4) sum(fld1) avg(fld1) std(fld1) variance(fld1)
00 82 Anthony windmills 10355753 126289.6707 115550.9757 13352027981.7087
29 95 abut wetness 14473298 152350.5053 8368.5480 70032594.9026
34 70 absentee vest 17788966 254128.0857 3272.5940 10709871.3069
select companynr,t2nr,count(price),sum(price),min(price),max(price),avg(price) from t3 where companynr = 37 group by companynr,t2nr limit 10;
companynr t2nr count(price) sum(price) min(price) max(price) avg(price)
37 1 1 5987435 5987435 5987435 5987435.0000
......
......@@ -21,9 +21,9 @@ select count(distinct a),count(distinct grp) from t1;
insert into t1 values (null,null,'');
select count(distinct a),count(distinct grp) from t1;
select sum(a),count(a),avg(a),std(a),bit_or(a),bit_and(a),min(a),max(a),min(c),max(c) from t1;
select grp, sum(a),count(a),avg(a),std(a),bit_or(a),bit_and(a),min(a),max(a),min(c),max(c) from t1 group by grp;
select grp, sum(a)+count(a)+avg(a)+std(a)+bit_or(a)+bit_and(a)+min(a)+max(a)+min(c)+max(c) as sum from t1 group by grp;
select sum(a),count(a),avg(a),std(a),variance(a),bit_or(a),bit_and(a),min(a),max(a),min(c),max(c) from t1;
select grp, sum(a),count(a),avg(a),std(a),variance(a),bit_or(a),bit_and(a),min(a),max(a),min(c),max(c) from t1 group by grp;
select grp, sum(a)+count(a)+avg(a)+std(a)+variance(a)+bit_or(a)+bit_and(a)+min(a)+max(a)+min(c)+max(c) as sum from t1 group by grp;
create table t2 (grp int, a bigint unsigned, c char(10));
insert into t2 select grp,max(a)+max(grp),max(c) from t1 group by grp;
......@@ -40,8 +40,8 @@ CREATE TABLE t1 (id int(11),value1 float(10,2));
INSERT INTO t1 VALUES (1,0.00),(1,1.00), (1,2.00), (2,10.00), (2,11.00), (2,12.00);
CREATE TABLE t2 (id int(11),name char(20));
INSERT INTO t2 VALUES (1,'Set One'),(2,'Set Two');
select id, avg(value1), std(value1) from t1 group by id;
select name, avg(value1), std(value1) from t1, t2 where t1.id = t2.id group by t1.id;
select id, avg(value1), std(value1), variance(value1) from t1 group by id;
select name, avg(value1), std(value1), variance(value1) from t1, t2 where t1.id = t2.id group by t1.id;
drop table t1,t2;
#
......
......@@ -59,4 +59,5 @@ select Fld1, max(Fld2) as q from t1 group by Fld1 having q is not null;
select Fld1, max(Fld2) from t1 group by Fld1 having max(Fld2) is not null;
select Fld1, max(Fld2) from t1 group by Fld1 having avg(Fld2) is not null;
select Fld1, max(Fld2) from t1 group by Fld1 having std(Fld2) is not null;
select Fld1, max(Fld2) from t1 group by Fld1 having variance(Fld2) is not null;
drop table t1;
......@@ -1577,8 +1577,8 @@ select fld3 from t2 where (((fld3 like "_%L%" ) or (fld3 like "%ok%")) and ( fld
select count(*) from t1;
select companynr,count(*),sum(fld1) from t2 group by companynr;
select companynr,count(*) from t2 group by companynr order by companynr desc limit 5;
select count(*),min(fld4),max(fld4),sum(fld1),avg(fld1),std(fld1) from t2 where companynr = 34 and fld4<>"";
select companynr,count(*),min(fld4),max(fld4),sum(fld1),avg(fld1),std(fld1) from t2 group by companynr limit 3;
select count(*),min(fld4),max(fld4),sum(fld1),avg(fld1),std(fld1),variance(fld1) from t2 where companynr = 34 and fld4<>"";
select companynr,count(*),min(fld4),max(fld4),sum(fld1),avg(fld1),std(fld1),variance(fld1) from t2 group by companynr limit 3;
select companynr,t2nr,count(price),sum(price),min(price),max(price),avg(price) from t3 where companynr = 37 group by companynr,t2nr limit 10;
select /*! SQL_SMALL_RESULT */ companynr,t2nr,count(price),sum(price),min(price),max(price),avg(price) from t3 where companynr = 37 group by companynr,t2nr limit 10;
select companynr,count(price),sum(price),min(price),max(price),avg(price) from t3 group by companynr ;
......
......@@ -686,7 +686,7 @@ void Item_avg_field::make_field(Send_field *tmp_field)
init_make_field(tmp_field,FIELD_TYPE_DOUBLE);
}
void Item_std_field::make_field(Send_field *tmp_field)
void Item_variance_field::make_field(Send_field *tmp_field)
{
init_make_field(tmp_field,FIELD_TYPE_DOUBLE);
}
......
......@@ -33,7 +33,8 @@ class Item {
enum Type {FIELD_ITEM,FUNC_ITEM,SUM_FUNC_ITEM,STRING_ITEM,
INT_ITEM,REAL_ITEM,NULL_ITEM,VARBIN_ITEM,
COPY_STR_ITEM,FIELD_AVG_ITEM, DEFAULT_ITEM,
PROC_ITEM,COND_ITEM,REF_ITEM,FIELD_STD_ITEM, CONST_ITEM,
PROC_ITEM,COND_ITEM,REF_ITEM,FIELD_STD_ITEM,
FIELD_VARIANCE_ITEM,CONST_ITEM,
SUBSELECT_ITEM, ROW_ITEM};
enum cond_result { COND_UNDEF,COND_OK,COND_TRUE,COND_FALSE };
......
......@@ -255,12 +255,24 @@ double Item_sum_avg::val()
** Standard deviation
*/
void Item_sum_std::reset()
double Item_sum_std::val()
{
sum=sum_sqr=0.0; count=0; (void) Item_sum_std::add();
double tmp= Item_sum_variance::val();
return tmp <= 0.0 ? 0.0 : sqrt(tmp);
}
bool Item_sum_std::add()
/*
** variance
*/
void Item_sum_variance::reset()
{
sum=sum_sqr=0.0;
count=0;
(void) Item_sum_variance::add();
}
bool Item_sum_variance::add()
{
double nr=args[0]->val();
if (!args[0]->null_value)
......@@ -272,7 +284,7 @@ bool Item_sum_std::add()
return 0;
}
double Item_sum_std::val()
double Item_sum_variance::val()
{
if (!count)
{
......@@ -283,11 +295,10 @@ double Item_sum_std::val()
/* Avoid problems when the precision isn't good enough */
double tmp=ulonglong2double(count);
double tmp2=(sum_sqr - sum*sum/tmp)/tmp;
return tmp2 <= 0.0 ? 0.0 : sqrt(tmp2);
return tmp2 <= 0.0 ? 0.0 : tmp2;
}
void Item_sum_std::reset_field()
void Item_sum_variance::reset_field()
{
double nr=args[0]->val();
char *res=result_field->ptr;
......@@ -304,7 +315,7 @@ void Item_sum_std::reset_field()
}
}
void Item_sum_std::update_field(int offset)
void Item_sum_variance::update_field(int offset)
{
double nr,old_nr,old_sqr;
longlong field_count;
......@@ -838,6 +849,17 @@ String *Item_avg_field::val_str(String *str)
}
Item_std_field::Item_std_field(Item_sum_std *item)
: Item_variance_field(item)
{
}
double Item_std_field::val()
{
double tmp= Item_variance_field::val();
return tmp <= 0.0 ? 0.0 : sqrt(tmp);
}
Item_variance_field::Item_variance_field(Item_sum_variance *item)
{
name=item->name;
decimals=item->decimals;
......@@ -846,7 +868,7 @@ Item_std_field::Item_std_field(Item_sum_std *item)
maybe_null=1;
}
double Item_std_field::val()
double Item_variance_field::val()
{
double sum,sum_sqr;
longlong count;
......@@ -862,10 +884,10 @@ double Item_std_field::val()
null_value=0;
double tmp= (double) count;
double tmp2=(sum_sqr - sum*sum/tmp)/tmp;
return tmp2 <= 0.0 ? 0.0 : sqrt(tmp2);
return tmp2 <= 0.0 ? 0.0 : tmp2;
}
String *Item_std_field::val_str(String *str)
String *Item_variance_field::val_str(String *str)
{
double nr=val();
if (null_value)
......
......@@ -27,7 +27,7 @@ class Item_sum :public Item_result_field
{
public:
enum Sumfunctype {COUNT_FUNC,COUNT_DISTINCT_FUNC,SUM_FUNC,AVG_FUNC,MIN_FUNC,
MAX_FUNC, UNIQUE_USERS_FUNC,STD_FUNC,SUM_BIT_FUNC,
MAX_FUNC, UNIQUE_USERS_FUNC,STD_FUNC,VARIANCE_FUNC,SUM_BIT_FUNC,
UDF_SUM_FUNC };
Item **args,*tmp_args[2];
......@@ -235,14 +235,14 @@ class Item_sum_avg :public Item_sum_num
const char *func_name() const { return "avg"; }
};
class Item_sum_std;
class Item_sum_variance;
class Item_std_field :public Item_result_field
class Item_variance_field :public Item_result_field
{
public:
Field *field;
Item_std_field(Item_sum_std *item);
enum Type type() const { return FIELD_STD_ITEM; }
Item_variance_field(Item_sum_variance *item);
enum Type type() const {return FIELD_VARIANCE_ITEM; }
double val();
longlong val_int() { return (longlong) val(); }
String *val_str(String*);
......@@ -251,26 +251,59 @@ class Item_std_field :public Item_result_field
void fix_length_and_dec() {}
};
class Item_sum_std :public Item_sum_num
/*
variance(a) =
= sqrt ( sum (ai - avg(a))^2 / count(a) )
= sqrt ( sum (ai^2 - 2*ai*avg(a) + avg(a)^2) / count(a) )
= sqrt ( (sum(ai^2) - sum(2*ai*avg(a)) + sum(avg(a)^2))/count(a) ) =
= sqrt ( (sum(ai^2) - 2*avg(a)*sum(a) + count(a)*avg(a)^2)/count(a) ) =
= sqrt ( (sum(ai^2) - 2*sum(a)*sum(a)/count(a) + count(a)*sum(a)^2/count(a)^2 )/count(a) ) =
= sqrt ( (sum(ai^2) - 2*sum(a)^2/count(a) + sum(a)^2/count(a) )/count(a) ) =
= sqrt ( (sum(ai^2) - sum(a)^2/count(a))/count(a) )
*/
class Item_sum_variance : public Item_sum_num
{
double sum;
double sum_sqr;
double sum, sum_sqr;
ulonglong count;
void fix_length_and_dec() { decimals+=4; maybe_null=1; }
public:
Item_sum_std(Item *item_par) :Item_sum_num(item_par),count(0) {}
enum Sumfunctype sum_func () const { return STD_FUNC; }
Item_sum_variance(Item *item_par) :Item_sum_num(item_par),count(0) {}
enum Sumfunctype sum_func () const { return VARIANCE_FUNC; }
void reset();
bool add();
double val();
void reset_field();
void update_field(int offset);
Item *result_item(Field *field)
{ return new Item_std_field(this); }
const char *func_name() const { return "std"; }
{ return new Item_variance_field(this); }
const char *func_name() const { return "variance"; }
};
class Item_sum_std;
class Item_std_field :public Item_variance_field
{
public:
Item_std_field(Item_sum_std *item);
enum Type type() const { return FIELD_STD_ITEM; }
double val();
};
class Item_sum_std :public Item_sum_variance
{
public:
Item_sum_std(Item *item_par) :Item_sum_variance(item_par){}
enum Sumfunctype sum_func () const { return STD_FUNC; }
double val();
Item *result_item(Field *field)
{ return new Item_std_field(this); }
const char *func_name() const { return "std"; }
};
// This class is a string or number function depending on num_func
......
......@@ -584,6 +584,7 @@ static SYMBOL sql_functions[] = {
{ "UNIX_TIMESTAMP", SYM(UNIX_TIMESTAMP),0,0},
{ "UPPER", SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_ucase)},
{ "USER", SYM(USER),0,0},
{ "VARIANCE", SYM(VARIANCE_SYM),0,0},
{ "VERSION", SYM(FUNC_ARG0),0,CREATE_FUNC(create_func_version)},
{ "WEEK", SYM(WEEK_SYM),0,0},
{ "WEEKDAY", SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_weekday)},
......
......@@ -3773,13 +3773,14 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type,
else
return new Field_double(item_sum->max_length,maybe_null,
item->name, table, item_sum->decimals);
case Item_sum::STD_FUNC: /* Place for sum & count */
case Item_sum::VARIANCE_FUNC: /* Place for sum & count */
case Item_sum::STD_FUNC:
if (group)
return new Field_string(sizeof(double)*2+sizeof(longlong),
maybe_null, item->name,table,my_charset_bin);
else
return new Field_double(item_sum->max_length, maybe_null,
item->name,table,item_sum->decimals);
item->name,table,item_sum->decimals);
case Item_sum::UNIQUE_USERS_FUNC:
return new Field_long(9,maybe_null,item->name,table,1);
default:
......
......@@ -158,6 +158,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b,int *yystacksize);
%token SQL_THREAD
%token START_SYM
%token STD_SYM
%token VARIANCE_SYM
%token STOP_SYM
%token SUM_SYM
%token SUPER_SYM
......@@ -2335,6 +2336,8 @@ sum_expr:
{ $$=new Item_sum_max($3); }
| STD_SYM '(' in_sum_expr ')'
{ $$=new Item_sum_std($3); }
| VARIANCE_SYM '(' in_sum_expr ')'
{ $$=new Item_sum_variance($3); }
| SUM_SYM '(' in_sum_expr ')'
{ $$=new Item_sum_sum($3); };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment