Commit 800aaae1 authored by svoj@mysql.com's avatar svoj@mysql.com

BUG#3190 fix (request for STDDEV_SAMP, VAR_SAMP).

This bug is also known as WL#1639.
parent fe2a8004
...@@ -804,3 +804,10 @@ select cast(min(ifl) as decimal(5,2)) from t3; ...@@ -804,3 +804,10 @@ select cast(min(ifl) as decimal(5,2)) from t3;
cast(min(ifl) as decimal(5,2)) cast(min(ifl) as decimal(5,2))
1.00 1.00
drop table t1, t2, t3; drop table t1, t2, t3;
CREATE TABLE t1 (id int(11),value1 float(10,2));
INSERT INTO t1 VALUES (1,0.00),(1,1.00), (1,2.00), (2,10.00), (2,11.00), (2,12.00), (2,13.00);
select id, stddev_pop(value1), var_pop(value1), stddev_samp(value1), var_samp(value1) from t1 group by id;
id stddev_pop(value1) var_pop(value1) stddev_samp(value1) var_samp(value1)
1 0.816497 0.666667 1.000000 1.000000
2 1.118034 1.250000 1.290994 1.666667
DROP TABLE t1;
...@@ -514,3 +514,13 @@ insert into t3 values(1), (2); ...@@ -514,3 +514,13 @@ insert into t3 values(1), (2);
select cast(min(ifl) as decimal(5,2)) from t3; select cast(min(ifl) as decimal(5,2)) from t3;
drop table t1, t2, t3; drop table t1, t2, t3;
#
# BUG#3190, WL#1639: Standard Deviation STDDEV - 2 different calculations
#
CREATE TABLE t1 (id int(11),value1 float(10,2));
INSERT INTO t1 VALUES (1,0.00),(1,1.00), (1,2.00), (2,10.00), (2,11.00), (2,12.00), (2,13.00);
select id, stddev_pop(value1), var_pop(value1), stddev_samp(value1), var_samp(value1) from t1 group by id;
DROP TABLE t1;
...@@ -853,7 +853,7 @@ Item *Item_sum_std::copy_or_same(THD* thd) ...@@ -853,7 +853,7 @@ Item *Item_sum_std::copy_or_same(THD* thd)
Item_sum_variance::Item_sum_variance(THD *thd, Item_sum_variance *item): Item_sum_variance::Item_sum_variance(THD *thd, Item_sum_variance *item):
Item_sum_num(thd, item), hybrid_type(item->hybrid_type), Item_sum_num(thd, item), hybrid_type(item->hybrid_type),
cur_dec(item->cur_dec), count(item->count) cur_dec(item->cur_dec), count(item->count), sample(item->sample)
{ {
if (hybrid_type == DECIMAL_RESULT) if (hybrid_type == DECIMAL_RESULT)
{ {
...@@ -1001,7 +1001,7 @@ double Item_sum_variance::val_real() ...@@ -1001,7 +1001,7 @@ double Item_sum_variance::val_real()
if (hybrid_type == DECIMAL_RESULT) if (hybrid_type == DECIMAL_RESULT)
return val_real_from_decimal(); return val_real_from_decimal();
if (!count) if (count <= sample)
{ {
null_value=1; null_value=1;
return 0.0; return 0.0;
...@@ -1009,30 +1009,31 @@ double Item_sum_variance::val_real() ...@@ -1009,30 +1009,31 @@ double Item_sum_variance::val_real()
null_value=0; null_value=0;
/* Avoid problems when the precision isn't good enough */ /* Avoid problems when the precision isn't good enough */
double tmp=ulonglong2double(count); double tmp=ulonglong2double(count);
double tmp2=(sum_sqr - sum*sum/tmp)/tmp; double tmp2= (sum_sqr - sum*sum/tmp)/(tmp - (double)sample);
return tmp2 <= 0.0 ? 0.0 : tmp2; return tmp2 <= 0.0 ? 0.0 : tmp2;
} }
my_decimal *Item_sum_variance::val_decimal(my_decimal *dec_buf) my_decimal *Item_sum_variance::val_decimal(my_decimal *dec_buf)
{ {
my_decimal count_buf, sum_sqr_buf; my_decimal count_buf, count1_buf, sum_sqr_buf;
DBUG_ASSERT(fixed ==1 ); DBUG_ASSERT(fixed ==1 );
if (hybrid_type == REAL_RESULT) if (hybrid_type == REAL_RESULT)
return val_decimal_from_real(dec_buf); return val_decimal_from_real(dec_buf);
if (!count) if (count <= sample)
{ {
null_value= 1; null_value= 1;
return 0; return 0;
} }
null_value= 0; null_value= 0;
int2my_decimal(E_DEC_FATAL_ERROR, count, 0, &count_buf); int2my_decimal(E_DEC_FATAL_ERROR, count, 0, &count_buf);
int2my_decimal(E_DEC_FATAL_ERROR, count-sample, 0, &count1_buf);
my_decimal_mul(E_DEC_FATAL_ERROR, &sum_sqr_buf, my_decimal_mul(E_DEC_FATAL_ERROR, &sum_sqr_buf,
dec_sum+cur_dec, dec_sum+cur_dec); dec_sum+cur_dec, dec_sum+cur_dec);
my_decimal_div(E_DEC_FATAL_ERROR, dec_buf, &sum_sqr_buf, &count_buf, 2); my_decimal_div(E_DEC_FATAL_ERROR, dec_buf, &sum_sqr_buf, &count_buf, 2);
my_decimal_sub(E_DEC_FATAL_ERROR, &sum_sqr_buf, dec_sqr+cur_dec, dec_buf); my_decimal_sub(E_DEC_FATAL_ERROR, &sum_sqr_buf, dec_sqr+cur_dec, dec_buf);
my_decimal_div(E_DEC_FATAL_ERROR, dec_buf, &sum_sqr_buf, &count_buf, 2); my_decimal_div(E_DEC_FATAL_ERROR, dec_buf, &sum_sqr_buf, &count1_buf, 2);
return dec_buf; return dec_buf;
} }
...@@ -2004,6 +2005,7 @@ Item_variance_field::Item_variance_field(Item_sum_variance *item) ...@@ -2004,6 +2005,7 @@ Item_variance_field::Item_variance_field(Item_sum_variance *item)
max_length=item->max_length; max_length=item->max_length;
field=item->result_field; field=item->result_field;
maybe_null=1; maybe_null=1;
sample= item->sample;
if ((hybrid_type= item->hybrid_type) == DECIMAL_RESULT) if ((hybrid_type= item->hybrid_type) == DECIMAL_RESULT)
{ {
f_scale0= item->f_scale0; f_scale0= item->f_scale0;
...@@ -2028,11 +2030,11 @@ double Item_variance_field::val_real() ...@@ -2028,11 +2030,11 @@ double Item_variance_field::val_real()
float8get(sum_sqr,(field->ptr+sizeof(double))); float8get(sum_sqr,(field->ptr+sizeof(double)));
count=sint8korr(field->ptr+sizeof(double)*2); count=sint8korr(field->ptr+sizeof(double)*2);
if ((null_value= !count)) if ((null_value= (count <= sample)))
return 0.0; return 0.0;
double tmp= (double) count; double tmp= (double) count;
double tmp2=(sum_sqr - sum*sum/tmp)/tmp; double tmp2= (sum_sqr - sum*sum/tmp)/(tmp - (double)sample);
return tmp2 <= 0.0 ? 0.0 : tmp2; return tmp2 <= 0.0 ? 0.0 : tmp2;
} }
...@@ -2052,11 +2054,12 @@ my_decimal *Item_variance_field::val_decimal(my_decimal *dec_buf) ...@@ -2052,11 +2054,12 @@ my_decimal *Item_variance_field::val_decimal(my_decimal *dec_buf)
return val_decimal_from_real(dec_buf); return val_decimal_from_real(dec_buf);
longlong count= sint8korr(field->ptr+dec_bin_size0+dec_bin_size1); longlong count= sint8korr(field->ptr+dec_bin_size0+dec_bin_size1);
if ((null_value= !count)) if ((null_value= (count <= sample)))
return 0; return 0;
my_decimal dec_count, dec_sum, dec_sqr, tmp; my_decimal dec_count, dec1_count, dec_sum, dec_sqr, tmp;
int2my_decimal(E_DEC_FATAL_ERROR, count, 0, &dec_count); int2my_decimal(E_DEC_FATAL_ERROR, count, 0, &dec_count);
int2my_decimal(E_DEC_FATAL_ERROR, count-sample, 0, &dec1_count);
binary2my_decimal(E_DEC_FATAL_ERROR, field->ptr, binary2my_decimal(E_DEC_FATAL_ERROR, field->ptr,
&dec_sum, f_precision0, f_scale0); &dec_sum, f_precision0, f_scale0);
binary2my_decimal(E_DEC_FATAL_ERROR, field->ptr+dec_bin_size0, binary2my_decimal(E_DEC_FATAL_ERROR, field->ptr+dec_bin_size0,
...@@ -2064,7 +2067,7 @@ my_decimal *Item_variance_field::val_decimal(my_decimal *dec_buf) ...@@ -2064,7 +2067,7 @@ my_decimal *Item_variance_field::val_decimal(my_decimal *dec_buf)
my_decimal_mul(E_DEC_FATAL_ERROR, &tmp, &dec_sum, &dec_sum); my_decimal_mul(E_DEC_FATAL_ERROR, &tmp, &dec_sum, &dec_sum);
my_decimal_div(E_DEC_FATAL_ERROR, dec_buf, &tmp, &dec_count, 2); my_decimal_div(E_DEC_FATAL_ERROR, dec_buf, &tmp, &dec_count, 2);
my_decimal_sub(E_DEC_FATAL_ERROR, &dec_sum, &dec_sqr, dec_buf); my_decimal_sub(E_DEC_FATAL_ERROR, &dec_sum, &dec_sqr, dec_buf);
my_decimal_div(E_DEC_FATAL_ERROR, dec_buf, &dec_sum, &dec_count, 2); my_decimal_div(E_DEC_FATAL_ERROR, dec_buf, &dec_sum, &dec1_count, 2);
return dec_buf; return dec_buf;
} }
......
...@@ -379,6 +379,7 @@ class Item_variance_field :public Item_result_field ...@@ -379,6 +379,7 @@ class Item_variance_field :public Item_result_field
uint f_precision0, f_scale0; uint f_precision0, f_scale0;
uint f_precision1, f_scale1; uint f_precision1, f_scale1;
uint dec_bin_size0, dec_bin_size1; uint dec_bin_size0, dec_bin_size1;
uint sample;
Item_variance_field(Item_sum_variance *item); Item_variance_field(Item_sum_variance *item);
enum Type type() const {return FIELD_VARIANCE_ITEM; } enum Type type() const {return FIELD_VARIANCE_ITEM; }
double val_real(); double val_real();
...@@ -422,9 +423,10 @@ class Item_sum_variance : public Item_sum_num ...@@ -422,9 +423,10 @@ class Item_sum_variance : public Item_sum_num
uint f_precision0, f_scale0; uint f_precision0, f_scale0;
uint f_precision1, f_scale1; uint f_precision1, f_scale1;
uint dec_bin_size0, dec_bin_size1; uint dec_bin_size0, dec_bin_size1;
uint sample;
Item_sum_variance(Item *item_par) :Item_sum_num(item_par), hybrid_type(REAL_RESULT), Item_sum_variance(Item *item_par, uint sample_arg) :Item_sum_num(item_par),
cur_dec(0),count(0) hybrid_type(REAL_RESULT), cur_dec(0), count(0), sample(sample_arg)
{} {}
Item_sum_variance(THD *thd, Item_sum_variance *item); Item_sum_variance(THD *thd, Item_sum_variance *item);
enum Sumfunctype sum_func () const { return VARIANCE_FUNC; } enum Sumfunctype sum_func () const { return VARIANCE_FUNC; }
...@@ -463,7 +465,8 @@ class Item_std_field :public Item_variance_field ...@@ -463,7 +465,8 @@ class Item_std_field :public Item_variance_field
class Item_sum_std :public Item_sum_variance class Item_sum_std :public Item_sum_variance
{ {
public: public:
Item_sum_std(Item *item_par) :Item_sum_variance(item_par) {} Item_sum_std(Item *item_par, uint sample_arg)
:Item_sum_variance(item_par, sample_arg) {}
Item_sum_std(THD *thd, Item_sum_std *item) Item_sum_std(THD *thd, Item_sum_std *item)
:Item_sum_variance(thd, item) :Item_sum_variance(thd, item)
{} {}
......
...@@ -735,6 +735,8 @@ static SYMBOL sql_functions[] = { ...@@ -735,6 +735,8 @@ static SYMBOL sql_functions[] = {
{ "STARTPOINT", F_SYM(FUNC_ARG1),0,CREATE_FUNC_GEOM(create_func_startpoint)}, { "STARTPOINT", F_SYM(FUNC_ARG1),0,CREATE_FUNC_GEOM(create_func_startpoint)},
{ "STD", SYM(STD_SYM)}, { "STD", SYM(STD_SYM)},
{ "STDDEV", SYM(STD_SYM)}, { "STDDEV", SYM(STD_SYM)},
{ "STDDEV_POP", SYM(STD_SYM)},
{ "STDDEV_SAMP", SYM(STDDEV_SAMP_SYM)},
{ "STR_TO_DATE", F_SYM(FUNC_ARG2),0,CREATE_FUNC(create_func_str_to_date)}, { "STR_TO_DATE", F_SYM(FUNC_ARG2),0,CREATE_FUNC(create_func_str_to_date)},
{ "STRCMP", F_SYM(FUNC_ARG2),0,CREATE_FUNC(create_func_strcmp)}, { "STRCMP", F_SYM(FUNC_ARG2),0,CREATE_FUNC(create_func_strcmp)},
{ "SUBSTR", SYM(SUBSTRING)}, { "SUBSTR", SYM(SUBSTRING)},
...@@ -760,6 +762,8 @@ static SYMBOL sql_functions[] = { ...@@ -760,6 +762,8 @@ static SYMBOL sql_functions[] = {
{ "UPPER", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_ucase)}, { "UPPER", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_ucase)},
{ "UUID", F_SYM(FUNC_ARG0),0,CREATE_FUNC(create_func_uuid)}, { "UUID", F_SYM(FUNC_ARG0),0,CREATE_FUNC(create_func_uuid)},
{ "VARIANCE", SYM(VARIANCE_SYM)}, { "VARIANCE", SYM(VARIANCE_SYM)},
{ "VAR_POP", SYM(VARIANCE_SYM)},
{ "VAR_SAMP", SYM(VAR_SAMP_SYM)},
{ "VERSION", F_SYM(FUNC_ARG0),0,CREATE_FUNC(create_func_version)}, { "VERSION", F_SYM(FUNC_ARG0),0,CREATE_FUNC(create_func_version)},
{ "WEEKDAY", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_weekday)}, { "WEEKDAY", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_weekday)},
{ "WEEKOFYEAR", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_weekofyear)}, { "WEEKOFYEAR", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_weekofyear)},
......
...@@ -561,6 +561,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); ...@@ -561,6 +561,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%token START_SYM %token START_SYM
%token STATUS_SYM %token STATUS_SYM
%token STD_SYM %token STD_SYM
%token STDDEV_SAMP_SYM
%token STOP_SYM %token STOP_SYM
%token STORAGE_SYM %token STORAGE_SYM
%token STRAIGHT_JOIN %token STRAIGHT_JOIN
...@@ -624,6 +625,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); ...@@ -624,6 +625,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%token UTC_DATE_SYM %token UTC_DATE_SYM
%token UTC_TIMESTAMP_SYM %token UTC_TIMESTAMP_SYM
%token UTC_TIME_SYM %token UTC_TIME_SYM
%token VAR_SAMP_SYM
%token VALUES %token VALUES
%token VALUE_SYM %token VALUE_SYM
%token VARBINARY %token VARBINARY
...@@ -4754,9 +4756,13 @@ sum_expr: ...@@ -4754,9 +4756,13 @@ sum_expr:
| MAX_SYM '(' DISTINCT in_sum_expr ')' | MAX_SYM '(' DISTINCT in_sum_expr ')'
{ $$=new Item_sum_max($4); } { $$=new Item_sum_max($4); }
| STD_SYM '(' in_sum_expr ')' | STD_SYM '(' in_sum_expr ')'
{ $$=new Item_sum_std($3); } { $$=new Item_sum_std($3, 0); }
| VARIANCE_SYM '(' in_sum_expr ')' | VARIANCE_SYM '(' in_sum_expr ')'
{ $$=new Item_sum_variance($3); } { $$=new Item_sum_variance($3, 0); }
| STDDEV_SAMP_SYM '(' in_sum_expr ')'
{ $$=new Item_sum_std($3, 1); }
| VAR_SAMP_SYM '(' in_sum_expr ')'
{ $$=new Item_sum_variance($3, 1); }
| SUM_SYM '(' in_sum_expr ')' | SUM_SYM '(' in_sum_expr ')'
{ $$=new Item_sum_sum($3); } { $$=new Item_sum_sum($3); }
| SUM_SYM '(' DISTINCT in_sum_expr ')' | SUM_SYM '(' DISTINCT in_sum_expr ')'
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment