Commit 3692adeb authored by Michael Okoko, committed by Sergei Petrunia

Fix avg_frequency statistics and remove stderr dumps

Signed-off-by: Michael Okoko <okokomichaels@outlook.com>
parent bff65a81
...@@ -164,12 +164,12 @@ id select_type table type possible_keys key key_len ref rows r_rows filtered r_f ...@@ -164,12 +164,12 @@ id select_type table type possible_keys key key_len ref rows r_rows filtered r_f
1 SIMPLE users ALL NULL NULL NULL NULL 101 101.00 98.04 98.02 Using where 1 SIMPLE users ALL NULL NULL NULL NULL 101 101.00 98.04 98.02 Using where
explain extended select * from users where city = 'Helsinki'; explain extended select * from users where city = 'Helsinki';
id select_type table type possible_keys key key_len ref rows filtered Extra id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE users ALL NULL NULL NULL NULL 101 50.00 Using where 1 SIMPLE users ALL NULL NULL NULL NULL 101 2.00 Using where
Warnings: Warnings:
Note 1003 select `test`.`users`.`city` AS `city` from `test`.`users` where `test`.`users`.`city` = 'Helsinki' Note 1003 select `test`.`users`.`city` AS `city` from `test`.`users` where `test`.`users`.`city` = 'Helsinki'
analyze select * from users where city = 'helsinki'; analyze select * from users where city = 'helsinki';
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE users ALL NULL NULL NULL NULL 101 101.00 50.00 1.98 Using where 1 SIMPLE users ALL NULL NULL NULL NULL 101 101.00 2.00 1.98 Using where
drop table t1_bin; drop table t1_bin;
drop table t1_json; drop table t1_json;
drop table users; drop table users;
......
...@@ -1470,8 +1470,6 @@ double Histogram_json::point_selectivity(Field *field, key_range *endpoint, doub ...@@ -1470,8 +1470,6 @@ double Histogram_json::point_selectivity(Field *field, key_range *endpoint, doub
max_idx++; max_idx++;
} }
//todo: do we need to account for zero value-length similarly to binary histograms.
if (max_idx > min_idx) if (max_idx > min_idx)
{ {
// value spans multiple buckets // value spans multiple buckets
...@@ -1480,7 +1478,7 @@ double Histogram_json::point_selectivity(Field *field, key_range *endpoint, doub ...@@ -1480,7 +1478,7 @@ double Histogram_json::point_selectivity(Field *field, key_range *endpoint, doub
} else } else
{ {
// the value fits within a single bucket // the value fits within a single bucket
sel = MIN(avg_sel, get_width()); sel = MY_MIN(avg_sel, (1.0/get_width()));
} }
return sel; return sel;
} }
...@@ -1494,7 +1492,7 @@ double Histogram_json::point_selectivity(Field *field, key_range *endpoint, doub ...@@ -1494,7 +1492,7 @@ double Histogram_json::point_selectivity(Field *field, key_range *endpoint, doub
double Histogram_json::range_selectivity(Field *field, key_range *min_endp, double Histogram_json::range_selectivity(Field *field, key_range *min_endp,
key_range *max_endp) key_range *max_endp)
{ {
fprintf(stderr, "Histogram_json::range_selectivity\n"); //fprintf(stderr, "Histogram_json::range_selectivity\n");
double min = 0.0, max = 1.0; double min = 0.0, max = 1.0;
double width = 1.0/(int)histogram_bounds.size(); double width = 1.0/(int)histogram_bounds.size();
if (min_endp) if (min_endp)
...@@ -1529,8 +1527,8 @@ double Histogram_json::range_selectivity(Field *field, key_range *min_endp, ...@@ -1529,8 +1527,8 @@ double Histogram_json::range_selectivity(Field *field, key_range *min_endp,
} }
min = min_bucket_idx * (width) + min_sel * (width); min = min_bucket_idx * (width) + min_sel * (width);
fprintf(stderr, "min pos_in_interval =%g\n", min_sel); //fprintf(stderr, "min pos_in_interval =%g\n", min_sel);
fprintf(stderr, "min =%g\n", min); //fprintf(stderr, "min =%g\n", min);
} }
if (max_endp) if (max_endp)
{ {
...@@ -1562,13 +1560,13 @@ double Histogram_json::range_selectivity(Field *field, key_range *min_endp, ...@@ -1562,13 +1560,13 @@ double Histogram_json::range_selectivity(Field *field, key_range *min_endp,
} }
max = min_bucket_idx * (width) + max_sel * (width); max = min_bucket_idx * (width) + max_sel * (width);
fprintf(stderr, "max pos_in_interval =%g\n", max_sel); //fprintf(stderr, "max pos_in_interval =%g\n", max_sel);
fprintf(stderr, "max =%g\n", max); //fprintf(stderr, "max =%g\n", max);
} }
double sel = max - min; double sel = max - min;
fprintf(stderr, "final selection = %g\n", sel); //fprintf(stderr, "final selection = %g\n", sel);
fprintf(stderr, "Histogram_json::range_selectivity ends\n"); //fprintf(stderr, "Histogram_json::range_selectivity ends\n");
return sel; return sel;
} }
...@@ -4504,7 +4502,6 @@ double Histogram_binary::range_selectivity(Field *field, ...@@ -4504,7 +4502,6 @@ double Histogram_binary::range_selectivity(Field *field,
key_range *min_endp, key_range *min_endp,
key_range *max_endp) key_range *max_endp)
{ {
fprintf(stderr, "Histogram_binary::range_selectivity\n");
double sel, min_mp_pos, max_mp_pos; double sel, min_mp_pos, max_mp_pos;
Column_statistics *col_stats= field->read_stats; Column_statistics *col_stats= field->read_stats;
...@@ -4527,8 +4524,6 @@ double Histogram_binary::range_selectivity(Field *field, ...@@ -4527,8 +4524,6 @@ double Histogram_binary::range_selectivity(Field *field,
else else
max_mp_pos= 1.0; max_mp_pos= 1.0;
// GSOC-todo: previously it was if (hist && hist->is_usable) - I wonder in what cases
// (hist) would be null and if it makes sense to handle that case now.
if (is_usable(field->table->in_use)) if (is_usable(field->table->in_use))
{ {
double bucket_sel= 1.0 / (get_width() + 1); double bucket_sel= 1.0 / (get_width() + 1);
...@@ -4536,19 +4531,18 @@ double Histogram_binary::range_selectivity(Field *field, ...@@ -4536,19 +4531,18 @@ double Histogram_binary::range_selectivity(Field *field,
uint max= find_bucket(max_mp_pos, FALSE); uint max= find_bucket(max_mp_pos, FALSE);
sel= bucket_sel * (max - min + 1); sel= bucket_sel * (max - min + 1);
fprintf(stderr, "bucket_sel =%g\n", bucket_sel); /*fprintf(stderr, "bucket_sel =%g\n", bucket_sel);
fprintf(stderr, "min pos_in_interval =%g\n", min_mp_pos); fprintf(stderr, "min pos_in_interval =%g\n", min_mp_pos);
fprintf(stderr, "max pos_in_interval =%g\n", max_mp_pos); fprintf(stderr, "max pos_in_interval =%g\n", max_mp_pos);
fprintf(stderr, "min =%d\n", min); fprintf(stderr, "min =%d\n", min);
fprintf(stderr, "max =%d\n", max); fprintf(stderr, "max =%d\n", max);*/
} }
else else
{ {
/* GSOC-todo: figure how to handle the else case below for Histogram_json*/
sel= (max_mp_pos - min_mp_pos); sel= (max_mp_pos - min_mp_pos);
} }
fprintf(stderr, "final sel =%g\n", sel); /*fprintf(stderr, "final sel =%g\n", sel);
fprintf(stderr, "Histogram_binary::range_selectivity ends\n"); fprintf(stderr, "Histogram_binary::range_selectivity ends\n");*/
return sel; return sel;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment