Commit c2d2c1e7 authored by Sergei Petrunia

MDEV-26519: Improved histograms

Save extra information in the histogram:

    "target_histogram_size": nnn,
    "collected_at": "(date and time)",
    "collected_by": "(server version)",
parent a0916cf5
--source include/json_hb_histogram.inc
# The collected_at and collected_by values saved by ANALYZE are rather variable; mask them
--replace_regex /("(collected_at|collected_by)": )"[^"]*"/\1"REPLACED"/
......@@ -1487,7 +1487,7 @@ set histogram_size=254;
set histogram_type=@DOUBLE_PREC_TYPE;
ANALYZE TABLE City;
FLUSH TABLES;
select UPPER(db_name),UPPER(table_name),UPPER(column_name),min_value,max_value,nulls_ratio,avg_length,avg_frequency,hist_size,hist_type,hex(histogram),decode_histogram(hist_type,histogram) from mysql.column_stats where UPPER(db_name)='WORLD' and UPPER(table_name)='COUNTRYLANGUAGE' and UPPER(column_name) = 'PERCENTAGE';;
select UPPER(db_name),UPPER(table_name),UPPER(column_name),min_value,max_value,nulls_ratio,avg_length,avg_frequency,hist_size,hist_type,decode_histogram(hist_type,histogram) from mysql.column_stats where UPPER(db_name)='WORLD' and UPPER(table_name)='COUNTRYLANGUAGE' and UPPER(column_name) = 'PERCENTAGE';;
UPPER(db_name) WORLD
UPPER(table_name) COUNTRYLANGUAGE
UPPER(column_name) PERCENTAGE
......@@ -1498,9 +1498,8 @@ avg_length 4.0000
avg_frequency 2.7640
hist_size 100
hist_type SINGLE_PREC_HB
hex(histogram) 0000000000000000000000000101010101010101010202020303030304040404050505050606070707080809090A0A0B0C0D0D0E0E0F10111213131415161718191B1C1E202224292A2E33373B4850575F6A76818C9AA7B9C4CFDADFE5EBF0F4F8FAFCFF
decode_histogram(hist_type,histogram) 0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.004,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.004,0.000,0.000,0.004,0.000,0.000,0.000,0.004,0.000,0.000,0.000,0.004,0.000,0.000,0.000,0.004,0.000,0.004,0.000,0.000,0.004,0.000,0.004,0.000,0.004,0.000,0.004,0.004,0.004,0.000,0.004,0.000,0.004,0.004,0.004,0.004,0.004,0.000,0.004,0.004,0.004,0.004,0.004,0.004,0.008,0.004,0.008,0.008,0.008,0.008,0.020,0.004,0.016,0.020,0.016,0.016,0.051,0.031,0.027,0.031,0.043,0.047,0.043,0.043,0.055,0.051,0.071,0.043,0.043,0.043,0.020,0.024,0.024,0.020,0.016,0.016,0.008,0.008,0.012,0.000
select UPPER(db_name),UPPER(table_name),UPPER(column_name),min_value,max_value,nulls_ratio,avg_length,avg_frequency,hist_size,hist_type,hex(histogram),decode_histogram(hist_type,histogram) from mysql.column_stats where UPPER(db_name)='WORLD' and UPPER(table_name)='CITY' and UPPER(column_name) = 'POPULATION';;
select UPPER(db_name),UPPER(table_name),UPPER(column_name),min_value,max_value,nulls_ratio,avg_length,avg_frequency,hist_size,hist_type,decode_histogram(hist_type,histogram) from mysql.column_stats where UPPER(db_name)='WORLD' and UPPER(table_name)='CITY' and UPPER(column_name) = 'POPULATION';;
UPPER(db_name) WORLD
UPPER(table_name) CITY
UPPER(column_name) POPULATION
......@@ -1511,7 +1510,6 @@ avg_length 4.0000
avg_frequency 1.0467
hist_size 254
hist_type DOUBLE_PREC_HB
hex(histogram) 1F00A1002B023002350238023F02430249024E02520258025D02630268026E02720276027B02800285028C02920297029D02A102A802AC02B402BC02C402CC02D302DA02E302EA02F102F802010305030C03120319031F03290333033D0343034F03590363036D037803840390039A03A603B303C303D103E003F203020412042404330440045304600472047F049104A204B804C804DE04F2040A0526053F0558056F058E05B305D905F4051306380667068406AB06DA06020731075C079407C507F8072E085E08A508DF0824096909CC092E0A760AD50A400BA90B150CAD0C310D240E130F0E103B11B9126B14F0166B192F1CB71FFF240630483FC567
decode_histogram(hist_type,histogram) 0.00047,0.00198,0.00601,0.00008,0.00008,0.00005,0.00011,0.00006,0.00009,0.00008,0.00006,0.00009,0.00008,0.00009,0.00008,0.00009,0.00006,0.00006,0.00008,0.00008,0.00008,0.00011,0.00009,0.00008,0.00009,0.00006,0.00011,0.00006,0.00012,0.00012,0.00012,0.00012,0.00011,0.00011,0.00014,0.00011,0.00011,0.00011,0.00014,0.00006,0.00011,0.00009,0.00011,0.00009,0.00015,0.00015,0.00015,0.00009,0.00018,0.00015,0.00015,0.00015,0.00017,0.00018,0.00018,0.00015,0.00018,0.00020,0.00024,0.00021,0.00023,0.00027,0.00024,0.00024,0.00027,0.00023,0.00020,0.00029,0.00020,0.00027,0.00020,0.00027,0.00026,0.00034,0.00024,0.00034,0.00031,0.00037,0.00043,0.00038,0.00038,0.00035,0.00047,0.00056,0.00058,0.00041,0.00047,0.00056,0.00072,0.00044,0.00060,0.00072,0.00061,0.00072,0.00066,0.00085,0.00075,0.00078,0.00082,0.00073,0.00108,0.00089,0.00105,0.00105,0.00151,0.00150,0.00110,0.00145,0.00163,0.00160,0.00165,0.00232,0.00201,0.00371,0.00365,0.00383,0.00459,0.00583,0.00662,0.00984,0.00969,0.01080,0.01379,0.02063,0.04308,0.05960,0.15816,0.59464
set histogram_type=@SINGLE_PREC_TYPE;
set histogram_size=0;
......@@ -1648,10 +1646,10 @@ test.t2 analyze status OK
select db_name, table_name, column_name,
min_value, max_value,
nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram)
hist_size, hist_type, decode_histogram(hist_type,histogram)
FROM mysql.column_stats;
db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram)
test t2 id 1 1024 0.0000 8.0000 63 SINGLE_PREC_HB 03070B0F13171B1F23272B2F33373B3F43474B4F53575B5F63676B6F73777B7F83878B8F93979B9FA3A7ABAFB3B7BBBFC3C7CBCFD3D7DBDFE3E7EBEFF3F7FB
db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type decode_histogram(hist_type,histogram)
test t2 id 1 1024 0.0000 8.0000 63 SINGLE_PREC_HB 0.012,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016,0.016
set histogram_size=0;
drop table t1, t2;
set use_stat_tables=@save_use_stat_tables;
......@@ -1669,9 +1667,9 @@ Level Code Message
select db_name, table_name, column_name,
HEX(min_value), HEX(max_value),
nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram)
hist_size, hist_type, decode_histogram(hist_type,histogram)
FROM mysql.column_stats;
db_name table_name column_name HEX(min_value) HEX(max_value) nulls_ratio avg_frequency hist_size hist_type HEX(histogram)
db_name table_name column_name HEX(min_value) HEX(max_value) nulls_ratio avg_frequency hist_size hist_type decode_histogram(hist_type,histogram)
test t1 a D879626AF872675F73E662F8 D879626AF872675F73E662F8 0.0000 1.0000 0 NULL NULL
drop table t1;
#
......
......@@ -91,6 +91,7 @@ SELECT * FROM mysql.index_stats;
SELECT COUNT(*) FROM t1;
SELECT * FROM mysql.column_stats
WHERE db_name='test' AND table_name='t1' AND column_name='a';
SELECT MIN(t1.a), MAX(t1.a),
......@@ -187,6 +188,7 @@ DELETE FROM mysql.column_stats;
set histogram_size=4;
ANALYZE TABLE t1;
--source include/histogram_replaces.inc
SELECT db_name, table_name, column_name,
min_value, max_value,
nulls_ratio, avg_frequency,
......@@ -200,6 +202,7 @@ set histogram_size=8;
set histogram_type=@DOUBLE_PREC_TYPE;
ANALYZE TABLE t1;
--source include/histogram_replaces.inc
SELECT db_name, table_name, column_name,
min_value, max_value,
nulls_ratio, avg_frequency,
......@@ -668,8 +671,10 @@ ANALYZE TABLE City;
FLUSH TABLES;
--enable_result_log
--query_vertical select UPPER(db_name),UPPER(table_name),UPPER(column_name),min_value,max_value,nulls_ratio,avg_length,avg_frequency,hist_size,hist_type,hex(histogram),decode_histogram(hist_type,histogram) from mysql.column_stats where UPPER(db_name)='WORLD' and UPPER(table_name)='COUNTRYLANGUAGE' and UPPER(column_name) = 'PERCENTAGE';
--query_vertical select UPPER(db_name),UPPER(table_name),UPPER(column_name),min_value,max_value,nulls_ratio,avg_length,avg_frequency,hist_size,hist_type,hex(histogram),decode_histogram(hist_type,histogram) from mysql.column_stats where UPPER(db_name)='WORLD' and UPPER(table_name)='CITY' and UPPER(column_name) = 'POPULATION';
--source include/histogram_replaces.inc
--query_vertical select UPPER(db_name),UPPER(table_name),UPPER(column_name),min_value,max_value,nulls_ratio,avg_length,avg_frequency,hist_size,hist_type,decode_histogram(hist_type,histogram) from mysql.column_stats where UPPER(db_name)='WORLD' and UPPER(table_name)='COUNTRYLANGUAGE' and UPPER(column_name) = 'PERCENTAGE';
--source include/histogram_replaces.inc
--query_vertical select UPPER(db_name),UPPER(table_name),UPPER(column_name),min_value,max_value,nulls_ratio,avg_length,avg_frequency,hist_size,hist_type,decode_histogram(hist_type,histogram) from mysql.column_stats where UPPER(db_name)='WORLD' and UPPER(table_name)='CITY' and UPPER(column_name) = 'POPULATION';
set histogram_type=@SINGLE_PREC_TYPE;
set histogram_size=0;
......@@ -715,6 +720,7 @@ set histogram_size=10;
analyze table t1 persistent for all;
--source include/histogram_replaces.inc
select db_name, table_name, column_name,
min_value, max_value,
nulls_ratio, avg_frequency,
......@@ -741,6 +747,7 @@ show variables like 'histogram%';
analyze table t1 persistent for all;
--source include/histogram_replaces.inc
select db_name, table_name, column_name,
min_value, max_value,
nulls_ratio, avg_frequency,
......@@ -786,10 +793,11 @@ set histogram_size=63;
analyze table t2 persistent for all;
--source include/histogram_replaces.inc
select db_name, table_name, column_name,
min_value, max_value,
nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram)
hist_size, hist_type, decode_histogram(hist_type,histogram)
FROM mysql.column_stats;
set histogram_size=0;
......@@ -807,10 +815,11 @@ insert into t1 values(unhex('D879626AF872675F73E662F8'));
analyze table t1 persistent for all;
show warnings;
--source include/histogram_replaces.inc
select db_name, table_name, column_name,
HEX(min_value), HEX(max_value),
nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram)
hist_size, hist_type, decode_histogram(hist_type,histogram)
FROM mysql.column_stats;
drop table t1;
......@@ -974,6 +983,7 @@ INSERT INTO t1 SELECT id+9192 FROM t1;
--echo # This query will should show a full table scan analysis.
--echo #
ANALYZE TABLE t1;
--source include/histogram_replaces.inc
select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, avg_frequency,
DECODE_HISTOGRAM(hist_type, histogram)
from mysql.column_stats;
......@@ -984,6 +994,7 @@ set analyze_sample_percentage=0.1;
--echo # This query will show an innacurate avg_frequency value.
--echo #
ANALYZE TABLE t1;
--source include/histogram_replaces.inc
select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, avg_frequency,
DECODE_HISTOGRAM(hist_type, histogram)
from mysql.column_stats;
......@@ -993,6 +1004,7 @@ from mysql.column_stats;
--echo #
set analyze_sample_percentage=25;
ANALYZE TABLE t1;
--source include/histogram_replaces.inc
select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, avg_frequency,
DECODE_HISTOGRAM(hist_type, histogram)
from mysql.column_stats;
......@@ -1003,6 +1015,7 @@ set analyze_sample_percentage=0;
--echo # Test self adjusting sampling level.
--echo #
ANALYZE TABLE t1;
--source include/histogram_replaces.inc
select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, avg_frequency,
DECODE_HISTOGRAM(hist_type, histogram)
from mysql.column_stats;
......@@ -1014,6 +1027,7 @@ explain select * from t1;
set analyze_sample_percentage=100;
ANALYZE TABLE t1;
--source include/histogram_replaces.inc
select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, avg_frequency,
DECODE_HISTOGRAM(hist_type, histogram)
from mysql.column_stats;
......
This diff is collapsed.
......@@ -8,7 +8,6 @@ let $histogram_type_override='JSON_HB';
--source include/have_innodb.inc
--source include/have_stat_tables.inc
--source include/have_sequence.inc
--source include/analyze-format.inc
--disable_warnings
drop table if exists t1;
--enable_warnings
......@@ -33,6 +32,7 @@ set histogram_type=json_hb;
create table t1_json (a varchar(255));
insert into t1_json select concat('a-', a) from ten;
analyze table t1_json persistent for all;
--source include/json_hb_histogram.inc
select * from mysql.column_stats where table_name='t1_json';
explain extended select * from t1_json where a between 'a-3a' and 'zzzzzzzzz';
analyze select * from t1_json where a between 'a-3a' and 'zzzzzzzzz';
......@@ -134,6 +134,7 @@ set histogram_size=50;
ANALYZE TABLE Country, City, CountryLanguage persistent for all;
--enable_result_log
--source include/histogram_replaces.inc
SELECT column_name, min_value, max_value, hist_size, hist_type, histogram FROM mysql.column_stats;
analyze select * from Country use index () where Code between 'BBC' and 'GGG';
analyze select * from Country use index () where Code < 'BBC';
......@@ -161,6 +162,7 @@ from mysql.column_stats where table_name='t10' and db_name=database();
insert into t10 values ('Berlin'),('Paris'),('Rome');
set histogram_size=10, histogram_type='json_hb';
analyze table t10 persistent for all;
--source include/histogram_replaces.inc
select histogram
from mysql.column_stats where table_name='t10' and db_name=database();
drop table t10;
......@@ -184,6 +186,7 @@ CREATE TABLE t1 (a INT, b INT);
INSERT INTO t1 VALUES (NULL,1), (NULL,2);
SET histogram_type = JSON_HB;
ANALYZE TABLE t1 PERSISTENT FOR ALL;
--source include/histogram_replaces.inc
SELECT DECODE_HISTOGRAM(hist_type, histogram) from mysql.column_stats;
drop table t1;
......@@ -214,6 +217,7 @@ select hex(a) from t1;
set histogram_type='json_hb';
analyze table t1 persistent for all;
--source include/histogram_replaces.inc
select decode_histogram(hist_type, histogram)
from mysql.column_stats
where db_name=database() and table_name='t1';
......@@ -228,6 +232,7 @@ create table t1 ( a varchar(100) character set cp1251);
insert into t1 values ( _cp1251 x'88'),( _cp1251 x'98');
analyze table t1 persistent for all;
--source include/histogram_replaces.inc
select hist_type, histogram
from mysql.column_stats
where db_name=database() and table_name='t1';
......@@ -265,6 +270,7 @@ insert into t1 select 6 from seq_1_to_25;
set histogram_size=4, histogram_type=JSON_HB;
analyze table t1 persistent for all;
--source include/json_hb_histogram.inc
select histogram from mysql.column_stats where table_name = 't1';
drop table t1;
......@@ -299,6 +305,7 @@ insert into t1 select 100*A.a from t0 A, t0 B, t0 C;
select a, count(*) from t1 group by a order by a;
set histogram_type=json_hb, histogram_size=default;
analyze table t1 persistent for all;
--source include/json_hb_histogram.inc
select * from mysql.column_stats where table_name='t1';
analyze select * from t1 where a=0;
analyze select * from t1 where a=50;
......
......@@ -22,7 +22,13 @@
/*
Un-escape a JSON string and save it into *out.
@brief
Un-escape a JSON string and save it into *out.
@detail
There's no way to tell how much space is needed for the output.
Start with a small string and increase its size until json_unescape()
succeeds.
*/
static bool json_unescape_to_string(const char *val, int val_len, String* out)
......@@ -55,7 +61,13 @@ static bool json_unescape_to_string(const char *val, int val_len, String* out)
/*
Escape a JSON string and save it into *out.
@brief
Escape a JSON string and save it into *out.
@detail
There's no way to tell how much space is needed for the output.
Start with a small string and increase its size until json_escape()
succeeds.
*/
static bool json_escape_to_string(const String *str, String* out)
......@@ -145,6 +157,8 @@ class Histogram_json_builder : public Histogram_builder
bucket.size= 0;
writer.start_object();
append_histogram_params();
writer.add_member(Histogram_json_hb::JSON_NAME).start_array();
}
......@@ -153,6 +167,27 @@ class Histogram_json_builder : public Histogram_builder
private:
bool bucket_is_empty() { return bucket.ndv == 0; }
/*
  @brief
    Write the descriptive histogram members to the JSON output.

  @detail
    Adds three members through 'writer':
      "target_histogram_size" - the value of hist_width,
      "collected_at"          - the time returned by my_time(0), converted
                                with localtime_r() and printed as
                                "YYYY-MM-DD HH:MM:SS <system_time_zone>",
      "collected_by"          - server_version.

    All time fields are zero-padded so that the timestamp has a fixed
    layout (e.g. "09:05:03", not " 9:05:03").
*/
void append_histogram_params()
{
  char buf[128];
  time_t cur_time_t= my_time(0);
  struct tm curtime;
  localtime_r(&cur_time_t, &curtime);

  // tm_year counts from 1900 and tm_mon is zero-based; adjust both.
  my_snprintf(buf, sizeof(buf), "%d-%02d-%02d %02d:%02d:%02d %s",
              curtime.tm_year + 1900,
              curtime.tm_mon+1,
              curtime.tm_mday,
              curtime.tm_hour,
              curtime.tm_min,
              curtime.tm_sec,
              system_time_zone);

  writer.add_member("target_histogram_size").add_ull(hist_width);
  writer.add_member("collected_at").add_str(buf);
  writer.add_member("collected_by").add_str(server_version);
}
/*
Flush the current bucket out (to JSON output), and set it to be empty.
*/
......@@ -423,6 +458,15 @@ class Json_saved_parser_state
};
/*
@brief
Read a constant from JSON document and save it in *out.
@detail
The JSON document stores the constant in text form; we need to save it in
KeyTupleFormat. String constants in JSON may be escaped.
*/
bool read_bucket_endpoint(json_engine_t *je, Field *field, String *out,
const char **err)
{
......@@ -508,8 +552,9 @@ int Histogram_json_hb::parse_bucket(json_engine_t *je, Field *field,
double size_d;
longlong ndv_ll;
StringBuffer<128> value_buf;
int rc;
while (!json_scan_next(je) && je->state != JST_OBJ_END)
while (!(rc= json_scan_next(je)) && je->state != JST_OBJ_END)
{
Json_saved_parser_state save1(je);
Json_string start_str("start");
......@@ -579,6 +624,9 @@ int Histogram_json_hb::parse_bucket(json_engine_t *je, Field *field,
return 1;
}
if (rc)
return 1;
if (!have_start)
{
*err= "\"start\" element not present";
......@@ -625,13 +673,12 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, const char *db_name,
json_engine_t je;
int rc;
const char *err= "JSON parse error";
double total_size= 0.0;
int end_element= -1;
double total_size;
int end_element;
bool end_assigned;
DBUG_ENTER("Histogram_json_hb::parse");
DBUG_ASSERT(type_arg == JSON_HB);
Json_string hist_key_name(JSON_NAME);
json_scan_start(&je, &my_charset_utf8mb4_bin,
(const uchar*)hist_data,
(const uchar*)hist_data+hist_data_len);
......@@ -645,32 +692,45 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, const char *db_name,
goto err;
}
if (json_scan_next(&je))
goto err;
if (je.state != JST_KEY || !json_key_matches(&je, hist_key_name.get()))
while (1)
{
err= "Root element must be histogram_hb";
goto err;
}
if (json_scan_next(&je))
goto err;
if (je.state == JST_OBJ_END)
break; // End of object
if (json_scan_next(&je))
goto err;
if (je.state != JST_KEY)
goto err; // Can' really have this: JSON object has keys in it
if (je.state != JST_ARRAY_START)
{
err= "histogram_hb must contain an array";
goto err;
}
Json_string hist_key_name(JSON_NAME);
if (json_key_matches(&je, hist_key_name.get()))
{
total_size= 0.0;
end_element= -1;
if (json_scan_next(&je))
goto err;
while (!(rc= parse_bucket(&je, field, &total_size, &end_assigned, &err)))
{
if (end_assigned && end_element != -1)
end_element= (int)buckets.size();
}
if (je.state != JST_ARRAY_START)
{
err= "histogram_hb must contain an array";
goto err;
}
if (rc > 0) // Got error other than EOF
goto err;
while (!(rc= parse_bucket(&je, field, &total_size, &end_assigned, &err)))
{
if (end_assigned && end_element != -1)
end_element= (int)buckets.size();
}
if (rc > 0) // Got error other than EOF
goto err;
}
else
{
// Some unknown member. Skip it.
if (json_skip_key(&je))
return 1;
}
}
if (buckets.size() < 1)
{
......
......@@ -24,6 +24,11 @@
Histogram format in JSON:
{
// The next three are saved but not currently analyzed:
"target_histogram_size": nnn,
"collected_at": "(date and time)",
"collected_by": "(server version)",
"histogram_hb": [
{ "start": "value", "size":nnn.nn, "ndv": nnn },
...
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment