Commit 1d981685 authored by Sergei Petrunia's avatar Sergei Petrunia

Move JSON histograms code into its own files

parent 4ab2b78b
...@@ -151,6 +151,7 @@ SET (SQL_SOURCE ...@@ -151,6 +151,7 @@ SET (SQL_SOURCE
sql_analyze_stmt.cc sql_analyze_stmt.cc
sql_join_cache.cc sql_join_cache.cc
create_options.cc multi_range_read.cc create_options.cc multi_range_read.cc
opt_histogram_json.cc
opt_index_cond_pushdown.cc opt_subselect.cc opt_index_cond_pushdown.cc opt_subselect.cc
opt_table_elimination.cc sql_expression_cache.cc opt_table_elimination.cc sql_expression_cache.cc
gcalc_slicescan.cc gcalc_tools.cc gcalc_slicescan.cc gcalc_tools.cc
......
This diff is collapsed.
/*
Copyright (c) 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
#include "sql_statistics.h"
/*
An equi-height histogram which stores real values for bucket bounds.
Handles @@histogram_type=JSON_HB
*/
class Histogram_json_hb : public Histogram_base
{
  /*
    Number of elements in the histogram.
    Starts at zero so that get_width() is well-defined before anything
    assigns a real value.
  */
  size_t size= 0;

  /* Collection-time only: collected histogram in the JSON form. */
  std::string json_text;

  // Array of histogram bucket endpoints in KeyTupleFormat.
  std::vector<std::string> histogram_bounds;

public:
  /* Name constant for this histogram format, version 1. */
  static constexpr const char* JSON_NAME="histogram_hb_v1";

  /*
    Overrides of the Histogram_base interface. parse(), serialize(),
    create_builder(), init_for_collection() and the selectivity functions are
    only declared here; their definitions live outside this header.
  */
  bool parse(MEM_ROOT *mem_root, Field *field, Histogram_type type_arg,
             const char *hist_data, size_t hist_data_len) override;

  void serialize(Field *field) override;

  Histogram_builder *create_builder(Field *col, uint col_len,
                                    ha_rows rows) override;

  // returns number of buckets in the histogram
  uint get_width() override
  {
    return (uint) size;
  }

  Histogram_type get_type() override
  {
    return JSON_HB;
  }

  /*
    @brief
      Legacy: this returns the size of the histogram on disk.

    @detail
      This is only called at collection time when json_text is non-empty.
      The value is the length of json_text in bytes, narrowed to uint
      because that is what the base-class interface returns.
  */
  uint get_size() override
  {
    return (uint) json_text.size();
  }

  void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg,
                           ulonglong size) override;

  /* A histogram object of this type always reports itself as available. */
  bool is_available() override { return true; }

  /*
    The histogram is used only when the session's
    optimizer_use_condition_selectivity setting is above 3.
  */
  bool is_usable(THD *thd) override
  {
    return thd->variables.optimizer_use_condition_selectivity > 3 &&
           is_available();
  }

  double point_selectivity(Field *field, key_range *endpoint,
                           double avg_selection) override;
  double range_selectivity(Field *field, key_range *min_endp,
                           key_range *max_endp) override;

  /*
    @brief
      Store the histogram size and its JSON text in this object.

    @param sz             Number of elements in the histogram.
    @param json_text_arg  NUL-terminated JSON text; it is copied up to (not
                          including) the first NUL byte.

    @detail
      The size is kept in full width. Casting it to an 8-bit integer (as
      was done before) made any value above 255 wrap around, so that e.g.
      a 256-element histogram reported get_width()==0.
  */
  void set_json_text(ulonglong sz, uchar *json_text_arg)
  {
    size= (size_t) sz;
    json_text.assign((const char*) json_text_arg);
  }

private:
  int find_bucket(Field *field, const uchar *lookup_val, bool equal_is_less);
};
This diff is collapsed.
...@@ -162,11 +162,18 @@ class Histogram_base : public Sql_alloc ...@@ -162,11 +162,18 @@ class Histogram_base : public Sql_alloc
virtual uint get_width()=0; virtual uint get_width()=0;
virtual Histogram_builder *create_builder(Field *col, uint col_len, /*
ha_rows rows)=0; The creation-time workflow is:
* create a histogram
* init_for_collection()
* create_builder()
* feed the data to the builder
* serialize();
*/
virtual void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, virtual void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg,
ulonglong size)=0; ulonglong size)=0;
virtual Histogram_builder *create_builder(Field *col, uint col_len,
ha_rows rows)=0;
virtual bool is_available()=0; virtual bool is_available()=0;
...@@ -177,19 +184,26 @@ class Histogram_base : public Sql_alloc ...@@ -177,19 +184,26 @@ class Histogram_base : public Sql_alloc
virtual double range_selectivity(Field *field, key_range *min_endp, virtual double range_selectivity(Field *field, key_range *min_endp,
key_range *max_endp)=0; key_range *max_endp)=0;
// Legacy: return the size of the histogram on disk. /*
// This will be stored in mysql.column_stats.hist_size column. Legacy: return the size of the histogram on disk.
// Newer, JSON-based histograms may return 0.
This will be stored in mysql.column_stats.hist_size column.
The value is not really needed as one can look at
LENGTH(mysql.column_stats.histogram) directly.
*/
virtual uint get_size()=0; virtual uint get_size()=0;
virtual ~Histogram_base()= default; virtual ~Histogram_base()= default;
Histogram_base() : owner(NULL) {} Histogram_base() : owner(NULL) {}
/*
Memory management: a histogram may be (exclusively) "owned" by a particular
thread (done for histograms that are being collected). By default, a
histogram has owner==NULL and is not owned by any particular thread.
*/
THD *get_owner() { return owner; } THD *get_owner() { return owner; }
void set_owner(THD *thd) { owner=thd; } void set_owner(THD *thd) { owner=thd; }
private: private:
// Owner is a thread that *exclusively* owns this histogram (and so can
// delete it at any time)
THD *owner; THD *owner;
}; };
...@@ -353,75 +367,72 @@ class Histogram_binary : public Histogram_base ...@@ -353,75 +367,72 @@ class Histogram_binary : public Histogram_base
/* /*
An equi-height histogram which stores real values for bucket bounds. This is used to collect the basic statistics from a Unique object:
- count of values
Handles @@histogram_type=JSON_HB - count of distinct values
- count of distinct values that have occurred only once
*/ */
class Histogram_json_hb : public Histogram_base class Basic_stats_collector
{ {
private: ulonglong count; /* number of values retrieved */
size_t size; /* Number of elements in the histogram */ ulonglong count_distinct; /* number of distinct values retrieved */
/* number of distinct values that occurred only once */
/* Collection-time only: collected histogram in the JSON form. */ ulonglong count_distinct_single_occurence;
std::string json_text;
// Array of histogram bucket endpoints in KeyTupleFormat.
std::vector<std::string> histogram_bounds;
public: public:
static constexpr const char* JSON_NAME="histogram_hb_v1"; Basic_stats_collector()
bool parse(MEM_ROOT *mem_root, Field *field, Histogram_type type_arg,
const char *hist_data, size_t hist_data_len) override;
void serialize(Field *field) override;
Histogram_builder *create_builder(Field *col, uint col_len,
ha_rows rows) override;
// returns number of buckets in the histogram
uint get_width() override
{ {
return (uint)size; count= 0;
count_distinct= 0;
count_distinct_single_occurence= 0;
} }
Histogram_type get_type() override ulonglong get_count_distinct() const { return count_distinct; }
ulonglong get_count_single_occurence() const
{ {
return JSON_HB; return count_distinct_single_occurence;
} }
ulonglong get_count() const { return count; }
void set_json_text(ulonglong sz, uchar *json_text_arg) void next(void *elem, element_count elem_cnt)
{ {
size = (uint8) sz; count_distinct++;
json_text.assign((const char*)json_text_arg, if (elem_cnt == 1)
strlen((const char*)json_text_arg)); count_distinct_single_occurence++;
count+= elem_cnt;
} }
};
uint get_size() override
{
return size;
}
void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, /*
ulonglong size) override; Histogram_builder is a helper class that is used to build histograms
for columns.
bool is_available() override {return true; } Do not create directly, call Histogram->create_builder(...);
*/
bool is_usable(THD *thd) override class Histogram_builder
{ {
return thd->variables.optimizer_use_condition_selectivity > 3 && protected:
is_available(); Field *column; /* table field for which the histogram is built */
} uint col_length; /* size of this field */
ha_rows records; /* number of records the histogram is built for */
double point_selectivity(Field *field, key_range *endpoint, Histogram_builder(Field *col, uint col_len, ha_rows rows) :
double avg_selection) override; column(col), col_length(col_len), records(rows)
double range_selectivity(Field *field, key_range *min_endp, {}
key_range *max_endp) override;
private: public:
int find_bucket(Field *field, const uchar *lookup_val, bool equal_is_less); // A histogram builder will also collect the counters
Basic_stats_collector counters;
virtual int next(void *elem, element_count elem_cnt)=0;
virtual void finalize()=0;
virtual ~Histogram_builder(){}
}; };
class Columns_statistics; class Columns_statistics;
class Index_statistics; class Index_statistics;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment