Commit f0f0d072 authored by Sergei Golubchik's avatar Sergei Golubchik

MDEV-14500 filesort to support engines with slow rnd_pos

If the engine wants to avoid rnd_pos() - force a temporary table
before a filesort. But don't do it if addon_fields are used.
parent cd29aee5
create table t1(c1 int not null, c2 double not null, c3 char(255) not null) engine=archive;
insert t1 select seq, seq+0.7, concat('row with c1 = ', seq) from seq_1_to_10;
explain partitions select c1,c3 from t1 order by c2;
id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t1 NULL ALL NULL NULL NULL NULL 10 Using filesort
set max_length_for_sort_data = 4;
explain partitions select c1,c3 from t1 order by c2;
id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t1 NULL ALL NULL NULL NULL NULL 10 Using temporary; Using filesort
flush status;
select c1,c3 from t1 order by c2;
c1 c3
1 row with c1 = 1
2 row with c1 = 2
3 row with c1 = 3
4 row with c1 = 4
5 row with c1 = 5
6 row with c1 = 6
7 row with c1 = 7
8 row with c1 = 8
9 row with c1 = 9
10 row with c1 = 10
set max_length_for_sort_data = default;
show status where variable_name like '%tmp%' and value != 0;
Variable_name Value
Created_tmp_tables 1
Handler_tmp_write 10
Rows_tmp_read 20
alter table t1 partition by hash (c1) partitions 3;
explain partitions select c1,c3 from t1 order by c2;
id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t1 p0,p1,p2 ALL NULL NULL NULL NULL 10 Using filesort
set max_length_for_sort_data = 4;
explain partitions select c1,c3 from t1 order by c2;
id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t1 p0,p1,p2 ALL NULL NULL NULL NULL 10 Using temporary; Using filesort
flush status;
select c1,c3 from t1 order by c2;
c1 c3
1 row with c1 = 1
2 row with c1 = 2
3 row with c1 = 3
4 row with c1 = 4
5 row with c1 = 5
6 row with c1 = 6
7 row with c1 = 7
8 row with c1 = 8
9 row with c1 = 9
10 row with c1 = 10
set max_length_for_sort_data = default;
show status where variable_name like '%tmp%' and value != 0;
Variable_name Value
Created_tmp_tables 1
Handler_tmp_write 10
Rows_tmp_read 20
drop table t1;
#
# MDEV-14500 Support engines without rnd_pos
#
source include/have_archive.inc;
source include/have_sequence.inc;
source include/have_partition.inc;
create table t1(c1 int not null, c2 double not null, c3 char(255) not null) engine=archive;
insert t1 select seq, seq+0.7, concat('row with c1 = ', seq) from seq_1_to_10;
explain partitions select c1,c3 from t1 order by c2;
set max_length_for_sort_data = 4;
explain partitions select c1,c3 from t1 order by c2;
flush status;
select c1,c3 from t1 order by c2;
set max_length_for_sort_data = default;
show status where variable_name like '%tmp%' and value != 0;
alter table t1 partition by hash (c1) partitions 3;
explain partitions select c1,c3 from t1 order by c2;
set max_length_for_sort_data = 4;
explain partitions select c1,c3 from t1 order by c2;
flush status;
select c1,c3 from t1 order by c2;
set max_length_for_sort_data = default;
show status where variable_name like '%tmp%' and value != 0;
drop table t1;
...@@ -1957,6 +1957,30 @@ sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length, ...@@ -1957,6 +1957,30 @@ sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length,
return length; return length;
} }
bool filesort_use_addons(TABLE *table, uint sortlength,
uint *length, uint *fields, uint *null_fields)
{
Field **pfield, *field;
*length= *fields= *null_fields= 0;
for (pfield= table->field; (field= *pfield) ; pfield++)
{
if (!bitmap_is_set(table->read_set, field->field_index))
continue;
if (field->flags & BLOB_FLAG)
return false;
(*length)+= field->max_packed_col_length(field->pack_length());
if (field->maybe_null())
(*null_fields)++;
(*fields)++;
}
if (!*fields)
return false;
(*length)+= (*null_fields+7)/8;
return *length + sortlength <
table->in_use->variables.max_length_for_sort_data;
}
/** /**
Get descriptors of fields appended to sorted fields and Get descriptors of fields appended to sorted fields and
...@@ -1991,11 +2015,8 @@ get_addon_fields(TABLE *table, uint sortlength, LEX_STRING *addon_buf) ...@@ -1991,11 +2015,8 @@ get_addon_fields(TABLE *table, uint sortlength, LEX_STRING *addon_buf)
Field **pfield; Field **pfield;
Field *field; Field *field;
SORT_ADDON_FIELD *addonf; SORT_ADDON_FIELD *addonf;
uint length= 0; uint length, fields, null_fields;
uint fields= 0;
uint null_fields= 0;
MY_BITMAP *read_set= table->read_set; MY_BITMAP *read_set= table->read_set;
ulong max_sort_len= table->in_use->variables.max_length_for_sort_data;
DBUG_ENTER("get_addon_fields"); DBUG_ENTER("get_addon_fields");
/* /*
...@@ -2011,26 +2032,14 @@ get_addon_fields(TABLE *table, uint sortlength, LEX_STRING *addon_buf) ...@@ -2011,26 +2032,14 @@ get_addon_fields(TABLE *table, uint sortlength, LEX_STRING *addon_buf)
addon_buf->str= 0; addon_buf->str= 0;
addon_buf->length= 0; addon_buf->length= 0;
for (pfield= table->field; (field= *pfield) ; pfield++) // see remove_const() for HA_SLOW_RND_POS explanation
{ if (table->file->ha_table_flags() & HA_SLOW_RND_POS)
if (!bitmap_is_set(read_set, field->field_index)) sortlength= 0;
continue;
if (field->flags & BLOB_FLAG)
DBUG_RETURN(0);
length+= field->max_packed_col_length(field->pack_length());
if (field->maybe_null())
null_fields++;
fields++;
}
if (!fields)
DBUG_RETURN(0);
length+= (null_fields+7)/8;
if (length+sortlength > max_sort_len || if (!filesort_use_addons(table, sortlength, &length, &fields, &null_fields) ||
!my_multi_malloc(MYF(MY_WME | MY_THREAD_SPECIFIC), !my_multi_malloc(MYF(MY_WME | MY_THREAD_SPECIFIC), &addonf,
&addonf, sizeof(SORT_ADDON_FIELD) * (fields+1), sizeof(SORT_ADDON_FIELD) * (fields+1),
&addon_buf->str, length, &addon_buf->str, length, NullS))
NullS))
DBUG_RETURN(0); DBUG_RETURN(0);
......
...@@ -161,6 +161,9 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort, ...@@ -161,6 +161,9 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort,
Filesort_tracker* tracker, JOIN *join=NULL, Filesort_tracker* tracker, JOIN *join=NULL,
table_map first_table_bit=0); table_map first_table_bit=0);
bool filesort_use_addons(TABLE *table, uint sortlength,
uint *length, uint *fields, uint *null_fields);
void change_double_for_sort(double nr,uchar *to); void change_double_for_sort(double nr,uchar *to);
#endif /* FILESORT_INCLUDED */ #endif /* FILESORT_INCLUDED */
...@@ -308,6 +308,13 @@ enum enum_alter_inplace_result { ...@@ -308,6 +308,13 @@ enum enum_alter_inplace_result {
#define HA_SLOW_CMP_REF (1ULL << 54) #define HA_SLOW_CMP_REF (1ULL << 54)
#define HA_CMP_REF_IS_EXPENSIVE HA_SLOW_CMP_REF #define HA_CMP_REF_IS_EXPENSIVE HA_SLOW_CMP_REF
/**
Some engines are unable to provide an efficient implementation for rnd_pos().
Server will try to avoid it, if possible
TODO better to do it with cost estimates, not with an explicit flag
*/
#define HA_SLOW_RND_POS (1ULL << 55)
/* bits in index_flags(index_number) for what you can do with index */ /* bits in index_flags(index_number) for what you can do with index */
#define HA_READ_NEXT 1 /* TODO really use this flag */ #define HA_READ_NEXT 1 /* TODO really use this flag */
......
...@@ -13193,6 +13193,21 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond, ...@@ -13193,6 +13193,21 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond,
JOIN_TAB *head= join->join_tab + join->const_tables; JOIN_TAB *head= join->join_tab + join->const_tables;
*simple_order= head->on_expr_ref[0] == NULL; *simple_order= head->on_expr_ref[0] == NULL;
if (*simple_order && head->table->file->ha_table_flags() & HA_SLOW_RND_POS)
{
uint u1, u2, u3;
/*
normally the condition is (see filesort_use_addons())
length + sortlength <= max_length_for_sort_data
but for HA_SLOW_RND_POS tables we relax it a bit, as the alternative
is to use a temporary table, which is rather expensive.
TODO proper cost estimations
*/
*simple_order= filesort_use_addons(head->table, 0, &u1, &u2, &u3);
}
} }
else else
{ {
......
...@@ -108,7 +108,7 @@ class ha_archive: public handler ...@@ -108,7 +108,7 @@ class ha_archive: public handler
return (HA_NO_TRANSACTIONS | HA_REC_NOT_IN_SEQ | HA_CAN_BIT_FIELD | return (HA_NO_TRANSACTIONS | HA_REC_NOT_IN_SEQ | HA_CAN_BIT_FIELD |
HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE | HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE |
HA_STATS_RECORDS_IS_EXACT | HA_CAN_EXPORT | HA_STATS_RECORDS_IS_EXACT | HA_CAN_EXPORT |
HA_HAS_RECORDS | HA_CAN_REPAIR | HA_HAS_RECORDS | HA_CAN_REPAIR | HA_SLOW_RND_POS |
HA_FILE_BASED | HA_CAN_INSERT_DELAYED | HA_CAN_GEOMETRY); HA_FILE_BASED | HA_CAN_INSERT_DELAYED | HA_CAN_GEOMETRY);
} }
ulong index_flags(uint idx, uint part, bool all_parts) const ulong index_flags(uint idx, uint part, bool all_parts) const
......
...@@ -107,7 +107,7 @@ class ha_tina: public handler ...@@ -107,7 +107,7 @@ class ha_tina: public handler
{ {
return (HA_NO_TRANSACTIONS | HA_REC_NOT_IN_SEQ | HA_NO_AUTO_INCREMENT | return (HA_NO_TRANSACTIONS | HA_REC_NOT_IN_SEQ | HA_NO_AUTO_INCREMENT |
HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE | HA_CAN_EXPORT | HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE | HA_CAN_EXPORT |
HA_CAN_REPAIR); HA_CAN_REPAIR | HA_SLOW_RND_POS);
} }
ulong index_flags(uint idx, uint part, bool all_parts) const ulong index_flags(uint idx, uint part, bool all_parts) const
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment