Commit da4d2327 authored by Gleb Shchepa's avatar Gleb Shchepa

Bug #30584: delete with order by and limit clauses does not

            use limit efficiently
Bug #36569: UPDATE ... WHERE ... ORDER BY... always does a
            filesort even if not required

Also two bugs reported after QA review (before the commit
of bugs above to public trees, no documentation needed):

Bug #53737: Performance regressions after applying patch
            for bug 36569
Bug #53742: UPDATEs have no effect after applying patch
            for bug 36569


Execution of single-table UPDATE and DELETE statements did not use the 
same optimizer as was used in the compilation of SELECT statements. 
Instead, it had an optimizer of its own that did not take into account 
that you can omit sorting by retrieving rows using an index.

Extra optimization has been added: when applicable, single-table 
UPDATE/DELETE statements use an existing index instead of filesort. A 
corresponding SELECT query would do the former.

Also handling of the DESC ordering expression has been added when
reverse index scan is applicable.

From now on most single table UPDATE and DELETE statements show the 
same disk access patterns as the corresponding SELECT query. We verify 
this by comparing the result of SHOW STATUS LIKE 'Sort%

Currently the get_index_for_order function 
a) checks quick select index (if any) for compatibility with the
   ORDER expression list or
b) chooses the cheapest available compatible index, but only if 
   the index scan is cheaper than filesort.
Second way is implemented by the new test_if_cheaper_ordering
function (extracted part the test_if_skip_sort_order()).
parent a4baec5c
......@@ -333,7 +333,7 @@ rows_examined sql_text
2 INSERT INTO t1 SELECT b+sleep(.01) from t2
4 UPDATE t1 SET a=a+sleep(.01) WHERE a>2
8 UPDATE t1 SET a=a+sleep(.01) ORDER BY a DESC
2 UPDATE t2 set b=b+sleep(.01) limit 1
1 UPDATE t2 set b=b+sleep(.01) limit 1
4 UPDATE t1 SET a=a+sleep(.01) WHERE a in (SELECT b from t2)
6 DELETE FROM t1 WHERE a=a+sleep(.01) ORDER BY a LIMIT 2
DROP TABLE t1,t2;
......
This diff is collapsed.
......@@ -306,8 +306,8 @@ Handler_read_first 0
Handler_read_key 0
Handler_read_next 0
Handler_read_prev 0
Handler_read_rnd 1
Handler_read_rnd_next 9
Handler_read_rnd 0
Handler_read_rnd_next 0
alter table t1 disable keys;
flush status;
delete from t1 order by a limit 1;
......
This diff is collapsed.
......@@ -1841,96 +1841,6 @@ SEL_ARG *SEL_ARG::clone_tree(RANGE_OPT_PARAM *param)
}
/*
Find the best index to retrieve first N records in given order
SYNOPSIS
get_index_for_order()
table Table to be accessed
order Required ordering
limit Number of records that will be retrieved
DESCRIPTION
Find the best index that allows to retrieve first #limit records in the
given order cheaper then one would retrieve them using full table scan.
IMPLEMENTATION
Run through all table indexes and find the shortest index that allows
records to be retrieved in given order. We look for the shortest index
as we will have fewer index pages to read with it.
This function is used only by UPDATE/DELETE, so we take into account how
the UPDATE/DELETE code will work:
* index can only be scanned in forward direction
* HA_EXTRA_KEYREAD will not be used
Perhaps these assumptions could be relaxed.
RETURN
Number of the index that produces the required ordering in the cheapest way
MAX_KEY if no such index was found.
*/
uint get_index_for_order(TABLE *table, ORDER *order, ha_rows limit)
{
uint idx;
uint match_key= MAX_KEY, match_key_len= MAX_KEY_LENGTH + 1;
ORDER *ord;
for (ord= order; ord; ord= ord->next)
if (!ord->asc)
return MAX_KEY;
for (idx= 0; idx < table->s->keys; idx++)
{
if (!(table->keys_in_use_for_query.is_set(idx)))
continue;
KEY_PART_INFO *keyinfo= table->key_info[idx].key_part;
uint n_parts= table->key_info[idx].key_parts;
uint partno= 0;
/*
The below check is sufficient considering we now have either BTREE
indexes (records are returned in order for any index prefix) or HASH
indexes (records are not returned in order for any index prefix).
*/
if (!(table->file->index_flags(idx, 0, 1) & HA_READ_ORDER))
continue;
for (ord= order; ord && partno < n_parts; ord= ord->next, partno++)
{
Item *item= order->item[0];
if (!(item->type() == Item::FIELD_ITEM &&
((Item_field*)item)->field->eq(keyinfo[partno].field)))
break;
}
if (!ord && table->key_info[idx].key_length < match_key_len)
{
/*
Ok, the ordering is compatible and this key is shorter then
previous match (we want shorter keys as we'll have to read fewer
index pages for the same number of records)
*/
match_key= idx;
match_key_len= table->key_info[idx].key_length;
}
}
if (match_key != MAX_KEY)
{
/*
Found an index that allows records to be retrieved in the requested
order. Now we'll check if using the index is cheaper then doing a table
scan.
*/
double full_scan_time= table->file->scan_time();
double index_scan_time= table->file->read_time(match_key, 1, limit);
if (index_scan_time > full_scan_time)
match_key= MAX_KEY;
}
return match_key;
}
/*
Table rows retrieval plan. Range optimizer creates QUICK_SELECT_I-derived
objects from table read plans.
......@@ -8977,7 +8887,6 @@ QUICK_SELECT_DESC::QUICK_SELECT_DESC(QUICK_RANGE_SELECT *q,
}
rev_it.rewind();
q->dont_free=1; // Don't free shared mem
delete q;
}
......@@ -9067,6 +8976,27 @@ int QUICK_SELECT_DESC::get_next()
}
/**
Create a compatible quick select with the result ordered in an opposite way
@param used_key_parts_arg Number of used key parts
@retval NULL in case of errors (OOM etc)
@retval pointer to a newly created QUICK_SELECT_DESC if success
*/
QUICK_SELECT_I *QUICK_RANGE_SELECT::make_reverse(uint used_key_parts_arg)
{
QUICK_SELECT_DESC *new_quick= new QUICK_SELECT_DESC(this, used_key_parts_arg);
if (new_quick == NULL || new_quick->error != 0)
{
delete new_quick;
return NULL;
}
return new_quick;
}
/*
Compare if found key is over max-value
Returns 0 if key <= range->max_key
......@@ -11673,6 +11603,7 @@ void QUICK_RANGE_SELECT::dbug_dump(int indent, bool verbose)
/* purecov: end */
}
void QUICK_INDEX_MERGE_SELECT::dbug_dump(int indent, bool verbose)
{
List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
......@@ -11761,7 +11692,7 @@ void QUICK_GROUP_MIN_MAX_SELECT::dbug_dump(int indent, bool verbose)
}
#endif /* NOT_USED */
#endif /* !DBUG_OFF */
/*****************************************************************************
** Instantiate templates
......
......@@ -352,6 +352,11 @@ public:
*/
virtual void dbug_dump(int indent, bool verbose)= 0;
#endif
/*
Returns a QUICK_SELECT with reverse order of to the index.
*/
virtual QUICK_SELECT_I *make_reverse(uint used_key_parts_arg) { return NULL; }
};
......@@ -437,6 +442,7 @@ public:
#ifndef DBUG_OFF
void dbug_dump(int indent, bool verbose);
#endif
QUICK_SELECT_I *make_reverse(uint used_key_parts_arg);
private:
/* Default copy ctor used by QUICK_SELECT_DESC */
};
......@@ -782,6 +788,10 @@ public:
int get_next();
bool reverse_sorted() { return 1; }
int get_type() { return QS_TYPE_RANGE_DESC; }
QUICK_SELECT_I *make_reverse(uint used_key_parts_arg)
{
return this; // is already reverse sorted
}
private:
bool range_reads_after_key(QUICK_RANGE *range);
int reset(void) { rev_it.rewind(); return QUICK_RANGE_SELECT::reset(); }
......@@ -807,6 +817,7 @@ class SQL_SELECT :public Sql_alloc {
SQL_SELECT();
~SQL_SELECT();
void cleanup();
void set_quick(QUICK_SELECT_I *new_quick) { delete quick; quick= new_quick; }
bool check_quick(THD *thd, bool force_quick_range, ha_rows limit)
{
key_map tmp;
......@@ -833,7 +844,6 @@ public:
QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
struct st_table_ref *ref,
ha_rows records);
uint get_index_for_order(TABLE *table, ORDER *order, ha_rows limit);
SQL_SELECT *make_select(TABLE *head, table_map const_tables,
table_map read_tables, COND *conds,
bool allow_null_cond, int *error);
......
......@@ -42,12 +42,14 @@ static int rr_from_cache(READ_RECORD *info);
static int init_rr_cache(THD *thd, READ_RECORD *info);
static int rr_cmp(uchar *a,uchar *b);
static int rr_index_first(READ_RECORD *info);
static int rr_index_last(READ_RECORD *info);
static int rr_index(READ_RECORD *info);
static int rr_index_desc(READ_RECORD *info);
/**
Initialize READ_RECORD structure to perform full index scan (in forward
direction) using read_record.read_record() interface.
Initialize READ_RECORD structure to perform full index scan in desired
direction using read_record.read_record() interface
This function has been added at late stage and is used only by
UPDATE/DELETE. Other statements perform index scans using
......@@ -59,10 +61,11 @@ static int rr_index(READ_RECORD *info);
@param print_error If true, call table->file->print_error() if an error
occurs (except for end-of-records error)
@param idx index to scan
@param reverse Scan in the reverse direction
*/
void init_read_record_idx(READ_RECORD *info, THD *thd, TABLE *table,
bool print_error, uint idx)
bool print_error, uint idx, bool reverse)
{
empty_record(table);
bzero((char*) info,sizeof(*info));
......@@ -77,7 +80,7 @@ void init_read_record_idx(READ_RECORD *info, THD *thd, TABLE *table,
if (!table->file->inited)
table->file->ha_index_init(idx, 1);
/* read_record will be changed to rr_index in rr_index_first */
info->read_record= rr_index_first;
info->read_record= reverse ? rr_index_last : rr_index_first;
}
......@@ -364,6 +367,29 @@ static int rr_index_first(READ_RECORD *info)
}
/**
Reads last row in an index scan.
@param info Scan info
@retval
0 Ok
@retval
-1 End of records
@retval
1 Error
*/
static int rr_index_last(READ_RECORD *info)
{
int tmp= info->file->index_last(info->record);
info->read_record= rr_index_desc;
if (tmp)
tmp= rr_handle_error(info, tmp);
return tmp;
}
/**
Reads index sequentially after first row.
......@@ -389,6 +415,31 @@ static int rr_index(READ_RECORD *info)
}
/**
Reads index sequentially from the last row to the first.
Read the prev index record (in backward direction) and translate return
value.
@param info Scan info
@retval
0 Ok
@retval
-1 End of records
@retval
1 Error
*/
static int rr_index_desc(READ_RECORD *info)
{
int tmp= info->file->index_prev(info->record);
if (tmp)
tmp= rr_handle_error(info, tmp);
return tmp;
}
int rr_sequential(READ_RECORD *info)
{
int tmp;
......
......@@ -71,7 +71,7 @@ void init_read_record(READ_RECORD *info, THD *thd, TABLE *reg_form,
SQL_SELECT *select, int use_record_cache,
bool print_errors, bool disable_rr_cache);
void init_read_record_idx(READ_RECORD *info, THD *thd, TABLE *table,
bool print_error, uint idx);
bool print_error, uint idx, bool reverse);
void end_read_record(READ_RECORD *info);
void rr_unlock_row(st_join_table *tab);
......
......@@ -47,7 +47,7 @@
*/
bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
SQL_I_List<ORDER> *order, ha_rows limit, ulonglong options)
SQL_I_List<ORDER> *order_list, ha_rows limit, ulonglong options)
{
bool will_batch;
int error, loc_error;
......@@ -58,6 +58,9 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
bool transactional_table, safe_update, const_cond;
bool const_cond_result;
ha_rows deleted= 0;
bool reverse= FALSE;
ORDER *order= (ORDER *) ((order_list && order_list->elements) ?
order_list->first : NULL);
uint usable_index= MAX_KEY;
SELECT_LEX *select_lex= &thd->lex->select_lex;
THD::killed_state killed_status= THD::NOT_KILLED;
......@@ -79,7 +82,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
DBUG_RETURN(TRUE);
/* check ORDER BY even if it can be ignored */
if (order && order->elements)
if (order)
{
TABLE_LIST tables;
List<Item> fields;
......@@ -89,9 +92,9 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
tables.table = table;
tables.alias = table_list->alias;
if (select_lex->setup_ref_array(thd, order->elements) ||
if (select_lex->setup_ref_array(thd, order_list->elements) ||
setup_order(thd, select_lex->ref_pointer_array, &tables,
fields, all_fields, order->first))
fields, all_fields, order))
{
delete select;
free_underlaid_joins(thd, &thd->lex->select_lex);
......@@ -182,6 +185,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
table->covering_keys.clear_all();
table->quick_keys.clear_all(); // Can't use 'only index'
select=make_select(table, 0, 0, conds, 0, &error);
if (error)
DBUG_RETURN(TRUE);
......@@ -217,22 +221,25 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
if (options & OPTION_QUICK)
(void) table->file->extra(HA_EXTRA_QUICK);
if (order && order->elements)
if (order)
{
uint length= 0;
SORT_FIELD *sortorder;
ha_rows examined_rows;
if ((!select || table->quick_keys.is_clear_all()) && limit != HA_POS_ERROR)
usable_index= get_index_for_order(table, order->first, limit);
table->update_const_key_parts(conds);
order= simple_remove_const(order, conds);
if (usable_index == MAX_KEY)
bool need_sort;
usable_index= get_index_for_order(order, table, select, limit,
&need_sort, &reverse);
if (need_sort)
{
DBUG_ASSERT(usable_index == MAX_KEY);
table->sort.io_cache= (IO_CACHE *) my_malloc(sizeof(IO_CACHE),
MYF(MY_FAE | MY_ZEROFILL));
if (!(sortorder= make_unireg_sortorder(order->first,
&length, NULL)) ||
if (!(sortorder= make_unireg_sortorder(order, &length, NULL)) ||
(table->sort.found_records = filesort(thd, table, sortorder, length,
select, HA_POS_ERROR, 1,
&examined_rows))
......@@ -263,7 +270,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
if (usable_index==MAX_KEY || (select && select->quick))
init_read_record(&info, thd, table, select, 1, 1, FALSE);
else
init_read_record_idx(&info, thd, table, 1, usable_index);
init_read_record_idx(&info, thd, table, 1, usable_index, reverse);
init_ftfuncs(thd, select_lex, 1);
thd_proc_info(thd, "updating");
......
This diff is collapsed.
......@@ -847,4 +847,12 @@ inline bool optimizer_flag(THD *thd, uint flag)
return (thd->variables.optimizer_switch & flag);
}
uint get_index_for_order(ORDER *order, TABLE *table, SQL_SELECT *select,
ha_rows limit, bool *need_sort, bool *reverse);
ORDER *simple_remove_const(ORDER *order, COND *where);
bool const_expression_in_where(COND *cond, Item *comp_item,
Field *comp_field= NULL,
Item **const_item= NULL);
#endif /* SQL_SELECT_INCLUDED */
......@@ -203,12 +203,13 @@ int mysql_update(THD *thd,
{
bool using_limit= limit != HA_POS_ERROR;
bool safe_update= test(thd->variables.option_bits & OPTION_SAFE_UPDATES);
bool used_key_is_modified, transactional_table, will_batch;
bool used_key_is_modified= FALSE, transactional_table, will_batch;
bool can_compare_record;
int res;
int error, loc_error;
uint used_index= MAX_KEY, dup_key_found;
uint used_index, dup_key_found;
bool need_sort= TRUE;
bool reverse= FALSE;
#ifndef NO_EMBEDDED_ACCESS_CHECKS
uint want_privilege;
#endif
......@@ -358,11 +359,7 @@ int mysql_update(THD *thd,
my_ok(thd); // No matching records
DBUG_RETURN(0);
}
if (!select && limit != HA_POS_ERROR)
{
if ((used_index= get_index_for_order(table, order, limit)) != MAX_KEY)
need_sort= FALSE;
}
/* If running in safe sql mode, don't allow updates without keys */
if (table->quick_keys.is_clear_all())
{
......@@ -378,24 +375,20 @@ int mysql_update(THD *thd,
table->mark_columns_needed_for_update();
/* Check if we are modifying a key that we are used to search with */
table->update_const_key_parts(conds);
order= simple_remove_const(order, conds);
if (select && select->quick)
{
used_index= select->quick->index;
used_key_is_modified= (!select->quick->unique_key_range() &&
select->quick->is_keys_used(table->write_set));
}
else
{
used_key_is_modified= 0;
if (used_index == MAX_KEY) // no index for sort order
used_index= get_index_for_order(order, table, select, limit,
&need_sort, &reverse);
if (need_sort)
{ // Assign table scan index to check below for modified key fields:
used_index= table->file->key_used_on_scan;
}
if (used_index != MAX_KEY)
{ // Check if we are modifying a key that we are used to search with:
used_key_is_modified= is_key_used(table, used_index, table->write_set);
}
#ifdef WITH_PARTITION_STORAGE_ENGINE
if (used_key_is_modified || order ||
partition_key_modified(table, table->write_set))
......@@ -414,7 +407,7 @@ int mysql_update(THD *thd,
table->use_all_columns();
}
/* note: We avoid sorting avoid if we sort on the used index */
/* note: We avoid sorting if we sort on the used index */
if (order && (need_sort || used_key_is_modified))
{
/*
......@@ -476,7 +469,7 @@ int mysql_update(THD *thd,
if (used_index == MAX_KEY || (select && select->quick))
init_read_record(&info, thd, table, select, 0, 1, FALSE);
else
init_read_record_idx(&info, thd, table, 1, used_index);
init_read_record_idx(&info, thd, table, 1, used_index, reverse);
thd_proc_info(thd, "Searching rows for update");
ha_rows tmp_limit= limit;
......
......@@ -33,6 +33,7 @@
#include "sql_base.h" // release_table_share
#include <m_ctype.h>
#include "my_md5.h"
#include "sql_select.h"
/* INFORMATION_SCHEMA name */
LEX_STRING INFORMATION_SCHEMA_NAME= {C_STRING_WITH_LEN("information_schema")};
......@@ -4916,6 +4917,61 @@ void init_mdl_requests(TABLE_LIST *table_list)
}
/**
Update TABLE::const_key_parts for single table UPDATE/DELETE query
@param conds WHERE clause expression
@retval TRUE error (OOM)
@retval FALSE success
@note
Set const_key_parts bits if key fields are equal to constants in
the WHERE expression.
*/
bool TABLE::update_const_key_parts(COND *conds)
{
bzero((char*) const_key_parts, sizeof(key_part_map) * s->keys);
if (conds == NULL)
return FALSE;
for (uint index= 0; index < s->keys; index++)
{
KEY_PART_INFO *keyinfo= key_info[index].key_part;
KEY_PART_INFO *keyinfo_end= keyinfo + key_info[index].key_parts;
for (key_part_map part_map= (key_part_map)1;
keyinfo < keyinfo_end;
keyinfo++, part_map<<= 1)
{
if (const_expression_in_where(conds, NULL, keyinfo->field))
const_key_parts[index]|= part_map;
}
}
return FALSE;
}
/**
Test if the order list consists of simple field expressions
@param order Linked list of ORDER BY arguments
@return TRUE if @a order is empty or consist of simple field expressions
*/
bool is_simple_order(ORDER *order)
{
for (ORDER *ord= order; ord; ord= ord->next)
{
if (ord->item[0]->real_item()->type() != Item::FIELD_ITEM)
return FALSE;
}
return TRUE;
}
/*****************************************************************************
** Instansiate templates
*****************************************************************************/
......
......@@ -1104,6 +1104,8 @@ public:
file->extra(HA_EXTRA_NO_KEYREAD);
}
}
bool update_const_key_parts(COND *conds);
};
......@@ -2088,6 +2090,8 @@ inline void mark_as_null_row(TABLE *table)
bfill(table->null_flags,table->s->null_bytes,255);
}
bool is_simple_order(ORDER *order);
#endif /* MYSQL_CLIENT */
#endif /* TABLE_INCLUDED */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment