Commit f26e14e2 authored by Monty's avatar Monty

Adding option to tell that cmp_ref handler call is expensive

- In Spider, calling cmp_ref() can be very expensive. In ha_partition.cc
  we don't anymore sort rows according to position for the Spider
  engine.
- Removed Spider specific call info(HA_EXTRA_STARTING_ORDERED_INDEX_SCAN)
  from handle_ordered_index_scan(). It's caused performance issues and
  does not change results for queries with ORDER BY.
- The visible effect of this patch is that for some storage engines,
  rows may be returned in a different order if there is no ORDER BY clause.

- Based in Spiral Patch 052:
  052_mariadb-10.2.0.add_partition_skip_pk_sort_for_non_clustered_index
  MDEV-7748
- The major difference from original patch is that there is no variable to
  get the old behaviour.

Other things:
- Optimized ha_partition::cmp_ref() and cmp_part_ids() to make them
  simpler and faster.
- Changed arguments to cmp_key_part_id() to be same as
  cmp_key_rowid_part_id to simplify code.

Original author: Kentoku SHIBA
First reviewer:  Jacob Mathew
Second reviewer: Michael Widenius
parent dc17ac16
...@@ -91,9 +91,6 @@ static handler *partition_create_handler(handlerton *hton, ...@@ -91,9 +91,6 @@ static handler *partition_create_handler(handlerton *hton,
static uint partition_flags(); static uint partition_flags();
static uint alter_table_flags(uint flags); static uint alter_table_flags(uint flags);
extern "C" int cmp_key_part_id(void *key_p, uchar *ref1, uchar *ref2);
extern "C" int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2);
/* /*
If frm_error() is called then we will use this to to find out what file If frm_error() is called then we will use this to to find out what file
extensions exist for the storage engine. This is also used by the default extensions exist for the storage engine. This is also used by the default
...@@ -5250,17 +5247,11 @@ bool ha_partition::init_record_priority_queue() ...@@ -5250,17 +5247,11 @@ bool ha_partition::init_record_priority_queue()
/* Initialize priority queue, initialized to reading forward. */ /* Initialize priority queue, initialized to reading forward. */
int (*cmp_func)(void *, uchar *, uchar *); int (*cmp_func)(void *, uchar *, uchar *);
void *cmp_arg; void *cmp_arg= (void*) this;
if (!m_using_extended_keys) if (!m_using_extended_keys && !(table_flags() & HA_CMP_REF_IS_EXPENSIVE))
{
cmp_func= cmp_key_rowid_part_id; cmp_func= cmp_key_rowid_part_id;
cmp_arg= (void*)this;
}
else else
{
cmp_func= cmp_key_part_id; cmp_func= cmp_key_part_id;
cmp_arg= (void*)m_curr_key_info;
}
DBUG_PRINT("info", ("partition queue_init(1) used_parts: %u", used_parts)); DBUG_PRINT("info", ("partition queue_init(1) used_parts: %u", used_parts));
if (init_queue(&m_queue, used_parts, 0, 0, cmp_func, cmp_arg, 0, 0)) if (init_queue(&m_queue, used_parts, 0, 0, cmp_func, cmp_arg, 0, 0))
{ {
...@@ -5480,22 +5471,13 @@ int ha_partition::index_read_map(uchar *buf, const uchar *key, ...@@ -5480,22 +5471,13 @@ int ha_partition::index_read_map(uchar *buf, const uchar *key,
/* Compare two part_no partition numbers */ /* Compare two part_no partition numbers */
static int cmp_part_ids(uchar *ref1, uchar *ref2) static int cmp_part_ids(uchar *ref1, uchar *ref2)
{ {
/* The following was taken from ha_partition::cmp_ref */ uint32 diff2= uint2korr(ref2);
my_ptrdiff_t diff1= ref2[1] - ref1[1]; uint32 diff1= uint2korr(ref1);
my_ptrdiff_t diff2= ref2[0] - ref1[0]; if (diff2 > diff1)
if (!diff1 && !diff2) return -1;
if (diff2 < diff1)
return 1;
return 0; return 0;
if (diff1 > 0)
return(-1);
if (diff1 < 0)
return(+1);
if (diff2 > 0)
return(-1);
return(+1);
} }
...@@ -5504,10 +5486,11 @@ static int cmp_part_ids(uchar *ref1, uchar *ref2) ...@@ -5504,10 +5486,11 @@ static int cmp_part_ids(uchar *ref1, uchar *ref2)
Provide ordering by (key_value, part_no). Provide ordering by (key_value, part_no).
*/ */
extern "C" int cmp_key_part_id(void *key_p, uchar *ref1, uchar *ref2) extern "C" int cmp_key_part_id(void *ptr, uchar *ref1, uchar *ref2)
{ {
ha_partition *file= (ha_partition*)ptr;
int res; int res;
if ((res= key_rec_cmp(key_p, ref1 + PARTITION_BYTES_IN_POS, if ((res= key_rec_cmp(file->m_curr_key_info, ref1 + PARTITION_BYTES_IN_POS,
ref2 + PARTITION_BYTES_IN_POS))) ref2 + PARTITION_BYTES_IN_POS)))
{ {
return res; return res;
...@@ -7406,10 +7389,6 @@ int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order) ...@@ -7406,10 +7389,6 @@ int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order)
DBUG_ENTER("ha_partition::handle_ordered_index_scan"); DBUG_ENTER("ha_partition::handle_ordered_index_scan");
DBUG_PRINT("enter", ("partition this: %p", this)); DBUG_PRINT("enter", ("partition this: %p", this));
if (!m_using_extended_keys &&
(error= loop_extra(HA_EXTRA_STARTING_ORDERED_INDEX_SCAN)))
DBUG_RETURN(error);
if (m_pre_calling) if (m_pre_calling)
error= handle_pre_scan(reverse_order, m_pre_call_use_parallel); error= handle_pre_scan(reverse_order, m_pre_call_use_parallel);
else else
...@@ -7594,7 +7573,7 @@ int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order) ...@@ -7594,7 +7573,7 @@ int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order)
after that read the first entry and copy it to the buffer to return in. after that read the first entry and copy it to the buffer to return in.
*/ */
queue_set_max_at_top(&m_queue, reverse_order); queue_set_max_at_top(&m_queue, reverse_order);
queue_set_cmp_arg(&m_queue, m_using_extended_keys? m_curr_key_info : (void*)this); queue_set_cmp_arg(&m_queue, (void*) this);
m_queue.elements= j - queue_first_element(&m_queue); m_queue.elements= j - queue_first_element(&m_queue);
queue_fix(&m_queue); queue_fix(&m_queue);
return_top_record(buf); return_top_record(buf);
...@@ -7874,9 +7853,7 @@ int ha_partition::handle_ordered_next(uchar *buf, bool is_next_same) ...@@ -7874,9 +7853,7 @@ int ha_partition::handle_ordered_next(uchar *buf, bool is_next_same)
DBUG_PRINT("info",("partition m_mrr_range_current->id: %u", DBUG_PRINT("info",("partition m_mrr_range_current->id: %u",
m_mrr_range_current ? m_mrr_range_current->id : 0)); m_mrr_range_current ? m_mrr_range_current->id : 0));
queue_set_max_at_top(&m_queue, FALSE); queue_set_max_at_top(&m_queue, FALSE);
queue_set_cmp_arg(&m_queue, (m_using_extended_keys ? queue_set_cmp_arg(&m_queue, (void*) this);
m_curr_key_info :
(void*) this));
m_queue.elements= j; m_queue.elements= j;
queue_fix(&m_queue); queue_fix(&m_queue);
return_top_record(buf); return_top_record(buf);
...@@ -10040,7 +10017,7 @@ uint ha_partition::min_record_length(uint options) const ...@@ -10040,7 +10017,7 @@ uint ha_partition::min_record_length(uint options) const
int ha_partition::cmp_ref(const uchar *ref1, const uchar *ref2) int ha_partition::cmp_ref(const uchar *ref1, const uchar *ref2)
{ {
int cmp; int cmp;
my_ptrdiff_t diff1, diff2; uint32 diff1, diff2;
DBUG_ENTER("ha_partition::cmp_ref"); DBUG_ENTER("ha_partition::cmp_ref");
cmp= m_file[0]->cmp_ref((ref1 + PARTITION_BYTES_IN_POS), cmp= m_file[0]->cmp_ref((ref1 + PARTITION_BYTES_IN_POS),
...@@ -10048,7 +10025,10 @@ int ha_partition::cmp_ref(const uchar *ref1, const uchar *ref2) ...@@ -10048,7 +10025,10 @@ int ha_partition::cmp_ref(const uchar *ref1, const uchar *ref2)
if (cmp) if (cmp)
DBUG_RETURN(cmp); DBUG_RETURN(cmp);
if ((ref1[0] == ref2[0]) && (ref1[1] == ref2[1])) diff2= uint2korr(ref2);
diff1= uint2korr(ref1);
if (diff1 == diff2)
{ {
/* This means that the references are same and are in same partition.*/ /* This means that the references are same and are in same partition.*/
DBUG_RETURN(0); DBUG_RETURN(0);
...@@ -10061,22 +10041,7 @@ int ha_partition::cmp_ref(const uchar *ref1, const uchar *ref2) ...@@ -10061,22 +10041,7 @@ int ha_partition::cmp_ref(const uchar *ref1, const uchar *ref2)
Remove this assert if DB_ROW_ID is changed to be per partition. Remove this assert if DB_ROW_ID is changed to be per partition.
*/ */
DBUG_ASSERT(!m_innodb); DBUG_ASSERT(!m_innodb);
DBUG_RETURN(diff2 > diff1 ? -1 : 1);
diff1= ref2[1] - ref1[1];
diff2= ref2[0] - ref1[0];
if (diff1 > 0)
{
DBUG_RETURN(-1);
}
if (diff1 < 0)
{
DBUG_RETURN(+1);
}
if (diff2 > 0)
{
DBUG_RETURN(-1);
}
DBUG_RETURN(+1);
} }
......
...@@ -143,6 +143,7 @@ typedef struct st_partition_part_key_multi_range_hld ...@@ -143,6 +143,7 @@ typedef struct st_partition_part_key_multi_range_hld
} PARTITION_PART_KEY_MULTI_RANGE_HLD; } PARTITION_PART_KEY_MULTI_RANGE_HLD;
extern "C" int cmp_key_part_id(void *key_p, uchar *ref1, uchar *ref2);
extern "C" int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2); extern "C" int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2);
class ha_partition :public handler class ha_partition :public handler
...@@ -1381,5 +1382,6 @@ class ha_partition :public handler ...@@ -1381,5 +1382,6 @@ class ha_partition :public handler
} }
friend int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2); friend int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2);
friend int cmp_key_part_id(void *key_p, uchar *ref1, uchar *ref2);
}; };
#endif /* HA_PARTITION_INCLUDED */ #endif /* HA_PARTITION_INCLUDED */
...@@ -161,6 +161,7 @@ enum enum_alter_inplace_result { ...@@ -161,6 +161,7 @@ enum enum_alter_inplace_result {
*/ */
#define HA_BINLOG_ROW_CAPABLE (1ULL << 34) #define HA_BINLOG_ROW_CAPABLE (1ULL << 34)
#define HA_BINLOG_STMT_CAPABLE (1ULL << 35) #define HA_BINLOG_STMT_CAPABLE (1ULL << 35)
/* /*
When a multiple key conflict happens in a REPLACE command mysql When a multiple key conflict happens in a REPLACE command mysql
expects the conflicts to be reported in the ascending order of expects the conflicts to be reported in the ascending order of
...@@ -289,6 +290,9 @@ enum enum_alter_inplace_result { ...@@ -289,6 +290,9 @@ enum enum_alter_inplace_result {
/* The following is for partition handler */ /* The following is for partition handler */
#define HA_CAN_MULTISTEP_MERGE (1LL << 53) #define HA_CAN_MULTISTEP_MERGE (1LL << 53)
/* calling cmp_ref() on the engine is expensive */
#define HA_CMP_REF_IS_EXPENSIVE (1ULL << 54)
/* bits in index_flags(index_number) for what you can do with index */ /* bits in index_flags(index_number) for what you can do with index */
#define HA_READ_NEXT 1 /* TODO really use this flag */ #define HA_READ_NEXT 1 /* TODO really use this flag */
#define HA_READ_PREV 2 /* supports ::index_prev */ #define HA_READ_PREV 2 /* supports ::index_prev */
......
...@@ -9195,6 +9195,7 @@ ulonglong ha_spider::table_flags() const ...@@ -9195,6 +9195,7 @@ ulonglong ha_spider::table_flags() const
HA_BINLOG_ROW_CAPABLE | HA_BINLOG_ROW_CAPABLE |
HA_BINLOG_STMT_CAPABLE | HA_BINLOG_STMT_CAPABLE |
HA_PARTIAL_COLUMN_READ | HA_PARTIAL_COLUMN_READ |
HA_CMP_REF_IS_EXPENSIVE |
#ifdef SPIDER_ENGINE_CONDITION_PUSHDOWN_IS_ALWAYS_ON #ifdef SPIDER_ENGINE_CONDITION_PUSHDOWN_IS_ALWAYS_ON
HA_CAN_TABLE_CONDITION_PUSHDOWN | HA_CAN_TABLE_CONDITION_PUSHDOWN |
#endif #endif
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment