Commit e25db795 authored by Sergey Petrunya's avatar Sergey Petrunya

DS-MRR, key-ordered retrievals: commit for buildbot

parent 7391175d
...@@ -287,6 +287,84 @@ scan_it_again: ...@@ -287,6 +287,84 @@ scan_it_again:
* DS-MRR implementation * DS-MRR implementation
***************************************************************************/ ***************************************************************************/
void SimpleBuffer::write(const uchar *data, size_t bytes)
{
DBUG_ASSERT(have_space_for(bytes));
if (direction == -1)
write_pos -= bytes;
memcpy(write_pos, data, bytes);
if (direction == 1)
write_pos += bytes;
}
bool SimpleBuffer::have_space_for(size_t bytes)
{
if (direction == 1)
return (write_pos + bytes < end);
else
return (write_pos - bytes >= start);
}
size_t SimpleBuffer::used_size()
{
return (direction == 1)? write_pos - read_pos : read_pos - write_pos;
}
uchar *SimpleBuffer::read(size_t bytes)
{
DBUG_ASSERT(have_data(bytes));
uchar *res;
if (direction == 1)
{
res= read_pos;
read_pos += bytes;
return res;
}
else
{
read_pos= read_pos - bytes;
return read_pos;
}
}
bool SimpleBuffer::have_data(size_t bytes)
{
return (direction == 1)? (write_pos - read_pos >= (ptrdiff_t)bytes) :
(read_pos - write_pos >= (ptrdiff_t)bytes);
}
void SimpleBuffer::reset_for_writing()
{
if (direction == 1)
write_pos= read_pos= start;
else
write_pos= read_pos= end;
}
void SimpleBuffer::reset_for_reading()
{
/*
Do we need this at all?
if (direction == 1)
pos= start;
else
pos= end;
//end?
*/
}
uchar *SimpleBuffer::end_of_space()
{
if (direction == 1)
return start;
else
return end;
//TODO: check this.
}
/** /**
DS-MRR: Initialize and start MRR scan DS-MRR: Initialize and start MRR scan
...@@ -309,7 +387,6 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, ...@@ -309,7 +387,6 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
void *seq_init_param, uint n_ranges, uint mode, void *seq_init_param, uint n_ranges, uint mode,
HANDLER_BUFFER *buf) HANDLER_BUFFER *buf)
{ {
uint elem_size;
Item *pushed_cond= NULL; Item *pushed_cond= NULL;
handler *new_h2= 0; handler *new_h2= 0;
THD *thd= current_thd; THD *thd= current_thd;
...@@ -331,68 +408,68 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, ...@@ -331,68 +408,68 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
use_default_impl= FALSE; use_default_impl= FALSE;
is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION); is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION);
// psergey2: split the buffer:
/* /*
psergey2-note: we can't split the buffer here because we don't know how key Figure out what steps we'll need to do
length. we'll only be able to do it when we've got the first range.
*/ */
do_sort_keys= FALSE;
if ((mode & HA_MRR_SINGLE_POINT) && if ((mode & HA_MRR_SINGLE_POINT) &&
optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS)) optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS))
{ {
//do_sort_keys= TRUE; // will use key buffer to sort keys; do_sort_keys= TRUE;
use_key_pointers= test(mode & HA_MRR_MATERIALIZED_KEYS); use_key_pointers= test(mode & HA_MRR_MATERIALIZED_KEYS);
} }
/*
do_rowid_fetch= FALSE; do_rowid_fetch= FALSE;
if (!doing_cpk_scan && !index_only_read) doing_cpk_scan= check_cpk_scan(h->active_index, mode);
if (!doing_cpk_scan /* && !index_only_read */)
{ {
do_rowid_fetch= TRUE; //will use rowid buffer to store/sort rowids, etc /* Will use rowid buffer to store/sort rowids, etc */
do_rowid_fetch= TRUE;
} }
DBUG_ASSERT(do_sort_keys || do_rowid_fetch);
full_buf= buf->buffer;
full_buf_end= buf->buffer_end;
if (do_sort_keys && do_rowid_fetch) /*
{ At start, alloc all of the buffer for rowids. Key sorting code will grab a
split buffer space proportionally piece if necessary.
}
else
{
// give all space to one buffer
if (do_sort_keys)
{
//sort_buffer_start= ...;
}
else
{
DBUG_ASSERT(do_rowid_fetch);
//rowid_buffer_start= ...;
}
}
*/ */
mrr_buf= buf->buffer; rowid_buffer.set_buffer_space(full_buf, full_buf_end, 1);
mrr_buf_end= buf->buffer_end;
if (is_mrr_assoc) if (is_mrr_assoc)
status_var_increment(table->in_use->status_var.ha_multi_range_read_init_count); status_var_increment(table->in_use->status_var.ha_multi_range_read_init_count);
if ((doing_cpk_scan= check_cpk_scan(h->active_index, mode))) /*
psergey2-todo: for CPK scans:
- use MRR irrespectively of @@mrr_sort_keys setting,
- dont do rowid retrieval.
*/
if (do_sort_keys)
{ {
/* It's a DS-MRR/CPK scan */ /* It's a DS-MRR/CPK scan */
cpk_tuple_length= 0; /* dummy value telling it needs to be inited */ key_tuple_length= 0; /* dummy value telling it needs to be inited */
cpk_have_range= FALSE; key_buff_elem_size= 0;
in_index_range= FALSE;
h->mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode); h->mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
h->mrr_funcs= *seq_funcs; h->mrr_funcs= *seq_funcs;
keyno= h->active_index != MAX_KEY? h->active_index : h2->active_index;
dsmrr_fill_key_buffer(); dsmrr_fill_key_buffer();
if (dsmrr_eof) if (dsmrr_eof && !do_rowid_fetch)
buf->end_of_used_area= mrr_buf_last; buf->end_of_used_area= key_buffer.end_of_space();
DBUG_RETURN(0); /* nothing could go wrong while filling the buffer */
} }
/* In regular DS-MRR, buffer stores {rowid, range_id} pairs */ if (!do_rowid_fetch)
elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*); {
mrr_buf_last= mrr_buf + ((mrr_buf_end - mrr_buf)/ elem_size)* elem_size; /*
mrr_buf_end= mrr_buf_last; We have the keys and won't need to fetch rowids, as key lookup will be
the last operation, done in multi_range_read_next().
*/
DBUG_RETURN(0);
}
rowid_buff_elem_size= h->ref_length + (is_mrr_assoc? sizeof(char*) : 0);
/* /*
psergey2: this is only needed when psergey2: this is only needed when
- doing a rowid-to-row scan - doing a rowid-to-row scan
...@@ -416,7 +493,7 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, ...@@ -416,7 +493,7 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2)) if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2))
DBUG_RETURN(1); DBUG_RETURN(1);
DBUG_ASSERT(h->active_index != MAX_KEY); DBUG_ASSERT(h->active_index != MAX_KEY);
uint mrr_keyno= h->active_index; keyno= h->active_index;
/* Create a separate handler object to do rnd_pos() calls. */ /* Create a separate handler object to do rnd_pos() calls. */
if (!(new_h2= h->clone(thd->mem_root)) || if (!(new_h2= h->clone(thd->mem_root)) ||
...@@ -426,7 +503,7 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, ...@@ -426,7 +503,7 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
DBUG_RETURN(1); DBUG_RETURN(1);
} }
if (mrr_keyno == h->pushed_idx_cond_keyno) if (keyno == h->pushed_idx_cond_keyno)
pushed_cond= h->pushed_idx_cond; pushed_cond= h->pushed_idx_cond;
/* /*
...@@ -443,12 +520,14 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, ...@@ -443,12 +520,14 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
h2= new_h2; /* Ok, now can put it into h2 */ h2= new_h2; /* Ok, now can put it into h2 */
table->prepare_for_position(); table->prepare_for_position();
h2->extra(HA_EXTRA_KEYREAD); h2->extra(HA_EXTRA_KEYREAD);
h2->mrr_funcs= *seq_funcs; //psergey3-todo: sort out where to store
h2->mrr_iter= h->mrr_iter;
if (h2->ha_index_init(mrr_keyno, FALSE)) if (h2->ha_index_init(keyno, FALSE))
goto error; goto error;
if (pushed_cond) if (pushed_cond)
h2->idx_cond_push(mrr_keyno, pushed_cond); h2->idx_cond_push(keyno, pushed_cond);
} }
else else
{ {
...@@ -468,9 +547,14 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, ...@@ -468,9 +547,14 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
goto error; goto error;
} }
if (h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges, if (!do_sort_keys &&
mode, buf) || h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges,
dsmrr_fill_rowid_buffer()) mode, buf))
{
goto error;
}
if (dsmrr_fill_rowid_buffer())
{ {
goto error; goto error;
} }
...@@ -479,7 +563,7 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, ...@@ -479,7 +563,7 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
adjust *buf to indicate that the remaining buffer space will not be used. adjust *buf to indicate that the remaining buffer space will not be used.
*/ */
if (dsmrr_eof) if (dsmrr_eof)
buf->end_of_used_area= mrr_buf_last; buf->end_of_used_area= rowid_buffer.end_of_space();
/* /*
h->inited == INDEX may occur when 'range checked for each record' is h->inited == INDEX may occur when 'range checked for each record' is
...@@ -526,21 +610,24 @@ static int rowid_cmp(void *h, uchar *a, uchar *b) ...@@ -526,21 +610,24 @@ static int rowid_cmp(void *h, uchar *a, uchar *b)
/** /**
DS-MRR: Fill the buffer with rowids and sort it by rowid DS-MRR: Fill and sort the rowid buffer
{This is an internal function of DiskSweep MRR implementation} {This is an internal function of DiskSweep MRR implementation}
Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into
buffer. When the buffer is full or scan is completed, sort the buffer by buffer. When the buffer is full or scan is completed, sort the buffer by
rowid and return. rowid and return.
The function assumes that rowids buffer is empty when it is invoked. The function assumes that rowids buffer is empty when it is invoked.
New2:
we will need to scan either
- the source sequence getting records
- use dsmrr_next_from_index..
dsmrr_eof is set to indicate whether we've exhausted the list of ranges we're dsmrr_eof is set to indicate whether we've exhausted the list of ranges we're
scanning. scanning.
psergey2: this func will 'fill the rowid buffer'. If filling the rowid buffer
requires that key buffer is filled/sorted first, will do that, too.
@param h Table handler @param h Table handler
@retval 0 OK, the next portion of rowids is in the buffer, @retval 0 OK, the next portion of rowids is in the buffer,
...@@ -554,40 +641,32 @@ int DsMrr_impl::dsmrr_fill_rowid_buffer() ...@@ -554,40 +641,32 @@ int DsMrr_impl::dsmrr_fill_rowid_buffer()
int res; int res;
DBUG_ENTER("DsMrr_impl::dsmrr_fill_rowid_buffer"); DBUG_ENTER("DsMrr_impl::dsmrr_fill_rowid_buffer");
mrr_buf_cur= mrr_buf; rowid_buffer.reset_for_writing();
mrr_buf_next_identical= mrr_buf_cur; identical_rowid_ptr= NULL;
/*
psergey2-todo:
- call here fill/sort key buffer, if needed.
psergey2-todo: then, get keys either from
- multi_range_read_next()
- sorted key buffer
psergey2-todo: if we're traversing an ordered key sequence, while (rowid_buffer.have_space_for(rowid_buff_elem_size))
check if next keys are the same as previous.
(note that it's easy as ordered sequence allows forward/backward
navigation so we don't need to buffer things)
*/
while ((mrr_buf_cur < mrr_buf_end) &&
!(res= h2->handler::multi_range_read_next(&range_info)))
{ {
if (do_sort_keys)
res= dsmrr_next_from_index(&range_info);
else
res= h2->handler::multi_range_read_next(&range_info);
if (res)
break;
KEY_MULTI_RANGE *curr_range= &h2->handler::mrr_cur_range; KEY_MULTI_RANGE *curr_range= &h2->handler::mrr_cur_range;
if (h2->mrr_funcs.skip_index_tuple && if (!do_sort_keys && /* If keys are sorted then this check is already done */
h2->mrr_funcs.skip_index_tuple &&
h2->mrr_funcs.skip_index_tuple(h2->mrr_iter, curr_range->ptr)) h2->mrr_funcs.skip_index_tuple(h2->mrr_iter, curr_range->ptr))
continue; continue;
/* Put rowid, or {rowid, range_id} pair into the buffer */ /* Put rowid, or {rowid, range_id} pair into the buffer */
h2->position(table->record[0]); h2->position(table->record[0]);
memcpy(mrr_buf_cur, h2->ref, h2->ref_length); rowid_buffer.write(h2->ref, h2->ref_length);
mrr_buf_cur += h2->ref_length;
if (is_mrr_assoc) if (is_mrr_assoc)
{ rowid_buffer.write((uchar*)&range_info, sizeof(void*));
memcpy(mrr_buf_cur, &range_info, sizeof(void*));
mrr_buf_cur += sizeof(void*);
}
} }
if (res && res != HA_ERR_END_OF_FILE) if (res && res != HA_ERR_END_OF_FILE)
...@@ -596,12 +675,11 @@ int DsMrr_impl::dsmrr_fill_rowid_buffer() ...@@ -596,12 +675,11 @@ int DsMrr_impl::dsmrr_fill_rowid_buffer()
/* Sort the buffer contents by rowid */ /* Sort the buffer contents by rowid */
uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*); uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
uint n_rowids= (mrr_buf_cur - mrr_buf) / elem_size; uint n_rowids= rowid_buffer.used_size() / elem_size;
my_qsort2(rowid_buffer.used_area(), n_rowids, elem_size,
(qsort2_cmp)rowid_cmp, (void*)h);
my_qsort2(mrr_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp,
(void*)h);
mrr_buf_last= mrr_buf_cur;
mrr_buf_cur= mrr_buf;
DBUG_RETURN(0); DBUG_RETURN(0);
} }
...@@ -616,8 +694,8 @@ int DsMrr_impl::key_tuple_cmp(void* arg, uchar* key1, uchar* key2) ...@@ -616,8 +694,8 @@ int DsMrr_impl::key_tuple_cmp(void* arg, uchar* key1, uchar* key2)
{ {
DsMrr_impl *dsmrr= (DsMrr_impl*)arg; DsMrr_impl *dsmrr= (DsMrr_impl*)arg;
TABLE *table= dsmrr->h->table; TABLE *table= dsmrr->h->table;
int res;
KEY_PART_INFO *part= table->key_info[table->s->primary_key].key_part; KEY_PART_INFO *part= table->key_info[dsmrr->keyno].key_part;
if (dsmrr->use_key_pointers) if (dsmrr->use_key_pointers)
{ {
...@@ -626,15 +704,31 @@ int DsMrr_impl::key_tuple_cmp(void* arg, uchar* key1, uchar* key2) ...@@ -626,15 +704,31 @@ int DsMrr_impl::key_tuple_cmp(void* arg, uchar* key1, uchar* key2)
key2= *((uchar**)key2); // todo is this alignment-safe? key2= *((uchar**)key2); // todo is this alignment-safe?
} }
uchar *key1_end= key1 + dsmrr->cpk_tuple_length; uchar *key1_end= key1 + dsmrr->key_tuple_length;
while (key1 < key1_end) while (key1 < key1_end)
{ {
Field* f = part->field; Field* f = part->field;
int len = part->store_length; int len = part->store_length;
int res = f->cmp(key1, key2); if (part->null_bit)
if (res) {
if (*key1) // key1 == NULL
{
if (!*key2) // key1(NULL) < key2(notNULL)
return -1;
goto equals;
}
else if (*key2) // key1(notNULL) > key2 (NULL)
return 1;
// Step over NULL byte for f->cmp().
key1++;
key2++;
len--;
}
if ((res= f->key_cmp(key1, key2)))
return res; return res;
equals:
key1 += len; key1 += len;
key2 += len; key2 += len;
part++; part++;
...@@ -643,6 +737,68 @@ int DsMrr_impl::key_tuple_cmp(void* arg, uchar* key1, uchar* key2) ...@@ -643,6 +737,68 @@ int DsMrr_impl::key_tuple_cmp(void* arg, uchar* key1, uchar* key2)
} }
/*
Setup key/rowid buffer sizes based on sample_key
DESCRIPTION
Setup key/rowid buffer sizes based on sample_key and its length.
This function must be called when all buffer space is empty.
*/
void DsMrr_impl::setup_buffer_sizes(key_range *sample_key)
{
key_tuple_length= sample_key->length;
key_tuple_map= sample_key->keypart_map;
key_size_in_keybuf= use_key_pointers ? sizeof(char*) :
key_tuple_length;
key_buff_elem_size= key_size_in_keybuf +
(int)is_mrr_assoc * sizeof(void*);
uint rowid_buf_elem_size= h->ref_length +
(int)is_mrr_assoc * sizeof(char*);
KEY *key_info= &h->table->key_info[keyno];
/*
Use rec_per_key statistics as a basis to find out how many rowids
we'll get for each key value.
TODO: are we guaranteed to get r_p_c==1 for unique keys?
TODO: what should be the default value to use when there is no
statistics?
*/
uint parts= my_count_bits(key_tuple_map);
ulong rpc;
if ((rpc= key_info->rec_per_key[parts - 1]))
{
rowid_buf_elem_size *= rpc;
}
double fraction_for_rowids=
((double) rowid_buf_elem_size /
((double)rowid_buf_elem_size + key_buff_elem_size));
uint bytes_for_rowids=
round(fraction_for_rowids * (full_buf_end - full_buf));
uint bytes_for_keys= (full_buf_end - full_buf) - bytes_for_rowids;
if (bytes_for_keys < key_buff_elem_size + 1)
{
uint add= key_buff_elem_size + 1 - bytes_for_keys;
bytes_for_rowids -= add;
DBUG_ASSERT(bytes_for_rowids >=
(h->ref_length + (int)is_mrr_assoc * sizeof(char*) + 1));
}
rowid_buffer.set_buffer_space(full_buf, full_buf + bytes_for_rowids, 1);
key_buffer.set_buffer_space(full_buf + bytes_for_rowids, full_buf_end, 1);
index_ranges_unique= test(key_info->flags & HA_NOSAME &&
key_info->key_parts ==
my_count_bits(sample_key->keypart_map));
}
/* /*
DS-MRR/CPK: Fill the buffer with (lookup_tuple, range_id) pairs and sort DS-MRR/CPK: Fill the buffer with (lookup_tuple, range_id) pairs and sort
...@@ -650,69 +806,55 @@ int DsMrr_impl::key_tuple_cmp(void* arg, uchar* key1, uchar* key2) ...@@ -650,69 +806,55 @@ int DsMrr_impl::key_tuple_cmp(void* arg, uchar* key1, uchar* key2)
DsMrr_impl::dsmrr_fill_key_buffer() DsMrr_impl::dsmrr_fill_key_buffer()
DESCRIPTION DESCRIPTION
DS-MRR/CPK: Fill the buffer with (lookup_tuple, range_id) pairs and sort DS-MRR/CPK: Enumerate the input range (=key) sequence, fill the key buffer
(lookup_key, range_id) pairs and sort.
dsmrr_eof is set to indicate whether we've exhausted the list of ranges dsmrr_eof is set to indicate whether we've exhausted the list of ranges
we're scanning. we're scanning.
psergey2-q: can this be used for filling/sorting key buffer in general case?
a: yes.
qq: can we push sequence iteration init down into here?
*/ */
void DsMrr_impl::dsmrr_fill_key_buffer() void DsMrr_impl::dsmrr_fill_key_buffer()
{ {
//psergey2: here, no identicals detection is necessary since we always scan
// the unordered sequence.
int res; int res;
KEY_MULTI_RANGE cur_range; KEY_MULTI_RANGE cur_range;
DBUG_ENTER("DsMrr_impl::dsmrr_fill_key_buffer"); DBUG_ENTER("DsMrr_impl::dsmrr_fill_key_buffer");
mrr_buf_cur= mrr_buf; // reset the buffer for writing.
while ((mrr_buf_cur < mrr_buf_end) && key_buffer.reset_for_writing();
while ((key_buffer.have_space_for(key_buff_elem_size)) &&
!(res= h->mrr_funcs.next(h->mrr_iter, &cur_range))) !(res= h->mrr_funcs.next(h->mrr_iter, &cur_range)))
{ {
DBUG_ASSERT(cur_range.range_flag & EQ_RANGE); DBUG_ASSERT(cur_range.range_flag & EQ_RANGE);
if (!cpk_tuple_length) if (!key_tuple_length)
{ {
cpk_tuple_length= cur_range.start_key.length; /* This only happens when we've just started filling the buffer */
key_buf_element_size= use_key_pointers ? sizeof(char*) : DBUG_ASSERT(key_buffer.used_size() == 0);
cpk_tuple_length; setup_buffer_sizes(&cur_range.start_key);
cpk_is_unique_scan= test(table->key_info[h->active_index].key_parts ==
my_count_bits(cur_range.start_key.keypart_map));
uint elem_size= key_buf_element_size + (int)is_mrr_assoc * sizeof(void*);
mrr_buf_last= mrr_buf + ((mrr_buf_end - mrr_buf)/elem_size) * elem_size;
mrr_buf_end= mrr_buf_last;
} }
//psergey2: if keys are materialized, store pointers, not copy keys
/* Put key, or {key, range_id} pair into the buffer */ /* Put key, or {key, range_id} pair into the buffer */
if (use_key_pointers) if (use_key_pointers)
memcpy(mrr_buf_cur, &cur_range.start_key.key, sizeof(char*)); key_buffer.write((uchar*)&cur_range.start_key.key, sizeof(char*));
else else
memcpy(mrr_buf_cur, cur_range.start_key.key, cpk_tuple_length); key_buffer.write(cur_range.start_key.key, key_tuple_length);
mrr_buf_cur += key_buf_element_size;
if (is_mrr_assoc) if (is_mrr_assoc)
{ key_buffer.write((uchar*)&cur_range.ptr, sizeof(void*));
memcpy(mrr_buf_cur, &cur_range.ptr, sizeof(void*));
mrr_buf_cur += sizeof(void*);
}
} }
dsmrr_eof= test(res); dsmrr_eof= test(res);
/* Sort the buffer contents by rowid */ /* Sort the buffer contents by rowid */
uint elem_size= key_buf_element_size + (int)is_mrr_assoc * sizeof(void*); uint key_elem_size= key_size_in_keybuf + (int)is_mrr_assoc * sizeof(void*);
uint n_rowids= (mrr_buf_cur - mrr_buf) / elem_size; uint n_keys= key_buffer.used_size() / key_elem_size;
my_qsort2(mrr_buf, n_rowids, elem_size, my_qsort2(key_buffer.used_area(), n_keys, key_elem_size,
(qsort2_cmp)DsMrr_impl::key_tuple_cmp, (void*)this); (qsort2_cmp)DsMrr_impl::key_tuple_cmp, (void*)this);
mrr_buf_last= mrr_buf_cur;
mrr_buf_cur= mrr_buf; last_identical_key_ptr= NULL;
in_identical_keys_range= FALSE;
DBUG_VOID_RETURN; DBUG_VOID_RETURN;
} }
...@@ -721,60 +863,105 @@ void DsMrr_impl::dsmrr_fill_key_buffer() ...@@ -721,60 +863,105 @@ void DsMrr_impl::dsmrr_fill_key_buffer()
DS-MRR/CPK: multi_range_read_next() function DS-MRR/CPK: multi_range_read_next() function
DESCRIPTION DESCRIPTION
DsMrr_impl::dsmrr_next_cpk() DsMrr_impl::dsmrr_next_from_index()
range_info OUT identifier of range that the returned record belongs to range_info OUT identifier of range that the returned record belongs to
DESCRIPTION DESCRIPTION
DS-MRR/CPK: multi_range_read_next() function.
This is similar to DsMrr_impl::dsmrr_next(), the differences are that This function walks over key buffer and does index reads, i.e. it produces
- we get records with index_read(), not with rnd_pos() {current_record, range_id} pairs.
- we may get multiple records for one key (=element of the buffer)
- unlike dsmrr_fill_rowid_buffer(), dsmrr_fill_key_buffer() never fails. The function has the same call contract like multi_range_read_next()'s.
We actually iterate nested sequences:
- a disjoint sequence of index ranges
- each range has multiple records
- each record goes into multiple identical ranges.
RETURN RETURN
0 OK, next record was successfully read 0 OK, next record was successfully read
HA_ERR_END_OF_FILE End of records HA_ERR_END_OF_FILE End of records
Other Some other error Other Some other error
psergey2-todo: this should detect identical keys.
*/ */
int DsMrr_impl::dsmrr_next_cpk(char **range_info) int DsMrr_impl::dsmrr_next_from_index(char **range_info_arg)
{ {
int res; int res;
uchar *key_in_buf;
handler *file= do_rowid_fetch? h2: h;
while (cpk_have_range) while (in_identical_keys_range)
{ {
//read_and_check:
/* Read record/key pointer from the buffer */
key_in_buf= identical_key_it.get_next(key_size_in_keybuf);
if (is_mrr_assoc)
cur_range_info= (char*)identical_key_it.get_next(sizeof(void*));
if (h->mrr_funcs.skip_record && if (key_in_buf == last_identical_key_ptr)
h->mrr_funcs.skip_record(h->mrr_iter, cpk_saved_range_info, NULL))
{ {
cpk_have_range= FALSE; /* We're looking at the last of the identical keys */
break; in_identical_keys_range= FALSE;
}
check_record:
if ((h->mrr_funcs.skip_index_tuple &&
h->mrr_funcs.skip_index_tuple(h->mrr_iter, *(char**)cur_range_info)) ||
(h->mrr_funcs.skip_record &&
h->mrr_funcs.skip_record(h->mrr_iter, *(char**)cur_range_info, NULL)))
{
continue;
} }
memcpy(range_info_arg, cur_range_info, sizeof(void*));
uchar *lookup_tuple= use_key_pointers? (*((uchar**)mrr_buf_cur)) : mrr_buf_cur; return 0;
res= h->index_next_same(table->record[0], lookup_tuple, cpk_tuple_length); }
if (h->mrr_funcs.skip_index_tuple && /* Try returrning next record from the current range */
h->mrr_funcs.skip_index_tuple(h->mrr_iter, cpk_saved_range_info)) while (in_index_range)
continue; {
res= file->ha_index_next_same(table->record[0], cur_index_tuple,
key_tuple_length);
if (res != HA_ERR_END_OF_FILE) if (res)
{ {
if (is_mrr_assoc) if (res != HA_ERR_END_OF_FILE && res != HA_ERR_KEY_NOT_FOUND)
memcpy(range_info, &cpk_saved_range_info, sizeof(void*)); return res; /* Fatal error */
return res;
in_index_range= FALSE; /* no more records here */
break;
} }
/* No more records in this range. Exit this loop and go get another range */ if (last_identical_key_ptr)
cpk_have_range= FALSE; {
in_identical_keys_range= TRUE;
identical_key_it.init(&key_buffer);
cur_range_info= first_identical_range_info;
} }
do goto check_record;
// goto read_and_check;
}
while(1)
{
DBUG_ASSERT(!in_identical_keys_range && !in_index_range);
/* Jump over the keys that were handled by identical key processing */
if (last_identical_key_ptr)
{
while (key_buffer.read(key_size_in_keybuf) != last_identical_key_ptr)
{ {
if (is_mrr_assoc)
key_buffer.read(sizeof(void*));
}
if (is_mrr_assoc)
key_buffer.read(sizeof(void*));
last_identical_key_ptr= NULL;
}
/* First, make sure we have a range at start of the buffer */ /* First, make sure we have a range at start of the buffer */
if (mrr_buf_cur == mrr_buf_last) if (!key_buffer.have_data(key_buff_elem_size))
{ {
if (dsmrr_eof) if (dsmrr_eof)
{ {
...@@ -782,58 +969,56 @@ int DsMrr_impl::dsmrr_next_cpk(char **range_info) ...@@ -782,58 +969,56 @@ int DsMrr_impl::dsmrr_next_cpk(char **range_info)
goto end; goto end;
} }
dsmrr_fill_key_buffer(); dsmrr_fill_key_buffer();
} if (!key_buffer.have_data(key_buff_elem_size))
if (mrr_buf_cur == mrr_buf_last)
{ {
res= HA_ERR_END_OF_FILE; res= HA_ERR_END_OF_FILE;
goto end; goto end;
} }
}
/* Get the next range to scan*/
cur_index_tuple= key_in_buf= key_buffer.read(key_size_in_keybuf);
if (use_key_pointers)
cur_index_tuple= *((uchar**)cur_index_tuple);
/* Ok, got the range. Try making a lookup. */
uchar *lookup_tuple= use_key_pointers? (*((uchar**)mrr_buf_cur)) : mrr_buf_cur;
mrr_buf_cur += key_buf_element_size;
if (is_mrr_assoc) if (is_mrr_assoc)
cur_range_info= (char*)key_buffer.read(sizeof(void*));
/* Do index lookup */
if ((res= file->ha_index_read_map(table->record[0], cur_index_tuple,
key_tuple_map, HA_READ_KEY_EXACT)))
{ {
memcpy(&cpk_saved_range_info, mrr_buf_cur, sizeof(void*)); if (res != HA_ERR_END_OF_FILE && res != HA_ERR_KEY_NOT_FOUND)
mrr_buf_cur += sizeof(void*) * test(is_mrr_assoc); return res;
continue; /* to next key and make another lookup */
} }
if (h->mrr_funcs.skip_record && /* Check if subsequent keys in the key buffer are the same as this one */
h->mrr_funcs.skip_record(h->mrr_iter, cpk_saved_range_info, NULL)) {
continue; uchar *ptr;
identical_key_it.init(&key_buffer);
res= h->index_read(table->record[0], lookup_tuple, cpk_tuple_length, last_identical_key_ptr= NULL;
HA_READ_KEY_EXACT); while ((ptr= identical_key_it.get_next(key_size_in_keybuf)))
{
/* if (is_mrr_assoc)
Check pushed index condition. Performance-wise, it does not make any identical_key_it.get_next(sizeof(void*));
sense to put this call here (the above call has already accessed the full
record). That's the best I could do, though, because:
- ha_innobase doesn't support IndexConditionPushdown on clustered PK
- MRR interface doesn't allow the storage engine to refuse a pushed index
condition.
Having this call here is not fully harmless: EXPLAIN shows "pushed index
condition", which is technically true but doesn't bring the benefits that
one might expect.
*/
if (h->mrr_funcs.skip_index_tuple &&
h->mrr_funcs.skip_index_tuple(h->mrr_iter, cpk_saved_range_info))
continue;
if (res && res != HA_ERR_END_OF_FILE) if (key_tuple_cmp(this, key_in_buf, ptr))
goto end; break;
if (!res) last_identical_key_ptr= ptr;
}
if (last_identical_key_ptr)
{ {
memcpy(range_info, &cpk_saved_range_info, sizeof(void*)); in_identical_keys_range= TRUE;
/* identical_key_it.init(&key_buffer);
Attempt reading more rows from this range only if there actually can first_identical_range_info= cur_range_info;
be multiple matches: }
*/ }
cpk_have_range= !cpk_is_unique_scan;
break; in_index_range= !index_ranges_unique;
goto check_record;
} }
} while (true);
end: end:
return res; return res;
...@@ -842,9 +1027,6 @@ end: ...@@ -842,9 +1027,6 @@ end:
/** /**
DS-MRR implementation: multi_range_read_next() function DS-MRR implementation: multi_range_read_next() function
psergey2-todo: put identical rowid detection code here
it should always work because rowid sequences are always sorted
*/ */
int DsMrr_impl::dsmrr_next(char **range_info) int DsMrr_impl::dsmrr_next(char **range_info)
...@@ -852,89 +1034,108 @@ int DsMrr_impl::dsmrr_next(char **range_info) ...@@ -852,89 +1034,108 @@ int DsMrr_impl::dsmrr_next(char **range_info)
int res; int res;
uchar *cur_range_info= 0; uchar *cur_range_info= 0;
uchar *rowid; uchar *rowid;
uchar *range_id;
if (use_default_impl) if (use_default_impl)
return h->handler::multi_range_read_next(range_info); return h->handler::multi_range_read_next(range_info);
if (doing_cpk_scan) if (!do_rowid_fetch)
return dsmrr_next_cpk(range_info); return dsmrr_next_from_index(range_info);
if (mrr_buf_next_identical != mrr_buf_cur) while (identical_rowid_ptr)
{ {
/* /*
There are multiple rowids. Return the record again, now with different Current record (the one we've returned in previous call) was obtained
range_id from a rowid that matched multiple range_ids. Return this record again,
with next matching range_id.
*/ */
do rowid= rowid_buffer.read(h->ref_length);
{
if (is_mrr_assoc) if (is_mrr_assoc)
memcpy(range_info, mrr_buf_next_identical + h->ref_length, sizeof(uchar*)); {
} while (!h2->mrr_funcs.skip_record || uchar *range_ptr= rowid_buffer.read(sizeof(uchar*));
!h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) range_info, rowid)); memcpy(range_info, range_ptr, sizeof(uchar*));
mrr_buf_next_identical += h->ref_length + sizeof(void*) * test(is_mrr_assoc);
return 0;
} }
do if (rowid == identical_rowid_ptr)
{
if (mrr_buf_cur == mrr_buf_last)
{ {
if (dsmrr_eof) identical_rowid_ptr= NULL; /* reached the last of identical rowids */
}
if (!h2->mrr_funcs.skip_record ||
!h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) *range_info, rowid))
{ {
res= HA_ERR_END_OF_FILE; return 0;
goto end;
} }
res= dsmrr_fill_rowid_buffer();
if (res)
goto end;
} }
/* return eof if there are no rowids in the buffer after re-fill attempt */ while (1)
if (mrr_buf_cur == mrr_buf_last)
{ {
res= HA_ERR_END_OF_FILE; if (!rowid_buffer.have_data(1))
goto end; {
if (dsmrr_eof)
return HA_ERR_END_OF_FILE;
if (do_sort_keys && key_buffer.used_size() == 0)
dsmrr_fill_key_buffer();
if ((res= dsmrr_fill_rowid_buffer()))
return res;
} }
rowid= mrr_buf_cur;
if (is_mrr_assoc) /* Return eof if there are no rowids in the buffer after re-fill attempt */
memcpy(&cur_range_info, mrr_buf_cur + h->ref_length, sizeof(uchar**)); if (!rowid_buffer.have_data(1))
return HA_ERR_END_OF_FILE;
size_t element_size= h->ref_length + sizeof(void*) * test(is_mrr_assoc); rowid= rowid_buffer.read(h->ref_length);
mrr_buf_cur += element_size; identical_rowid_ptr= NULL;
mrr_buf_next_identical= mrr_buf_cur;
if (is_mrr_assoc)
{
range_id= rowid_buffer.read(sizeof(uchar*));
memcpy(&cur_range_info, range_id, sizeof(uchar*));
memcpy(range_info, range_id, sizeof(uchar*));
}
//psergey2-note: the below isn't right- we won't want to skip over this
// rowid because this (rowid, range_id) pair has nothing.. the next
// identical rowids might have something.. (but we set identicals later,
// dont we?)
if (h2->mrr_funcs.skip_record && if (h2->mrr_funcs.skip_record &&
h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid)) h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid))
continue; continue;
res= h->ha_rnd_pos(table->record[0], rowid); res= h->ha_rnd_pos(table->record[0], rowid);
if (res == HA_ERR_RECORD_DELETED) if (res == HA_ERR_RECORD_DELETED)
continue; continue;
if (0)//(!res) /*
Check if subsequent buffer elements have the same rowid value as this
one. If yes, remember this fact so that we don't make any more rnd_pos()
calls with this value.
*/
if (!res)
{ {
/* /*
Note: this implies that SQL layer doesn't touch table->record[0] Note: this implies that SQL layer doesn't touch table->record[0]
between calls. between calls.
*/ */
uchar *current_el= mrr_buf_cur - element_size; uchar *ptr;
while (mrr_buf_cur != mrr_buf_last && SimpleBuffer::PeekIterator identical_rowid_it;
!h2->cmp_ref(current_el, mrr_buf_cur)) identical_rowid_it.init(&rowid_buffer);
while ((ptr= identical_rowid_it.get_next(h->ref_length)))
{ {
mrr_buf_cur += element_size; if (is_mrr_assoc)
identical_rowid_it.get_next(sizeof(void*));
if (h2->cmp_ref(rowid, ptr))
break;
identical_rowid_ptr= ptr;
} }
} }
break; return 0;
} while (true);
if (is_mrr_assoc)
{
memcpy(range_info, rowid + h->ref_length, sizeof(void*));
} }
end:
return res; return res;
} }
......
...@@ -37,6 +37,96 @@ ...@@ -37,6 +37,96 @@
*/ */
/*
A simple memory buffer for reading and writing.
when writing, there is no user-visible "current" position, although
internally 'pos' points to just after the end of used area (or at the
start of it for reverse buffer).
When reading, there is current position pointing at start (for reverse
buffer, end) of the element that will be read next.
^^ why end for reverse? it's more logical to point at start
One can peek at what's behind that element by using advance_ptr function.
TODO: will the reverse buffer store {tuple; rowid} or {rowid; tuple} pairs?
(why does it matter??? Read and write in the same order and then it
shouldn't matter.)
*/
class SimpleBuffer
{
uchar *start;
uchar *end;
uchar *read_pos;
uchar *write_pos;
/*
1 <=> buffer grows/is filled/is read from start to end
-1 <=> everthing is done from end to start instead.
*/
int direction;
public:
/* Write-mode functions */
void reset_for_writing();
void write(const uchar *data, size_t bytes);
bool have_space_for(size_t bytes);
uchar *used_area() { return (direction == 1)? read_pos : write_pos; }
size_t used_size();
/* Read-mode functions */
void reset_for_reading();
uchar *read(size_t bytes);
bool have_data(size_t bytes);
uchar *end_of_space();
/* Control functions */
void set_buffer_space(uchar *start_arg, uchar *end_arg, int direction_arg)
{
start= start_arg;
end= end_arg;
direction= direction_arg;
reset_for_writing();
}
friend class PeekIterator;
class PeekIterator
{
// if direction==1 : pointer to what to return next
// if direction==-1: pointer to the end of what is to be returned next
uchar *pos;
SimpleBuffer *sb;
public:
void init(SimpleBuffer *sb_arg)
{
sb= sb_arg;
pos= sb->read_pos;
}
/* Return pointer to next chunk of nbytes bytes and avance over it */
uchar *get_next(size_t nbytes)
{
if (sb->direction == 1)
{
if (pos + nbytes > sb->write_pos)
return NULL;
uchar *res= pos;
pos += nbytes;
return res;
}
else
{
if (pos - nbytes <= sb->write_pos)
return NULL;
pos -= nbytes;
return pos;
}
}
};
};
/* /*
DS-MRR implementation for one table. Create/use one object of this class for DS-MRR implementation for one table. Create/use one object of this class for
each ha_{myisam/innobase/etc} object. That object will be further referred to each ha_{myisam/innobase/etc} object. That object will be further referred to
...@@ -73,6 +163,8 @@ ...@@ -73,6 +163,8 @@
scanning. scanning.
*/ */
class DsMrr_impl class DsMrr_impl
{ {
public: public:
...@@ -108,14 +200,27 @@ private: ...@@ -108,14 +200,27 @@ private:
/* Secondary handler object. It is used for scanning the index */ /* Secondary handler object. It is used for scanning the index */
handler *h2; handler *h2;
uchar *full_buf;
uchar *full_buf_end;
/* Buffer to store rowids, or (rowid, range_id) pairs */ /* Buffer to store rowids, or (rowid, range_id) pairs */
uchar *mrr_buf; SimpleBuffer rowid_buffer;
uchar *mrr_buf_cur; /* Current position when reading/writing */
uchar *mrr_buf_last; /* When reading: end of used buffer space */
uchar *mrr_buf_end; /* End of the buffer */
uchar *mrr_buf_next_identical; uchar *identical_rowid_ptr;
/* Identical keys */
bool in_identical_keys_range;
uchar *last_identical_key_ptr;
SimpleBuffer::PeekIterator identical_key_it;
SimpleBuffer key_buffer;
uint keyno;
/* Execution control */
bool do_sort_keys;
bool use_key_pointers; bool use_key_pointers;
bool do_rowid_fetch;
bool dsmrr_eof; /* TRUE <=> We have reached EOF when reading index tuples */ bool dsmrr_eof; /* TRUE <=> We have reached EOF when reading index tuples */
...@@ -129,18 +234,33 @@ private: ...@@ -129,18 +234,33 @@ private:
/** DS-MRR/CPK variables start */ /** DS-MRR/CPK variables start */
/* Length of lookup tuple being used, in bytes */ /* Length of lookup tuple being used, in bytes */
uint cpk_tuple_length; uint key_tuple_length;
key_part_map key_tuple_map;
/*
This is
= key_tuple_length if we copy keys to buffer
= sizeof(void*) if we're using pointers to materialized keys.
*/
uint key_size_in_keybuf;
/* = key_size_in_keybuf [ + sizeof(range_assoc_info) ] */
uint key_buff_elem_size;
/* = h->ref_length [ + sizeof(range_assoc_info) ] */
uint rowid_buff_elem_size;
uint key_buf_element_size;
/* /*
TRUE <=> We're scanning on a full primary key (and not on prefix), and so TRUE <=> We're scanning on a full primary key (and not on prefix), and so
can get max. one match for each key can get max. one match for each key
*/ */
bool cpk_is_unique_scan; bool index_ranges_unique;
/* TRUE<=> we're in a middle of enumerating records from a range */ /* TRUE<=> we're in a middle of enumerating records from a range */
bool cpk_have_range; bool in_index_range;
/* Valid if cpk_have_range==TRUE: range_id of the range we're enumerating */ uchar *cur_index_tuple;
char *cpk_saved_range_info; /* if in_index_range==TRUE: range_id of the range we're enumerating */
char *cur_range_info;
char *first_identical_range_info;
bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz, bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz,
COST_VECT *cost); COST_VECT *cost);
...@@ -150,6 +270,11 @@ private: ...@@ -150,6 +270,11 @@ private:
static int key_tuple_cmp(void* arg, uchar* key1, uchar* key2); static int key_tuple_cmp(void* arg, uchar* key1, uchar* key2);
int dsmrr_fill_rowid_buffer(); int dsmrr_fill_rowid_buffer();
void dsmrr_fill_key_buffer(); void dsmrr_fill_key_buffer();
int dsmrr_next_cpk(char **range_info); int dsmrr_next_from_index(char **range_info);
void setup_buffer_sizes(key_range *sample_key);
static range_seq_t key_buf_seq_init(void *init_param, uint n_ranges, uint flags);
static uint key_buf_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range);
}; };
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment