Commit b28118ab authored by unknown's avatar unknown

Fix for BUG#5117:

 * Renamed handler::estimate_number_of_rows to handler::estimate_rows_upper_bound function, which can also return "unknown"
 * made filesort to use full sort buffer if number of rows to sort is not known.


sql/examples/ha_tina.h:
  Renamed estimate_number_of_rows -> estimate_rows_upper_bound
sql/filesort.cc:
   * Fix for BUG#5117: made filesort() to use full sort buffer if number of rows to sort is not known.
   * Comments and assertion added
sql/ha_berkeley.cc:
  Renamed estimate_number_of_rows -> estimate_rows_upper_bound
sql/ha_berkeley.h:
  Renamed estimate_number_of_rows -> estimate_rows_upper_bound
sql/ha_innodb.cc:
  Renamed estimate_number_of_rows -> estimate_rows_upper_bound
sql/ha_innodb.h:
  Renamed estimate_number_of_rows -> estimate_rows_upper_bound
sql/handler.h:
  Renamed estimate_number_of_rows -> estimate_rows_upper_bound
parent 3f70b62c
......@@ -90,6 +90,12 @@ class ha_tina: public handler
/* The next method will never be called */
virtual double read_time(ha_rows rows) { DBUG_ASSERT(0); return((double) rows / 20.0+1); }
virtual bool fast_key_read() { return 1;}
/*
TODO: return actual upper bound of number of records in the table.
(e.g. save number of records seen on full table scan and/or use file size
as upper bound)
*/
ha_rows estimate_rows_upper_bound() { return HA_POS_ERROR; }
int open(const char *name, int mode, uint test_if_locked);
int close(void);
......
......@@ -169,7 +169,13 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
else
#endif
{
records=table->file->estimate_number_of_rows();
records= table->file->estimate_rows_upper_bound();
/*
If number of records is not known, use as much of sort buffer
as possible.
*/
if (records == HA_POS_ERROR)
records--; // we use 'records+1' below.
selected_records_file= 0;
}
......@@ -315,7 +321,7 @@ static char **make_char_array(register uint fields, uint length, myf my_flag)
} /* make_char_array */
/* Read all buffer pointers into memory */
/* Read 'count' number of buffer pointers into memory */
static BUFFPEK *read_buffpek_from_file(IO_CACHE *buffpek_pointers, uint count)
{
......@@ -336,8 +342,40 @@ static BUFFPEK *read_buffpek_from_file(IO_CACHE *buffpek_pointers, uint count)
}
/*
Search after sort_keys and write them into tempfile.
SYNOPSIS
find_all_keys()
param Sorting parameter
select Use this to get source data
sort_keys Array of pointers to sort key + addon buffers.
buffpek_pointers File to write BUFFPEKs describing sorted segments
in tempfile.
tempfile File to write sorted sequences of sortkeys to.
indexfile If !NULL, use it for source data (contains rowids)
/* Search after sort_keys and place them in a temp. file */
NOTE
Basic idea:
while (get_next_sortkey())
{
if (no free space in sort_keys buffers)
{
sort sort_keys buffer;
dump sorted sequence to 'tempfile';
dump BUFFPEK describing sequence location into 'buffpek_pointers';
}
put sort key into 'sort_keys';
}
if (sort_keys has some elements && dumped at least once)
sort-dump-dump as above;
else
don't sort, leave sort_keys array to be sorted by caller.
All produced sequences are guaranteed to be non-empty.
RETURN
Number of records written on success.
HA_POS_ERROR on error.
*/
static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
uchar **sort_keys,
......@@ -452,7 +490,25 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
} /* find_all_keys */
/* Skriver en buffert med nycklar till filen */
/*
Sort the buffer and write:
1) the sorted sequence to tempfile
2) a BUFFPEK describing the sorted sequence position to buffpek_pointers
(was: Skriver en buffert med nycklar till filen)
SYNOPSIS
write_keys()
param Sort parameters
sort_keys Array of pointers to keys to sort
count Number of elements in sort_keys array
buffpek_pointers One 'BUFFPEK' struct will be written into this file.
The BUFFPEK::{file_pos, count} will indicate where
the sorted data was stored.
tempfile The sorted sequence will be written into this file.
RETURN
0 OK
1 Error
*/
static int
write_keys(SORTPARAM *param, register uchar **sort_keys, uint count,
......@@ -785,6 +841,20 @@ uint read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek,
/*
Merge buffers to one buffer
SYNOPSIS
merge_buffers()
param Sort parameter
from_file File with source data (BUFFPEKs point to this file)
to_file File to write the sorted result data.
sort_buffer Buffer for data to store up to MERGEBUFF2 sort keys.
lastbuff OUT Store here BUFFPEK describing data written to to_file
Fb First element in source BUFFPEKs array
Tb Last element in source BUFFPEKs array
flag
RETURN
0 - OK
other - error
*/
int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
......@@ -822,6 +892,9 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
strpos= (uchar*) sort_buffer;
org_max_rows=max_rows= param->max_rows;
/* The following will fire if there is not enough space in sort_buffer */
DBUG_ASSERT(maxcount!=0);
if (init_queue(&queue, (uint) (Tb-Fb)+1, offsetof(BUFFPEK,key), 0,
(queue_compare) (cmp= get_ptr_compare(sort_length)),
(void*) &sort_length))
......
......@@ -25,7 +25,7 @@
We will need an updated Berkeley DB version for this.
- Killing threads that has got a 'deadlock'
- SHOW TABLE STATUS should give more information about the table.
- Get a more accurate count of the number of rows (estimate_number_of_rows()).
- Get a more accurate count of the number of rows (estimate_rows_upper_bound()).
We could store the found number of rows when the table is scanned and
then increment the counter for each attempted write.
- We will need to extend the manager thread to makes checkpoints at
......@@ -63,7 +63,7 @@
#define HA_BERKELEY_ROWS_IN_TABLE 10000 /* to get optimization right */
#define HA_BERKELEY_RANGE_COUNT 100
#define HA_BERKELEY_MAX_ROWS 10000000 /* Max rows in table */
/* extra rows for estimate_number_of_rows() */
/* extra rows for estimate_rows_upper_bound() */
#define HA_BERKELEY_EXTRA_ROWS 100
/* Bits for share->status */
......@@ -2556,7 +2556,7 @@ static void update_status(BDB_SHARE *share, TABLE *table)
Used when sorting to allocate buffers and by the optimizer.
*/
ha_rows ha_berkeley::estimate_number_of_rows()
ha_rows ha_berkeley::estimate_rows_upper_bound()
{
return share->rows + HA_BERKELEY_EXTRA_ROWS;
}
......
......@@ -100,7 +100,7 @@ class ha_berkeley: public handler
ulong table_flags(void) const { return int_table_flags; }
uint max_supported_keys() const { return MAX_KEY-1; }
uint extra_rec_buf_length() { return BDB_HIDDEN_PRIMARY_KEY_LENGTH; }
ha_rows estimate_number_of_rows();
ha_rows estimate_rows_upper_bound();
const key_map *keys_to_use_for_scanning() { return &key_map_full; }
bool has_transactions() { return 1;}
......
......@@ -4115,7 +4115,7 @@ Gives an UPPER BOUND to the number of rows in a table. This is used in
filesort.cc. */
ha_rows
ha_innobase::estimate_number_of_rows(void)
ha_innobase::estimate_rows_upper_bound(void)
/*======================================*/
/* out: upper bound of rows */
{
......@@ -4124,7 +4124,7 @@ ha_innobase::estimate_number_of_rows(void)
ulonglong estimate;
ulonglong local_data_file_length;
DBUG_ENTER("estimate_number_of_rows");
DBUG_ENTER("estimate_rows_upper_bound");
/* We do not know if MySQL can call this function before calling
external_lock(). To be safe, update the thd of the current table
......@@ -4204,7 +4204,7 @@ ha_innobase::read_time(
time_for_scan = scan_time();
if ((total_rows = estimate_number_of_rows()) < rows)
if ((total_rows = estimate_rows_upper_bound()) < rows)
return time_for_scan;
return (ranges + (double) rows / (double) total_rows * time_for_scan);
......
......@@ -150,7 +150,7 @@ class ha_innobase: public handler
void position(byte *record);
ha_rows records_in_range(uint inx, key_range *min_key, key_range *max_key);
ha_rows estimate_number_of_rows();
ha_rows estimate_rows_upper_bound();
int create(const char *name, register TABLE *form,
HA_CREATE_INFO *create_info);
......
......@@ -300,7 +300,15 @@ class handler :public Sql_alloc
virtual const key_map *keys_to_use_for_scanning() { return &key_map_empty; }
virtual bool has_transactions(){ return 0;}
virtual uint extra_rec_buf_length() { return 0; }
virtual ha_rows estimate_number_of_rows() { return records+EXTRA_RECORDS; }
/*
Return upper bound of current number of records in the table
(max. of how many records one will retrieve when doing a full table scan)
If upper bound is not known, HA_POS_ERROR should be returned as a max
possible upper bound.
*/
virtual ha_rows estimate_rows_upper_bound()
{ return records+EXTRA_RECORDS; }
virtual const char *index_type(uint key_number) { DBUG_ASSERT(0); return "";}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment