opt_range.cc 316 KB
Newer Older
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

17 18 19 20 21
/*
  TODO:
  Fix that MAYBE_KEY are stored in the tree so that we can detect use
  of full hash keys for queries like:

22 23
  select s.id, kws.keyword_id from sites as s,kws where s.id=kws.site_id and kws.keyword_id in (204,205);

24 25
*/

26
/*
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
  This file contains:

  RangeAnalysisModule  
    A module that accepts a condition, index (or partitioning) description, 
    and builds lists of intervals (in index/partitioning space), such that 
    all possible records that match the condition are contained within the 
    intervals.
    The entry point for the range analysis module is get_mm_tree() function.
    
    The lists are returned in form of complicated structure of interlinked
    SEL_TREE/SEL_IMERGE/SEL_ARG objects.
    See check_quick_keys, find_used_partitions for examples of how to walk 
    this structure.
    All direct "users" of this module are located within this file, too.


  PartitionPruningModule
    A module that accepts a partitioned table, condition, and finds which
    partitions we will need to use in query execution. Search down for
    "PartitionPruningModule" for description.
    The module has single entry point - prune_partitions() function.


  Range/index_merge/groupby-minmax optimizer module  
    A module that accepts a table, condition, and returns 
     - a QUICK_*_SELECT object that can be used to retrieve rows that match
       the specified condition, or a "no records will match the condition" 
       statement.

    The module entry points are
      test_quick_select()
      get_quick_select_for_ref()


  Record retrieval code for range/index_merge/groupby-min-max.
    Implementations of QUICK_*_SELECT classes.
63 64
*/

65
#ifdef USE_PRAGMA_IMPLEMENTATION
bk@work.mysql.com's avatar
bk@work.mysql.com committed
66 67 68 69 70 71 72 73 74 75 76 77
#pragma implementation				// gcc: Class implementation
#endif

#include "mysql_priv.h"
#include <m_ctype.h>
#include "sql_select.h"

#ifndef EXTRA_DEBUG
#define test_rb_tree(A,B) {}
#define test_use_count(A) {}
#endif

78
/*
79
  Convert double value to #rows. Currently this does floor(), and we
80 81
  might consider using round() instead.
*/
82
#define double2rows(x) ((ha_rows)(x))
83

bk@work.mysql.com's avatar
bk@work.mysql.com committed
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
static int sel_cmp(Field *f,char *a,char *b,uint8 a_flag,uint8 b_flag);

static char is_null_string[2]= {1,0};

class SEL_ARG :public Sql_alloc
{
public:
  uint8 min_flag,max_flag,maybe_flag;
  uint8 part;					// Which key part
  uint8 maybe_null;
  uint16 elements;				// Elements in tree
  ulong use_count;				// use of this sub_tree
  Field *field;
  char *min_value,*max_value;			// Pointer to range

  SEL_ARG *left,*right,*next,*prev,*parent,*next_key_part;
  enum leaf_color { BLACK,RED } color;
  enum Type { IMPOSSIBLE, MAYBE, MAYBE_KEY, KEY_RANGE } type;

  SEL_ARG() {}
  SEL_ARG(SEL_ARG &);
  SEL_ARG(Field *,const char *,const char *);
  SEL_ARG(Field *field, uint8 part, char *min_value, char *max_value,
	  uint8 min_flag, uint8 max_flag, uint8 maybe_flag);
  SEL_ARG(enum Type type_arg)
109 110 111
    :elements(1),use_count(1),left(0),next_key_part(0),color(BLACK),
     type(type_arg)
  {}
bk@work.mysql.com's avatar
bk@work.mysql.com committed
112 113
  inline bool is_same(SEL_ARG *arg)
  {
114
    if (type != arg->type || part != arg->part)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
      return 0;
    if (type != KEY_RANGE)
      return 1;
    return cmp_min_to_min(arg) == 0 && cmp_max_to_max(arg) == 0;
  }
  inline void merge_flags(SEL_ARG *arg) { maybe_flag|=arg->maybe_flag; }
  inline void maybe_smaller() { maybe_flag=1; }
  inline int cmp_min_to_min(SEL_ARG* arg)
  {
    return sel_cmp(field,min_value, arg->min_value, min_flag, arg->min_flag);
  }
  inline int cmp_min_to_max(SEL_ARG* arg)
  {
    return sel_cmp(field,min_value, arg->max_value, min_flag, arg->max_flag);
  }
  inline int cmp_max_to_max(SEL_ARG* arg)
  {
    return sel_cmp(field,max_value, arg->max_value, max_flag, arg->max_flag);
  }
  inline int cmp_max_to_min(SEL_ARG* arg)
  {
    return sel_cmp(field,max_value, arg->min_value, max_flag, arg->min_flag);
  }
  SEL_ARG *clone_and(SEL_ARG* arg)
  {						// Get overlapping range
    char *new_min,*new_max;
    uint8 flag_min,flag_max;
    if (cmp_min_to_min(arg) >= 0)
    {
      new_min=min_value; flag_min=min_flag;
    }
    else
    {
      new_min=arg->min_value; flag_min=arg->min_flag; /* purecov: deadcode */
    }
    if (cmp_max_to_max(arg) <= 0)
    {
      new_max=max_value; flag_max=max_flag;
    }
    else
    {
      new_max=arg->max_value; flag_max=arg->max_flag;
    }
    return new SEL_ARG(field, part, new_min, new_max, flag_min, flag_max,
		       test(maybe_flag && arg->maybe_flag));
  }
  SEL_ARG *clone_first(SEL_ARG *arg)
  {						// min <= X < arg->min
    return new SEL_ARG(field,part, min_value, arg->min_value,
		       min_flag, arg->min_flag & NEAR_MIN ? 0 : NEAR_MAX,
		       maybe_flag | arg->maybe_flag);
  }
  SEL_ARG *clone_last(SEL_ARG *arg)
  {						// min <= X <= key_max
    return new SEL_ARG(field, part, min_value, arg->max_value,
		       min_flag, arg->max_flag, maybe_flag | arg->maybe_flag);
  }
  SEL_ARG *clone(SEL_ARG *new_parent,SEL_ARG **next);

  bool copy_min(SEL_ARG* arg)
  {						// Get overlapping range
    if (cmp_min_to_min(arg) > 0)
    {
      min_value=arg->min_value; min_flag=arg->min_flag;
      if ((max_flag & (NO_MAX_RANGE | NO_MIN_RANGE)) ==
	  (NO_MAX_RANGE | NO_MIN_RANGE))
	return 1;				// Full range
    }
    maybe_flag|=arg->maybe_flag;
    return 0;
  }
  bool copy_max(SEL_ARG* arg)
  {						// Get overlapping range
    if (cmp_max_to_max(arg) <= 0)
    {
      max_value=arg->max_value; max_flag=arg->max_flag;
      if ((max_flag & (NO_MAX_RANGE | NO_MIN_RANGE)) ==
	  (NO_MAX_RANGE | NO_MIN_RANGE))
	return 1;				// Full range
    }
    maybe_flag|=arg->maybe_flag;
    return 0;
  }

  void copy_min_to_min(SEL_ARG *arg)
  {
    min_value=arg->min_value; min_flag=arg->min_flag;
  }
  void copy_min_to_max(SEL_ARG *arg)
  {
    max_value=arg->min_value;
    max_flag=arg->min_flag & NEAR_MIN ? 0 : NEAR_MAX;
  }
  void copy_max_to_min(SEL_ARG *arg)
  {
    min_value=arg->max_value;
    min_flag=arg->max_flag & NEAR_MAX ? 0 : NEAR_MIN;
  }
213
  void store_min(uint length,char **min_key,uint min_key_flag)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
214
  {
215 216 217
    if ((min_flag & GEOM_FLAG) ||
        (!(min_flag & NO_MIN_RANGE) &&
	!(min_key_flag & (NO_MIN_RANGE | NEAR_MIN))))
bk@work.mysql.com's avatar
bk@work.mysql.com committed
218 219 220 221
    {
      if (maybe_null && *min_value)
      {
	**min_key=1;
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
222
	bzero(*min_key+1,length-1);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
223 224
      }
      else
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
225 226
	memcpy(*min_key,min_value,length);
      (*min_key)+= length;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
227
    }
228
  }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
229 230 231
  void store(uint length,char **min_key,uint min_key_flag,
	     char **max_key, uint max_key_flag)
  {
232
    store_min(length, min_key, min_key_flag);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
233 234 235 236 237 238
    if (!(max_flag & NO_MAX_RANGE) &&
	!(max_key_flag & (NO_MAX_RANGE | NEAR_MAX)))
    {
      if (maybe_null && *max_value)
      {
	**max_key=1;
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
239
	bzero(*max_key+1,length-1);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
240 241
      }
      else
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
242 243
	memcpy(*max_key,max_value,length);
      (*max_key)+= length;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
244 245 246 247 248 249
    }
  }

  void store_min_key(KEY_PART *key,char **range_key, uint *range_key_flag)
  {
    SEL_ARG *key_tree= first();
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
250
    key_tree->store(key[key_tree->part].store_length,
bk@work.mysql.com's avatar
bk@work.mysql.com committed
251 252 253 254 255 256 257 258 259 260 261 262
		    range_key,*range_key_flag,range_key,NO_MAX_RANGE);
    *range_key_flag|= key_tree->min_flag;
    if (key_tree->next_key_part &&
	key_tree->next_key_part->part == key_tree->part+1 &&
	!(*range_key_flag & (NO_MIN_RANGE | NEAR_MIN)) &&
	key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
      key_tree->next_key_part->store_min_key(key,range_key, range_key_flag);
  }

  void store_max_key(KEY_PART *key,char **range_key, uint *range_key_flag)
  {
    SEL_ARG *key_tree= last();
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
263
    key_tree->store(key[key_tree->part].store_length,
bk@work.mysql.com's avatar
bk@work.mysql.com committed
264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314
		    range_key, NO_MIN_RANGE, range_key,*range_key_flag);
    (*range_key_flag)|= key_tree->max_flag;
    if (key_tree->next_key_part &&
	key_tree->next_key_part->part == key_tree->part+1 &&
	!(*range_key_flag & (NO_MAX_RANGE | NEAR_MAX)) &&
	key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
      key_tree->next_key_part->store_max_key(key,range_key, range_key_flag);
  }

  SEL_ARG *insert(SEL_ARG *key);
  SEL_ARG *tree_delete(SEL_ARG *key);
  SEL_ARG *find_range(SEL_ARG *key);
  SEL_ARG *rb_insert(SEL_ARG *leaf);
  friend SEL_ARG *rb_delete_fixup(SEL_ARG *root,SEL_ARG *key, SEL_ARG *par);
#ifdef EXTRA_DEBUG
  friend int test_rb_tree(SEL_ARG *element,SEL_ARG *parent);
  void test_use_count(SEL_ARG *root);
#endif
  SEL_ARG *first();
  SEL_ARG *last();
  void make_root();
  inline bool simple_key()
  {
    return !next_key_part && elements == 1;
  }
  void increment_use_count(long count)
  {
    if (next_key_part)
    {
      next_key_part->use_count+=count;
      count*= (next_key_part->use_count-count);
      for (SEL_ARG *pos=next_key_part->first(); pos ; pos=pos->next)
	if (pos->next_key_part)
	  pos->increment_use_count(count);
    }
  }
  void free_tree()
  {
    for (SEL_ARG *pos=first(); pos ; pos=pos->next)
      if (pos->next_key_part)
      {
	pos->next_key_part->use_count--;
	pos->next_key_part->free_tree();
      }
  }

  inline SEL_ARG **parent_ptr()
  {
    return parent->left == this ? &parent->left : &parent->right;
  }
  SEL_ARG *clone_tree();
315

316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332

  /*
    Check if this SEL_ARG object represents a single-point interval

    SYNOPSIS
      is_singlepoint()
    
    DESCRIPTION
      Check if this SEL_ARG object (not tree) represents a single-point
      interval, i.e. if it represents a "keypart = const" or 
      "keypart IS NULL".

    RETURN
      TRUE   This SEL_ARG object represents a singlepoint interval
      FALSE  Otherwise
  */

333 334
  bool is_singlepoint()
  {
335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355
    /* 
      Check for NEAR_MIN ("strictly less") and NO_MIN_RANGE (-inf < field) 
      flags, and the same for right edge.
    */
    if (min_flag || max_flag)
      return FALSE;
    byte *min_val= min_value;
    byte *max_val= min_value;

    if (maybe_null)
    {
      /* First byte is a NULL value indicator */
      if (*min_val != *max_val)
        return FALSE;

      if (*min_val)
        return TRUE; /* This "x IS NULL" */
      min_val++;
      max_val++;
    }
    return !field->key_cmp(min_val, max_val);
356
  }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
357 358
};

359
class SEL_IMERGE;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
360

361

bk@work.mysql.com's avatar
bk@work.mysql.com committed
362 363 364
class SEL_TREE :public Sql_alloc
{
public:
365 366 367 368 369
  /*
    Starting an effort to document this field:
    (for some i, keys[i]->type == SEL_ARG::IMPOSSIBLE) => 
       (type == SEL_TREE::IMPOSSIBLE)
  */
bk@work.mysql.com's avatar
bk@work.mysql.com committed
370 371
  enum Type { IMPOSSIBLE, ALWAYS, MAYBE, KEY, KEY_SMALLER } type;
  SEL_TREE(enum Type type_arg) :type(type_arg) {}
372
  SEL_TREE() :type(KEY)
373
  {
374
    keys_map.clear_all();
375 376
    bzero((char*) keys,sizeof(keys));
  }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
377
  SEL_ARG *keys[MAX_KEY];
378 379
  key_map keys_map;        /* bitmask of non-NULL elements in keys */

380 381
  /*
    Possible ways to read rows using index_merge. The list is non-empty only
382 383 384
    if type==KEY. Currently can be non empty only if keys_map.is_clear_all().
  */
  List<SEL_IMERGE> merges;
385

386 387
  /* The members below are filled/used only after get_mm_tree is done */
  key_map ror_scans_map;   /* bitmask of ROR scan-able elements in keys */
388
  uint    n_ror_scans;     /* number of set bits in ror_scans_map */
389 390 391 392

  struct st_ror_scan_info **ror_scans;     /* list of ROR key scans */
  struct st_ror_scan_info **ror_scans_end; /* last ROR scan */
  /* Note that #records for each key scan is stored in table->quick_rows */
bk@work.mysql.com's avatar
bk@work.mysql.com committed
393 394
};

395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422
class RANGE_OPT_PARAM
{
public:
  /* Current thread handle */
  THD *thd;
  /* Table being analyzed */
  TABLE *table;
  /* Used inside get_mm_tree(). */
  COND *cond;
  table_map prev_tables;
  table_map read_tables;
  /* Bit of the table being analyzed */
  table_map current_table;

  /* Array of parts of all keys for which range analysis is performed */
  KEY_PART *key_parts;
  KEY_PART *key_parts_end;
  /* Memory that will be freed when range analysis completes */
  MEM_ROOT *mem_root;
  /* Memory that will last until the query end */
  MEM_ROOT *old_root;

  /*
    Number of indexes used in range analysis (in SEL_TREE::keys only the
    first #keys elements are not empty)
  */
  uint keys;

  /*
    If true, the index descriptions describe real indexes (and it is ok to
    call field->optimize_range(real_keynr[...], ...)).
    Otherwise index description describes fake indexes.
  */
  bool using_real_indexes;

  bool remove_jump_scans;

  /*
    used_key_no -> table_key_no translation table. Only makes sense if
    using_real_indexes==TRUE
  */
  uint real_keynr[MAX_KEY];
};
bk@work.mysql.com's avatar
bk@work.mysql.com committed
431

432 433 434
class PARAM : public RANGE_OPT_PARAM
{
public:
435
  KEY_PART *key[MAX_KEY]; /* First key parts of keys used in the query */
436
  uint baseflag, max_key_part, range_count;
437

438

bk@work.mysql.com's avatar
bk@work.mysql.com committed
439 440
  char min_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH],
    max_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH];
441
  bool quick;				// Don't calulate possible keys
442

443
  uint fields_bitmap_size;
444 445 446 447
  MY_BITMAP needed_fields;    /* bitmask of fields needed by the query */

  key_map *needed_reg;        /* ptr to SQL_SELECT::needed_reg */

448 449
  uint *imerge_cost_buff;     /* buffer for index_merge cost estimates */
  uint imerge_cost_buff_size; /* size of the buffer */
450

451
  /* TRUE if last checked tree->key can be used for ROR-scan */
452
  bool is_ror_scan;
453
};
bk@work.mysql.com's avatar
bk@work.mysql.com committed
454

455 456 457 458 459
class TABLE_READ_PLAN;
  class TRP_RANGE;
  class TRP_ROR_INTERSECT;
  class TRP_ROR_UNION;
  class TRP_ROR_INDEX_MERGE;
460
  class TRP_GROUP_MIN_MAX;
461 462 463

struct st_ror_scan_info;

464
static SEL_TREE * get_mm_parts(RANGE_OPT_PARAM *param,COND *cond_func,Field *field,
bk@work.mysql.com's avatar
bk@work.mysql.com committed
465 466
			       Item_func::Functype type,Item *value,
			       Item_result cmp_type);
467
static SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param,COND *cond_func,Field *field,
468
			    KEY_PART *key_part,
bk@work.mysql.com's avatar
bk@work.mysql.com committed
469
			    Item_func::Functype type,Item *value);
470
static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond);
471 472

static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
473 474 475 476 477
static ha_rows check_quick_select(PARAM *param,uint index,SEL_ARG *key_tree);
static ha_rows check_quick_keys(PARAM *param,uint index,SEL_ARG *key_tree,
				char *min_key,uint min_key_flag,
				char *max_key, uint max_key_flag);

478
QUICK_RANGE_SELECT *get_quick_select(PARAM *param,uint index,
479
                                     SEL_ARG *key_tree,
480
                                     MEM_ROOT *alloc = NULL);
481
static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
482
                                       bool index_read_must_be_used,
483 484 485 486 487 488
                                       double read_time);
static
TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
                                          double read_time,
                                          bool *are_all_covering);
static
489 490
TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param,
                                                   SEL_TREE *tree,
491 492 493 494
                                                   double read_time);
static
TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
                                         double read_time);
495 496
static
TRP_GROUP_MIN_MAX *get_best_group_min_max(PARAM *param, SEL_TREE *tree);
497
static int get_index_merge_params(PARAM *param, key_map& needed_reg,
498
                           SEL_IMERGE *imerge, double *read_time,
499
                           ha_rows* imerge_rows);
500
static double get_index_only_read_time(const PARAM* param, ha_rows records,
501 502
                                       int keynr);

bk@work.mysql.com's avatar
bk@work.mysql.com committed
503
#ifndef DBUG_OFF
504 505
static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
                           const char *msg);
506 507
static void print_ror_scans_arr(TABLE *table, const char *msg,
                                struct st_ror_scan_info **start,
508 509 510
                                struct st_ror_scan_info **end);
static void print_rowid(byte* val, int len);
static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
511
#endif
512

513 514
static SEL_TREE *tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2);
static SEL_TREE *tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
515 516 517 518
static SEL_ARG *sel_add(SEL_ARG *key1,SEL_ARG *key2);
static SEL_ARG *key_or(SEL_ARG *key1,SEL_ARG *key2);
static SEL_ARG *key_and(SEL_ARG *key1,SEL_ARG *key2,uint clone_flag);
static bool get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1);
519
bool get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key,
bk@work.mysql.com's avatar
bk@work.mysql.com committed
520 521 522 523 524
			   SEL_ARG *key_tree,char *min_key,uint min_key_flag,
			   char *max_key,uint max_key_flag);
static bool eq_tree(SEL_ARG* a,SEL_ARG *b);

static SEL_ARG null_element(SEL_ARG::IMPOSSIBLE);
525
static bool null_part_in_key(KEY_PART *key_part, const char *key,
526
                             uint length);
527
bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, RANGE_OPT_PARAM* param);
528 529 530


/*
531
  SEL_IMERGE is a list of possible ways to do index merge, i.e. it is
532
  a condition in the following form:
533
   (t_1||t_2||...||t_N) && (next)
534

535
  where all t_i are SEL_TREEs, next is another SEL_IMERGE and no pair
536 537 538 539 540 541 542 543 544 545 546
  (t_i,t_j) contains SEL_ARGS for the same index.

  SEL_TREE contained in SEL_IMERGE always has merges=NULL.

  This class relies on memory manager to do the cleanup.
*/

class SEL_IMERGE : public Sql_alloc
{
  /* Number of tree slots embedded in the object itself */
  enum { PREALLOCED_TREES= 10};
public:
  SEL_TREE *trees_prealloced[PREALLOCED_TREES];
  /* trees used to do index_merge */
  SEL_TREE **trees;
  /* last of these trees */
  SEL_TREE **trees_next;
  /* end of allocated space */
  SEL_TREE **trees_end;

  /* best keys to read in SEL_TREEs */
  SEL_ARG  ***best_keys;

  /* Start out empty, using the embedded array as storage */
  SEL_IMERGE()
    :trees(trees_prealloced),
     trees_next(trees_prealloced),
     trees_end(trees_prealloced + PREALLOCED_TREES)
  {}

  int or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree);
  int or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_TREE *new_tree);
  int or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, SEL_IMERGE* imerge);
};


565
/*
566 567
  Add SEL_TREE to this index_merge without any checks.

568 569
  NOTES
    This function implements the following:
570 571 572 573 574 575 576
      (x_1||...||x_N) || t = (x_1||...||x_N||t), where x_i, t are SEL_TREEs

  RETURN
     0 - OK
    -1 - Out of memory.
*/

577
int SEL_IMERGE::or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree)
{
  if (trees_next == trees_end)
  {
    /* The array is full: move it to a block twice as large */
    const int realloc_ratio= 2;		/* Double size for next round */
    uint old_elements= (trees_end - trees);
    /* Size is derived from the array's own element type (SEL_TREE*) */
    uint old_size= sizeof(*trees) * old_elements;
    uint new_size= old_size * realloc_ratio;
    SEL_TREE **new_trees;
    if (!(new_trees= (SEL_TREE**)alloc_root(param->mem_root, new_size)))
      return -1;
    memcpy(new_trees, trees, old_size);
    /* The old block is not released here */
    trees=      new_trees;
    trees_next= trees + old_elements;
    trees_end=  trees + old_elements * realloc_ratio;
  }
  *(trees_next++)= tree;
  return 0;
}


/*
  Perform OR operation on this SEL_IMERGE and supplied SEL_TREE new_tree,
  combining new_tree with one of the trees in this SEL_IMERGE if they both
  have SEL_ARGs for the same key.
602

603 604 605 606 607
  SYNOPSIS
    or_sel_tree_with_checks()
      param    PARAM from SQL_SELECT::test_quick_select
      new_tree SEL_TREE with type KEY or KEY_SMALLER.

608
  NOTES
609
    This does the following:
610 611
    (t_1||...||t_k)||new_tree =
     either
612 613 614
       = (t_1||...||t_k||new_tree)
     or
       = (t_1||....||(t_j|| new_tree)||...||t_k),
615

616
     where t_i, new_tree are SEL_TREEs.
617 618
    new_tree is combined with the first t_j it has a SEL_ARG on common
    key with. As a consequence of this, choice of keys to do index_merge
619 620
    read may depend on the order of conditions in WHERE part of the query.

621
  RETURN
622
    0  OK
623
    1  One of the trees was combined with new_tree to SEL_TREE::ALWAYS,
624 625 626 627
       and (*this) should be discarded.
   -1  An error occurred.
*/

628
int SEL_IMERGE::or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_TREE *new_tree)
{
  /* Find the first tree that new_tree can be ORed with */
  SEL_TREE **slot= trees;
  while (slot != trees_next && !sel_trees_can_be_ored(*slot, new_tree, param))
    slot++;

  /* New tree cannot be combined with any of existing trees. */
  if (slot == trees_next)
    return or_sel_tree(param, new_tree);

  SEL_TREE *combined= tree_or(param, *slot, new_tree);
  *slot= combined;
  if (!combined)
    return 1;

  /* SEL_TREE::IMPOSSIBLE is impossible here */
  const bool discard= (combined->type == SEL_TREE::MAYBE ||
                       combined->type == SEL_TREE::ALWAYS);
  return discard ? 1 : 0;
}


/*
  Perform OR operation on this index_merge and supplied index_merge list.

  RETURN
    0 - OK
657
    1 - One of conditions in result is always TRUE and this SEL_IMERGE
658 659 660 661
        should be discarded.
   -1 - An error occurred
*/

662
int SEL_IMERGE::or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, SEL_IMERGE* imerge)
{
  for (SEL_TREE** tree= imerge->trees;
       tree != imerge->trees_next;
       tree++)
  {
    /*
      Stop at the first failure and hand the callee's code back unchanged,
      so that an error (-1) stays distinguishable from "discard" (1).
    */
    int res= or_sel_tree_with_checks(param, *tree);
    if (res)
      return res;
  }
  return 0;
}


675
/*
676
  Perform AND operation on two index_merge lists and store result in *im1.
677 678 679 680 681 682 683 684 685 686 687
*/

inline void imerge_list_and_list(List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2)
{
  /* The whole operation is delegated to List::concat() on *im1 */
  im1->concat(im2);
}


/*
  Perform OR operation on 2 index_merge lists, storing result in first list.

688
  NOTES
689 690 691
    The following conversion is implemented:
     (a_1 &&...&& a_N)||(b_1 &&...&& b_K) = AND_i,j(a_i || b_j) =>
      => (a_1||b_1).
692 693

    i.e. all conjuncts except the first one are currently dropped.
694 695
    This is done to avoid producing N*K ways to do index_merge.

monty@mysql.com's avatar
monty@mysql.com committed
696
    If (a_1||b_1) produces a condition that is always TRUE, non-zero is returned
697
    and index_merge is discarded (while it is actually possible to try
698
    harder).
699

700 701
    As a consequence of this, choice of keys to do index_merge read may depend
    on the order of conditions in WHERE part of the query.
702 703

  RETURN
704
    0     OK, result is stored in *im1
705 706 707
    other Error, both passed lists are unusable
*/

708
int imerge_list_or_list(RANGE_OPT_PARAM *param,
709 710 711 712 713 714
                        List<SEL_IMERGE> *im1,
                        List<SEL_IMERGE> *im2)
{
  SEL_IMERGE *imerge= im1->head();
  im1->empty();
  im1->push_back(imerge);
715

716 717 718 719 720 721 722 723
  return imerge->or_sel_imerge_with_checks(param, im2->head());
}


/*
  Perform OR operation on index_merge list and key tree.

  RETURN
724
    0     OK, result is stored in *im1.
725 726 727
    other Error
*/

728
int imerge_list_or_tree(RANGE_OPT_PARAM *param,
729 730 731 732 733
                        List<SEL_IMERGE> *im1,
                        SEL_TREE *tree)
{
  SEL_IMERGE *imerge;
  List_iterator<SEL_IMERGE> it(*im1);
monty@mishka.local's avatar
monty@mishka.local committed
734
  while ((imerge= it++))
735 736 737 738 739 740
  {
    if (imerge->or_sel_tree_with_checks(param, tree))
      it.remove();
  }
  return im1->is_empty();
}
bk@work.mysql.com's avatar
bk@work.mysql.com committed
741 742

/***************************************************************************
743
** Basic functions for SQL_SELECT and QUICK_RANGE_SELECT
bk@work.mysql.com's avatar
bk@work.mysql.com committed
744 745 746 747 748 749 750 751 752
***************************************************************************/

	/* make a select from mysql info
	   Error is set as following:
	   0 = ok
	   1 = Got some error (out of memory?)
	   */

SQL_SELECT *make_select(TABLE *head, table_map const_tables,
                        table_map read_tables, COND *conds,
                        bool allow_null_cond,
                        int *error)
{
  DBUG_ENTER("make_select");

  *error= 0;

  /* No condition: nothing is built unless the caller explicitly allows it */
  if (!conds && !allow_null_cond)
    DBUG_RETURN(0);

  SQL_SELECT *sel= new SQL_SELECT;
  if (!sel)
  {
    *error= 1;			// out of memory
    DBUG_RETURN(0);		/* purecov: inspected */
  }

  sel->read_tables= read_tables;
  sel->const_tables= const_tables;
  sel->head= head;
  sel->cond= conds;

  if (!head->sort.io_cache)
    DBUG_RETURN(sel);

  /*
    Take over the table's sort cache: copy it into the select, derive the
    record count from its size, then free and clear the table's pointer.
  */
  sel->file= *head->sort.io_cache;
  sel->records= (ha_rows) (sel->file.end_of_file /
                           head->file->ref_length);
  my_free((gptr) (head->sort.io_cache),MYF(0));
  head->sort.io_cache= 0;
  DBUG_RETURN(sel);
}


SQL_SELECT::SQL_SELECT()
  :quick(0),
   cond(0),
   free_cond(0)
{
  /* Start with empty key maps and a cleared cache file */
  needed_reg.clear_all();
  quick_keys.clear_all();
  my_b_clear(&file);
}


794
void SQL_SELECT::cleanup()
bk@work.mysql.com's avatar
bk@work.mysql.com committed
795 796
{
  delete quick;
797
  quick= 0;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
798
  if (free_cond)
799 800
  {
    free_cond=0;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
801
    delete cond;
802
    cond= 0;
803
  }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
804 805 806
  close_cached_file(&file);
}

807 808 809 810 811 812

SQL_SELECT::~SQL_SELECT()
{
  /* All teardown lives in cleanup() */
  cleanup();
}

813
#undef index					// Fix for Unixware 7
bk@work.mysql.com's avatar
bk@work.mysql.com committed
814

sergefp@mysql.com's avatar
sergefp@mysql.com committed
815 816 817 818 819
/* Start with zero used key length and zero used key parts */
QUICK_SELECT_I::QUICK_SELECT_I()
  :max_used_key_length(0),
   used_key_parts(0)
{}

820
QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr,
                                       bool no_alloc, MEM_ROOT *parent_alloc)
  :dont_free(0),
   error(0),
   free_file(0),
   in_range(0),
   cur_range(NULL),
   range(0)
{
  /* A private memroot is set up only when neither alternative was requested */
  const bool use_own_memroot= !(no_alloc || parent_alloc);

  sorted= 0;
  index= key_nr;
  head= table;
  key_part_info= head->key_info[index].key_part;
  file= head->file;
  record= head->record[0];
  my_init_dynamic_array(&ranges, sizeof(QUICK_RANGE*), 16, 16);

  /* 'thd' is not accessible in QUICK_RANGE_SELECT::reset(), so copy now */
  multi_range_bufsiz= thd->variables.read_rnd_buff_size;
  multi_range_count= thd->variables.multi_range_count;
  multi_range_length= 0;
  multi_range= NULL;
  multi_range_buff= NULL;

  if (use_own_memroot)
  {
    /* Allocates everything through the internal memroot */
    init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
    thd->mem_root= &alloc;
  }
  else
  {
    bzero((char*) &alloc,sizeof(alloc));
  }
}

monty@mysql.com's avatar
monty@mysql.com committed
849

850 851
int QUICK_RANGE_SELECT::init()
{
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
852
  DBUG_ENTER("QUICK_RANGE_SELECT::init");
ingo@mysql.com's avatar
ingo@mysql.com committed
853

854 855
  if (file->inited != handler::NONE)
    file->ha_index_or_rnd_end();
tomas@poseidon.ndb.mysql.com's avatar
Merge  
tomas@poseidon.ndb.mysql.com committed
856
  DBUG_RETURN(error= file->ha_index_init(index, 1));
monty@mysql.com's avatar
monty@mysql.com committed
857 858 859 860 861 862
}


/* End the index or rnd scan on the handler, if one is active. */
void QUICK_RANGE_SELECT::range_end()
{
  if (file->inited == handler::NONE)
    return;                                     /* Nothing to end */
  file->ha_index_or_rnd_end();
}

monty@mysql.com's avatar
monty@mysql.com committed
866

867
/*
  Release the resources held by this quick select.

  NOTES
    Unless dont_free is set, the scan is ended, key-only reading is
    switched off, a separately created handler (free_file) is unlocked,
    closed and deleted, and the ranges array and memory root are freed.
    The multi-range buffers are freed regardless of dont_free.
*/
QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT()
{
  DBUG_ENTER("QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT");
  if (!dont_free)
  {
    /* file is NULL for CPK scan on covering ROR-intersection */
    if (file != NULL)
    {
      range_end();
      file->extra(HA_EXTRA_NO_KEYREAD);
      if (free_file)
      {
        /* This quick select has its own handler object: dispose of it. */
        DBUG_PRINT("info", ("Freeing separate handler %p (free=%d)", file,
                            free_file));
        file->ha_reset();
        file->external_lock(current_thd, F_UNLCK);
        file->close();
        delete file;
      }
    }
    /* ranges are allocated in alloc */
    delete_dynamic(&ranges);
    free_root(&alloc,MYF(0));
  }

  if (multi_range)
  {
    my_free((char*) multi_range, MYF(0));
  }
  if (multi_range_buff)
  {
    my_free((char*) multi_range_buff, MYF(0));
  }
  DBUG_VOID_RETURN;
}

897

898
/*
  Construct an (initially empty) index_merge quick select.

  SYNOPSIS
    QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT()
      thd_param  Thread handle, stored in the 'thd' member
      table      Table to be scanned
*/
QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT(THD *thd_param,
                                                   TABLE *table)
  :pk_quick_select(NULL), thd(thd_param)
{
  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT");
  head= table;
  index= MAX_KEY;                       /* No single index is used */
  init_sql_alloc(&alloc, thd_param->variables.range_alloc_block_size, 0);
  bzero(&read_record, sizeof(read_record));
  DBUG_VOID_RETURN;
}

int QUICK_INDEX_MERGE_SELECT::init()
{
sergefp@mysql.com's avatar
sergefp@mysql.com committed
912 913
  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::init");
  DBUG_RETURN(0);
914 915
}

916
int QUICK_INDEX_MERGE_SELECT::reset()
917
{
918
  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::reset");
sergefp@mysql.com's avatar
sergefp@mysql.com committed
919
  DBUG_RETURN(read_keys_and_merge());
920 921
}

922
bool
923 924
QUICK_INDEX_MERGE_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range)
{
925 926
  /*
    Save quick_select that does scan on clustered primary key as it will be
927
    processed separately.
928
  */
929
  if (head->file->primary_key_is_clustered() &&
930
      quick_sel_range->index == head->s->primary_key)
931 932 933 934
    pk_quick_select= quick_sel_range;
  else
    return quick_selects.push_back(quick_sel_range);
  return 0;
935 936 937 938
}

/*
  Destroy the merged quick selects, the clustered PK quick select and the
  own memory root.

  NOTES
    The 'file' pointer of every merged quick select is set to NULL before
    the quick selects are deleted.
*/
QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT()
{
  List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
  QUICK_RANGE_SELECT *merged;
  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT");

  quick_it.rewind();
  for (merged= quick_it++; merged; merged= quick_it++)
  {
    merged->file= NULL;
  }
  quick_selects.delete_elements();
  delete pk_quick_select;
  free_root(&alloc,MYF(0));
  DBUG_VOID_RETURN;
}

951 952 953 954 955

/*
  Construct a ROR-intersection quick select.

  SYNOPSIS
    QUICK_ROR_INTERSECT_SELECT::QUICK_ROR_INTERSECT_SELECT()
      thd_param           Thread handle, stored in the 'thd' member
      table               Table to be scanned
      retrieve_full_rows  Stored in need_to_fetch_row
      parent_alloc        If non-NULL, memory root to allocate last_rowid
                          on; the own memory root is then left zeroed

  NOTES
    last_rowid may be NULL after construction if the allocation failed;
    init() reports that.
*/
QUICK_ROR_INTERSECT_SELECT::QUICK_ROR_INTERSECT_SELECT(THD *thd_param,
                                                       TABLE *table,
                                                       bool retrieve_full_rows,
                                                       MEM_ROOT *parent_alloc)
  : cpk_quick(NULL), thd(thd_param), need_to_fetch_row(retrieve_full_rows),
    scans_inited(FALSE)
{
  MEM_ROOT *rowid_root;

  index= MAX_KEY;
  head= table;
  record= head->record[0];

  if (parent_alloc)
  {
    bzero(&alloc, sizeof(MEM_ROOT));
    rowid_root= parent_alloc;
  }
  else
  {
    init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
    rowid_root= &alloc;
  }
  last_rowid= (byte*)alloc_root(rowid_root, head->file->ref_length);
}

970

971
/*
972 973 974
  Do post-constructor initialization.
  SYNOPSIS
    QUICK_ROR_INTERSECT_SELECT::init()
975

976 977 978 979 980
  RETURN
    0      OK
    other  Error code
*/

981 982
int QUICK_ROR_INTERSECT_SELECT::init()
{
sergefp@mysql.com's avatar
sergefp@mysql.com committed
983 984 985
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init");
 /* Check if last_rowid was successfully allocated in ctor */
  DBUG_RETURN(!last_rowid);
986 987 988 989
}


/*
990 991 992 993
  Initialize this quick select to be a ROR-merged scan.

  SYNOPSIS
    QUICK_RANGE_SELECT::init_ror_merged_scan()
monty@mysql.com's avatar
monty@mysql.com committed
994
      reuse_handler If TRUE, use head->file, otherwise create a separate
995 996 997 998
                    handler object

  NOTES
    This function creates and prepares for subsequent use a separate handler
999
    object if it can't reuse head->file. The reason for this is that during
1000 1001 1002
    ROR-merge several key scans are performed simultaneously, and a single
    handler is only capable of preserving context of a single key scan.

1003
    In ROR-merge the quick select doing merge does full records retrieval,
1004
    merged quick selects read only keys.
1005 1006

  RETURN
1007 1008 1009 1010
    0  ROR child scan initialized, ok to use.
    1  error
*/

1011
int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler)
1012 1013
{
  handler *save_file= file;
1014
  THD *thd;
1015
  DBUG_ENTER("QUICK_RANGE_SELECT::init_ror_merged_scan");
1016

1017 1018 1019 1020
  if (reuse_handler)
  {
    DBUG_PRINT("info", ("Reusing handler %p", file));
    if (file->extra(HA_EXTRA_KEYREAD) ||
1021
        file->ha_retrieve_all_pk() ||
1022 1023 1024 1025
        init() || reset())
    {
      DBUG_RETURN(1);
    }
monty@mysql.com's avatar
monty@mysql.com committed
1026
    DBUG_RETURN(0);
1027 1028 1029 1030 1031 1032 1033 1034
  }

  /* Create a separate handler object for this quick select */
  if (free_file)
  {
    /* already have own 'handler' object. */
    DBUG_RETURN(0);
  }
1035

1036 1037
  thd= head->in_use;
  if (!(file= get_new_handler(head->s, thd->mem_root, head->s->db_type)))
1038 1039
    goto failure;
  DBUG_PRINT("info", ("Allocated new handler %p", file));
1040 1041
  if (file->ha_open(head, head->s->normalized_path.str, head->db_stat,
                    HA_OPEN_IGNORE_IF_LOCKED))
1042
  {
1043
    /* Caller will free the memory */
1044 1045
    goto failure;
  }
1046 1047
  if (file->external_lock(thd, F_RDLCK))
    goto failure;
1048 1049

  if (file->extra(HA_EXTRA_KEYREAD) ||
1050
      file->ha_retrieve_all_pk() ||
1051 1052
      init() || reset())
  {
1053
    file->external_lock(thd, F_UNLCK);
1054 1055 1056
    file->close();
    goto failure;
  }
monty@mysql.com's avatar
monty@mysql.com committed
1057
  free_file= TRUE;
1058 1059 1060 1061
  last_rowid= file->ref;
  DBUG_RETURN(0);

failure:
1062 1063
  if (file)
    delete file;
1064 1065 1066 1067
  file= save_file;
  DBUG_RETURN(1);
}

1068 1069 1070 1071 1072

/*
  Initialize this quick select to be a part of a ROR-merged scan.
  SYNOPSIS
    QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan()
monty@mysql.com's avatar
monty@mysql.com committed
1073
      reuse_handler If TRUE, use head->file, otherwise create separate
1074
                    handler object.
1075
  RETURN
1076 1077 1078 1079
    0     OK
    other error code
*/
int QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan(bool reuse_handler)
1080 1081 1082
{
  List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
  QUICK_RANGE_SELECT* quick;
1083
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan");
1084 1085

  /* Initialize all merged "children" quick selects */
sergefp@mysql.com's avatar
sergefp@mysql.com committed
1086
  DBUG_ASSERT(!need_to_fetch_row || reuse_handler);
1087 1088 1089
  if (!need_to_fetch_row && reuse_handler)
  {
    quick= quick_it++;
1090
    /*
1091
      There is no use of this->file. Use it for the first of merged range
1092 1093
      selects.
    */
monty@mysql.com's avatar
monty@mysql.com committed
1094
    if (quick->init_ror_merged_scan(TRUE))
1095 1096 1097
      DBUG_RETURN(1);
    quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS);
  }
monty@mishka.local's avatar
monty@mishka.local committed
1098
  while ((quick= quick_it++))
1099
  {
monty@mysql.com's avatar
monty@mysql.com committed
1100
    if (quick->init_ror_merged_scan(FALSE))
1101 1102
      DBUG_RETURN(1);
    quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS);
1103
    /* All merged scans share the same record buffer in intersection. */
1104 1105 1106
    quick->record= head->record[0];
  }

monty@mysql.com's avatar
monty@mysql.com committed
1107
  if (need_to_fetch_row && head->file->ha_rnd_init(1))
1108 1109 1110 1111 1112 1113 1114
  {
    DBUG_PRINT("error", ("ROR index_merge rnd_init call failed"));
    DBUG_RETURN(1);
  }
  DBUG_RETURN(0);
}

1115

1116
/*
1117 1118 1119 1120 1121 1122 1123 1124
  Initialize quick select for row retrieval.
  SYNOPSIS
    reset()
  RETURN
    0      OK
    other  Error code
*/

1125 1126 1127
int QUICK_ROR_INTERSECT_SELECT::reset()
{
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::reset");
sergefp@mysql.com's avatar
sergefp@mysql.com committed
1128 1129
  if (!scans_inited && init_ror_merged_scan(TRUE))
    DBUG_RETURN(1);
1130
  scans_inited= TRUE;
sergefp@mysql.com's avatar
sergefp@mysql.com committed
1131 1132 1133 1134 1135
  List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
  QUICK_RANGE_SELECT *quick;
  while ((quick= it++))
    quick->reset();
  DBUG_RETURN(0);
1136 1137
}

1138 1139 1140

/*
  Add a merged quick select to this ROR-intersection quick select.
1141

1142 1143 1144 1145 1146 1147
  SYNOPSIS
    QUICK_ROR_INTERSECT_SELECT::push_quick_back()
      quick Quick select to be added. The quick select must return
            rows in rowid order.
  NOTES
    This call can only be made before init() is called.
1148

1149
  RETURN
1150
    FALSE OK
monty@mysql.com's avatar
monty@mysql.com committed
1151
    TRUE  Out of memory.
1152 1153
*/

1154
bool
1155 1156
QUICK_ROR_INTERSECT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick)
{
1157
  return quick_selects.push_back(quick);
1158 1159 1160
}

/*
  Delete the merged quick selects and the clustered PK quick select, free
  the own memory root and, if full rows were being fetched, end the rnd
  scan that is still active on head->file.
*/
QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT()
{
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT");
  quick_selects.delete_elements();
  delete cpk_quick;
  free_root(&alloc,MYF(0));
  if (need_to_fetch_row)
  {
    if (head->file->inited != handler::NONE)
      head->file->ha_rnd_end();
  }
  DBUG_VOID_RETURN;
}

monty@mysql.com's avatar
monty@mysql.com committed
1171

1172 1173
/*
  Construct a ROR-union quick select.

  SYNOPSIS
    QUICK_ROR_UNION_SELECT::QUICK_ROR_UNION_SELECT()
      thd_param  Thread handle, stored in the 'thd' member
      table      Table to be scanned

  NOTES
    The own memory root is initialized and thd_param->mem_root is switched
    to point at it.
*/
QUICK_ROR_UNION_SELECT::QUICK_ROR_UNION_SELECT(THD *thd_param,
                                               TABLE *table)
  : thd(thd_param), scans_inited(FALSE)
{
  head= table;
  index= MAX_KEY;                       /* No single index is used */
  record= table->record[0];
  rowid_length= table->file->ref_length;

  init_sql_alloc(&alloc, thd_param->variables.range_alloc_block_size, 0);
  thd_param->mem_root= &alloc;
}

1184 1185 1186 1187 1188

/*
  Do post-constructor initialization.
  SYNOPSIS
    QUICK_ROR_UNION_SELECT::init()
1189

1190 1191 1192 1193 1194
  RETURN
    0      OK
    other  Error code
*/

1195 1196
int QUICK_ROR_UNION_SELECT::init()
{
sergefp@mysql.com's avatar
sergefp@mysql.com committed
1197
  DBUG_ENTER("QUICK_ROR_UNION_SELECT::init");
1198
  if (init_queue(&queue, quick_selects.elements, 0,
monty@mysql.com's avatar
monty@mysql.com committed
1199
                 FALSE , QUICK_ROR_UNION_SELECT::queue_cmp,
1200 1201 1202
                 (void*) this))
  {
    bzero(&queue, sizeof(QUEUE));
sergefp@mysql.com's avatar
sergefp@mysql.com committed
1203
    DBUG_RETURN(1);
1204
  }
1205

1206
  if (!(cur_rowid= (byte*)alloc_root(&alloc, 2*head->file->ref_length)))
sergefp@mysql.com's avatar
sergefp@mysql.com committed
1207
    DBUG_RETURN(1);
1208
  prev_rowid= cur_rowid + head->file->ref_length;
sergefp@mysql.com's avatar
sergefp@mysql.com committed
1209
  DBUG_RETURN(0);
1210 1211
}

1212

1213
/*
1214
  Comparison function to be used by QUICK_ROR_UNION_SELECT::queue priority
1215 1216
  queue.

1217 1218 1219 1220 1221 1222
  SYNOPSIS
    QUICK_ROR_UNION_SELECT::queue_cmp()
      arg   Pointer to QUICK_ROR_UNION_SELECT
      val1  First merged select
      val2  Second merged select
*/
1223

1224 1225
int QUICK_ROR_UNION_SELECT::queue_cmp(void *arg, byte *val1, byte *val2)
{
1226
  QUICK_ROR_UNION_SELECT *self= (QUICK_ROR_UNION_SELECT*)arg;
1227 1228 1229 1230
  return self->head->file->cmp_ref(((QUICK_SELECT_I*)val1)->last_rowid,
                                   ((QUICK_SELECT_I*)val2)->last_rowid);
}

1231

1232
/*
1233 1234 1235
  Initialize quick select for row retrieval.
  SYNOPSIS
    reset()
1236

1237 1238 1239 1240 1241
  RETURN
    0      OK
    other  Error code
*/

1242 1243 1244 1245 1246
int QUICK_ROR_UNION_SELECT::reset()
{
  QUICK_SELECT_I* quick;
  int error;
  DBUG_ENTER("QUICK_ROR_UNION_SELECT::reset");
monty@mysql.com's avatar
monty@mysql.com committed
1247
  have_prev_rowid= FALSE;
sergefp@mysql.com's avatar
sergefp@mysql.com committed
1248 1249 1250 1251 1252 1253 1254 1255 1256
  if (!scans_inited)
  {
    QUICK_SELECT_I *quick;
    List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
    while ((quick= it++))
    {
      if (quick->init_ror_merged_scan(FALSE))
        DBUG_RETURN(1);
    }
1257
    scans_inited= TRUE;
sergefp@mysql.com's avatar
sergefp@mysql.com committed
1258 1259
  }
  queue_remove_all(&queue);
1260 1261
  /*
    Initialize scans for merged quick selects and put all merged quick
1262 1263 1264 1265 1266
    selects into the queue.
  */
  List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
  while ((quick= it++))
  {
sergefp@mysql.com's avatar
sergefp@mysql.com committed
1267
    if (quick->reset())
1268
      DBUG_RETURN(1);
1269 1270 1271 1272
    if ((error= quick->get_next()))
    {
      if (error == HA_ERR_END_OF_FILE)
        continue;
monty@mysql.com's avatar
monty@mysql.com committed
1273
      DBUG_RETURN(error);
1274 1275 1276 1277 1278
    }
    quick->save_last_pos();
    queue_insert(&queue, (byte*)quick);
  }

monty@mysql.com's avatar
monty@mysql.com committed
1279
  if (head->file->ha_rnd_init(1))
1280 1281 1282 1283 1284 1285 1286 1287 1288
  {
    DBUG_PRINT("error", ("ROR index_merge rnd_init call failed"));
    DBUG_RETURN(1);
  }

  DBUG_RETURN(0);
}


1289
bool
1290 1291 1292 1293 1294 1295 1296 1297 1298
QUICK_ROR_UNION_SELECT::push_quick_back(QUICK_SELECT_I *quick_sel_range)
{
  return quick_selects.push_back(quick_sel_range);
}

/*
  Free the queue, delete the merged quick selects, end a scan that is
  still active on head->file and free the own memory root.
*/
QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT()
{
  DBUG_ENTER("QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT");
  delete_queue(&queue);
  quick_selects.delete_elements();
  if (head->file->inited != handler::NONE)
  {
    head->file->ha_rnd_end();
  }
  free_root(&alloc,MYF(0));
  DBUG_VOID_RETURN;
}

1306

bk@work.mysql.com's avatar
bk@work.mysql.com committed
1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359
/*
  Default range: no min key, no max key, and both NO_MIN_RANGE and
  NO_MAX_RANGE set in 'flag'.
*/
QUICK_RANGE::QUICK_RANGE()
  :min_key(0),max_key(0),min_length(0),max_length(0),
   flag(NO_MIN_RANGE | NO_MAX_RANGE)
{}

/*
  Copy the interval description of another SEL_ARG.

  NOTES
    Only the type, flags, key part, field, min/max values and the
    next_key_part pointer are copied. The left/right/parent/next/prev
    links and the color are not set here. The copy starts with
    use_count == 1 and elements == 1.
*/
SEL_ARG::SEL_ARG(SEL_ARG &arg) :Sql_alloc()
{
  /* What kind of element this is and which key part/field it covers */
  type= arg.type;
  part= arg.part;
  field= arg.field;

  /* Interval end points and their flags */
  min_value= arg.min_value;
  max_value= arg.max_value;
  min_flag= arg.min_flag;
  max_flag= arg.max_flag;
  maybe_flag= arg.maybe_flag;
  maybe_null= arg.maybe_null;

  next_key_part= arg.next_key_part;

  use_count= 1;
  elements= 1;
}


/*
  Turn this element into a stand-alone single-element tree root: no
  children, black, unlinked from the next/prev chain, use_count reset to 0.
*/
inline void SEL_ARG::make_root()
{
  left= &null_element;
  right= &null_element;
  color= BLACK;
  next= 0;
  prev= 0;
  use_count= 0;
  elements= 1;
}

/*
  Create a single KEY_RANGE element for field 'f' with the given min and
  max values and no flags set.

  NOTES
    The element is a black leaf (both children are &null_element) that is
    not linked into any next/prev chain. 'part' is not set by this
    constructor.
*/
SEL_ARG::SEL_ARG(Field *f,const char *min_value_arg,const char *max_value_arg)
  :min_flag(0), max_flag(0), maybe_flag(0), maybe_null(f->real_maybe_null()),
   elements(1), use_count(1), field(f), min_value((char*) min_value_arg),
   max_value((char*) max_value_arg), next(0),prev(0),
   next_key_part(0),color(BLACK),type(KEY_RANGE)
{
  left=right= &null_element;
}

/*
  Create a single KEY_RANGE element with explicitly given key part number,
  min/max values and min/max/maybe flags.

  NOTES
    The element is a black leaf (both children are &null_element) that is
    not linked into any next/prev chain.
*/
SEL_ARG::SEL_ARG(Field *field_,uint8 part_,char *min_value_,char *max_value_,
		 uint8 min_flag_,uint8 max_flag_,uint8 maybe_flag_)
  :min_flag(min_flag_),max_flag(max_flag_),maybe_flag(maybe_flag_),
   part(part_),maybe_null(field_->real_maybe_null()), elements(1),use_count(1),
   field(field_), min_value(min_value_), max_value(max_value_),
   next(0),prev(0),next_key_part(0),color(BLACK),type(KEY_RANGE)
{
  left=right= &null_element;
}

/*
  Recursively clone this element and its left/right subtrees.

  SYNOPSIS
    SEL_ARG::clone()
      new_parent  Parent to set in the cloned element
      next_arg    IN/OUT Last element of the next/prev chain built so far;
                  every cloned element is appended to the chain (left
                  subtree first, then this element, then right subtree)

  NOTES
    For elements that are not KEY_RANGE only the type is cloned.
    The use count of this element is incremented on success.

  RETURN
    Cloned element
    0 if out of memory (in this element or in either subtree)
*/
SEL_ARG *SEL_ARG::clone(SEL_ARG *new_parent,SEL_ARG **next_arg)
{
  SEL_ARG *tmp;
  if (type != KEY_RANGE)
  {
    if (!(tmp= new SEL_ARG(type)))
      return 0;					// out of memory
    tmp->prev= *next_arg;			// Link into next/prev chain
    (*next_arg)->next=tmp;
    (*next_arg)= tmp;
  }
  else
  {
    if (!(tmp= new SEL_ARG(field,part, min_value,max_value,
			   min_flag, max_flag, maybe_flag)))
      return 0;					// OOM
    tmp->parent=new_parent;
    tmp->next_key_part=next_key_part;
    if (left != &null_element)
      if (!(tmp->left= left->clone(tmp,next_arg)))
	return 0;				// OOM in left subtree

    tmp->prev= *next_arg;			// Link into next/prev chain
    (*next_arg)->next=tmp;
    (*next_arg)= tmp;

    if (right != &null_element)
      if (!(tmp->right= right->clone(tmp,next_arg)))
	return 0;				// OOM
  }
  increment_use_count(1);
  tmp->color= color;
  return tmp;
}

/*
  Return the leftmost element of the tree rooted at this element, or 0 if
  this element has no 'left' link at all (MAYBE_KEY).
*/
SEL_ARG *SEL_ARG::first()
{
  if (!left)
    return 0;					// MAYBE_KEY

  SEL_ARG *node= this;
  for (; node->left != &null_element; node= node->left)
  {}
  return node;
}

/*
  Return the rightmost element of the tree rooted at this element, or 0 if
  this element has no 'right' link at all (MAYBE_KEY).
*/
SEL_ARG *SEL_ARG::last()
{
  if (!right)
    return 0;					// MAYBE_KEY

  SEL_ARG *node= this;
  for (; node->right != &null_element; node= node->right)
  {}
  return node;
}

1409

bk@work.mysql.com's avatar
bk@work.mysql.com committed
1410 1411 1412
/*
  Check if a compare is ok, when one takes ranges in account
  Returns -2 or 2 if the ranges were 'joined' like  < 2 and >= 2
1413
*/
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436

/*
  Compare two interval end points, taking their range flags into account.

  SYNOPSIS
    sel_cmp()
      field   Field the key values belong to
      a, b    Key values; the first byte is a NULL marker when the field
              can be NULL
      a_flag  Range flags of a
      b_flag  Range flags of b

  RETURN
    -1, 0, 1  a is less than, equal to or greater than b
    -2, 2     The values are equal and exactly one of the two end points
              has a NEAR_MIN/NEAR_MAX flag
*/
static int sel_cmp(Field *field, char *a,char *b,uint8 a_flag,uint8 b_flag)
{
  /* First check if there was a compare to a min or max element */
  const int a_unbounded= a_flag & (NO_MIN_RANGE | NO_MAX_RANGE);
  const int b_unbounded= b_flag & (NO_MIN_RANGE | NO_MAX_RANGE);
  if (a_unbounded)
  {
    if (a_unbounded == b_unbounded)
      return 0;
    return (a_flag & NO_MIN_RANGE) ? -1 : 1;
  }
  if (b_unbounded)
    return (b_flag & NO_MIN_RANGE) ? 1 : -1;

  bool both_null= FALSE;
  if (field->real_maybe_null())			// If null is part of key
  {
    if (*a != *b)
      return *a ? -1 : 1;
    if (*a)
      both_null= TRUE;				// NULLs are equal
    else
    {
      a++;					// Skip NULL marker
      b++;
    }
  }

  if (!both_null)
  {
    const int cmp= field->key_cmp((byte*) a,(byte*) b);
    if (cmp)
      return cmp < 0 ? -1 : 1;			// The values differed
  }

  /* The values are equal: the result depends on open/closed end points */
  const int a_near= a_flag & (NEAR_MIN | NEAR_MAX);
  const int b_near= b_flag & (NEAR_MIN | NEAR_MAX);
  if (a_near)
  {
    if (a_near == b_near)
      return 0;
    if (!b_near)
      return (a_flag & NEAR_MIN) ? 2 : -2;
    return (a_flag & NEAR_MIN) ? 1 : -1;
  }
  if (b_near)
    return (b_flag & NEAR_MIN) ? -2 : 2;
  return 0;					// The elements were equal
}


/*
  Clone the whole tree rooted at this element.

  NOTES
    A temporary element is used as the head of the next/prev chain while
    cloning; afterwards the chain ends are terminated with 0 and the use
    count of the new root is reset to 0.

  RETURN
    Root of the cloned tree
    0 if out of memory
*/
SEL_ARG *SEL_ARG::clone_tree()
{
  SEL_ARG tmp_link,*next_arg,*root;
  next_arg= &tmp_link;
  /*
    Bail out before touching the chain: if clone() failed before linking
    any element, tmp_link.next was never assigned.
  */
  if (!(root= clone((SEL_ARG *) 0, &next_arg)))
    return 0;					// OOM
  next_arg->next=0;				// Fix last link
  tmp_link.next->prev=0;			// Fix first link
  root->use_count= 0;
  return root;
}

1470

1471
/*
1472
  Find the best index to retrieve first N records in given order
1473 1474 1475 1476 1477 1478 1479 1480

  SYNOPSIS
    get_index_for_order()
      table  Table to be accessed
      order  Required ordering
      limit  Number of records that will be retrieved

  DESCRIPTION
1481 1482 1483 1484
    Find the best index that allows to retrieve first #limit records in the 
    given order cheaper than one would retrieve them using full table scan.

  IMPLEMENTATION
1485
    Run through all table indexes and find the shortest index that allows
1486 1487
    records to be retrieved in given order. We look for the shortest index
    as we will have fewer index pages to read with it.
1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509

    This function is used only by UPDATE/DELETE, so we take into account how
    the UPDATE/DELETE code will work:
     * index can only be scanned in forward direction
     * HA_EXTRA_KEYREAD will not be used
    Perhaps these assumptions could be relaxed

  RETURN
    index number
    MAX_KEY if no such index was found.
*/

/*
  Pick the shortest usable ordered index whose leading key parts match the
  requested ORDER BY list, if reading 'limit' rows through it is not more
  expensive than a full table scan.

  NOTES
    Only ascending orderings are supported: any descending element makes
    the function return MAX_KEY. Each ORDER BY element is compared with
    the key part at the same position, and an ORDER BY list longer than
    the key never matches that key.

  RETURN
    index number
    MAX_KEY if no such index was found.
*/
uint get_index_for_order(TABLE *table, ORDER *order, ha_rows limit)
{
  uint idx;
  uint match_key= MAX_KEY, match_key_len= MAX_KEY_LENGTH + 1;
  ORDER *ord;

  for (ord= order; ord; ord= ord->next)
    if (!ord->asc)
      return MAX_KEY;

  for (idx= 0; idx < table->s->keys; idx++)
  {
    if (!(table->keys_in_use_for_query.is_set(idx)))
      continue;
    KEY_PART_INFO *keyinfo= table->key_info[idx].key_part;
    uint n_parts= table->key_info[idx].key_parts;
    uint partno= 0;

    /*
      The below check is sufficient considering we now have either BTREE
      indexes (records are returned in order for any index prefix) or HASH
      indexes (records are not returned in order for any index prefix).
    */
    if (!(table->file->index_flags(idx, 0, 1) & HA_READ_ORDER))
      continue;

    /* Stop at the last key part so keyinfo[] is never read out of bounds */
    for (ord= order; ord && partno < n_parts; ord= ord->next, partno++)
    {
      /* Compare the current ORDER BY element, not the head of the list */
      Item *item= ord->item[0];
      if (!(item->type() == Item::FIELD_ITEM &&
           ((Item_field*)item)->field->eq(keyinfo[partno].field)))
        break;
    }

    /* ord is non-NULL here if an element mismatched or the key ran out */
    if (!ord && table->key_info[idx].key_length < match_key_len)
    {
      /*
        Ok, the ordering is compatible and this key is shorter than
        previous match (we want shorter keys as we'll have to read fewer
        index pages for the same number of records)
      */
      match_key= idx;
      match_key_len= table->key_info[idx].key_length;
    }
  }

  if (match_key != MAX_KEY)
  {
    /*
      Found an index that allows records to be retrieved in the requested
      order. Now we'll check if using the index is cheaper than doing a table
      scan.
    */
    double full_scan_time= table->file->scan_time();
    double index_scan_time= table->file->read_time(match_key, 1, limit);
    if (index_scan_time > full_scan_time)
      match_key= MAX_KEY;
  }
  return match_key;
}


serg@serg.mylan's avatar
serg@serg.mylan committed
1560
/*
1561
  Table rows retrieval plan. Range optimizer creates QUICK_SELECT_I-derived
1562 1563 1564 1565 1566
  objects from table read plans.
*/
class TABLE_READ_PLAN
{
public:
1567 1568
  /*
    Plan read cost, with or without cost of full row retrieval, depending
1569 1570
    on plan creation parameters.
  */
1571
  double read_cost;
1572
  ha_rows records; /* estimate of #rows to be examined */
serg@serg.mylan's avatar
serg@serg.mylan committed
1573

1574 1575
  /*
    If TRUE, the scan returns rows in rowid order. This is used only for
1576 1577
    scans that can be both ROR and non-ROR.
  */
1578
  bool is_ror;
1579

1580 1581 1582 1583 1584
  /*
    Create quick select for this plan.
    SYNOPSIS
     make_quick()
       param               Parameter from test_quick_select
monty@mysql.com's avatar
monty@mysql.com committed
1585
       retrieve_full_rows  If TRUE, created quick select will do full record
1586 1587
                           retrieval.
       parent_alloc        Memory pool to use, if any.
1588

1589 1590
    NOTES
      retrieve_full_rows is ignored by some implementations.
1591 1592

    RETURN
1593 1594 1595
      created quick select
      NULL on any error.
  */
1596 1597 1598 1599
  virtual QUICK_SELECT_I *make_quick(PARAM *param,
                                     bool retrieve_full_rows,
                                     MEM_ROOT *parent_alloc=NULL) = 0;

1600
  /* Table read plans are allocated on MEM_ROOT and are never deleted */
1601 1602
  static void *operator new(size_t size, MEM_ROOT *mem_root)
  { return (void*) alloc_root(mem_root, (uint) size); }
1603
  static void operator delete(void *ptr,size_t size) { TRASH(ptr, size); }
1604
  static void operator delete(void *ptr, MEM_ROOT *mem_root) { /* Never called */ }
1605 1606 1607 1608 1609 1610 1611
};

class TRP_ROR_INTERSECT;
class TRP_ROR_UNION;
class TRP_INDEX_MERGE;


1612
/*
1613
  Plan for a QUICK_RANGE_SELECT scan.
1614 1615 1616
  TRP_RANGE::make_quick ignores retrieve_full_rows parameter because
  QUICK_RANGE_SELECT doesn't distinguish between 'index only' scans and full
  record retrieval scans.
serg@serg.mylan's avatar
serg@serg.mylan committed
1617
*/
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1618

1619
class TRP_RANGE : public TABLE_READ_PLAN
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1620
{
1621
public:
1622 1623
  SEL_ARG *key; /* set of intervals to be used in "range" method retrieval */
  uint     key_idx; /* key number in PARAM::key */
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1624

1625
  TRP_RANGE(SEL_ARG *key_arg, uint idx_arg)
1626 1627
   : key(key_arg), key_idx(idx_arg)
  {}
1628

1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641
  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
                             MEM_ROOT *parent_alloc)
  {
    DBUG_ENTER("TRP_RANGE::make_quick");
    QUICK_RANGE_SELECT *quick;
    if ((quick= get_quick_select(param, key_idx, key, parent_alloc)))
    {
      quick->records= records;
      quick->read_time= read_cost;
    }
    DBUG_RETURN(quick);
  }
};
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1642 1643


1644 1645
/* Plan for QUICK_ROR_INTERSECT_SELECT scan. */

1646 1647 1648
class TRP_ROR_INTERSECT : public TABLE_READ_PLAN
{
public:
1649
  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
1650
                             MEM_ROOT *parent_alloc);
1651

1652
  /* Array of pointers to ROR range scans used in this intersection */
1653
  struct st_ror_scan_info **first_scan;
1654 1655
  struct st_ror_scan_info **last_scan; /* End of the above array */
  struct st_ror_scan_info *cpk_scan;  /* Clustered PK scan, if there is one */
monty@mysql.com's avatar
monty@mysql.com committed
1656
  bool is_covering; /* TRUE if no row retrieval phase is necessary */
1657
  double index_scan_costs; /* SUM(cost(index_scan)) */
1658 1659
};

1660

1661
/*
1662 1663
  Plan for QUICK_ROR_UNION_SELECT scan.
  QUICK_ROR_UNION_SELECT always retrieves full rows, so retrieve_full_rows
1664
  is ignored by make_quick.
1665
*/
1666

1667 1668 1669
class TRP_ROR_UNION : public TABLE_READ_PLAN
{
public:
1670
  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
1671
                             MEM_ROOT *parent_alloc);
1672 1673
  TABLE_READ_PLAN **first_ror; /* array of ptrs to plans for merged scans */
  TABLE_READ_PLAN **last_ror;  /* end of the above array */
1674 1675
};

1676 1677 1678 1679

/*
  Plan for QUICK_INDEX_MERGE_SELECT scan.
  QUICK_INDEX_MERGE_SELECT always retrieves full rows, so retrieve_full_rows
1680
  is ignored by make_quick.
1681 1682
*/

1683 1684 1685
class TRP_INDEX_MERGE : public TABLE_READ_PLAN
{
public:
1686
  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
1687
                             MEM_ROOT *parent_alloc);
1688 1689
  TRP_RANGE **range_scans; /* array of ptrs to plans of merged scans */
  TRP_RANGE **range_scans_end; /* end of the array */
1690 1691 1692
};


1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715
/*
  Plan for a QUICK_GROUP_MIN_MAX_SELECT scan. 
*/

class TRP_GROUP_MIN_MAX : public TABLE_READ_PLAN
{
private:
  bool have_min, have_max;
  KEY_PART_INFO *min_max_arg_part;
  uint group_prefix_len;
  uint used_key_parts;
  uint group_key_parts;
  KEY *index_info;
  uint index;
  uint key_infix_len;
  byte key_infix[MAX_KEY_LENGTH];
  SEL_TREE *range_tree; /* Represents all range predicates in the query. */
  SEL_ARG  *index_tree; /* The SEL_ARG sub-tree corresponding to index_info. */
  uint param_idx; /* Index of used key in param->key. */
  /* Number of records selected by the ranges in index_tree. */
public:
  ha_rows quick_prefix_records;
public:
1716 1717 1718 1719
  TRP_GROUP_MIN_MAX(bool have_min_arg, bool have_max_arg,
                    KEY_PART_INFO *min_max_arg_part_arg,
                    uint group_prefix_len_arg, uint used_key_parts_arg,
                    uint group_key_parts_arg, KEY *index_info_arg,
1720 1721
                    uint index_arg, uint key_infix_len_arg,
                    byte *key_infix_arg,
1722 1723 1724 1725 1726 1727 1728 1729 1730
                    SEL_TREE *tree_arg, SEL_ARG *index_tree_arg,
                    uint param_idx_arg, ha_rows quick_prefix_records_arg)
  : have_min(have_min_arg), have_max(have_max_arg),
    min_max_arg_part(min_max_arg_part_arg),
    group_prefix_len(group_prefix_len_arg), used_key_parts(used_key_parts_arg),
    group_key_parts(group_key_parts_arg), index_info(index_info_arg),
    index(index_arg), key_infix_len(key_infix_len_arg), range_tree(tree_arg),
    index_tree(index_tree_arg), param_idx(param_idx_arg),
    quick_prefix_records(quick_prefix_records_arg)
1731 1732 1733 1734
    {
      if (key_infix_len)
        memcpy(this->key_infix, key_infix_arg, key_infix_len);
    }
1735 1736 1737 1738 1739 1740

  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
                             MEM_ROOT *parent_alloc);
};


1741
/*
1742
  Fill param->needed_fields with bitmap of fields used in the query.
1743
  SYNOPSIS
1744 1745
    fill_used_fields_bitmap()
      param Parameter from test_quick_select function.
1746

1747 1748 1749
  NOTES
    Clustered PK members are not put into the bitmap as they are implicitly
    present in all keys (and it is impossible to avoid reading them).
1750 1751 1752
  RETURN
    0  Ok
    1  Out of memory.
1753 1754 1755 1756 1757
*/

static int fill_used_fields_bitmap(PARAM *param)
{
  TABLE *table= param->table;
monty@mysql.com's avatar
monty@mysql.com committed
1758
  param->fields_bitmap_size= bitmap_buffer_size(table->s->fields+1);
1759
  uint32 *tmp;
1760
  uint pk;
monty@mysql.com's avatar
monty@mysql.com committed
1761
  if (!(tmp= (uint32*) alloc_root(param->mem_root,param->fields_bitmap_size)) ||
1762
      bitmap_init(&param->needed_fields, tmp, param->fields_bitmap_size*8,
monty@mysql.com's avatar
monty@mysql.com committed
1763
                  FALSE))
1764
    return 1;
1765

1766
  bitmap_clear_all(&param->needed_fields);
1767
  for (uint i= 0; i < table->s->fields; i++)
1768 1769 1770 1771 1772
  {
    if (param->thd->query_id == table->field[i]->query_id)
      bitmap_set_bit(&param->needed_fields, i+1);
  }

1773
  pk= param->table->s->primary_key;
1774 1775
  if (param->table->file->primary_key_is_clustered() && pk != MAX_KEY)
  {
1776
    /* The table uses clustered PK and it is not internally generated */
1777
    KEY_PART_INFO *key_part= param->table->key_info[pk].key_part;
1778
    KEY_PART_INFO *key_part_end= key_part +
1779
                                 param->table->key_info[pk].key_parts;
1780
    for (;key_part != key_part_end; ++key_part)
1781 1782 1783 1784 1785 1786 1787 1788
    {
      bitmap_clear_bit(&param->needed_fields, key_part->fieldnr);
    }
  }
  return 0;
}


serg@serg.mylan's avatar
serg@serg.mylan committed
1789
/*
1790
  Test if a key can be used in different ranges
serg@serg.mylan's avatar
serg@serg.mylan committed
1791 1792

  SYNOPSIS
1793 1794 1795 1796 1797
    SQL_SELECT::test_quick_select()
      thd               Current thread
      keys_to_use       Keys to use for range retrieval
      prev_tables       Tables assumed to be already read when the scan is
                        performed (but not read at the moment of this call)
1798 1799 1800
      limit             Query limit
      force_quick_range Prefer to use range (instead of full table scan) even
                        if it is more expensive.
1801 1802 1803 1804 1805

  NOTES
    Updates the following in the select parameter:
      needed_reg - Bits for keys with may be used if all prev regs are read
      quick      - Parameter to use when reading records.
1806

1807 1808 1809
    In the table struct the following information is updated:
      quick_keys - Which keys can be used
      quick_rows - How many rows the key matches
serg@serg.mylan's avatar
serg@serg.mylan committed
1810

1811 1812 1813 1814
  TODO
   Check if this function really needs to modify keys_to_use, and change the
   code to pass it by reference if it doesn't.

1815
   In addition to force_quick_range other means can be (an usually are) used
1816 1817
   to make this function prefer range over full table scan. Figure out if
   force_quick_range is really needed.
1818

1819 1820 1821 1822
  RETURN
   -1 if impossible select (i.e. certainly no rows will be selected)
    0 if can't use quick_select
    1 if found usable ranges and quick select has been successfully created.
serg@serg.mylan's avatar
serg@serg.mylan committed
1823
*/
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1824

1825 1826
int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
				  table_map prev_tables,
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1827 1828 1829 1830
				  ha_rows limit, bool force_quick_range)
{
  uint idx;
  double scan_time;
1831
  DBUG_ENTER("SQL_SELECT::test_quick_select");
serg@serg.mylan's avatar
serg@serg.mylan committed
1832 1833 1834
  DBUG_PRINT("enter",("keys_to_use: %lu  prev_tables: %lu  const_tables: %lu",
		      keys_to_use.to_ulonglong(), (ulong) prev_tables,
		      (ulong) const_tables));
1835
  DBUG_PRINT("info", ("records=%lu", (ulong)head->file->records));
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1836 1837
  delete quick;
  quick=0;
1838 1839 1840
  needed_reg.clear_all();
  quick_keys.clear_all();
  if ((specialflag & SPECIAL_SAFE_MODE) && ! force_quick_range ||
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1841 1842
      !limit)
    DBUG_RETURN(0); /* purecov: inspected */
1843 1844
  if (keys_to_use.is_clear_all())
    DBUG_RETURN(0);
1845
  records= head->file->records;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1846 1847
  if (!records)
    records++;					/* purecov: inspected */
1848 1849
  scan_time= (double) records / TIME_FOR_COMPARE + 1;
  read_time= (double) head->file->scan_time() + scan_time + 1.1;
1850 1851
  if (head->force_index)
    scan_time= read_time= DBL_MAX;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1852
  if (limit < records)
1853
    read_time= (double) records + scan_time + 1; // Force to use index
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1854
  else if (read_time <= 2.0 && !force_quick_range)
1855
    DBUG_RETURN(0);				/* No need for quick select */
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1856

1857
  DBUG_PRINT("info",("Time to scan table: %g", read_time));
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1858

1859 1860
  keys_to_use.intersect(head->keys_in_use_for_query);
  if (!keys_to_use.is_clear_all())
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1861
  {
1862
    MEM_ROOT alloc;
1863
    SEL_TREE *tree= NULL;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1864
    KEY_PART *key_parts;
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
1865
    KEY *key_info;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1866
    PARAM param;
serg@serg.mylan's avatar
serg@serg.mylan committed
1867

bk@work.mysql.com's avatar
bk@work.mysql.com committed
1868
    /* set up parameter that is passed to all functions */
1869
    param.thd= thd;
monty@mysql.com's avatar
monty@mysql.com committed
1870
    param.baseflag=head->file->table_flags();
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1871 1872 1873 1874 1875
    param.prev_tables=prev_tables | const_tables;
    param.read_tables=read_tables;
    param.current_table= head->map;
    param.table=head;
    param.keys=0;
1876
    param.mem_root= &alloc;
1877
    param.old_root= thd->mem_root;
1878
    param.needed_reg= &needed_reg;
1879
    param.imerge_cost_buff_size= 0;
1880
    param.using_real_indexes= TRUE;
1881
    param.remove_jump_scans= TRUE;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1882

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
1883
    thd->no_errors=1;				// Don't warn about NULL
1884
    init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
1885 1886 1887 1888
    if (!(param.key_parts= (KEY_PART*) alloc_root(&alloc,
                                                  sizeof(KEY_PART)*
                                                  head->s->key_parts)) ||
        fill_used_fields_bitmap(&param))
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1889
    {
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
1890
      thd->no_errors=0;
1891
      free_root(&alloc,MYF(0));			// Return memory & allocator
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1892 1893 1894
      DBUG_RETURN(0);				// Can't use range
    }
    key_parts= param.key_parts;
1895
    thd->mem_root= &alloc;
1896 1897 1898 1899

    /*
      Make an array with description of all key parts of all table keys.
      This is used in get_mm_parts function.
1900
    */
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
1901
    key_info= head->key_info;
1902
    for (idx=0 ; idx < head->s->keys ; idx++, key_info++)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1903
    {
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
1904
      KEY_PART_INFO *key_part_info;
1905
      if (!keys_to_use.is_set(idx))
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1906 1907 1908 1909 1910
	continue;
      if (key_info->flags & HA_FULLTEXT)
	continue;    // ToDo: ft-keys in non-ft ranges, if possible   SerG

      param.key[param.keys]=key_parts;
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
1911 1912 1913
      key_part_info= key_info->key_part;
      for (uint part=0 ; part < key_info->key_parts ;
	   part++, key_parts++, key_part_info++)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1914
      {
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
1915 1916 1917 1918 1919 1920
	key_parts->key=		 param.keys;
	key_parts->part=	 part;
	key_parts->length=       key_part_info->length;
	key_parts->store_length= key_part_info->store_length;
	key_parts->field=	 key_part_info->field;
	key_parts->null_bit=	 key_part_info->null_bit;
1921
        key_parts->image_type =
1922
          (key_info->flags & HA_SPATIAL) ? Field::itMBR : Field::itRAW;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1923 1924 1925 1926 1927
      }
      param.real_keynr[param.keys++]=idx;
    }
    param.key_parts_end=key_parts;

sergefp@mysql.com's avatar
sergefp@mysql.com committed
1928 1929 1930 1931
    /* Calculate cost of full index read for the shortest covering index */
    if (!head->used_keys.is_clear_all())
    {
      int key_for_use= find_shortest_key(head, &head->used_keys);
1932 1933 1934
      double key_read_time= (get_index_only_read_time(&param, records,
                                                     key_for_use) +
                             (double) records / TIME_FOR_COMPARE);
sergefp@mysql.com's avatar
sergefp@mysql.com committed
1935 1936 1937 1938 1939
      DBUG_PRINT("info",  ("'all'+'using index' scan will be using key %d, "
                           "read time %g", key_for_use, key_read_time));
      if (key_read_time < read_time)
        read_time= key_read_time;
    }
1940

1941 1942 1943 1944 1945
    TABLE_READ_PLAN *best_trp= NULL;
    TRP_GROUP_MIN_MAX *group_trp;
    double best_read_time= read_time;

    if (cond)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1946
    {
1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958
      if ((tree= get_mm_tree(&param,cond)))
      {
        if (tree->type == SEL_TREE::IMPOSSIBLE)
        {
          records=0L;                      /* Return -1 from this function. */
          read_time= (double) HA_POS_ERROR;
          goto free_mem;
        }
        if (tree->type != SEL_TREE::KEY &&
            tree->type != SEL_TREE::KEY_SMALLER)
          goto free_mem;
      }
1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972
    }

    /*
      Try to construct a QUICK_GROUP_MIN_MAX_SELECT.
      Notice that it can be constructed no matter if there is a range tree.
    */
    group_trp= get_best_group_min_max(&param, tree);
    if (group_trp && group_trp->read_cost < best_read_time)
    {
      best_trp= group_trp;
      best_read_time= best_trp->read_cost;
    }

    if (tree)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1973
    {
monty@mysql.com's avatar
monty@mysql.com committed
1974 1975 1976
      /*
        It is possible to use a range-based quick select (but it might be
        slower than 'all' table scan).
1977 1978
      */
      if (tree->merges.is_empty())
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1979
      {
1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991
        TRP_RANGE         *range_trp;
        TRP_ROR_INTERSECT *rori_trp;
        bool can_build_covering= FALSE;

        /* Get best 'range' plan and prepare data for making other plans */
        if ((range_trp= get_key_scans_params(&param, tree, FALSE,
                                             best_read_time)))
        {
          best_trp= range_trp;
          best_read_time= best_trp->read_cost;
        }

1992
        /*
1993 1994 1995
          Simultaneous key scans and row deletes on several handler
          objects are not allowed so don't use ROR-intersection for
          table deletes.
1996
        */
1997 1998 1999 2000
        if ((thd->lex->sql_command != SQLCOM_DELETE))
#ifdef NOT_USED
          if ((thd->lex->sql_command != SQLCOM_UPDATE))
#endif
2001
        {
2002
          /*
2003 2004
            Get best non-covering ROR-intersection plan and prepare data for
            building covering ROR-intersection.
2005
          */
2006 2007
          if ((rori_trp= get_best_ror_intersect(&param, tree, best_read_time,
                                                &can_build_covering)))
2008
          {
2009 2010
            best_trp= rori_trp;
            best_read_time= best_trp->read_cost;
2011 2012
            /*
              Try constructing covering ROR-intersect only if it looks possible
2013 2014
              and worth doing.
            */
2015 2016 2017 2018
            if (!rori_trp->is_covering && can_build_covering &&
                (rori_trp= get_best_covering_ror_intersect(&param, tree,
                                                           best_read_time)))
              best_trp= rori_trp;
2019 2020
          }
        }
2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032
      }
      else
      {
        /* Try creating index_merge/ROR-union scan. */
        SEL_IMERGE *imerge;
        TABLE_READ_PLAN *best_conj_trp= NULL, *new_conj_trp;
        LINT_INIT(new_conj_trp); /* no empty index_merge lists possible */

        DBUG_PRINT("info",("No range reads possible,"
                           " trying to construct index_merge"));
        List_iterator_fast<SEL_IMERGE> it(tree->merges);
        while ((imerge= it++))
2033
        {
2034 2035 2036 2037
          new_conj_trp= get_best_disjunct_quick(&param, imerge, best_read_time);
          if (!best_conj_trp || (new_conj_trp && new_conj_trp->read_cost <
                                 best_conj_trp->read_cost))
            best_conj_trp= new_conj_trp;
2038
        }
2039 2040 2041 2042
        if (best_conj_trp)
          best_trp= best_conj_trp;
      }
    }
2043

2044
    thd->mem_root= param.old_root;
2045 2046 2047 2048 2049 2050 2051 2052 2053

    /* If we got a read plan, create a quick select from it. */
    if (best_trp)
    {
      records= best_trp->records;
      if (!(quick= best_trp->make_quick(&param, TRUE)) || quick->init())
      {
        delete quick;
        quick= NULL;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2054 2055
      }
    }
2056 2057

  free_mem:
2058
    free_root(&alloc,MYF(0));			// Return memory & allocator
2059
    thd->mem_root= param.old_root;
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
2060
    thd->no_errors=0;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2061
  }
2062

2063
  DBUG_EXECUTE("info", print_quick(quick, &needed_reg););
2064

bk@work.mysql.com's avatar
bk@work.mysql.com committed
2065 2066 2067 2068 2069 2070 2071
  /*
    Assume that if the user is using 'limit' we will only need to scan
    limit rows if we are using a key
  */
  DBUG_RETURN(records ? test(quick) : -1);
}

/****************************************************************************
 * Partition pruning module
 ****************************************************************************/
#ifdef WITH_PARTITION_STORAGE_ENGINE

/*
  PartitionPruningModule

  This part of the code does partition pruning. Partition pruning solves the
  following problem: given a query over partitioned tables, find partitions
  that we will not need to access (i.e. partitions that we can assume to be
  empty) when executing the query.
  The set of partitions to prune doesn't depend on which query execution
  plan will be used to execute the query.
  
  HOW IT WORKS
  
  Partition pruning module makes use of RangeAnalysisModule. The following
  examples show how the problem of partition pruning can be reduced to the 
  range analysis problem:
  
  EXAMPLE 1
    Consider a query:
    
      SELECT * FROM t1 WHERE (t1.a < 5 OR t1.a = 10) AND t1.a > 3 AND t1.b='z'
    
    where table t1 is partitioned using PARTITION BY RANGE(t1.a).  An apparent
    way to find the used (i.e. not pruned away) partitions is as follows:
    
    1. analyze the WHERE clause and extract the list of intervals over t1.a
       for the above query we will get this list: {(3 < t1.a < 5), (t1.a=10)}

    2. for each interval I
       {
         find partitions that have non-empty intersection with I;
         mark them as used;
       }
       
  EXAMPLE 2
    Suppose the table is partitioned by HASH(part_func(t1.a, t1.b)). Then
    we need to:

    1. Analyze the WHERE clause and get a list of intervals over (t1.a, t1.b).
       The list of intervals we'll obtain will look like this:
       ((t1.a, t1.b) = (1,'foo')),
       ((t1.a, t1.b) = (2,'bar')), 
       ((t1.a, t1.b) > (10,'zz'))
       
    2. for each interval I 
       {
         if (the interval has form "(t1.a, t1.b) = (const1, const2)" )
         {
           calculate HASH(part_func(t1.a, t1.b));
           find which partition has records with this hash value and mark
             it as used;
         }
         else
         {
           mark all partitions as used; 
           break;
         }
       }

   For both examples the step #1 is exactly what RangeAnalysisModule could
   be used to do, if it was provided with appropriate index description
   (array of KEY_PART structures). 
   In example #1, we need to provide it with description of index(t1.a), 
   in example #2, we need to provide it with description of index(t1.a, t1.b).
   
   These index descriptions are further called "partitioning index
   descriptions". Note that it doesn't matter if such indexes really exist,
   as range analysis module only uses the description.
   
   Putting it all together, partitioning module works as follows:
   
   prune_partitions() {
     call create_partition_index_description();

     call get_mm_tree(); // invoke the RangeAnalysisModule
     
     // analyze the obtained interval list and get used partitions 
     call find_used_partitions();
  }

*/

2158 2159 2160 2161 2162 2163 2164 2165 2166 2167
struct st_part_prune_param;
struct st_part_opt_info;

typedef void (*mark_full_part_func)(partition_info*, uint32);

/*
  Partition pruning operation context
*/
typedef struct st_part_prune_param
{
2168
  RANGE_OPT_PARAM range_param; /* Range analyzer parameters */
2169

2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215
  /***************************************************************
   Following fields are filled in based solely on partitioning 
   definition and not modified after that:
   **************************************************************/
  partition_info *part_info; /* Copy of table->part_info */
  /* Function to get partition id from partitioning fields only */
  get_part_id_func get_top_partition_id_func;
  /* Function to mark a partition as used (w/all subpartitions if they exist)*/
  mark_full_part_func mark_full_partition_used;
 
  /* Partitioning 'index' description, array of key parts */
  KEY_PART *key;
  
  /*
    Number of fields in partitioning 'index' definition created for
    partitioning (0 if partitioning 'index' doesn't include partitioning
    fields)
  */
  uint part_fields;
  uint subpart_fields; /* Same as above for subpartitioning */
  
  /* 
    Number of the last partitioning field keypart in the index, or -1 if
    partitioning index definition doesn't include partitioning fields.
  */
  int last_part_partno;
  int last_subpart_partno; /* Same as above for supartitioning */

  /*
    is_part_keypart[i] == test(keypart #i in partitioning index is a member
                               used in partitioning)
    Used to maintain current values of cur_part_fields and cur_subpart_fields
  */
  my_bool *is_part_keypart;
  /* Same as above for subpartitioning */
  my_bool *is_subpart_keypart;

  /***************************************************************
   Following fields form find_used_partitions() recursion context:
   **************************************************************/
  SEL_ARG **arg_stack;     /* "Stack" of SEL_ARGs */
  SEL_ARG **arg_stack_end; /* Top of the stack    */
  /* Number of partitioning fields for which we have a SEL_ARG* in arg_stack */
  uint cur_part_fields;
  /* Same as cur_part_fields, but for subpartitioning */
  uint cur_subpart_fields;
2216

2217 2218 2219 2220 2221
  /* Iterator to be used to obtain the "current" set of used partitions */
  PARTITION_ITERATOR part_iter;

  /* Initialized bitmap of no_subparts size */
  MY_BITMAP subparts_bitmap;
2222 2223
} PART_PRUNE_PARAM;

2224
static bool create_partition_index_description(PART_PRUNE_PARAM *prune_par);
2225
static int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree);
2226 2227 2228 2229
static int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar,
                                       SEL_IMERGE *imerge);
static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar,
                                            List<SEL_IMERGE> &merges);
2230 2231 2232 2233 2234 2235 2236 2237
static void mark_all_partitions_as_used(partition_info *part_info);
static uint32 part_num_to_part_id_range(PART_PRUNE_PARAM* prune_par, 
                                        uint32 num);

#ifndef DBUG_OFF
static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end);
static void dbug_print_field(Field *field);
static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part);
2238
static void dbug_print_singlepoint_range(SEL_ARG **start, uint num);
2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291
#endif


/*
  Perform partition pruning for a given table and condition.

  SYNOPSIS
    prune_partitions()
      thd           Thread handle
      table         Table to perform partition pruning for
      pprune_cond   Condition to use for partition pruning
  
  DESCRIPTION
    This function assumes that all partitions are marked as unused when it
    is invoked. The function analyzes the condition, finds partitions that
    need to be used to retrieve the records that match the condition, and 
    marks them as used by setting appropriate bit in part_info->used_partitions
    In the worst case all partitions are marked as used.

    The range analysis runs on a local MEM_ROOT that is installed as
    thd->mem_root for the duration of the analysis; thd->mem_root and
    thd->no_errors are restored before returning.

  NOTE
    This function returns promptly if called for non-partitioned table.

  RETURN
    TRUE   We've inferred that no partitions need to be used (i.e. no table
           records will satisfy pprune_cond)
    FALSE  Otherwise
*/

bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond)
{
  bool retval= FALSE;
  partition_info *part_info = table->part_info;
  DBUG_ENTER("prune_partitions");

  if (!part_info)
    DBUG_RETURN(FALSE); /* not a partitioned table */
  
  if (!pprune_cond)
  {
    /* Nothing to prune by: every partition may be needed */
    mark_all_partitions_as_used(part_info);
    DBUG_RETURN(FALSE);
  }
  
  PART_PRUNE_PARAM prune_param;
  MEM_ROOT alloc;
  RANGE_OPT_PARAM  *range_par= &prune_param.range_param;

  prune_param.part_info= part_info;

  init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
  range_par->mem_root= &alloc;
  range_par->old_root= thd->mem_root;

  if (create_partition_index_description(&prune_param))
  {
    /* thd->mem_root and thd->no_errors have not been touched yet */
    mark_all_partitions_as_used(part_info);
    free_root(&alloc,MYF(0));		// Return memory & allocator
    DBUG_RETURN(FALSE);
  }
  
  range_par->thd= thd;
  range_par->table= table;
  /* range_par->cond doesn't need initialization */
  range_par->prev_tables= range_par->read_tables= 0;
  range_par->current_table= table->map;

  range_par->keys= 1; // one index
  range_par->using_real_indexes= FALSE;
  range_par->remove_jump_scans= FALSE;
  range_par->real_keynr[0]= 0;

  thd->no_errors=1;				// Don't warn about NULL
  thd->mem_root=&alloc;
  
  prune_param.key= prune_param.range_param.key_parts;
  SEL_TREE *tree;
  int res;

  tree= get_mm_tree(range_par, pprune_cond);
  if (!tree)
    goto all_used;

  if (tree->type == SEL_TREE::IMPOSSIBLE)
  {
    retval= TRUE;
    goto end;
  }

  if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER)
    goto all_used;

  if (tree->merges.is_empty())
  {
    /* Range analysis has produced a single list of intervals. */
    prune_param.arg_stack_end= prune_param.arg_stack;
    prune_param.cur_part_fields= 0;
    prune_param.cur_subpart_fields= 0;
    init_all_partitions_iterator(part_info, &prune_param.part_iter);
    if (!tree->keys[0] || (-1 == (res= find_used_partitions(&prune_param,
                                                            tree->keys[0]))))
      goto all_used;
  }
  else
  {
    if (tree->merges.elements == 1)
    {
      /* 
        Range analysis has produced a "merge" of several intervals lists, a 
        SEL_TREE that represents an expression in form         
          sel_imerge = (tree1 OR tree2 OR ... OR treeN)
        that cannot be reduced to one tree. This can only happen when 
        partitioning index has several keyparts and the condition is OR of
        conditions that refer to different key parts. For example, we'll get
        here for "partitioning_field=const1 OR subpartitioning_field=const2"
      */
      if (-1 == (res= find_used_partitions_imerge(&prune_param,
                                                  tree->merges.head())))
        goto all_used;
    }
    else
    {
      /* 
        Range analysis has produced a list of several imerges, i.e. a
        structure that represents a condition in form 
        imerge_list= (sel_imerge1 AND sel_imerge2 AND ... AND sel_imergeN)
        This is produced for complicated WHERE clauses that range analyzer
        can't really analyze properly.
      */
      if (-1 == (res= find_used_partitions_imerge_list(&prune_param,
                                                       tree->merges)))
        goto all_used;
    }
  }
  
  /*
    res == 0 => no used partitions => retval=TRUE
    res == 1 => some used partitions => retval=FALSE
    res == -1 - we jump over this line to all_used:
  */
  retval= test(!res);
  goto end;

all_used:
  retval= FALSE; // some partitions are used
  mark_all_partitions_as_used(prune_param.part_info);
end:
  thd->no_errors=0;
  thd->mem_root= range_par->old_root;
  free_root(&alloc,MYF(0));			// Return memory & allocator
  DBUG_RETURN(retval);
}


/*
2394
  Store field key image to table record
2395 2396

  SYNOPSIS
2397 2398 2399 2400 2401 2402 2403 2404 2405 2406
    store_key_image_to_rec()
      field  Field which key image should be stored
      ptr    Field value in key format
      len    Length of the value, in bytes

  DESCRIPTION
    Copy the field value from its key image to the table record. The source
    is the value in key image format, occupying len bytes in buffer pointed
    by ptr. The destination is table record, in "field value in table record"
    format.
2407 2408
*/

2409
void store_key_image_to_rec(Field *field, char *ptr, uint len)
2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430
{
  /* Do the same as print_key() does */ 
  if (field->real_maybe_null())
  {
    if (*ptr)
    {
      field->set_null();
      return;
    }
    ptr++;
  }    
  field->set_key_image(ptr, len); 
}


/*
  Put the lower bounds of a sequence of SEL_ARG intervals into the table
  record buffer

  SYNOPSIS
    store_selargs_to_rec()
      ppar   Partition pruning context
      start  Array of SEL_ARG* whose minimum values are to be stored
      num    Number of elements in the array

  DESCRIPTION
    Walk the array and, for every SEL_ARG* in it, store the interval's left
    edge (sel_arg->min_value, in key image format) into the table record
    via store_key_image_to_rec(). The value length is taken from the key
    part description that corresponds to the interval's key part number.
*/

static void store_selargs_to_rec(PART_PRUNE_PARAM *ppar, SEL_ARG **start,
                                 int num)
{
  KEY_PART *key_part_descr= ppar->range_param.key_parts;
  SEL_ARG **stop= start + num;
  while (start != stop)
  {
    SEL_ARG *interval= *start;
    uint value_len= key_part_descr[interval->part].length;
    store_key_image_to_rec(interval->field, interval->min_value, value_len);
    start++;
  }
}


/*
  Mark partition part_id as used. This variant is for tables without
  subpartitions: the partition id is used directly as the bit number in
  part_info->used_partitions.
*/
static void mark_full_partition_used_no_parts(partition_info* part_info,
                                              uint32 part_id)
{
  bitmap_set_bit(&(part_info->used_partitions), part_id);
}


/*
  Mark partition part_id as used. This variant is for tables with
  subpartitions: it sets the no_subparts consecutive bits of
  part_info->used_partitions that start at part_id * no_subparts.
*/
static void mark_full_partition_used_with_parts(partition_info *part_info,
                                                uint32 part_id)
{
  uint32 subpart_bit= part_id * part_info->no_subparts;
  uint32 remaining= part_info->no_subparts;
  while (remaining)
  {
    bitmap_set_bit(&part_info->used_partitions, subpart_bit);
    subpart_bit++;
    remaining--;
  }
}

2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480
/*
  Find the set of used partitions for List<SEL_IMERGE>
  SYNOPSIS
    find_used_partitions_imerge_list
      ppar      Partition pruning context.
      key_tree  Intervals tree to perform pruning for.
      
  DESCRIPTION
    List<SEL_IMERGE> represents "imerge1 AND imerge2 AND ...". 
    The set of used partitions is an intersection of used partitions sets
    for imerge_{i}.
2481
    We accumulate this intersection in a separate bitmap.
2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498
 
  RETURN 
    See find_used_partitions()
*/

static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar,
                                            List<SEL_IMERGE> &merges)
{
  MY_BITMAP *const used_parts= &ppar->part_info->used_partitions;
  const uint n_bits= used_parts->n_bits;
  const uint bitmap_bytes= bitmap_buffer_size(n_bits);
  uint32 *bitmap_buf= (uint32*)alloc_root(ppar->range_param.mem_root,
                                          bitmap_bytes);
  if (bitmap_buf == NULL)
  {
    /*
      Fallback, process just the first SEL_IMERGE. This can leave us with more
      partitions marked as used than actually needed.
    */
    return find_used_partitions_imerge(ppar, merges.head());
  }

  /* Accumulator for the intersection; starts with every bit set */
  MY_BITMAP all_merges;
  bitmap_init(&all_merges, bitmap_buf, n_bits, FALSE);
  bitmap_set_prefix(&all_merges, n_bits);

  List_iterator<SEL_IMERGE> it(merges);
  for (SEL_IMERGE *imerge= it++; imerge; imerge= it++)
  {
    const int res= find_used_partitions_imerge(ppar, imerge);
    if (res == 0)
    {
      /* no used partitions on one ANDed imerge => no used partitions at all */
      return 0;
    }

    /* res == -1 means "no restriction from this imerge": nothing to intersect */
    if (res != -1)
      bitmap_intersect(&all_merges, used_parts);

    if (bitmap_is_clear_all(&all_merges))
      return 0;

    /* Reset the working bitmap before the next imerge marks into it */
    bitmap_clear_all(used_parts);
  }

  /* Publish the accumulated intersection as the final set of used partitions */
  memcpy(used_parts->bitmap, all_merges.bitmap, bitmap_bytes);
  return 1;
}


/*
  Find the set of used partitions for SEL_IMERGE structure
  SYNOPSIS
    find_used_partitions_imerge()
      ppar      Partition pruning context.
      imerge    SEL_IMERGE (OR-ed list of SEL_TREEs) to perform pruning for.
      
  DESCRIPTION
    SEL_IMERGE represents "tree1 OR tree2 OR ...". The implementation is
    trivial - just mark used partitions for each tree and bail out early
    if for some tree_{i} all partitions are used.
 
  RETURN 
    See find_used_partitions().
*/

2548
static
2549
int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar, SEL_IMERGE *imerge)
2550
{
2551 2552 2553 2554 2555 2556
  int res= 0;
  for (SEL_TREE **ptree= imerge->trees; ptree < imerge->trees_next; ptree++)
  {
    ppar->arg_stack_end= ppar->arg_stack;
    ppar->cur_part_fields= 0;
    ppar->cur_subpart_fields= 0;
2557
    init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
2558 2559 2560 2561
    if (-1 == (res |= find_used_partitions(ppar, (*ptree)->keys[0])))
      return -1;
  }
  return res;
2562 2563 2564 2565
}


/*
2566
  Collect partitioning ranges for the SEL_ARG tree and mark partitions as used
2567 2568 2569 2570

  SYNOPSIS
    find_used_partitions()
      ppar      Partition pruning context.
2571
      key_tree  SEL_ARG range tree to perform pruning for
2572 2573 2574

  DESCRIPTION
    This function 
2575 2576
      * recursively walks the SEL_ARG* tree collecting partitioning "intervals"
      * finds the partitions one needs to use to get rows in these intervals
2577
      * marks these partitions as used.
2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594
    The next section describes the process in greater detail.
 
  IMPLEMENTATION
    TYPES OF RESTRICTIONS THAT WE CAN OBTAIN PARTITIONS FOR    
    We can find out which [sub]partitions to use if we obtain restrictions on 
    [sub]partitioning fields in the following form:
    1.  "partition_field1=const1 AND ... AND partition_fieldN=constN"
    1.1  Same as (1) but for subpartition fields

    If partitioning supports interval analysis (i.e. partitioning is a
    function of a single table field, and partition_info::
    get_part_iter_for_interval != NULL), then we can also use condition in
    this form:
    2.  "const1 <=? partition_field <=? const2"
    2.1  Same as (2) but for subpartition_field

    INFERRING THE RESTRICTIONS FROM SEL_ARG TREE
2595
    
2596
    The below is an example of what SEL_ARG tree may represent:
2597
    
2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625
    (start)
     |                           $
     |   Partitioning keyparts   $  subpartitioning keyparts
     |                           $
     |     ...          ...      $
     |      |            |       $
     | +---------+  +---------+  $  +-----------+  +-----------+
     \-| par1=c1 |--| par2=c2 |-----| subpar1=c3|--| subpar2=c5|
       +---------+  +---------+  $  +-----------+  +-----------+
            |                    $        |             |
            |                    $        |        +-----------+ 
            |                    $        |        | subpar2=c6|
            |                    $        |        +-----------+ 
            |                    $        |
            |                    $  +-----------+  +-----------+
            |                    $  | subpar1=c4|--| subpar2=c8|
            |                    $  +-----------+  +-----------+
            |                    $         
            |                    $
       +---------+               $  +------------+  +------------+
       | par1=c2 |------------------| subpar1=c10|--| subpar2=c12|
       +---------+               $  +------------+  +------------+
            |                    $
           ...                   $

    The up-down connections are connections via SEL_ARG::left and
    SEL_ARG::right. A horizontal connection to the right is the
    SEL_ARG::next_key_part connection.
2626
    
2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662
    find_used_partitions() traverses the entire tree via recursion on
     * SEL_ARG::next_key_part (from left to right on the picture)
     * SEL_ARG::left|right (up/down on the pic). Left-right recursion is
       performed for each depth level.
    
    Recursion descent on SEL_ARG::next_key_part is used to accumulate (in
    ppar->arg_stack) constraints on partitioning and subpartitioning fields.
    For the example in the above picture, one of stack states is:
      in find_used_partitions(key_tree = "subpar2=c5") (***)
      in find_used_partitions(key_tree = "subpar1=c3")
      in find_used_partitions(key_tree = "par2=c2")   (**)
      in find_used_partitions(key_tree = "par1=c1")
      in prune_partitions(...)
    We apply partitioning limits as soon as possible, e.g. when we reach the
    depth (**), we find which partition(s) correspond to "par1=c1 AND par2=c2",
    and save them in ppar->part_iter.
    When we reach the depth (***), we find which subpartition(s) correspond to
    "subpar1=c3 AND subpar2=c5", and then mark appropriate subpartitions in
    appropriate partitions as used.
    
    It is possible that constraints on some partitioning fields are missing.
    For the above example, consider this stack state:
      in find_used_partitions(key_tree = "subpar2=c12") (***)
      in find_used_partitions(key_tree = "subpar1=c10")
      in find_used_partitions(key_tree = "par1=c2")
      in prune_partitions(...)
    Here we don't have constraints for all partitioning fields. Since we've
    never set the ppar->part_iter to contain used set of partitions, we use
    its default "all partitions" value.  We get subpartition id for
    "subpar1=c10 AND subpar2=c12", and mark that subpartition as used in every
    partition.

    The inverse is also possible: we may get constraints on partitioning
    fields, but not constraints on subpartitioning fields. In that case,
    calls to find_used_partitions() with depth below (**) will return -1,
    and we will mark entire partition as used.
2663

2664 2665
  TODO
    Replace recursion on SEL_ARG::left and SEL_ARG::right with a loop
2666 2667 2668 2669 2670

  RETURN
    1   OK, one or more [sub]partitions are marked as used.
    0   The passed condition doesn't match any partitions
   -1   Couldn't infer any partition pruning "intervals" from the passed 
2671 2672
        SEL_ARG* tree (which means that all partitions should be marked as
        used) Marking partitions as used is the responsibility of the caller.
2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690
*/

static 
int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree)
{
  int res, left_res=0, right_res=0;
  int partno= (int)key_tree->part;
  /* TRUE if key_tree was pushed onto ppar->arg_stack and must be popped */
  bool pushed= FALSE;
  /*
    TRUE if ppar->part_iter holds a partition limit obtained at this level;
    if deeper levels then return -1, every partition in the iterator is
    marked as fully used.
  */
  bool set_full_part_if_bad_ret= FALSE;

  /* Process the left sibling first; -1 from any node aborts the whole walk */
  if (key_tree->left != &null_element)
  {
    if (-1 == (left_res= find_used_partitions(ppar,key_tree->left)))
      return -1;
  }

  if (key_tree->type == SEL_ARG::KEY_RANGE)
  {
    if (partno == 0 && (NULL != ppar->part_info->get_part_iter_for_interval))
    {
      /* 
        Partitioning is done by RANGE|INTERVAL(monotonic_expr(fieldX)), and
        we got "const1 CMP fieldX CMP const2" interval.
        TODO (psergey): this comment was flagged by its author for revision.
      */
      DBUG_EXECUTE("info", dbug_print_segment_range(key_tree,
                                                    ppar->range_param.
                                                    key_parts););
      res= ppar->part_info->
           get_part_iter_for_interval(ppar->part_info,
                                      FALSE,
                                      key_tree->min_value, 
                                      key_tree->max_value,
                                      key_tree->min_flag | key_tree->max_flag,
                                      &ppar->part_iter);
      if (!res)
        goto go_right; /* res=0 --> no satisfying partitions */
      if (res == -1)
      {
        /* No limit could be inferred: get a full range iterator */
        init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
      }
      /* 
        Save our intent to mark full partition as used if we will not be able
        to obtain further limits on subpartitions
      */
      set_full_part_if_bad_ret= TRUE;
      goto process_next_key_part;
    }

    if (partno == ppar->last_subpart_partno && 
        (NULL != ppar->part_info->get_subpart_iter_for_interval))
    {
      PARTITION_ITERATOR subpart_iter;
      DBUG_EXECUTE("info", dbug_print_segment_range(key_tree,
                                                    ppar->range_param.
                                                    key_parts););
      res= ppar->part_info->
           get_subpart_iter_for_interval(ppar->part_info,
                                         TRUE,
                                         key_tree->min_value, 
                                         key_tree->max_value,
                                         key_tree->min_flag | key_tree->max_flag,
                                         &subpart_iter);
      DBUG_ASSERT(res); /* We can't get "no satisfying subpartitions" */
      if (res == -1)
        return -1; /* all subpartitions satisfy */
        
      /* Collect the ids produced by subpart_iter into ppar->subparts_bitmap */
      uint32 subpart_id;
      bitmap_clear_all(&ppar->subparts_bitmap);
      while ((subpart_id= subpart_iter.get_next(&subpart_iter)) != NOT_A_PARTITION_ID)
        bitmap_set_bit(&ppar->subparts_bitmap, subpart_id);

      /*
        Mark each collected subpartition as used in every partition that
        ppar->part_iter yields.
      */
      uint32 part_id;
      while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
              NOT_A_PARTITION_ID)
      {
        for (uint i= 0; i < ppar->part_info->no_subparts; i++)
          if (bitmap_is_set(&ppar->subparts_bitmap, i))
            bitmap_set_bit(&ppar->part_info->used_partitions,
                           part_id * ppar->part_info->no_subparts + i);
      }
      goto go_right;
    }

    if (key_tree->is_singlepoint())
    {
      /* Push this "field=const" SEL_ARG and update the per-kind counters */
      pushed= TRUE;
      ppar->cur_part_fields+=    ppar->is_part_keypart[partno];
      ppar->cur_subpart_fields+= ppar->is_subpart_keypart[partno];
      *(ppar->arg_stack_end++) = key_tree;

      if (partno == ppar->last_part_partno &&
          ppar->cur_part_fields == ppar->part_fields)
      {
        /* 
          Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all partitioning
          fields. Save all constN constants into table record buffer.
        */
        store_selargs_to_rec(ppar, ppar->arg_stack, ppar->part_fields);
        DBUG_EXECUTE("info", dbug_print_singlepoint_range(ppar->arg_stack,
                                                       ppar->part_fields););
        uint32 part_id;
        longlong func_value;
        /* Find in which partition the {const1, ...,constN} tuple goes */
        if (ppar->get_top_partition_id_func(ppar->part_info, &part_id,
                                            &func_value))
        {
          res= 0; /* No satisfying partitions */
          goto pop_and_go_right;
        }
        /* Remember the limit we got - single partition #part_id */
        init_single_partition_iterator(part_id, &ppar->part_iter);
        
        /*
          If there are no subpartitions/we fail to get any limit for them, 
          then we'll mark full partition as used. 
        */
        set_full_part_if_bad_ret= TRUE;
        goto process_next_key_part;
      }

      if (partno == ppar->last_subpart_partno &&
          ppar->cur_subpart_fields == ppar->subpart_fields)
      {
        /* 
          Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all subpartitioning
          fields. Save all constN constants into table record buffer.
        */
        store_selargs_to_rec(ppar, ppar->arg_stack_end - ppar->subpart_fields,
                             ppar->subpart_fields);
        DBUG_EXECUTE("info", dbug_print_singlepoint_range(ppar->arg_stack_end- 
                                                       ppar->subpart_fields,
                                                       ppar->subpart_fields););
        /* Find the subpartition (it's HASH/KEY so we always have one) */
        partition_info *part_info= ppar->part_info;
        uint32 subpart_id= part_info->get_subpartition_id(part_info);
        
        /*
          Mark this subpartition as used in every partition that
          ppar->part_iter yields.
        */
        uint32 part_id;
        while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
                NOT_A_PARTITION_ID)
        {
          bitmap_set_bit(&part_info->used_partitions,
                         part_id * part_info->no_subparts + subpart_id);
        }
        res= 1; /* Some partitions were marked as used */
        goto pop_and_go_right;
      }
    }
    else
    {
      /* 
        Can't handle condition on current key part. If we're that deep that 
        we're processing subpartitioning's key parts, this means we'll not be
        able to infer any suitable condition, so bail out.
      */
      if (partno >= ppar->last_part_partno)
        return -1;
    }
  }

process_next_key_part:
  /* Descend to the next key part; no next key part means "no further limit" */
  if (key_tree->next_key_part)
    res= find_used_partitions(ppar, key_tree->next_key_part);
  else
    res= -1;
 
  if (set_full_part_if_bad_ret)
  {
    if (res == -1)
    {
      /* Got "full range" for subpartitioning fields */
      uint32 part_id;
      bool found= FALSE;
      while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) != NOT_A_PARTITION_ID)
      {
        ppar->mark_full_partition_used(ppar->part_info, part_id);
        found= TRUE;
      }
      /* 1 if at least one partition was marked, 0 if the iterator was empty */
      res= test(found);
    }
    /*
      Restore the "used partitions iterator" to the default setting that
      specifies iteration over all partitions.
    */
    init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
  }

  if (pushed)
  {
    /*
      NOTE: the label below is also reached by goto from the singlepoint
      branches above, where pushed is always TRUE.
    */
pop_and_go_right:
    /* Pop this key part info off the "stack" */
    ppar->arg_stack_end--;
    ppar->cur_part_fields-=    ppar->is_part_keypart[partno];
    ppar->cur_subpart_fields-= ppar->is_subpart_keypart[partno];
  }

  if (res == -1)
    return -1;
go_right:
  /* Finally process the right sibling */
  if (key_tree->right != &null_element)
  {
    if (-1 == (right_res= find_used_partitions(ppar,key_tree->right)))
      return -1;
  }
  /* 1 if this node or any sibling subtree marked something, 0 otherwise */
  return (left_res || right_res || res);
}
 

/* Mark every partition as used by setting all bits of used_partitions. */
static void mark_all_partitions_as_used(partition_info *part_info)
{
  bitmap_set_all(&part_info->used_partitions);
}


/*
  Check if field types allow to construct partitioning index description
 
  SYNOPSIS
    fields_ok_for_partition_index()
      pfield  NULL-terminated array of pointers to fields.

  DESCRIPTION
    For an array of fields, check if we can use all of the fields to create
    partitioning index description.
    
    We can't process GEOMETRY fields - for these fields singlepoint intervals
    can't be generated, and non-singlepoint are "special" kinds of intervals
    to which our processing logic can't be applied.

    It is not known if we could process ENUM fields, so they are disabled to be
    on the safe side.

  RETURN 
    TRUE   Yes, fields can be used in partitioning index
    FALSE  Otherwise
*/

static bool fields_ok_for_partition_index(Field **pfield)
{
  /* No field array at all: nothing to build an index description from */
  if (pfield == NULL)
    return FALSE;

  /* Walk the NULL-terminated array; one unsupported type rejects it all */
  while (*pfield)
  {
    switch ((*pfield)->real_type()) {
    case FIELD_TYPE_ENUM:
    case FIELD_TYPE_GEOMETRY:
      return FALSE;
    default:
      break;
    }
    pfield++;
  }
  return TRUE;
}


/*
  Create partition index description and fill related info in the context
  struct

  SYNOPSIS
2931
    create_partition_index_description()
2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947
      prune_par  INOUT Partition pruning context

  DESCRIPTION
    Create partition index description. Partition index description is:

      part_index(used_fields_list(part_expr), used_fields_list(subpart_expr))

    If partitioning/sub-partitioning uses ENUM or GEOMETRY fields (see
    fields_ok_for_partition_index()), then corresponding fields_list(...) is
    not included into index description and we don't perform partition
    pruning for partitions/subpartitions.

  RETURN
    TRUE   Out of memory or can't do partition pruning at all
    FALSE  OK
*/

2948
static bool create_partition_index_description(PART_PRUNE_PARAM *ppar)
{
  RANGE_OPT_PARAM *range_par= &(ppar->range_param);
  partition_info *part_info= ppar->part_info;
  uint used_part_fields, used_subpart_fields;

  /* A field array that fails the type check contributes no key parts */
  used_part_fields= fields_ok_for_partition_index(part_info->part_field_array) ?
                      part_info->no_part_fields : 0;
  used_subpart_fields= 
    fields_ok_for_partition_index(part_info->subpart_field_array)? 
      part_info->no_subpart_fields : 0;
  
  uint total_parts= used_part_fields + used_subpart_fields;

  /* Key part numbers: partition fields first, subpartition fields after */
  ppar->part_fields=      used_part_fields;
  ppar->last_part_partno= (int)used_part_fields - 1;

  ppar->subpart_fields= used_subpart_fields;
  ppar->last_subpart_partno= 
    used_subpart_fields?(int)(used_part_fields + used_subpart_fields - 1): -1;

  if (is_sub_partitioned(part_info))
  {
    ppar->mark_full_partition_used=  mark_full_partition_used_with_parts;
    ppar->get_top_partition_id_func= part_info->get_part_partition_id;
  }
  else
  {
    ppar->mark_full_partition_used=  mark_full_partition_used_no_parts;
    ppar->get_top_partition_id_func= part_info->get_partition_id;
  }

  /* Nothing usable (total_parts == 0) or out of memory: report failure */
  KEY_PART *key_part;
  MEM_ROOT *alloc= range_par->mem_root;
  if (!total_parts || 
      !(key_part= (KEY_PART*)alloc_root(alloc, sizeof(KEY_PART)*
                                               total_parts)) ||
      !(ppar->arg_stack= (SEL_ARG**)alloc_root(alloc, sizeof(SEL_ARG*)* 
                                                      total_parts)) ||
      !(ppar->is_part_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)*
                                                           total_parts)) ||
      !(ppar->is_subpart_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)*
                                                           total_parts)))
    return TRUE;
 
  if (ppar->subpart_fields)
  {
    /* Scratch bitmap with one bit per subpartition */
    uint32 *buf;
    uint32 bufsize= bitmap_buffer_size(ppar->part_info->no_subparts);
    if (!(buf= (uint32*)alloc_root(alloc, bufsize)))
      return TRUE;
    bitmap_init(&ppar->subparts_bitmap, buf, ppar->part_info->no_subparts, FALSE);
  }
  range_par->key_parts= key_part;
  Field **field= (ppar->part_fields)? part_info->part_field_array :
                                           part_info->subpart_field_array;
  /*
    When no partitioning fields are used, the walk above starts directly in
    subpart_field_array, so the flag must start as TRUE. Otherwise the
    subpartitioning key parts would be recorded as partitioning key parts.
  */
  bool in_subpart_fields= (ppar->part_fields == 0);
  for (uint part= 0; part < total_parts; part++, key_part++)
  {
    key_part->key=          0;
    key_part->part=	    part;
    key_part->length=       (*field)->pack_length_in_rec();
    /* 
      psergey-todo: check yet again if this is correct for tricky field types,
      e.g. see "Fix a fatal error in decimal key handling" in open_binary_frm()
    */
    key_part->store_length= (*field)->pack_length();
    if ((*field)->real_maybe_null())
      key_part->store_length+= HA_KEY_NULL_LENGTH;
    if ((*field)->type() == FIELD_TYPE_BLOB || 
        (*field)->real_type() == MYSQL_TYPE_VARCHAR)
      key_part->store_length+= HA_KEY_BLOB_LENGTH;

    key_part->field=        (*field);
    key_part->image_type =  Field::itRAW;
    /* We don't set key_parts->null_bit as it will not be used */

    ppar->is_part_keypart[part]= !in_subpart_fields;
    ppar->is_subpart_keypart[part]= in_subpart_fields;
 
    /*
      End of the current NULL-terminated field array: continue with the
      subpartitioning fields.
    */
    if (!*(++field))
    {
      field= part_info->subpart_field_array;
      in_subpart_fields= TRUE;
    }
  }
  range_par->key_parts_end= key_part;

  DBUG_EXECUTE("info", print_partitioning_index(range_par->key_parts,
                                                range_par->key_parts_end););
  return FALSE;
}


#ifndef DBUG_OFF

/* Print the field names of key parts [parts, parts_end) to the debug trace. */
static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end)
{
  DBUG_ENTER("print_partitioning_index");
  DBUG_LOCK_FILE;
  fprintf(DBUG_FILE, "partitioning INDEX(");
  /* Empty before the first name, then the separator for all later ones */
  const char *separator= "";
  for (KEY_PART *cur= parts; cur != parts_end; cur++)
  {
    fprintf(DBUG_FILE, "%s%s", separator, cur->field->field_name);
    separator= " ,";
  }
  fputs(");\n", DBUG_FILE);
  DBUG_UNLOCK_FILE;
  DBUG_VOID_RETURN;
}

/*
  Print field value into debug trace, in NULL-aware way: a NULL field is
  printed as NULL, anything else as its string value in single quotes.
*/
static void dbug_print_field(Field *field)
{
  if (field->is_real_null())
  {
    fprintf(DBUG_FILE, "NULL");
    return;
  }

  /* Stack buffer handed to val_str() as its String argument */
  char buf[256];
  String tmp(buf, sizeof(buf), &my_charset_bin);
  tmp.length(0);
  String *value= field->val_str(&tmp);
  fprintf(DBUG_FILE, "'%s'", value->c_ptr_safe());
}


/*
  Print a "c1 < keypartX < c2" - type interval into debug trace.

  arg   Interval to print
  part  Key part description the interval belongs to

  Each present bound is first stored into the field's record slot and then
  printed through dbug_print_field(), so calling this overwrites the field
  value in the record buffer.
*/
static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part)
{
  DBUG_ENTER("dbug_print_segment_range");
  DBUG_LOCK_FILE;
  if (!(arg->min_flag & NO_MIN_RANGE))
  {
    store_key_image_to_rec(part->field, (char*)(arg->min_value), part->length);
    dbug_print_field(part->field);
    if (arg->min_flag & NEAR_MIN)
      fputs(" < ", DBUG_FILE);
    else
      fputs(" <= ", DBUG_FILE);
  }

  fprintf(DBUG_FILE, "%s", part->field->field_name);

  if (!(arg->max_flag & NO_MAX_RANGE))
  {
    if (arg->max_flag & NEAR_MAX)
      fputs(" < ", DBUG_FILE);
    else
      fputs(" <= ", DBUG_FILE);
    /* Upper bound: must print max_value, not min_value */
    store_key_image_to_rec(part->field, (char*)(arg->max_value), part->length);
    dbug_print_field(part->field);
  }
  fputs("\n", DBUG_FILE);
  DBUG_UNLOCK_FILE;
  DBUG_VOID_RETURN;
}


/*
  Print a singlepoint multi-keypart range interval to debug trace
 
  SYNOPSIS
3111
    dbug_print_singlepoint_range()
3112 3113 3114 3115 3116 3117 3118 3119
      start  Array of SEL_ARG* ptrs representing conditions on key parts
      num    Number of elements in the array.

  DESCRIPTION
    This function prints a "keypartN=constN AND ... AND keypartK=constK"-type 
    interval to debug trace.
*/

3120
static void dbug_print_singlepoint_range(SEL_ARG **start, uint num)
{
  DBUG_ENTER("dbug_print_singlepoint_range");
  DBUG_LOCK_FILE;

  /* One "name=value" item per key part, comma-separated after the first */
  for (uint i= 0; i < num; i++)
  {
    Field *fld= start[i]->field;
    fprintf(DBUG_FILE, "%s%s=", i ? ", " : "", fld->field_name);
    dbug_print_field(fld);
  }
  fputs("\n", DBUG_FILE);
  DBUG_UNLOCK_FILE;
  DBUG_VOID_RETURN;
}
#endif

/****************************************************************************
 * Partition pruning code ends
 ****************************************************************************/
#endif

3143

3144
/*
3145 3146 3147 3148
  Get cost of 'sweep' full records retrieval.
  SYNOPSIS
    get_sweep_read_cost()
      param            Parameter from test_quick_select
3149
      records          # of records to be retrieved
3150
  RETURN
3151
    cost of sweep
3152
*/
3153

3154
double get_sweep_read_cost(const PARAM *param, ha_rows records)
3155
{
3156
  double result;
3157
  DBUG_ENTER("get_sweep_read_cost");
3158 3159
  if (param->table->file->primary_key_is_clustered())
  {
3160
    result= param->table->file->read_time(param->table->s->primary_key,
3161
                                          records, records);
3162 3163
  }
  else
3164
  {
3165
    double n_blocks=
3166
      ceil(ulonglong2double(param->table->file->data_file_length) / IO_SIZE);
3167 3168 3169 3170
    double busy_blocks=
      n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(records)));
    if (busy_blocks < 1.0)
      busy_blocks= 1.0;
3171
    DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", n_blocks,
3172
                       busy_blocks));
3173
    /*
3174
      Disabled: Bail out if # of blocks to read is bigger than # of blocks in
3175 3176 3177 3178 3179 3180 3181 3182
      table data file.
    if (max_cost != DBL_MAX  && (busy_blocks+index_reads_cost) >= n_blocks)
      return 1;
    */
    JOIN *join= param->thd->lex->select_lex.join;
    if (!join || join->tables == 1)
    {
      /* No join, assume reading is done in one 'sweep' */
3183
      result= busy_blocks*(DISK_SEEK_BASE_COST +
3184 3185 3186 3187
                          DISK_SEEK_PROP_COST*n_blocks/busy_blocks);
    }
    else
    {
3188
      /*
3189 3190 3191
        Possibly this is a join with source table being non-last table, so
        assume that disk seeks are random here.
      */
3192
      result= busy_blocks;
3193 3194
    }
  }
3195
  DBUG_PRINT("info",("returning cost=%g", result));
3196
  DBUG_RETURN(result);
3197
}
3198 3199


3200 3201 3202 3203
/*
  Get best plan for a SEL_IMERGE disjunctive expression.
  SYNOPSIS
    get_best_disjunct_quick()
3204 3205
      param     Parameter from check_quick_select function
      imerge    Expression to use
3206
      read_time Don't create scans with cost > read_time
3207

3208
  NOTES
3209
    index_merge cost is calculated as follows:
3210
    index_merge_cost =
3211 3212 3213 3214 3215
      cost(index_reads) +         (see #1)
      cost(rowid_to_row_scan) +   (see #2)
      cost(unique_use)            (see #3)

    1. cost(index_reads) =SUM_i(cost(index_read_i))
3216 3217
       For non-CPK scans,
         cost(index_read_i) = {cost of ordinary 'index only' scan}
3218 3219 3220 3221 3222
       For CPK scan,
         cost(index_read_i) = {cost of non-'index only' scan}

    2. cost(rowid_to_row_scan)
      If table PK is clustered then
3223
        cost(rowid_to_row_scan) =
3224
          {cost of ordinary clustered PK scan with n_ranges=n_rows}
3225 3226

      Otherwise, we use the following model to calculate costs:
3227
      We need to retrieve n_rows rows from file that occupies n_blocks blocks.
3228
      We assume that offsets of rows we need are independent variates with
3229
      uniform distribution in [0..max_file_offset] range.
3230

3231 3232
      We'll denote block as "busy" if it contains row(s) we need to retrieve
      and "empty" if doesn't contain rows we need.
3233

3234
      Probability that a block is empty is (1 - 1/n_blocks)^n_rows (this
3235
      applies to any block in file). Let x_i be a variate taking value 1 if
3236
      block #i is empty and 0 otherwise.
3237

3238 3239
      Then E(x_i) = (1 - 1/n_blocks)^n_rows;

3240 3241
      E(n_empty_blocks) = E(sum(x_i)) = sum(E(x_i)) =
        = n_blocks * ((1 - 1/n_blocks)^n_rows) =
3242 3243 3244 3245
       ~= n_blocks * exp(-n_rows/n_blocks).

      E(n_busy_blocks) = n_blocks*(1 - (1 - 1/n_blocks)^n_rows) =
       ~= n_blocks * (1 - exp(-n_rows/n_blocks)).
3246

3247 3248
      Average size of "hole" between neighbor non-empty blocks is
           E(hole_size) = n_blocks/E(n_busy_blocks).
3249

3250 3251 3252 3253 3254 3255
      The total cost of reading all needed blocks in one "sweep" is:

      E(n_busy_blocks)*
       (DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*n_blocks/E(n_busy_blocks)).

    3. Cost of Unique use is calculated in Unique::get_use_cost function.
3256 3257 3258 3259 3260

  ROR-union cost is calculated in the same way index_merge, but instead of
  Unique a priority queue is used.

  RETURN
3261 3262
    Created read plan
    NULL - Out of memory or no read scan could be built.
3263
*/
3264

3265 3266
static
TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
3267
                                         double read_time)
3268 3269 3270 3271 3272 3273 3274
{
  SEL_TREE **ptree;
  TRP_INDEX_MERGE *imerge_trp= NULL;
  uint n_child_scans= imerge->trees_next - imerge->trees;
  TRP_RANGE **range_scans;
  TRP_RANGE **cur_child;
  TRP_RANGE **cpk_scan= NULL;
monty@mysql.com's avatar
monty@mysql.com committed
3275
  bool imerge_too_expensive= FALSE;
3276 3277 3278 3279
  double imerge_cost= 0.0;
  ha_rows cpk_scan_records= 0;
  ha_rows non_cpk_scan_records= 0;
  bool pk_is_clustered= param->table->file->primary_key_is_clustered();
monty@mysql.com's avatar
monty@mysql.com committed
3280 3281
  bool all_scans_ror_able= TRUE;
  bool all_scans_rors= TRUE;
3282 3283 3284 3285 3286 3287 3288 3289 3290
  uint unique_calc_buff_size;
  TABLE_READ_PLAN **roru_read_plans;
  TABLE_READ_PLAN **cur_roru_plan;
  double roru_index_costs;
  ha_rows roru_total_records;
  double roru_intersect_part= 1.0;
  DBUG_ENTER("get_best_disjunct_quick");
  DBUG_PRINT("info", ("Full table scan cost =%g", read_time));

3291
  if (!(range_scans= (TRP_RANGE**)alloc_root(param->mem_root,
3292 3293 3294
                                             sizeof(TRP_RANGE*)*
                                             n_child_scans)))
    DBUG_RETURN(NULL);
3295
  /*
3296 3297 3298
    Collect best 'range' scan for each of disjuncts, and, while doing so,
    analyze possibility of ROR scans. Also calculate some values needed by
    other parts of the code.
3299
  */
3300
  for (ptree= imerge->trees, cur_child= range_scans;
3301
       ptree != imerge->trees_next;
3302
       ptree++, cur_child++)
3303
  {
3304 3305
    DBUG_EXECUTE("info", print_sel_tree(param, *ptree, &(*ptree)->keys_map,
                                        "tree in SEL_IMERGE"););
monty@mysql.com's avatar
monty@mysql.com committed
3306
    if (!(*cur_child= get_key_scans_params(param, *ptree, TRUE, read_time)))
3307 3308
    {
      /*
3309
        One of index scans in this index_merge is more expensive than entire
3310 3311 3312
        table read for another available option. The entire index_merge (and
        any possible ROR-union) will be more expensive then, too. We continue
        here only to update SQL_SELECT members.
3313
      */
monty@mysql.com's avatar
monty@mysql.com committed
3314
      imerge_too_expensive= TRUE;
3315 3316 3317
    }
    if (imerge_too_expensive)
      continue;
3318

3319 3320 3321
    imerge_cost += (*cur_child)->read_cost;
    all_scans_ror_able &= ((*ptree)->n_ror_scans > 0);
    all_scans_rors &= (*cur_child)->is_ror;
3322
    if (pk_is_clustered &&
3323 3324
        param->real_keynr[(*cur_child)->key_idx] ==
        param->table->s->primary_key)
3325
    {
3326 3327
      cpk_scan= cur_child;
      cpk_scan_records= (*cur_child)->records;
3328 3329
    }
    else
3330
      non_cpk_scan_records += (*cur_child)->records;
3331
  }
3332

3333
  DBUG_PRINT("info", ("index_merge scans cost=%g", imerge_cost));
3334
  if (imerge_too_expensive || (imerge_cost > read_time) ||
3335 3336
      (non_cpk_scan_records+cpk_scan_records >= param->table->file->records) &&
      read_time != DBL_MAX)
3337
  {
3338 3339
    /*
      Bail out if it is obvious that both index_merge and ROR-union will be
3340
      more expensive
3341
    */
3342 3343
    DBUG_PRINT("info", ("Sum of index_merge scans is more expensive than "
                        "full table scan, bailing out"));
3344
    DBUG_RETURN(NULL);
3345
  }
3346
  if (all_scans_rors)
3347
  {
3348 3349
    roru_read_plans= (TABLE_READ_PLAN**)range_scans;
    goto skip_to_ror_scan;
3350
  }
3351 3352
  if (cpk_scan)
  {
3353 3354
    /*
      Add one ROWID comparison for each row retrieved on non-CPK scan.  (it
3355 3356 3357
      is done in QUICK_RANGE_SELECT::row_in_ranges)
     */
    imerge_cost += non_cpk_scan_records / TIME_FOR_COMPARE_ROWID;
3358 3359 3360
  }

  /* Calculate cost(rowid_to_row_scan) */
3361
  imerge_cost += get_sweep_read_cost(param, non_cpk_scan_records);
3362
  DBUG_PRINT("info",("index_merge cost with rowid-to-row scan: %g",
3363
                     imerge_cost));
3364 3365
  if (imerge_cost > read_time)
    goto build_ror_index_merge;
3366 3367

  /* Add Unique operations cost */
3368 3369
  unique_calc_buff_size=
    Unique::get_cost_calc_buff_size(non_cpk_scan_records,
3370 3371 3372 3373 3374 3375
                                    param->table->file->ref_length,
                                    param->thd->variables.sortbuff_size);
  if (param->imerge_cost_buff_size < unique_calc_buff_size)
  {
    if (!(param->imerge_cost_buff= (uint*)alloc_root(param->mem_root,
                                                     unique_calc_buff_size)))
3376
      DBUG_RETURN(NULL);
3377 3378 3379
    param->imerge_cost_buff_size= unique_calc_buff_size;
  }

3380
  imerge_cost +=
3381
    Unique::get_use_cost(param->imerge_cost_buff, non_cpk_scan_records,
3382 3383
                         param->table->file->ref_length,
                         param->thd->variables.sortbuff_size);
3384
  DBUG_PRINT("info",("index_merge total cost: %g (wanted: less then %g)",
3385 3386 3387 3388 3389 3390 3391
                     imerge_cost, read_time));
  if (imerge_cost < read_time)
  {
    if ((imerge_trp= new (param->mem_root)TRP_INDEX_MERGE))
    {
      imerge_trp->read_cost= imerge_cost;
      imerge_trp->records= non_cpk_scan_records + cpk_scan_records;
3392
      imerge_trp->records= min(imerge_trp->records,
3393 3394 3395 3396 3397 3398
                               param->table->file->records);
      imerge_trp->range_scans= range_scans;
      imerge_trp->range_scans_end= range_scans + n_child_scans;
      read_time= imerge_cost;
    }
  }
3399

3400
build_ror_index_merge:
3401 3402
  if (!all_scans_ror_able || param->thd->lex->sql_command == SQLCOM_DELETE)
    DBUG_RETURN(imerge_trp);
3403

3404 3405
  /* Ok, it is possible to build a ROR-union, try it. */
  bool dummy;
3406
  if (!(roru_read_plans=
3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419
          (TABLE_READ_PLAN**)alloc_root(param->mem_root,
                                        sizeof(TABLE_READ_PLAN*)*
                                        n_child_scans)))
    DBUG_RETURN(imerge_trp);
skip_to_ror_scan:
  roru_index_costs= 0.0;
  roru_total_records= 0;
  cur_roru_plan= roru_read_plans;

  /* Find 'best' ROR scan for each of trees in disjunction */
  for (ptree= imerge->trees, cur_child= range_scans;
       ptree != imerge->trees_next;
       ptree++, cur_child++, cur_roru_plan++)
3420
  {
3421 3422
    /*
      Assume the best ROR scan is the one that has cheapest full-row-retrieval
3423 3424
      scan cost.
      Also accumulate index_only scan costs as we'll need them to calculate
3425 3426 3427 3428 3429 3430 3431
      overall index_intersection cost.
    */
    double cost;
    if ((*cur_child)->is_ror)
    {
      /* Ok, we have index_only cost, now get full rows scan cost */
      cost= param->table->file->
3432
              read_time(param->real_keynr[(*cur_child)->key_idx], 1,
3433 3434 3435 3436 3437 3438 3439
                        (*cur_child)->records) +
              rows2double((*cur_child)->records) / TIME_FOR_COMPARE;
    }
    else
      cost= read_time;

    TABLE_READ_PLAN *prev_plan= *cur_child;
3440
    if (!(*cur_roru_plan= get_best_ror_intersect(param, *ptree, cost,
3441 3442 3443 3444 3445 3446 3447 3448 3449
                                                 &dummy)))
    {
      if (prev_plan->is_ror)
        *cur_roru_plan= prev_plan;
      else
        DBUG_RETURN(imerge_trp);
      roru_index_costs += (*cur_roru_plan)->read_cost;
    }
    else
3450 3451
      roru_index_costs +=
        ((TRP_ROR_INTERSECT*)(*cur_roru_plan))->index_scan_costs;
3452
    roru_total_records += (*cur_roru_plan)->records;
3453
    roru_intersect_part *= (*cur_roru_plan)->records /
3454
                           param->table->file->records;
3455
  }
3456

3457 3458
  /*
    rows to retrieve=
3459
      SUM(rows_in_scan_i) - table_rows * PROD(rows_in_scan_i / table_rows).
3460
    This is valid because index_merge construction guarantees that conditions
3461 3462 3463
    in disjunction do not share key parts.
  */
  roru_total_records -= (ha_rows)(roru_intersect_part*
3464 3465 3466
                                  param->table->file->records);
  /* ok, got a ROR read plan for each of the disjuncts
    Calculate cost:
3467 3468 3469 3470 3471 3472
    cost(index_union_scan(scan_1, ... scan_n)) =
      SUM_i(cost_of_index_only_scan(scan_i)) +
      queue_use_cost(rowid_len, n) +
      cost_of_row_retrieval
    See get_merge_buffers_cost function for queue_use_cost formula derivation.
  */
3473

3474
  double roru_total_cost;
3475 3476 3477
  roru_total_cost= roru_index_costs +
                   rows2double(roru_total_records)*log((double)n_child_scans) /
                   (TIME_FOR_COMPARE_ROWID * M_LN2) +
3478 3479
                   get_sweep_read_cost(param, roru_total_records);

3480
  DBUG_PRINT("info", ("ROR-union: cost %g, %d members", roru_total_cost,
3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494
                      n_child_scans));
  TRP_ROR_UNION* roru;
  if (roru_total_cost < read_time)
  {
    if ((roru= new (param->mem_root) TRP_ROR_UNION))
    {
      roru->first_ror= roru_read_plans;
      roru->last_ror= roru_read_plans + n_child_scans;
      roru->read_cost= roru_total_cost;
      roru->records= roru_total_records;
      DBUG_RETURN(roru);
    }
  }
  DBUG_RETURN(imerge_trp);
3495 3496 3497 3498 3499 3500 3501
}


/*
  Calculate cost of 'index only' scan for given index and number of records.

  SYNOPSIS
3502
    get_index_only_read_time()
3503 3504 3505 3506 3507
      param    parameters structure
      records  #of records to read
      keynr    key to read

  NOTES
3508
    It is assumed that we will read trough the whole key range and that all
3509 3510 3511 3512
    key blocks are half full (normally things are much better). It is also
    assumed that each time we read the next key from the index, the handler
    performs a random seek, thus the cost is proportional to the number of
    blocks read.
3513 3514 3515 3516 3517 3518

  TODO:
    Move this to handler->read_time() by adding a flag 'index-only-read' to
    this call. The reason for doing this is that the current function doesn't
    handle the case when the row is stored in the b-tree (like in innodb
    clustered index)
3519 3520
*/

3521
static double get_index_only_read_time(const PARAM* param, ha_rows records,
3522
                                       int keynr)
3523 3524 3525 3526 3527 3528 3529
{
  double read_time;
  uint keys_per_block= (param->table->file->block_size/2/
			(param->table->key_info[keynr].key_length+
			 param->table->file->ref_length) + 1);
  read_time=((double) (records+keys_per_block-1)/
             (double) keys_per_block);
3530
  return read_time;
3531 3532
}

3533

3534 3535
typedef struct st_ror_scan_info
{
3536 3537 3538 3539 3540
  uint      idx;      /* # of used key in param->keys */
  uint      keynr;    /* # of used key in table */
  ha_rows   records;  /* estimate of # records this scan will return */

  /* Set of intervals over key fields that will be used for row retrieval. */
3541
  SEL_ARG   *sel_arg;
3542 3543

  /* Fields used in the query and covered by this ROR scan. */
3544 3545
  MY_BITMAP covered_fields;
  uint      used_fields_covered; /* # of set bits in covered_fields */
3546
  int       key_rec_length; /* length of key record (including rowid) */
3547 3548

  /*
3549 3550
    Cost of reading all index records with values in sel_arg intervals set
    (assuming there is no need to access full table records)
3551 3552
  */
  double    index_read_cost;
3553 3554 3555
  uint      first_uncovered_field; /* first unused bit in covered_fields */
  uint      key_components; /* # of parts in the key */
} ROR_SCAN_INFO;
3556 3557 3558


/*
  Build a ROR_SCAN_INFO describing a single ROR scan on index idx that uses
  the sel_arg set of intervals.

  SYNOPSIS
    make_ror_scan()
      param    Parameter from test_quick_select function
      idx      Index of key in param->keys
      sel_arg  Set of intervals for a given key

  NOTES
    The structure and its bitmap buffer are taken from param->mem_root.
    The members filled in here are idx, keynr, key_rec_length, sel_arg,
    records, covered_fields and index_read_cost; the remaining members are
    left untouched.

  RETURN
    NULL - out of memory
    ROR scan structure containing a scan for {idx, sel_arg}
*/

static
ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
{
  ROR_SCAN_INFO *ror_scan;
  uint32 *bitmap_buf;
  uint keynr;
  DBUG_ENTER("make_ror_scan");

  ror_scan= (ROR_SCAN_INFO*)alloc_root(param->mem_root,
                                       sizeof(ROR_SCAN_INFO));
  if (!ror_scan)
    DBUG_RETURN(NULL);

  /* Translate the position in param->keys into the table's key number */
  keynr= param->real_keynr[idx];
  ror_scan->idx= idx;
  ror_scan->keynr= keynr;
  ror_scan->sel_arg= sel_arg;
  ror_scan->records= param->table->quick_rows[keynr];
  ror_scan->key_rec_length= (param->table->key_info[keynr].key_length +
                             param->table->file->ref_length);

  bitmap_buf= (uint32*)alloc_root(param->mem_root,
                                  param->fields_bitmap_size);
  if (!bitmap_buf)
    DBUG_RETURN(NULL);

  /* fields_bitmap_size is in bytes, bitmap_init() takes a bit count */
  if (bitmap_init(&ror_scan->covered_fields, bitmap_buf,
                  param->fields_bitmap_size*8, FALSE))
    DBUG_RETURN(NULL);
  bitmap_clear_all(&ror_scan->covered_fields);

  /* Mark every key part whose field the query actually needs */
  KEY_PART_INFO *key_part= param->table->key_info[keynr].key_part;
  KEY_PART_INFO *key_part_end= key_part +
                               param->table->key_info[keynr].key_parts;
  while (key_part != key_part_end)
  {
    if (bitmap_is_set(&param->needed_fields, key_part->fieldnr))
      bitmap_set_bit(&ror_scan->covered_fields, key_part->fieldnr);
    ++key_part;
  }

  /* ror_scan->records holds quick_rows[keynr], assigned above */
  ror_scan->index_read_cost=
    get_index_only_read_time(param, ror_scan->records, ror_scan->keynr);
  DBUG_RETURN(ror_scan);
}


3616
/*
3617 3618 3619 3620 3621 3622 3623
  Compare two ROR_SCAN_INFO** by  E(#records_matched) * key_record_length.
  SYNOPSIS
    cmp_ror_scan_info()
      a ptr to first compared value
      b ptr to second compared value

  RETURN
3624
   -1 a < b
3625 3626
    0 a = b
    1 a > b
3627
*/
3628

3629
static int cmp_ror_scan_info(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b)
3630 3631 3632 3633 3634 3635 3636
{
  double val1= rows2double((*a)->records) * (*a)->key_rec_length;
  double val2= rows2double((*b)->records) * (*b)->key_rec_length;
  return (val1 < val2)? -1: (val1 == val2)? 0 : 1;
}

/*
3637 3638 3639
  Compare two ROR_SCAN_INFO** by
   (#covered fields in F desc,
    #components asc,
3640
    number of first not covered component asc)
3641 3642 3643 3644 3645 3646 3647

  SYNOPSIS
    cmp_ror_scan_info_covering()
      a ptr to first compared value
      b ptr to second compared value

  RETURN
3648
   -1 a < b
3649 3650
    0 a = b
    1 a > b
3651
*/
3652

3653
static int cmp_ror_scan_info_covering(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b)
3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669
{
  if ((*a)->used_fields_covered > (*b)->used_fields_covered)
    return -1;
  if ((*a)->used_fields_covered < (*b)->used_fields_covered)
    return 1;
  if ((*a)->key_components < (*b)->key_components)
    return -1;
  if ((*a)->key_components > (*b)->key_components)
    return 1;
  if ((*a)->first_uncovered_field < (*b)->first_uncovered_field)
    return -1;
  if ((*a)->first_uncovered_field > (*b)->first_uncovered_field)
    return 1;
  return 0;
}

3670

3671
/* Auxiliary structure for incremental ROR-intersection creation */
3672
typedef struct
3673 3674 3675
{
  const PARAM *param;
  MY_BITMAP covered_fields; /* union of fields covered by all scans */
3676
  /*
3677
    Fraction of table records that satisfies conditions of all scans.
3678
    This is the number of full records that will be retrieved if a
3679 3680
    non-index_only index intersection will be employed.
  */
3681 3682 3683 3684
  double out_rows;
  /* TRUE if covered_fields is a superset of needed_fields */
  bool is_covering;

3685
  ha_rows index_records; /* sum(#records to look in indexes) */
3686 3687
  double index_scan_costs; /* SUM(cost of 'index-only' scans) */
  double total_cost;
3688
} ROR_INTERSECT_INFO;
3689 3690


3691 3692 3693 3694
/*
  Allocate a ROR_INTERSECT_INFO and initialize it to contain zero scans.

  SYNOPSIS
3695 3696 3697
    ror_intersect_init()
      param         Parameter from test_quick_select

3698 3699 3700 3701 3702 3703
  RETURN
    allocated structure
    NULL on error
*/

static
3704
ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param)
3705 3706
{
  ROR_INTERSECT_INFO *info;
3707
  uint32* buf;
3708
  if (!(info= (ROR_INTERSECT_INFO*)alloc_root(param->mem_root,
3709 3710 3711
                                              sizeof(ROR_INTERSECT_INFO))))
    return NULL;
  info->param= param;
3712
  if (!(buf= (uint32*)alloc_root(param->mem_root,
monty@mysql.com's avatar
monty@mysql.com committed
3713
                                 param->fields_bitmap_size)))
3714 3715
    return NULL;
  if (bitmap_init(&info->covered_fields, buf, param->fields_bitmap_size*8,
monty@mysql.com's avatar
monty@mysql.com committed
3716
                  FALSE))
3717
    return NULL;
3718
  info->is_covering= FALSE;
3719
  info->index_scan_costs= 0.0;
3720 3721 3722
  info->index_records= 0;
  info->out_rows= param->table->file->records;
  bitmap_clear_all(&info->covered_fields);
3723 3724 3725
  return info;
}

3726 3727 3728 3729
/*
  Copy the state of one ROR_INTERSECT_INFO into another.

  SYNOPSIS
    ror_intersect_cpy()
      dst  Structure to overwrite
      src  Structure to copy from

  NOTES
    The covered_fields bitmap is copied by content: the bytes of src's
    bitmap are written into the buffer dst already points to, the buffer
    pointer itself is not changed. dst therefore has to own a bitmap buffer
    of at least no_bytes_in_map(&src->covered_fields) bytes.
*/

void ror_intersect_cpy(ROR_INTERSECT_INFO *dst, const ROR_INTERSECT_INFO *src)
{
  /* Plain members */
  dst->param= src->param;
  dst->is_covering= src->is_covering;
  dst->out_rows= src->out_rows;
  dst->index_records= src->index_records;
  dst->index_scan_costs= src->index_scan_costs;
  dst->total_cost= src->total_cost;

  /* Bitmap contents */
  memcpy(dst->covered_fields.bitmap, src->covered_fields.bitmap,
         no_bytes_in_map(&src->covered_fields));
}
3737 3738


3739
/*
3740
  Get selectivity of a ROR scan wrt ROR-intersection.
3741

3742
  SYNOPSIS
3743 3744 3745 3746
    ror_scan_selectivity()
      info  ROR-interection 
      scan  ROR scan
      
3747
  NOTES
3748
    Suppose we have a condition on several keys
3749 3750
    cond=k_11=c_11 AND k_12=c_12 AND ...  // parts of first key
         k_21=c_21 AND k_22=c_22 AND ...  // parts of second key
3751
          ...
3752
         k_n1=c_n1 AND k_n3=c_n3 AND ...  (1) //parts of the key used by *scan
3753

3754 3755
    where k_ij may be the same as any k_pq (i.e. keys may have common parts).

3756
    A full row is retrieved if entire condition holds.
3757 3758

    The recursive procedure for finding P(cond) is as follows:
3759

3760
    First step:
3761
    Pick 1st part of 1st key and break conjunction (1) into two parts:
3762 3763
      cond= (k_11=c_11 AND R)

3764
    Here R may still contain condition(s) equivalent to k_11=c_11.
3765 3766
    Nevertheless, the following holds:

3767
      P(k_11=c_11 AND R) = P(k_11=c_11) * P(R | k_11=c_11).
3768 3769 3770 3771 3772

    Mark k_11 as fixed field (and satisfied condition) F, save P(F),
    save R to be cond and proceed to recursion step.

    Recursion step:
3773
    We have a set of fixed fields/satisfied conditions) F, probability P(F),
3774 3775 3776
    and remaining conjunction R
    Pick next key part on current key and its condition "k_ij=c_ij".
    We will add "k_ij=c_ij" into F and update P(F).
3777
    Lets denote k_ij as t,  R = t AND R1, where R1 may still contain t. Then
3778

3779
     P((t AND R1)|F) = P(t|F) * P(R1|t|F) = P(t|F) * P(R1|(t AND F)) (2)
3780 3781 3782 3783 3784 3785 3786

    (where '|' mean conditional probability, not "or")

    Consider the first multiplier in (2). One of the following holds:
    a) F contains condition on field used in t (i.e. t AND F = F).
      Then P(t|F) = 1

3787 3788
    b) F doesn't contain condition on field used in t. Then F and t are
     considered independent.
3789

3790
     P(t|F) = P(t|(fields_before_t_in_key AND other_fields)) =
3791 3792
          = P(t|fields_before_t_in_key).

3793 3794
     P(t|fields_before_t_in_key) = #records(fields_before_t_in_key) /
                                   #records(fields_before_t_in_key, t)
3795 3796

    The second multiplier is calculated by applying this step recursively.
3797

3798 3799 3800 3801 3802
  IMPLEMENTATION
    This function calculates the result of application of the "recursion step"
    described above for all fixed key members of a single key, accumulating set
    of covered fields, selectivity, etc.

3803
    The calculation is conducted as follows:
3804
    Lets denote #records(keypart1, ... keypartK) as n_k. We need to calculate
3805

3806 3807
     n_{k1}      n_{k_2}
    --------- * ---------  * .... (3)
3808
     n_{k1-1}    n_{k2_1}
3809

3810 3811 3812 3813
    where k1,k2,... are key parts which fields were not yet marked as fixed
    ( this is result of application of option b) of the recursion step for
      parts of a single key).
    Since it is reasonable to expect that most of the fields are not marked
3814
    as fixed, we calculate (3) as
3815 3816 3817

                                  n_{i1}      n_{i_2}
    (3) = n_{max_key_part}  / (   --------- * ---------  * ....  )
3818 3819 3820 3821
                                  n_{i1-1}    n_{i2_1}

    where i1,i2, .. are key parts that were already marked as fixed.

3822 3823
    In order to minimize number of expensive records_in_range calls we group
    and reduce adjacent fractions.
3824

3825
  RETURN
3826 3827
    Selectivity of given ROR scan.
    
3828 3829
*/

3830 3831
static double ror_scan_selectivity(const ROR_INTERSECT_INFO *info, 
                                   const ROR_SCAN_INFO *scan)
3832 3833
{
  double selectivity_mult= 1.0;
3834
  KEY_PART_INFO *key_part= info->param->table->key_info[scan->keynr].key_part;
sergefp@mysql.com's avatar
sergefp@mysql.com committed
3835
  byte key_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; /* key values tuple */
3836
  char *key_ptr= (char*) key_val;
3837 3838
  SEL_ARG *sel_arg, *tuple_arg= NULL;
  bool cur_covered;
3839 3840
  bool prev_covered= test(bitmap_is_set(&info->covered_fields,
                                        key_part->fieldnr));
sergefp@mysql.com's avatar
sergefp@mysql.com committed
3841 3842 3843 3844 3845 3846
  key_range min_range;
  key_range max_range;
  min_range.key= (byte*) key_val;
  min_range.flag= HA_READ_KEY_EXACT;
  max_range.key= (byte*) key_val;
  max_range.flag= HA_READ_AFTER_KEY;
3847 3848
  ha_rows prev_records= info->param->table->file->records;
  DBUG_ENTER("ror_intersect_selectivity");
3849 3850 3851

  for (sel_arg= scan->sel_arg; sel_arg;
       sel_arg= sel_arg->next_key_part)
3852
  {
3853
    DBUG_PRINT("info",("sel_arg step"));
3854
    cur_covered= test(bitmap_is_set(&info->covered_fields,
3855
                                    key_part[sel_arg->part].fieldnr));
3856
    if (cur_covered != prev_covered)
3857
    {
3858
      /* create (part1val, ..., part{n-1}val) tuple. */
3859 3860
      ha_rows records;
      if (!tuple_arg)
3861
      {
3862 3863
        tuple_arg= scan->sel_arg;
        /* Here we use the length of the first key part */
3864
        tuple_arg->store_min(key_part->store_length, &key_ptr, 0);
3865 3866 3867 3868
      }
      while (tuple_arg->next_key_part != sel_arg)
      {
        tuple_arg= tuple_arg->next_key_part;
3869
        tuple_arg->store_min(key_part[tuple_arg->part].store_length, &key_ptr, 0);
3870
      }
3871
      min_range.length= max_range.length= ((char*) key_ptr - (char*) key_val);
3872 3873
      records= (info->param->table->file->
                records_in_range(scan->keynr, &min_range, &max_range));
3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884
      if (cur_covered)
      {
        /* uncovered -> covered */
        double tmp= rows2double(records)/rows2double(prev_records);
        DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));
        selectivity_mult *= tmp;
        prev_records= HA_POS_ERROR;
      }
      else
      {
        /* covered -> uncovered */
3885
        prev_records= records;
3886
      }
3887
    }
3888 3889 3890 3891
    prev_covered= cur_covered;
  }
  if (!prev_covered)
  {
3892
    double tmp= rows2double(info->param->table->quick_rows[scan->keynr]) /
3893 3894
                rows2double(prev_records);
    DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));
3895
    selectivity_mult *= tmp;
3896
  }
3897 3898 3899
  DBUG_PRINT("info", ("Returning multiplier: %g", selectivity_mult));
  DBUG_RETURN(selectivity_mult);
}
3900

3901

3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938
/*
  Check if adding a ROR scan to a ROR-intersection reduces its cost of
  ROR-intersection and if yes, update parameters of ROR-intersection,
  including its cost.

  SYNOPSIS
    ror_intersect_add()
      param        Parameter from test_quick_select
      info         ROR-intersection structure to add the scan to.
      ror_scan     ROR scan info to add.
      is_cpk_scan  If TRUE, add the scan as CPK scan (this can be inferred
                   from other parameters and is passed separately only to
                   avoid duplicating the inference code)

  NOTES
    Adding a ROR scan to ROR-intersect "makes sense" iff the cost of ROR-
    intersection decreases. The cost of ROR-intersection is calculated as
    follows:

    cost= SUM_i(key_scan_cost_i) + cost_of_full_rows_retrieval

    When we add a scan the first increases and the second decreases.

    cost_of_full_rows_retrieval=
      (union of indexes used covers all needed fields) ?
        0 :
        cost_of_sweep_read(E(rows_to_retrieve), rows_in_table)

    E(rows_to_retrieve) = #rows_in_table * ror_scan_selectivity(null, scan1) *
                           ror_scan_selectivity({scan1}, scan2) * ... *
                           ror_scan_selectivity({scan1,...}, scanN). 
  RETURN
    TRUE   ROR scan added to ROR-intersection, cost updated.
    FALSE  It doesn't make sense to add this ROR scan to this ROR-intersection.
*/

static bool ror_intersect_add(ROR_INTERSECT_INFO *info,
3939
                              ROR_SCAN_INFO* ror_scan, bool is_cpk_scan)
3940 3941 3942 3943 3944 3945 3946 3947 3948 3949
{
  double selectivity_mult= 1.0;

  DBUG_ENTER("ror_intersect_add");
  DBUG_PRINT("info", ("Current out_rows= %g", info->out_rows));
  DBUG_PRINT("info", ("Adding scan on %s",
                      info->param->table->key_info[ror_scan->keynr].name));
  DBUG_PRINT("info", ("is_cpk_scan=%d",is_cpk_scan));

  selectivity_mult = ror_scan_selectivity(info, ror_scan);
3950 3951 3952
  if (selectivity_mult == 1.0)
  {
    /* Don't add this scan if it doesn't improve selectivity. */
3953
    DBUG_PRINT("info", ("The scan doesn't improve selectivity."));
3954
    DBUG_RETURN(FALSE);
3955
  }
3956 3957 3958 3959
  
  info->out_rows *= selectivity_mult;
  DBUG_PRINT("info", ("info->total_cost= %g", info->total_cost));
  
3960
  if (is_cpk_scan)
3961
  {
3962 3963 3964 3965 3966 3967
    /*
      CPK scan is used to filter out rows. We apply filtering for 
      each record of every scan. Assuming 1/TIME_FOR_COMPARE_ROWID
      per check this gives us:
    */
    info->index_scan_costs += rows2double(info->index_records) / 
3968 3969 3970 3971
                              TIME_FOR_COMPARE_ROWID;
  }
  else
  {
3972
    info->index_records += info->param->table->quick_rows[ror_scan->keynr];
3973 3974
    info->index_scan_costs += ror_scan->index_read_cost;
    bitmap_union(&info->covered_fields, &ror_scan->covered_fields);
3975 3976 3977 3978 3979 3980
    if (!info->is_covering && bitmap_is_subset(&info->param->needed_fields,
                                               &info->covered_fields))
    {
      DBUG_PRINT("info", ("ROR-intersect is covering now"));
      info->is_covering= TRUE;
    }
3981
  }
3982

3983
  info->total_cost= info->index_scan_costs;
3984
  DBUG_PRINT("info", ("info->total_cost= %g", info->total_cost));
3985 3986
  if (!info->is_covering)
  {
3987 3988 3989
    info->total_cost += 
      get_sweep_read_cost(info->param, double2rows(info->out_rows));
    DBUG_PRINT("info", ("info->total_cost= %g", info->total_cost));
3990
  }
3991
  DBUG_PRINT("info", ("New out_rows= %g", info->out_rows));
3992
  DBUG_PRINT("info", ("New cost= %g, %scovering", info->total_cost,
3993
                      info->is_covering?"" : "non-"));
3994
  DBUG_RETURN(TRUE);
3995 3996
}

3997

3998 3999
/*
  Get best ROR-intersection plan using non-covering ROR-intersection search
4000 4001 4002 4003
  algorithm. The returned plan may be covering.

  SYNOPSIS
    get_best_ror_intersect()
4004 4005 4006
      param            Parameter from test_quick_select function.
      tree             Transformed restriction condition to be used to look
                       for ROR scans.
4007
      read_time        Do not return read plans with cost > read_time.
4008
      are_all_covering [out] set to TRUE if union of all scans covers all
4009 4010
                       fields needed by the query (and it is possible to build
                       a covering ROR-intersection)
4011

4012
  NOTES
4013 4014 4015 4016 4017
    get_key_scans_params must be called before this function can be called.
    
    When this function is called by ROR-union construction algorithm it
    assumes it is building an uncovered ROR-intersection (and thus # of full
    records to be retrieved is wrong here). This is a hack.
4018

4019
  IMPLEMENTATION
4020
    The approximate best non-covering plan search algorithm is as follows:
4021

4022 4023 4024 4025
    find_min_ror_intersection_scan()
    {
      R= select all ROR scans;
      order R by (E(#records_matched) * key_record_length).
4026

4027 4028 4029 4030 4031 4032
      S= first(R); -- set of scans that will be used for ROR-intersection
      R= R-first(S);
      min_cost= cost(S);
      min_scan= make_scan(S);
      while (R is not empty)
      {
4033 4034
        firstR= R - first(R);
        if (!selectivity(S + firstR < selectivity(S)))
4035
          continue;
4036
          
4037 4038 4039 4040 4041 4042 4043 4044 4045
        S= S + first(R);
        if (cost(S) < min_cost)
        {
          min_cost= cost(S);
          min_scan= make_scan(S);
        }
      }
      return min_scan;
    }
4046

4047
    See ror_intersect_add function for ROR intersection costs.
4048

4049
    Special handling for Clustered PK scans
4050 4051
    Clustered PK contains all table fields, so using it as a regular scan in
    index intersection doesn't make sense: a range scan on CPK will be less
4052 4053
    expensive in this case.
    Clustered PK scan has special handling in ROR-intersection: it is not used
4054
    to retrieve rows, instead its condition is used to filter row references
4055
    we get from scans on other keys.
4056 4057

  RETURN
4058
    ROR-intersection table read plan
4059
    NULL if out of memory or no suitable plan found.
4060 4061
*/

4062 4063 4064 4065 4066 4067
static
TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
                                          double read_time,
                                          bool *are_all_covering)
{
  uint idx;
4068
  double min_cost= DBL_MAX;
4069
  DBUG_ENTER("get_best_ror_intersect");
4070

4071
  if ((tree->n_ror_scans < 2) || !param->table->file->records)
4072
    DBUG_RETURN(NULL);
4073 4074

  /*
4075 4076
    Step1: Collect ROR-able SEL_ARGs and create ROR_SCAN_INFO for each of 
    them. Also find and save clustered PK scan if there is one.
4077
  */
4078
  ROR_SCAN_INFO **cur_ror_scan;
4079
  ROR_SCAN_INFO *cpk_scan= NULL;
4080
  uint cpk_no;
monty@mysql.com's avatar
monty@mysql.com committed
4081
  bool cpk_scan_used= FALSE;
4082

4083 4084 4085 4086
  if (!(tree->ror_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
                                                     sizeof(ROR_SCAN_INFO*)*
                                                     param->keys)))
    return NULL;
4087 4088
  cpk_no= ((param->table->file->primary_key_is_clustered()) ?
           param->table->s->primary_key : MAX_KEY);
4089

4090
  for (idx= 0, cur_ror_scan= tree->ror_scans; idx < param->keys; idx++)
4091
  {
4092
    ROR_SCAN_INFO *scan;
4093
    if (!tree->ror_scans_map.is_set(idx))
4094
      continue;
4095
    if (!(scan= make_ror_scan(param, idx, tree->keys[idx])))
4096
      return NULL;
4097
    if (param->real_keynr[idx] == cpk_no)
4098
    {
4099 4100
      cpk_scan= scan;
      tree->n_ror_scans--;
4101 4102
    }
    else
4103
      *(cur_ror_scan++)= scan;
4104
  }
4105

4106
  tree->ror_scans_end= cur_ror_scan;
4107 4108
  DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "original",
                                          tree->ror_scans,
4109 4110
                                          tree->ror_scans_end););
  /*
4111
    Ok, [ror_scans, ror_scans_end) is array of ptrs to initialized
4112 4113
    ROR_SCAN_INFO's.
    Step 2: Get best ROR-intersection using an approximate algorithm.
4114 4115 4116
  */
  qsort(tree->ror_scans, tree->n_ror_scans, sizeof(ROR_SCAN_INFO*),
        (qsort_cmp)cmp_ror_scan_info);
4117 4118
  DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "ordered",
                                          tree->ror_scans,
4119
                                          tree->ror_scans_end););
4120

4121 4122 4123 4124 4125 4126 4127 4128 4129
  ROR_SCAN_INFO **intersect_scans; /* ROR scans used in index intersection */
  ROR_SCAN_INFO **intersect_scans_end;
  if (!(intersect_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
                                                     sizeof(ROR_SCAN_INFO*)*
                                                     tree->n_ror_scans)))
    return NULL;
  intersect_scans_end= intersect_scans;

  /* Create and incrementally update ROR intersection. */
4130 4131 4132
  ROR_INTERSECT_INFO *intersect, *intersect_best;
  if (!(intersect= ror_intersect_init(param)) || 
      !(intersect_best= ror_intersect_init(param)))
4133
    return NULL;
4134

4135
  /* [intersect_scans,intersect_scans_best) will hold the best intersection */
4136
  ROR_SCAN_INFO **intersect_scans_best;
4137
  cur_ror_scan= tree->ror_scans;
4138
  intersect_scans_best= intersect_scans;
4139
  while (cur_ror_scan != tree->ror_scans_end && !intersect->is_covering)
4140
  {
4141
    /* S= S + first(R);  R= R - first(R); */
4142
    if (!ror_intersect_add(intersect, *cur_ror_scan, FALSE))
4143 4144 4145 4146 4147 4148
    {
      cur_ror_scan++;
      continue;
    }
    
    *(intersect_scans_end++)= *(cur_ror_scan++);
4149

4150
    if (intersect->total_cost < min_cost)
4151
    {
4152
      /* Local minimum found, save it */
4153
      ror_intersect_cpy(intersect_best, intersect);
4154
      intersect_scans_best= intersect_scans_end;
4155
      min_cost = intersect->total_cost;
4156 4157
    }
  }
4158

4159 4160 4161 4162 4163 4164
  if (intersect_scans_best == intersect_scans)
  {
    DBUG_PRINT("info", ("None of scans increase selectivity"));
    DBUG_RETURN(NULL);
  }
    
4165 4166 4167 4168
  DBUG_EXECUTE("info",print_ror_scans_arr(param->table,
                                          "best ROR-intersection",
                                          intersect_scans,
                                          intersect_scans_best););
4169

4170
  *are_all_covering= intersect->is_covering;
4171
  uint best_num= intersect_scans_best - intersect_scans;
4172 4173
  ror_intersect_cpy(intersect, intersect_best);

4174 4175
  /*
    Ok, found the best ROR-intersection of non-CPK key scans.
4176 4177
    Check if we should add a CPK scan. If the obtained ROR-intersection is 
    covering, it doesn't make sense to add CPK scan.
4178 4179
  */
  if (cpk_scan && !intersect->is_covering)
4180
  {
4181
    if (ror_intersect_add(intersect, cpk_scan, TRUE) && 
4182
        (intersect->total_cost < min_cost))
4183
    {
monty@mysql.com's avatar
monty@mysql.com committed
4184
      cpk_scan_used= TRUE;
4185
      intersect_best= intersect; //just set pointer here
4186 4187
    }
  }
4188

4189
  /* Ok, return ROR-intersect plan if we have found one */
4190
  TRP_ROR_INTERSECT *trp= NULL;
4191
  if (min_cost < read_time && (cpk_scan_used || best_num > 1))
4192
  {
4193 4194
    if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT))
      DBUG_RETURN(trp);
4195 4196
    if (!(trp->first_scan=
           (ROR_SCAN_INFO**)alloc_root(param->mem_root,
4197 4198 4199 4200
                                       sizeof(ROR_SCAN_INFO*)*best_num)))
      DBUG_RETURN(NULL);
    memcpy(trp->first_scan, intersect_scans, best_num*sizeof(ROR_SCAN_INFO*));
    trp->last_scan=  trp->first_scan + best_num;
4201 4202 4203 4204 4205 4206
    trp->is_covering= intersect_best->is_covering;
    trp->read_cost= intersect_best->total_cost;
    /* Prevent divisons by zero */
    ha_rows best_rows = double2rows(intersect_best->out_rows);
    if (!best_rows)
      best_rows= 1;
4207
    trp->records= best_rows;
4208 4209 4210 4211 4212
    trp->index_scan_costs= intersect_best->index_scan_costs;
    trp->cpk_scan= cpk_scan_used? cpk_scan: NULL;
    DBUG_PRINT("info", ("Returning non-covering ROR-intersect plan:"
                        "cost %g, records %lu",
                        trp->read_cost, (ulong) trp->records));
4213
  }
4214
  DBUG_RETURN(trp);
4215 4216 4217 4218
}


/*
4219
  Get best covering ROR-intersection.
4220
  SYNOPSIS
4221
    get_best_covering_ror_intersect()
4222 4223 4224
      param     Parameter from test_quick_select function.
      tree      SEL_TREE with sets of intervals for different keys.
      read_time Don't return table read plans with cost > read_time.
4225

4226 4227
  RETURN
    Best covering ROR-intersection plan
4228
    NULL if no plan found.
4229 4230

  NOTES
4231
    get_best_ror_intersect must be called for a tree before calling this
4232
    function for it.
4233
    This function invalidates tree->ror_scans member values.
4234

4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247
  The following approximate algorithm is used:
    I=set of all covering indexes
    F=set of all fields to cover
    S={}

    do {
      Order I by (#covered fields in F desc,
                  #components asc,
                  number of first not covered component asc);
      F=F-covered by first(I);
      S=S+first(I);
      I=I-first(I);
    } while F is not empty.
4248 4249
*/

4250
static
4251 4252
TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param,
                                                   SEL_TREE *tree,
4253
                                                   double read_time)
4254
{
4255
  ROR_SCAN_INFO **ror_scan_mark;
4256
  ROR_SCAN_INFO **ror_scans_end= tree->ror_scans_end;
4257 4258 4259 4260
  DBUG_ENTER("get_best_covering_ror_intersect");
  uint nbits= param->fields_bitmap_size*8;

  for (ROR_SCAN_INFO **scan= tree->ror_scans; scan != ror_scans_end; ++scan)
4261
    (*scan)->key_components=
4262
      param->table->key_info[(*scan)->keynr].key_parts;
4263

4264 4265
  /*
    Run covering-ROR-search algorithm.
4266
    Assume set I is [ror_scan .. ror_scans_end)
4267
  */
4268

4269 4270
  /*I=set of all covering indexes */
  ror_scan_mark= tree->ror_scans;
4271

mronstrom@mysql.com's avatar
mronstrom@mysql.com committed
4272
  uint32 int_buf[MAX_KEY/32+1];
4273
  MY_BITMAP covered_fields;
4274
  if (bitmap_init(&covered_fields, int_buf, nbits, FALSE))
4275 4276 4277 4278 4279
    DBUG_RETURN(0);
  bitmap_clear_all(&covered_fields);

  double total_cost= 0.0f;
  ha_rows records=0;
4280 4281
  bool all_covered;

4282 4283 4284 4285 4286 4287
  DBUG_PRINT("info", ("Building covering ROR-intersection"));
  DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
                                           "building covering ROR-I",
                                           ror_scan_mark, ror_scans_end););
  do {
    /*
4288
      Update changed sorting info:
4289
        #covered fields,
4290
	number of first not covered component
4291 4292 4293 4294 4295
      Calculate and save these values for each of remaining scans.
    */
    for (ROR_SCAN_INFO **scan= ror_scan_mark; scan != ror_scans_end; ++scan)
    {
      bitmap_subtract(&(*scan)->covered_fields, &covered_fields);
4296
      (*scan)->used_fields_covered=
4297
        bitmap_bits_set(&(*scan)->covered_fields);
4298
      (*scan)->first_uncovered_field=
4299 4300 4301 4302 4303 4304 4305 4306 4307
        bitmap_get_first(&(*scan)->covered_fields);
    }

    qsort(ror_scan_mark, ror_scans_end-ror_scan_mark, sizeof(ROR_SCAN_INFO*),
          (qsort_cmp)cmp_ror_scan_info_covering);

    DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
                                             "remaining scans",
                                             ror_scan_mark, ror_scans_end););
4308

4309 4310 4311
    /* I=I-first(I) */
    total_cost += (*ror_scan_mark)->index_read_cost;
    records += (*ror_scan_mark)->records;
4312
    DBUG_PRINT("info", ("Adding scan on %s",
4313 4314 4315 4316 4317 4318
                        param->table->key_info[(*ror_scan_mark)->keynr].name));
    if (total_cost > read_time)
      DBUG_RETURN(NULL);
    /* F=F-covered by first(I) */
    bitmap_union(&covered_fields, &(*ror_scan_mark)->covered_fields);
    all_covered= bitmap_is_subset(&param->needed_fields, &covered_fields);
4319 4320 4321 4322
  } while ((++ror_scan_mark < ror_scans_end) && !all_covered);
  
  if (!all_covered || (ror_scan_mark - tree->ror_scans) == 1)
    DBUG_RETURN(NULL);
4323 4324 4325 4326 4327 4328 4329 4330 4331

  /*
    Ok, [tree->ror_scans .. ror_scan) holds covering index_intersection with
    cost total_cost.
  */
  DBUG_PRINT("info", ("Covering ROR-intersect scans cost: %g", total_cost));
  DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
                                           "creating covering ROR-intersect",
                                           tree->ror_scans, ror_scan_mark););
4332

4333
  /* Add priority queue use cost. */
4334 4335
  total_cost += rows2double(records)*
                log((double)(ror_scan_mark - tree->ror_scans)) /
4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351
                (TIME_FOR_COMPARE_ROWID * M_LN2);
  DBUG_PRINT("info", ("Covering ROR-intersect full cost: %g", total_cost));

  if (total_cost > read_time)
    DBUG_RETURN(NULL);

  TRP_ROR_INTERSECT *trp;
  if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT))
    DBUG_RETURN(trp);
  uint best_num= (ror_scan_mark - tree->ror_scans);
  if (!(trp->first_scan= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
                                                     sizeof(ROR_SCAN_INFO*)*
                                                     best_num)))
    DBUG_RETURN(NULL);
  memcpy(trp->first_scan, ror_scan_mark, best_num*sizeof(ROR_SCAN_INFO*));
  trp->last_scan=  trp->first_scan + best_num;
monty@mysql.com's avatar
monty@mysql.com committed
4352
  trp->is_covering= TRUE;
4353 4354
  trp->read_cost= total_cost;
  trp->records= records;
4355
  trp->cpk_scan= NULL;
4356

4357 4358 4359
  DBUG_PRINT("info",
             ("Returning covering ROR-intersect plan: cost %g, records %lu",
              trp->read_cost, (ulong) trp->records));
4360
  DBUG_RETURN(trp);
4361 4362 4363
}


4364
/*
4365
  Get best "range" table read plan for given SEL_TREE.
4366
  Also update PARAM members and store ROR scans info in the SEL_TREE.
4367
  SYNOPSIS
4368
    get_key_scans_params
4369
      param        parameters from test_quick_select
4370
      tree         make range select for this SEL_TREE
monty@mysql.com's avatar
monty@mysql.com committed
4371
      index_read_must_be_used if TRUE, assume 'index only' option will be set
4372
                             (except for clustered PK indexes)
4373 4374
      read_time    don't create read plans with cost > read_time.
  RETURN
4375
    Best range read plan
4376
    NULL if no plan found or error occurred
4377 4378
*/

4379
static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
4380
                                       bool index_read_must_be_used,
4381
                                       double read_time)
4382 4383
{
  int idx;
4384 4385 4386
  SEL_ARG **key,**end, **key_to_read= NULL;
  ha_rows best_records;
  TRP_RANGE* read_plan= NULL;
4387
  bool pk_is_clustered= param->table->file->primary_key_is_clustered();
4388 4389
  DBUG_ENTER("get_key_scans_params");
  LINT_INIT(best_records); /* protected by key_to_read */
4390
  /*
4391 4392
    Note that there may be trees that have type SEL_TREE::KEY but contain no
    key reads at all, e.g. tree for expression "key1 is not null" where key1
4393
    is defined as "not null".
4394 4395
  */
  DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->keys_map,
4396 4397 4398 4399
                                      "tree scans"););
  tree->ror_scans_map.clear_all();
  tree->n_ror_scans= 0;
  for (idx= 0,key=tree->keys, end=key+param->keys;
4400 4401 4402 4403 4404 4405 4406
       key != end ;
       key++,idx++)
  {
    ha_rows found_records;
    double found_read_time;
    if (*key)
    {
4407
      uint keynr= param->real_keynr[idx];
4408 4409
      if ((*key)->type == SEL_ARG::MAYBE_KEY ||
          (*key)->maybe_flag)
4410
        param->needed_reg->set_bit(keynr);
4411

monty@mysql.com's avatar
monty@mysql.com committed
4412 4413
      bool read_index_only= index_read_must_be_used ? TRUE :
                            (bool) param->table->used_keys.is_set(keynr);
4414

4415 4416 4417 4418 4419 4420
      found_records= check_quick_select(param, idx, *key);
      if (param->is_ror_scan)
      {
        tree->n_ror_scans++;
        tree->ror_scans_map.set_bit(idx);
      }
4421
      double cpu_cost= (double) found_records / TIME_FOR_COMPARE;
4422
      if (found_records != HA_POS_ERROR && found_records > 2 &&
sergefp@mysql.com's avatar
sergefp@mysql.com committed
4423
          read_index_only &&
monty@mysql.com's avatar
monty@mysql.com committed
4424
          (param->table->file->index_flags(keynr, param->max_key_part,1) &
monty@mysql.com's avatar
monty@mysql.com committed
4425
           HA_KEYREAD_ONLY) &&
4426
          !(pk_is_clustered && keynr == param->table->s->primary_key))
4427 4428 4429 4430 4431
      {
        /*
          We can resolve this by only reading through this key. 
          0.01 is added to avoid races between range and 'index' scan.
        */
4432
        found_read_time= get_index_only_read_time(param,found_records,keynr) +
4433 4434
                         cpu_cost + 0.01;
      }
4435
      else
4436
      {
4437
        /*
4438 4439 4440
          cost(read_through_index) = cost(disk_io) + cost(row_in_range_checks)
          The row_in_range check is in QUICK_RANGE_SELECT::cmp_next function.
        */
4441 4442 4443
	found_read_time= param->table->file->read_time(keynr,
                                                       param->range_count,
                                                       found_records) +
4444 4445
			 cpu_cost + 0.01;
      }
4446 4447 4448
      DBUG_PRINT("info",("key %s: found_read_time: %g (cur. read_time: %g)",
                         param->table->key_info[keynr].name, found_read_time,
                         read_time));
4449

4450 4451
      if (read_time > found_read_time && found_records != HA_POS_ERROR
          /*|| read_time == DBL_MAX*/ )
4452
      {
4453
        read_time=    found_read_time;
4454
        best_records= found_records;
4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470
        key_to_read=  key;
      }

    }
  }

  DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->ror_scans_map,
                                      "ROR scans"););
  if (key_to_read)
  {
    idx= key_to_read - tree->keys;
    if ((read_plan= new (param->mem_root) TRP_RANGE(*key_to_read, idx)))
    {
      read_plan->records= best_records;
      read_plan->is_ror= tree->ror_scans_map.is_set(idx);
      read_plan->read_cost= read_time;
4471 4472 4473 4474
      DBUG_PRINT("info",
                 ("Returning range plan for key %s, cost %g, records %lu",
                  param->table->key_info[param->real_keynr[idx]].name,
                  read_plan->read_cost, (ulong) read_plan->records));
4475 4476 4477 4478 4479 4480 4481 4482 4483
    }
  }
  else
    DBUG_PRINT("info", ("No 'range' table read plan found"));

  DBUG_RETURN(read_plan);
}


4484
/*
  Build a QUICK_INDEX_MERGE_SELECT from this index_merge read plan.

  One quick select is created for every range scan in
  [range_scans, range_scans_end) and pushed into the merge select.
  index_merge always retrieves full rows, so retrieve_full_rows is ignored;
  parent_alloc is not used either: the child quick selects are created
  with the merge select's own MEM_ROOT.

  RETURN
    The quick select
    NULL on failure (everything created so far is deleted)
*/
QUICK_SELECT_I *TRP_INDEX_MERGE::make_quick(PARAM *param,
                                            bool retrieve_full_rows,
                                            MEM_ROOT *parent_alloc)
{
  QUICK_INDEX_MERGE_SELECT *merge_quick=
    new QUICK_INDEX_MERGE_SELECT(param->thd, param->table);
  if (!merge_quick)
    return NULL;

  merge_quick->records= records;
  merge_quick->read_time= read_cost;

  for (TRP_RANGE **cur= range_scans; cur != range_scans_end; cur++)
  {
    QUICK_RANGE_SELECT *range_quick=
      (QUICK_RANGE_SELECT*) ((*cur)->make_quick(param, FALSE,
                                                &merge_quick->alloc));
    if (range_quick && !merge_quick->push_quick_back(range_quick))
      continue;                                 /* Scan added, take next */

    /* Creation or registration failed: undo everything. */
    delete range_quick;
    delete merge_quick;
    return NULL;
  }
  return merge_quick;
}

4511
/*
  Build a QUICK_ROR_INTERSECT_SELECT from this ROR-intersection read plan.

  SYNOPSIS
    TRP_ROR_INTERSECT::make_quick()
      param               Parameter from test_quick_select
      retrieve_full_rows  If TRUE and the plan is not covering, the quick
                          select is asked to retrieve full rows
      parent_alloc        MEM_ROOT to use for the child quick selects, or
                          NULL to use the MEM_ROOT of the created select

  NOTES
    A quick select is created for every scan in [first_scan, last_scan) and,
    if the plan uses one, for the clustered PK scan. The CPK quick select is
    stored in cpk_quick with its 'file' reset to NULL.
    The loop advances the first_scan member itself, so after this call
    first_scan no longer points to the start of the scan array.

  RETURN
    The quick select
    NULL on failure
*/
QUICK_SELECT_I *TRP_ROR_INTERSECT::make_quick(PARAM *param,
                                              bool retrieve_full_rows,
                                              MEM_ROOT *parent_alloc)
{
  QUICK_ROR_INTERSECT_SELECT *quick_intrsect;
  QUICK_RANGE_SELECT *quick;
  DBUG_ENTER("TRP_ROR_INTERSECT::make_quick");
  MEM_ROOT *alloc;

  if ((quick_intrsect=
         new QUICK_ROR_INTERSECT_SELECT(param->thd, param->table,
                                        retrieve_full_rows? (!is_covering):FALSE,
                                        parent_alloc)))
  {
    DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
                                             "creating ROR-intersect",
                                             first_scan, last_scan););
    alloc= parent_alloc? parent_alloc: &quick_intrsect->alloc;
    for (; first_scan != last_scan;++first_scan)
    {
      if (!(quick= get_quick_select(param, (*first_scan)->idx,
                                    (*first_scan)->sel_arg, alloc)) ||
          quick_intrsect->push_quick_back(quick))
      {
        /*
          If push_quick_back() failed, 'quick' was created but never handed
          over to quick_intrsect, so it has to be released here (as
          TRP_INDEX_MERGE::make_quick does). Deleting NULL is a no-op.
        */
        delete quick;
        delete quick_intrsect;
        DBUG_RETURN(NULL);
      }
    }
    if (cpk_scan)
    {
      if (!(quick= get_quick_select(param, cpk_scan->idx,
                                    cpk_scan->sel_arg, alloc)))
      {
        delete quick_intrsect;
        DBUG_RETURN(NULL);
      }
      quick->file= NULL;
      quick_intrsect->cpk_quick= quick;
    }
    quick_intrsect->records= records;
    quick_intrsect->read_time= read_cost;
  }
  DBUG_RETURN(quick_intrsect);
}

4556

4557
/*
  Build a QUICK_ROR_UNION_SELECT from this ROR-union read plan.

  A quick select is created for every plan in [first_ror, last_ror), using
  the MEM_ROOT of the union select, and pushed into the union select.
  It is impossible to construct a ROR-union that will not retrieve full
  rows, so retrieve_full_rows is ignored; parent_alloc is not used.

  RETURN
    The quick select
    NULL on failure
*/
QUICK_SELECT_I *TRP_ROR_UNION::make_quick(PARAM *param,
                                          bool retrieve_full_rows,
                                          MEM_ROOT *parent_alloc)
{
  QUICK_ROR_UNION_SELECT *quick_roru;
  DBUG_ENTER("TRP_ROR_UNION::make_quick");

  quick_roru= new QUICK_ROR_UNION_SELECT(param->thd, param->table);
  if (!quick_roru)
    DBUG_RETURN(quick_roru);

  for (TABLE_READ_PLAN **cur= first_ror; cur != last_ror; cur++)
  {
    QUICK_SELECT_I *child= (*cur)->make_quick(param, FALSE,
                                              &quick_roru->alloc);
    /*
      NOTE(review): on failure neither quick_roru nor child is deleted
      here, unlike in TRP_INDEX_MERGE::make_quick - possible leak, confirm.
    */
    if (!child)
      DBUG_RETURN(NULL);
    if (quick_roru->push_quick_back(child))
      DBUG_RETURN(NULL);
  }
  quick_roru->records= records;
  quick_roru->read_time= read_cost;
  DBUG_RETURN(quick_roru);
}

4583

4584
/*
monty@mysql.com's avatar
monty@mysql.com committed
4585
  Build a SEL_TREE for <> or NOT BETWEEN predicate
4586 4587 4588 4589 4590 4591
 
  SYNOPSIS
    get_ne_mm_tree()
      param       PARAM from SQL_SELECT::test_quick_select
      cond_func   item for the predicate
      field       field in the predicate
monty@mysql.com's avatar
monty@mysql.com committed
4592 4593
      lt_value    constant that field should be smaller
      gt_value    constant that field should be greaterr
4594 4595 4596
      cmp_type    compare type for the field

  RETURN 
monty@mysql.com's avatar
monty@mysql.com committed
4597 4598
    #  Pointer to tree built tree
    0  on error
4599 4600
*/

4601
static SEL_TREE *get_ne_mm_tree(RANGE_OPT_PARAM *param, Item_func *cond_func, 
monty@mysql.com's avatar
monty@mysql.com committed
4602 4603
                                Field *field,
                                Item *lt_value, Item *gt_value,
4604 4605
                                Item_result cmp_type)
{
monty@mysql.com's avatar
monty@mysql.com committed
4606
  SEL_TREE *tree;
4607
  tree= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC,
monty@mysql.com's avatar
monty@mysql.com committed
4608
                     lt_value, cmp_type);
4609 4610 4611 4612
  if (tree)
  {
    tree= tree_or(param, tree, get_mm_parts(param, cond_func, field,
					    Item_func::GT_FUNC,
monty@mysql.com's avatar
monty@mysql.com committed
4613
					    gt_value, cmp_type));
4614 4615 4616 4617 4618
  }
  return tree;
}
   

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4619 4620 4621 4622 4623 4624 4625 4626 4627 4628
/*
  Build a SEL_TREE for a simple predicate
 
  SYNOPSIS
    get_func_mm_tree()
      param       PARAM from SQL_SELECT::test_quick_select
      cond_func   item for the predicate
      field       field in the predicate
      value       constant in the predicate
      cmp_type    compare type for the field
4629
      inv         TRUE <> NOT cond_func is considered
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4630
                  (makes sense only when cond_func is BETWEEN or IN) 
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4631 4632

  RETURN 
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4633
    Pointer to the tree built tree
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4634 4635
*/

4636
static SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param, Item_func *cond_func, 
4637
                                  Field *field, Item *value,
4638
                                  Item_result cmp_type, bool inv)
4639 4640 4641 4642
{
  SEL_TREE *tree= 0;
  DBUG_ENTER("get_func_mm_tree");

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4643
  switch (cond_func->functype()) {
4644

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4645
  case Item_func::NE_FUNC:
monty@mysql.com's avatar
monty@mysql.com committed
4646
    tree= get_ne_mm_tree(param, cond_func, field, value, value, cmp_type);
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4647
    break;
4648

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4649
  case Item_func::BETWEEN:
4650
    if (inv)
4651
    {
monty@mysql.com's avatar
monty@mysql.com committed
4652 4653
      tree= get_ne_mm_tree(param, cond_func, field, cond_func->arguments()[1],
                           cond_func->arguments()[2], cmp_type);
4654 4655
    }
    else
4656
    {
4657 4658 4659 4660 4661 4662 4663 4664 4665
      tree= get_mm_parts(param, cond_func, field, Item_func::GE_FUNC,
		         cond_func->arguments()[1],cmp_type);
      if (tree)
      {
        tree= tree_and(param, tree, get_mm_parts(param, cond_func, field,
					         Item_func::LE_FUNC,
					         cond_func->arguments()[2],
                                                 cmp_type));
      }
4666
    }
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4667
    break;
4668

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4669
  case Item_func::IN_FUNC:
4670 4671
  {
    Item_func_in *func=(Item_func_in*) cond_func;
4672 4673

    if (inv)
4674
    {
4675
      tree= get_ne_mm_tree(param, cond_func, field,
monty@mysql.com's avatar
monty@mysql.com committed
4676 4677
                           func->arguments()[1], func->arguments()[1],
                           cmp_type);
4678
      if (tree)
4679
      {
4680 4681 4682 4683 4684
        Item **arg, **end;
        for (arg= func->arguments()+2, end= arg+func->argument_count()-2;
             arg < end ; arg++)
        {
          tree=  tree_and(param, tree, get_ne_mm_tree(param, cond_func, field, 
monty@mysql.com's avatar
monty@mysql.com committed
4685
                                                      *arg, *arg, cmp_type));
4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702
        }
      }
    }
    else
    {    
      tree= get_mm_parts(param, cond_func, field, Item_func::EQ_FUNC,
                         func->arguments()[1], cmp_type);
      if (tree)
      {
        Item **arg, **end;
        for (arg= func->arguments()+2, end= arg+func->argument_count()-2;
             arg < end ; arg++)
        {
          tree= tree_or(param, tree, get_mm_parts(param, cond_func, field, 
                                                  Item_func::EQ_FUNC,
                                                  *arg, cmp_type));
        }
4703 4704
      }
    }
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4705
    break;
4706
  }
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4707
  default: 
4708
  {
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4709 4710 4711 4712 4713 4714 4715
    /* 
       Here the function for the following predicates are processed:
       <, <=, =, >=, >, LIKE, IS NULL, IS NOT NULL.
       If the predicate is of the form (value op field) it is handled
       as the equivalent predicate (field rev_op value), e.g.
       2 <= a is handled as a >= 2.
    */
4716 4717 4718
    Item_func::Functype func_type=
      (value != cond_func->arguments()[0]) ? cond_func->functype() :
        ((Item_bool_func2*) cond_func)->rev_functype();
4719
    tree= get_mm_parts(param, cond_func, field, func_type, value, cmp_type);
4720
  }
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4721 4722
  }

4723
  DBUG_RETURN(tree);
4724

4725 4726
}

bk@work.mysql.com's avatar
bk@work.mysql.com committed
4727 4728
	/* make a select tree of all keys in condition */

4729
static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4730 4731
{
  SEL_TREE *tree=0;
4732 4733
  SEL_TREE *ftree= 0;
  Item_field *field_item= 0;
4734
  bool inv= FALSE;
4735
  Item *value;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748
  DBUG_ENTER("get_mm_tree");

  if (cond->type() == Item::COND_ITEM)
  {
    List_iterator<Item> li(*((Item_cond*) cond)->argument_list());

    if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
    {
      tree=0;
      Item *item;
      while ((item=li++))
      {
	SEL_TREE *new_tree=get_mm_tree(param,item);
4749
	if (param->thd->is_fatal_error)
4750
	  DBUG_RETURN(0);	// out of memory
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765
	tree=tree_and(param,tree,new_tree);
	if (tree && tree->type == SEL_TREE::IMPOSSIBLE)
	  break;
      }
    }
    else
    {						// COND OR
      tree=get_mm_tree(param,li++);
      if (tree)
      {
	Item *item;
	while ((item=li++))
	{
	  SEL_TREE *new_tree=get_mm_tree(param,item);
	  if (!new_tree)
4766
	    DBUG_RETURN(0);	// out of memory
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781
	  tree=tree_or(param,tree,new_tree);
	  if (!tree || tree->type == SEL_TREE::ALWAYS)
	    break;
	}
      }
    }
    DBUG_RETURN(tree);
  }
  /* Here when simple cond */
  if (cond->const_item())
  {
    if (cond->val_int())
      DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS));
    DBUG_RETURN(new SEL_TREE(SEL_TREE::IMPOSSIBLE));
  }
4782

4783 4784 4785
  table_map ref_tables= 0;
  table_map param_comp= ~(param->prev_tables | param->read_tables |
		          param->current_table);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4786 4787
  if (cond->type() != Item::FUNC_ITEM)
  {						// Should be a field
4788
    ref_tables= cond->used_tables();
4789 4790
    if ((ref_tables & param->current_table) ||
	(ref_tables & ~(param->prev_tables | param->read_tables)))
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4791 4792 4793
      DBUG_RETURN(0);
    DBUG_RETURN(new SEL_TREE(SEL_TREE::MAYBE));
  }
4794

bk@work.mysql.com's avatar
bk@work.mysql.com committed
4795
  Item_func *cond_func= (Item_func*) cond;
4796 4797 4798
  if (cond_func->functype() == Item_func::BETWEEN ||
      cond_func->functype() == Item_func::IN_FUNC)
    inv= ((Item_func_opt_neg *) cond_func)->negated;
4799
  else if (cond_func->select_optimize() == Item_func::OPTIMIZE_NONE)
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4800
    DBUG_RETURN(0);			       
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4801

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4802 4803
  param->cond= cond;

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4804 4805
  switch (cond_func->functype()) {
  case Item_func::BETWEEN:
4806
    if (cond_func->arguments()[0]->real_item()->type() != Item::FIELD_ITEM)
4807
      DBUG_RETURN(0);
4808
    field_item= (Item_field*) (cond_func->arguments()[0]->real_item());
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4809 4810 4811
    value= NULL;
    break;
  case Item_func::IN_FUNC:
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4812 4813
  {
    Item_func_in *func=(Item_func_in*) cond_func;
4814
    if (func->key_item()->real_item()->type() != Item::FIELD_ITEM)
4815
      DBUG_RETURN(0);
4816
    field_item= (Item_field*) (func->key_item()->real_item());
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4817 4818
    value= NULL;
    break;
4819
  }
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4820
  case Item_func::MULT_EQUAL_FUNC:
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4821
  {
4822 4823
    Item_equal *item_equal= (Item_equal *) cond;    
    if (!(value= item_equal->get_const()))
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4824 4825 4826 4827
      DBUG_RETURN(0);
    Item_equal_iterator it(*item_equal);
    ref_tables= value->used_tables();
    while ((field_item= it++))
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4828
    {
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4829 4830 4831
      Field *field= field_item->field;
      Item_result cmp_type= field->cmp_type();
      if (!((ref_tables | field->table->map) & param_comp))
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4832
      {
4833
        tree= get_mm_parts(param, cond, field, Item_func::EQ_FUNC,
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4834 4835
		           value,cmp_type);
        ftree= !ftree ? tree : tree_and(param, ftree, tree);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4836 4837
      }
    }
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4838
    
4839
    DBUG_RETURN(ftree);
4840 4841
  }
  default:
4842
    if (cond_func->arguments()[0]->real_item()->type() == Item::FIELD_ITEM)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4843
    {
4844
      field_item= (Item_field*) (cond_func->arguments()[0]->real_item());
4845
      value= cond_func->arg_count > 1 ? cond_func->arguments()[1] : 0;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4846
    }
4847
    else if (cond_func->have_rev_func() &&
4848 4849
             cond_func->arguments()[1]->real_item()->type() ==
                                                            Item::FIELD_ITEM)
4850
    {
4851
      field_item= (Item_field*) (cond_func->arguments()[1]->real_item());
4852 4853 4854 4855
      value= cond_func->arguments()[0];
    }
    else
      DBUG_RETURN(0);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4856
  }
4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871

  /* 
     If the where condition contains a predicate (ti.field op const),
     then not only SELL_TREE for this predicate is built, but
     the trees for the results of substitution of ti.field for
     each tj.field belonging to the same multiple equality as ti.field
     are built as well.
     E.g. for WHERE t1.a=t2.a AND t2.a > 10 
     a SEL_TREE for t2.a > 10 will be built for quick select from t2
     and   
     a SEL_TREE for t1.a > 10 will be built for quick select from t1.
  */
     
  for (uint i= 0; i < cond_func->arg_count; i++)
  {
4872
    Item *arg= cond_func->arguments()[i]->real_item();
4873 4874 4875 4876 4877 4878
    if (arg != field_item)
      ref_tables|= arg->used_tables();
  }
  Field *field= field_item->field;
  Item_result cmp_type= field->cmp_type();
  if (!((ref_tables | field->table->map) & param_comp))
4879
    ftree= get_func_mm_tree(param, cond_func, field, value, cmp_type, inv);
4880 4881 4882 4883 4884 4885
  Item_equal *item_equal= field_item->item_equal;
  if (item_equal)
  {
    Item_equal_iterator it(*item_equal);
    Item_field *item;
    while ((item= it++))
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4886
    {
4887 4888 4889 4890
      Field *f= item->field;
      if (field->eq(f))
        continue;
      if (!((ref_tables | f->table->map) & param_comp))
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4891
      {
4892
        tree= get_func_mm_tree(param, cond_func, f, value, cmp_type, inv);
4893
        ftree= !ftree ? tree : tree_and(param, ftree, tree);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4894 4895 4896
      }
    }
  }
4897
  DBUG_RETURN(ftree);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4898 4899 4900 4901
}


/*
  Build a SEL_TREE for "field <type> value" over every key part in
  param->key_parts that covers the given field.

  The function returns 0 if the field does not belong to param->table, if
  the value depends on tables outside prev_tables/read_tables, or if an
  allocation fails. If the value depends on tables that are not in
  read_tables, a MAYBE_KEY SEL_ARG is stored for the key part instead of
  a real range. An IMPOSSIBLE leaf makes the whole tree IMPOSSIBLE.
  The cmp_type argument is not used here.
*/

static SEL_TREE *
get_mm_parts(RANGE_OPT_PARAM *param, COND *cond_func, Field *field,
             Item_func::Functype type,
             Item *value, Item_result cmp_type)
{
  DBUG_ENTER("get_mm_parts");
  if (field->table != param->table)
    DBUG_RETURN(0);

  if (value &&
      value->used_tables() & ~(param->prev_tables | param->read_tables))
    DBUG_RETURN(0);

  SEL_TREE *result= 0;
  KEY_PART *const parts_end= param->key_parts_end;
  for (KEY_PART *part= param->key_parts; part != parts_end; part++)
  {
    if (!field->eq(part->field))
      continue;

    /* The tree is created lazily, on the first matching key part */
    if (!result && !(result= new SEL_TREE()))
      DBUG_RETURN(0);                           // OOM

    SEL_ARG *leaf= 0;
    if (value && (value->used_tables() & ~param->read_tables))
    {
      /* This key may be used later */
      leaf= new SEL_ARG(SEL_ARG::MAYBE_KEY);
      if (!leaf)
        DBUG_RETURN(0);                         // OOM
    }
    else
    {
      leaf= get_mm_leaf(param, cond_func, part->field, part, type, value);
      if (!leaf)
        continue;
      if (leaf->type == SEL_ARG::IMPOSSIBLE)
      {
        result->type= SEL_TREE::IMPOSSIBLE;
        DBUG_RETURN(result);
      }
    }

    leaf->part= (uchar) part->part;
    result->keys[part->key]= sel_add(result->keys[part->key], leaf);
    result->keys_map.set_bit(part->key);
  }

  DBUG_RETURN(result);
}


static SEL_ARG *
4952
get_mm_leaf(RANGE_OPT_PARAM *param, COND *conf_func, Field *field, KEY_PART *key_part,
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4953 4954
	    Item_func::Functype type,Item *value)
{
4955
  uint maybe_null=(uint) field->real_maybe_null();
4956
  bool optimize_range;
4957 4958
  SEL_ARG *tree= 0;
  MEM_ROOT *alloc= param->mem_root;
4959
  char *str;
evgen@moonbone.local's avatar
evgen@moonbone.local committed
4960
  ulong orig_sql_mode;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4961 4962
  DBUG_ENTER("get_mm_leaf");

4963 4964
  /*
    We need to restore the runtime mem_root of the thread in this
konstantin@mysql.com's avatar
konstantin@mysql.com committed
4965
    function because it evaluates the value of its argument, while
4966 4967 4968 4969 4970 4971
    the argument can be any, e.g. a subselect. The subselect
    items, in turn, assume that all the memory allocated during
    the evaluation has the same life span as the item itself.
    TODO: opt_range.cc should not reset thd->mem_root at all.
  */
  param->thd->mem_root= param->old_root;
4972 4973
  if (!value)					// IS NULL or IS NOT NULL
  {
4974
    if (field->table->maybe_null)		// Can't use a key on this
4975
      goto end;
4976
    if (!maybe_null)				// Not null field
4977 4978 4979 4980 4981 4982 4983
    {
      if (type == Item_func::ISNULL_FUNC)
        tree= &null_element;
      goto end;
    }
    if (!(tree= new (alloc) SEL_ARG(field,is_null_string,is_null_string)))
      goto end;                                 // out of memory
4984 4985 4986 4987 4988
    if (type == Item_func::ISNOTNULL_FUNC)
    {
      tree->min_flag=NEAR_MIN;		    /* IS NOT NULL ->  X > NULL */
      tree->max_flag=NO_MAX_RANGE;
    }
4989
    goto end;
4990 4991 4992
  }

  /*
4993 4994 4995 4996 4997 4998 4999 5000 5001 5002
    1. Usually we can't use an index if the column collation
       differ from the operation collation.

    2. However, we can reuse a case insensitive index for
       the binary searches:

       WHERE latin1_swedish_ci_column = 'a' COLLATE lati1_bin;

       WHERE latin1_swedish_ci_colimn = BINARY 'a '

5003 5004 5005 5006
  */
  if (field->result_type() == STRING_RESULT &&
      value->result_type() == STRING_RESULT &&
      key_part->image_type == Field::itRAW &&
5007 5008
      ((Field_str*)field)->charset() != conf_func->compare_collation() &&
      !(conf_func->compare_collation()->state & MY_CS_BINSORT))
5009
    goto end;
5010

5011 5012 5013 5014 5015
  if (param->using_real_indexes)
    optimize_range= field->optimize_range(param->real_keynr[key_part->key],
                                          key_part->part);
  else
    optimize_range= TRUE;
5016

bk@work.mysql.com's avatar
bk@work.mysql.com committed
5017 5018 5019 5020
  if (type == Item_func::LIKE_FUNC)
  {
    bool like_error;
    char buff1[MAX_FIELD_WIDTH],*min_str,*max_str;
5021
    String tmp(buff1,sizeof(buff1),value->collation.collation),*res;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5022
    uint length,offset,min_length,max_length;
5023
    uint field_length= field->pack_length()+maybe_null;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5024

5025
    if (!optimize_range)
5026
      goto end;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5027
    if (!(res= value->val_str(&tmp)))
5028 5029 5030 5031
    {
      tree= &null_element;
      goto end;
    }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5032

5033 5034 5035 5036 5037
    /*
      TODO:
      Check if this was a function. This should have be optimized away
      in the sql_select.cc
    */
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5038 5039 5040 5041 5042 5043
    if (res != &tmp)
    {
      tmp.copy(*res);				// Get own copy
      res= &tmp;
    }
    if (field->cmp_type() != STRING_RESULT)
5044
      goto end;                                 // Can only optimize strings
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5045 5046

    offset=maybe_null;
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
5047 5048 5049
    length=key_part->store_length;

    if (length != key_part->length  + maybe_null)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5050
    {
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
5051 5052 5053
      /* key packed with length prefix */
      offset+= HA_KEY_BLOB_LENGTH;
      field_length= length - HA_KEY_BLOB_LENGTH;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5054 5055 5056
    }
    else
    {
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
5057 5058 5059 5060 5061 5062 5063 5064
      if (unlikely(length < field_length))
      {
	/*
	  This can only happen in a table created with UNIREG where one key
	  overlaps many fields
	*/
	length= field_length;
      }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5065
      else
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
5066
	field_length= length;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5067 5068
    }
    length+=offset;
5069 5070
    if (!(min_str= (char*) alloc_root(alloc, length*2)))
      goto end;
5071

bk@work.mysql.com's avatar
bk@work.mysql.com committed
5072 5073 5074
    max_str=min_str+length;
    if (maybe_null)
      max_str[0]= min_str[0]=0;
5075

5076
    field_length-= maybe_null;
5077
    like_error= my_like_range(field->charset(),
monty@mysql.com's avatar
monty@mysql.com committed
5078
			      res->ptr(), res->length(),
monty@mysql.com's avatar
monty@mysql.com committed
5079 5080
			      ((Item_func_like*)(param->cond))->escape,
			      wild_one, wild_many,
5081
			      field_length,
monty@mysql.com's avatar
monty@mysql.com committed
5082 5083
			      min_str+offset, max_str+offset,
			      &min_length, &max_length);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5084
    if (like_error)				// Can't optimize with LIKE
5085
      goto end;
monty@mysql.com's avatar
monty@mysql.com committed
5086

5087
    if (offset != maybe_null)			// BLOB or VARCHAR
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5088 5089 5090 5091
    {
      int2store(min_str+maybe_null,min_length);
      int2store(max_str+maybe_null,max_length);
    }
5092 5093
    tree= new (alloc) SEL_ARG(field, min_str, max_str);
    goto end;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5094 5095
  }

5096
  if (!optimize_range &&
5097
      type != Item_func::EQ_FUNC &&
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5098
      type != Item_func::EQUAL_FUNC)
5099
    goto end;                                   // Can't optimize this
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5100

5101 5102 5103 5104
  /*
    We can't always use indexes when comparing a string index to a number
    cmp_type() is checked to allow compare of dates to numbers
  */
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5105 5106 5107
  if (field->result_type() == STRING_RESULT &&
      value->result_type() != STRING_RESULT &&
      field->cmp_type() != value->result_type())
5108
    goto end;
5109
  /* For comparison purposes allow invalid dates like 2000-01-32 */
evgen@moonbone.local's avatar
evgen@moonbone.local committed
5110
  orig_sql_mode= field->table->in_use->variables.sql_mode;
5111 5112 5113 5114
  if (value->real_item()->type() == Item::STRING_ITEM &&
      (field->type() == FIELD_TYPE_DATE ||
       field->type() == FIELD_TYPE_DATETIME))
    field->table->in_use->variables.sql_mode|= MODE_INVALID_DATES;
5115
  if (value->save_in_field_no_warnings(field, 1) < 0)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5116
  {
5117
    field->table->in_use->variables.sql_mode= orig_sql_mode;
5118
    /* This happens when we try to insert a NULL field in a not null column */
5119 5120
    tree= &null_element;                        // cmp with NULL is never TRUE
    goto end;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5121
  }
5122
  field->table->in_use->variables.sql_mode= orig_sql_mode;
5123
  str= (char*) alloc_root(alloc, key_part->store_length+1);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5124
  if (!str)
5125
    goto end;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5126
  if (maybe_null)
5127
    *str= (char) field->is_real_null();		// Set to 1 if null
5128
  field->get_key_image(str+maybe_null, key_part->length, key_part->image_type);
5129 5130
  if (!(tree= new (alloc) SEL_ARG(field, str, str)))
    goto end;                                   // out of memory
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5131

timour@mysql.com's avatar
timour@mysql.com committed
5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142
  /*
    Check if we are comparing an UNSIGNED integer with a negative constant.
    In this case we know that:
    (a) (unsigned_int [< | <=] negative_constant) == FALSE
    (b) (unsigned_int [> | >=] negative_constant) == TRUE
    In case (a) the condition is false for all values, and in case (b) it
    is true for all values, so we can avoid unnecessary retrieval and condition
    testing, and we also get correct comparison of unsinged integers with
    negative integers (which otherwise fails because at query execution time
    negative integers are cast to unsigned if compared with unsigned).
   */
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
5143 5144
  if (field->result_type() == INT_RESULT &&
      value->result_type() == INT_RESULT &&
timour@mysql.com's avatar
timour@mysql.com committed
5145 5146 5147 5148 5149 5150 5151 5152
      ((Field_num*)field)->unsigned_flag && !((Item_int*)value)->unsigned_flag)
  {
    longlong item_val= value->val_int();
    if (item_val < 0)
    {
      if (type == Item_func::LT_FUNC || type == Item_func::LE_FUNC)
      {
        tree->type= SEL_ARG::IMPOSSIBLE;
5153
        goto end;
timour@mysql.com's avatar
timour@mysql.com committed
5154 5155
      }
      if (type == Item_func::GT_FUNC || type == Item_func::GE_FUNC)
5156 5157 5158 5159
      {
        tree= 0;
        goto end;
      }
timour@mysql.com's avatar
timour@mysql.com committed
5160 5161
    }
  }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183

  switch (type) {
  case Item_func::LT_FUNC:
    if (field_is_equal_to_item(field,value))
      tree->max_flag=NEAR_MAX;
    /* fall through */
  case Item_func::LE_FUNC:
    if (!maybe_null)
      tree->min_flag=NO_MIN_RANGE;		/* From start */
    else
    {						// > NULL
      tree->min_value=is_null_string;
      tree->min_flag=NEAR_MIN;
    }
    break;
  case Item_func::GT_FUNC:
    if (field_is_equal_to_item(field,value))
      tree->min_flag=NEAR_MIN;
    /* fall through */
  case Item_func::GE_FUNC:
    tree->max_flag=NO_MAX_RANGE;
    break;
5184
  case Item_func::SP_EQUALS_FUNC:
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
5185 5186 5187
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_EQUAL;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;
5188
  case Item_func::SP_DISJOINT_FUNC:
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
5189 5190 5191
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_DISJOINT;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;
5192
  case Item_func::SP_INTERSECTS_FUNC:
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
5193 5194 5195
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_INTERSECT;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;
5196
  case Item_func::SP_TOUCHES_FUNC:
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
5197 5198 5199
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_INTERSECT;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;
5200 5201

  case Item_func::SP_CROSSES_FUNC:
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
5202 5203 5204
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_INTERSECT;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;
5205
  case Item_func::SP_WITHIN_FUNC:
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
5206 5207 5208
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_WITHIN;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;
5209 5210

  case Item_func::SP_CONTAINS_FUNC:
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
5211 5212 5213
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_CONTAIN;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;
5214
  case Item_func::SP_OVERLAPS_FUNC:
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
5215 5216 5217
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_INTERSECT;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;
5218

bk@work.mysql.com's avatar
bk@work.mysql.com committed
5219 5220 5221
  default:
    break;
  }
5222 5223 5224

end:
  param->thd->mem_root= alloc;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5225 5226 5227 5228 5229 5230 5231 5232 5233
  DBUG_RETURN(tree);
}


/******************************************************************************
** Tree manipulation functions
** If tree is 0 it means that the condition can't be tested. It refers
** to a non existent table or to a field in current table which isn't a key.
** The different tree flags:
** IMPOSSIBLE:	 Condition is never TRUE
** ALWAYS:	 Condition is always TRUE
** MAYBE:	 Condition may exist when tables are read
** MAYBE_KEY:	 Condition refers to a key that may be used in join loop
** KEY_RANGE:	 Condition uses a key
******************************************************************************/

/*
  Merge two SEL_ARG chains linked through next_key_part into one chain
  ordered by key part number.

  If either chain is empty the other one is returned unchanged. When both
  heads have the same part number the element of key2 is linked first.
  This will never be called for same key parts.
*/

static SEL_ARG *
sel_add(SEL_ARG *key1,SEL_ARG *key2)
{
  if (!key1)
    return key2;
  if (!key2)
    return key1;

  SEL_ARG *head;
  SEL_ARG **tail= &head;
  do
  {
    /* Pick the cursor whose current element has the smaller part number */
    SEL_ARG *&lower= (key1->part < key2->part) ? key1 : key2;
    *tail= lower;
    tail= &lower->next_key_part;
    lower= lower->next_key_part;
  } while (key1 && key2);

  /* Append whatever is left of the chain that was not exhausted */
  *tail= key1 ? key1 : key2;
  return head;
}

/* Flags telling key_and() which of its two arguments is not a simple key */
#define CLONE_KEY1_MAYBE 1
#define CLONE_KEY2_MAYBE 2
/*
  Exchange bit 0 (CLONE_KEY1_MAYBE) and bit 1 (CLONE_KEY2_MAYBE) of A.
  The argument and the whole expansion are parenthesized so that the macro
  evaluates correctly inside a larger expression and with an argument that
  is itself an expression (e.g. swap_clone_flag(a | b)).
*/
#define swap_clone_flag(A) ((((A) & 1) << 1) | (((A) & 2) >> 1))


/*
  Combine two SEL_TREEs with AND.

  A missing (0) tree gives the other tree; IMPOSSIBLE wins over anything
  and ALWAYS gives the other tree. If one tree is MAYBE the other one is
  returned with its type changed to KEY_SMALLER (for tree2 only when it
  was KEY). Otherwise the trees are joined key by key into tree1, which is
  modified in place and returned.
*/

static SEL_TREE *
tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
{
  DBUG_ENTER("tree_and");
  if (!tree1)
    DBUG_RETURN(tree2);
  if (!tree2)
    DBUG_RETURN(tree1);
  if (tree1->type == SEL_TREE::IMPOSSIBLE || tree2->type == SEL_TREE::ALWAYS)
    DBUG_RETURN(tree1);
  if (tree2->type == SEL_TREE::IMPOSSIBLE || tree1->type == SEL_TREE::ALWAYS)
    DBUG_RETURN(tree2);
  if (tree1->type == SEL_TREE::MAYBE)
  {
    if (tree2->type == SEL_TREE::KEY)
      tree2->type= SEL_TREE::KEY_SMALLER;
    DBUG_RETURN(tree2);
  }
  if (tree2->type == SEL_TREE::MAYBE)
  {
    tree1->type= SEL_TREE::KEY_SMALLER;
    DBUG_RETURN(tree1);
  }

  key_map result_keys;
  result_keys.clear_all();

  /* Join the trees key per key; the result is stored in tree1->keys[] */
  const uint key_count= param->keys;
  for (uint idx= 0; idx < key_count; idx++)
  {
    SEL_ARG **slot1= tree1->keys + idx;
    SEL_ARG *other= tree2->keys[idx];
    if (!*slot1 && !other)
      continue;

    uint clone_flag= 0;
    if (*slot1 && !(*slot1)->simple_key())
      clone_flag|= CLONE_KEY1_MAYBE;
    if (other && !other->simple_key())
      clone_flag|= CLONE_KEY2_MAYBE;

    *slot1= key_and(*slot1, other, clone_flag);
    if (*slot1 && (*slot1)->type == SEL_ARG::IMPOSSIBLE)
    {
      tree1->type= SEL_TREE::IMPOSSIBLE;
      DBUG_RETURN(tree1);
    }
    result_keys.set_bit(idx);
#ifdef EXTRA_DEBUG
    if (*slot1)
      (*slot1)->test_use_count(*slot1);
#endif
  }
  tree1->keys_map= result_keys;

  if (result_keys.is_clear_all())
  {
    /* ok, both trees are index_merge trees */
    imerge_list_and_list(&tree1->merges, &tree2->merges);
  }
  else
  {
    /* dispose index_merge if there is a "range" option */
    tree1->merges.empty();
  }
  DBUG_RETURN(tree1);
}


5346
/*
5347 5348
  Check if two SEL_TREES can be combined into one (i.e. a single key range
  read can be constructed for "cond_of_tree1 OR cond_of_tree2" ) without
5349
  using index_merge.
5350 5351
*/

5352 5353
bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, 
                           RANGE_OPT_PARAM* param)
5354
{
sergefp@mysql.com's avatar
sergefp@mysql.com committed
5355
  key_map common_keys= tree1->keys_map;
5356
  DBUG_ENTER("sel_trees_can_be_ored");
5357
  common_keys.intersect(tree2->keys_map);
5358

sergefp@mysql.com's avatar
sergefp@mysql.com committed
5359
  if (common_keys.is_clear_all())
monty@mysql.com's avatar
monty@mysql.com committed
5360
    DBUG_RETURN(FALSE);
5361 5362

  /* trees have a common key, check if they refer to same key part */
5363
  SEL_ARG **key1,**key2;
sergefp@mysql.com's avatar
sergefp@mysql.com committed
5364
  for (uint key_no=0; key_no < param->keys; key_no++)
5365
  {
sergefp@mysql.com's avatar
sergefp@mysql.com committed
5366
    if (common_keys.is_set(key_no))
5367 5368 5369 5370 5371
    {
      key1= tree1->keys + key_no;
      key2= tree2->keys + key_no;
      if ((*key1)->part == (*key2)->part)
      {
monty@mysql.com's avatar
monty@mysql.com committed
5372
        DBUG_RETURN(TRUE);
5373 5374 5375
      }
    }
  }
monty@mysql.com's avatar
monty@mysql.com committed
5376
  DBUG_RETURN(FALSE);
5377
}
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5378

5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442

/*
  Remove the trees that are not suitable for record retrieval.
  SYNOPSIS
    param  Range analysis parameter
    tree   Tree to be processed, tree->type is KEY or KEY_SMALLER
 
  DESCRIPTION
    This function walks through tree->keys[] and removes the SEL_ARG* trees
    that are not "maybe" trees (*) and cannot be used to construct quick range
    selects.
    (*) - have type MAYBE or MAYBE_KEY. Perhaps we should remove trees of
          these types here as well.

    A SEL_ARG* tree cannot be used to construct quick select if it has
    tree->part != 0. (e.g. it could represent "keypart2 < const").

    WHY THIS FUNCTION IS NEEDED
    
    Normally we allow construction of SEL_TREE objects that have SEL_ARG
    trees that do not allow quick range select construction. For example for
    " keypart1=1 AND keypart2=2 " the execution will proceed as follows:
    tree1= SEL_TREE { SEL_ARG{keypart1=1} }
    tree2= SEL_TREE { SEL_ARG{keypart2=2} } -- can't make quick range select
                                               from this
    call tree_and(tree1, tree2) -- this joins SEL_ARGs into a usable SEL_ARG
                                   tree.
    
    There is an exception though: when we construct index_merge SEL_TREE,
    any SEL_ARG* tree that cannot be used to construct quick range select can
    be removed, because current range analysis code doesn't provide any way
    that tree could be later combined with another tree.
    Consider an example: we should not construct
    st1 = SEL_TREE { 
      merges = SEL_IMERGE { 
                            SEL_TREE(t.key1part1 = 1), 
                            SEL_TREE(t.key2part2 = 2)   -- (*)
                          } 
                   };
    because 
     - (*) cannot be used to construct quick range select, 
     - There is no execution path that would cause (*) to be converted to 
       a tree that could be used.

    The latter is easy to verify: first, notice that the only way to convert
    (*) into a usable tree is to call tree_and(something, (*)).

    Second look at what tree_and/tree_or function would do when passed a
    SEL_TREE that has the structure like st1 tree has, and conclude that 
    tree_and(something, (*)) will not be called.

  RETURN
    0  Ok, some suitable trees left
    1  No tree->keys[] left.
*/

static bool remove_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree)
{
  bool usable_left= FALSE;
  for (uint key_no= 0; key_no < param->keys; key_no++)
  {
    SEL_ARG *key_tree= tree->keys[key_no];
    if (!key_tree)
      continue;
    if (key_tree->part == 0)
    {
      /* Starts at the first key part: keep it */
      usable_left= TRUE;
      continue;
    }
    /* Does not start at the first key part: drop it from the tree */
    tree->keys[key_no]= NULL;
    tree->keys_map.clear_bit(key_no);
  }
  return !usable_left;
}


bk@work.mysql.com's avatar
bk@work.mysql.com committed
5455
/*
  Build a SEL_TREE for "tree1 OR tree2".

  SYNOPSIS
    tree_or()
      param  Range analysis context (number of keys, remove_jump_scans flag)
      tree1  First operand, may be modified and returned as the result
      tree2  Second operand, may be modified and returned as the result

  DESCRIPTION
    Trivial operands (NULL, IMPOSSIBLE, ALWAYS, MAYBE) are resolved first.
    If sel_trees_can_be_ored() accepts the pair, the key trees are OR-ed
    key by key into tree1 with key_or(). Otherwise an index merge structure
    is produced or extended (SEL_IMERGE), depending on which of the operands
    already carry merges.

  RETURN
    Resulting tree; 0 if either operand is 0, if no key tree survived the
    key-by-key OR, or if building the index merge structure failed.
*/

static SEL_TREE *
tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
{
  DBUG_ENTER("tree_or");

  /* Trivial cases; the order of these checks is significant */
  if (!tree1 || !tree2)
    DBUG_RETURN(0);
  if (tree1->type == SEL_TREE::IMPOSSIBLE || tree2->type == SEL_TREE::ALWAYS)
    DBUG_RETURN(tree2);
  if (tree2->type == SEL_TREE::IMPOSSIBLE || tree1->type == SEL_TREE::ALWAYS)
    DBUG_RETURN(tree1);
  if (tree1->type == SEL_TREE::MAYBE)
    DBUG_RETURN(tree1);                         // Can't use this
  if (tree2->type == SEL_TREE::MAYBE)
    DBUG_RETURN(tree2);

  if (sel_trees_can_be_ored(tree1, tree2, param))
  {
    /* OR the operands key by key, accumulating the result in tree1 */
    SEL_TREE *ored_tree= 0;
    key_map  ored_keys;
    ored_keys.clear_all();

    for (uint idx= 0; idx < param->keys; idx++)
    {
      SEL_ARG *ored_key= key_or(tree1->keys[idx], tree2->keys[idx]);
      tree1->keys[idx]= ored_key;
      if (!ored_key)
        continue;
      ored_tree= tree1;                         // Added to tree1
      ored_keys.set_bit(idx);
#ifdef EXTRA_DEBUG
      ored_key->test_use_count(ored_key);
#endif
    }
    if (ored_tree)
      ored_tree->keys_map= ored_keys;
    DBUG_RETURN(ored_tree);
  }

  /* The two trees have KEY type but cannot be used without index merge */
  const bool tree1_has_no_merges= tree1->merges.is_empty();
  const bool tree2_has_no_merges= tree2->merges.is_empty();

  if (tree1_has_no_merges && tree2_has_no_merges)
  {
    if (param->remove_jump_scans &&
        (remove_nonrange_trees(param, tree1) ||
         remove_nonrange_trees(param, tree2)))
      DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS));

    /* Both trees are "range" trees, produce new index merge structure */
    SEL_TREE *imerge_tree= new SEL_TREE();
    if (!imerge_tree)
      DBUG_RETURN(0);
    SEL_IMERGE *imerge= new SEL_IMERGE();
    if (!imerge ||
        imerge_tree->merges.push_back(imerge) ||
        imerge->or_sel_tree(param, tree1) ||
        imerge->or_sel_tree(param, tree2))
      DBUG_RETURN(0);
    imerge_tree->type= tree1->type;
    DBUG_RETURN(imerge_tree);
  }

  if (!tree1_has_no_merges && !tree2_has_no_merges)
  {
    /* Both operands are index merge trees: OR the two merge lists */
    if (imerge_list_or_list(param, &tree1->merges, &tree2->merges))
      DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS));
    DBUG_RETURN(tree1);
  }

  /* One tree is an index merge tree and the other is a range tree */
  SEL_TREE *imerge_side= tree1_has_no_merges ? tree2 : tree1;
  SEL_TREE *range_side=  tree1_has_no_merges ? tree1 : tree2;

  if (param->remove_jump_scans && remove_nonrange_trees(param, range_side))
    DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS));

  /* Add range_side to imerge_side->merges, checking if it collapses to ALWAYS */
  if (imerge_list_or_tree(param, &imerge_side->merges, range_side))
    DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS));
  DBUG_RETURN(imerge_side);
}


/*
  AND key2 into every interval of key1, where key1->part < key2->part.

  SYNOPSIS
    and_all_keys()
      key1        Tree for the earlier key part; modified in place
      key2        Tree for a later key part, attached below key1's intervals
      clone_flag  Passed through to key_and() for nested key parts

  RETURN
    key1 with key2 AND-ed into each interval's next_key_part, or
    &null_element if every interval of key1 was deleted as impossible.
*/

static SEL_ARG *
and_all_keys(SEL_ARG *key1,SEL_ARG *key2,uint clone_flag)
{
  /* Remember the use count key1 had on entry; it is applied per interval */
  const ulong entry_use_count= key1->use_count;

  if (key1->elements != 1)
  {
    /* key2 is going to be referenced from several intervals of key1 */
    key2->use_count+= key1->elements-1;
    key2->increment_use_count((int) key1->elements-1);
  }
  if (key1->type == SEL_ARG::MAYBE_KEY)
  {
    /* Make the MAYBE_KEY node a stand-alone single element tree and list */
    key1->left= key1->right= &null_element;
    key1->prev= key1->next= 0;
  }

  for (SEL_ARG *cur= key1->first(); cur; cur= cur->next)
  {
    if (!cur->next_key_part)
    {
      cur->next_key_part= key2;                 // Nothing to AND with yet
      continue;
    }
    SEL_ARG *anded= key_and(cur->next_key_part, key2, clone_flag);
    if (anded && anded->type == SEL_ARG::IMPOSSIBLE)
    {
      /* This interval can never match: remove it from key1 */
      key1= key1->tree_delete(cur);
      continue;
    }
    cur->next_key_part= anded;
    if (entry_use_count)
      cur->increment_use_count(entry_use_count);
  }

  if (!key1)
    return &null_element;                       // Impossible ranges
  key1->use_count++;
  return key1;
}


/*
  Produce a SEL_ARG tree for "key1 AND key2".

  SYNOPSIS
    key_and()
      key1        First operand (may be NULL)
      key2        Second operand (may be NULL)
      clone_flag  CLONE_KEY1_MAYBE / CLONE_KEY2_MAYBE bits; swapped with
                  swap_clone_flag() whenever key1 and key2 are swapped

  DESCRIPTION
    - If either operand is NULL the other one is returned unchanged.
    - If the operands describe different key parts, the tree for the later
      key part is attached below the earlier one by and_all_keys().
    - If one operand is a MAYBE_KEY the other one is kept and marked as
      "maybe smaller".
    - Trees carrying GEOM_FLAG cannot be intersected; both are freed and
      NULL is returned.
    - Otherwise the two interval lists are walked in parallel and every
      overlapping pair is intersected (clone_and) into a new tree.

  RETURN
    The resulting tree; &null_element if the intersection is impossible
    (also used when clone_and() fails); 0 if a tree could not be cloned or
    the operands cannot be combined.
*/

static SEL_ARG *
key_and(SEL_ARG *key1,SEL_ARG *key2,uint clone_flag)
{
  if (!key1)
    return key2;
  if (!key2)
    return key1;
  if (key1->part != key2->part)
  {
    if (key1->part > key2->part)
    {
      swap_variables(SEL_ARG *, key1, key2);
      clone_flag=swap_clone_flag(clone_flag);
    }
    // key1->part < key2->part
    key1->use_count--;
    if (key1->use_count > 0)
      if (!(key1= key1->clone_tree()))
        return 0;                               // OOM
    return and_all_keys(key1,key2,clone_flag);
  }

  if (((clone_flag & CLONE_KEY2_MAYBE) &&
       !(clone_flag & CLONE_KEY1_MAYBE) &&
       key2->type != SEL_ARG::MAYBE_KEY) ||
      key1->type == SEL_ARG::MAYBE_KEY)
  {                                             // Put simple key in key2
    swap_variables(SEL_ARG *, key1, key2);
    clone_flag=swap_clone_flag(clone_flag);
  }

  /* If one of the key is MAYBE_KEY then the found region may be smaller */
  if (key2->type == SEL_ARG::MAYBE_KEY)
  {
    if (key1->use_count > 1)
    {
      /* key1 is referenced elsewhere: work on a private copy */
      key1->use_count--;
      if (!(key1=key1->clone_tree()))
        return 0;                               // OOM
      key1->use_count++;
    }
    if (key1->type == SEL_ARG::MAYBE_KEY)
    {                                           // Both are maybe key
      key1->next_key_part=key_and(key1->next_key_part,key2->next_key_part,
                                  clone_flag);
      if (key1->next_key_part &&
          key1->next_key_part->type == SEL_ARG::IMPOSSIBLE)
        return key1;
    }
    else
    {
      key1->maybe_smaller();
      if (key2->next_key_part)
      {
        key1->use_count--;                      // Incremented in and_all_keys
        return and_all_keys(key1,key2,clone_flag);
      }
      key2->use_count--;                        // Key2 doesn't have a tree
    }
    return key1;
  }

  /*
    Trees with GEOM_FLAG set cannot be intersected.
    (This check used to be present twice in a row; the second copy was
    unreachable and has been removed.)
  */
  if ((key1->min_flag | key2->min_flag) & GEOM_FLAG)
  {
    key1->free_tree();
    key2->free_tree();
    return 0;                                   // Can't optimize this
  }

  key1->use_count--;
  key2->use_count--;
  SEL_ARG *e1=key1->first(), *e2=key2->first(), *new_tree=0;

  /* Walk both ordered interval lists, intersecting overlapping intervals */
  while (e1 && e2)
  {
    int cmp=e1->cmp_min_to_min(e2);
    if (cmp < 0)
    {
      if (get_range(&e1,&e2,key1))
        continue;                               // No overlap, cursors moved
    }
    else if (get_range(&e2,&e1,key2))
      continue;                                 // No overlap, cursors moved
    SEL_ARG *next=key_and(e1->next_key_part,e2->next_key_part,clone_flag);
    e1->increment_use_count(1);
    e2->increment_use_count(1);
    if (!next || next->type != SEL_ARG::IMPOSSIBLE)
    {
      SEL_ARG *new_arg= e1->clone_and(e2);
      if (!new_arg)
        return &null_element;                   // End of memory
      new_arg->next_key_part=next;
      if (!new_tree)
      {
        new_tree=new_arg;
      }
      else
        new_tree=new_tree->insert(new_arg);
    }
    if (e1->cmp_max_to_max(e2) < 0)
      e1=e1->next;                              // e1 can't overlapp next e2
    else
      e2=e2->next;
  }
  key1->free_tree();
  key2->free_tree();
  if (!new_tree)
    return &null_element;                       // Impossible range
  return new_tree;
}


/*
  Helper for key_and(): position *e1 on an interval of root1 that may
  overlap *e2.

  RETURN
    0  *e1 and *e2 may overlap; the caller should intersect them
    1  No overlap for the current pair; *e1 (and possibly *e2) has been
       advanced and the caller should restart its loop
*/

static bool
get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1)
{
  /* Interval of root1 with the biggest min that is <= (*e2)->min */
  SEL_ARG *candidate= root1->find_range(*e2);
  *e1= candidate;
  if (candidate->cmp_max_to_min(*e2) >= 0)
    return 0;                                   // Reaches into *e2

  /* candidate ends before *e2 starts: try its successor */
  candidate= candidate->next;
  *e1= candidate;
  if (!candidate)
    return 1;                                   // root1 is exhausted
  if (candidate->cmp_min_to_max(*e2) > 0)
  {
    /* Successor starts after *e2 ends: *e2 overlaps nothing, skip it */
    *e2= (*e2)->next;
    return 1;
  }
  return 0;
}


/*
  Produce a SEL_ARG tree for "key1 OR key2".

  SYNOPSIS
    key_or()
      key1  First operand (may be NULL)
      key2  Second operand (may be NULL)

  DESCRIPTION
    - If either operand is NULL the other one is released and NULL is
      returned.
    - Trees for different key parts, or trees carrying GEOM_FLAG, cannot be
      OR-ed: both are freed and NULL is returned.
    - If one operand is a MAYBE_KEY it is returned and the other is freed.
    - Otherwise the intervals of key2 are merged one by one into key1.
      key1 must be private to this call, so it is cloned when it is still
      referenced elsewhere; a still-referenced key2 is copied element by
      element (key2_shared).

  NOTES
    Two defects of the previous implementation are fixed here:
    - The "clone key1 if it is shared" test used '||', which gave up
      (returned 0) whenever key1 was shared and cloned it only when it was
      not shared. It now clones a shared key1 and returns 0 only if the
      clone fails.
    - When an interval tmp of key1 had to be split at key2.min, the new
      first piece took the next_key_part of the tree root (key1) instead of
      the next_key_part of the interval being split (tmp).

  RETURN
    The resulting tree, or 0 if no range can be produced (including out of
    memory).
*/

static SEL_ARG *
key_or(SEL_ARG *key1,SEL_ARG *key2)
{
  if (!key1)
  {
    if (key2)
    {
      key2->use_count--;
      key2->free_tree();
    }
    return 0;
  }
  if (!key2)
  {
    key1->use_count--;
    key1->free_tree();
    return 0;
  }
  key1->use_count--;
  key2->use_count--;

  if (key1->part != key2->part ||
      (key1->min_flag | key2->min_flag) & GEOM_FLAG)
  {
    key1->free_tree();
    key2->free_tree();
    return 0;                                   // Can't optimize this
  }

  // If one of the key is MAYBE_KEY then the found region may be bigger
  if (key1->type == SEL_ARG::MAYBE_KEY)
  {
    key2->free_tree();
    key1->use_count++;
    return key1;
  }
  if (key2->type == SEL_ARG::MAYBE_KEY)
  {
    key1->free_tree();
    key2->use_count++;
    return key2;
  }

  if (key1->use_count > 0)
  {
    /* Prefer to modify an unshared tree, or else the smaller one */
    if (key2->use_count == 0 || key1->elements > key2->elements)
    {
      swap_variables(SEL_ARG *,key1,key2);
    }
    /* key1 is modified below: take a private copy if it is still shared */
    if (key1->use_count > 0 && !(key1=key1->clone_tree()))
      return 0;                                 // OOM
  }

  // Add tree at key2 to tree at key1
  bool key2_shared=key2->use_count != 0;
  key1->maybe_flag|=key2->maybe_flag;

  for (key2=key2->first(); key2; )
  {
    SEL_ARG *tmp=key1->find_range(key2);        // Find key1.min <= key2.min
    int cmp;

    if (!tmp)
    {
      tmp=key1->first();                        // tmp.min > key2.min
      cmp= -1;
    }
    else if ((cmp=tmp->cmp_max_to_min(key2)) < 0)
    {                                           // Found tmp.max < key2.min
      SEL_ARG *next=tmp->next;
      if (cmp == -2 && eq_tree(tmp->next_key_part,key2->next_key_part))
      {
        // Join near ranges like tmp.max < 0 and key2.min >= 0
        SEL_ARG *key2_next=key2->next;
        if (key2_shared)
        {
          if (!(key2=new SEL_ARG(*key2)))
            return 0;                           // out of memory
          key2->increment_use_count(key1->use_count+1);
          key2->next=key2_next;                 // New copy of key2
        }
        key2->copy_min(tmp);
        if (!(key1=key1->tree_delete(tmp)))
        {                                       // Only one key in tree
          key1=key2;
          key1->make_root();
          key2=key2_next;
          break;
        }
      }
      if (!(tmp=next))                          // tmp.min > key2.min
        break;                                  // Copy rest of key2
    }
    if (cmp < 0)
    {                                           // tmp.min > key2.min
      int tmp_cmp;
      if ((tmp_cmp=tmp->cmp_min_to_max(key2)) > 0) // if tmp.min > key2.max
      {
        if (tmp_cmp == 2 && eq_tree(tmp->next_key_part,key2->next_key_part))
        {                                       // ranges are connected
          tmp->copy_min_to_min(key2);
          key1->merge_flags(key2);
          if (tmp->min_flag & NO_MIN_RANGE &&
              tmp->max_flag & NO_MAX_RANGE)
          {
            if (key1->maybe_flag)
              return new SEL_ARG(SEL_ARG::MAYBE_KEY);
            return 0;
          }
          key2->increment_use_count(-1);        // Free not used tree
          key2=key2->next;
          continue;
        }
        else
        {
          SEL_ARG *next=key2->next;             // Keys are not overlapping
          if (key2_shared)
          {
            SEL_ARG *cpy= new SEL_ARG(*key2);   // Must make copy
            if (!cpy)
              return 0;                         // OOM
            key1=key1->insert(cpy);
            key2->increment_use_count(key1->use_count+1);
          }
          else
            key1=key1->insert(key2);            // Will destroy key2_root
          key2=next;
          continue;
        }
      }
    }

    // tmp.max >= key2.min && tmp.min <= key.max  (overlapping ranges)
    if (eq_tree(tmp->next_key_part,key2->next_key_part))
    {
      if (tmp->is_same(key2))
      {
        tmp->merge_flags(key2);                 // Copy maybe flags
        key2->increment_use_count(-1);          // Free not used tree
      }
      else
      {
        /* Swallow all following key1 intervals that key2 also covers */
        SEL_ARG *last=tmp;
        while (last->next && last->next->cmp_min_to_max(key2) <= 0 &&
               eq_tree(last->next->next_key_part,key2->next_key_part))
        {
          SEL_ARG *save=last;
          last=last->next;
          key1=key1->tree_delete(save);
        }
        last->copy_min(tmp);
        if (last->copy_min(key2) || last->copy_max(key2))
        {                                       // Full range
          key1->free_tree();
          for (; key2 ; key2=key2->next)
            key2->increment_use_count(-1);      // Free not used tree
          if (key1->maybe_flag)
            return new SEL_ARG(SEL_ARG::MAYBE_KEY);
          return 0;
        }
      }
      key2=key2->next;
      continue;
    }

    if (cmp >= 0 && tmp->cmp_min_to_min(key2) < 0)
    {                                           // tmp.min <= x < key2.min
      SEL_ARG *new_arg=tmp->clone_first(key2);
      if (!new_arg)
        return 0;                               // OOM
      /*
        new_arg is the leading piece of tmp, so it must keep the condition
        on the following key parts that tmp carries.
      */
      if ((new_arg->next_key_part= tmp->next_key_part))
        new_arg->increment_use_count(key1->use_count+1);
      tmp->copy_min_to_min(key2);
      key1=key1->insert(new_arg);
    }

    // tmp.min >= key2.min && tmp.min <= key2.max
    SEL_ARG key(*key2);                         // Get copy we can modify
    for (;;)
    {
      if (tmp->cmp_min_to_min(&key) > 0)
      {                                         // key.min <= x < tmp.min
        SEL_ARG *new_arg=key.clone_first(tmp);
        if (!new_arg)
          return 0;                             // OOM
        if ((new_arg->next_key_part=key.next_key_part))
          new_arg->increment_use_count(key1->use_count+1);
        key1=key1->insert(new_arg);
      }
      if ((cmp=tmp->cmp_max_to_max(&key)) <= 0)
      {                                         // tmp.min. <= x <= tmp.max
        tmp->maybe_flag|= key.maybe_flag;
        key.increment_use_count(key1->use_count+1);
        tmp->next_key_part=key_or(tmp->next_key_part,key.next_key_part);
        if (!cmp)                               // Key2 is ready
          break;
        key.copy_max_to_min(tmp);
        if (!(tmp=tmp->next))
        {
          SEL_ARG *tmp2= new SEL_ARG(key);
          if (!tmp2)
            return 0;                           // OOM
          key1=key1->insert(tmp2);
          key2=key2->next;
          goto end;
        }
        if (tmp->cmp_min_to_max(&key) > 0)
        {
          SEL_ARG *tmp2= new SEL_ARG(key);
          if (!tmp2)
            return 0;                           // OOM
          key1=key1->insert(tmp2);
          break;
        }
      }
      else
      {
        SEL_ARG *new_arg=tmp->clone_last(&key); // tmp.min <= x <= key.max
        if (!new_arg)
          return 0;                             // OOM
        tmp->copy_max_to_min(&key);
        tmp->increment_use_count(key1->use_count+1);
        /* Increment key count as it may be used for next loop */
        key.increment_use_count(1);
        new_arg->next_key_part=key_or(tmp->next_key_part,key.next_key_part);
        key1=key1->insert(new_arg);
        break;
      }
    }
    key2=key2->next;
  }

end:
  /* Whatever is left of key2 lies after the last interval of key1 */
  while (key2)
  {
    SEL_ARG *next=key2->next;
    if (key2_shared)
    {
      SEL_ARG *tmp=new SEL_ARG(*key2);          // Must make copy
      if (!tmp)
        return 0;
      key2->increment_use_count(key1->use_count+1);
      key1=key1->insert(tmp);
    }
    else
      key1=key1->insert(key2);                  // Will destroy key2_root
    key2=next;
  }
  key1->use_count++;
  return key1;
}


/*
  Compare two SEL_ARG trees for equality.

  Two trees are equal when they are the same object, or when their nodes
  are pairwise is_same(), have the same left/right shape and have equal
  next_key_part trees.

  RETURN
    1  Trees are equal
    0  Trees differ (including the case where exactly one of them is NULL)
*/

static bool eq_tree(SEL_ARG* a,SEL_ARG *b)
{
  if (a == b)
    return 1;
  if (!a || !b || !a->is_same(b))
    return 0;

  /* Left sub trees: either both absent, or both present and equal */
  const bool a_has_left= a->left != &null_element;
  const bool b_has_left= b->left != &null_element;
  if (a_has_left != b_has_left)
    return 0;
  if (a_has_left && !eq_tree(a->left, b->left))
    return 0;

  /* Right sub trees: same rule */
  const bool a_has_right= a->right != &null_element;
  const bool b_has_right= b->right != &null_element;
  if (a_has_right != b_has_right)
    return 0;
  if (a_has_right && !eq_tree(a->right, b->right))
    return 0;

  /*
    Sub ranges on the following key parts. The recursive call itself
    accepts identical pointers and rejects a NULL/non-NULL pair.
  */
  return eq_tree(a->next_key_part, b->next_key_part);
}


/*
  Insert an interval into the tree rooted at 'this'.

  SYNOPSIS
    insert()
      key  Interval to add; it is linked both into the tree and into the
           ordered prev/next list

  RETURN
    Root of the tree after rb_insert() has rebalanced it. use_count and
    maybe_flag are copied from the old root, elements is old root's + 1.
*/

SEL_ARG *
SEL_ARG::insert(SEL_ARG *key)
{
  SEL_ARG *attach_to= 0;                        // Node that becomes key's parent
  SEL_ARG **child_link= 0;                      // Link in attach_to that gets key

  /* Descend to the empty link where key belongs (ordered by min value) */
  SEL_ARG *walker= this;
  while (walker != &null_element)
  {
    attach_to= walker;
    if (key->cmp_min_to_min(walker) > 0)
      child_link= &walker->right;
    else
      child_link= &walker->left;
    walker= *child_link;
  }
  *child_link= key;
  key->parent= attach_to;

  /* Link in list */
  if (child_link == &attach_to->left)
  {
    /* key goes immediately before attach_to */
    SEL_ARG *before= attach_to->prev;
    key->next= attach_to;
    key->prev= before;
    if (before)
      before->next= key;
    attach_to->prev= key;
  }
  else
  {
    /* key goes immediately after attach_to */
    SEL_ARG *after= attach_to->next;
    key->next= after;
    if (after)
      after->prev= key;
    key->prev= attach_to;
    attach_to->next= key;
  }
  key->left= key->right= &null_element;

  SEL_ARG *new_root= rb_insert(key);            // rebalance tree
  /* Carry the root bookkeeping over to whichever node is root now */
  new_root->use_count= this->use_count;
  new_root->elements= this->elements+1;
  new_root->maybe_flag= this->maybe_flag;
  return new_root;
}


/*
  Find the interval with the biggest min value that is <= key's min value.

  RETURN
    The interval found, or 0 if every interval in the tree starts after
    key. Because of the call context, get_range() never sees the 0 case.
*/

SEL_ARG *
SEL_ARG::find_range(SEL_ARG *key)
{
  SEL_ARG *best= 0;                             // Best candidate seen so far
  SEL_ARG *node= this;

  while (node != &null_element)
  {
    const int order= node->cmp_min_to_min(key);
    if (order == 0)
      return node;                              // Exact match on min
    if (order < 0)
    {
      /* node starts before key: remember it, look for a closer one */
      best= node;
      node= node->right;
    }
    else
      node= node->left;
  }
  return best;
}


/*
  Remove an element from the tree

  SYNOPSIS
    tree_delete()
    key		Key that is to be deleted from tree (this)

  NOTE
    This also releases the sub trees used by the element
    (via key->increment_use_count(-1)).

  RETURN
    root of new tree (with key deleted), 0 if the tree became empty
*/

SEL_ARG *
SEL_ARG::tree_delete(SEL_ARG *key)
{
  enum leaf_color removed_color;
  SEL_ARG *new_root= this;
  SEL_ARG *child;                               // Node that fills the hole
  SEL_ARG *child_parent;                        // Its parent after unlinking
  SEL_ARG **key_link;                           // Link that points at key
  DBUG_ENTER("tree_delete");

  this->parent= 0;

  /* Unlink key from the ordered interval list */
  if (key->prev)
    key->prev->next= key->next;
  if (key->next)
    key->next->prev= key->prev;
  key->increment_use_count(-1);

  if (key->parent)
    key_link= key->parent_ptr();
  else
    key_link= &new_root;

  if (key->left == &null_element)
  {
    /* At most a right child: it takes key's place */
    child= key->right;
    *key_link= child;
    child_parent= key->parent;
    if (child != &null_element)
      child->parent= child_parent;
    removed_color= key->color;
  }
  else if (key->right == &null_element)
  {
    /* Only a left child: it takes key's place */
    child= key->left;
    *key_link= child;
    child_parent= key->parent;
    child->parent= child_parent;
    removed_color= key->color;
  }
  else
  {
    /* Two children: the next bigger key (it exists!) replaces key */
    SEL_ARG *successor= key->next;
    SEL_ARG **successor_link= successor->parent_ptr();
    child= successor->right;
    *successor_link= child;                     // unlink successor from tree
    child_parent= successor->parent;
    if (child != &null_element)
      child->parent= child_parent;
    removed_color= successor->color;

    /* Move successor in place of key */
    successor->parent= key->parent;
    successor->left= key->left;
    successor->left->parent= successor;
    successor->right= key->right;
    if (successor->right != &null_element)
      successor->right->parent= successor;
    successor->color= key->color;
    *key_link= successor;
    if (child_parent == key)                    // key->right == key->next
      child_parent= successor;                  // new parent of child
  }

  if (new_root == &null_element)
    DBUG_RETURN(0);                             // Maybe root later
  if (removed_color == BLACK)
    new_root= rb_delete_fixup(new_root, child, child_parent);
  test_rb_tree(new_root, new_root->parent);

  /* Fix root counters */
  new_root->use_count= this->use_count;
  new_root->elements= this->elements-1;
  new_root->maybe_flag= this->maybe_flag;
  DBUG_RETURN(new_root);
}


	/* Functions to fix up the tree after insert and delete */

/*
  Rotate left around 'leaf': its right child becomes the parent of 'leaf'.
  *root is updated when 'leaf' had no parent.
*/
static void left_rotate(SEL_ARG **root,SEL_ARG *leaf)
{
  SEL_ARG *pivot= leaf->right;
  SEL_ARG *moved_subtree= pivot->left;
  SEL_ARG *old_parent= leaf->parent;

  /* pivot's left sub tree is re-hung as leaf's right sub tree */
  leaf->right= moved_subtree;
  if (moved_subtree != &null_element)
    moved_subtree->parent= leaf;

  /* pivot takes leaf's position under leaf's old parent */
  pivot->parent= old_parent;
  if (old_parent)
    *leaf->parent_ptr()= pivot;
  else
    *root= pivot;

  pivot->left= leaf;
  leaf->parent= pivot;
}

/*
  Rotate right around 'leaf': its left child becomes the parent of 'leaf'.
  *root is updated when 'leaf' had no parent.
*/
static void right_rotate(SEL_ARG **root,SEL_ARG *leaf)
{
  SEL_ARG *pivot= leaf->left;
  SEL_ARG *moved_subtree= pivot->right;
  SEL_ARG *old_parent= leaf->parent;

  /* pivot's right sub tree is re-hung as leaf's left sub tree */
  leaf->left= moved_subtree;
  if (moved_subtree != &null_element)
    moved_subtree->parent= leaf;

  /* pivot takes leaf's position under leaf's old parent */
  pivot->parent= old_parent;
  if (old_parent)
    *leaf->parent_ptr()= pivot;
  else
    *root= pivot;

  pivot->right= leaf;
  leaf->parent= pivot;
}


/*
  Restore the red-black colouring after 'leaf' has been linked into the
  tree rooted at 'this'.

  SYNOPSIS
    rb_insert()
      leaf  Newly linked node; it is coloured RED here

  DESCRIPTION
    While the parent of 'leaf' is RED:
      - if the parent's sibling (y) is RED too, parent and sibling are
        recoloured BLACK, the grandparent RED, and the loop continues
        from the grandparent;
      - otherwise one or two rotations around parent/grandparent are done
        and the loop ends.
    The root is always coloured BLACK at the end.

  RETURN
    Root of the tree (rotations may have changed it).
*/

SEL_ARG *
SEL_ARG::rb_insert(SEL_ARG *leaf)
{
  SEL_ARG *y,*par,*par2,*root;
  root= this; root->parent= 0;

  leaf->color=RED;
  while (leaf != root && (par= leaf->parent)->color == RED)
  {					// This can't be root or 1 level under
    if (par == (par2= leaf->parent->parent)->left)
    {
      /* Parent is a left child; y is the parent's sibling */
      y= par2->right;
      if (y->color == RED)
      {
	par->color=BLACK;
	y->color=BLACK;
	leaf=par2;
	leaf->color=RED;		/* And the loop continues */
      }
      else
      {
	if (leaf == par->right)
	{
	  left_rotate(&root,leaf->parent);
	  par=leaf;			/* leaf is now parent to old leaf */
	}
	par->color=BLACK;
	par2->color=RED;
	right_rotate(&root,par2);
	break;
      }
    }
    else
    {
      /* Mirror image of the branch above: parent is a right child */
      y= par2->left;
      if (y->color == RED)
      {
	par->color=BLACK;
	y->color=BLACK;
	leaf=par2;
	leaf->color=RED;		/* And the loop continues */
      }
      else
      {
	if (leaf == par->left)
	{
	  right_rotate(&root,par);
	  par=leaf;
	}
	par->color=BLACK;
	par2->color=RED;
	left_rotate(&root,par2);
	break;
      }
    }
  }
  root->color=BLACK;
  test_rb_tree(root,root->parent);
  return root;
}


/*
  Restore the red-black colouring after a BLACK node has been unlinked
  from the tree.

  SYNOPSIS
    rb_delete_fixup()
      root  Current root of the tree
      key   Node that took the place of the unlinked node
      par   Parent of 'key'; it is passed separately and used instead of
            key->parent for the first iteration

  DESCRIPTION
    Starting at x= key, while x is not the root and is BLACK, the sibling
    (w) of x is examined: a RED sibling is first rotated away; if both
    children of the sibling are BLACK the sibling is coloured RED and the
    loop moves up to the parent; otherwise one or two rotations finish the
    job. The node the loop stops at is coloured BLACK.

  RETURN
    Root of the tree (rotations may have changed it).
*/

SEL_ARG *rb_delete_fixup(SEL_ARG *root,SEL_ARG *key,SEL_ARG *par)
{
  SEL_ARG *x,*w;
  root->parent=0;

  x= key;
  while (x != root && x->color == SEL_ARG::BLACK)
  {
    if (x == par->left)
    {
      /* x is a left child; w is its sibling */
      w=par->right;
      if (w->color == SEL_ARG::RED)
      {
	w->color=SEL_ARG::BLACK;
	par->color=SEL_ARG::RED;
	left_rotate(&root,par);
	w=par->right;
      }
      if (w->left->color == SEL_ARG::BLACK && w->right->color == SEL_ARG::BLACK)
      {
	w->color=SEL_ARG::RED;
	x=par;
      }
      else
      {
	if (w->right->color == SEL_ARG::BLACK)
	{
	  w->left->color=SEL_ARG::BLACK;
	  w->color=SEL_ARG::RED;
	  right_rotate(&root,w);
	  w=par->right;
	}
	w->color=par->color;
	par->color=SEL_ARG::BLACK;
	w->right->color=SEL_ARG::BLACK;
	left_rotate(&root,par);
	x=root;
	break;
      }
    }
    else
    {
      /* Mirror image of the branch above: x is a right child */
      w=par->left;
      if (w->color == SEL_ARG::RED)
      {
	w->color=SEL_ARG::BLACK;
	par->color=SEL_ARG::RED;
	right_rotate(&root,par);
	w=par->left;
      }
      if (w->right->color == SEL_ARG::BLACK && w->left->color == SEL_ARG::BLACK)
      {
	w->color=SEL_ARG::RED;
	x=par;
      }
      else
      {
	if (w->left->color == SEL_ARG::BLACK)
	{
	  w->right->color=SEL_ARG::BLACK;
	  w->color=SEL_ARG::RED;
	  left_rotate(&root,w);
	  w=par->left;
	}
	w->color=par->color;
	par->color=SEL_ARG::BLACK;
	w->left->color=SEL_ARG::BLACK;
	right_rotate(&root,par);
	x=root;
	break;
      }
    }
    /* Continue one level up */
    par=x->parent;
  }
  x->color=SEL_ARG::BLACK;
  return root;
}


6330
	/* Test that the properties for a red-black tree hold */
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386

#ifdef EXTRA_DEBUG
/*
  Debug check of the sub tree rooted at 'element'.

  Verifies parent pointers, that a RED node has no RED child, that left and
  right are not the same real node, and that both sub trees contain the
  same number of BLACK nodes. A problem is reported with sql_print_error().

  RETURN
    >= 0  Number of BLACK nodes on a path down from 'element'
    -1    The tree is broken
*/
int test_rb_tree(SEL_ARG *element,SEL_ARG *parent)
{
  if (element == &null_element)
    return 0;					// Found end of tree

  if (element->parent != parent)
  {
    sql_print_error("Wrong tree: Parent doesn't point at parent");
    return -1;
  }

  const bool has_red_child= (element->left->color == SEL_ARG::RED ||
                             element->right->color == SEL_ARG::RED);
  if (element->color == SEL_ARG::RED && has_red_child)
  {
    sql_print_error("Wrong tree: Found two red in a row");
    return -1;
  }

  if (element->left == element->right && element->left != &null_element)
  {						// Dummy test
    sql_print_error("Wrong tree: Found right == left");
    return -1;
  }

  const int blacks_left=  test_rb_tree(element->left, element);
  const int blacks_right= test_rb_tree(element->right, element);
  if (blacks_left < 0 || blacks_right < 0)
    return -1;					// Error, no more warnings
  if (blacks_left != blacks_right)
  {
    sql_print_error("Wrong tree: Incorrect black-count: %d - %d",
                    blacks_left, blacks_right);
    return -1;
  }
  return blacks_left + (element->color == SEL_ARG::BLACK ? 1 : 0);
}

/*
  Count how many intervals reachable from 'root' point at 'key' through
  their next_key_part. Sub trees are only descended into while their key
  part number is lower than key->part.
*/
static ulong count_key_part_usage(SEL_ARG *root, SEL_ARG *key)
{
  ulong references= 0;

  for (SEL_ARG *node= root->first(); node; node= node->next)
  {
    SEL_ARG *sub_tree= node->next_key_part;
    if (!sub_tree)
      continue;
    if (sub_tree == key)
      references++;
    if (sub_tree->part < key->part)
      references+= count_key_part_usage(sub_tree, key);
  }
  return references;
}


/*
  Debug check of the use_count and elements bookkeeping of this tree.

  SYNOPSIS
    test_use_count()
      root  Top level tree that the reference counts are checked against

  DESCRIPTION
    - The root itself must have use_count == 1.
    - For every interval with a next_key_part, the number of references to
      that sub tree found from 'root' must not exceed its use_count; the
      sub tree is then checked recursively.
    - The number of intervals in the list must match 'elements'.
    Problems are written to the error log; nothing is returned.

  NOTES
    Pointers are cast to ulong before being passed for the "0x%lx"
    conversions, so the argument type matches the format.
*/

void SEL_ARG::test_use_count(SEL_ARG *root)
{
  uint e_count=0;
  if (this == root && use_count != 1)
  {
    sql_print_information("Use_count: Wrong count %lu for root",use_count);
    return;
  }
  if (this->type != SEL_ARG::KEY_RANGE)
    return;
  for (SEL_ARG *pos=first(); pos ; pos=pos->next)
  {
    e_count++;
    if (pos->next_key_part)
    {
      ulong count=count_key_part_usage(root,pos->next_key_part);
      if (count > pos->next_key_part->use_count)
      {
        sql_print_information("Use_count: Wrong count for key at 0x%lx, %lu should be %lu",
                              (ulong) pos,
                              pos->next_key_part->use_count, count);
        return;
      }
      pos->next_key_part->test_use_count(root);
    }
  }
  if (e_count != elements)
    sql_print_warning("Wrong use count: %u (should be %u) for tree at 0x%lx",
                      e_count, elements, (ulong) this);
}

#endif


6418 6419 6420 6421 6422 6423 6424 6425 6426 6427
/*
  Calculate estimate of number of records that will be retrieved by a range
  scan on given index using given SEL_ARG intervals tree.
  SYNOPSIS
    check_quick_select
      param  Parameter from test_quick_select
      idx    Number of index to use in PARAM::key SEL_TREE::key
      tree   Transformed selection condition, tree->key[idx] holds intervals
             tree to be used for scanning.
  NOTES
    param->is_ror_scan is set to reflect if the key scan is a ROR (see
    is_key_scan_ror function for more info)
    param->table->quick_*, param->range_count (and maybe others) are
    updated with data of given key scan, see check_quick_keys for details.

  RETURN
    Estimate # of records to be retrieved.
    HA_POS_ERROR if estimate calculation failed due to table handler problems.

*/
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6438 6439 6440 6441 6442

static ha_rows
check_quick_select(PARAM *param,uint idx,SEL_ARG *tree)
{
  ha_rows records;
6443 6444
  bool    cpk_scan;
  uint key;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6445
  DBUG_ENTER("check_quick_select");
6446

monty@mysql.com's avatar
monty@mysql.com committed
6447
  param->is_ror_scan= FALSE;
6448

bk@work.mysql.com's avatar
bk@work.mysql.com committed
6449 6450
  if (!tree)
    DBUG_RETURN(HA_POS_ERROR);			// Can't use it
monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
6451 6452
  param->max_key_part=0;
  param->range_count=0;
6453 6454
  key= param->real_keynr[idx];

bk@work.mysql.com's avatar
bk@work.mysql.com committed
6455 6456 6457 6458
  if (tree->type == SEL_ARG::IMPOSSIBLE)
    DBUG_RETURN(0L);				// Impossible select. return
  if (tree->type != SEL_ARG::KEY_RANGE || tree->part != 0)
    DBUG_RETURN(HA_POS_ERROR);				// Don't use tree
6459 6460 6461 6462 6463

  enum ha_key_alg key_alg= param->table->key_info[key].algorithm;
  if ((key_alg != HA_KEY_ALG_BTREE) && (key_alg!= HA_KEY_ALG_UNDEF))
  {
    /* Records are not ordered by rowid for other types of indexes. */
monty@mysql.com's avatar
monty@mysql.com committed
6464
    cpk_scan= FALSE;
6465 6466 6467 6468 6469 6470 6471
  }
  else
  {
    /*
      Clustered PK scan is a special case, check_quick_keys doesn't recognize
      CPK scans as ROR scans (while actually any CPK scan is a ROR scan).
    */
6472 6473
    cpk_scan= ((param->table->s->primary_key == param->real_keynr[idx]) &&
               param->table->file->primary_key_is_clustered());
6474
    param->is_ror_scan= !cpk_scan;
6475 6476
  }

bk@work.mysql.com's avatar
bk@work.mysql.com committed
6477 6478
  records=check_quick_keys(param,idx,tree,param->min_key,0,param->max_key,0);
  if (records != HA_POS_ERROR)
6479
  {
6480
    param->table->quick_keys.set_bit(key);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6481 6482
    param->table->quick_rows[key]=records;
    param->table->quick_key_parts[key]=param->max_key_part+1;
6483

6484
    if (cpk_scan)
monty@mysql.com's avatar
monty@mysql.com committed
6485
      param->is_ror_scan= TRUE;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6486
  }
6487 6488
  if (param->table->file->index_flags(key, 0, TRUE) & HA_KEY_SCAN_NOT_ROR)
    param->is_ror_scan= FALSE;
6489
  DBUG_PRINT("exit", ("Records: %lu", (ulong) records));
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6490 6491 6492 6493
  DBUG_RETURN(records);
}


6494
/*
6495 6496
  Recursively calculate estimate of # rows that will be retrieved by
  key scan on key idx.
6497 6498
  SYNOPSIS
    check_quick_keys()
6499
      param         Parameter from test_quick select function.
6500
      idx           Number of key to use in PARAM::keys in list of used keys
6501 6502 6503
                    (param->real_keynr[idx] holds the key number in table)
      key_tree      SEL_ARG tree being examined.
      min_key       Buffer with partial min key value tuple
6504
      min_key_flag
6505
      max_key       Buffer with partial max key value tuple
6506 6507
      max_key_flag

6508
  NOTES
6509 6510
    The function does the recursive descent on the tree via SEL_ARG::left,
    SEL_ARG::right, and SEL_ARG::next_key_part edges. The #rows estimates
6511 6512
    are calculated using records_in_range calls at the leaf nodes and then
    summed.
6513

6514 6515
    param->min_key and param->max_key are used to hold prefixes of key value
    tuples.
6516 6517

    The side effects are:
6518

6519 6520
    param->max_key_part is updated to hold the maximum number of key parts used
      in scan minus 1.
6521 6522

    param->range_count is incremented if the function finds a range that
6523
      wasn't counted by the caller.
6524

6525 6526 6527
    param->is_ror_scan is cleared if the function detects that the key scan is
      not a Rowid-Ordered Retrieval scan ( see comments for is_key_scan_ror
      function for description of which key scans are ROR scans)
6528 6529
*/

bk@work.mysql.com's avatar
bk@work.mysql.com committed
6530 6531 6532 6533 6534
static ha_rows
check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
		 char *min_key,uint min_key_flag, char *max_key,
		 uint max_key_flag)
{
monty@mysql.com's avatar
monty@mysql.com committed
6535 6536 6537
  ha_rows records=0, tmp;
  uint tmp_min_flag, tmp_max_flag, keynr, min_key_length, max_key_length;
  char *tmp_min_key, *tmp_max_key;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6538 6539 6540 6541

  param->max_key_part=max(param->max_key_part,key_tree->part);
  if (key_tree->left != &null_element)
  {
6542 6543 6544 6545 6546 6547
    /*
      There are at least two intervals for current key part, i.e. condition
      was converted to something like
        (keyXpartY less/equals c1) OR (keyXpartY more/equals c2).
      This is not a ROR scan if the key is not Clustered Primary Key.
    */
monty@mysql.com's avatar
monty@mysql.com committed
6548
    param->is_ror_scan= FALSE;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6549 6550 6551 6552 6553 6554
    records=check_quick_keys(param,idx,key_tree->left,min_key,min_key_flag,
			     max_key,max_key_flag);
    if (records == HA_POS_ERROR)			// Impossible
      return records;
  }

monty@mysql.com's avatar
monty@mysql.com committed
6555 6556
  tmp_min_key= min_key;
  tmp_max_key= max_key;
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
6557
  key_tree->store(param->key[idx][key_tree->part].store_length,
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6558
		  &tmp_min_key,min_key_flag,&tmp_max_key,max_key_flag);
monty@mysql.com's avatar
monty@mysql.com committed
6559 6560
  min_key_length= (uint) (tmp_min_key- param->min_key);
  max_key_length= (uint) (tmp_max_key- param->max_key);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6561

6562 6563
  if (param->is_ror_scan)
  {
6564
    /*
6565
      If the index doesn't cover entire key, mark the scan as non-ROR scan.
6566
      Actually we're cutting off some ROR scans here.
6567 6568 6569
    */
    uint16 fieldnr= param->table->key_info[param->real_keynr[idx]].
                    key_part[key_tree->part].fieldnr - 1;
6570
    if (param->table->field[fieldnr]->key_length() !=
6571
        param->key[idx][key_tree->part].length)
monty@mysql.com's avatar
monty@mysql.com committed
6572
      param->is_ror_scan= FALSE;
6573 6574
  }

bk@work.mysql.com's avatar
bk@work.mysql.com committed
6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587
  if (key_tree->next_key_part &&
      key_tree->next_key_part->part == key_tree->part+1 &&
      key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
  {						// const key as prefix
    if (min_key_length == max_key_length &&
	!memcmp(min_key,max_key, (uint) (tmp_max_key - max_key)) &&
	!key_tree->min_flag && !key_tree->max_flag)
    {
      tmp=check_quick_keys(param,idx,key_tree->next_key_part,
			   tmp_min_key, min_key_flag | key_tree->min_flag,
			   tmp_max_key, max_key_flag | key_tree->max_flag);
      goto end;					// Ugly, but efficient
    }
6588
    else
6589 6590
    {
      /* The interval for current key part is not c1 <= keyXpartY <= c1 */
monty@mysql.com's avatar
monty@mysql.com committed
6591
      param->is_ror_scan= FALSE;
6592
    }
6593

bk@work.mysql.com's avatar
bk@work.mysql.com committed
6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611
    tmp_min_flag=key_tree->min_flag;
    tmp_max_flag=key_tree->max_flag;
    if (!tmp_min_flag)
      key_tree->next_key_part->store_min_key(param->key[idx], &tmp_min_key,
					     &tmp_min_flag);
    if (!tmp_max_flag)
      key_tree->next_key_part->store_max_key(param->key[idx], &tmp_max_key,
					     &tmp_max_flag);
    min_key_length= (uint) (tmp_min_key- param->min_key);
    max_key_length= (uint) (tmp_max_key- param->max_key);
  }
  else
  {
    tmp_min_flag=min_key_flag | key_tree->min_flag;
    tmp_max_flag=max_key_flag | key_tree->max_flag;
  }

  keynr=param->real_keynr[idx];
monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
6612
  param->range_count++;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6613 6614
  if (!tmp_min_flag && ! tmp_max_flag &&
      (uint) key_tree->part+1 == param->table->key_info[keynr].key_parts &&
6615 6616
      (param->table->key_info[keynr].flags & (HA_NOSAME | HA_END_SPACE_KEY)) ==
      HA_NOSAME &&
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6617 6618 6619 6620
      min_key_length == max_key_length &&
      !memcmp(param->min_key,param->max_key,min_key_length))
    tmp=1;					// Max one record
  else
6621
  {
6622 6623
    if (param->is_ror_scan)
    {
6624 6625 6626 6627 6628 6629 6630 6631 6632
      /*
        If we get here, the condition on the key was converted to form
        "(keyXpart1 = c1) AND ... AND (keyXpart{key_tree->part - 1} = cN) AND
          somecond(keyXpart{key_tree->part})"
        Check if
          somecond is "keyXpart{key_tree->part} = const" and
          uncovered "tail" of KeyX parts is either empty or is identical to
          first members of clustered primary key.
      */
6633 6634
      if (!(min_key_length == max_key_length &&
            !memcmp(min_key,max_key, (uint) (tmp_max_key - max_key)) &&
6635
            !key_tree->min_flag && !key_tree->max_flag &&
6636
            is_key_scan_ror(param, keynr, key_tree->part + 1)))
monty@mysql.com's avatar
monty@mysql.com committed
6637
        param->is_ror_scan= FALSE;
6638 6639
    }

6640
    if (tmp_min_flag & GEOM_FLAG)
6641
    {
6642 6643 6644 6645 6646 6647 6648 6649
      key_range min_range;
      min_range.key=    (byte*) param->min_key;
      min_range.length= min_key_length;
      /* In this case tmp_min_flag contains the handler-read-function */
      min_range.flag=   (ha_rkey_function) (tmp_min_flag ^ GEOM_FLAG);

      tmp= param->table->file->records_in_range(keynr, &min_range,
                                                (key_range*) 0);
6650 6651 6652
    }
    else
    {
6653 6654 6655 6656 6657 6658
      key_range min_range, max_range;

      min_range.key=    (byte*) param->min_key;
      min_range.length= min_key_length;
      min_range.flag=   (tmp_min_flag & NEAR_MIN ? HA_READ_AFTER_KEY :
                         HA_READ_KEY_EXACT);
monty@mysql.com's avatar
monty@mysql.com committed
6659
      max_range.key=    (byte*) param->max_key;
6660 6661 6662 6663 6664 6665 6666 6667
      max_range.length= max_key_length;
      max_range.flag=   (tmp_max_flag & NEAR_MAX ?
                         HA_READ_BEFORE_KEY : HA_READ_AFTER_KEY);
      tmp=param->table->file->records_in_range(keynr,
                                               (min_key_length ? &min_range :
                                                (key_range*) 0),
                                               (max_key_length ? &max_range :
                                                (key_range*) 0));
6668 6669
    }
  }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6670 6671 6672 6673 6674 6675
 end:
  if (tmp == HA_POS_ERROR)			// Impossible range
    return tmp;
  records+=tmp;
  if (key_tree->right != &null_element)
  {
6676 6677 6678 6679 6680 6681
    /*
      There are at least two intervals for current key part, i.e. condition
      was converted to something like
        (keyXpartY less/equals c1) OR (keyXpartY more/equals c2).
      This is not a ROR scan if the key is not Clustered Primary Key.
    */
monty@mysql.com's avatar
monty@mysql.com committed
6682
    param->is_ror_scan= FALSE;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6683 6684 6685 6686 6687 6688 6689 6690 6691
    tmp=check_quick_keys(param,idx,key_tree->right,min_key,min_key_flag,
			 max_key,max_key_flag);
    if (tmp == HA_POS_ERROR)
      return tmp;
    records+=tmp;
  }
  return records;
}

6692

6693
/*
6694
  Check if key scan on given index with equality conditions on first n key
6695 6696 6697 6698
  parts is a ROR scan.

  SYNOPSIS
    is_key_scan_ror()
6699
      param  Parameter from test_quick_select
6700 6701 6702 6703
      keynr  Number of key in the table. The key must not be a clustered
             primary key.
      nparts Number of first key parts for which equality conditions
             are present.
6704

6705 6706 6707
  NOTES
    ROR (Rowid Ordered Retrieval) key scan is a key scan that produces
    ordered sequence of rowids (ha_xxx::cmp_ref is the comparison function)
6708

6709 6710 6711
    An index scan is a ROR scan if it is done using a condition in form

        "key1_1=c_1 AND ... AND key1_n=c_n"  (1)
6712

6713 6714
    where the index is defined on (key1_1, ..., key1_N [,a_1, ..., a_n])

6715
    and the table has a clustered Primary Key
6716

6717
    PRIMARY KEY(a_1, ..., a_n, b1, ..., b_k) with first key parts being
6718
    identical to uncovered parts ot the key being scanned (2)
6719 6720

    Scans on HASH indexes are not ROR scans,
6721 6722 6723 6724 6725 6726
    any range scan on clustered primary key is ROR scan  (3)

    Check (1) is made in check_quick_keys()
    Check (3) is made check_quick_select()
    Check (2) is made by this function.

6727
  RETURN
monty@mysql.com's avatar
monty@mysql.com committed
6728 6729
    TRUE  If the scan is ROR-scan
    FALSE otherwise
6730
*/
6731

6732 6733 6734 6735
static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts)
{
  KEY *table_key= param->table->key_info + keynr;
  KEY_PART_INFO *key_part= table_key->key_part + nparts;
6736 6737 6738
  KEY_PART_INFO *key_part_end= (table_key->key_part +
                                table_key->key_parts);
  uint pk_number;
6739

6740
  if (key_part == key_part_end)
monty@mysql.com's avatar
monty@mysql.com committed
6741
    return TRUE;
6742
  pk_number= param->table->s->primary_key;
6743
  if (!param->table->file->primary_key_is_clustered() || pk_number == MAX_KEY)
monty@mysql.com's avatar
monty@mysql.com committed
6744
    return FALSE;
6745 6746

  KEY_PART_INFO *pk_part= param->table->key_info[pk_number].key_part;
6747
  KEY_PART_INFO *pk_part_end= pk_part +
6748
                              param->table->key_info[pk_number].key_parts;
6749 6750
  for (;(key_part!=key_part_end) && (pk_part != pk_part_end);
       ++key_part, ++pk_part)
6751
  {
6752
    if ((key_part->field != pk_part->field) ||
6753
        (key_part->length != pk_part->length))
6754
      return FALSE;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6755
  }
6756
  return (key_part == key_part_end);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6757 6758 6759
}


6760 6761
/*
  Create a QUICK_RANGE_SELECT from given key and SEL_ARG tree for that key.
6762

6763 6764
  SYNOPSIS
    get_quick_select()
6765
      param
6766
      idx          Index of used key in param->key.
6767 6768
      key_tree     SEL_ARG tree for the used key
      parent_alloc If not NULL, use it to allocate memory for
6769
                   quick select data. Otherwise use quick->alloc.
6770
  NOTES
6771
    The caller must call QUICK_SELECT::init for returned quick select
6772

6773
    CAUTION! This function may change thd->mem_root to a MEM_ROOT which will be
6774
    deallocated when the returned quick select is deleted.
6775 6776 6777 6778

  RETURN
    NULL on error
    otherwise created quick select
6779
*/
6780

6781 6782 6783
QUICK_RANGE_SELECT *
get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree,
                 MEM_ROOT *parent_alloc)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6784
{
6785
  QUICK_RANGE_SELECT *quick;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6786
  DBUG_ENTER("get_quick_select");
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
6787 6788 6789 6790 6791 6792 6793 6794 6795

  if (param->table->key_info[param->real_keynr[idx]].flags & HA_SPATIAL)
    quick=new QUICK_RANGE_SELECT_GEOM(param->thd, param->table,
                                      param->real_keynr[idx],
                                      test(parent_alloc),
                                      parent_alloc);
  else
    quick=new QUICK_RANGE_SELECT(param->thd, param->table,
                                 param->real_keynr[idx],
monty@mysql.com's avatar
monty@mysql.com committed
6796
                                 test(parent_alloc));
6797

pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
6798
  if (quick)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809
  {
    if (quick->error ||
	get_quick_keys(param,quick,param->key[idx],key_tree,param->min_key,0,
		       param->max_key,0))
    {
      delete quick;
      quick=0;
    }
    else
    {
      quick->key_parts=(KEY_PART*)
6810 6811 6812 6813
        memdup_root(parent_alloc? parent_alloc : &quick->alloc,
                    (char*) param->key[idx],
                    sizeof(KEY_PART)*
                    param->table->key_info[param->real_keynr[idx]].key_parts);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6814
    }
6815
  }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6816 6817 6818 6819 6820 6821 6822
  DBUG_RETURN(quick);
}


/*
** Fix this to get all possible sub_ranges
*/
6823 6824
bool
get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key,
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837
	       SEL_ARG *key_tree,char *min_key,uint min_key_flag,
	       char *max_key, uint max_key_flag)
{
  QUICK_RANGE *range;
  uint flag;

  if (key_tree->left != &null_element)
  {
    if (get_quick_keys(param,quick,key,key_tree->left,
		       min_key,min_key_flag, max_key, max_key_flag))
      return 1;
  }
  char *tmp_min_key=min_key,*tmp_max_key=max_key;
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
6838
  key_tree->store(key[key_tree->part].store_length,
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866
		  &tmp_min_key,min_key_flag,&tmp_max_key,max_key_flag);

  if (key_tree->next_key_part &&
      key_tree->next_key_part->part == key_tree->part+1 &&
      key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
  {						  // const key as prefix
    if (!((tmp_min_key - min_key) != (tmp_max_key - max_key) ||
	  memcmp(min_key,max_key, (uint) (tmp_max_key - max_key)) ||
	  key_tree->min_flag || key_tree->max_flag))
    {
      if (get_quick_keys(param,quick,key,key_tree->next_key_part,
			 tmp_min_key, min_key_flag | key_tree->min_flag,
			 tmp_max_key, max_key_flag | key_tree->max_flag))
	return 1;
      goto end;					// Ugly, but efficient
    }
    {
      uint tmp_min_flag=key_tree->min_flag,tmp_max_flag=key_tree->max_flag;
      if (!tmp_min_flag)
	key_tree->next_key_part->store_min_key(key, &tmp_min_key,
					       &tmp_min_flag);
      if (!tmp_max_flag)
	key_tree->next_key_part->store_max_key(key, &tmp_max_key,
					       &tmp_max_flag);
      flag=tmp_min_flag | tmp_max_flag;
    }
  }
  else
6867 6868 6869 6870
  {
    flag = (key_tree->min_flag & GEOM_FLAG) ?
      key_tree->min_flag : key_tree->min_flag | key_tree->max_flag;
  }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6871

6872 6873 6874 6875 6876
  /*
    Ensure that some part of min_key and max_key are used.  If not,
    regard this as no lower/upper range
  */
  if ((flag & GEOM_FLAG) == 0)
6877 6878 6879 6880 6881 6882 6883 6884 6885 6886
  {
    if (tmp_min_key != param->min_key)
      flag&= ~NO_MIN_RANGE;
    else
      flag|= NO_MIN_RANGE;
    if (tmp_max_key != param->max_key)
      flag&= ~NO_MAX_RANGE;
    else
      flag|= NO_MAX_RANGE;
  }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6887 6888 6889 6890 6891 6892 6893 6894
  if (flag == 0)
  {
    uint length= (uint) (tmp_min_key - param->min_key);
    if (length == (uint) (tmp_max_key - param->max_key) &&
	!memcmp(param->min_key,param->max_key,length))
    {
      KEY *table_key=quick->head->key_info+quick->index;
      flag=EQ_RANGE;
6895 6896
      if ((table_key->flags & (HA_NOSAME | HA_END_SPACE_KEY)) == HA_NOSAME &&
	  key->part == table_key->key_parts-1)
6897 6898 6899 6900 6901 6902 6903 6904 6905
      {
	if (!(table_key->flags & HA_NULL_PART_KEY) ||
	    !null_part_in_key(key,
			      param->min_key,
			      (uint) (tmp_min_key - param->min_key)))
	  flag|= UNIQUE_RANGE;
	else
	  flag|= NULL_RANGE;
      }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6906 6907 6908 6909
    }
  }

  /* Get range for retrieving rows in QUICK_SELECT::get_next */
6910
  if (!(range= new QUICK_RANGE((const char *) param->min_key,
6911
			       (uint) (tmp_min_key - param->min_key),
6912
			       (const char *) param->max_key,
6913 6914
			       (uint) (tmp_max_key - param->max_key),
			       flag)))
6915 6916
    return 1;			// out of memory

bk@work.mysql.com's avatar
bk@work.mysql.com committed
6917 6918
  set_if_bigger(quick->max_used_key_length,range->min_length);
  set_if_bigger(quick->max_used_key_length,range->max_length);
6919
  set_if_bigger(quick->used_key_parts, (uint) key_tree->part+1);
6920 6921 6922
  if (insert_dynamic(&quick->ranges, (gptr)&range))
    return 1;

bk@work.mysql.com's avatar
bk@work.mysql.com committed
6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934
 end:
  if (key_tree->right != &null_element)
    return get_quick_keys(param,quick,key,key_tree->right,
			  min_key,min_key_flag,
			  max_key,max_key_flag);
  return 0;
}

/*
  Return 1 if there is only one range and this uses the whole primary key
*/

6935
bool QUICK_RANGE_SELECT::unique_key_range()
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6936 6937 6938
{
  if (ranges.elements == 1)
  {
6939 6940
    QUICK_RANGE *tmp= *((QUICK_RANGE**)ranges.buffer);
    if ((tmp->flag & (EQ_RANGE | NULL_RANGE)) == EQ_RANGE)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6941 6942
    {
      KEY *key=head->key_info+index;
6943
      return ((key->flags & (HA_NOSAME | HA_END_SPACE_KEY)) == HA_NOSAME &&
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6944 6945 6946 6947 6948 6949
	      key->key_length == tmp->min_length);
    }
  }
  return 0;
}

6950

monty@mysql.com's avatar
monty@mysql.com committed
6951
/* Returns TRUE if any part of the key is NULL */
6952 6953 6954

static bool null_part_in_key(KEY_PART *key_part, const char *key, uint length)
{
6955
  for (const char *end=key+length ;
6956
       key < end;
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
6957
       key+= key_part++->store_length)
6958
  {
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
6959 6960
    if (key_part->null_bit && *key)
      return 1;
6961 6962 6963 6964
  }
  return 0;
}

pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
6965

6966 6967
/* Pass the index of this quick select and the fields to check_if_key_used() */

bool QUICK_SELECT_I::check_if_keys_used(List<Item> *fields)
{
  List<Item> &field_list= *fields;
  return check_if_key_used(head, index, field_list);
}

/*
  Return 1 if check_if_key_used() reports a match for the index of any of
  the merged quick selects
*/

bool QUICK_INDEX_MERGE_SELECT::check_if_keys_used(List<Item> *fields)
{
  List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);

  for (QUICK_RANGE_SELECT *cur= it++; cur; cur= it++)
  {
    if (check_if_key_used(head, cur->index, *fields))
      return 1;
  }
  return 0;
}

/*
  Return 1 if check_if_key_used() reports a match for the index of any of
  the intersected quick selects
*/

bool QUICK_ROR_INTERSECT_SELECT::check_if_keys_used(List<Item> *fields)
{
  List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);

  for (QUICK_RANGE_SELECT *cur= it++; cur; cur= it++)
  {
    if (check_if_key_used(head, cur->index, *fields))
      return 1;
  }
  return 0;
}

/*
  Return 1 if any of the quick selects in the union reports a match from
  its own check_if_keys_used()
*/

bool QUICK_ROR_UNION_SELECT::check_if_keys_used(List<Item> *fields)
{
  List_iterator_fast<QUICK_SELECT_I> it(quick_selects);

  for (QUICK_SELECT_I *cur= it++; cur; cur= it++)
  {
    if (cur->check_if_keys_used(fields))
      return 1;
  }
  return 0;
}

monty@mysql.com's avatar
monty@mysql.com committed
7007

sergefp@mysql.com's avatar
sergefp@mysql.com committed
7008 7009
/*
  Create quick select from ref/ref_or_null scan.
7010

sergefp@mysql.com's avatar
sergefp@mysql.com committed
7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025
  SYNOPSIS
    get_quick_select_for_ref()
      thd      Thread handle
      table    Table to access
      ref      ref[_or_null] scan parameters
      records  Estimate of number of records (needed only to construct 
               quick select)
  NOTES
    This allocates things in a new memory root, as this may be called many
    times during a query.
  
  RETURN 
    Quick select that retrieves the same rows as passed ref scan
    NULL on error.
*/
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7026

7027
QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7028
                                             TABLE_REF *ref, ha_rows records)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7029
{
7030 7031
  MEM_ROOT *old_root, *alloc;
  QUICK_RANGE_SELECT *quick;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7032 7033
  KEY *key_info = &table->key_info[ref->key];
  KEY_PART *key_part;
serg@serg.mylan's avatar
serg@serg.mylan committed
7034
  QUICK_RANGE *range;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7035
  uint part;
7036 7037 7038 7039 7040 7041

  old_root= thd->mem_root;
  /* The following call may change thd->mem_root */
  quick= new QUICK_RANGE_SELECT(thd, table, ref->key, 0);
  /* save mem_root set by QUICK_RANGE_SELECT constructor */
  alloc= thd->mem_root;
7042 7043 7044 7045 7046
  /*
    return back default mem_root (thd->mem_root) changed by
    QUICK_RANGE_SELECT constructor
  */
  thd->mem_root= old_root;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7047 7048

  if (!quick)
7049
    return 0;			/* no ranges found */
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7050
  if (quick->init())
7051 7052
  {
    delete quick;
monty@mysql.com's avatar
monty@mysql.com committed
7053
    goto err;
7054
  }
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7055
  quick->records= records;
7056

sergefp@mysql.com's avatar
sergefp@mysql.com committed
7057
  if (cp_buffer_from_ref(thd,ref) && thd->is_fatal_error ||
7058
      !(range= new(alloc) QUICK_RANGE()))
monty@mysql.com's avatar
monty@mysql.com committed
7059
    goto err;                                   // out of memory
7060

bk@work.mysql.com's avatar
bk@work.mysql.com committed
7061 7062 7063
  range->min_key=range->max_key=(char*) ref->key_buff;
  range->min_length=range->max_length=ref->key_length;
  range->flag= ((ref->key_length == key_info->key_length &&
7064 7065
		 (key_info->flags & (HA_NOSAME | HA_END_SPACE_KEY)) ==
		 HA_NOSAME) ? EQ_RANGE : 0);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7066 7067

  if (!(quick->key_parts=key_part=(KEY_PART *)
7068
	alloc_root(&quick->alloc,sizeof(KEY_PART)*ref->key_parts)))
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7069 7070 7071 7072 7073 7074
    goto err;

  for (part=0 ; part < ref->key_parts ;part++,key_part++)
  {
    key_part->part=part;
    key_part->field=        key_info->key_part[part].field;
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7075 7076
    key_part->length=  	    key_info->key_part[part].length;
    key_part->store_length= key_info->key_part[part].store_length;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7077 7078
    key_part->null_bit=     key_info->key_part[part].null_bit;
  }
pem@mysql.com's avatar
pem@mysql.com committed
7079
  if (insert_dynamic(&quick->ranges,(gptr)&range))
7080 7081
    goto err;

7082
  /*
7083 7084 7085 7086 7087
     Add a NULL range if REF_OR_NULL optimization is used.
     For example:
       if we have "WHERE A=2 OR A IS NULL" we created the (A=2) range above
       and have ref->null_ref_key set. Will create a new NULL range here.
  */
7088 7089 7090 7091 7092
  if (ref->null_ref_key)
  {
    QUICK_RANGE *null_range;

    *ref->null_ref_key= 1;		// Set null byte then create a range
7093 7094 7095 7096 7097
    if (!(null_range= new (alloc) QUICK_RANGE((char*)ref->key_buff,
                                              ref->key_length,
                                              (char*)ref->key_buff,
                                              ref->key_length,
                                              EQ_RANGE)))
7098 7099
      goto err;
    *ref->null_ref_key= 0;		// Clear null byte
pem@mysql.com's avatar
pem@mysql.com committed
7100
    if (insert_dynamic(&quick->ranges,(gptr)&null_range))
7101 7102 7103 7104
      goto err;
  }

  return quick;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7105 7106 7107 7108 7109 7110

err:
  delete quick;
  return 0;
}

7111 7112

/*
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7113 7114 7115 7116 7117 7118
  Perform key scans for all used indexes (except CPK), get rowids and merge 
  them into an ordered non-recurrent sequence of rowids.
  
  The merge/duplicate removal is performed using Unique class. We put all
  rowids into Unique, get the sorted sequence and destroy the Unique.
  
7119
  If table has a clustered primary key that covers all rows (TRUE for bdb
7120
     and innodb currently) and one of the index_merge scans is a scan on PK,
7121
  then
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7122 7123
    rows that will be retrieved by PK scan are not put into Unique and 
    primary key scan is not performed here, it is performed later separately.
7124

7125 7126 7127
  RETURN
    0     OK
    other error
7128
*/
7129

sergefp@mysql.com's avatar
sergefp@mysql.com committed
7130
int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
7131
{
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7132 7133
  List_iterator_fast<QUICK_RANGE_SELECT> cur_quick_it(quick_selects);
  QUICK_RANGE_SELECT* cur_quick;
7134
  int result;
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7135
  Unique *unique;
7136
  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::prepare_unique");
7137

7138
  /* We're going to just read rowids. */
7139 7140
  if (head->file->extra(HA_EXTRA_KEYREAD))
    DBUG_RETURN(1);
7141

7142 7143
  /*
    Make innodb retrieve all PK member fields, so
7144
     * ha_innobase::position (which uses them) call works.
7145
     * We can filter out rows that will be retrieved by clustered PK.
7146
    (This also creates a deficiency - it is possible that we will retrieve
7147
     parts of key that are not used by current query at all.)
7148
  */
7149
  if (head->file->ha_retrieve_all_pk())
7150
    DBUG_RETURN(1);
7151

sergefp@mysql.com's avatar
sergefp@mysql.com committed
7152 7153
  cur_quick_it.rewind();
  cur_quick= cur_quick_it++;
7154
  DBUG_ASSERT(cur_quick != 0);
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7155 7156 7157 7158 7159
  
  /*
    We reuse the same instance of handler so we need to call both init and 
    reset here.
  */
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7160
  if (cur_quick->init() || cur_quick->reset())
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7161
    DBUG_RETURN(1);
7162

7163
  unique= new Unique(refpos_order_cmp, (void *)head->file,
7164
                     head->file->ref_length,
7165
                     thd->variables.sortbuff_size);
7166 7167
  if (!unique)
    DBUG_RETURN(1);
monty@mysql.com's avatar
monty@mysql.com committed
7168
  for (;;)
7169
  {
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7170
    while ((result= cur_quick->get_next()) == HA_ERR_END_OF_FILE)
7171
    {
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7172 7173 7174
      cur_quick->range_end();
      cur_quick= cur_quick_it++;
      if (!cur_quick)
7175
        break;
7176

sergefp@mysql.com's avatar
sergefp@mysql.com committed
7177 7178
      if (cur_quick->file->inited != handler::NONE) 
        cur_quick->file->ha_index_end();
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7179
      if (cur_quick->init() || cur_quick->reset())
7180
        DBUG_RETURN(1);
7181 7182 7183
    }

    if (result)
7184
    {
7185
      if (result != HA_ERR_END_OF_FILE)
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7186 7187
      {
        cur_quick->range_end();
7188
        DBUG_RETURN(result);
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7189
      }
7190
      break;
7191
    }
7192

7193 7194
    if (thd->killed)
      DBUG_RETURN(1);
7195

7196
    /* skip row if it will be retrieved by clustered PK scan */
7197 7198
    if (pk_quick_select && pk_quick_select->row_in_ranges())
      continue;
7199

sergefp@mysql.com's avatar
sergefp@mysql.com committed
7200 7201
    cur_quick->file->position(cur_quick->record);
    result= unique->unique_add((char*)cur_quick->file->ref);
7202
    if (result)
7203 7204
      DBUG_RETURN(1);

monty@mysql.com's avatar
monty@mysql.com committed
7205
  }
7206

7207 7208
  /* ok, all row ids are in Unique */
  result= unique->get(head);
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7209
  delete unique;
monty@mysql.com's avatar
monty@mysql.com committed
7210
  doing_pk_scan= FALSE;
monty@mysql.com's avatar
monty@mysql.com committed
7211 7212
  /* start table scan */
  init_read_record(&read_record, thd, head, (SQL_SELECT*) 0, 1, 1);
7213 7214
  /* index_merge currently doesn't support "using index" at all */
  head->file->extra(HA_EXTRA_NO_KEYREAD);
7215

7216 7217 7218
  DBUG_RETURN(result);
}

7219

7220 7221 7222
/*
  Get next row for index_merge.
  NOTES
7223 7224 7225 7226
    The rows are read from
      1. rowids stored in Unique.
      2. QUICK_RANGE_SELECT with clustered primary key (if any).
    The sets of rows retrieved in 1) and 2) are guaranteed to be disjoint.
7227
*/
7228

7229 7230
int QUICK_INDEX_MERGE_SELECT::get_next()
{
7231
  int result;
7232
  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::get_next");
7233

7234 7235 7236 7237 7238 7239 7240 7241 7242
  if (doing_pk_scan)
    DBUG_RETURN(pk_quick_select->get_next());

  result= read_record.read_record(&read_record);

  if (result == -1)
  {
    result= HA_ERR_END_OF_FILE;
    end_read_record(&read_record);
7243
    /* All rows from Unique have been retrieved, do a clustered PK scan */
monty@mysql.com's avatar
monty@mysql.com committed
7244
    if (pk_quick_select)
7245
    {
monty@mysql.com's avatar
monty@mysql.com committed
7246
      doing_pk_scan= TRUE;
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7247
      if ((result= pk_quick_select->init()) || (result= pk_quick_select->reset()))
7248 7249 7250 7251 7252 7253
        DBUG_RETURN(result);
      DBUG_RETURN(pk_quick_select->get_next());
    }
  }

  DBUG_RETURN(result);
7254 7255
}

7256 7257

/*
  Retrieve next record.
  SYNOPSIS
     QUICK_ROR_INTERSECT_SELECT::get_next()

  NOTES
    Invariant on enter/exit: all intersected selects have retrieved all index
    records with rowid <= some_rowid_val and no intersected select has
    retrieved any index records with rowid > some_rowid_val.
    We start fresh and loop until we have retrieved the same rowid in each of
    the key scans or we got an error.

    If a Clustered PK scan is present, it is used only to check if row
    satisfies its condition (and never used for row retrieval).

  RETURN
   0     - Ok
   other - Error code if any error occurred.
*/

int QUICK_ROR_INTERSECT_SELECT::get_next()
{
  List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
  QUICK_RANGE_SELECT* quick;
  int error, cmp;
  uint last_rowid_count=0;
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::get_next");

  /* Get a rowid for first quick and save it as a 'candidate' */
  quick= quick_it++;
  if (cpk_quick)
  {
    /* Skip rows that do not satisfy the clustered PK ranges. */
    do {
      error= quick->get_next();
    }while (!error && !cpk_quick->row_in_ranges());
  }
  else
    error= quick->get_next();

  if (error)
    DBUG_RETURN(error);

  quick->file->position(quick->record);
  memcpy(last_rowid, quick->file->ref, head->file->ref_length);
  last_rowid_count= 1;

  while (last_rowid_count < quick_selects.elements)
  {
    /* Walk the quick selects round-robin. */
    if (!(quick= quick_it++))
    {
      quick_it.rewind();
      quick= quick_it++;
    }

    /* Let this select catch up with the current candidate rowid. */
    do {
      if ((error= quick->get_next()))
        DBUG_RETURN(error);
      quick->file->position(quick->record);
      cmp= head->file->cmp_ref(quick->file->ref, last_rowid);
    } while (cmp < 0);

    /* Ok, current select 'caught up' and returned ref >= cur_ref */
    if (cmp > 0)
    {
      /* Found a row with ref > cur_ref. Make it a new 'candidate' */
      if (cpk_quick)
      {
        while (!cpk_quick->row_in_ranges())
        {
          if ((error= quick->get_next()))
            DBUG_RETURN(error);
        }
        /*
          The loop above may have moved to another row; recompute the rowid
          so that the copy below does not use the stale one.
        */
        quick->file->position(quick->record);
      }
      memcpy(last_rowid, quick->file->ref, head->file->ref_length);
      last_rowid_count= 1;
    }
    else
    {
      /* current 'candidate' row confirmed by this select */
      last_rowid_count++;
    }
  }

  /* We get here iff we got the same row ref in all scans. */
  if (need_to_fetch_row)
    error= head->file->rnd_pos(head->record[0], last_rowid);
  DBUG_RETURN(error);
}


7347 7348
/*
  Retrieve next record.
7349 7350
  SYNOPSIS
    QUICK_ROR_UNION_SELECT::get_next()
7351

7352
  NOTES
7353 7354
    Enter/exit invariant:
    For each quick select in the queue a {key,rowid} tuple has been
7355
    retrieved but the corresponding row hasn't been passed to output.
7356

7357
  RETURN
7358 7359
   0     - Ok
   other - Error code if any error occurred.
7360 7361 7362 7363 7364 7365 7366 7367
*/

int QUICK_ROR_UNION_SELECT::get_next()
{
  int error, dup_row;
  QUICK_SELECT_I *quick;
  byte *tmp;
  DBUG_ENTER("QUICK_ROR_UNION_SELECT::get_next");
7368

7369 7370 7371 7372
  do
  {
    if (!queue.elements)
      DBUG_RETURN(HA_ERR_END_OF_FILE);
7373
    /* Ok, we have a queue with >= 1 scans */
7374 7375 7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387 7388 7389

    quick= (QUICK_SELECT_I*)queue_top(&queue);
    memcpy(cur_rowid, quick->last_rowid, rowid_length);

    /* put into queue rowid from the same stream as top element */
    if ((error= quick->get_next()))
    {
      if (error != HA_ERR_END_OF_FILE)
        DBUG_RETURN(error);
      queue_remove(&queue, 0);
    }
    else
    {
      quick->save_last_pos();
      queue_replaced(&queue);
    }
7390

7391 7392 7393
    if (!have_prev_rowid)
    {
      /* No rows have been returned yet */
monty@mysql.com's avatar
monty@mysql.com committed
7394 7395
      dup_row= FALSE;
      have_prev_rowid= TRUE;
7396 7397 7398 7399
    }
    else
      dup_row= !head->file->cmp_ref(cur_rowid, prev_rowid);
  }while (dup_row);
7400

7401 7402 7403 7404 7405 7406 7407 7408
  tmp= cur_rowid;
  cur_rowid= prev_rowid;
  prev_rowid= tmp;

  error= head->file->rnd_pos(quick->record, prev_rowid);
  DBUG_RETURN(error);
}

sergefp@mysql.com's avatar
sergefp@mysql.com committed
7409
int QUICK_RANGE_SELECT::reset()
ingo@mysql.com's avatar
ingo@mysql.com committed
7410 7411 7412
{
  uint  mrange_bufsiz;
  byte  *mrange_buff;
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7413 7414 7415
  DBUG_ENTER("QUICK_RANGE_SELECT::reset");
  next=0;
  range= NULL;
7416
  in_range= FALSE;
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7417
  cur_range= (QUICK_RANGE**) ranges.buffer;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
7418

7419
  if (file->inited == handler::NONE && (error= file->ha_index_init(index,1)))
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
7420
    DBUG_RETURN(error);
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
7421
 
ingo@mysql.com's avatar
ingo@mysql.com committed
7422 7423 7424 7425 7426 7427 7428
  /* Do not allocate the buffers twice. */
  if (multi_range_length)
  {
    DBUG_ASSERT(multi_range_length == min(multi_range_count, ranges.elements));
    DBUG_RETURN(0);
  }

sergefp@mysql.com's avatar
sergefp@mysql.com committed
7429 7430
  /* Allocate the ranges array. */
  DBUG_ASSERT(ranges.elements);
ingo@mysql.com's avatar
ingo@mysql.com committed
7431 7432 7433 7434 7435 7436 7437 7438 7439 7440 7441 7442 7443 7444 7445 7446
  multi_range_length= min(multi_range_count, ranges.elements);
  DBUG_ASSERT(multi_range_length > 0);
  while (multi_range_length && ! (multi_range= (KEY_MULTI_RANGE*)
                                  my_malloc(multi_range_length *
                                            sizeof(KEY_MULTI_RANGE),
                                            MYF(MY_WME))))
  {
    /* Try to shrink the buffers until it is 0. */
    multi_range_length/= 2;
  }
  if (! multi_range)
  {
    multi_range_length= 0;
    DBUG_RETURN(HA_ERR_OUT_OF_MEM);
  }

sergefp@mysql.com's avatar
sergefp@mysql.com committed
7447
  /* Allocate the handler buffer if necessary.  */
ingo@mysql.com's avatar
ingo@mysql.com committed
7448 7449 7450
  if (file->table_flags() & HA_NEED_READ_RANGE_BUFFER)
  {
    mrange_bufsiz= min(multi_range_bufsiz,
joreland@mysql.com's avatar
merge  
joreland@mysql.com committed
7451
                       (QUICK_SELECT_I::records + 1)* head->s->reclength);
ingo@mysql.com's avatar
ingo@mysql.com committed
7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463 7464 7465 7466 7467 7468 7469 7470 7471 7472 7473

    while (mrange_bufsiz &&
           ! my_multi_malloc(MYF(MY_WME),
                             &multi_range_buff, sizeof(*multi_range_buff),
                             &mrange_buff, mrange_bufsiz,
                             NullS))
    {
      /* Try to shrink the buffers until both are 0. */
      mrange_bufsiz/= 2;
    }
    if (! multi_range_buff)
    {
      my_free((char*) multi_range, MYF(0));
      multi_range= NULL;
      multi_range_length= 0;
      DBUG_RETURN(HA_ERR_OUT_OF_MEM);
    }

    /* Initialize the handler buffer. */
    multi_range_buff->buffer= mrange_buff;
    multi_range_buff->buffer_end= mrange_buff + mrange_bufsiz;
    multi_range_buff->end_of_used_area= mrange_buff;
7474 7475 7476 7477 7478 7479 7480 7481
#ifdef HAVE_purify
    /*
      We need this until ndb will use the buffer efficiently
      (Now ndb stores  complete row in here, instead of only the used fields
      which gives us valgrind warnings in compare_record[])
    */
    bzero((char*) mrange_buff, mrange_bufsiz);
#endif
ingo@mysql.com's avatar
ingo@mysql.com committed
7482 7483 7484 7485 7486 7487 7488 7489 7490 7491 7492 7493 7494 7495 7496 7497 7498 7499 7500
  }
  DBUG_RETURN(0);
}


/*
  Get next possible record using quick-struct.

  SYNOPSIS
    QUICK_RANGE_SELECT::get_next()

  NOTES
    Record is read into table->record[0]

  RETURN
    0			Found row
    HA_ERR_END_OF_FILE	No (more) rows in range
    #			Error code
*/
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7501

7502
int QUICK_RANGE_SELECT::get_next()
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7503
{
ingo@mysql.com's avatar
ingo@mysql.com committed
7504 7505 7506 7507
  int             result;
  KEY_MULTI_RANGE *mrange;
  key_range       *start_key;
  key_range       *end_key;
7508
  DBUG_ENTER("QUICK_RANGE_SELECT::get_next");
ingo@mysql.com's avatar
ingo@mysql.com committed
7509 7510 7511
  DBUG_ASSERT(multi_range_length && multi_range &&
              (cur_range >= (QUICK_RANGE**) ranges.buffer) &&
              (cur_range <= (QUICK_RANGE**) ranges.buffer + ranges.elements));
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7512 7513 7514

  for (;;)
  {
ingo@mysql.com's avatar
ingo@mysql.com committed
7515
    if (in_range)
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7516
    {
ingo@mysql.com's avatar
ingo@mysql.com committed
7517 7518
      /* We did already start to read this key. */
      result= file->read_multi_range_next(&mrange);
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7519
      if (result != HA_ERR_END_OF_FILE)
ingo@mysql.com's avatar
ingo@mysql.com committed
7520 7521
      {
        in_range= ! result;
7522
	DBUG_RETURN(result);
ingo@mysql.com's avatar
ingo@mysql.com committed
7523
      }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7524
    }
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7525

ingo@mysql.com's avatar
ingo@mysql.com committed
7526 7527 7528 7529 7530 7531 7532 7533 7534 7535 7536 7537 7538 7539 7540 7541 7542 7543 7544 7545 7546 7547 7548 7549 7550 7551 7552 7553 7554 7555
    uint count= min(multi_range_length, ranges.elements -
                    (cur_range - (QUICK_RANGE**) ranges.buffer));
    if (count == 0)
    {
      /* Ranges have already been used up before. None is left for read. */
      in_range= FALSE;
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    KEY_MULTI_RANGE *mrange_slot, *mrange_end;
    for (mrange_slot= multi_range, mrange_end= mrange_slot+count;
         mrange_slot < mrange_end;
         mrange_slot++)
    {
      start_key= &mrange_slot->start_key;
      end_key= &mrange_slot->end_key;
      range= *(cur_range++);

      start_key->key=    (const byte*) range->min_key;
      start_key->length= range->min_length;
      start_key->flag=   ((range->flag & NEAR_MIN) ? HA_READ_AFTER_KEY :
                          (range->flag & EQ_RANGE) ?
                          HA_READ_KEY_EXACT : HA_READ_KEY_OR_NEXT);
      end_key->key=      (const byte*) range->max_key;
      end_key->length=   range->max_length;
      /*
        We use HA_READ_AFTER_KEY here because if we are reading on a key
        prefix. We want to find all keys with this prefix.
      */
      end_key->flag=     (range->flag & NEAR_MAX ? HA_READ_BEFORE_KEY :
                          HA_READ_AFTER_KEY);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7556

ingo@mysql.com's avatar
ingo@mysql.com committed
7557 7558
      mrange_slot->range_flag= range->flag;
    }
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7559

ingo@mysql.com's avatar
ingo@mysql.com committed
7560 7561
    result= file->read_multi_range_first(&mrange, multi_range, count,
                                         sorted, multi_range_buff);
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7562
    if (result != HA_ERR_END_OF_FILE)
ingo@mysql.com's avatar
ingo@mysql.com committed
7563 7564
    {
      in_range= ! result;
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7565
      DBUG_RETURN(result);
ingo@mysql.com's avatar
ingo@mysql.com committed
7566 7567
    }
    in_range= FALSE; /* No matching rows; go to next set of ranges. */
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7568 7569 7570
  }
}

7571

7572 7573 7574 7575 7576 7577
/*
  Get the next record with a different prefix.

  SYNOPSIS
    QUICK_RANGE_SELECT::get_next_prefix()
    prefix_length  length of cur_prefix
    cur_prefix     prefix of a key to be searched for

  DESCRIPTION
    Each subsequent call to the method retrieves the first record that has a
    prefix with length prefix_length different from cur_prefix, such that the
    record with the new prefix is within the ranges described by
    this->ranges. The record found is stored into the buffer pointed by
    this->record.
    The method is useful for GROUP-BY queries with range conditions to
    discover the prefix of the next group that satisfies the range conditions.

  TODO
    This method is a modified copy of QUICK_RANGE_SELECT::get_next(), so both
    methods should be unified into a more general one to reduce code
    duplication.

  RETURN
    0                  on success
    HA_ERR_END_OF_FILE if returned all keys
    other              if some error occurred
*/

int QUICK_RANGE_SELECT::get_next_prefix(uint prefix_length, byte *cur_prefix)
{
  DBUG_ENTER("QUICK_RANGE_SELECT::get_next_prefix");

  for (;;)
  {
    int result;
    key_range lower_bound, upper_bound;

    if (range)
    {
      /* Read the next record in the same range with prefix after cur_prefix. */
      DBUG_ASSERT(cur_prefix != 0);
      result= file->index_read(record, cur_prefix, prefix_length,
                               HA_READ_AFTER_KEY);
      if (result || (file->compare_key(file->end_range) <= 0))
        DBUG_RETURN(result);
    }

    uint ranges_left= ranges.elements -
                      (cur_range - (QUICK_RANGE**) ranges.buffer);
    if (ranges_left == 0)
    {
      /* Ranges have already been used up before. None is left for read. */
      range= 0;
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    range= *(cur_range++);

    /* Both bounds are limited to at most prefix_length bytes. */
    lower_bound.key=    (const byte*) range->min_key;
    lower_bound.length= min(range->min_length, prefix_length);
    if (range->flag & NEAR_MIN)
      lower_bound.flag= HA_READ_AFTER_KEY;
    else if (range->flag & EQ_RANGE)
      lower_bound.flag= HA_READ_KEY_EXACT;
    else
      lower_bound.flag= HA_READ_KEY_OR_NEXT;

    upper_bound.key=    (const byte*) range->max_key;
    upper_bound.length= min(range->max_length, prefix_length);
    /*
      We use READ_AFTER_KEY here because if we are reading on a key
      prefix we want to find all keys with this prefix
    */
    if (range->flag & NEAR_MAX)
      upper_bound.flag= HA_READ_BEFORE_KEY;
    else
      upper_bound.flag= HA_READ_AFTER_KEY;

    result= file->read_range_first(range->min_length ? &lower_bound : 0,
                                   range->max_length ? &upper_bound : 0,
                                   test(range->flag & EQ_RANGE),
                                   sorted);
    if (range->flag == (UNIQUE_RANGE | EQ_RANGE))
      range= 0;                                 // Stop searching

    if (result != HA_ERR_END_OF_FILE)
      DBUG_RETURN(result);
    range= 0;                                   // No matching rows; go to next range
  }
}


pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7655
/* Get next for geometrical indexes */
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7656

pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7657
int QUICK_RANGE_SELECT_GEOM::get_next()
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7658
{
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7659
  DBUG_ENTER("QUICK_RANGE_SELECT_GEOM::get_next");
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7660

pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7661
  for (;;)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7662
  {
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7663 7664
    int result;
    if (range)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7665
    {
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7666 7667 7668 7669 7670
      // Already read through key
      result= file->index_next_same(record, (byte*) range->min_key,
				    range->min_length);
      if (result != HA_ERR_END_OF_FILE)
	DBUG_RETURN(result);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7671
    }
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7672

ingo@mysql.com's avatar
ingo@mysql.com committed
7673 7674 7675 7676 7677 7678 7679 7680
    uint count= ranges.elements - (cur_range - (QUICK_RANGE**) ranges.buffer);
    if (count == 0)
    {
      /* Ranges have already been used up before. None is left for read. */
      range= 0;
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    range= *(cur_range++);
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7681 7682 7683 7684 7685

    result= file->index_read(record,
			     (byte*) range->min_key,
			     range->min_length,
			     (ha_rkey_function)(range->flag ^ GEOM_FLAG));
7686
    if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE)
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7687 7688
      DBUG_RETURN(result);
    range=0;				// Not found, to next range
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7689 7690 7691
  }
}

7692

7693 7694 7695 7696
/*
  Check if current row will be retrieved by this QUICK_RANGE_SELECT

  NOTES
7697 7698
    It is assumed that currently a scan is being done on another index
    which reads all necessary parts of the index that is scanned by this
7699
    quick select.
7700
    The implementation does a binary search on sorted array of disjoint
7701 7702
    ranges, without taking size of range into account.

7703
    This function is used to filter out clustered PK scan rows in
7704 7705
    index_merge quick select.

7706
  RETURN
monty@mysql.com's avatar
monty@mysql.com committed
7707 7708
    TRUE  if current row will be retrieved by this quick select
    FALSE if not
7709 7710 7711 7712 7713 7714 7715 7716 7717 7718
*/

bool QUICK_RANGE_SELECT::row_in_ranges()
{
  QUICK_RANGE *range;
  uint min= 0;
  uint max= ranges.elements - 1;
  uint mid= (max + min)/2;

  while (min != max)
7719
  {
7720 7721 7722 7723 7724 7725 7726 7727 7728 7729 7730 7731 7732
    if (cmp_next(*(QUICK_RANGE**)dynamic_array_ptr(&ranges, mid)))
    {
      /* current row value > mid->max */
      min= mid + 1;
    }
    else
      max= mid;
    mid= (min + max) / 2;
  }
  range= *(QUICK_RANGE**)dynamic_array_ptr(&ranges, mid);
  return (!cmp_next(range) && !cmp_prev(range));
}

7733
/*
7734 7735 7736 7737 7738 7739 7740
  This is a hack: we inherit from QUICK_SELECT so that we can use the
  get_next() interface, but we have to hold a pointer to the original
  QUICK_SELECT because its data are used all over the place.  What
  should be done is to factor out the data that is needed into a base
  class (QUICK_SELECT), and then have two subclasses (_ASC and _DESC)
  which handle the ranges and implement the get_next() function.  But
  for now, this seems to work right at least.
7741
 */
7742

7743
QUICK_SELECT_DESC::QUICK_SELECT_DESC(QUICK_RANGE_SELECT *q,
7744 7745
                                     uint used_key_parts)
 : QUICK_RANGE_SELECT(*q), rev_it(rev_ranges)
7746
{
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
7747
  QUICK_RANGE *r;
7748

7749 7750
  QUICK_RANGE **pr= (QUICK_RANGE**)ranges.buffer;
  QUICK_RANGE **last_range= pr + ranges.elements;
monty@mysql.com's avatar
monty@mysql.com committed
7751 7752
  for (; pr!=last_range; pr++)
    rev_ranges.push_front(*pr);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
7753

7754
  /* Remove EQ_RANGE flag for keys that are not using the full key */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
7755
  for (r = rev_it++; r; r = rev_it++)
7756 7757 7758 7759 7760 7761 7762 7763
  {
    if ((r->flag & EQ_RANGE) &&
	head->key_info[index].key_length != r->max_length)
      r->flag&= ~EQ_RANGE;
  }
  rev_it.rewind();
  q->dont_free=1;				// Don't free shared mem
  delete q;
7764 7765
}

7766

7767 7768 7769 7770 7771 7772
int QUICK_SELECT_DESC::get_next()
{
  DBUG_ENTER("QUICK_SELECT_DESC::get_next");

  /* The max key is handled as follows:
   *   - if there is NO_MAX_RANGE, start at the end and move backwards
7773 7774
   *   - if it is an EQ_RANGE, which means that max key covers the entire
   *     key, go directly to the key and read through it (sorting backwards is
7775 7776 7777 7778 7779 7780 7781 7782 7783 7784 7785 7786
   *     same as sorting forwards)
   *   - if it is NEAR_MAX, go to the key or next, step back once, and
   *     move backwards
   *   - otherwise (not NEAR_MAX == include the key), go after the key,
   *     step back once, and move backwards
   */

  for (;;)
  {
    int result;
    if (range)
    {						// Already read through key
7787 7788 7789
      result = ((range->flag & EQ_RANGE)
		? file->index_next_same(record, (byte*) range->min_key,
					range->min_length) :
7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802 7803 7804
		file->index_prev(record));
      if (!result)
      {
	if (cmp_prev(*rev_it.ref()) == 0)
	  DBUG_RETURN(0);
      }
      else if (result != HA_ERR_END_OF_FILE)
	DBUG_RETURN(result);
    }

    if (!(range=rev_it++))
      DBUG_RETURN(HA_ERR_END_OF_FILE);		// All ranges used

    if (range->flag & NO_MAX_RANGE)		// Read last record
    {
7805 7806 7807
      int local_error;
      if ((local_error=file->index_last(record)))
	DBUG_RETURN(local_error);		// Empty table
7808 7809 7810 7811 7812 7813
      if (cmp_prev(range) == 0)
	DBUG_RETURN(0);
      range=0;			// No matching records; go to next range
      continue;
    }

7814
    if (range->flag & EQ_RANGE)
7815 7816 7817 7818 7819 7820
    {
      result = file->index_read(record, (byte*) range->max_key,
				range->max_length, HA_READ_KEY_EXACT);
    }
    else
    {
7821 7822 7823 7824 7825
      DBUG_ASSERT(range->flag & NEAR_MAX || range_reads_after_key(range));
      result=file->index_read(record, (byte*) range->max_key,
			      range->max_length,
			      ((range->flag & NEAR_MAX) ?
			       HA_READ_BEFORE_KEY : HA_READ_PREFIX_LAST_OR_PREV));
7826 7827 7828
    }
    if (result)
    {
7829
      if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE)
7830 7831 7832 7833 7834 7835 7836 7837 7838 7839 7840 7841 7842 7843
	DBUG_RETURN(result);
      range=0;					// Not found, to next range
      continue;
    }
    if (cmp_prev(range) == 0)
    {
      if (range->flag == (UNIQUE_RANGE | EQ_RANGE))
	range = 0;				// Stop searching
      DBUG_RETURN(0);				// Found key is in range
    }
    range = 0;					// To next range
  }
}

7844

pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7845 7846 7847 7848 7849 7850 7851 7852 7853 7854 7855 7856 7857 7858 7859 7860 7861 7862 7863 7864 7865 7866 7867 7868 7869 7870 7871 7872 7873 7874 7875 7876 7877 7878 7879 7880 7881 7882 7883 7884 7885
/*
  Compare if found key is over max-value

  SYNOPSIS
    QUICK_RANGE_SELECT::cmp_next()
    range_arg   range whose upper bound is checked

  RETURN
    0  if key <= range->max_key
    1  otherwise
*/

int QUICK_RANGE_SELECT::cmp_next(QUICK_RANGE *range_arg)
{
  if (range_arg->flag & NO_MAX_RANGE)
    return 0;                                   /* key can't be too large */

  KEY_PART *part= key_parts;
  char *pos= range_arg->max_key;
  char *limit= pos + range_arg->max_length;
  uint step;

  /* Walk max_key one key part at a time. */
  for (; pos < limit; pos+= step, part++)
  {
    step= part->store_length;
    if (part->null_bit)
    {
      if (*pos)
      {
        /* The null byte of this part is set in max_key. */
        if (!part->field->is_null())
          return 1;
        continue;
      }
      if (part->field->is_null())
        return 0;
      pos++;                                    // Skip null byte
      step--;
    }

    int cmp= part->field->key_cmp((byte*) pos, part->length);
    if (cmp < 0)
      return 0;
    if (cmp > 0)
      return 1;
  }

  /* Exact match: it is outside the range only if the bound is exclusive. */
  if (range_arg->flag & NEAR_MAX)
    return 1;
  return 0;
}


7886
/*
7887 7888 7889
  Returns 0 if found key is inside range (found key >= range->min_key).
*/

7890
int QUICK_RANGE_SELECT::cmp_prev(QUICK_RANGE *range_arg)
7891
{
7892
  int cmp;
7893
  if (range_arg->flag & NO_MIN_RANGE)
7894
    return 0;					/* key can't be to small */
7895

monty@mysql.com's avatar
monty@mysql.com committed
7896 7897
  cmp= key_cmp(key_part_info, (byte*) range_arg->min_key,
               range_arg->min_length);
7898 7899 7900
  if (cmp > 0 || cmp == 0 && !(range_arg->flag & NEAR_MIN))
    return 0;
  return 1;                                     // outside of range
7901 7902
}

7903

7904
/*
monty@mysql.com's avatar
monty@mysql.com committed
7905
 * TRUE if this range will require using HA_READ_AFTER_KEY
7906
   See comment in get_next() about this
7907
 */
7908

7909
bool QUICK_SELECT_DESC::range_reads_after_key(QUICK_RANGE *range_arg)
7910
{
7911
  return ((range_arg->flag & (NO_MAX_RANGE | NEAR_MAX)) ||
7912
	  !(range_arg->flag & EQ_RANGE) ||
7913
	  head->key_info[index].key_length != range_arg->max_length) ? 1 : 0;
7914 7915
}

7916

monty@mysql.com's avatar
monty@mysql.com committed
7917
/*
  TRUE if we are reading over a key that may have a NULL value

  NOTES
    The function is compiled only when NOT_USED is defined.
*/

#ifdef NOT_USED
bool QUICK_SELECT_DESC::test_if_null_range(QUICK_RANGE *range_arg,
                                           uint used_key_parts)
{
  uint offset, end;
  KEY_PART *key_part= key_parts;
  KEY_PART *key_part_end= key_part + used_key_parts;

  /* Skip the leading key parts on which min_key and max_key agree. */
  for (offset= 0, end= min(range_arg->min_length, range_arg->max_length) ;
       offset < end && key_part != key_part_end ;
       offset+= key_part++->store_length)
  {
    if (memcmp((char*) range_arg->min_key+offset,
               (char*) range_arg->max_key+offset,
               key_part->store_length))
    {
      if (key_part->null_bit && range_arg->min_key[offset])
        return 1;                       // min_key is null and max_key isn't
      // Range doesn't cover NULL. This is ok if there is no more null parts
      break;
    }
  }
  /*
    If the next min_range is > NULL, then we can use this, even if
    it's a NULL key
    Example:  SELECT * FROM t1 WHERE a = 2 AND b >0 ORDER BY a DESC,b DESC;

  */
  if (key_part != key_part_end && key_part->null_bit)
  {
    if (offset >= range_arg->min_length || range_arg->min_key[offset])
      return 1;                                 // Could be null
    key_part++;
  }
  /*
    If any of the key parts used in the ORDER BY could be NULL, we can't
    use the key to sort the data.
  */
  while (key_part != key_part_end)
  {
    if (key_part->null_bit)
      return 1;                                 // Covers null part
    key_part++;
  }
  return 0;
}
#endif
7963 7964


7965 7966 7967 7968 7969 7970 7971 7972 7973
/* Append the name of the index read by this range scan to 'str'. */
void QUICK_RANGE_SELECT::add_info_string(String *str)
{
  str->append(head->key_info[index].name);
}

/*
  Append "sort_union(<scan>,<scan>,...)" to 'str': one entry per merged
  range scan, followed by the primary key scan when there is one.
*/
void QUICK_INDEX_MERGE_SELECT::add_info_string(String *str)
{
  List_iterator_fast<QUICK_RANGE_SELECT> scan_it(quick_selects);
  QUICK_RANGE_SELECT *scan;
  bool need_comma= FALSE;

  str->append(STRING_WITH_LEN("sort_union("));
  while ((scan= scan_it++))
  {
    /* Every entry except the first one is preceded by a separator. */
    if (need_comma)
      str->append(',');
    need_comma= TRUE;
    scan->add_info_string(str);
  }
  if (pk_quick_select)
  {
    str->append(',');
    pk_quick_select->add_info_string(str);
  }
  str->append(')');
}

/*
  Append "intersect(<index name>,...)" to 'str': the names of the indexes
  of all intersected scans, followed by the index of cpk_quick if it is set.
*/
void QUICK_ROR_INTERSECT_SELECT::add_info_string(String *str)
{
  List_iterator_fast<QUICK_RANGE_SELECT> scan_it(quick_selects);
  QUICK_RANGE_SELECT *scan;
  bool need_comma= FALSE;

  str->append(STRING_WITH_LEN("intersect("));
  while ((scan= scan_it++))
  {
    /* Every entry except the first one is preceded by a separator. */
    if (need_comma)
      str->append(',');
    need_comma= TRUE;
    str->append(head->key_info[scan->index].name);
  }
  if (cpk_quick)
  {
    str->append(',');
    str->append(head->key_info[cpk_quick->index].name);
  }
  str->append(')');
}

/*
  Append "union(<scan>,<scan>,...)" to 'str', letting each child select
  describe itself.
*/
void QUICK_ROR_UNION_SELECT::add_info_string(String *str)
{
  List_iterator_fast<QUICK_SELECT_I> child_it(quick_selects);
  QUICK_SELECT_I *child;
  bool need_comma= FALSE;

  str->append(STRING_WITH_LEN("union("));
  while ((child= child_it++))
  {
    /* Every entry except the first one is preceded by a separator. */
    if (need_comma)
      str->append(',');
    need_comma= TRUE;
    child->add_info_string(str);
  }
  str->append(')');
}


8035
void QUICK_RANGE_SELECT::add_keys_and_lengths(String *key_names,
8036
                                              String *used_lengths)
8037 8038 8039 8040 8041 8042 8043 8044 8045
{
  char buf[64];
  uint length;
  KEY *key_info= head->key_info + index;
  key_names->append(key_info->name);
  length= longlong2str(max_used_key_length, buf, 10) - buf;
  used_lengths->append(buf, length);
}

8046 8047
/*
  Append, comma separated, the index name of every merged scan to
  'key_names' and its max_used_key_length to 'used_lengths'. The primary
  key scan, if any, is appended last.
*/
void QUICK_INDEX_MERGE_SELECT::add_keys_and_lengths(String *key_names,
                                                    String *used_lengths)
{
  char length_buf[64];
  uint digits;
  bool need_comma= FALSE;
  QUICK_RANGE_SELECT *scan;
  List_iterator_fast<QUICK_RANGE_SELECT> scan_it(quick_selects);

  while ((scan= scan_it++))
  {
    /* Both lists get a separator before every entry except the first. */
    if (need_comma)
    {
      key_names->append(',');
      used_lengths->append(',');
    }
    need_comma= TRUE;

    key_names->append(head->key_info[scan->index].name);
    digits= longlong2str(scan->max_used_key_length, length_buf, 10) -
            length_buf;
    used_lengths->append(length_buf, digits);
  }

  if (pk_quick_select)
  {
    key_names->append(',');
    key_names->append(head->key_info[pk_quick_select->index].name);
    digits= longlong2str(pk_quick_select->max_used_key_length, length_buf,
                         10) - length_buf;
    used_lengths->append(',');
    used_lengths->append(length_buf, digits);
  }
}

8081 8082
/*
  Append, comma separated, the index name of every intersected scan to
  'key_names' and its max_used_key_length to 'used_lengths'. The cpk_quick
  scan, if set, is appended last.
*/
void QUICK_ROR_INTERSECT_SELECT::add_keys_and_lengths(String *key_names,
                                                      String *used_lengths)
{
  char length_buf[64];
  uint digits;
  bool need_comma= FALSE;
  QUICK_RANGE_SELECT *scan;
  List_iterator_fast<QUICK_RANGE_SELECT> scan_it(quick_selects);

  while ((scan= scan_it++))
  {
    /* Both lists get a separator before every entry except the first. */
    if (need_comma)
    {
      key_names->append(',');
      used_lengths->append(',');
    }
    need_comma= TRUE;

    key_names->append(head->key_info[scan->index].name);
    digits= longlong2str(scan->max_used_key_length, length_buf, 10) -
            length_buf;
    used_lengths->append(length_buf, digits);
  }

  if (cpk_quick)
  {
    key_names->append(',');
    key_names->append(head->key_info[cpk_quick->index].name);
    digits= longlong2str(cpk_quick->max_used_key_length, length_buf, 10) -
            length_buf;
    used_lengths->append(',');
    used_lengths->append(length_buf, digits);
  }
}

8115 8116
/*
  Let every child select append its own key names and key lengths,
  separating the children's contributions with commas in both strings.
*/
void QUICK_ROR_UNION_SELECT::add_keys_and_lengths(String *key_names,
                                                  String *used_lengths)
{
  List_iterator_fast<QUICK_SELECT_I> child_it(quick_selects);
  QUICK_SELECT_I *child;
  bool need_comma= FALSE;

  while ((child= child_it++))
  {
    /* Both lists get a separator before every entry except the first. */
    if (need_comma)
    {
      used_lengths->append(',');
      key_names->append(',');
    }
    need_comma= TRUE;
    child->add_keys_and_lengths(key_names, used_lengths);
  }
}

8134 8135 8136 8137 8138 8139 8140 8141 8142

/*******************************************************************************
* Implementation of QUICK_GROUP_MIN_MAX_SELECT
*******************************************************************************/

static inline uint get_field_keypart(KEY *index, Field *field);
static inline SEL_ARG * get_index_range_tree(uint index, SEL_TREE* range_tree,
                                             PARAM *param, uint *param_idx);
static bool
8143
get_constant_key_infix(KEY *index_info, SEL_ARG *index_range_tree,
8144
                       KEY_PART_INFO *first_non_group_part,
8145 8146 8147 8148
                       KEY_PART_INFO *min_max_arg_part,
                       KEY_PART_INFO *last_part, THD *thd,
                       byte *key_infix, uint *key_infix_len,
                       KEY_PART_INFO **first_non_infix_part);
8149
static bool
8150 8151
check_group_min_max_predicates(COND *cond, Item_field *min_max_arg_item,
                               Field::imagetype image_type);
8152

8153 8154 8155 8156 8157 8158
static void
cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
                   uint group_key_parts, SEL_TREE *range_tree,
                   SEL_ARG *index_tree, ha_rows quick_prefix_records,
                   bool have_min, bool have_max,
                   double *read_cost, ha_rows *records);
8159 8160 8161 8162 8163 8164 8165 8166 8167 8168 8169 8170 8171 8172 8173 8174 8175 8176 8177 8178 8179 8180 8181 8182 8183 8184

/*
  Test if this access method is applicable to a GROUP query with MIN/MAX
  functions, and if so, construct a new TRP object.

  SYNOPSIS
    get_best_group_min_max()
    param    Parameter from test_quick_select
    sel_tree Range tree generated by get_mm_tree

  DESCRIPTION
    Test whether a query can be computed via a QUICK_GROUP_MIN_MAX_SELECT.
    Queries computable via a QUICK_GROUP_MIN_MAX_SELECT must satisfy the
    following conditions:
    A) Table T has at least one compound index I of the form:
       I = <A_1, ...,A_k, [B_1,..., B_m], C, [D_1,...,D_n]>
    B) Query conditions:
    B0. Q is over a single table T.
    B1. The attributes referenced by Q are a subset of the attributes of I.
    B2. All attributes QA in Q can be divided into 3 overlapping groups:
        - SA = {S_1, ..., S_l, [C]} - from the SELECT clause, where C is
          referenced by any number of MIN and/or MAX functions if present.
        - WA = {W_1, ..., W_p} - from the WHERE clause
        - GA = <G_1, ..., G_k> - from the GROUP BY clause (if any)
             = SA              - if Q is a DISTINCT query (based on the
                                 equivalence of DISTINCT and GROUP queries).
monty@mysql.com's avatar
monty@mysql.com committed
8185 8186
        - NGA = QA - (GA union C) = {NG_1, ..., NG_m} - the ones not in
          GROUP BY and not referenced by MIN/MAX functions.
8187
        with the following properties specified below.
8188 8189
    B3. If Q has a GROUP BY WITH ROLLUP clause the access method is not 
        applicable.
8190 8191 8192 8193 8194 8195 8196 8197 8198 8199

    SA1. There is at most one attribute in SA referenced by any number of
         MIN and/or MAX functions, which, if present, is denoted as C.
    SA2. The position of the C attribute in the index is after the last A_k.
    SA3. The attribute C can be referenced in the WHERE clause only in
         predicates of the forms:
         - (C {< | <= | > | >= | =} const)
         - (const {< | <= | > | >= | =} C)
         - (C between const_i and const_j)
         - C IS NULL
8200 8201
         - C IS NOT NULL
         - C != const
8202 8203 8204
    SA4. If Q has a GROUP BY clause, there are no other aggregate functions
         except MIN and MAX. For queries with DISTINCT, aggregate functions
         are allowed.
8205
    SA5. The select list in DISTINCT queries should not contain expressions.
8206 8207 8208 8209 8210 8211 8212 8213 8214 8215 8216 8217 8218 8219 8220 8221 8222 8223 8224 8225 8226 8227 8228 8229 8230 8231 8232 8233 8234
    GA1. If Q has a GROUP BY clause, then GA is a prefix of I. That is, if
         G_i = A_j => i = j.
    GA2. If Q has a DISTINCT clause, then there is a permutation of SA that
         forms a prefix of I. This permutation is used as the GROUP clause
         when the DISTINCT query is converted to a GROUP query.
    GA3. The attributes in GA may participate in arbitrary predicates, divided
         into two groups:
         - RNG(G_1,...,G_q ; where q <= k) is a range condition over the
           attributes of a prefix of GA
         - PA(G_i1,...G_iq) is an arbitrary predicate over an arbitrary subset
           of GA. Since PA is applied only to GROUP attributes it filters some
           groups, and thus can be applied after the grouping.
    GA4. There are no expressions among G_i, just direct column references.
    NGA1.If in the index I there is a gap between the last GROUP attribute G_k,
         and the MIN/MAX attribute C, then NGA must consist of exactly the index
         attributes that constitute the gap. As a result there is a permutation
         of NGA that coincides with the gap in the index <B_1, ..., B_m>.
    NGA2.If NGA <> {}, then the WHERE clause must contain a conjunction EQ of
         equality conditions for all NG_i of the form (NG_i = const) or
         (const = NG_i), such that each NG_i is referenced in exactly one
         conjunct. Informally, the predicates provide constants to fill the
         gap in the index.
    WA1. There are no other attributes in the WHERE clause except the ones
         referenced in predicates RNG, PA, PC, EQ defined above. Therefore
         WA is subset of (GA union NGA union C) for GA,NGA,C that pass the above
         tests. By transitivity then it also follows that each WA_i participates
         in the index I (if this was already tested for GA, NGA and C).

    C) Overall query form:
8235 8236 8237 8238
       SELECT EXPR([A_1,...,A_k], [B_1,...,B_m], [MIN(C)], [MAX(C)])
         FROM T
        WHERE [RNG(A_1,...,A_p ; where p <= k)]
         [AND EQ(B_1,...,B_m)]
8239 8240
         [AND PC(C)]
         [AND PA(A_i1,...,A_iq)]
8241 8242 8243 8244
       GROUP BY A_1,...,A_k
       [HAVING PH(A_1, ..., B_1,..., C)]
    where EXPR(...) is an arbitrary expression over some or all SELECT fields,
    or:
8245 8246 8247 8248 8249 8250 8251 8252 8253 8254 8255 8256 8257 8258 8259 8260 8261 8262 8263 8264 8265 8266 8267 8268 8269 8270 8271 8272 8273
       SELECT DISTINCT A_i1,...,A_ik
         FROM T
        WHERE [RNG(A_1,...,A_p ; where p <= k)]
         [AND PA(A_i1,...,A_iq)];

  NOTES
    If the current query satisfies the conditions above, and if
    (mem_root != NULL), then the function constructs and returns a new TRP
    object, that is later used to construct a new QUICK_GROUP_MIN_MAX_SELECT.
    If (mem_root == NULL), then the function only tests whether the current
    query satisfies the conditions above, and, if so, sets
    is_applicable = TRUE.

    Queries with DISTINCT for which index access can be used are transformed
    into equivalent group-by queries of the form:

    SELECT A_1,...,A_k FROM T
     WHERE [RNG(A_1,...,A_p ; where p <= k)]
      [AND PA(A_i1,...,A_iq)]
    GROUP BY A_1,...,A_k;

    The group-by list is a permutation of the select attributes, according
    to their order in the index.

  TODO
  - What happens if the query groups by the MIN/MAX field, and there is no
    other field as in: "select min(a) from t1 group by a" ?
  - We assume that the general correctness of the GROUP-BY query was checked
    before this point. Is this correct, or do we have to check it completely?
8274 8275
  - Lift the limitation in condition (B3), that is, make this access method 
    applicable to ROLLUP queries.
8276 8277 8278 8279 8280 8281 8282 8283 8284 8285 8286 8287 8288 8289 8290 8291 8292 8293 8294 8295 8296 8297 8298 8299 8300 8301 8302 8303 8304 8305 8306 8307 8308 8309 8310 8311 8312 8313 8314

  RETURN
    If mem_root != NULL
    - valid TRP_GROUP_MIN_MAX object if this QUICK class can be used for
      the query
    - NULL otherwise.
    If mem_root == NULL
    - NULL
*/

static TRP_GROUP_MIN_MAX *
get_best_group_min_max(PARAM *param, SEL_TREE *tree)
{
  THD *thd= param->thd;
  JOIN *join= thd->lex->select_lex.join;
  TABLE *table= param->table;
  bool have_min= FALSE;              /* TRUE if there is a MIN function. */
  bool have_max= FALSE;              /* TRUE if there is a MAX function. */
  Item_field *min_max_arg_item= NULL;/* The argument of all MIN/MAX functions.*/
  /*
    The variables below describe the best index found so far. They are only
    updated when a candidate index wins the cost comparison; everything that
    is computed while a candidate is being examined lives in the cur_*
    variables declared further down.
  */
  KEY_PART_INFO *min_max_arg_part= NULL; /* The corresponding keypart. */
  uint group_prefix_len= 0; /* Length (in bytes) of the key prefix. */
  KEY *index_info= NULL;    /* The index chosen for data access. */
  uint index= 0;            /* The id of the chosen index. */
  uint group_key_parts= 0;  /* Number of index key parts in the group prefix. */
  uint used_key_parts= 0;   /* Number of index key parts used for access. */
  byte key_infix[MAX_KEY_LENGTH]; /* Constants from equality predicates.*/
  uint key_infix_len= 0;          /* Length of key_infix. */
  TRP_GROUP_MIN_MAX *read_plan= NULL; /* The eventually constructed TRP. */
  uint key_part_nr;
  ORDER *tmp_group;
  Item *item;
  Item_field *item_field;
  DBUG_ENTER("get_best_group_min_max");

  /* Perform few 'cheap' tests whether this access method is applicable. */
  if (!join || (thd->lex->sql_command != SQLCOM_SELECT))
    DBUG_RETURN(NULL);        /* This is not a select statement. */
  if ((join->tables != 1) ||  /* The query must reference one table. */
      ((!join->group_list) && /* Neither GROUP BY nor a DISTINCT query. */
       (!join->select_distinct)) ||
      (thd->lex->select_lex.olap == ROLLUP_TYPE)) /* Check (B3) for ROLLUP */
    DBUG_RETURN(NULL);
  if (table->s->keys == 0)        /* There are no indexes to use. */
    DBUG_RETURN(NULL);

  /* Analyze the query in more detail. */
  List_iterator<Item> select_items_it(join->fields_list);

  /* Check (SA1,SA4) and store the only MIN/MAX argument - the C attribute.*/
  if (join->make_sum_func_list(join->all_fields, join->fields_list, 1))
    DBUG_RETURN(NULL);
  if (join->sum_funcs[0])
  {
    Item_sum *min_max_item;
    Item_sum **func_ptr= join->sum_funcs;
    while ((min_max_item= *(func_ptr++)))
    {
      if (min_max_item->sum_func() == Item_sum::MIN_FUNC)
        have_min= TRUE;
      else if (min_max_item->sum_func() == Item_sum::MAX_FUNC)
        have_max= TRUE;
      else
        DBUG_RETURN(NULL);

      Item *expr= min_max_item->args[0];    /* The argument of MIN/MAX. */
      if (expr->type() == Item::FIELD_ITEM) /* Is it an attribute? */
      {
        if (! min_max_arg_item)
          min_max_arg_item= (Item_field*) expr;
        else if (! min_max_arg_item->eq(expr, 1))
          DBUG_RETURN(NULL);
      }
      else
        DBUG_RETURN(NULL);
    }
  }

  /* Check (SA5). */
  if (join->select_distinct)
  {
    while ((item= select_items_it++))
    {
      if (item->type() != Item::FIELD_ITEM)
        DBUG_RETURN(NULL);
    }
  }

  /* Check (GA4) - that there are no expressions among the group attributes. */
  for (tmp_group= join->group_list; tmp_group; tmp_group= tmp_group->next)
  {
    if ((*tmp_group->item)->type() != Item::FIELD_ITEM)
      DBUG_RETURN(NULL);
  }

  /*
    Check that table has at least one compound index such that the conditions
    (GA1,GA2) are all TRUE. If there is more than one such index, select the
    cheapest one. Here we set the variables: group_prefix_len and index_info.
  */
  KEY *cur_index_info= table->key_info;
  KEY *cur_index_info_end= cur_index_info + table->s->keys;
  KEY_PART_INFO *cur_part= NULL;
  KEY_PART_INFO *end_part; /* Last part for loops. */
  /* Last index part. */
  KEY_PART_INFO *last_part= NULL;
  KEY_PART_INFO *first_non_group_part= NULL;
  KEY_PART_INFO *first_non_infix_part= NULL;
  uint key_infix_parts= 0;
  uint cur_group_key_parts= 0;
  uint cur_group_prefix_len= 0;
  /* Cost-related variables for the best index so far. */
  double best_read_cost= DBL_MAX;
  ha_rows best_records= 0;
  SEL_ARG *best_index_tree= NULL;
  ha_rows best_quick_prefix_records= 0;
  uint best_param_idx= 0;
  double cur_read_cost= DBL_MAX;
  ha_rows cur_records;
  SEL_ARG *cur_index_tree= NULL;
  ha_rows cur_quick_prefix_records= 0;
  /* Given a defined value because it is copied even when there is no tree. */
  uint cur_param_idx= MAX_KEY;
  key_map used_key_parts_map;     /* Key parts seen in the DISTINCT list. */
  /*
    Per-candidate counterparts of min_max_arg_part, key_infix, key_infix_len
    and used_key_parts. They are declared here, before the loop, because the
    loop body jumps to next_index with goto, and are reset at that label so
    that nothing computed for one index leaks into the next one.
  */
  KEY_PART_INFO *cur_min_max_arg_part= NULL;
  byte cur_key_infix[MAX_KEY_LENGTH];
  uint cur_key_infix_len= 0;
  uint cur_used_key_parts= 0;
  uint pk= param->table->s->primary_key;

  for (uint cur_index= 0 ; cur_index_info != cur_index_info_end ;
       cur_index_info++, cur_index++)
  {
    /* Check (B1) - if current index is covering. */
    if (!table->used_keys.is_set(cur_index))
      goto next_index;

    /*
      If the current storage manager is such that it appends the primary key to
      each index, then the above condition is insufficient to check if the
      index is covering. In such cases it may happen that some fields are
      covered by the PK index, but not by the current index. Since we can't
      use the concatenation of both indexes for index lookup, such an index
      does not qualify as covering in our case. If this is the case, below
      we check that all query fields are indeed covered by 'cur_index'.
    */
    if (pk < MAX_KEY && cur_index != pk &&
        (table->file->table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX))
    {
      /* For each table field */
      for (uint i= 0; i < table->s->fields; i++)
      {
        Field *cur_field= table->field[i];
        /*
          If the field is used in the current query, check that the
          field is covered by some keypart of the current index.
        */
        if (thd->query_id == cur_field->query_id)
        {
          KEY_PART_INFO *key_part= cur_index_info->key_part;
          KEY_PART_INFO *key_part_end= key_part + cur_index_info->key_parts;
          for (;;)
          {
            if (key_part->field == cur_field)
              break;
            if (++key_part == key_part_end)
              goto next_index;                  // Field was not part of key
          }
        }
      }
    }

    /*
      Check (GA1) for GROUP BY queries.
    */
    if (join->group_list)
    {
      cur_part= cur_index_info->key_part;
      end_part= cur_part + cur_index_info->key_parts;
      /* Iterate in parallel over the GROUP list and the index parts. */
      for (tmp_group= join->group_list; tmp_group && (cur_part != end_part);
           tmp_group= tmp_group->next, cur_part++)
      {
        /*
          TODO:
          tmp_group::item is an array of Item, is it OK to consider only the
          first Item? If so, then why? What is the array for?
        */
        /* Above we already checked that all group items are fields. */
        DBUG_ASSERT((*tmp_group->item)->type() == Item::FIELD_ITEM);
        Item_field *group_field= (Item_field *) (*tmp_group->item);
        if (group_field->field->eq(cur_part->field))
        {
          cur_group_prefix_len+= cur_part->store_length;
          ++cur_group_key_parts;
        }
        else
          goto next_index;
      }
    }
    /*
      Check (GA2) if this is a DISTINCT query.
      If GA2, then Store a new ORDER object in group_fields_array at the
      position of the key part of item_field->field. Thus we get the ORDER
      objects for each field ordered as the corresponding key parts.
      Later group_fields_array of ORDER objects is used to convert the query
      to a GROUP query.
    */
    else if (join->select_distinct)
    {
      select_items_it.rewind();
      used_key_parts_map.clear_all();
      uint max_key_part= 0;
      while ((item= select_items_it++))
      {
        item_field= (Item_field*) item; /* (SA5) already checked above. */
        /* Find the order of the key part in the index. */
        key_part_nr= get_field_keypart(cur_index_info, item_field->field);
        /*
          Check if this attribute was already present in the select list.
          If it was present, then its corresponding key part was already used.
        */
        if (used_key_parts_map.is_set(key_part_nr))
          continue;
        if (key_part_nr < 1 || key_part_nr > join->fields_list.elements)
          goto next_index;
        cur_part= cur_index_info->key_part + key_part_nr - 1;
        cur_group_prefix_len+= cur_part->store_length;
        used_key_parts_map.set_bit(key_part_nr);
        ++cur_group_key_parts;
        max_key_part= max(max_key_part,key_part_nr);
      }
      /*
        Check that used key parts forms a prefix of the index.
        To check this we compare bits in all_parts and cur_parts.
        all_parts have all bits set from 0 to (max_key_part-1).
        cur_parts have bits set for only used keyparts.
      */
      ulonglong all_parts, cur_parts;
      /* Shift in the 64-bit type the mask is stored in, not in int. */
      all_parts= ((ulonglong) 1 << max_key_part) - 1;
      cur_parts= used_key_parts_map.to_ulonglong() >> 1;
      if (all_parts != cur_parts)
        goto next_index;
    }
    else
      DBUG_ASSERT(FALSE);

    /* Check (SA2). */
    if (min_max_arg_item)
    {
      key_part_nr= get_field_keypart(cur_index_info, min_max_arg_item->field);
      if (key_part_nr <= cur_group_key_parts)
        goto next_index;
      cur_min_max_arg_part= cur_index_info->key_part + key_part_nr - 1;
    }

    /*
      Check (NGA1, NGA2) and extract a sequence of constants to be used as part
      of all search keys.
    */

    /*
      If there is MIN/MAX, each keypart between the last group part and the
      MIN/MAX part must participate in one equality with constants, and all
      keyparts after the MIN/MAX part must not be referenced in the query.

      If there is no MIN/MAX, the keyparts after the last group part can be
      referenced only in equalities with constants, and the referenced keyparts
      must form a sequence without any gaps that starts immediately after the
      last group keypart.
    */
    last_part= cur_index_info->key_part + cur_index_info->key_parts;
    first_non_group_part= (cur_group_key_parts < cur_index_info->key_parts) ?
                          cur_index_info->key_part + cur_group_key_parts :
                          NULL;
    first_non_infix_part= cur_min_max_arg_part ?
                          (cur_min_max_arg_part < last_part) ?
                             cur_min_max_arg_part + 1 :
                             NULL :
                           NULL;
    if (first_non_group_part &&
        (!cur_min_max_arg_part ||
         (cur_min_max_arg_part - first_non_group_part > 0)))
    {
      if (tree)
      {
        uint dummy;
        SEL_ARG *index_range_tree= get_index_range_tree(cur_index, tree, param,
                                                        &dummy);
        if (!get_constant_key_infix(cur_index_info, index_range_tree,
                                    first_non_group_part, cur_min_max_arg_part,
                                    last_part, thd, cur_key_infix,
                                    &cur_key_infix_len,
                                    &first_non_infix_part))
          goto next_index;
      }
      else if (cur_min_max_arg_part &&
               (cur_min_max_arg_part - first_non_group_part > 0))
        /*
          There is a gap but no range tree, thus no predicates at all for the
          non-group keyparts.
        */
        goto next_index;
    }

    /*
      Test (WA1) partially - that no other keypart after the last infix part is
      referenced in the query.
    */
    if (first_non_infix_part)
    {
      for (cur_part= first_non_infix_part; cur_part != last_part; cur_part++)
      {
        if (cur_part->field->query_id == thd->query_id)
          goto next_index;
      }
    }

    /* If we got to this point, cur_index_info passes the test. */
    key_infix_parts= cur_key_infix_len ?
                     (first_non_infix_part - first_non_group_part) : 0;
    cur_used_key_parts= cur_group_key_parts + key_infix_parts;

    /* Compute the cost of using this index. */
    if (tree)
    {
      /* Find the SEL_ARG sub-tree that corresponds to the chosen index. */
      cur_index_tree= get_index_range_tree(cur_index, tree, param,
                                           &cur_param_idx);
      /* Check if this range tree can be used for prefix retrieval. */
      cur_quick_prefix_records= check_quick_select(param, cur_param_idx,
                                                    cur_index_tree);
    }
    cost_group_min_max(table, cur_index_info, cur_used_key_parts,
                       cur_group_key_parts, tree, cur_index_tree,
                       cur_quick_prefix_records, have_min, have_max,
                       &cur_read_cost, &cur_records);
    /*
      If cur_read_cost is lower than best_read_cost use cur_index.
      Do not compare doubles directly because they may have different
      representations (64 vs. 80 bits).
    */
    if (cur_read_cost < best_read_cost - (DBL_EPSILON * cur_read_cost))
    {
      index_info= cur_index_info;
      index= cur_index;
      best_read_cost= cur_read_cost;
      best_records= cur_records;
      best_index_tree= cur_index_tree;
      best_quick_prefix_records= cur_quick_prefix_records;
      best_param_idx= cur_param_idx;
      group_key_parts= cur_group_key_parts;
      group_prefix_len= cur_group_prefix_len;
      /*
        Remember the infix, the number of used key parts and the MIN/MAX
        keypart of the winning index, so that the plan built below describes
        this index and not the one that happened to be examined last.
      */
      min_max_arg_part= cur_min_max_arg_part;
      used_key_parts= cur_used_key_parts;
      key_infix_len= cur_key_infix_len;
      if (key_infix_len)
        memcpy(key_infix, cur_key_infix, key_infix_len);
    }

  next_index:
    /* Reset all per-candidate state before the next index is examined. */
    cur_group_key_parts= 0;
    cur_group_prefix_len= 0;
    cur_key_infix_len= 0;
    cur_used_key_parts= 0;
    cur_min_max_arg_part= NULL;
  }
  if (!index_info) /* No usable index found. */
    DBUG_RETURN(NULL);

  /* Check (SA3) for the where clause. */
  if (join->conds && min_max_arg_item &&
      !check_group_min_max_predicates(join->conds, min_max_arg_item,
                                      (index_info->flags & HA_SPATIAL) ?
                                      Field::itMBR : Field::itRAW))
    DBUG_RETURN(NULL);

  /* The query passes all tests, so construct a new TRP object. */
  read_plan= new (param->mem_root)
                 TRP_GROUP_MIN_MAX(have_min, have_max, min_max_arg_part,
                                   group_prefix_len, used_key_parts,
                                   group_key_parts, index_info, index,
                                   key_infix_len,
                                   (key_infix_len > 0) ? key_infix : NULL,
                                   tree, best_index_tree, best_param_idx,
                                   best_quick_prefix_records);
  if (read_plan)
  {
    if (tree && read_plan->quick_prefix_records == 0)
      DBUG_RETURN(NULL);

    read_plan->read_cost= best_read_cost;
    read_plan->records=   best_records;

    DBUG_PRINT("info",
               ("Returning group min/max plan: cost: %g, records: %lu",
                read_plan->read_cost, (ulong) read_plan->records));
  }

  DBUG_RETURN(read_plan);
}


/*
8664 8665
  Check that the MIN/MAX attribute participates only in range predicates
  with constants.
8666 8667 8668 8669 8670 8671

  SYNOPSIS
    check_group_min_max_predicates()
    cond              tree (or subtree) describing all or part of the WHERE
                      clause being analyzed
    min_max_arg_item  the field referenced by the MIN/MAX function(s)
8672
    image_type        the Field image type passed by the caller: Field::itMBR
                      when the chosen index is spatial, Field::itRAW otherwise
8673 8674 8675

  DESCRIPTION
    The function walks recursively over the cond tree representing a WHERE
8676
    clause, and checks condition (SA3) - if a field is referenced by a MIN/MAX
8677 8678
    aggregate function, it is referenced only by one of the following
    predicates: {=, !=, <, <=, >, >=, between, is null, is not null}.
8679 8680 8681 8682 8683 8684 8685

  RETURN
    TRUE  if cond passes the test
    FALSE o/w
*/

static bool
8686 8687
check_group_min_max_predicates(COND *cond, Item_field *min_max_arg_item,
                               Field::imagetype image_type)
8688 8689
{
  DBUG_ENTER("check_group_min_max_predicates");
8690
  DBUG_ASSERT(cond && min_max_arg_item);
8691 8692 8693 8694 8695 8696 8697 8698 8699

  Item::Type cond_type= cond->type();
  if (cond_type == Item::COND_ITEM) /* 'AND' or 'OR' */
  {
    DBUG_PRINT("info", ("Analyzing: %s", ((Item_func*) cond)->func_name()));
    List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
    Item *and_or_arg;
    while ((and_or_arg= li++))
    {
monty@mishka.local's avatar
monty@mishka.local committed
8700
      if (!check_group_min_max_predicates(and_or_arg, min_max_arg_item,
8701
                                         image_type))
8702 8703 8704 8705 8706
        DBUG_RETURN(FALSE);
    }
    DBUG_RETURN(TRUE);
  }

8707 8708 8709 8710 8711 8712 8713 8714 8715 8716 8717 8718 8719
  /*
    TODO:
    This is a very crude fix to handle sub-selects in the WHERE clause
    (Item_subselect objects). With the test below we rule out from the
    optimization all queries with subselects in the WHERE clause. What has to
    be done, is that here we should analyze whether the subselect references
    the MIN/MAX argument field, and disallow the optimization only if this is
    so.
  */
  if (cond_type == Item::SUBSELECT_ITEM)
    DBUG_RETURN(FALSE);
  
  /* We presume that at this point there are no other Items than functions. */
8720 8721 8722 8723 8724 8725 8726 8727 8728 8729 8730 8731 8732
  DBUG_ASSERT(cond_type == Item::FUNC_ITEM);

  /* Test if cond references only group-by or non-group fields. */
  Item_func *pred= (Item_func*) cond;
  Item **arguments= pred->arguments();
  Item *cur_arg;
  DBUG_PRINT("info", ("Analyzing: %s", pred->func_name()));
  for (uint arg_idx= 0; arg_idx < pred->argument_count (); arg_idx++)
  {
    cur_arg= arguments[arg_idx];
    DBUG_PRINT("info", ("cur_arg: %s", cur_arg->full_name()));
    if (cur_arg->type() == Item::FIELD_ITEM)
    {
8733
      if (min_max_arg_item->eq(cur_arg, 1)) 
8734 8735 8736
      {
       /*
         If pred references the MIN/MAX argument, check whether pred is a range
8737
         condition that compares the MIN/MAX argument with a constant.
8738 8739
       */
        Item_func::Functype pred_type= pred->functype();
8740 8741 8742 8743 8744 8745 8746 8747 8748 8749
        if (pred_type != Item_func::EQUAL_FUNC     &&
            pred_type != Item_func::LT_FUNC        &&
            pred_type != Item_func::LE_FUNC        &&
            pred_type != Item_func::GT_FUNC        &&
            pred_type != Item_func::GE_FUNC        &&
            pred_type != Item_func::BETWEEN        &&
            pred_type != Item_func::ISNULL_FUNC    &&
            pred_type != Item_func::ISNOTNULL_FUNC &&
            pred_type != Item_func::EQ_FUNC        &&
            pred_type != Item_func::NE_FUNC)
8750 8751 8752 8753
          DBUG_RETURN(FALSE);

        /* Check that pred compares min_max_arg_item with a constant. */
        Item *args[3];
8754
        bzero(args, 3 * sizeof(Item*));
8755 8756 8757 8758
        bool inv;
        /* Test if this is a comparison of a field and a constant. */
        if (!simple_pred(pred, args, &inv))
          DBUG_RETURN(FALSE);
8759 8760 8761 8762 8763 8764 8765 8766 8767 8768 8769 8770 8771 8772 8773 8774 8775 8776 8777

        /* Check for compatible string comparisons - similar to get_mm_leaf. */
        if (args[0] && args[1] && !args[2] && // this is a binary function
            min_max_arg_item->result_type() == STRING_RESULT &&
            /*
              Don't use an index when comparing strings of different collations.
            */
            ((args[1]->result_type() == STRING_RESULT &&
              image_type == Field::itRAW &&
              ((Field_str*) min_max_arg_item->field)->charset() !=
              pred->compare_collation())
             ||
             /*
               We can't always use indexes when comparing a string index to a
               number.
             */
             (args[1]->result_type() != STRING_RESULT &&
              min_max_arg_item->field->cmp_type() != args[1]->result_type())))
          DBUG_RETURN(FALSE);
8778 8779 8780 8781
      }
    }
    else if (cur_arg->type() == Item::FUNC_ITEM)
    {
monty@mishka.local's avatar
monty@mishka.local committed
8782
      if (!check_group_min_max_predicates(cur_arg, min_max_arg_item,
8783
                                         image_type))
8784 8785 8786 8787 8788 8789 8790 8791 8792 8793 8794 8795 8796 8797 8798 8799 8800 8801 8802
        DBUG_RETURN(FALSE);
    }
    else if (cur_arg->const_item())
    {
      DBUG_RETURN(TRUE);
    }
    else
      DBUG_RETURN(FALSE);
  }

  DBUG_RETURN(TRUE);
}


/*
  Extract a sequence of constants from a conjunction of equality predicates.

  SYNOPSIS
    get_constant_key_infix()
8803 8804 8805 8806 8807 8808 8809 8810 8811
    index_info             [in]  Descriptor of the chosen index.
    index_range_tree       [in]  Range tree for the chosen index
    first_non_group_part   [in]  First index part after group attribute parts
    min_max_arg_part       [in]  The keypart of the MIN/MAX argument if any
    last_part              [in]  Last keypart of the index
    thd                    [in]  Current thread
    key_infix              [out] Infix of constants to be used for index lookup
    key_infix_len          [out] Lenghth of the infix
    first_non_infix_part   [out] The first keypart after the infix (if any)
8812 8813 8814
    
  DESCRIPTION
    Test conditions (NGA1, NGA2) from get_best_group_min_max(). Namely,
8815 8816
    for each keypart field NGF_i not in GROUP-BY, check that there is a
    constant equality predicate among conds with the form (NGF_i = const_ci) or
8817 8818
    (const_ci = NGF_i).
    Thus all the NGF_i attributes must fill the 'gap' between the last group-by
8819 8820 8821 8822 8823 8824
    attribute and the MIN/MAX attribute in the index (if present). If these
    conditions hold, copy each constant from its corresponding predicate into
    key_infix, in the order its NG_i attribute appears in the index, and update
    key_infix_len with the total length of the key parts in key_infix.

  RETURN
8825
    TRUE  if the index passes the test
8826 8827 8828 8829
    FALSE o/w
*/

static bool
8830
get_constant_key_infix(KEY *index_info, SEL_ARG *index_range_tree,
8831
                       KEY_PART_INFO *first_non_group_part,
8832 8833 8834 8835
                       KEY_PART_INFO *min_max_arg_part,
                       KEY_PART_INFO *last_part, THD *thd,
                       byte *key_infix, uint *key_infix_len,
                       KEY_PART_INFO **first_non_infix_part)
8836 8837 8838
{
  SEL_ARG       *cur_range;
  KEY_PART_INFO *cur_part;
8839 8840
  /* End part for the first loop below. */
  KEY_PART_INFO *end_part= min_max_arg_part ? min_max_arg_part : last_part;
8841 8842 8843 8844 8845 8846 8847 8848 8849 8850 8851 8852 8853 8854 8855 8856 8857

  *key_infix_len= 0;
  byte *key_ptr= key_infix;
  for (cur_part= first_non_group_part; cur_part != end_part; cur_part++)
  {
    /*
      Find the range tree for the current keypart. We assume that
      index_range_tree points to the leftmost keypart in the index.
    */
    for (cur_range= index_range_tree; cur_range;
         cur_range= cur_range->next_key_part)
    {
      if (cur_range->field->eq(cur_part->field))
        break;
    }
    if (!cur_range)
    {
8858 8859 8860 8861 8862 8863 8864
      if (min_max_arg_part)
        return FALSE; /* The current keypart has no range predicates at all. */
      else
      {
        *first_non_infix_part= cur_part;
        return TRUE;
      }
8865 8866 8867 8868 8869 8870 8871 8872 8873 8874 8875 8876 8877 8878 8879 8880 8881 8882 8883 8884 8885 8886 8887 8888
    }

    /* Check that the current range tree is a single point interval. */
    if (cur_range->prev || cur_range->next)
      return FALSE; /* This is not the only range predicate for the field. */
    if ((cur_range->min_flag & NO_MIN_RANGE) ||
        (cur_range->max_flag & NO_MAX_RANGE) ||
        (cur_range->min_flag & NEAR_MIN) || (cur_range->max_flag & NEAR_MAX))
      return FALSE;

    uint field_length= cur_part->store_length;
    if ((cur_range->maybe_null &&
         cur_range->min_value[0] && cur_range->max_value[0])
        ||
        (memcmp(cur_range->min_value, cur_range->max_value, field_length) == 0))
    { /* cur_range specifies 'IS NULL' or an equality condition. */
      memcpy(key_ptr, cur_range->min_value, field_length);
      key_ptr+= field_length;
      *key_infix_len+= field_length;
    }
    else
      return FALSE;
  }

8889 8890 8891
  if (!min_max_arg_part && (cur_part == last_part))
    *first_non_infix_part= last_part;

8892 8893 8894 8895
  return TRUE;
}


8896 8897 8898 8899 8900 8901 8902 8903 8904 8905 8906 8907 8908 8909 8910 8911 8912 8913 8914 8915
/*
  Map a field to the key part of an index that is built on it.

  SYNOPSIS
    get_field_keypart()
    index  descriptor of an index
    field  field that possibly references some key part in index

  NOTES
    The result is 1-based, so a KEY_PART_INFO pointer is obtained with
    part= index->key_part + get_field_keypart(...) - 1;

  RETURN
    Positive number which is the consecutive number of the key part, or
    0 if field does not reference any index field.
*/

static inline uint
get_field_keypart(KEY *index, Field *field)
{
  KEY_PART_INFO *const first_part= index->key_part;

  for (uint part_no= 0; part_no < index->key_parts; part_no++)
  {
    if (field->eq(first_part[part_no].field))
      return part_no + 1;
  }
  return 0;
}


/*
  Find the SEL_ARG sub-tree that corresponds to the chosen index.

  SYNOPSIS
    get_index_range_tree()
    index     [in]  The ID of the index being looked for
    range_tree[in]  Tree of ranges being searched
    param     [in]  PARAM from SQL_SELECT::test_quick_select
    param_idx [out] Index in the array PARAM::key that corresponds to 'index'

  DESCRIPTION
    A SEL_TREE contains range trees for all usable indexes, ordered in the
    same way as the members of PARAM::key. The position of 'index' is first
    looked up in PARAM::real_keynr; that position is stored in param_idx (to
    be used later as argument of check_quick_select()) and selects the
    sub-tree that is returned.

  NOTES
    If 'index' is not present in PARAM::real_keynr, param_idx is set to
    param->keys and range_tree->keys[param->keys] is returned; there is no
    separate "not found" result. Callers are presumably expected to pass
    only indexes known to PARAM - verify against the call sites.

  RETURN
    Pointer to the SEL_ARG subtree that corresponds to index.
*/

SEL_ARG * get_index_range_tree(uint index, SEL_TREE* range_tree, PARAM *param,
                               uint *param_idx)
{
  uint pos; /* Position in param->real_keynr */

  for (pos= 0; pos < param->keys; pos++)
  {
    if (param->real_keynr[pos] == index)
      break;
  }
  *param_idx= pos;
  return range_tree->keys[pos];
}


8965
/*
8966
  Compute the cost of a quick_group_min_max_select for a particular index.
8967 8968

  SYNOPSIS
8969 8970 8971 8972 8973 8974 8975
    cost_group_min_max()
    table                [in] The table being accessed
    index_info           [in] The index used to access the table
    used_key_parts       [in] Number of key parts used to access the index
    group_key_parts      [in] Number of index key parts in the group prefix
    range_tree           [in] Tree of ranges for all indexes
    index_tree           [in] The range tree for the current index
monty@mysql.com's avatar
monty@mysql.com committed
8976 8977
    quick_prefix_records [in] Number of records retrieved by the internally
			      used quick range select if any
8978 8979 8980 8981
    have_min             [in] True if there is a MIN function
    have_max             [in] True if there is a MAX function
    read_cost           [out] The cost to retrieve rows via this quick select
    records             [out] The number of rows retrieved
8982 8983

  DESCRIPTION
monty@mysql.com's avatar
monty@mysql.com committed
8984 8985
    This method computes the access cost of a TRP_GROUP_MIN_MAX instance and
    the number of rows returned. It updates this->read_cost and this->records.
8986 8987 8988 8989 8990 8991 8992 8993 8994 8995 8996 8997 8998 8999 9000 9001 9002 9003 9004 9005 9006 9007 9008 9009 9010 9011 9012 9013 9014 9015 9016 9017 9018 9019 9020 9021 9022 9023 9024

  NOTES
    The cost computation distinguishes several cases:
    1) No equality predicates over non-group attributes (thus no key_infix).
       If groups are bigger than blocks on the average, then we assume that it
       is very unlikely that block ends are aligned with group ends, thus even
       if we look for both MIN and MAX keys, all pairs of neighbor MIN/MAX
       keys, except for the first MIN and the last MAX keys, will be in the
       same block.  If groups are smaller than blocks, then we are going to
       read all blocks.
    2) There are equality predicates over non-group attributes.
       In this case the group prefix is extended by additional constants, and
       as a result the min/max values are inside sub-groups of the original
       groups. The number of blocks that will be read depends on whether the
       ends of these sub-groups will be contained in the same or in different
       blocks. We compute the probability for the two ends of a subgroup to be
       in two different blocks as the ratio of:
       - the number of positions of the left-end of a subgroup inside a group,
         such that the right end of the subgroup is past the end of the buffer
         containing the left-end, and
       - the total number of possible positions for the left-end of the
         subgroup, which is the number of keys in the containing group.
       We assume it is very unlikely that two ends of subsequent subgroups are
       in the same block.
    3) The are range predicates over the group attributes.
       Then some groups may be filtered by the range predicates. We use the
       selectivity of the range predicates to decide how many groups will be
       filtered.

  TODO
     - Take into account the optional range predicates over the MIN/MAX
       argument.
     - Check if we have a PK index and we use all cols - then each key is a
       group, and it will be better to use an index scan.

  RETURN
    None
*/

9025 9026 9027 9028 9029
void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
                        uint group_key_parts, SEL_TREE *range_tree,
                        SEL_ARG *index_tree, ha_rows quick_prefix_records,
                        bool have_min, bool have_max,
                        double *read_cost, ha_rows *records)
9030 9031 9032 9033 9034 9035 9036 9037 9038 9039 9040 9041
{
  uint table_records;
  uint num_groups;
  uint num_blocks;
  uint keys_per_block;
  uint keys_per_group;
  uint keys_per_subgroup; /* Average number of keys in sub-groups */
                          /* formed by a key infix. */
  double p_overlap; /* Probability that a sub-group overlaps two blocks. */
  double quick_prefix_selectivity;
  double io_cost;
  double cpu_cost= 0; /* TODO: CPU cost of index_read calls? */
timour@mysql.com's avatar
timour@mysql.com committed
9042
  DBUG_ENTER("cost_group_min_max");
monty@mysql.com's avatar
monty@mysql.com committed
9043

9044 9045 9046 9047 9048 9049 9050 9051 9052 9053 9054 9055 9056 9057 9058 9059 9060 9061
  table_records= table->file->records;
  keys_per_block= (table->file->block_size / 2 /
                   (index_info->key_length + table->file->ref_length)
                        + 1);
  num_blocks= (table_records / keys_per_block) + 1;

  /* Compute the number of keys in a group. */
  keys_per_group= index_info->rec_per_key[group_key_parts - 1];
  if (keys_per_group == 0) /* If there is no statistics try to guess */
    /* each group contains 10% of all records */
    keys_per_group= (table_records / 10) + 1;
  num_groups= (table_records / keys_per_group) + 1;

  /* Apply the selectivity of the quick select for group prefixes. */
  if (range_tree && (quick_prefix_records != HA_POS_ERROR))
  {
    quick_prefix_selectivity= (double) quick_prefix_records /
                              (double) table_records;
serg@serg.mylan's avatar
serg@serg.mylan committed
9062
    num_groups= (uint) rint(num_groups * quick_prefix_selectivity);
9063 9064 9065 9066 9067 9068 9069 9070 9071 9072 9073 9074 9075 9076 9077 9078 9079 9080 9081 9082 9083 9084 9085 9086 9087 9088 9089 9090 9091 9092 9093
  }

  if (used_key_parts > group_key_parts)
  { /*
      Compute the probability that two ends of a subgroup are inside
      different blocks.
    */
    keys_per_subgroup= index_info->rec_per_key[used_key_parts - 1];
    if (keys_per_subgroup >= keys_per_block) /* If a subgroup is bigger than */
      p_overlap= 1.0;       /* a block, it will overlap at least two blocks. */
    else
    {
      double blocks_per_group= (double) num_blocks / (double) num_groups;
      p_overlap= (blocks_per_group * (keys_per_subgroup - 1)) / keys_per_group;
      p_overlap= min(p_overlap, 1.0);
    }
    io_cost= (double) min(num_groups * (1 + p_overlap), num_blocks);
  }
  else
    io_cost= (keys_per_group > keys_per_block) ?
             (have_min && have_max) ? (double) (num_groups + 1) :
                                      (double) num_groups :
             (double) num_blocks;

  /*
    TODO: If there is no WHERE clause and no other expressions, there should be
    no CPU cost. We leave it here to make this cost comparable to that of index
    scan as computed in SQL_SELECT::test_quick_select().
  */
  cpu_cost= (double) num_groups / TIME_FOR_COMPARE;

9094
  *read_cost= io_cost + cpu_cost;
9095
  *records= num_groups;
9096 9097

  DBUG_PRINT("info",
9098 9099
             ("table rows=%u, keys/block=%u, keys/group=%u, result rows=%u, blocks=%u",
              table_records, keys_per_block, keys_per_group, *records,
9100 9101 9102 9103 9104 9105 9106 9107 9108 9109 9110 9111 9112 9113 9114 9115 9116 9117 9118 9119 9120 9121 9122 9123 9124 9125 9126 9127 9128 9129 9130 9131 9132
              num_blocks));
  DBUG_VOID_RETURN;
}


/*
  Build the quick select object for a GROUP BY query with MIN/MAX.

  SYNOPSIS
    TRP_GROUP_MIN_MAX::make_quick()
    param              Parameter from test_quick_select
    retrieve_full_rows ignored
    parent_alloc       Memory pool to use, if any.

  NOTES
    retrieve_full_rows is ignored because QUICK_GROUP_MIN_MAX_SELECT always
    performs 'index only' scans. Everything else needed to create the QUICK
    object was already computed when this TRP was created.

    When the plan has a range tree, a QUICK_RANGE_SELECT for the group
    prefixes is attached (unless the prefix row estimate is HA_POS_ERROR) and
    the intervals over the MIN/MAX key part are copied into the new object.

  RETURN
    New QUICK_GROUP_MIN_MAX_SELECT object if successfully created,
    NULL o/w.
*/

QUICK_SELECT_I *
TRP_GROUP_MIN_MAX::make_quick(PARAM *param, bool retrieve_full_rows,
                              MEM_ROOT *parent_alloc)
{
  QUICK_GROUP_MIN_MAX_SELECT *quick;
  DBUG_ENTER("TRP_GROUP_MIN_MAX::make_quick");

  quick= new QUICK_GROUP_MIN_MAX_SELECT(param->table,
                                        param->thd->lex->select_lex.join,
                                        have_min, have_max, min_max_arg_part,
                                        group_prefix_len, used_key_parts,
                                        index_info, index, read_cost, records,
                                        key_infix_len, key_infix,
                                        parent_alloc);
  if (!quick)
    DBUG_RETURN(NULL);

  if (quick->init())
  {
    delete quick;
    DBUG_RETURN(NULL);
  }

  if (!range_tree)
    quick->quick_prefix_select= NULL;
  else
  {
    DBUG_ASSERT(quick_prefix_records > 0);
    if (quick_prefix_records != HA_POS_ERROR)
    {
      /* Make a QUICK_RANGE_SELECT to be used for group prefix retrieval. */
      quick->quick_prefix_select= get_quick_select(param, param_idx,
                                                   index_tree,
                                                   &quick->alloc);
    }
    else
      quick->quick_prefix_select= NULL; /* Can't construct a quick select. */

    /*
      Pick the SEL_ARG list that holds the ranges of the MIN/MAX attribute
      only, and turn each of its intervals into a QUICK_RANGE of the new
      quick select.
    */
    if (min_max_arg_part)
    {
      SEL_ARG *arg_range;

      /* Walk the key parts until the one of the MIN/MAX argument. */
      for (arg_range= index_tree; arg_range;
           arg_range= arg_range->next_key_part)
      {
        if (arg_range->field->eq(min_max_arg_part->field))
          break;
      }

      /* Rewind to the leftmost interval of that key part. */
      if (arg_range)
      {
        while (arg_range->prev)
          arg_range= arg_range->prev;
      }

      /* Add the intervals from left to right. */
      for (; arg_range; arg_range= arg_range->next)
      {
        if (quick->add_range(arg_range))
        {
          delete quick;
          quick= NULL;
          DBUG_RETURN(NULL);
        }
      }
    }
  }

  quick->update_key_stat();

  DBUG_RETURN(quick);
}


/*
  Construct new quick select for group queries with min/max.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::QUICK_GROUP_MIN_MAX_SELECT()
    table             The table being accessed
    join              Descriptor of the current query
    have_min          TRUE if the query selects a MIN function
    have_max          TRUE if the query selects a MAX function
    min_max_arg_part  The only argument field of all MIN/MAX functions
    group_prefix_len  Length of all key parts in the group prefix
    prefix_key_parts  All key parts in the group prefix
    index_info        The index chosen for data access
    use_index         The id of index_info
    read_cost         Cost of this access method
    records           Number of records returned
    key_infix_len     Length of the key infix appended to the group prefix
    key_infix         Infix of constants from equality predicates
    parent_alloc      Memory pool for this and quick_prefix_select data

  RETURN
    None
*/

monty@mysql.com's avatar
monty@mysql.com committed
9223 9224 9225 9226 9227 9228 9229 9230 9231 9232 9233 9234 9235
QUICK_GROUP_MIN_MAX_SELECT::
QUICK_GROUP_MIN_MAX_SELECT(TABLE *table, JOIN *join_arg, bool have_min_arg,
                           bool have_max_arg,
                           KEY_PART_INFO *min_max_arg_part_arg,
                           uint group_prefix_len_arg,
                           uint used_key_parts_arg, KEY *index_info_arg,
                           uint use_index, double read_cost_arg,
                           ha_rows records_arg, uint key_infix_len_arg,
                           byte *key_infix_arg, MEM_ROOT *parent_alloc)
  :join(join_arg), index_info(index_info_arg),
   group_prefix_len(group_prefix_len_arg), have_min(have_min_arg),
   have_max(have_max_arg), seen_first_key(FALSE),
   min_max_arg_part(min_max_arg_part_arg), key_infix(key_infix_arg),
9236 9237
   key_infix_len(key_infix_len_arg), min_functions_it(NULL),
   max_functions_it(NULL)
9238 9239 9240 9241 9242 9243
{
  head=       table;
  file=       head->file;
  index=      use_index;
  record=     head->record[0];
  tmp_record= head->record[1];
9244 9245 9246
  read_time= read_cost_arg;
  records= records_arg;
  used_key_parts= used_key_parts_arg;
9247 9248 9249
  real_prefix_len= group_prefix_len + key_infix_len;
  group_prefix= NULL;
  min_max_arg_len= min_max_arg_part ? min_max_arg_part->store_length : 0;
monty@mysql.com's avatar
monty@mysql.com committed
9250 9251 9252 9253 9254 9255

  /*
    We can't have parent_alloc set as the init function can't handle this case
    yet.
  */
  DBUG_ASSERT(!parent_alloc);
9256 9257 9258
  if (!parent_alloc)
  {
    init_sql_alloc(&alloc, join->thd->variables.range_alloc_block_size, 0);
monty@mysql.com's avatar
monty@mysql.com committed
9259
    join->thd->mem_root= &alloc;
9260 9261
  }
  else
9262
    bzero(&alloc, sizeof(MEM_ROOT));            // ensure that it's not used
9263 9264 9265 9266 9267 9268 9269 9270 9271
}


/*
  Do post-constructor initialization.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::init()
  
9272 9273 9274 9275 9276 9277
  DESCRIPTION
    The method performs initialization that cannot be done in the constructor
    such as memory allocations that may fail. It allocates memory for the
    group prefix and inifix buffers, and for the lists of MIN/MAX item to be
    updated during execution.

9278 9279 9280 9281 9282 9283 9284 9285 9286 9287 9288 9289 9290 9291 9292 9293 9294 9295 9296 9297 9298 9299 9300 9301 9302 9303 9304 9305 9306 9307 9308 9309 9310 9311 9312
  RETURN
    0      OK
    other  Error code
*/

int QUICK_GROUP_MIN_MAX_SELECT::init()
{
  if (group_prefix) /* Already initialized. */
    return 0;

  if (!(last_prefix= (byte*) alloc_root(&alloc, group_prefix_len)))
      return 1;
  /*
    We may use group_prefix to store keys with all select fields, so allocate
    enough space for it.
  */
  if (!(group_prefix= (byte*) alloc_root(&alloc,
                                         real_prefix_len + min_max_arg_len)))
    return 1;

  if (key_infix_len > 0)
  {
    /*
      The memory location pointed to by key_infix will be deleted soon, so
      allocate a new buffer and copy the key_infix into it.
    */
    byte *tmp_key_infix= (byte*) alloc_root(&alloc, key_infix_len);
    if (!tmp_key_infix)
      return 1;
    memcpy(tmp_key_infix, this->key_infix, key_infix_len);
    this->key_infix= tmp_key_infix;
  }

  if (min_max_arg_part)
  {
monty@mishka.local's avatar
monty@mishka.local committed
9313
    if (my_init_dynamic_array(&min_max_ranges, sizeof(QUICK_RANGE*), 16, 16))
9314 9315
      return 1;

9316 9317
    if (have_min)
    {
monty@mishka.local's avatar
monty@mishka.local committed
9318
      if (!(min_functions= new List<Item_sum>))
9319 9320 9321 9322 9323 9324
        return 1;
    }
    else
      min_functions= NULL;
    if (have_max)
    {
monty@mishka.local's avatar
monty@mishka.local committed
9325
      if (!(max_functions= new List<Item_sum>))
9326 9327 9328 9329
        return 1;
    }
    else
      max_functions= NULL;
9330

9331 9332 9333
    Item_sum *min_max_item;
    Item_sum **func_ptr= join->sum_funcs;
    while ((min_max_item= *(func_ptr++)))
9334
    {
9335 9336 9337 9338
      if (have_min && (min_max_item->sum_func() == Item_sum::MIN_FUNC))
        min_functions->push_back(min_max_item);
      else if (have_max && (min_max_item->sum_func() == Item_sum::MAX_FUNC))
        max_functions->push_back(min_max_item);
9339 9340
    }

9341 9342 9343 9344 9345 9346 9347 9348 9349 9350 9351
    if (have_min)
    {
      if (!(min_functions_it= new List_iterator<Item_sum>(*min_functions)))
        return 1;
    }

    if (have_max)
    {
      if (!(max_functions_it= new List_iterator<Item_sum>(*max_functions)))
        return 1;
    }
9352
  }
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
9353 9354
  else
    min_max_ranges.elements= 0;
9355 9356 9357 9358 9359 9360 9361 9362 9363 9364 9365 9366 9367

  return 0;
}


/*
  Release everything owned by the quick select: end the index access if the
  handler is still initialized, free the MIN/MAX range array (it exists only
  when there is a MIN/MAX key part), free the private memory root, and
  delete the list iterators and the prefix quick select.
*/

QUICK_GROUP_MIN_MAX_SELECT::~QUICK_GROUP_MIN_MAX_SELECT()
{
  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::~QUICK_GROUP_MIN_MAX_SELECT");

  if (!(file->inited == handler::NONE))
  {
    file->ha_index_end();
  }

  if (min_max_arg_part)
  {
    delete_dynamic(&min_max_ranges);
  }

  free_root(&alloc, MYF(0));

  delete min_functions_it;
  delete max_functions_it;
  delete quick_prefix_select;

  DBUG_VOID_RETURN;
}


/*
  Eventually create and add a new quick range object.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::add_range()
    sel_range  Range object from which a QUICK_RANGE is constructed

  NOTES
    A QUICK_RANGE is built from the boundaries of sel_range and appended to
    the array min_max_ranges. If sel_range is unbounded on both sides, e.g.
    (x < 5 or x > 4), nothing is added. A range that is bounded on both
    sides is additionally marked as NULL_RANGE (IS NULL) or as EQ_RANGE
    (both boundaries are the same value).

  RETURN
    FALSE on success
    TRUE  otherwise
*/

bool QUICK_GROUP_MIN_MAX_SELECT::add_range(SEL_ARG *sel_range)
{
  uint flags= sel_range->min_flag | sel_range->max_flag;

  /* Skip (-inf,+inf) ranges, e.g. (x < 5 or x > 4). */
  if ((flags & NO_MIN_RANGE) && (flags & NO_MAX_RANGE))
    return FALSE;

  const bool bounded_both_sides= (!(sel_range->min_flag & NO_MIN_RANGE) &&
                                  !(sel_range->max_flag & NO_MAX_RANGE));
  if (bounded_both_sides)
  {
    if (sel_range->maybe_null &&
        sel_range->min_value[0] && sel_range->max_value[0])
    {
      flags|= NULL_RANGE; /* IS NULL condition */
    }
    else if (!memcmp(sel_range->min_value, sel_range->max_value,
                     min_max_arg_len))
    {
      flags|= EQ_RANGE;  /* equality condition */
    }
  }

  QUICK_RANGE *new_range= new QUICK_RANGE(sel_range->min_value,
                                          min_max_arg_len,
                                          sel_range->max_value,
                                          min_max_arg_len,
                                          flags);
  if (!new_range)
    return TRUE;

  /* The array stores the pointer, not the QUICK_RANGE object itself. */
  if (insert_dynamic(&min_max_ranges, (gptr)&new_range))
    return TRUE;

  return FALSE;
}


/*
  Determine the total number and length of the keys that will be used for
  index lookup.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::update_key_stat()

  DESCRIPTION
    The total length of the keys used for index lookup depends on whether
    there are any predicates referencing the min/max argument, and/or if
    the min/max argument field can be NULL.
    This function does an optimistic analysis whether the search key might
    be extended by a constant for the min/max keypart. It is 'optimistic'
    because during actual execution it may happen that a particular range
    is skipped, and then a shorter key will be used. However this is data
    dependent and can't be easily estimated here.

  RETURN
    None
*/

void QUICK_GROUP_MIN_MAX_SELECT::update_key_stat()
{
  max_used_key_length= real_prefix_len;
  if (min_max_ranges.elements > 0)
  {
9449
    QUICK_RANGE *cur_range;
9450 9451 9452 9453 9454 9455 9456
    if (have_min)
    { /* Check if the right-most range has a lower boundary. */
      get_dynamic(&min_max_ranges, (gptr)&cur_range,
                  min_max_ranges.elements - 1);
      if (!(cur_range->flag & NO_MIN_RANGE))
      {
        max_used_key_length+= min_max_arg_len;
9457
        used_key_parts++;
9458 9459 9460 9461 9462 9463 9464 9465 9466
        return;
      }
    }
    if (have_max)
    { /* Check if the left-most range has an upper boundary. */
      get_dynamic(&min_max_ranges, (gptr)&cur_range, 0);
      if (!(cur_range->flag & NO_MAX_RANGE))
      {
        max_used_key_length+= min_max_arg_len;
9467
        used_key_parts++;
9468 9469 9470 9471
        return;
      }
    }
  }
9472 9473
  else if (have_min && min_max_arg_part &&
           min_max_arg_part->field->real_maybe_null())
9474
  {
9475 9476 9477 9478 9479 9480 9481 9482
    /*
      If a MIN/MAX argument value is NULL, we can quickly determine
      that we're in the beginning of the next group, because NULLs
      are always < any other value. This allows us to quickly
      determine the end of the current group and jump to the next
      group (see next_min()) and thus effectively increases the
      usable key length.
    */
9483
    max_used_key_length+= min_max_arg_len;
9484
    used_key_parts++;
9485 9486 9487 9488 9489 9490 9491 9492 9493 9494
  }
}


/*
  Initialize a quick group min/max select for key retrieval.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::reset()

  DESCRIPTION
    Initialize the index chosen for access and find and store the prefix
    of the last group. The method is expensive since it performs disk access.

  RETURN
    0      OK
    other  Error code
*/

int QUICK_GROUP_MIN_MAX_SELECT::reset(void)
{
  int result;
  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::reset");

  file->extra(HA_EXTRA_KEYREAD); /* We need only the key attributes */
9510
  result= file->ha_index_init(index, 1);
9511
  result= file->index_last(record);
sergefp@mysql.com's avatar
sergefp@mysql.com committed
9512 9513
  if (result == HA_ERR_END_OF_FILE)
    DBUG_RETURN(0);
9514 9515
  if (result)
    DBUG_RETURN(result);
sergefp@mysql.com's avatar
sergefp@mysql.com committed
9516 9517
  if (quick_prefix_select && quick_prefix_select->reset())
    DBUG_RETURN(1);
9518 9519 9520 9521 9522 9523 9524 9525 9526 9527 9528 9529 9530 9531 9532 9533 9534 9535 9536 9537 9538 9539 9540 9541 9542 9543 9544 9545 9546 9547 9548 9549 9550 9551 9552 9553 9554 9555 9556
  /* Save the prefix of the last group. */
  key_copy(last_prefix, record, index_info, group_prefix_len);

  DBUG_RETURN(0);
}



/* 
  Get the next key containing the MIN and/or MAX key for the next group.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::get_next()

  DESCRIPTION
    The method finds the next subsequent group of records that satisfies the
    query conditions and finds the keys that contain the MIN/MAX values for
    the key part referenced by the MIN/MAX function(s). Once a group and its
    MIN/MAX values are found, store these values in the Item_sum objects for
    the MIN/MAX functions. The rest of the values in the result row are stored
    in the Item_field::result_field of each select field. If the query does
    not contain MIN and/or MAX functions, then the function only finds the
    group prefix, which is a query answer itself.

  NOTES
    If both MIN and MAX are computed, then we use the fact that if there is
    no MIN key, there can't be a MAX key as well, so we can skip looking
    for a MAX key in this case.

  RETURN
    0                  on success
    HA_ERR_END_OF_FILE if returned all keys
    other              if some error occurred
*/

int QUICK_GROUP_MIN_MAX_SELECT::get_next()
{
  int min_res= 0;
  int max_res= 0;
timour@mysql.com's avatar
timour@mysql.com committed
9557 9558 9559 9560 9561 9562 9563
#ifdef HPUX11
  /*
    volatile is required by a bug in the HP compiler due to which the
    last test of result fails.
  */
  volatile int result;
#else
9564
  int result;
timour@mysql.com's avatar
timour@mysql.com committed
9565
#endif
9566 9567 9568 9569 9570 9571 9572 9573 9574 9575 9576 9577 9578 9579 9580 9581 9582 9583 9584 9585
  int is_last_prefix;

  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::get_next");

  /*
    Loop until a group is found that satisfies all query conditions or the last
    group is reached.
  */
  do
  {
    result= next_prefix();
    /*
      Check if this is the last group prefix. Notice that at this point
      this->record contains the current prefix in record format.
    */
    is_last_prefix= key_cmp(index_info->key_part, last_prefix,
                            group_prefix_len);
    DBUG_ASSERT(is_last_prefix <= 0);
    if (result == HA_ERR_KEY_NOT_FOUND)
      continue;
9586
    if (result)
9587 9588 9589 9590 9591 9592 9593 9594 9595 9596 9597 9598 9599 9600 9601 9602 9603 9604 9605
      break;

    if (have_min)
    {
      min_res= next_min();
      if (min_res == 0)
        update_min_result();
    }
    /* If there is no MIN in the group, there is no MAX either. */
    if ((have_max && !have_min) ||
        (have_max && have_min && (min_res == 0)))
    {
      max_res= next_max();
      if (max_res == 0)
        update_max_result();
      /* If a MIN was found, a MAX must have been found as well. */
      DBUG_ASSERT((have_max && !have_min) ||
                  (have_max && have_min && (max_res == 0)));
    }
9606
    /*
9607
      If this is just a GROUP BY or DISTINCT without MIN or MAX and there
9608 9609 9610 9611 9612 9613 9614
      are equality predicates for the key parts after the group, find the
      first sub-group with the extended prefix.
    */
    if (!have_min && !have_max && key_infix_len > 0)
      result= file->index_read(record, group_prefix, real_prefix_len,
                               HA_READ_KEY_EXACT);

9615
    result= have_min ? min_res : have_max ? max_res : result;
9616 9617
  } while ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
           is_last_prefix != 0);
9618 9619

  if (result == 0)
9620
  {
9621 9622 9623 9624 9625 9626 9627
    /*
      Partially mimic the behavior of end_select_send. Copy the
      field data from Item_field::field into Item_field::result_field
      of each non-aggregated field (the group fields, and optionally
      other fields in non-ANSI SQL mode).
    */
    copy_fields(&join->tmp_table_param);
9628
  }
9629 9630 9631 9632 9633 9634 9635 9636 9637 9638 9639 9640 9641 9642
  else if (result == HA_ERR_KEY_NOT_FOUND)
    result= HA_ERR_END_OF_FILE;

  DBUG_RETURN(result);
}


/*
  Retrieve the minimal key in the next group.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::next_min()

  DESCRIPTION
    Find the minimal key within this group such that the key satisfies the query
    conditions and NULL semantics. The found key is loaded into this->record.

  IMPLEMENTATION
    Depending on the values of min_max_ranges.elements, key_infix_len, and
    whether there is a NULL in the MIN field, this function may directly
    return without any data access. In this case we use the key loaded into
    this->record by the call to this->next_prefix() just before this call.

  RETURN
    0                    on success
    HA_ERR_KEY_NOT_FOUND if no MIN key was found that fulfills all conditions.
    HA_ERR_END_OF_FILE   if no MIN key was found that fulfills all conditions.
    other                if some error occurred
*/

int QUICK_GROUP_MIN_MAX_SELECT::next_min()
{
  int result= 0;
  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_min");

  /* Find the MIN key using the eventually extended group prefix. */
  if (min_max_ranges.elements > 0)
  {
    if ((result= next_min_in_range()))
      DBUG_RETURN(result);
  }
  else
  {
9672
    /* Apply the constant equality conditions to the non-group select fields */
9673 9674 9675 9676 9677 9678 9679 9680 9681 9682 9683 9684 9685 9686 9687 9688 9689 9690 9691 9692 9693 9694 9695 9696 9697 9698 9699 9700 9701 9702 9703 9704 9705
    if (key_infix_len > 0)
    {
      if ((result= file->index_read(record, group_prefix, real_prefix_len,
                                    HA_READ_KEY_EXACT)))
        DBUG_RETURN(result);
    }

    /*
      If the min/max argument field is NULL, skip subsequent rows in the same
      group with NULL in it. Notice that:
      - if the first row in a group doesn't have a NULL in the field, no row
      in the same group has (because NULL < any other value),
      - min_max_arg_part->field->ptr points to some place in 'record'.
    */
    if (min_max_arg_part && min_max_arg_part->field->is_null())
    {
      /* Find the first subsequent record without NULL in the MIN/MAX field. */
      key_copy(tmp_record, record, index_info, 0);
      result= file->index_read(record, tmp_record,
                               real_prefix_len + min_max_arg_len,
                               HA_READ_AFTER_KEY);
      /*
        Check if the new record belongs to the current group by comparing its
        prefix with the group's prefix. If it is from the next group, then the
        whole group has NULLs in the MIN/MAX field, so use the first record in
        the group as a result.
        TODO:
        It is possible to reuse this new record as the result candidate for the
        next call to next_min(), and to save one lookup in the next call. For
        this add a new member 'this->next_group_prefix'.
      */
      if (!result)
      {
9706
        if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
9707
          key_restore(record, tmp_record, index_info, 0);
9708
      }
9709
      else if (result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE)
9710 9711 9712 9713 9714 9715 9716 9717 9718 9719 9720 9721 9722 9723 9724 9725 9726 9727 9728
        result= 0; /* There is a result in any case. */
    }
  }

  /*
    If the MIN attribute is non-nullable, this->record already contains the
    MIN key in the group, so just return.
  */
  DBUG_RETURN(result);
}


/*
  Retrieve the maximal key in the next group.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::next_max()

  DESCRIPTION
    Lookup the maximal key of the group, and store it into this->record.

  RETURN
    0                    on success
    HA_ERR_KEY_NOT_FOUND if no MAX key was found that fulfills all conditions.
    HA_ERR_END_OF_FILE   if no MAX key was found that fulfills all conditions.
    other                if some error occurred
*/

int QUICK_GROUP_MIN_MAX_SELECT::next_max()
{
  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_max");

  /*
    With range conditions on the MIN/MAX argument the ranges decide which key
    is the last one; otherwise it is simply the last key that has the
    (possibly extended) group prefix.
  */
  const int lookup_res= (min_max_ranges.elements > 0) ?
                        next_max_in_range() :
                        file->index_read(record, group_prefix, real_prefix_len,
                                         HA_READ_PREFIX_LAST);
  DBUG_RETURN(lookup_res);
}


/*
  Determine the prefix of the next group.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::next_prefix()

  DESCRIPTION
    Determine the prefix of the next group that satisfies the query conditions.
    If there is a range condition referencing the group attributes, use a
    QUICK_RANGE_SELECT object to retrieve the *first* key that satisfies the
    condition. If there is a key infix of constants, append this infix
    immediately after the group attributes. The possibly extended prefix is
    stored in this->group_prefix. The first key of the found group is stored in
    this->record, on which relies this->next_min().

  RETURN
    0                    on success
    HA_ERR_KEY_NOT_FOUND if there is no key with the formed prefix
    HA_ERR_END_OF_FILE   if there are no more keys
    other                if some error occurred
*/
int QUICK_GROUP_MIN_MAX_SELECT::next_prefix()
{
  int result;
  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_prefix");

  if (quick_prefix_select)
  {
    byte *cur_prefix= seen_first_key ? group_prefix : NULL;
    if ((result= quick_prefix_select->get_next_prefix(group_prefix_len,
                                                      cur_prefix)))
      DBUG_RETURN(result);
    seen_first_key= TRUE;
  }
  else
  {
    if (!seen_first_key)
    {
      result= file->index_first(record);
      if (result)
        DBUG_RETURN(result);
      seen_first_key= TRUE;
    }
    else
    {
      /* Load the first key in this group into record. */
      result= file->index_read(record, group_prefix, group_prefix_len,
                               HA_READ_AFTER_KEY);
      if (result)
        DBUG_RETURN(result);
    }
  }

  /* Save the prefix of this group for subsequent calls. */
  key_copy(group_prefix, record, index_info, group_prefix_len);
  /* Append key_infix to group_prefix. */
  if (key_infix_len > 0)
    memcpy(group_prefix + group_prefix_len,
           key_infix, key_infix_len);

  DBUG_RETURN(0);
}


/*
  Find the minimal key in a group that satisfies some range conditions for the
  min/max argument field.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()

  DESCRIPTION
    Given the sequence of ranges min_max_ranges, find the minimal key that is
    in the left-most possible range. If there is no such key, then the current
    group does not have a MIN key that satisfies the WHERE clause. If a key is
    found, its value is stored in this->record.

  RETURN
    0                    on success
    HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of
                         the ranges
    HA_ERR_END_OF_FILE   if there is no key with the given prefix in any of
                         the ranges
    other                if some error occurred
*/

int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()
{
  ha_rkey_function find_flag;
  uint search_prefix_len;
  QUICK_RANGE *cur_range;
  bool found_null= FALSE;
  int result= HA_ERR_KEY_NOT_FOUND;

  DBUG_ASSERT(min_max_ranges.elements > 0);

  for (uint range_idx= 0; range_idx < min_max_ranges.elements; range_idx++)
  { /* Search from the left-most range to the right. */
    get_dynamic(&min_max_ranges, (gptr)&cur_range, range_idx);

    /*
      If the current value for the min/max argument is bigger than the right
      boundary of cur_range, there is no need to check this range.
    */
    if (range_idx != 0 && !(cur_range->flag & NO_MAX_RANGE) &&
9858
        (key_cmp(min_max_arg_part, (const byte*) cur_range->max_key,
9859
                 min_max_arg_len) == 1))
9860 9861 9862 9863 9864 9865 9866 9867 9868 9869 9870 9871 9872 9873 9874 9875 9876 9877 9878 9879
      continue;

    if (cur_range->flag & NO_MIN_RANGE)
    {
      find_flag= HA_READ_KEY_EXACT;
      search_prefix_len= real_prefix_len;
    }
    else
    {
      /* Extend the search key with the lower boundary for this range. */
      memcpy(group_prefix + real_prefix_len, cur_range->min_key,
             cur_range->min_length);
      search_prefix_len= real_prefix_len + min_max_arg_len;
      find_flag= (cur_range->flag & (EQ_RANGE | NULL_RANGE)) ?
                 HA_READ_KEY_EXACT : (cur_range->flag & NEAR_MIN) ?
                 HA_READ_AFTER_KEY : HA_READ_KEY_OR_NEXT;
    }

    result= file->index_read(record, group_prefix, search_prefix_len,
                             find_flag);
9880
    if (result)
9881
    {
9882 9883 9884 9885
      if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
          (cur_range->flag & (EQ_RANGE | NULL_RANGE)))
        continue; /* Check the next range. */

9886 9887 9888 9889 9890
      /*
        In all other cases (HA_ERR_*, HA_READ_KEY_EXACT with NO_MIN_RANGE,
        HA_READ_AFTER_KEY, HA_READ_KEY_OR_NEXT) if the lookup failed for this
        range, it can't succeed for any other subsequent range.
      */
9891
      break;
9892
    }
9893 9894 9895 9896 9897 9898

    /* A key was found. */
    if (cur_range->flag & EQ_RANGE)
      break; /* No need to perform the checks below for equal keys. */

    if (cur_range->flag & NULL_RANGE)
9899 9900 9901 9902 9903 9904
    {
      /*
        Remember this key, and continue looking for a non-NULL key that
        satisfies some other condition.
      */
      memcpy(tmp_record, record, head->s->rec_buff_length);
9905 9906 9907 9908 9909 9910 9911
      found_null= TRUE;
      continue;
    }

    /* Check if record belongs to the current group. */
    if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
    {
9912
      result= HA_ERR_KEY_NOT_FOUND;
9913 9914 9915 9916 9917 9918 9919 9920 9921 9922 9923 9924 9925 9926 9927 9928 9929
      continue;
    }

    /* If there is an upper limit, check if the found key is in the range. */
    if ( !(cur_range->flag & NO_MAX_RANGE) )
    {
      /* Compose the MAX key for the range. */
      byte *max_key= (byte*) my_alloca(real_prefix_len + min_max_arg_len);
      memcpy(max_key, group_prefix, real_prefix_len);
      memcpy(max_key + real_prefix_len, cur_range->max_key,
             cur_range->max_length);
      /* Compare the found key with max_key. */
      int cmp_res= key_cmp(index_info->key_part, max_key,
                           real_prefix_len + min_max_arg_len);
      if (!((cur_range->flag & NEAR_MAX) && (cmp_res == -1) ||
            (cmp_res <= 0)))
      {
9930
        result= HA_ERR_KEY_NOT_FOUND;
9931 9932 9933 9934 9935 9936 9937 9938 9939 9940 9941 9942 9943 9944
        continue;
      }
    }
    /* If we got to this point, the current key qualifies as MIN. */
    DBUG_ASSERT(result == 0);
    break;
  }
  /*
    If there was a key with NULL in the MIN/MAX field, and there was no other
    key without NULL from the same group that satisfies some other condition,
    then use the key with the NULL.
  */
  if (found_null && result)
  {
9945
    memcpy(record, tmp_record, head->s->rec_buff_length);
9946 9947 9948 9949 9950 9951 9952 9953 9954 9955 9956 9957 9958 9959 9960 9961 9962 9963 9964 9965 9966 9967 9968
    result= 0;
  }
  return result;
}


/*
  Find the maximal key in a group that satisfies some range conditions for the
  min/max argument field.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range()

  DESCRIPTION
    Given the sequence of ranges min_max_ranges, find the maximal key that is
    in the right-most possible range. If there is no such key, then the current
    group does not have a MAX key that satisfies the WHERE clause. If a key is
    found, its value is stored in this->record.

  RETURN
    0                    on success
    HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of
                         the ranges
    HA_ERR_END_OF_FILE   if there is no key with the given prefix in any of
                         the ranges
    other                if some error occurred
*/

int QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range()
{
  ha_rkey_function find_flag;
  uint search_prefix_len;
  QUICK_RANGE *cur_range;
  int result;

  DBUG_ASSERT(min_max_ranges.elements > 0);

  for (uint range_idx= min_max_ranges.elements; range_idx > 0; range_idx--)
  { /* Search from the right-most range to the left. */
    get_dynamic(&min_max_ranges, (gptr)&cur_range, range_idx - 1);

    /*
      If the current value for the min/max argument is smaller than the left
      boundary of cur_range, there is no need to check this range.
    */
    if (range_idx != min_max_ranges.elements &&
        !(cur_range->flag & NO_MIN_RANGE) &&
9992
        (key_cmp(min_max_arg_part, (const byte*) cur_range->min_key,
9993
                 min_max_arg_len) == -1))
9994 9995 9996 9997 9998 9999 10000 10001 10002 10003 10004 10005 10006 10007 10008 10009 10010 10011 10012 10013 10014
      continue;

    if (cur_range->flag & NO_MAX_RANGE)
    {
      find_flag= HA_READ_PREFIX_LAST;
      search_prefix_len= real_prefix_len;
    }
    else
    {
      /* Extend the search key with the upper boundary for this range. */
      memcpy(group_prefix + real_prefix_len, cur_range->max_key,
             cur_range->max_length);
      search_prefix_len= real_prefix_len + min_max_arg_len;
      find_flag= (cur_range->flag & EQ_RANGE) ?
                 HA_READ_KEY_EXACT : (cur_range->flag & NEAR_MAX) ?
                 HA_READ_BEFORE_KEY : HA_READ_PREFIX_LAST_OR_PREV;
    }

    result= file->index_read(record, group_prefix, search_prefix_len,
                             find_flag);

monty@mysql.com's avatar
monty@mysql.com committed
10015 10016
    if (result)
    {
10017 10018 10019 10020
      if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
          (cur_range->flag & EQ_RANGE))
        continue; /* Check the next range. */

10021 10022 10023 10024 10025
      /*
        In no key was found with this upper bound, there certainly are no keys
        in the ranges to the left.
      */
      return result;
monty@mysql.com's avatar
monty@mysql.com committed
10026
    }
10027 10028
    /* A key was found. */
    if (cur_range->flag & EQ_RANGE)
monty@mysql.com's avatar
monty@mysql.com committed
10029
      return 0; /* No need to perform the checks below for equal keys. */
10030 10031 10032

    /* Check if record belongs to the current group. */
    if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
monty@mysql.com's avatar
monty@mysql.com committed
10033
      continue;                                 // Row not found
10034 10035 10036 10037 10038 10039 10040 10041 10042 10043 10044 10045 10046 10047 10048 10049 10050 10051 10052 10053 10054 10055 10056 10057 10058 10059 10060 10061 10062 10063 10064 10065 10066 10067 10068 10069 10070 10071 10072 10073 10074 10075 10076 10077 10078 10079 10080 10081 10082 10083 10084 10085 10086 10087 10088 10089 10090 10091 10092 10093 10094 10095 10096 10097 10098 10099 10100 10101 10102 10103 10104 10105 10106 10107 10108 10109 10110 10111 10112 10113 10114 10115 10116 10117 10118 10119 10120 10121

    /* If there is a lower limit, check if the found key is in the range. */
    if ( !(cur_range->flag & NO_MIN_RANGE) )
    {
      /* Compose the MIN key for the range. */
      byte *min_key= (byte*) my_alloca(real_prefix_len + min_max_arg_len);
      memcpy(min_key, group_prefix, real_prefix_len);
      memcpy(min_key + real_prefix_len, cur_range->min_key,
             cur_range->min_length);
      /* Compare the found key with min_key. */
      int cmp_res= key_cmp(index_info->key_part, min_key,
                           real_prefix_len + min_max_arg_len);
      if (!((cur_range->flag & NEAR_MIN) && (cmp_res == 1) ||
            (cmp_res >= 0)))
        continue;
    }
    /* If we got to this point, the current key qualifies as MAX. */
    return result;
  }
  return HA_ERR_KEY_NOT_FOUND;
}


/*
  Update all MIN function results with the newly found value.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::update_min_result()

  DESCRIPTION
    The method iterates through all MIN functions and updates the result value
    of each function by calling Item_sum::reset(), which in turn picks the new
    result value from this->head->record[0], previously updated by
    next_min(). The updated value is stored in a member variable of each of the
    Item_sum objects, depending on the value type.

  IMPLEMENTATION
    The update must be done separately for MIN and MAX, immediately after
    next_min() was called and before next_max() is called, because both MIN and
    MAX take their result value from the same buffer this->head->record[0]
    (i.e.  this->record).

  RETURN
    None
*/

void QUICK_GROUP_MIN_MAX_SELECT::update_min_result()
{
  Item_sum *min_func;

  min_functions_it->rewind();
  while ((min_func= (*min_functions_it)++))
    min_func->reset();
}


/*
  Update all MAX function results with the newly found value.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::update_max_result()

  DESCRIPTION
    The method iterates through all MAX functions and updates the result value
    of each function by calling Item_sum::reset(), which in turn picks the new
    result value from this->head->record[0], previously updated by
    next_max(). The updated value is stored in a member variable of each of the
    Item_sum objects, depending on the value type.

  IMPLEMENTATION
    The update must be done separately for MIN and MAX, immediately after
    next_max() was called, because both MIN and MAX take their result value
    from the same buffer this->head->record[0] (i.e.  this->record).

  RETURN
    None
*/

void QUICK_GROUP_MIN_MAX_SELECT::update_max_result()
{
  Item_sum *max_func;

  max_functions_it->rewind();
  while ((max_func= (*max_functions_it)++))
    max_func->reset();
}


/*
  Append the name of the index this quick select uses to key_names;
  append the corresponding used key length to used_lengths.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths()
    key_names    [out] Names of used indexes
    used_lengths [out] Corresponding used key lengths

  DESCRIPTION
    This method is used by select_describe to extract the names of the
    indexes used by a quick select.

*/

void QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths(String *key_names,
                                                      String *used_lengths)
{
  char len_buf[64];
  char *len_end;

  key_names->append(index_info->name);
  /* longlong2str() returns a pointer past the last digit written. */
  len_end= longlong2str(max_used_key_length, len_buf, 10);
  const uint len_digits= len_end - len_buf;
  used_lengths->append(len_buf, len_digits);
}


#ifndef DBUG_OFF

/*
  Debug helper: print the names of the indexes of 'tree' that are set in
  'tree_map' to the DBUG trace, prefixed by 'msg'.
*/
static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
                           const char *msg)
{
  SEL_ARG **key,**end;
  int idx;
  char buff[1024];
  DBUG_ENTER("print_sel_tree");
  if (! _db_on_)
    DBUG_VOID_RETURN;

  String tmp(buff,sizeof(buff),&my_charset_bin);
  tmp.length(0);
  for (idx= 0,key=tree->keys, end=key+param->keys ;
       key != end ;
       key++,idx++)
  {
    if (tree_map->is_set(idx))
    {
      uint keynr= param->real_keynr[idx];
      if (tmp.length())
        tmp.append(',');
      tmp.append(param->table->key_info[keynr].name);
    }
  }
  if (!tmp.length())
    tmp.append(STRING_WITH_LEN("(empty)"));

  /*
    The buffer is printed with %s, so it has to be NUL-terminated;
    ptr() alone does not guarantee that.
  */
  DBUG_PRINT("info", ("SEL_TREE %p (%s) scans:%s", tree, msg,
                      tmp.c_ptr_safe()));

  DBUG_VOID_RETURN;
}
10181

10182 10183 10184 10185

/*
  Debug helper: print the index names of the ROR scans in [start, end) to
  the DBUG trace, prefixed by 'msg'.
*/
static void print_ror_scans_arr(TABLE *table, const char *msg,
                                struct st_ror_scan_info **start,
                                struct st_ror_scan_info **end)
{
  DBUG_ENTER("print_ror_scans");
  if (! _db_on_)
    DBUG_VOID_RETURN;

  char buff[1024];
  String tmp(buff,sizeof(buff),&my_charset_bin);
  tmp.length(0);
  for (;start != end; start++)
  {
    if (tmp.length())
      tmp.append(',');
    tmp.append(table->key_info[(*start)->keynr].name);
  }
  if (!tmp.length())
    tmp.append(STRING_WITH_LEN("(empty)"));
  /*
    The buffer is printed with %s, so it has to be NUL-terminated;
    ptr() alone does not guarantee that.
  */
  DBUG_PRINT("info", ("ROR key scans (%s): %s", msg, tmp.c_ptr_safe()));
  DBUG_VOID_RETURN;
}


/*****************************************************************************
** Print a quick range for debugging
** TODO:
** This should be changed to use a String to store each row instead
** of locking the DEBUG stream !
*****************************************************************************/

/*
  Write the first used_length bytes of a key to DBUG_FILE: the key parts
  are printed in order as strings separated by '/', and a key part whose
  null byte is set is printed as NULL.
*/
static void
print_key(KEY_PART *key_part,const char *key,uint used_length)
{
  char buff[1024];
  const char *key_end= key+used_length;
  String tmp(buff,sizeof(buff),&my_charset_bin);
  uint store_length;

  for (; key < key_end; key+=store_length, key_part++)
  {
    Field *field=      key_part->field;
    store_length= key_part->store_length;

    if (field->real_maybe_null())
    {
      /* For a nullable field the key image starts with a null byte. */
      if (*key)
      {
	fwrite("NULL",sizeof(char),4,DBUG_FILE);
	continue;
      }
      key++;					// Skip null byte
      store_length--;
    }
    /* Load the key image into the field so that it can be printed. */
    field->set_key_image((char*) key, key_part->length);
    if (field->type() == MYSQL_TYPE_BIT)
      (void) field->val_int_as_str(&tmp, 1);
    else
      field->val_str(&tmp);
    fwrite(tmp.ptr(),sizeof(char),tmp.length(),DBUG_FILE);
    /* Separate this key part from the next one, if there is one. */
    if (key+store_length < key_end)
      fputc('/',DBUG_FILE);
  }
}

pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
10248

10249
/*
  Debug helper: dump a quick select, followed by the needed_reg key map,
  to DBUG_FILE. Does nothing if debugging is off or 'quick' is NULL.
*/
static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg)
{
  char buf[MAX_KEY/8+1];
  DBUG_ENTER("print_quick");
  if (! _db_on_ || !quick)
    DBUG_VOID_RETURN;
  /* The lock is held for the whole dump and released below. */
  DBUG_LOCK_FILE;

  quick->dbug_dump(0, TRUE);
  fprintf(DBUG_FILE,"other_keys: 0x%s:\n", needed_reg->print(buf));

  DBUG_UNLOCK_FILE;
  DBUG_VOID_RETURN;
}

pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
10264

10265
/*
  Debug helper: write the 'len' bytes at 'val' to DBUG_FILE, first as
  quoted raw characters and then as space-separated hexadecimal values.
*/
static void print_rowid(byte* val, int len)
{
  byte *pb;
  DBUG_LOCK_FILE;
  fputc('\"', DBUG_FILE);
  for (pb= val; pb!= val + len; ++pb)
    fprintf(DBUG_FILE, "%c", *pb);
  fprintf(DBUG_FILE, "\", hex: ");

  /*
    Convert each byte to unsigned before printing: if 'byte' is a signed
    type, values >= 0x80 would otherwise be sign-extended by the integer
    promotion and printed as ffffffXX.
  */
  for (pb= val; pb!= val + len; ++pb)
    fprintf(DBUG_FILE, "%x ", (unsigned int) (unsigned char) *pb);
  fputc('\n', DBUG_FILE);
  DBUG_UNLOCK_FILE;
}
10279

10280 10281 10282 10283
void QUICK_RANGE_SELECT::dbug_dump(int indent, bool verbose)
{
  fprintf(DBUG_FILE, "%*squick range select, key %s, length: %d\n",
	  indent, "", head->key_info[index].name, max_used_key_length);
10284

10285
  if (verbose)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
10286
  {
10287 10288
    QUICK_RANGE *range;
    QUICK_RANGE **pr= (QUICK_RANGE**)ranges.buffer;
10289
    QUICK_RANGE **last_range= pr + ranges.elements;
10290
    for (; pr!=last_range; ++pr)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
10291
    {
10292 10293 10294 10295 10296 10297 10298 10299 10300 10301 10302
      fprintf(DBUG_FILE, "%*s", indent + 2, "");
      range= *pr;
      if (!(range->flag & NO_MIN_RANGE))
      {
        print_key(key_parts,range->min_key,range->min_length);
        if (range->flag & NEAR_MIN)
	  fputs(" < ",DBUG_FILE);
        else
	  fputs(" <= ",DBUG_FILE);
      }
      fputs("X",DBUG_FILE);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
10303

10304 10305 10306 10307 10308 10309 10310 10311 10312
      if (!(range->flag & NO_MAX_RANGE))
      {
        if (range->flag & NEAR_MAX)
	  fputs(" < ",DBUG_FILE);
        else
	  fputs(" <= ",DBUG_FILE);
        print_key(key_parts,range->max_key,range->max_length);
      }
      fputs("\n",DBUG_FILE);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
10313 10314
    }
  }
10315 10316 10317 10318 10319 10320 10321 10322 10323 10324 10325 10326
}

void QUICK_INDEX_MERGE_SELECT::dbug_dump(int indent, bool verbose)
{
  List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
  QUICK_RANGE_SELECT *quick;
  fprintf(DBUG_FILE, "%*squick index_merge select\n", indent, "");
  fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
  while ((quick= it++))
    quick->dbug_dump(indent+2, verbose);
  if (pk_quick_select)
  {
10327
    fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, "");
10328 10329 10330 10331 10332 10333 10334 10335 10336
    pk_quick_select->dbug_dump(indent+2, verbose);
  }
  fprintf(DBUG_FILE, "%*s}\n", indent, "");
}

void QUICK_ROR_INTERSECT_SELECT::dbug_dump(int indent, bool verbose)
{
  List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
  QUICK_RANGE_SELECT *quick;
10337
  fprintf(DBUG_FILE, "%*squick ROR-intersect select, %scovering\n",
10338 10339 10340
          indent, "", need_to_fetch_row? "":"non-");
  fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
  while ((quick= it++))
10341
    quick->dbug_dump(indent+2, verbose);
10342 10343
  if (cpk_quick)
  {
10344
    fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, "");
10345 10346 10347 10348 10349 10350 10351 10352 10353 10354 10355 10356 10357 10358
    cpk_quick->dbug_dump(indent+2, verbose);
  }
  fprintf(DBUG_FILE, "%*s}\n", indent, "");
}

void QUICK_ROR_UNION_SELECT::dbug_dump(int indent, bool verbose)
{
  List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
  QUICK_SELECT_I *quick;
  fprintf(DBUG_FILE, "%*squick ROR-union select\n", indent, "");
  fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
  while ((quick= it++))
    quick->dbug_dump(indent+2, verbose);
  fprintf(DBUG_FILE, "%*s}\n", indent, "");
bk@work.mysql.com's avatar
bk@work.mysql.com committed
10359 10360
}

10361 10362 10363 10364 10365 10366 10367 10368 10369 10370 10371 10372 10373 10374 10375 10376 10377 10378 10379 10380 10381 10382 10383 10384 10385 10386 10387 10388 10389 10390 10391 10392 10393 10394 10395 10396 10397 10398 10399 10400 10401 10402 10403 10404

/*
  Print quick select information to DBUG_FILE.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::dbug_dump()
    indent  Indentation offset
    verbose If TRUE show more detailed output.

  DESCRIPTION
    Print the contents of this quick select to DBUG_FILE. The method also
    calls dbug_dump() for the used quick select if any.

  IMPLEMENTATION
    Caller is responsible for locking DBUG_FILE before this call and unlocking
    it afterwards.

  RETURN
    None
*/

void QUICK_GROUP_MIN_MAX_SELECT::dbug_dump(int indent, bool verbose)
{
  /* Header line: index name, index number and used key length. */
  fprintf(DBUG_FILE,
          "%*squick_group_min_max_select: index %s (%d), length: %d\n",
          indent, "", index_info->name, index, max_used_key_length);

  /* Only the length of the key infix is reported. */
  if (key_infix_len > 0)
    fprintf(DBUG_FILE, "%*susing key_infix with length %d:\n",
            indent, "", key_infix_len);

  /* The prefix quick select, if present, prints itself two columns deeper. */
  if (quick_prefix_select)
  {
    fprintf(DBUG_FILE, "%*susing quick_range_select:\n", indent, "");
    quick_prefix_select->dbug_dump(indent + 2, verbose);
  }

  /* Only the number of MIN/MAX ranges is reported, not their contents. */
  if (min_max_ranges.elements > 0)
    fprintf(DBUG_FILE, "%*susing %d quick_ranges for MIN/MAX:\n",
            indent, "", min_max_ranges.elements);
}


monty@mysql.com's avatar
monty@mysql.com committed
10405
#endif /* NOT_USED */
bk@work.mysql.com's avatar
bk@work.mysql.com committed
10406 10407

/*****************************************************************************
10408
** Instantiate templates
bk@work.mysql.com's avatar
bk@work.mysql.com committed
10409 10410
*****************************************************************************/

10411
/*
  Explicit instantiations of the QUICK_RANGE list templates, compiled only
  when HAVE_EXPLICIT_TEMPLATE_INSTANTIATION is defined.
*/
#if defined(HAVE_EXPLICIT_TEMPLATE_INSTANTIATION)
template class List<QUICK_RANGE>;
template class List_iterator<QUICK_RANGE>;
#endif /* HAVE_EXPLICIT_TEMPLATE_INSTANTIATION */